├── packages
    ├── tts-app
    │   ├── main.js
    │   ├── README.md
    │   └── package.json
    ├── tts-lib
    │   ├── index.js
    │   ├── README.md
    │   └── package.json
    ├── tts-cli
    │   ├── docs
    │   │   ├── aws-tts.gif
    │   │   ├── tts-cli.gif
    │   │   └── options.md
    │   ├── test
    │   │   ├── jasmine.json
    │   │   ├── cleanup.spec.js
    │   │   ├── file-extensions.spec.js
    │   │   ├── sanitize-opts.spec.js
    │   │   ├── combine-raw-audio.spec.js
    │   │   ├── create-manifest.spec.js
    │   │   ├── move-temp-file.spec.js
    │   │   ├── build-info.spec.js
    │   │   ├── check-usage.spec.js
    │   │   ├── helpers.js
    │   │   ├── combine-encoded-audio.spec.js
    │   │   ├── combine.spec.js
    │   │   ├── read-text.spec.js
    │   │   ├── split-text.spec.js
    │   │   ├── generate-all.spec.js
    │   │   ├── cli.spec.js
    │   │   ├── generate-speech.spec.js
    │   │   ├── providers
    │   │   │   ├── aws.spec.js
    │   │   │   └── gcp.spec.js
    │   │   └── text-chunk.spec.js
    │   ├── lib
    │   │   ├── file-extensions.js
    │   │   ├── sanitize-opts.js
    │   │   ├── move-temp-file.js
    │   │   ├── cleanup.js
    │   │   ├── read-text.js
    │   │   ├── providers
    │   │   │   ├── aws.js
    │   │   │   └── gcp.js
    │   │   ├── combine-parts.js
    │   │   ├── check-usage.js
    │   │   ├── text-chunk.js
    │   │   ├── split-text.js
    │   │   └── generate-speech.js
    │   ├── package.json
    │   ├── tts.js
    │   └── README.md
    └── web-tts
    │   ├── tsconfig.json
    │   ├── package.json
    │   ├── README.md
    │   ├── docs
    │       └── commandfile.md
    │   └── index.ts
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── general.md
    │   ├── web.md
    │   └── cli.md
    └── workflows
    │   └── ci.yaml
├── .editorconfig
├── .gitignore
├── README.md
├── package.json
└── LICENSE.txt


/packages/tts-app/main.js:
--------------------------------------------------------------------------------
1 | // Nothing yet, sorry!
2 | 


--------------------------------------------------------------------------------
/packages/tts-lib/index.js:
--------------------------------------------------------------------------------
1 | // Nothing yet, sorry!
2 | 


--------------------------------------------------------------------------------
/packages/tts-app/README.md:
--------------------------------------------------------------------------------
1 | # Text-To-Speech App
2 | 
3 | Coming soon
4 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/general.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: General
3 | about: A general issue or request
4 | 
5 | ---
6 | 


--------------------------------------------------------------------------------
/packages/tts-cli/docs/aws-tts.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eheikes/tts/HEAD/packages/tts-cli/docs/aws-tts.gif


--------------------------------------------------------------------------------
/packages/tts-cli/docs/tts-cli.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eheikes/tts/HEAD/packages/tts-cli/docs/tts-cli.gif


--------------------------------------------------------------------------------
/packages/tts-cli/test/jasmine.json:
--------------------------------------------------------------------------------
1 | {
2 |   "spec_dir": "test",
3 |   "spec_files": [
4 |     "**/*.spec.js"
5 |   ],
6 |   "random": false
7 | }
8 | 


--------------------------------------------------------------------------------
/packages/tts-lib/README.md:
--------------------------------------------------------------------------------
 1 | # `tts-lib`
 2 | 
 3 | > Abstraction library for TTS services. (Not yet implemented.)
 4 | 
 5 | ## Usage
 6 | 
 7 | ```
 8 | const ttsLib = require('tts-lib');
 9 | 
10 | // TODO: DEMONSTRATE API
11 | ```
12 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # http://EditorConfig.org
 2 | root = true
 3 | 
 4 | [*]
 5 | charset = utf-8
 6 | end_of_line = lf
 7 | insert_final_newline = true
 8 | trim_trailing_whitespace = true
 9 | indent_style = space
10 | indent_size = 2
11 | 
12 | [*.md]
13 | trim_trailing_whitespace = false
14 | 


--------------------------------------------------------------------------------
/packages/web-tts/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "es6",
 4 |     "module": "commonjs",
 5 |     "lib": [
 6 |       "dom",
 7 |       "esnext"
 8 |     ],
 9 |     "outDir": "dist",
10 |     "strict": true,
11 |     "noUnusedLocals": true,
12 |     "noUnusedParameters": true
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/file-extensions.js:
--------------------------------------------------------------------------------
 1 | exports.extensionFor = (format, service) => {
 2 |   if (format === 'mp3') {
 3 |     return 'mp3'
 4 |   } else if (format === 'ogg' || format === 'ogg_vorbis') {
 5 |     return 'ogg'
 6 |   } else if (format === 'pcm') {
 7 |     return service === 'gcp' ? 'wav' : 'pcm'
 8 |   }
 9 |   throw new Error(`No known file extension for "${format}" format`)
10 | }
11 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/sanitize-opts.js:
--------------------------------------------------------------------------------
 1 | exports.sanitizeOpts = (opts) => {
 2 |   const sanitizedOpts = Object.assign({}, opts)
 3 |   sanitizedOpts['access-key'] = sanitizedOpts['access-key'] ? 'XXXXXXXX' : undefined
 4 |   sanitizedOpts['secret-key'] = sanitizedOpts['secret-key'] ? 'XXXXXXXX' : undefined
 5 |   sanitizedOpts.accessKey = sanitizedOpts.accessKey ? 'XXXXXXXX' : undefined
 6 |   sanitizedOpts.privateKey = sanitizedOpts.privateKey ? 'XXXXXXXX' : undefined
 7 |   sanitizedOpts.secretKey = sanitizedOpts.secretKey ? 'XXXXXXXX' : undefined
 8 |   return sanitizedOpts
 9 | }
10 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/move-temp-file.js:
--------------------------------------------------------------------------------
 1 | const debug = require('debug')('moveTempFile')
 2 | const fs = require('fs-extra')
 3 | 
 4 | /**
 5 |  * Moves the temporary file to the final destination.
 6 |  */
 7 | exports.moveTempFile = (ctx, task) => {
 8 |   const tempFile = ctx.tempFile
 9 |   const outputFilename = ctx.outputFilename
10 |   debug(`copying ${tempFile} to ${outputFilename}`)
11 |   return new Promise((resolve, reject) => {
12 |     fs.move(tempFile, outputFilename, { overwrite: true }, (err) => {
13 |       if (err) { return reject(err) }
14 |       task.title = `Done. Saved to ${outputFilename}`
15 |       resolve()
16 |     })
17 |   })
18 | }
19 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/cleanup.js:
--------------------------------------------------------------------------------
 1 | const debug = require('debug')('cleanup')
 2 | const fs = require('fs-extra')
 3 | 
 4 | /**
 5 |  * Deletes the manifest and its files.
 6 |  */
 7 | exports.cleanup = ctx => {
 8 |   const manifestFile = ctx.manifestFile
 9 |   const manifest = fs.readFileSync(manifestFile, 'utf8')
10 |   debug(`Manifest is ${manifest}`)
11 |   const regexpState = /^file\s+'(.*)'$/gm
12 |   let match
13 |   while ((match = regexpState.exec(manifest)) !== null) {
14 |     debug(`Deleting temporary file ${match[1]}`)
15 |     fs.removeSync(match[1])
16 |   }
17 |   debug(`Deleting manifest file ${manifestFile}`)
18 |   fs.removeSync(manifestFile)
19 | }
20 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/web.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: web-tts
 3 | about: Issue or request regarding the web-tts tool
 4 | 
 5 | ---
 6 | 
 7 | If you are running into a problem when using web-tts, please fill in as much info below as possible. That makes it easier to diagnose and identify the issue. Thanks!
 8 | 
 9 | 1. What is the exact command you are running?
10 | 
11 | 2. Please post as much of your command file as possible.
12 | 
13 | 3. What result are you seeing in the console? Copy & paste the exact output you get.
14 | 
15 | 4. What OS are you using (Windows, OSX, Linux) and what version?
16 | 
17 | 5. What version of Node.js is being used? (Run `node -v` in the console to find out.)
18 | 


--------------------------------------------------------------------------------
/packages/tts-app/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tts-app",
 3 |   "version": "0.2.0",
 4 |   "description": "Graphical text-to-speech app",
 5 |   "keywords": [
 6 |     "tts",
 7 |     "text-to-speech",
 8 |     "windows",
 9 |     "mac",
10 |     "linux"
11 |   ],
12 |   "author": "Eric Heikes <eheikes@gmail.com>",
13 |   "homepage": "https://github.com/eheikes/tts/tree/master/packages/tts-app#readme",
14 |   "license": "Apache-2.0",
15 |   "main": "main.js",
16 |   "repository": {
17 |     "type": "git",
18 |     "url": "git+https://github.com/eheikes/tts.git"
19 |   },
20 |   "scripts": {},
21 |   "bugs": {
22 |     "url": "https://github.com/eheikes/tts/issues"
23 |   },
24 |   "devDependencies": {
25 |     "codecov": "^3.6.5"
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | dist/
 2 | 
 3 | # Dependency directories
 4 | bower_components/
 5 | node_modules/
 6 | jspm_packages/
 7 | .yarn-cache/
 8 | 
 9 | # Code coverage
10 | .nyc_output/
11 | coverage/
12 | 
13 | # Logs
14 | logs/
15 | *.log
16 | npm-debug.log*
17 | yarn-debug.log*
18 | yarn-error.log*
19 | 
20 | # Runtime data
21 | pids/
22 | *.pid
23 | *.seed
24 | *.pid.lock
25 | 
26 | # Optional npm cache directory
27 | .npm/
28 | 
29 | # Optional REPL history
30 | .node_repl_history
31 | 
32 | # Output of 'npm pack'
33 | *.tgz
34 | 
35 | # Yarn Integrity file
36 | .yarn-integrity
37 | 
38 | # Whitesource
39 | ws-*.json
40 | ws-log-npm-report
41 | 
42 | # Terraform
43 | terraform.tfstate*
44 | 
45 | # dotenv environment variables file
46 | .env
47 | 
48 | # OSX junk
49 | .DS_Store
50 | 


--------------------------------------------------------------------------------
/packages/tts-lib/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tts-lib",
 3 |   "version": "1.0.0-pre.1",
 4 |   "description": "Abstraction library for TTS services",
 5 |   "keywords": [
 6 |     "tts",
 7 |     "amazon",
 8 |     "google",
 9 |     "polly",
10 |     "cloud",
11 |     "aws",
12 |     "text-to-speech"
13 |   ],
14 |   "author": "Eric Heikes <eheikes@gmail.com>",
15 |   "homepage": "https://github.com/eheikes/tts/tree/master/packages/tts-lib#readme",
16 |   "license": "Apache-2.0",
17 |   "main": "index.js",
18 |   "repository": {
19 |     "type": "git",
20 |     "url": "git+https://github.com/eheikes/tts.git"
21 |   },
22 |   "scripts": {},
23 |   "bugs": {
24 |     "url": "https://github.com/eheikes/tts/issues"
25 |   },
26 |   "devDependencies": {
27 |     "codecov": "^3.6.5"
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: Continuous Integration
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | permissions:
10 |   id-token: write
11 |   contents: write
12 | 
13 | jobs:
14 |   build:
15 | 
16 |     runs-on: ubuntu-latest
17 | 
18 |     strategy:
19 |       matrix:
20 |         node-version: [18.x, 20.x, 21.x, 22.x, 23.x]
21 | 
22 |     steps:
23 |       - uses: actions/checkout@v2
24 |       - name: Use Node.js ${{ matrix.node-version }}
25 |         uses: actions/setup-node@v2
26 |         with:
27 |           node-version: ${{ matrix.node-version }}
28 |       - name: Install dependencies
29 |         run: npm install
30 |       - run: npm run lint
31 |       - run: npm run test
32 |       - name: Upload coverage report
33 |         uses: codecov/codecov-action@v2
34 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/cleanup.spec.js:
--------------------------------------------------------------------------------
 1 | describe('cleanup()', () => {
 2 |   const manifestFilename = 'manifest.txt'
 3 |   const tempFilenames = ['foo.mp3', 'bar.mp3']
 4 | 
 5 |   let cleanup, fs
 6 |   let ctx
 7 | 
 8 |   beforeEach(() => {
 9 |     ({ cleanup, fs } = require('./helpers').loadLib('cleanup'))
10 |     ctx = {
11 |       manifestFile: manifestFilename
12 |     }
13 |   })
14 | 
15 |   beforeEach(() => {
16 |     const manifestContents = tempFilenames.map(filename => `file '${filename}'`).join('\n')
17 |     fs.readFileSync.and.callFake(() => manifestContents)
18 |     return cleanup(ctx)
19 |   })
20 | 
21 |   it('should delete the manifest file', () => {
22 |     expect(fs.removeSync).toHaveBeenCalledWith(manifestFilename)
23 |   })
24 | 
25 |   it('should delete the temporary audio files', () => {
26 |     tempFilenames.forEach(filename => {
27 |       expect(fs.removeSync).toHaveBeenCalledWith(filename)
28 |     })
29 |   })
30 | })
31 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/file-extensions.spec.js:
--------------------------------------------------------------------------------
 1 | const { extensionFor } = require('../lib/file-extensions')
 2 | 
 3 | describe('extensionFor()', () => {
 4 |   it('should return "mp3" for the MP3 format', () => {
 5 |     expect(extensionFor('mp3', 'aws')).toBe('mp3')
 6 |     expect(extensionFor('mp3', 'gcp')).toBe('mp3')
 7 |   })
 8 | 
 9 |   it('should return "ogg" for the Ogg format', () => {
10 |     expect(extensionFor('ogg', 'aws')).toBe('ogg')
11 |     expect(extensionFor('ogg', 'gcp')).toBe('ogg')
12 |   })
13 | 
14 |   it('should return "ogg" for the (deprecated) Ogg Vorbis format', () => {
15 |     expect(extensionFor('ogg_vorbis', 'aws')).toBe('ogg')
16 |   })
17 | 
18 |   it('should return "pcm" for the PCM format on AWS', () => {
19 |     expect(extensionFor('pcm', 'aws')).toBe('pcm')
20 |   })
21 | 
22 |   it('should return "wav" for the PCM format on GCP', () => {
23 |     expect(extensionFor('pcm', 'gcp')).toBe('wav')
24 |   })
25 | 
26 |   it('should throw an error for unknown formats', () => {
27 |     expect(() => {
28 |       extensionFor('foo', 'aws')
29 |     }).toThrow()
30 |   })
31 | })
32 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/sanitize-opts.spec.js:
--------------------------------------------------------------------------------
 1 | const { sanitizeOpts } = require('../lib/sanitize-opts')
 2 | describe('sanitizeOpts()', () => {
 3 |   const exampleOpts = {
 4 |     foo: 1,
 5 |     bar: 2,
 6 |     accessKey: 3,
 7 |     privateKey: 5,
 8 |     secretKey: 4,
 9 |     'access-key': 3,
10 |     'secret-key': 4
11 |   }
12 | 
13 |   let sanitized
14 | 
15 |   beforeEach(() => {
16 |     sanitized = sanitizeOpts(exampleOpts)
17 |   })
18 | 
19 |   it('should not change the original options', () => {
20 |     expect(sanitized).not.toBe(exampleOpts)
21 |   })
22 | 
23 |   it('should sanitize AWS secrets', () => {
24 |     expect(sanitized.accessKey).toMatch(/^X+$/)
25 |     expect(sanitized.secretKey).toMatch(/^X+$/)
26 |     expect(sanitized['access-key']).toMatch(/^X+$/)
27 |     expect(sanitized['secret-key']).toMatch(/^X+$/)
28 |   })
29 | 
30 |   it('should sanitize GCP secrets', () => {
31 |     expect(sanitized.privateKey).toMatch(/^X+$/)
32 |   })
33 | 
34 |   it('should not change other keys', () => {
35 |     expect(sanitized.foo).toBe(exampleOpts.foo)
36 |     expect(sanitized.bar).toBe(exampleOpts.bar)
37 |   })
38 | })
39 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/combine-raw-audio.spec.js:
--------------------------------------------------------------------------------
 1 | describe('combineRawAudio()', () => {
 2 |   const manifestFilename = 'manifest.txt'
 3 |   const outputFilename = 'foobar.mp3'
 4 |   const tempFilenames = ['foo.mp3', 'bar.mp3']
 5 | 
 6 |   let combineRawAudio, fs
 7 | 
 8 |   beforeEach(() => {
 9 |     ({ combineRawAudio, fs } = require('./helpers').loadLib('combine-parts'))
10 |   })
11 | 
12 |   beforeEach(done => {
13 |     const manifestContents = tempFilenames.map(filename => `file '${filename}'`).join('\n')
14 |     fs.readFileSync.and.callFake(() => manifestContents)
15 |     combineRawAudio(manifestFilename, outputFilename).then(done)
16 |   })
17 | 
18 |   it('should create the output file and truncate it', () => {
19 |     expect(fs.createFileSync).toHaveBeenCalledWith(outputFilename)
20 |     expect(fs.truncateSync).toHaveBeenCalledWith(outputFilename)
21 |   })
22 | 
23 |   it('should read and append each file from the manifest', () => {
24 |     tempFilenames.forEach(filename => {
25 |       expect(fs.readFileSync).toHaveBeenCalledWith(filename)
26 |     })
27 |     expect(fs.appendFileSync.calls.count()).toBe(tempFilenames.length)
28 |   })
29 | })
30 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/create-manifest.spec.js:
--------------------------------------------------------------------------------
 1 | describe('createManifest()', () => {
 2 |   const testParts = [
 3 |     { tempfile: 'foo.mp3' },
 4 |     { tempfile: 'bar.mp3' }
 5 |   ]
 6 | 
 7 |   let createManifest, fs
 8 |   let outputFilename, fileContents, options, lines, response
 9 | 
10 |   beforeEach(() => {
11 |     ({ createManifest, fs } = require('./helpers').loadLib('generate-speech'))
12 |   })
13 | 
14 |   beforeEach(() => {
15 |     response = createManifest(testParts);
16 |     [outputFilename, fileContents, options] = fs.writeFileSync.calls.mostRecent().args
17 |     lines = fileContents.split('\n')
18 |   })
19 | 
20 |   it('should create a text file', () => {
21 |     expect(outputFilename).toMatch(/\.txt$/)
22 |     expect(options).toBe('utf8')
23 |   })
24 | 
25 |   it('should have a file entry for each part', () => {
26 |     expect(lines.length).toBe(testParts.length)
27 |   })
28 | 
29 |   it('should use the correct format', () => {
30 |     lines.forEach((line, i) => {
31 |       expect(line).toMatch(`^file '${testParts[i].tempfile}'$`)
32 |     })
33 |   })
34 | 
35 |   it('should return the filename', () => {
36 |     expect(response).toBe(outputFilename)
37 |   })
38 | })
39 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/cli.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: CLI
 3 | about: Issue or request regarding the command-line tool
 4 | 
 5 | ---
 6 | 
 7 | If you are running into a problem when using tts-cli, please fill in as much info below as possible. That makes it easier to diagnose and identify the issue. Thanks!
 8 | 
 9 | Also, check out the [Troubleshooting section](https://github.com/eheikes/tts/tree/master/packages/tts-cli#troubleshooting) of the README for some quick tips.
10 | 
11 | 1. What is the exact command you are running? For your security, please "X" out any AWS access keys and secrets.
12 | 
13 | 2. What result are you seeing in the console? Copy & paste the exact output you get, with debugging turned on (see the [Troubleshooting section](https://github.com/eheikes/tts/tree/master/packages/tts-cli#troubleshooting) for how to enable debugging).
14 | 
15 | 3. If copyright allows, please upload your input file somewhere (e.g. [pastebin](https://pastebin.com/)) and put a link to it here.
16 | 
17 | 4. What OS are you using (Windows, OSX, Linux) and what version?
18 | 
19 | 5. What version of Node.js is being used? (Run `node -v` in the console to find out.)
20 | 
21 | 6. What version of ffmpeg is being used? (Run `ffmpeg -version` in the console to find out.)
22 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/read-text.js:
--------------------------------------------------------------------------------
 1 | const debug = require('debug')('readText')
 2 | const fs = require('fs-extra')
 3 | 
 4 | /**
 5 |  * Read in the text from a file.
 6 |  * If no file is specified, read from stdin.
 7 |  */
 8 | exports.readText = (ctx) => {
 9 |   const inputFilename = ctx.input
10 |   const proc = ctx.process
11 |   return new Promise((resolve, reject) => {
12 |     if (inputFilename) {
13 |       // Read from a file.
14 |       debug(`Reading from ${inputFilename}`)
15 |       fs.readFile(inputFilename, 'utf8', (err, data) => {
16 |         if (err) { return reject(err) }
17 |         debug(`Finished reading (${data.length} bytes)`)
18 |         resolve(data)
19 |       })
20 |     } else {
21 |       // Read from stdin.
22 |       debug('Reading from stdin')
23 |       let data = ''
24 |       proc.stdin.setEncoding('utf8')
25 |       proc.stdin.on('readable', () => {
26 |         const chunk = proc.stdin.read()
27 |         /* istanbul ignore else: need to add test for this */
28 |         if (chunk !== null) { data += chunk }
29 |       })
30 |       proc.stdin.on('end', () => {
31 |         debug(`Finished reading (${data.length} bytes)`)
32 |         resolve(data)
33 |       })
34 |     }
35 |   }).then(text => {
36 |     ctx.text = text
37 |   })
38 | }
39 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/move-temp-file.spec.js:
--------------------------------------------------------------------------------
 1 | describe('moveTempFile()', () => {
 2 |   const sourceFile = 'source file'
 3 |   const destFile = 'destination file'
 4 | 
 5 |   let moveTempFile, fs
 6 |   let ctx, task
 7 | 
 8 |   beforeEach(() => {
 9 |     ({ fs, moveTempFile } = require('./helpers').loadLib('move-temp-file'))
10 |     ctx = {
11 |       tempFile: sourceFile,
12 |       outputFilename: destFile
13 |     }
14 |     task = { title: 'test task' }
15 |     return moveTempFile(ctx, task)
16 |   })
17 | 
18 |   it('should overwrite the destination filename with the specified temp file', () => {
19 |     expect(fs.move).toHaveBeenCalledWith(
20 |       sourceFile,
21 |       destFile,
22 |       { overwrite: true },
23 |       jasmine.any(Function)
24 |     )
25 |   })
26 | 
27 |   it('should update the task title', () => {
28 |     expect(task.title).toContain('Done. Saved to')
29 |   })
30 | 
31 |   it('should return the error if the filesystem call fails', () => {
32 |     fs.move.and.callFake((src, dest, opts, callback) => callback(new Error('test error')))
33 |     return moveTempFile(ctx, task).then(() => {
34 |       throw new Error('should have thrown!')
35 |     }).catch(err => {
36 |       expect(err.message).toBe('test error')
37 |     })
38 |   })
39 | })
40 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/build-info.spec.js:
--------------------------------------------------------------------------------
 1 | describe('buildInfo()', () => {
 2 |   const task = {}
 3 |   const text = 'foobar'
 4 |   const format = 'mp3'
 5 |   const instance = { foo: 1, bar: 2 }
 6 |   const ctx = {
 7 |     opts: { format }
 8 |   }
 9 | 
10 |   let buildInfo, output
11 | 
12 |   beforeEach(() => {
13 |     ({ buildInfo } = require('./helpers').loadLib('generate-speech'))
14 |     output = buildInfo(text, { buildPart: () => instance }, task, ctx)
15 |   })
16 | 
17 |   it('should return an object', () => {
18 |     expect(output).toEqual(jasmine.any(Object))
19 |   })
20 | 
21 |   it('should have an "opts" property with the original options', () => {
22 |     expect(output.opts).toEqual(ctx.opts)
23 |   })
24 | 
25 |   it('should have a "task" property', () => {
26 |     expect(output.task).toEqual(task)
27 |   })
28 | 
29 |   it('should have a "tempfile" property', () => {
30 |     expect(output.tempfile).toEqual(jasmine.any(String))
31 |   })
32 | 
33 |   it('should have an appropriate file extension for the tempfile', () => {
34 |     expect(output.tempfile).toMatch(`\\.${format}$`)
35 |   })
36 | 
37 |   it('should have a "text" property with the original text', () => {
38 |     expect(output.text).toBe(text)
39 |   })
40 | 
41 |   it('should add in the instance\'s properties', () => {
42 |     expect(output.foo).toBe(instance.foo)
43 |     expect(output.bar).toBe(instance.bar)
44 |   })
45 | })
46 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Text-To-Speech Tools
 2 | 
 3 | This monorepository includes tools to convert text of any size to speech:
 4 | 
 5 | * [Command-line interface (CLI) tool](packages/tts-cli) to convert text to speech
 6 | * [Web TTS CLI tool](packages/web-tts) to convert webpages to speech
 7 | 
 8 | These tools require an account with at least one of these (paid) services:
 9 | 
10 | * [Amazon Web Services](https://aws.amazon.com) for [AWS Polly](https://aws.amazon.com/polly/)
11 | * [Google Cloud Platform](https://cloud.google.com) for [GCP Text-to-Speech](https://cloud.google.com/text-to-speech/)
12 | 
13 | [![Vulnerabilities](https://img.shields.io/snyk/vulnerabilities/npm/tts-cli)](https://snyk.io/vuln/npm:tts-cli)
14 | [![Build Status](https://img.shields.io/github/actions/workflow/status/eheikes/tts/ci.yaml?branch=main)](https://github.com/eheikes/tts/actions/workflows/ci.yaml)
15 | [![Coverage](https://img.shields.io/codecov/c/gh/eheikes/tts?token=9bd5731ce1a34766bdf3d780a648fa05)](https://codecov.io/gh/eheikes/tts)
16 | [![License](https://img.shields.io/github/license/eheikes/tts)](https://github.com/eheikes/tts/blob/master/LICENSE.txt)
17 |         
18 | ## Contributing
19 | 
20 | Pull requests and suggestions are welcome. [Create a new issue](https://github.com/eheikes/tts/issues/new) to report a bug or suggest a new feature.
21 | 
22 | Development commands:
23 | 
24 | ```
25 | npm install      # download the project dependencies
26 | npm run lint     # lint code
27 | npm run test     # run tests
28 | ```
29 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tts-monorepo",
 3 |   "private": true,
 4 |   "workspaces": [
 5 |     "packages/*"
 6 |   ],
 7 |   "description": "Monorepo for text-to-speech tools",
 8 |   "repository": {
 9 |     "type": "git",
10 |     "url": "git+https://github.com/eheikes/tts.git"
11 |   },
12 |   "keywords": [
13 |     "aws",
14 |     "amazon",
15 |     "polly",
16 |     "text",
17 |     "speech",
18 |     "tts"
19 |   ],
20 |   "author": "Eric Heikes <eheikes@gmail.com>",
21 |   "license": "Apache-2.0",
22 |   "bugs": {
23 |     "url": "https://github.com/eheikes/tts/issues"
24 |   },
25 |   "homepage": "https://github.com/eheikes/tts#readme",
26 |   "scripts": {
27 |     "lint": "npm run lint --workspaces --if-present",
28 |     "report-coverage": "npm run report-coverage --workspaces --if-present",
29 |     "test": "npm run test --workspaces --if-present"
30 |   },
31 |   "devDependencies": {
32 |     "cross-env": "^5.2.0",
33 |     "jasmine": "^3.5.0",
34 |     "nyc": "^15.0.1",
35 |     "proxyquire": "^1.8.0",
36 |     "standard": "^17.1.0"
37 |   },
38 |   "dependencies": {
39 |     "tts-cli": "file:packages/tts-cli"
40 |   },
41 |   "resolutions": {
42 |     "**/@grpc/grpc-js": ">=1.1.8",
43 |     "**/@npmcli/git": ">=2.0.8",
44 |     "**/dot-prop": ">=5.1.1 <7",
45 |     "**/json-schema": ">=0.4.0",
46 |     "**/kind-of": ">=6.0.3",
47 |     "**/node-forge": ">=0.10.0",
48 |     "**/sanitize-html": ">=2.3.2",
49 |     "**/tar": ">=4.4.19",
50 |     "**/trim-newlines": ">=3.0.1"
51 |   }
52 | }
53 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/check-usage.spec.js:
--------------------------------------------------------------------------------
 1 | const { checkUsage } = require('../lib/check-usage')
 2 | 
 3 | describe('checkUsage()', () => {
 4 |   let proc, exit, write
 5 | 
 6 |   beforeEach(() => {
 7 |     exit = jasmine.createSpy('process.exit')
 8 |     write = jasmine.createSpy('process.stderr.write')
 9 |     proc = {
10 |       argv: ['node', 'tts.js'],
11 |       exit,
12 |       stderr: {
13 |         write
14 |       }
15 |     }
16 |   })
17 | 
18 |   describe('when --help is specified', () => {
19 |     beforeEach(() => {
20 |       checkUsage({ _: [], help: true }, proc)
21 |     })
22 | 
23 |     it('should output the usage statement', () => {
24 |       expect(write).toHaveBeenCalled()
25 |     })
26 | 
27 |     it('should exit without an error', () => {
28 |       expect(exit).toHaveBeenCalledWith(0)
29 |     })
30 |   })
31 | 
32 |   describe('when 1 argument is passed', () => {
33 |     beforeEach(() => {
34 |       checkUsage({ _: ['foo'] }, proc)
35 |     })
36 | 
37 |     it('should NOT output the usage statement', () => {
38 |       expect(write).not.toHaveBeenCalled()
39 |     })
40 | 
41 |     it('should NOT exit', () => {
42 |       expect(exit).not.toHaveBeenCalled()
43 |     })
44 |   })
45 | 
46 |   describe('when no arguments are passed', () => {
47 |     beforeEach(() => {
48 |       checkUsage({ _: [] }, proc)
49 |     })
50 | 
51 |     it('should output the usage statement', () => {
52 |       expect(write).toHaveBeenCalled()
53 |     })
54 | 
55 |     it('should exit with an error', () => {
56 |       expect(exit).toHaveBeenCalledWith(1)
57 |     })
58 |   })
59 | })
60 | 


--------------------------------------------------------------------------------
/packages/web-tts/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "web-tts",
 3 |   "version": "0.2.5",
 4 |   "description": "Convert webpages to audio speech",
 5 |   "main": "dist/index.js",
 6 |   "bin": {
 7 |     "web-tts": "./dist/index.js"
 8 |   },
 9 |   "scripts": {
10 |     "build": "tsc && chmod a+x dist/index.js"
11 |   },
12 |   "author": "Eric Heikes <eheikes@gmail.com>",
13 |   "license": "Apache-2.0",
14 |   "keywords": [
15 |     "audio",
16 |     "text",
17 |     "speech",
18 |     "tts",
19 |     "scrape",
20 |     "web",
21 |     "webpage",
22 |     "website"
23 |   ],
24 |   "repository": {
25 |     "type": "git",
26 |     "url": "git+https://github.com/eheikes/tts.git"
27 |   },
28 |   "homepage": "https://github.com/eheikes/tts/tree/master/packages/web-tts#readme",
29 |   "bugs": {
30 |     "url": "https://github.com/eheikes/tts/issues"
31 |   },
32 |   "files": [
33 |     "dist/index.js"
34 |   ],
35 |   "dependencies": {
36 |     "execa": "^4.0.3",
37 |     "js-yaml": "^3.14.0",
38 |     "minimist": "^1.2.5",
39 |     "puppeteer": "^24.22.3",
40 |     "tempy": "^0.6.0",
41 |     "tts-cli": "^5.0.0"
42 |   },
43 |   "devDependencies": {
44 |     "@types/js-yaml": "^3.12.5",
45 |     "@types/minimist": "^1.2.0",
46 |     "npm-run-all": "^4.1.5",
47 |     "nyc": "^15.1.0",
48 |     "standard": "^17.1.0",
49 |     "ts-node": "^9.0.0",
50 |     "typescript": "^4.0.2"
51 |   },
52 |   "config": {
53 |     "commitizen": {
54 |       "path": "cz-conventional-changelog"
55 |     }
56 |   },
57 |   "standard": {
58 |     "env": {
59 |       "node": true
60 |     }
61 |   },
62 |   "nyc": {
63 |     "check-coverage": true,
64 |     "per-file": false,
65 |     "lines": 95,
66 |     "statements": 95,
67 |     "functions": 95,
68 |     "branches": 95,
69 |     "include": [
70 |       "*.js"
71 |     ],
72 |     "reporter": [
73 |       "lcov",
74 |       "text-summary"
75 |     ],
76 |     "cache": false,
77 |     "all": false,
78 |     "report-dir": "./coverage"
79 |   }
80 | }
81 | 


--------------------------------------------------------------------------------
/packages/tts-cli/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "tts-cli",
 3 |   "version": "5.4.1",
 4 |   "description": "Command-line tool to convert text to speech",
 5 |   "bin": {
 6 |     "tts": "tts.js"
 7 |   },
 8 |   "files": [
 9 |     "lib/*",
10 |     "LICENSE.txt",
11 |     "tts.js",
12 |     "yarn.lock"
13 |   ],
14 |   "scripts": {
15 |     "lint": "standard --env jasmine --fix --verbose",
16 |     "report-coverage": "codecov",
17 |     "test": "cross-env JASMINE_CONFIG_PATH=test/jasmine.json nyc jasmine"
18 |   },
19 |   "repository": {
20 |     "type": "git",
21 |     "url": "git+https://github.com/eheikes/tts.git"
22 |   },
23 |   "keywords": [
24 |     "aws",
25 |     "amazon",
26 |     "polly",
27 |     "text",
28 |     "speech",
29 |     "tts"
30 |   ],
31 |   "author": "Eric Heikes <eheikes@gmail.com>",
32 |   "license": "Apache-2.0",
33 |   "bugs": {
34 |     "url": "https://github.com/eheikes/tts/issues"
35 |   },
36 |   "homepage": "https://github.com/eheikes/tts/tree/master/packages/tts-cli#readme",
37 |   "dependencies": {
38 |     "@aws-sdk/client-polly": "^3.651.1",
39 |     "@google-cloud/text-to-speech": "^5.0.1",
40 |     "async": "^3.0.0",
41 |     "debug": "^3.1.0",
42 |     "fs-extra": "^2.0.0",
43 |     "listr2": "^8.0.2",
44 |     "minimist": "^1.2.0",
45 |     "sax": "^1.2.4",
46 |     "sentence-splitter": "^5.0.0",
47 |     "tempfile": "^1.1.1",
48 |     "zen-observable": "^0.10.0"
49 |   },
50 |   "devDependencies": {
51 |     "codecov": "^3.6.5"
52 |   },
53 |   "standard": {
54 |     "env": {
55 |       "node": true
56 |     }
57 |   },
58 |   "nyc": {
59 |     "check-coverage": true,
60 |     "per-file": false,
61 |     "lines": 95,
62 |     "statements": 95,
63 |     "functions": 95,
64 |     "branches": 95,
65 |     "include": [
66 |       "lib/**/*.js",
67 |       "*.js"
68 |     ],
69 |     "reporter": [
70 |       "lcov",
71 |       "text-summary"
72 |     ],
73 |     "cache": false,
74 |     "all": false,
75 |     "report-dir": "./coverage"
76 |   }
77 | }
78 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/helpers.js:
--------------------------------------------------------------------------------
 1 | const async = require('async')
 2 | const originalFs = require('fs')
 3 | const proxyquire = require('proxyquire')
 4 | 
 5 | exports.loadLib = (file) => {
 6 |   // Spy on the async module.
 7 |   spyOn(async, 'eachOfLimit').and.callThrough()
 8 | 
 9 |   // Stub out the fs(-extra) module with spies.
10 |   const fs = jasmine.createSpyObj('fs', [
11 |     'appendFileSync',
12 |     'createFileSync',
13 |     'createWriteStream',
14 |     'move',
15 |     'readFile',
16 |     'readFileSync',
17 |     'removeSync',
18 |     'truncateSync',
19 |     'writeFile',
20 |     'writeFileSync'
21 |   ])
22 |   fs.createWriteStream.and.callFake(filename => {
23 |     const stream = originalFs.createWriteStream(filename)
24 |     return stream
25 |   })
26 |   fs.move.and.callFake((src, dest, opts, callback) => { callback() })
27 |   fs.writeFile.and.callFake((dest, data, opts, callback) => { callback() })
28 | 
29 |   // Stub out a provider.
30 |   const providerStub = {
31 |     create: () => ({
32 |       buildPart: () => ({}),
33 |       generate: (item, key, callback) => callback(null, null)
34 |     })
35 |   }
36 | 
37 |   const spawnOnSpy = jasmine.createSpy('spawn.on').and.callFake((type, callback) => {
38 |     if (type === 'close') { callback() }
39 |   })
40 |   const spawnStderrOn = jasmine.createSpy('spawn.stderr.on')
41 |   const spawn = jasmine.createSpy('spawn').and.callFake(() => {
42 |     return {
43 |       on: spawnOnSpy,
44 |       stderr: {
45 |         on: spawnStderrOn
46 |       }
47 |     }
48 |   })
49 | 
50 |   // Load the library module.
51 |   const lib = proxyquire(`../lib/${file}`, {
52 |     './providers/aws': providerStub,
53 |     './providers/gcp': providerStub,
54 |     async,
55 |     child_process: { spawn }, // eslint-disable-line camelcase
56 |     'fs-extra': fs
57 |   })
58 | 
59 |   // Add the spies for inspection.
60 |   lib.async = async
61 |   lib.fs = fs
62 |   lib.provider = providerStub
63 |   lib.spawn = spawn
64 |   lib.spawn.on = spawnOnSpy
65 |   lib.spawn.stderr = { on: spawnStderrOn }
66 | 
67 |   return lib
68 | }
69 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/providers/aws.js:
--------------------------------------------------------------------------------
 1 | const { Polly, SynthesizeSpeechCommand } = require('@aws-sdk/client-polly')
 2 | const debug = require('debug')
 3 | const fs = require('fs-extra')
 4 | 
 5 | const PollyProvider = function (opts) {
 6 |   this.instance = new Polly({
 7 |     credentials: {
 8 |       accessKeyId: opts.accessKey,
 9 |       secretAccessKey: opts.secretKey
10 |     },
11 |     region: opts.region
12 |   })
13 | }
14 | 
15 | exports.PollyProvider = PollyProvider
16 | 
/**
 * Returns the provider-specific fields for a part:
 * a `send` function bound to this provider's Polly client.
 */
PollyProvider.prototype.buildPart = function () {
  const client = this.instance
  const send = client.send.bind(client)
  return { send }
}
22 | 
/**
 * Calls the Polly API with the given info and writes the returned audio
 * stream to `info.tempfile`.
 *
 * @param {object} info - Part description. Reads `info.task.title`,
 *   `info.opts`, `info.text`, `info.tempfile` and `info.send` (the function
 *   produced by `buildPart()`).
 * @param {number|string} i - Index written into the task title in place of
 *   the first "<digits>/" sequence.
 * @param {Function} callback - Called exactly once: with no arguments on
 *   success, or with the error on failure.
 */
PollyProvider.prototype.generate = (info, i, callback) => {
  info.task.title = info.task.title.replace(/\d+\//, `${i}/`)

  const command = new SynthesizeSpeechCommand({
    Engine: info.opts.engine,
    LanguageCode: info.opts.language,
    LexiconNames: info.opts.lexicon,
    OutputFormat: info.opts.format === 'ogg' ? 'ogg_vorbis' : info.opts.format,
    SampleRate: info.opts.sampleRate ? String(info.opts.sampleRate) : undefined,
    Text: info.text,
    TextType: info.opts.type,
    VoiceId: info.opts.voice
  })

  // Several stream events can signal completion; only the first one counts.
  let settled = false
  const succeed = () => {
    if (settled) { return }
    settled = true
    callback()
  }
  const fail = err => {
    if (settled) { return }
    settled = true
    callback(err)
  }

  debug('generate')('Making request to Amazon Web Services')
  info.send(command).then(response => {
    let fileStream
    try {
      debug('generate')(`Writing audio content to ${info.tempfile}`)
      fileStream = fs.createWriteStream(info.tempfile)
      fileStream.on('finish', () => {
        fileStream.close()
        succeed()
      })
      fileStream.on('error', err => {
        debug('generate')(`Error writing: ${err.message}`)
        fail(err)
      })
      // pipe() does not forward source errors to the destination, so a broken
      // download must be handled here or the callback would never be called.
      response.AudioStream.on('error', err => {
        debug('generate')(`Error reading audio stream: ${err.message}`)
        fileStream.destroy()
        fail(err)
      })
      response.AudioStream.pipe(fileStream)
    } catch (err) {
      // Setup failed synchronously (e.g. the response carries no stream).
      debug('generate')(`Error writing: ${err.message}`)
      if (fileStream) { fileStream.destroy() }
      fail(err)
    }
  }, err => {
    debug('generate')(`Error during request: ${err.message}`)
    fail(err)
  })
}
58 | 
59 | /**
60 |  * Create an AWS Polly instance.
61 |  */
62 | exports.create = opts => {
63 |   debug('create')(`Creating AWS Polly instance in ${opts.region}`)
64 |   return new PollyProvider(opts)
65 | }
66 | 


--------------------------------------------------------------------------------
/packages/tts-cli/tts.js:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env node
 2 | /**
 3 |  * Takes a text file and calls the AWS Polly API
 4 |  *   to convert it to an audio file.
 5 |  */
 6 | const debug = require('debug')('tts-cli')
 7 | const { checkUsage } = require('./lib/check-usage')
 8 | const { cleanup } = require('./lib/cleanup')
 9 | const { combine } = require('./lib/combine-parts')
10 | const { generateSpeech } = require('./lib/generate-speech')
11 | const { moveTempFile } = require('./lib/move-temp-file')
12 | const { readText } = require('./lib/read-text')
13 | const { sanitizeOpts } = require('./lib/sanitize-opts')
14 | const { splitText } = require('./lib/split-text')
15 | 
16 | const args = require('minimist')(process.argv.slice(2))
17 | debug('called with arguments', JSON.stringify(sanitizeOpts(args)))
18 | 
19 | let [input, outputFilename] = args._
20 | 
21 | // If only 1 argument was given, use that for the output filename.
22 | if (!outputFilename) {
23 |   outputFilename = input
24 |   input = null
25 | }
26 | debug('input:', input)
27 | debug('output:', outputFilename)
28 | 
29 | // Check the usage.
30 | checkUsage(args, process)
31 | 
32 | // Define the tasks and options.
33 | const tasks = [{
34 |   title: 'Reading text',
35 |   task: readText
36 | }, {
37 |   title: 'Splitting text',
38 |   task: splitText
39 | }, {
40 |   title: 'Convert to audio',
41 |   task: generateSpeech
42 | }, {
43 |   title: 'Combine audio',
44 |   task: combine
45 | }, {
46 |   title: 'Clean up',
47 |   task: cleanup
48 | }, {
49 |   title: 'Saving file',
50 |   task: moveTempFile
51 | }]
52 | const service = args.service || 'aws'
53 | const context = {
54 |   args,
55 |   input,
56 |   maxCharacterCount: service === 'gcp' ? 5000 : 1500,
57 |   outputFilename,
58 |   process,
59 |   service
60 | }
61 | 
62 | // Run the tasks.
63 | if (require.main === module) /* istanbul ignore next */{
64 |   const { Listr } = require('listr2')
65 |   const list = new Listr(tasks, {
66 |     renderer: debug.enabled ? 'silent' : 'default'
67 |   })
68 |   list.run(context).catch(err => {
69 |     if (debug.enabled) {
70 |       console.error(err.stack)
71 |     }
72 |   })
73 | }
74 | 
75 | module.exports = { // for testing
76 |   context,
77 |   tasks
78 | }
79 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/combine-encoded-audio.spec.js:
--------------------------------------------------------------------------------
 1 | describe('combineEncodedAudio()', () => {
 2 |   const binary = 'ffmpeg'
 3 |   const manifestFilename = 'manifest.txt'
 4 |   const tempFilename = 'foobar.mp3'
 5 | 
 6 |   let combineEncodedAudio, spawn
 7 | 
 8 |   beforeEach(() => {
 9 |     ({ combineEncodedAudio, spawn } = require('./helpers').loadLib('combine-parts'))
10 |   })
11 | 
12 |   describe('process', () => {
13 |     let cmd, args
14 | 
15 |     beforeEach(() => {
16 |       combineEncodedAudio(binary, manifestFilename, tempFilename);
17 |       [cmd, args] = spawn.calls.mostRecent().args
18 |       args = args.join(' ')
19 |     })
20 | 
21 |     it('should spawn a ffmpeg process', () => {
22 |       expect(cmd).toBe(binary)
23 |     })
24 | 
25 |     it('should use the "concat" demuxer', () => {
26 |       expect(args).toMatch(/-f concat/)
27 |     })
28 | 
29 |     it('should allow any filenames', () => {
30 |       expect(args).toMatch(/-safe 0/)
31 |     })
32 | 
33 |     it('should use the manifest file', () => {
34 |       expect(args).toMatch(`-i ${manifestFilename}`)
35 |     })
36 | 
37 |     it('should make a stream copy', () => {
38 |       expect(args).toMatch(/-c copy/)
39 |     })
40 | 
41 |     it('should copy the result to the temp file', () => {
42 |       expect(args).toMatch(`${tempFilename}$`)
43 |     })
44 |   })
45 | 
46 |   describe('when the process cannot be spawned', () => {
47 |     beforeEach(() => {
48 |       spawn.on.and.callFake((type, callback) => {
49 |         if (type === 'error') { callback() }
50 |       })
51 |     })
52 | 
53 |     it('should return a rejected promise with an error', done => {
54 |       combineEncodedAudio(binary, manifestFilename, tempFilename).catch(err => {
55 |         expect(err.message).toMatch('Could not start ffmpeg process')
56 |       }).then(done)
57 |     })
58 |   })
59 | 
60 |   describe('when the ffmpeg process fails', () => {
61 |     const errorCode = 42
62 |     const errorOutput = 'foobar'
63 | 
64 |     beforeEach(() => {
65 |       spawn.on.and.callFake((type, callback) => {
66 |         if (type === 'close') { callback(errorCode) }
67 |       })
68 |       spawn.stderr.on.and.callFake((type, callback) => {
69 |         if (type === 'data') { callback(errorOutput) }
70 |       })
71 |     })
72 | 
73 |     it('should return a rejected promise with the stderr output', done => {
74 |       combineEncodedAudio(binary, manifestFilename, tempFilename).catch(err => {
75 |         expect(err.message).toMatch(`(${errorCode})`)
76 |         expect(err.message).toMatch(`(${errorOutput})`)
77 |       }).then(done)
78 |     })
79 |   })
80 | })
81 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/combine.spec.js:
--------------------------------------------------------------------------------
 1 | describe('combine()', () => {
 2 |   const testManifest = 'manifest.txt'
 3 | 
 4 |   let combine, fs, spawn
 5 |   let ctx
 6 | 
 7 |   beforeEach(() => {
 8 |     ctx = {
 9 |       manifestFile: testManifest,
10 |       opts: {
11 |         ffmpeg: 'ffmpeg',
12 |         format: 'mp3'
13 |       }
14 |     }
15 |     const lib = require('./helpers').loadLib('combine-parts')
16 |     combine = lib.combine
17 |     fs = lib.fs
18 |     spawn = lib.spawn
19 |   })
20 | 
21 |   describe('when the format is MP3', () => {
22 |     it('should call combineEncodedAudio()', () => {
23 |       return combine(ctx).then(() => {
24 |         // We can't spy on combineEncodedAudio() directly, so look at its internals.
25 |         expect(spawn).toHaveBeenCalled()
26 |         expect(spawn.calls.mostRecent().args[0]).toBe(ctx.opts.ffmpeg)
27 |       })
28 |     })
29 |   })
30 | 
31 |   describe('when the format is PCM', () => {
32 |     describe('and the service is AWS', () => {
33 |       it('should call combineRawAudio()', () => {
34 |         ctx.service = 'aws'
35 |         ctx.opts.format = 'pcm'
36 |         return combine(ctx).then(() => {
37 |           // We can't spy on combineRawAudio() directly, so look at its internals.
38 |           expect(fs.createFileSync).toHaveBeenCalled()
39 |         })
40 |       })
41 |     })
42 | 
43 |     describe('and the service is GCP', () => {
44 |       it('should call combineRawAudio()', () => {
45 |         ctx.service = 'gcm'
46 |         ctx.opts.format = 'pcm'
47 |         return combine(ctx).then(() => {
48 |           // We can't spy on combineEncodedAudio() directly, so look at its internals.
49 |           expect(spawn).toHaveBeenCalled()
50 |           expect(spawn.calls.mostRecent().args[0]).toBe(ctx.opts.ffmpeg)
51 |         })
52 |       })
53 |     })
54 |   })
55 | 
56 |   describe('when it succeeds', () => {
57 |     beforeEach(() => {
58 |       ctx.opts.format = 'pcm'
59 |       return combine(ctx)
60 |     })
61 | 
62 |     it('should return the new filename', () => {
63 |       expect(ctx.tempFile).toMatch(/\.pcm$/)
64 |     })
65 |   })
66 | 
67 |   describe('when it fails', () => {
68 |     let result
69 | 
70 |     beforeEach(() => {
71 |       spawn.on.and.callFake((type, callback) => {
72 |         if (type === 'error') { callback() }
73 |       })
74 |       return combine(ctx).catch(response => {
75 |         result = response
76 |       })
77 |     })
78 | 
79 |     it('should return a rejected promise with the error', () => {
80 |       expect(result.message).toMatch('Could not start ffmpeg process')
81 |     })
82 |   })
83 | })
84 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/read-text.spec.js:
--------------------------------------------------------------------------------
 1 | describe('readText()', () => {
 2 |   const testData = 'hello world'
 3 | 
 4 |   let readText, fs
 5 |   let ctx, stdin
 6 | 
 7 |   beforeEach(() => {
 8 |     ({ readText, fs } = require('./helpers').loadLib('read-text'))
 9 |   })
10 | 
11 |   beforeEach(() => {
12 |     let sentData = false
13 |     stdin = jasmine.createSpyObj('stdin', ['on', 'read', 'setEncoding'])
14 |     stdin.on.and.callFake((type, callback) => { callback() })
15 |     stdin.read.and.callFake(() => {
16 |       const response = sentData ? null : testData
17 |       sentData = true
18 |       return response
19 |     })
20 |     ctx = {
21 |       input: null,
22 |       process: {
23 |         stdin
24 |       }
25 |     }
26 |   })
27 | 
28 |   describe('when it succeeds', () => {
29 |     it('should set the read text', done => {
30 |       readText(ctx).then(() => {
31 |         expect(ctx.text).toBe(testData)
32 |       }).then(done)
33 |     })
34 |   })
35 | 
36 |   describe('when no filename is specified', () => {
37 |     it('should read data from stdin', done => {
38 |       readText(ctx).then(() => {
39 |         expect(stdin.on).toHaveBeenCalled()
40 |         expect(stdin.read).toHaveBeenCalled()
41 |       }).then(done)
42 |     })
43 | 
44 |     it('should use UTF-8 encoding', done => {
45 |       readText(ctx).then(() => {
46 |         expect(stdin.setEncoding).toHaveBeenCalledWith('utf8')
47 |       }).then(done)
48 |     })
49 |   })
50 | 
51 |   describe('when a filename is specified', () => {
52 |     const testFilename = 'test.txt'
53 | 
54 |     beforeEach(() => {
55 |       ctx.input = testFilename
56 |       fs.readFile.and.callFake((filename, opts, callback) => {
57 |         callback(null, testData)
58 |       })
59 |     })
60 | 
61 |     it('should read data from the file', done => {
62 |       readText(ctx).then(() => {
63 |         expect(fs.readFile).toHaveBeenCalledWith(
64 |           testFilename,
65 |           'utf8',
66 |           jasmine.any(Function)
67 |         )
68 |       }).then(done)
69 |     })
70 | 
71 |     describe('and can read the file', () => {
72 |       it('should set the file\'s data', done => {
73 |         readText(ctx).then(() => {
74 |           expect(ctx.text).toBe(testData)
75 |         }).then(done)
76 |       })
77 |     })
78 | 
79 |     describe('and cannot read the file', () => {
80 |       it('should reject with the error', done => {
81 |         const testError = 'error object'
82 |         fs.readFile.and.callFake((filename, opts, callback) => {
83 |           callback(testError)
84 |         })
85 |         readText(ctx).catch(err => {
86 |           expect(err).toBe(testError)
87 |         }).then(done)
88 |       })
89 |     })
90 |   })
91 | })
92 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/providers/gcp.js:
--------------------------------------------------------------------------------
 1 | const debug = require('debug')
 2 | const fs = require('fs-extra')
 3 | const path = require('path')
 4 | const GoogleClient = require('@google-cloud/text-to-speech').TextToSpeechClient
 5 | 
 6 | const GoogleProvider = function (opts) {
 7 |   try {
 8 |     this.instance = new GoogleClient({
 9 |       credentials: opts.email || opts.privateKey
10 |         ? {
11 |             client_email: opts.email,
12 |             private_key: opts.privateKey
13 |           }
14 |         : undefined,
15 |       keyFilename: opts.projectFile ? path.resolve(opts.projectFile) : undefined,
16 |       projectId: opts.projectId
17 |     })
18 |   } catch (err) {
19 |     /* istanbul ignore next */
20 |     this.instance = null
21 |   }
22 | }
23 | 
24 | exports.GoogleProvider = GoogleProvider
25 | 
/**
 * Returns the provider-specific fields for a part:
 * a `synthesizer` function bound to this provider's client.
 */
GoogleProvider.prototype.buildPart = function () {
  const client = this.instance
  const synthesizer = client.synthesizeSpeech.bind(client)
  return { synthesizer }
}
31 | 
/**
 * Sends one part to the Google Cloud API and writes the returned audio
 * to `info.tempfile`.
 *
 * `i` replaces the first "<digits>/" sequence in the task title.
 * `callback` receives an error on failure and no arguments on success.
 */
GoogleProvider.prototype.generate = (info, i, callback) => {
  info.task.title = info.task.title.replace(/\d+\//, `${i}/`)

  const settings = info.opts

  // SSML input and plain text travel under different request keys.
  const input = settings.type === 'ssml' ? { ssml: info.text } : { text: info.text }

  let ssmlGender
  if (settings.gender) {
    ssmlGender = String(settings.gender).toUpperCase()
  }

  // Map the CLI format name onto the API's encoding name (MP3 by default).
  let audioEncoding = 'MP3'
  if (settings.format === 'pcm') {
    audioEncoding = 'LINEAR16'
  } else if (settings.format === 'ogg') {
    audioEncoding = 'OGG_OPUS'
  }

  const request = {
    input,
    voice: {
      ssmlGender,
      languageCode: settings.language,
      name: settings.voice
    },
    audioConfig: {
      audioEncoding,
      effectsProfileId: settings.effect,
      pitch: settings.pitch,
      sampleRateHertz: settings.sampleRate,
      speakingRate: settings.speed,
      volumeGainDb: settings.gain
    }
  }
  const callOptions = { retry: null }

  const onWritten = err => {
    if (!err) {
      callback()
      return
    }
    debug('generate')(`Error writing: ${err.message}`)
    return callback(err)
  }

  const onResponse = (err, response) => {
    if (err) {
      debug('generate')(`Error during request: ${err.message}`)
      return callback(err)
    }
    debug('generate')(`Writing audio content to ${info.tempfile}`)
    fs.writeFile(info.tempfile, response.audioContent, 'binary', onWritten)
  }

  debug('generate')('Making request to Google Cloud Platform')
  info.synthesizer(request, callOptions, onResponse)
}
79 | 
80 | /**
81 |  * Create a Google Cloud TTS instance.
82 |  */
83 | exports.create = opts => {
84 |   debug('create')('Creating Google Cloud TTS instance')
85 |   return new GoogleProvider(opts)
86 | }
87 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/combine-parts.js:
--------------------------------------------------------------------------------
 1 | const spawn = require('child_process').spawn
 2 | const debug = require('debug')
 3 | const fs = require('fs-extra')
 4 | const tempfile = require('tempfile')
 5 | const { extensionFor } = require('./file-extensions')
 6 | 
 7 | /**
 8 |  * Combines MP3 or OGG files into one file.
 9 |  */
10 | exports.combineEncodedAudio = (binary, manifestFile, outputFile) => {
11 |   const args = [
12 |     '-f', 'concat',
13 |     '-safe', '0',
14 |     '-i', manifestFile,
15 |     '-c', 'copy',
16 |     outputFile
17 |   ]
18 |   return new Promise((resolve, reject) => {
19 |     debug('combineEncodedAudio')(`Running ${binary} ${args.join(' ')}`)
20 |     const ffmpeg = spawn(binary, args)
21 |     let stderr = ''
22 |     ffmpeg.stderr.on('data', (data) => {
23 |       stderr += `\n${data}`
24 |     })
25 |     ffmpeg.on('error', () => {
26 |       reject(new Error('Could not start ffmpeg process'))
27 |     })
28 |     ffmpeg.on('close', code => {
29 |       debug('combineEncodedAudio')(stderr)
30 |       debug('combineEncodedAudio')(`ffmpeg process completed with code ${code}`)
31 |       if (code > 0) {
32 |         return reject(new Error(`ffmpeg returned an error (${code}): ${stderr}`))
33 |       }
34 |       resolve()
35 |     })
36 |   })
37 | }
38 | 
39 | /**
40 |  * Concatenates raw PCM audio into one file.
41 |  */
42 | exports.combineRawAudio = (manifestFile, outputFile) => {
43 |   const manifest = fs.readFileSync(manifestFile, 'utf8')
44 |   debug('combineRawAudio')(`Manifest contains: ${manifest}`)
45 |   const regexpState = /^file\s+'(.*)'$/gm
46 |   debug('combineRawAudio')(`Creating file ${outputFile}`)
47 |   fs.createFileSync(outputFile)
48 |   debug('combineRawAudio')(`Truncating file ${outputFile}`)
49 |   fs.truncateSync(outputFile)
50 |   let match
51 |   while ((match = regexpState.exec(manifest)) !== null) {
52 |     debug('combineRawAudio')(`Reading data from ${match[1]}`)
53 |     const dataBuffer = fs.readFileSync(match[1])
54 |     debug('combineRawAudio')(`Appending data to ${outputFile}`)
55 |     fs.appendFileSync(outputFile, dataBuffer)
56 |   }
57 |   return Promise.resolve()
58 | }
59 | 
60 | /**
61 |  * Combines all the parts into one file.
62 |  * Resolves with the new filename.
63 |  */
64 | exports.combine = (ctx) => {
65 |   const manifestFile = ctx.manifestFile
66 |   const opts = ctx.opts
67 |   const newFile = tempfile(`.${extensionFor(opts.format, ctx.service)}`)
68 |   debug('combine')(`Combining files into ${newFile}`)
69 |   const combiner = opts.format === 'pcm' && ctx.service === 'aws'
70 |     ? exports.combineRawAudio(manifestFile, newFile)
71 |     : exports.combineEncodedAudio(opts.ffmpeg, manifestFile, newFile)
72 |   return combiner.then(() => {
73 |     ctx.tempFile = newFile
74 |   })
75 | }
76 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/check-usage.js:
--------------------------------------------------------------------------------
 1 | const path = require('path')
 2 | 
 3 | /**
 4 |  * Checks if the CLI was called with valid arguments.
 5 |  */
 6 | exports.checkUsage = (args, proc) => {
 7 |   const minNumArgs = 1
 8 |   const script = path.basename(proc.argv[1])
 9 |   const usageStatement = `Converts a text file to speech using AWS Polly or Google Cloud Text-to-Speech.
10 | Usage:
11 |   ${script} [INPUTFILE] OUTPUTFILE [OPTIONS]
12 | Standard:
13 |   INPUTFILE                The text file to convert (reads from stdin if not specified)
14 |   OUTPUTFILE               The filename to save the audio to
15 | Options:
16 |   --help                   Displays this info and exits
17 |   --access-key KEY         AWS access key ID
18 |   --effect ID              Apply an audio effect profile. Can be specified multiple times.
19 |   --email EMAIL            GCP client email address (required if "private-key" or
20 |                              "private-key-file" is used)
21 |   --engine ENGINE          AWS voice engine
22 |   --ffmpeg BINARY          Path to the ffmpeg binary (defaults to the one in PATH)
23 |   --format FORMAT          Target audio format ("mp3", "ogg", or "pcm") (default "mp3")
24 |   --gain GAIN              Volume gain, where 0.0 is normal gain
25 |   --gender GENDER          Gender of the voice ("male", "female", or "neutral")
26 |   --language LANG          Code for the desired language (default "en-US" for GCP,
27 |                              no default for AWS)
28 |   --lexicon NAME           Apply a stored pronunciation lexicon. Can be specified
29 |                              multiple times.
30 |   --pitch PITCH            Change in speaking pich, in semitones
31 |   --private-key KEY        GCP private key
32 |   --private-key-file FILE  GCP private key file (".pem" or ".p12" file)
33 |   --project-file FILE      GCP ".json" file with project info
34 |   --project-id ID          GCP project ID
35 |   --region REGION          AWS region to send requests to (default "us-east-1")
36 |   --sample-rate RATE       Audio frequency, in hertz.
37 |   --secret-key KEY         AWS secret access key
38 |   --service TYPE           Cloud service to use ("aws" or "gcp") (default "aws")
39 |   --speed RATE             Speaking rate, where 1.0 is normal speed
40 |   --throttle SIZE          Number of simultaneous requests allowed against the API
41 |                              (default 5)
42 |   --type TYPE              Type of input text ("text" or "ssml") (default "text")
43 |   --voice VOICE            Voice to use for the speech (default "Joanna" for AWS)
44 | `
45 |   if (args.help) {
46 |     proc.stderr.write(usageStatement)
47 |     proc.exit(0)
48 |   }
49 |   if (args._.length < minNumArgs) {
50 |     proc.stderr.write(usageStatement)
51 |     proc.exit(1)
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/text-chunk.js:
--------------------------------------------------------------------------------
 1 | const { split } = require('sentence-splitter')
 2 | 
 3 | const splitIntoSentences = (text) => {
 4 |   if (typeof text !== 'string') { return [] }
 5 |   return split(text)
 6 |     .filter((node) => node.type === 'Sentence')
 7 |     .map((node) => node.raw)
 8 | }
 9 | 
/**
 * Splits a block of text into groups no longer than `maxLength` characters,
 * using sentence boundaries.
 *
 * Sticks as many sentences into a single group of chars as possible,
 * without going over your specified limit. A sentence longer than the limit
 * is broken up on spaces, and a word longer than the limit is cut into
 * pieces. Chunks are not trimmed, so a chunk can end with the space that
 * preceded a word which had to be cut.
 *
 * @param {string} text - Text to split; non-strings produce no chunks.
 * @param {number} maxLength - Maximum number of characters per chunk.
 * @returns {string[]} The chunks, in order.
 * @throws {Error} "Infinite loop" if the splitting cannot make progress
 *   (for example when `maxLength` is 0).
 */
const chunkText = (text, maxLength) => {
  const sentences = splitIntoSentences(text)
  const epsilon = 100

  // Loop through the sentences, putting them into chunks.
  const chunks = []
  let chunk = ''
  let safety = 0
  for (let i = 0; i < sentences.length;) {
    if (sentences[i].length > maxLength) {
      // The sentence is too long -- break it up.
      chunk = ''
      const words = sentences[i].split(/ +/g)
      // Cutting an over-long word takes one pass per piece, so the guard has
      // to scale with the sentence's characters, not just its word count;
      // otherwise one very long word trips the guard while still progressing.
      const maxIterations = 2 * (sentences[i].length + words.length) + epsilon
      let safety3 = 0
      for (let j = 0; j < words.length;) {
        // Add the word to the chunk if it'll fit.
        // Otherwise, add the current chunk to the list and restart the chunk.
        const newWordWithSpace = `${chunk.length > 0 ? ' ' : ''}${words[j]}`
        if (`${chunk}${newWordWithSpace}`.length > maxLength) {
          // If the word is longer than the max length, split it up:
          // fill the current chunk and keep the remainder for the next pass.
          if (words[j].length > maxLength) {
            const numChars = maxLength - chunk.length
            chunk += newWordWithSpace.slice(0, numChars)
            words[j] = newWordWithSpace.slice(numChars).trim()
          }
          chunks.push(chunk)
          chunk = ''
        } else {
          chunk += newWordWithSpace
          j++
        }
        safety3++
        if (safety3 > maxIterations) { throw new Error('Infinite loop') }
      }

      // If there is an unfilled chunk remaining, add it to the list.
      if (chunk !== '') {
        chunks.push(chunk)
      }

      i++
    } else {
      // Add as many sentences that will fit in a chunk.
      chunk = ''
      let safety2 = 0
      while (i < sentences.length) {
        const newChunk = `${chunk.length > 0 ? ' ' : ''}${sentences[i]}`
        if (`${chunk}${newChunk}`.length > maxLength) { break }
        chunk += newChunk
        i++
        safety2++
        if (safety2 > sentences.length + epsilon) { throw new Error('Infinite loop') }
      }
      chunks.push(chunk)
    }
    safety++
    if (safety > sentences.length + epsilon) { throw new Error('Infinite loop') }
  }

  return chunks
}
78 | 
79 | module.exports = {
80 |   chunkText,
81 |   splitIntoSentences
82 | }
83 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/split-text.spec.js:
--------------------------------------------------------------------------------
 1 | describe('splitText()', () => {
 2 |   const maxChars = 1000
 3 |   const testData = 'hello world'
 4 | 
 5 |   let splitText
 6 |   let ctx
 7 | 
 8 |   beforeEach(() => {
 9 |     ({ splitText } = require('./helpers').loadLib('split-text'))
10 |     ctx = {
11 |       args: {},
12 |       text: testData,
13 |       maxCharacterCount: maxChars
14 |     }
15 |   })
16 | 
17 |   it('should split the text into an array of parts', done => {
18 |     splitText(ctx).then(() => {
19 |       expect(ctx.parts).toEqual([testData])
20 |     }).then(done)
21 |   })
22 | 
23 |   it('should split the text by the given number of characters', done => {
24 |     ctx.maxCharacterCount = 2
25 |     splitText(ctx).then(() => {
26 |       expect(ctx.parts).toEqual(['he', 'll', 'o', 'wo', 'rl', 'd'])
27 |     }).then(done)
28 |   })
29 | 
30 |   it('should propagate SSML tags through the chunks', done => {
31 |     ctx.text = '<speak><prosody volume="loud">Hello there<break/> world<break/></prosody></speak>'
32 |     ctx.maxCharacterCount = 6
33 |     ctx.args = { type: 'ssml' }
34 |     splitText(ctx).then(() => {
35 |       expect(ctx.parts).toEqual([
36 |         '<speak><prosody volume="loud">Hello</prosody></speak>',
37 |         '<speak><prosody volume="loud">there</prosody></speak>',
38 |         '<speak><prosody volume="loud"><break/>world</prosody></speak>'
39 |       ])
40 |     }).then(done)
41 |   })
42 | 
43 |   it('should work when SSML tags are duplicated in sequence', done => {
44 |     ctx.text = '<speak><p>Section 1</p><p>Introduction</p></speak>'
45 |     ctx.maxCharacterCount = 1500
46 |     ctx.args = { type: 'ssml' }
47 |     splitText(ctx).then(() => {
48 |       expect(ctx.parts).toEqual([
49 |         '<speak><p>Section 1</p></speak>',
50 |         '<speak><p>Introduction</p></speak>'
51 |       ])
52 |     }).then(done)
53 |   })
54 | 
55 |   it('should NOT propagate SSML tags for non-SSML text', done => {
56 |     ctx.text = '<speak>Hello there world</speak>'
57 |     ctx.maxCharacterCount = 6
58 |     splitText(ctx).then(() => {
59 |       expect(ctx.parts).toEqual([
60 |         '<speak',
61 |         '>Hello',
62 |         'there',
63 |         'world<',
64 |         '/speak',
65 |         '>'
66 |       ])
67 |     }).then(done)
68 |   })
69 | 
70 |   it('should condense whitespace', done => {
71 |     ctx.text = 'hello   world'
72 |     splitText(ctx).then(() => {
73 |       expect(ctx.parts).toEqual(['hello world'])
74 |     }).then(done)
75 |   })
76 | 
77 |   it('should trim whitespace from the ends', done => {
78 |     ctx.text = ' hello world '
79 |     splitText(ctx).then(() => {
80 |       expect(ctx.parts).toEqual(['hello world'])
81 |     }).then(done)
82 |   })
83 | 
84 |   describe('when no args are specified', () => {
85 |     it('should still work', () => {
86 |       ctx.args = undefined
87 |       return splitText(ctx).then(() => {
88 |         expect(ctx.parts).toEqual([testData])
89 |       })
90 |     })
91 |   })
92 | })
93 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/generate-all.spec.js:
--------------------------------------------------------------------------------
 1 | describe('generateAll()', () => {
 2 |   let async, generateAll, task
 3 |   let iteratorFunction
 4 | 
 5 |   const testLimit = 2
 6 |   const textParts = [
 7 |     'hello', 'world', 'how are you?'
 8 |   ]
 9 | 
10 |   beforeEach(() => {
11 |     iteratorFunction = jasmine.createSpy('async iterator')
12 |     iteratorFunction.and.callFake((data, i, callback) => { callback() })
13 |     task = { title: '' }
14 |     const helpers = require('./helpers').loadLib('generate-speech')
15 |     async = helpers.async
16 |     generateAll = helpers.generateAll
17 |   })
18 | 
19 |   it('should asynchronously call the function for each of the parts', done => {
20 |     generateAll(textParts, { limit: testLimit }, iteratorFunction, task).then(() => {
21 |       const [parts] = async.eachOfLimit.calls.mostRecent().args
22 |       expect(parts).toEqual(textParts)
23 |       expect(parts.length).toBe(textParts.length)
24 |       expect(iteratorFunction.calls.count()).toBe(textParts.length)
25 |     }).then(done)
26 |   })
27 | 
28 |   it('should limit the async calls according to the option', done => {
29 |     generateAll(textParts, { limit: testLimit }, iteratorFunction, task).then(() => {
30 |       const [, limit] = async.eachOfLimit.calls.mostRecent().args
31 |       expect(limit).toBe(testLimit)
32 |     }).then(done)
33 |   })
34 | 
35 |   describe('initial spinner', () => {
36 |     beforeEach(done => {
37 |       async.eachOfLimit.and.callFake((parts, opts, func, callback) => {
38 |         callback(new Error('reject async'))
39 |       })
40 |       generateAll(textParts, {}, iteratorFunction, task).catch(() => {
41 |         done()
42 |       })
43 |     })
44 | 
45 |     it('should be updated', () => {
46 |       expect(task.title).toMatch('Convert to audio')
47 |     })
48 | 
49 |     it('should show the part count', () => {
50 |       expect(task.title).toMatch(`/${textParts.length}\\)$`)
51 |     })
52 | 
53 |     it('should start at 0', () => {
54 |       expect(task.title).toMatch('\\(0/')
55 |     })
56 |   })
57 | 
58 |   describe('when all requests succeed', () => {
59 |     it('should respond with the original parts', done => {
60 |       generateAll(textParts, { limit: testLimit }, iteratorFunction, task).then(response => {
61 |         expect(response).toEqual(textParts)
62 |       }).then(done)
63 |     })
64 | 
65 |     it('should show the final count', done => {
66 |       generateAll(textParts, { limit: testLimit }, iteratorFunction, task).then(() => {
67 |         expect(task.title).toMatch(`\\(${textParts.length}/`)
68 |       }).then(done)
69 |     })
70 |   })
71 | 
72 |   describe('when a request fails', () => {
73 |     const testError = 'test error'
74 | 
75 |     beforeEach(() => {
76 |       iteratorFunction.and.callFake((data, i, callback) => {
77 |         callback(new Error(testError))
78 |       })
79 |     })
80 | 
81 |     it('should return a rejected promise with the error', done => {
82 |       generateAll(textParts, { limit: testLimit }, iteratorFunction, task).catch(err => {
83 |         expect(err.message).toBe(testError)
84 |       }).then(done)
85 |     })
86 |   })
87 | })
88 | 


--------------------------------------------------------------------------------
/packages/web-tts/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Web TTS CLI
 3 | 
 4 | Command-line tool to convert webpages of any size to speech. Uses [`tts-cli`](../tts-cli) and an automated Chrome browser behind the scenes.
 5 | 
 6 | ## Requirements / Installation
 7 | 
 8 | * [Node.js/npm](https://nodejs.org) v10+
 9 | * Please see the [`tts-cli` docs](../tts-cli) for installing and configuring the TTS tool.
10 | 
11 | You can then install the package globally:
12 | 
13 | ```
14 | $ npm install web-tts -g
15 | ```
16 | 
17 | ## Usage
18 | 
19 | ```
20 | $ web-tts commandfile outputfile [options]
21 | ```
22 | 
23 | Examples:
24 | 
25 | ```
26 | # Reads commands from commands.txt and saves the speech in test.mp3
27 | $ web-tts commands.txt test.mp3
28 | 
29 | # Sets the browser window to 3000 x 2000 pixels (default is 2000 x 1000)
30 | $ web-tts commands.txt test.mp3 --width 3000 --height 2000
31 | 
32 | # Any additional options are passed to tts-cli as-is
33 | $ web-tts commands.txt test.mp3 --engine neural
34 | ```
35 | 
36 | Standard arguments:
37 | 
38 | * `commandfile` is a file that describes how to crawl and scrape the webpages. See the [command file docs](docs/commandfile.md) for details.
39 | * `outputfile` is the filename to save the audio to.
40 | 
41 | Options:
42 | 
43 | * `--debug [true|false]` -- Shows debugging info: the browser console logs are printed, and the devtools are opened when not headless (default `false`)
44 | * `--delay TIME` -- Amount of delay between executing browser commands (in milliseconds) (default `0`)
45 | * `--headless [true|false]` -- Runs the browser hidden in the background (`true`, the default) or opens it up on the desktop (`false`)
46 | * `--height SIZE` -- Height of the browser in pixels (default `2000`)
47 | * `--width SIZE` -- Width of the browser in pixels (default `3000`)
48 | 
49 | See the [tts-cli docs](../tts-cli/docs/options.md) for text-to-speech options.
50 | 
51 | ## Troubleshooting
52 | 
53 | * Run the tool with `--debug` to see the browser logs. (Note that there will likely be a lot of irrelevant logs included.)
54 | * Run the tool with `--headless false` and, if needed, a delay (e.g. `--delay 250`) to get a visual of what's happening in the browser.
55 | 
56 | ## Contributing
57 | 
58 | Although functional, this tool is still unfinished. Pull requests and suggestions are welcome. [Create a new issue](https://github.com/eheikes/tts/issues/new) to report a bug or suggest a new feature.
59 | 
60 | Please add tests and maintain the existing styling when adding and updating the code. Run `npm run lint` to lint the code.
61 | 
62 | ## Small Print
63 | 
64 | Copyright 2020 Eric Heikes.
65 | 
66 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0).
67 | 
68 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
69 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/cli.spec.js:
--------------------------------------------------------------------------------
  1 | const proxyquire = require('proxyquire')
  2 | 
  3 | describe('CLI', () => {
  4 |   const outputFile = 'output-file'
  5 | 
  6 |   let cli
  7 |   let args, minimist
  8 | 
  9 |   beforeEach(() => {
 10 |     args = { _: [outputFile] }
 11 |     minimist = jasmine.createSpy('minimist').and.callFake(() => args)
 12 |     cli = proxyquire('../tts', { minimist })
 13 |   })
 14 | 
 15 |   it('should construct an array of tasks', () => {
 16 |     expect(cli.tasks).toEqual(jasmine.any(Array))
 17 |     expect(cli.tasks.every(task => {
 18 |       return typeof task.title === 'string' && typeof task.task === 'function'
 19 |     })).toBe(true)
 20 |   })
 21 | 
 22 |   it('should pass the CLI arguments to Listr', () => {
 23 |     expect(cli.context.args).toBe(args)
 24 |   })
 25 | 
 26 |   it('should pass the max character count to Listr', () => {
 27 |     expect(cli.context.maxCharacterCount).toEqual(jasmine.any(Number))
 28 |   })
 29 | 
 30 |   it('should pass the process object to Listr', () => {
 31 |     expect(cli.context.process).toEqual(jasmine.any(Object))
 32 |     expect(cli.context.process.argv).toEqual(jasmine.any(Array))
 33 |   })
 34 | 
 35 |   describe('when 2 arguments are given', () => {
 36 |     const inputFile = 'input-file'
 37 | 
 38 |     beforeEach(() => {
 39 |       args = { _: [inputFile, outputFile] }
 40 |       cli = proxyquire('../tts', { minimist })
 41 |     })
 42 | 
 43 |     it('should use the first argument for the input filename', () => {
 44 |       expect(cli.context.input).toBe(inputFile)
 45 |     })
 46 | 
 47 |     it('should use the second argument for the output filename', () => {
 48 |       expect(cli.context.outputFilename).toBe(outputFile)
 49 |     })
 50 |   })
 51 | 
 52 |   describe('when only 1 argument is given', () => {
 53 |     beforeEach(() => {
 54 |       args = { _: [outputFile] }
 55 |       cli = proxyquire('../tts', { minimist })
 56 |     })
 57 | 
 58 |     it('should use null for the input filename', () => {
 59 |       expect(cli.context.input).toBe(null)
 60 |     })
 61 | 
 62 |     it('should use the first argument for the output filename', () => {
 63 |       expect(cli.context.outputFilename).toBe(outputFile)
 64 |     })
 65 |   })
 66 | 
 67 |   describe('when the "aws" service is specified', () => {
 68 |     beforeEach(() => {
 69 |       args = { _: [outputFile], service: 'aws' }
 70 |       cli = proxyquire('../tts', { minimist })
 71 |     })
 72 | 
 73 |     it('should save that as the service', () => {
 74 |       expect(cli.context.service).toBe('aws')
 75 |     })
 76 | 
 77 |     it('should set the appropriate maxCharacterCount', () => {
 78 |       expect(cli.context.maxCharacterCount).toBe(1500)
 79 |     })
 80 |   })
 81 | 
 82 |   describe('when the "gcp" service is specified', () => {
 83 |     beforeEach(() => {
 84 |       args = { _: [outputFile], service: 'gcp' }
 85 |       cli = proxyquire('../tts', { minimist })
 86 |     })
 87 | 
 88 |     it('should save that as the service', () => {
 89 |       expect(cli.context.service).toBe('gcp')
 90 |     })
 91 | 
 92 |     it('should set the appropriate maxCharacterCount', () => {
 93 |       expect(cli.context.maxCharacterCount).toBe(5000)
 94 |     })
 95 |   })
 96 | 
 97 |   describe('when no service is specified', () => {
 98 |     beforeEach(() => {
 99 |       args = { _: [outputFile] }
100 |       cli = proxyquire('../tts', { minimist })
101 |     })
102 | 
103 |     it('should use the default service', () => {
104 |       expect(cli.context.service).toBe('aws')
105 |     })
106 | 
107 |     it('should set the appropriate maxCharacterCount', () => {
108 |       expect(cli.context.maxCharacterCount).toBe(1500)
109 |     })
110 |   })
111 | })
112 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/split-text.js:
--------------------------------------------------------------------------------
  1 | const debug = require('debug')
  2 | const { chunkText: chunk } = require('./text-chunk')
  3 | 
  4 | /**
  5 |  * Chunk text into pieces.
  6 |  */
  7 | const chunkText = (text, maxCharacterCount) => {
  8 |   const parts = chunk(text, maxCharacterCount)
  9 |   debug('chunkText')(`Chunked into ${parts.length} text parts`)
 10 |   return Promise.resolve(parts)
 11 | }
 12 | 
 13 | /**
 14 |  * Parse and chunk XML.
 15 |  */
 16 | const chunkXml = (xml, maxCharacterCount) => {
 17 |   const parser = require('sax').parser(false, {
 18 |     lowercase: true,
 19 |     normalize: true,
 20 |     trim: true
 21 |   })
 22 |   debug('chunkXml')('Started SAX XML parser')
 23 |   const attributeString = attrs => {
 24 |     let str = ''
 25 |     for (const prop in attrs) {
 26 |       /* istanbul ignore else: need to add test for this */
 27 |       if (Object.prototype.hasOwnProperty.call(attrs, prop)) {
 28 |         str += ` ${prop}="${attrs[prop]}"`
 29 |       }
 30 |     }
 31 |     return str
 32 |   }
 33 |   return new Promise((resolve, reject) => {
 34 |     let err = null
 35 |     let extraTags = '' // self-closing tags
 36 |     const tags = [] // stack of open tags
 37 |     const parts = []
 38 |     /* istanbul ignore next */
 39 |     parser.onerror = e => {
 40 |       debug('chunkXml')(`Encountered error: ${e}`)
 41 |       err = e
 42 |     }
 43 |     parser.ontext = text => {
 44 |       debug('chunkXml')(`Found text: ${text.substr(0, 50)}...`) // eslint-disable-line no-magic-numbers
 45 |       const chunks = chunk(text, maxCharacterCount).map((chunk, index) => {
 46 |         if (index === 0) {
 47 |           debug('chunkXml')('Adding unused self-closing tags:', extraTags)
 48 |           chunk = `${extraTags}${chunk}`
 49 |         }
 50 |         for (let i = tags.length - 1; i >= 0; i--) {
 51 |           debug('chunkXml')(`Wrapping chunk in ${tags[i].name} tag`)
 52 |           chunk = `<${tags[i].name}${attributeString(tags[i].attributes)}>${chunk}</${tags[i].name}>`
 53 |         }
 54 |         return chunk
 55 |       })
 56 |       chunks.forEach(chunk => {
 57 |         debug('chunkXml')(`Adding chunk: ${chunk.substr(0, 50)}...`) // eslint-disable-line no-magic-numbers
 58 |       })
 59 |       parts.push(...chunks)
 60 |       extraTags = ''
 61 |     }
 62 |     parser.onopentag = tagData => {
 63 |       debug('chunkXml')(`Found tag: ${JSON.stringify(tagData)}`)
 64 |       if (tagData.isSelfClosing) {
 65 |         const attrs = attributeString(tagData.attributes)
 66 |         debug('chunkXml')(`Adding "${tagData.name}" to self-closing tags`)
 67 |         extraTags += `<${tagData.name}${attrs}/>`
 68 |       } else {
 69 |         debug('chunkXml')(`Adding "${tagData.name}" to the stack`)
 70 |         tags.push(tagData)
 71 |       }
 72 |     }
 73 |     parser.onclosetag = tagName => {
 74 |       debug('chunkXml')(`Found closing tag: "${tagName}"`)
 75 |       /* istanbul ignore else: need to add test for this */
 76 |       if (tags[tags.length - 1].name === tagName) {
 77 |         debug('chunkXml')(`Popping "${tagName}" from the stack`)
 78 |         tags.pop()
 79 |       } else {
 80 |         // TODO should error
 81 |         debug('chunkXml')('Problem: mismatched tags')
 82 |       }
 83 |     }
 84 |     parser.onend = () => {
 85 |       debug('chunkXml')('Reached end of XML')
 86 |       /* istanbul ignore if */
 87 |       if (err) {
 88 |         reject(err)
 89 |       } else {
 90 |         resolve(parts)
 91 |       }
 92 |     }
 93 |     parser.write(xml).close()
 94 |   })
 95 | }
 96 | 
 97 | /**
 98 |  * Splits a string of text into chunks.
 99 |  */
100 | exports.splitText = (ctx) => {
101 |   const text = ctx.text
102 |   const maxCharacterCount = ctx.maxCharacterCount
103 |   const opts = ctx.args || {}
104 |   const chunker = opts.type === 'ssml' ? chunkXml : chunkText
105 |   return chunker(text, maxCharacterCount).then(parts => {
106 |     debug('splitText')('Stripping whitespace')
107 |     return parts.map(str => {
108 |       // Compress whitespace.
109 |       return str.replace(/\s+/g, ' ')
110 |     }).map(str => {
111 |       // Trim whitespace from the ends.
112 |       return str.trim()
113 |     })
114 |   }).then(parts => {
115 |     ctx.parts = parts
116 |   })
117 | }
118 | 


--------------------------------------------------------------------------------
/packages/web-tts/docs/commandfile.md:
--------------------------------------------------------------------------------
  1 | # Command File
  2 | 
  3 | The command file must be formatted as [YAML](https://en.wikipedia.org/wiki/YAML). If you're not familiar with the format, the CloudBees blog has a [nice intro to YAML](https://rollout.io/blog/yaml-tutorial-everything-you-need-get-started/), and [TutorialsPoint.com has tutorials](https://www.tutorialspoint.com/yaml/yaml_basics.htm) too.
  4 | 
  5 | ## Example
  6 | 
  7 | Here is an example of what a command file might look like:
  8 | 
  9 | ```yaml
 10 | # Go to example.com
 11 | - command: go
 12 |   url: https://example.com
 13 | # Find all articles and save their URLs into "articles"
 14 | - command: getAll
 15 |   selector: '#articles a'
 16 |   property: href
 17 |   saveAs: articles
 18 | # Perform actions for each item in "articles"
 19 | - command: each
 20 |   from: articles
 21 |   actions:
 22 |     # Go to that article URL
 23 |     - command: go
 24 |       url: '{{this}}'
 25 |     # Check if the body of the webpage contains the text "Login"
 26 |     - command: if
 27 |       selector: body
 28 |       test: contains
 29 |       value: Login
 30 |       # If it does, perform the following actions
 31 |       actions:
 32 |         # Type an email address into the "username" field
 33 |         - command: input
 34 |           selector: 'input[name="username"]'
 35 |           value: 'me@example.com'
 36 |         # Type a password into the "password" field
 37 |         - command: input
 38 |           selector: 'input[name="password"]'
 39 |           value: 'mypassword'
 40 |         # Click on the "submit" element
 41 |         - command: click
 42 |           selector: 'input[type="submit"]'
 43 |         # Wait for the page to finish loading
 44 |         - command: waitForPage
 45 |     # Scrape the text inside the "article" element for the TTS tool
 46 |     - command: scrape
 47 |       selector: '#article'
 48 | ```
 49 | 
 50 | The YAML should consist of a sequence of commands (denoted by the `-` syntax). Comments begin with a `#` character and are ignored.
 51 | 
 52 | The "selector" syntax used for finding webpage elements is the same as used with CSS selectors and other web programming. See the [MDN docs on CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_Selectors) for more info.
 53 | 
 54 | ## Command Reference
 55 | 
 56 | Each command in the sequence must have a `command` field with one of the following values.
 57 | 
 58 | ### `click`
 59 | 
 60 | Required fields: `selector`
 61 | 
 62 | Clicks on the first element matching `selector`.
 63 | 
 64 | Note that if this causes the page to navigate somewhere else, `waitForPage` should be called immediately afterwards.
 65 | 
 66 | ### `each`
 67 | 
 68 | Required fields: `actions`, `from`
 69 | 
 70 | Performs a sequence of commands (`actions`) for every item in the named `from` collection.
 71 | 
 72 | Note: Any of the given `actions` can contain the text `{{this}}`, which will be replaced by the current `from` value.
 73 | 
 74 | ### `getAll`
 75 | 
 76 | Required fields: `property`, `saveAs`, `selector`
 77 | 
 78 | Finds all elements matching `selector`, grabs the value from the `property` of those elements, and saves them in a collection named with `saveAs`.
 79 | 
 80 | ### `getOne`
 81 | 
 82 | Required fields: `property`, `saveAs`, `selector`
 83 | 
 84 | Finds the first element matching `selector`, grabs the value from the `property` of that element, and saves it in a variable named with `saveAs`.
 85 | 
 86 | ### `go`
 87 | 
 88 | Required fields: `url`
 89 | 
 90 | Opens the webpage at the given URL.
 91 | 
 92 | Note that `waitForPage` is _not_ needed with this command.
 93 | 
 94 | ### `if`
 95 | 
 96 | Required fields: `actions`, `selector`, `test`, `value`
 97 | Optional fields: `negate`
 98 | 
 99 | Runs a check (`test`) against the first element matching `selector`. If the test passes, performs the sequence of given `actions`. You can set `negate: true` to run the actions if the test does _not_ pass (i.e. is not true).
100 | 
101 | Currently supported `test`s:
102 | 
103 | * `contains` -- Does the element contain text matching the given `value`?
104 | 
105 | ### `input`
106 | 
107 | Required fields: `selector`, `value`
108 | 
109 | Types the given `value` into the first element matching `selector`.
110 | 
111 | ### `scrape`
112 | 
113 | Required fields: `selector`
114 | 
115 | Gathers the text content from the first element matching `selector`, and formats it for the TTS tool.
116 | 
117 | ### `waitForPage`
118 | 
119 | Waits for the page to finish navigating. It should be used if the page has changed through actions (e.g. clicking a link or submitting a form).
120 | 


--------------------------------------------------------------------------------
/packages/tts-cli/lib/generate-speech.js:
--------------------------------------------------------------------------------
  1 | const async = require('async')
  2 | const debug = require('debug')
  3 | const fs = require('fs-extra')
  4 | const tempfile = require('tempfile')
  5 | const { extensionFor } = require('./file-extensions')
  6 | const { sanitizeOpts } = require('./sanitize-opts')
  7 | 
  8 | /**
  9 |  * Creates an object containing all the data.
 10 |  */
 11 | exports.buildInfo = (text, instance, task, ctx) => {
 12 |   return Object.assign({
 13 |     opts: ctx.opts,
 14 |     task,
 15 |     tempfile: tempfile(`.${extensionFor(ctx.opts.format, ctx.service)}`),
 16 |     text
 17 |   }, instance.buildPart(text, task, ctx.opts))
 18 | }
 19 | 
 20 | /**
 21 |  * Writes down all the temp files for ffmpeg to read in.
 22 |  * Returns the text filename.
 23 |  */
 24 | exports.createManifest = parts => {
 25 |   const txtFile = tempfile('.txt')
 26 |   debug('createManifest')(`Creating ${txtFile} for manifest`)
 27 |   const contents = parts.map(info => {
 28 |     return `file '${info.tempfile}'`
 29 |   }).join('\n')
 30 |   debug('createManifest')(`Writing manifest contents:\n${contents}`)
 31 |   fs.writeFileSync(txtFile, contents, 'utf8')
 32 |   return txtFile
 33 | }
 34 | 
 35 | /**
 36 |  * Calls the API for each text part (throttled). Returns a Promise.
 37 |  */
 38 | exports.generateAll = (parts, opts, func, task) => {
 39 |   const count = parts.length
 40 |   task.title = `Convert to audio (0/${count})`
 41 |   return (new Promise((resolve, reject) => {
 42 |     debug('generateAll')(`Requesting ${count} audio segments, ${opts.limit} at a time`)
 43 |     async.eachOfLimit(
 44 |       parts,
 45 |       opts.limit,
 46 |       func,
 47 |       err => {
 48 |         debug('generateAll')(`Requested all parts, with error ${err}`)
 49 |         if (err) {
 50 |           return reject(err)
 51 |         }
 52 |         task.title = task.title.replace(/\d+\//, `${count}/`)
 53 |         resolve(parts)
 54 |       }
 55 |     )
 56 |   }))
 57 | }
 58 | 
 59 | /**
 60 |  * Returns a Promise with the temporary audio file.
 61 |  */
 62 | exports.generateSpeech = (ctx, task) => {
 63 |   const strParts = ctx.parts
 64 | 
 65 |   // Add in the default options.
 66 |   ctx.opts = Object.assign({}, {
 67 |     accessKey: ctx.args['access-key'],
 68 |     effect: ctx.args.effect,
 69 |     email: ctx.args.email,
 70 |     engine: ctx.args.engine,
 71 |     ffmpeg: ctx.args.ffmpeg || 'ffmpeg',
 72 |     format: ctx.args.format || 'mp3',
 73 |     gain: ctx.args.gain ? parseFloat(ctx.args.gain) : undefined,
 74 |     gender: ctx.args.gender,
 75 |     language: ctx.args.language || (ctx.service === 'gcp' ? 'en-US' : undefined),
 76 |     lexicon: ctx.args.lexicon,
 77 |     limit: Number(ctx.args.throttle) || 5, // eslint-disable-line no-magic-numbers
 78 |     pitch: ctx.args.pitch ? parseFloat(ctx.args.pitch) : undefined,
 79 |     privateKey: ctx.args['private-key'],
 80 |     projectFile: ctx.args['project-file'],
 81 |     projectId: ctx.args['project-id'],
 82 |     region: ctx.args.region || 'us-east-1',
 83 |     sampleRate: ctx.args['sample-rate'] ? Number(ctx.args['sample-rate']) : undefined,
 84 |     secretKey: ctx.args['secret-key'],
 85 |     speed: ctx.args.speed ? parseFloat(ctx.args.speed) : undefined,
 86 |     type: ctx.args.type || 'text',
 87 |     voice: ctx.args.voice || (ctx.service === 'gcp' ? undefined : 'Joanna')
 88 |   })
 89 |   if (typeof ctx.opts.effect !== 'undefined' && !Array.isArray(ctx.opts.effect)) {
 90 |     ctx.opts.effect = [ctx.opts.effect]
 91 |   }
 92 |   if (typeof ctx.opts.lexicon !== 'undefined' && !Array.isArray(ctx.opts.lexicon)) {
 93 |     ctx.opts.lexicon = [ctx.opts.lexicon]
 94 |   }
 95 |   if (ctx.service === 'aws' && ctx.opts.format === 'ogg_vorbis') {
 96 |     debug('generateSpeech')('Warning: Format "ogg_vorbis" is deprecated; use "ogg" instead')
 97 |     ctx.opts.format = 'ogg'
 98 |   }
 99 |   if (ctx.args['private-key-file']) {
100 |     debug('generateSpeech')(`Reading private key from ${ctx.args['private-key-file']}`)
101 |     ctx.opts.privateKey = fs.readFileSync(ctx.args['private-key-file'], 'utf8')
102 |   }
103 |   debug('generateSpeech')(`Options: ${JSON.stringify(sanitizeOpts(ctx.opts))}`)
104 | 
105 |   const provider = require(`./providers/${ctx.service}`)
106 |   const instance = provider.create(ctx.opts)
107 | 
108 |   // Compile the text parts and options together in a packet.
109 |   const parts = strParts.map(part => exports.buildInfo(part, instance, task, ctx))
110 | 
111 |   return exports.generateAll(parts, ctx.opts, instance.generate.bind(instance), task)
112 |     .then(exports.createManifest)
113 |     .then(manifest => {
114 |       ctx.manifestFile = manifest
115 |     })
116 | }
117 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/generate-speech.spec.js:
--------------------------------------------------------------------------------
  1 | describe('generateSpeech()', () => {
  2 |   let fsStub
  3 |   let generateSpeech
  4 |   let ctx, task
  5 | 
  6 |   beforeEach(() => {
  7 |     ({ fs: fsStub, generateSpeech } = require('./helpers').loadLib('generate-speech'))
  8 |     ctx = {
  9 |       args: {
 10 |         'access-key': 'access key',
 11 |         email: 'foo@example.com',
 12 |         effect: 'effect',
 13 |         engine: 'neural',
 14 |         ffmpeg: 'ffmpeg',
 15 |         format: 'mp3',
 16 |         gain: '1.2',
 17 |         gender: 'neutral',
 18 |         language: 'ab-CD',
 19 |         lexicon: 'lexicon',
 20 |         pitch: '9.8',
 21 |         'private-key': 'private key',
 22 |         'project-file': 'project file',
 23 |         'project-id': 'project ID',
 24 |         region: 'region',
 25 |         'sample-rate': '22000',
 26 |         'secret-key': 'secret key',
 27 |         speed: '2.3',
 28 |         throttle: '10',
 29 |         type: 'type',
 30 |         voice: 'voice'
 31 |       },
 32 |       service: 'aws',
 33 |       parts: ['a', 'b', 'c']
 34 |     }
 35 |     task = { title: 'test task' }
 36 |   })
 37 | 
 38 |   it('should set context "opts" from the args', () => {
 39 |     return generateSpeech(ctx, task).then(() => {
 40 |       expect(ctx.opts.accessKey).toBe(ctx.args['access-key'])
 41 |       expect(ctx.opts.effect).toEqual([ctx.args.effect])
 42 |       expect(ctx.opts.email).toBe(ctx.args.email)
 43 |       expect(ctx.opts.engine).toBe(ctx.args.engine)
 44 |       expect(ctx.opts.ffmpeg).toBe(ctx.args.ffmpeg)
 45 |       expect(ctx.opts.format).toBe(ctx.args.format)
 46 |       expect(ctx.opts.gain).toBe(Number(ctx.args.gain))
 47 |       expect(ctx.opts.gender).toBe(ctx.args.gender)
 48 |       expect(ctx.opts.language).toBe(ctx.args.language)
 49 |       expect(ctx.opts.lexicon).toEqual([ctx.args.lexicon])
 50 |       expect(ctx.opts.limit).toBe(Number(ctx.args.throttle))
 51 |       expect(ctx.opts.pitch).toBe(Number(ctx.args.pitch))
 52 |       expect(ctx.opts.privateKey).toBe(ctx.args['private-key'])
 53 |       expect(ctx.opts.projectFile).toBe(ctx.args['project-file'])
 54 |       expect(ctx.opts.projectId).toBe(ctx.args['project-id'])
 55 |       expect(ctx.opts.region).toBe(ctx.args.region)
 56 |       expect(ctx.opts.sampleRate).toBe(Number(ctx.args['sample-rate']))
 57 |       expect(ctx.opts.secretKey).toBe(ctx.args['secret-key'])
 58 |       expect(ctx.opts.speed).toBe(parseFloat(ctx.args.speed))
 59 |       expect(ctx.opts.type).toBe(ctx.args.type)
 60 |       expect(ctx.opts.voice).toBe(ctx.args.voice)
 61 |     })
 62 |   })
 63 | 
 64 |   it('should have context "opts" fall back to the (AWS) defaults', () => {
 65 |     ctx.args = {}
 66 |     ctx.service = 'aws'
 67 |     return generateSpeech(ctx, task).then(() => {
 68 |       expect(ctx.opts.accessKey).toBeUndefined()
 69 |       expect(ctx.opts.effect).toBeUndefined()
 70 |       expect(ctx.opts.engine).toBeUndefined()
 71 |       expect(ctx.opts.ffmpeg).toBe('ffmpeg')
 72 |       expect(ctx.opts.format).toBe('mp3')
 73 |       expect(ctx.opts.gain).toBeUndefined()
 74 |       expect(ctx.opts.gender).toBeUndefined()
 75 |       expect(ctx.opts.language).toBeUndefined()
 76 |       expect(ctx.opts.lexicon).toBeUndefined()
 77 |       expect(ctx.opts.limit).toBe(5)
 78 |       expect(ctx.opts.pitch).toBeUndefined()
 79 |       expect(ctx.opts.region).toBe('us-east-1')
 80 |       expect(ctx.opts.sampleRate).toBeUndefined()
 81 |       expect(ctx.opts.secretKey).toBeUndefined()
 82 |       expect(ctx.opts.speed).toBeUndefined()
 83 |       expect(ctx.opts.type).toBe('text')
 84 |       expect(ctx.opts.voice).toBe('Joanna')
 85 |     })
 86 |   })
 87 | 
 88 |   it('should have context "opts" fall back to the (GCP) defaults', () => {
 89 |     ctx.args = {}
 90 |     ctx.service = 'gcp'
 91 |     return generateSpeech(ctx, task).then(() => {
 92 |       expect(ctx.opts.effect).toBeUndefined()
 93 |       expect(ctx.opts.email).toBeUndefined()
 94 |       expect(ctx.opts.engine).toBeUndefined()
 95 |       expect(ctx.opts.ffmpeg).toBe('ffmpeg')
 96 |       expect(ctx.opts.format).toBe('mp3')
 97 |       expect(ctx.opts.gain).toBeUndefined()
 98 |       expect(ctx.opts.gender).toBeUndefined()
 99 |       expect(ctx.opts.language).toBe('en-US')
100 |       expect(ctx.opts.limit).toBe(5)
101 |       expect(ctx.opts.pitch).toBeUndefined()
102 |       expect(ctx.opts.privateKey).toBeUndefined()
103 |       expect(ctx.opts.projectFile).toBeUndefined()
104 |       expect(ctx.opts.projectId).toBeUndefined()
105 |       expect(ctx.opts.sampleRate).toBeUndefined()
106 |       expect(ctx.opts.speed).toBeUndefined()
107 |       expect(ctx.opts.type).toBe('text')
108 |       expect(ctx.opts.voice).toBeUndefined()
109 |     })
110 |   })
111 | 
112 |   it('should set private-key when private-key-file is specified', () => {
113 |     fsStub.readFileSync.and.returnValue('private key data')
114 |     ctx.args = {
115 |       'private-key-file': 'foobar.pem'
116 |     }
117 |     return generateSpeech(ctx, task).then(() => {
118 |       expect(fsStub.readFileSync).toHaveBeenCalledWith('foobar.pem', 'utf8')
119 |       expect(ctx.opts.privateKey).toBe('private key data')
120 |     })
121 |   })
122 | 
123 |   it('should set format to "ogg" when argument is "ogg_vorbis" (AWS)', () => {
124 |     ctx.service = 'aws'
125 |     ctx.args.format = 'ogg_vorbis'
126 |     return generateSpeech(ctx, task).then(() => {
127 |       expect(ctx.opts.format).toBe('ogg')
128 |     })
129 |   })
130 | 
131 |   it('should save the manifest file to the context', () => {
132 |     return generateSpeech(ctx, task).then(() => {
133 |       expect(ctx.manifestFile).toEqual(jasmine.any(String))
134 |     })
135 |   })
136 | })
137 | 


--------------------------------------------------------------------------------
/packages/tts-cli/README.md:
--------------------------------------------------------------------------------
  1 | # Text-To-Speech CLI
  2 | 
  3 | Command-line tool to convert a text file of any size to speech using [AWS Polly](https://aws.amazon.com/polly/) or [Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech/).
  4 | 
  5 | ![Animation of the tool in action](docs/tts-cli.gif)
  6 | 
  7 | ## Requirements / Installation
  8 | 
  9 | * [Node.js/npm](https://nodejs.org) v18+
 10 | * [ffmpeg](https://ffmpeg.org/)
 11 | * An Amazon Web Services (AWS) or Google Cloud Platform (GCP) account
 12 | 
 13 | You can then install the package globally:
 14 | 
 15 | ```
 16 | $ npm install tts-cli -g
 17 | ```
 18 | 
 19 | You'll also need to set up your computer:
 20 | 
 21 | * AWS Polly: [Get your AWS access keys](http://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/getting-your-credentials.html) and [configure your machine with your credentials](http://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/loading-node-credentials-shared.html).
 22 | * Google Cloud Text-to-Speech: [Create and set up a Cloud Platform account](https://cloud.google.com/nodejs/docs/reference/text-to-speech/latest/#quickstart) and [download your credentials file](https://cloud.google.com/docs/authentication/production#obtaining_and_providing_service_account_credentials_manually).
 23 | 
 24 | ## Usage
 25 | 
 26 | ```
 27 | $ tts [inputfile] outputfile [options]
 28 | ```
 29 | 
 30 | Example:
 31 | 
 32 | ```
 33 | # Using a text file as the input, changing the default voice, and specifying the AWS keys.
 34 | $ tts test.txt test.mp3 --voice Brian --access-key ABCDEFG --secret-key hwl500CZygitV91n
 35 | 
 36 | # Using Google Cloud Text-to-Speech.
 37 | $ tts test.txt test.mp3 --service gcp --language en-US
 38 | 
 39 | # Passing a string of text as the input.
 40 | $ echo "Hello world! How are you?" | tts test.mp3
 41 | ```
 42 | 
 43 | Standard arguments:
 44 | 
 45 | * `inputfile` is the text file you want to convert to speech. It should be encoded as UTF-8. If excluded, tts-cli will read in the text from `stdin`.
 46 | * `outputfile` is the filename to save the audio to.
 47 | 
 48 | Service options:
 49 | 
 50 | * `--access-key KEY` -- AWS access key ID
 51 | * `--email EMAIL` -- GCP client email address (required if `private-key` or `private-key-file` is used)
 52 | * `--private-key KEY` -- GCP private key
 53 | * `--private-key-file FILE` -- GCP private key file (`.pem` or `.p12` file)
 54 | * `--project-file FILE` -- GCP `.json` file with project info
 55 | * `--project-id ID` -- GCP project ID (e.g. `grape-spaceship-123`)
 56 | * `--secret-key KEY` -- AWS secret access key
 57 | * `--service TYPE` -- Cloud service to use (`aws` or `gcp`) (default `aws`)
 58 | * `--throttle SIZE` -- Number of simultaneous requests allowed against the API (default `5`)
 59 | 
 60 | Audio options:
 61 | 
 62 | * `--effect ID` -- Apply an audio effect profile. Can be specified multiple times.
 63 | * `--ffmpeg BINARY` -- Path to the ffmpeg binary (defaults to the one in PATH)
 64 | * `--format FORMAT` -- Target audio format (`mp3`, `ogg`, or `pcm`) (default `mp3`)
 65 | * `--gain GAIN` -- Volume gain, where `0.0` is normal gain
 66 | * `--gender GENDER` -- Gender of the voice (`male`, `female`, or `neutral`)
 67 | * `--language LANG` -- Code for the desired language (default `en-US` for GCP, no default for AWS)
 68 | * `--lexicon NAME` -- Apply a stored pronunciation lexicon. Can be specified multiple times.
 69 | * `--pitch PITCH` -- Change in speaking pitch, in semitones
 70 | * `--speed RATE` -- Speaking rate, where `1.0` is normal speed
 71 | * `--region REGION` -- AWS region to send requests to (default `us-east-1`)
 72 | * `--sample-rate RATE` -- Audio frequency, in hertz. See the [API docs](http://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html#polly-SynthesizeSpeech-request-SampleRate) for valid values.
 73 | * `--type TYPE` -- Type of input text (`text` or `ssml`) (default `text`)
 74 | * `--voice VOICE` -- Voice to use for the speech (default `Joanna` for AWS).
 75 | 
 76 | Note that not all services support all options. For example, AWS Polly does not understand the `speed` option. See the [options documentation](docs/options.md) for more info.
 77 | 
 78 | ## What It Does
 79 | 
 80 | * Splits the text into parts no larger than the maximum size allowed by the API (1500 characters for AWS, 5000 characters for Google Cloud).
 81 | * Compresses the white space inside the text to minimize the cost.
 82 | * Uses your credentials in `~/.aws/credentials` (AWS) or the file named by the `GOOGLE_APPLICATION_CREDENTIALS` environment variable (Google Cloud).
 83 | * Calls the API (in a throttled manner) to get each text part converted to audio.
 84 | * Combines the audio together into a single file.
 85 | 
 86 | ## Troubleshooting
 87 | 
 88 | * Make sure Node.js is working. Running `node --version` on the command line should give a version of v18.0.0 or higher.
 89 | * Make sure ffmpeg is installed. Running `ffmpeg -version` on the command line should give you the version information.
 90 | * Make sure you can connect to AWS or Google Cloud normally.
 91 |   * Going to https://polly.us-east-1.amazonaws.com/v1/speech (or whatever AWS region you're using) should give you a "Missing Authentication Token" message. You can use the [AWS CLI tool](https://aws.amazon.com/cli/) to check your configuration -- installing that and running `aws sts get-caller-identity` should return your user info.
 92 | * Run `export DEBUG=*` first (Linux or Mac) to turn on debugging output. On Windows you'll need to use `set DEBUG=*` (command prompt) or `$env:DEBUG = "*"` (PowerShell).
 93 | 
 94 | ## Contributing
 95 | 
 96 | Pull requests and suggestions are welcome. [Create a new issue](https://github.com/eheikes/tts/issues/new) to report a bug or suggest a new feature.
 97 | 
 98 | Please add tests and maintain the existing styling when adding and updating the code. Run `npm run lint` to lint the code.
 99 | 
100 | ## Small Print
101 | 
102 | Copyright 2017-2020 Eric Heikes.
103 | 
104 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0).
105 | 
106 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
107 | 
108 | This project is not affiliated with Amazon or Google.
109 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/providers/aws.spec.js:
--------------------------------------------------------------------------------
  1 | const { Polly } = require('@aws-sdk/client-polly')
  2 | const fs = require('fs')
  3 | const { Readable } = require('stream')
  4 | const tempfile = require('tempfile')
  5 | 
  6 | describe('AWS provider', () => { // Jasmine specs for the provider module loaded from lib "providers/aws"
  7 |   let create
  8 |   let fsStub
  9 |   let PollyProvider
 10 |   let provider
 11 |   // Before each spec: load the module through the test helper and build a provider from fixed options.
 12 |   beforeEach(() => {
 13 |     ({ create, fs: fsStub, PollyProvider } = require('../helpers').loadLib('providers/aws'))
 14 |     provider = create({
 15 |       region: 'aws-west-1',
 16 |       accessKey: 'ACCESS KEY',
 17 |       secretKey: 'SECRET KEY'
 18 |     })
 19 |   })
 20 | 
 21 |   describe('create()', () => {
 22 |     it('should create a provider instance', () => {
 23 |       expect(provider).toEqual(jasmine.any(PollyProvider))
 24 |     })
 25 | 
 26 |     it('should have an underlying AWS object', () => {
 27 |       expect(provider.instance).toEqual(jasmine.any(Polly))
 28 |     })
 29 |     // `config.credentials` and `config.region` are called and awaited here to obtain the configured values.
 30 |     it('should configure the Polly object from the options', async () => {
 31 |       const credentials = await provider.instance.config.credentials()
 32 |       const region = await provider.instance.config.region()
 33 |       expect(region).toBe('aws-west-1')
 34 |       expect(credentials.accessKeyId).toBe('ACCESS KEY')
 35 |       expect(credentials.secretAccessKey).toBe('SECRET KEY')
 36 |     })
 37 |   })
 38 | 
 39 |   describe('buildPart()', () => {
 40 |     it('should return an object with a `send` property', () => {
 41 |       expect(provider.buildPart()).toEqual({
 42 |         send: jasmine.any(Function)
 43 |       })
 44 |     })
 45 |   })
 46 | 
 47 |   describe('generate()', () => {
 48 |     let task, testData, info, send
 49 |     // Before each spec: rebuild the task, the options and a stubbed send(), so specs can mutate them freely.
 50 |     beforeEach(() => {
 51 |       task = {
 52 |         title: 'Convert to audio (0/42)'
 53 |       }
 54 |       testData = {
 55 |         filename: tempfile(),
 56 |         index: 6,
 57 |         opts: {
 58 |           engine: 'neural',
 59 |           format: 'ogg',
 60 |           language: 'en-US',
 61 |           lexicon: ['lexicon1', 'lexicon2'],
 62 |           sampleRate: 16000,
 63 |           type: 'ssml',
 64 |           voice: 'John'
 65 |         },
 66 |         text: 'hello world',
 67 |         url: 'http://example.com/'
 68 |       }
 69 |       const inStream = new Readable({ // stream that emits "testing" once and then ends
 70 |         read () {
 71 |           this.push('testing')
 72 |           this.push(null)
 73 |         }
 74 |       })
 75 |       send = jasmine.createSpy('send').and.resolveTo({ // spy resolving to a response whose AudioStream is the stream above
 76 |         AudioStream: inStream
 77 |       })
 78 |       info = { // note: `opts` is the same object as testData.opts, so edits through either name are shared
 79 |         opts: testData.opts,
 80 |         task,
 81 |         tempfile: testData.filename,
 82 |         text: testData.text,
 83 |         send
 84 |       }
 85 |     })
 86 |     // After each spec: delete the temp file if it exists; a failing access() check means there is nothing to remove.
 87 |     afterEach(done => {
 88 |       fs.access(testData.filename, fs.constants.F_OK, err => {
 89 |         if (err) { return done() }
 90 |         fs.unlink(testData.filename, done)
 91 |       })
 92 |     })
 93 | 
 94 |     it('should update the task title', done => {
 95 |       provider.generate(info, testData.index, () => {
 96 |         expect(task.title).toMatch(`\\(${testData.index}/`) // escaped "(" -- the string appears to be used as a regex source
 97 |         done()
 98 |       })
 99 |     })
100 |     // The specs below inspect the first argument of the most recent send() call and check its `input` fields.
101 |     it('should work with the MP3 format', done => {
102 |       testData.opts.format = 'mp3'
103 |       provider.generate(info, 0, () => {
104 |         const command = send.calls.mostRecent().args[0]
105 |         expect(command.input.OutputFormat).toBe('mp3')
106 |         done()
107 |       })
108 |     })
109 | 
110 |     it('should work with the OGG format', done => {
111 |       testData.opts.format = 'ogg'
112 |       provider.generate(info, 0, () => {
113 |         const command = send.calls.mostRecent().args[0]
114 |         expect(command.input.OutputFormat).toBe('ogg_vorbis')
115 |         done()
116 |       })
117 |     })
118 | 
119 |     it('should work with the PCM format', done => {
120 |       testData.opts.format = 'pcm'
121 |       provider.generate(info, 0, () => {
122 |         const command = send.calls.mostRecent().args[0]
123 |         expect(command.input.OutputFormat).toBe('pcm')
124 |         done()
125 |       })
126 |     })
127 | 
128 |     it('should use the given voice engine', done => {
129 |       provider.generate(info, 0, () => {
130 |         const command = send.calls.mostRecent().args[0]
131 |         expect(command.input.Engine).toBe('neural')
132 |         done()
133 |       })
134 |     })
135 | 
136 |     it('should not use sample rate if not specified', done => {
137 |       delete info.opts.sampleRate
138 |       provider.generate(info, 0, () => {
139 |         const command = send.calls.mostRecent().args[0]
140 |         expect(command.input.SampleRate).toBeUndefined()
141 |         done()
142 |       })
143 |     })
144 | 
145 |     it('should use the (stringified) sample rate, when specified', done => {
146 |       provider.generate(info, 0, () => {
147 |         const command = send.calls.mostRecent().args[0]
148 |         expect(command.input.SampleRate).toBe(String(testData.opts.sampleRate))
149 |         done()
150 |       })
151 |     })
152 | 
153 |     it('should not use lexicon names if not specified', done => {
154 |       delete info.opts.lexicon
155 |       provider.generate(info, 0, () => {
156 |         const command = send.calls.mostRecent().args[0]
157 |         expect(command.input.LexiconNames).toBeUndefined()
158 |         done()
159 |       })
160 |     })
161 | 
162 |     it('should use the lexicon names, when specified', done => {
163 |       provider.generate(info, 0, () => {
164 |         const command = send.calls.mostRecent().args[0]
165 |         expect(command.input.LexiconNames).toEqual(testData.opts.lexicon)
166 |         done()
167 |       })
168 |     })
169 | 
170 |     it('should use the given text type', done => {
171 |       provider.generate(info, 0, () => {
172 |         const command = send.calls.mostRecent().args[0]
173 |         expect(command.input.TextType).toBe(testData.opts.type)
174 |         done()
175 |       })
176 |     })
177 | 
178 |     it('should use the given text part', done => {
179 |       provider.generate(info, 0, () => {
180 |         const command = send.calls.mostRecent().args[0]
181 |         expect(command.input.Text).toBe(testData.text)
182 |         done()
183 |       })
184 |     })
185 | 
186 |     it('should not use a language if not specified', done => {
187 |       delete info.opts.language
188 |       provider.generate(info, 0, () => {
189 |         const command = send.calls.mostRecent().args[0]
190 |         expect(command.input.LanguageCode).toBeUndefined()
191 |         done()
192 |       })
193 |     })
194 | 
195 |     it('should use the language, when specified', done => {
196 |       provider.generate(info, 0, () => {
197 |         const command = send.calls.mostRecent().args[0]
198 |         expect(command.input.LanguageCode).toBe(testData.opts.language)
199 |         done()
200 |       })
201 |     })
202 | 
203 |     it('should use the given voice', done => {
204 |       provider.generate(info, 0, () => {
205 |         const command = send.calls.mostRecent().args[0]
206 |         expect(command.input.VoiceId).toBe(String(testData.opts.voice))
207 |         done()
208 |       })
209 |     })
210 |     // Reads the temp file back to confirm the stubbed AudioStream's contents were written to it.
211 |     it('should pipe the resulting stream into the file', done => {
212 |       provider.generate(info, 0, () => {
213 |         const contents = fs.readFileSync(testData.filename, 'utf-8')
214 |         expect(contents).toBe('testing')
215 |         done()
216 |       })
217 |     })
218 | 
219 |     it('should callback with an error if send() fails', done => {
220 |       info.send = jasmine.createSpy('send').and.rejectWith(new Error('test error'))
221 |       provider.generate(info, 0, err => {
222 |         expect(err).toEqual(new Error('test error'))
223 |         done()
224 |       })
225 |     })
226 |     // Swaps the stubbed createWriteStream for a real write stream that emits an error as soon as it is piped to.
227 |     it('should callback with an error if file saving fails', done => {
228 |       fsStub.createWriteStream.and.callFake(filename => {
229 |         const stream = fs.createWriteStream(filename)
230 |         stream.on('pipe', () => {
231 |           stream.emit('error', new Error('write stream error'))
232 |         })
233 |         return stream
234 |       })
235 |       provider.generate(info, 0, err => {
236 |         expect(err).toEqual(new Error('write stream error'))
237 |         done()
238 |       })
239 |     })
240 |   })
241 | })
242 | 


--------------------------------------------------------------------------------
/packages/web-tts/index.ts:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env node
  2 | import execa = require('execa')
  3 | import { promises as fs } from 'fs'
  4 | import { safeLoad } from 'js-yaml'
  5 | import minimist = require('minimist')
  6 | import puppeteer = require('puppeteer')
  7 | import * as tempy from 'tempy'
  8 | 
  9 | const { copyFile, readFile, writeFile } = fs
 10 | 
 11 | interface Command { // Base shape of one scripted step; `command` selects the branch taken in doAction
 12 |   [key: string]: any
 13 |   command: 'click' | 'each' | 'getAll' | 'getOne' | 'go' | 'if' | 'input' | 'scrape' | 'waitForPage'
 14 | }
 15 | // `click`: clicks the element matching `selector`.
 16 | interface CommandClick extends Command {
 17 |   selector: string
 18 | }
 19 | // `each`: runs `actions` once per item of the stored variable named by `from`, replacing `{{this}}` in string fields.
 20 | interface CommandEach extends Command {
 21 |   actions: Command[]
 22 |   from: string
 23 | }
 24 | // Reads `property` from the element(s) matching `selector` and stores the stringified result under `saveAs`.
 25 | interface CommandGet extends Command { // getAll or getOne
 26 |   property: string
 27 |   saveAs: string
 28 |   selector: string
 29 | }
 30 | // `go`: navigates the page to `url`.
 31 | interface CommandGo extends Command {
 32 |   url: string
 33 | }
 34 | // `if`: runs `actions` when the text content of the `selector` element contains `value` (inverted by `negate`).
 35 | interface CommandIf extends Command {
 36 |   actions: Command[]
 37 |   negate?: boolean
 38 |   selector: string
 39 |   test: 'contains'
 40 |   value: string
 41 | }
 42 | // `input`: types `value` into the element matching `selector`.
 43 | interface CommandInput extends Command {
 44 |   selector: string
 45 |   value: string
 46 | }
 47 | // `scrape`: appends the text found under the element matching `selector` to the collected source text.
 48 | interface CommandScrape extends Command {
 49 |   selector: string
 50 | }
 51 | 
 52 | let page: puppeteer.Page // assigned by the async entry point from browser.newPage(); used by doAction
 53 | let sourceText = '' // text accumulated by `scrape` commands; later written to a temp file as the tts input
 54 | let vars: {[name: string]: string | string[]} = {} // values stored by `getAll`/`getOne` under `saveAs`; read by `each`
 55 | 
 56 | /**
 57 |  * Runs a single scripted browser command against the shared Puppeteer `page`.
 58 |  *
 59 |  * Commands that gather data write into module state: `getAll`/`getOne` store
 60 |  * their results in `vars`, and `scrape` appends to `sourceText`. `each` and `if`
 61 |  * run their nested `actions` by calling this function recursively. A command
 62 |  * name that matches none of the branches is silently ignored.
 63 |  *
 64 |  * @param action The command to run; `action.command` selects the behavior.
 65 |  */
 66 | const doAction = async (action: Command) => {
 67 |   if (action.command === 'click') {
 68 |     let opts = action as CommandClick
 69 |     console.log(`Clicking on element ${opts.selector}...`)
 70 |     await page.click(opts.selector)
 71 |   } else if (action.command === 'each') {
 72 |     let opts = action as CommandEach
 73 |     // A string variable is treated as a one-item list. A missing or null
 74 |     // variable (e.g. `getOne` found no match) yields no iterations instead of
 75 |     // making Array.from() throw a TypeError.
 76 |     const iterable = typeof vars[opts.from] === 'string'
 77 |       ? [vars[opts.from] as string]
 78 |       : Array.from(vars[opts.from] || [])
 79 |     for (const item of iterable) {
 80 |       // A replacer function inserts the item literally, even if it contains `$` sequences.
 81 |       const replaceThis = (str: string): string  => {
 82 |         return str.replace(/{{this}}/g, () => item)
 83 |       }
 84 |       for (const subaction of opts.actions) {
 85 |         // Substitute into a shallow copy so the template action can be reused for the next item.
 86 |         const clonedAction = { ...subaction }
 87 |         for (const key in clonedAction) {
 88 |           if (typeof clonedAction[key] === 'string') {
 89 |             clonedAction[key] = replaceThis(clonedAction[key])
 90 |           }
 91 |         }
 92 |         await doAction(clonedAction)
 93 |       }
 94 |     }
 95 |   } else if (action.command === 'getAll') {
 96 |     let opts = action as CommandGet
 97 |     console.log(`Getting selectors matching "${opts.selector}"...`)
 98 |     const results = await page.evaluate((selector, prop) => {
 99 |       const matches = Array.from(document.querySelectorAll(selector))
100 |       return matches.map((el) => String(el[prop]))
101 |     }, opts.selector, opts.property)
102 |     console.log(`  ${results.length} matches found. Storing into "${opts.saveAs}".`)
103 |     vars[opts.saveAs] = results
104 |   } else if (action.command === 'getOne') {
105 |     let opts = action as CommandGet
106 |     console.log(`Getting selector matching "${opts.selector}"...`)
107 |     const result = await page.evaluate((selector, prop) => {
108 |       const el = document.querySelector(selector)
109 |       return el && String(el[prop])
110 |     }, opts.selector, opts.property)
111 |     // `result` is null when nothing matches the selector; it is stored as-is.
112 |     console.log(`  ${result ? '1' : 'No'} match found. Storing into "${opts.saveAs}".`)
113 |     vars[opts.saveAs] = result
114 |   } else if (action.command === 'go') {
115 |     let opts = action as CommandGo
116 |     console.log(`Loading URL ${opts.url}...`)
117 |     await page.goto(opts.url)
118 |   } else if (action.command === 'if') {
119 |     let opts = action as CommandIf
120 |     console.log(`Checking if ${opts.negate ? 'not ' : ''}${opts.test} "${opts.value}"...`)
121 |     // Only the 'contains' test is implemented; any other test leaves the condition false before negation.
122 |     let isTrue = false
123 |     if (opts.test === 'contains') {
124 |       isTrue = await page.evaluate((selector, text) => {
125 |         const el = document.querySelector(selector)
126 |         return el && el.textContent && el.textContent.includes(text)
127 |       }, opts.selector, opts.value)
128 |     }
129 |     if (opts.negate) { isTrue = !isTrue }
130 |     if (isTrue) {
131 |       console.log(`  is true; running actions.`)
132 |       for (const subaction of opts.actions) {
133 |         await doAction(subaction)
134 |       }
135 |     } else {
136 |       console.log('  is false; skipping actions.')
137 |     }
138 |   } else if (action.command === 'input') {
139 |     let opts = action as CommandInput
140 |     console.log(`Typing into field "${opts.selector}"...`)
141 |     await page.type(opts.selector, opts.value)
142 |   } else if (action.command === 'scrape') {
143 |     let opts = action as CommandScrape
144 |     console.log(`Scraping text from "${opts.selector}"...`)
145 |     // The callback is evaluated in the page, so everything it needs is defined inside it.
146 |     sourceText += await page.evaluate(selector => {
147 |       // Elements whose text is wrapped in newlines and terminated with a period.
148 |       const blockElements = [
149 |         'address', 'article', 'aside',
150 |         'blockquote', 'details', 'dialog',
151 |         'dd', 'div', 'dl',
152 |         'dt', 'fieldset', 'figcaption',
153 |         'figure', 'footer', 'form',
154 |         'h1', 'h2', 'h3',
155 |         'h4', 'h5', 'h6',
156 |         'header', 'hgroup', 'hr',
157 |         'li', 'main', 'nav',
158 |         'ol', 'p', 'pre',
159 |         'section', 'table', 'ul',
160 |       ].map(name => name.toUpperCase())
161 |       // Elements whose contents are skipped entirely.
162 |       const codeElements = [
163 |         'noscript',
164 |         'script',
165 |         'style'
166 |       ].map(name => name.toUpperCase())
167 |       // Inline elements that are followed by a sentence break.
168 |       const inlineBreaks = [
169 |         'br'
170 |       ].map(name => name.toUpperCase())
171 |       // Recursively flattens a node's descendants into a single string of text.
172 |       const domToString = (node: Node) => {
173 |         let str = ''
174 |         for (let child of Array.from(node.childNodes)) {
175 |           if (child.nodeType === 3) { // text node
176 |             str += child.textContent
177 |           } else if (child.nodeType === 1 && !codeElements.includes(child.nodeName)) { // element node
178 |             if (blockElements.includes(child.nodeName)) {
179 |               str += `\n${domToString(child)}.\n`
180 |             } else {
181 |               str += domToString(child)
182 |               if (inlineBreaks.includes(child.nodeName)) {
183 |                 str += '. '
184 |               }
185 |             }
186 |           }
187 |         }
188 |         return str
189 |       }
190 |       const body = document.querySelector<HTMLElement>(selector)
191 |       return body ? domToString(body) : ''
192 |     }, opts.selector)
193 |   } else if (action.command === 'waitForPage') {
194 |     console.log('Waiting for page to load...')
195 |     await page.waitForNavigation()
196 |   }
197 | }
198 | 
199 | // Entry point: parses the CLI arguments, runs the commands file in a browser,
200 | // then passes the scraped text to the `tts` CLI and copies the resulting audio
201 | // to the requested output file.
202 | ;(async () => {
203 |   const cliOpts = {
204 |     boolean: ['debug', 'headless'],
205 |     default: {
206 |       debug: false,
207 |       delay: 0,
208 |       headless: true,
209 |       height: 1000,
210 |       width: 2000
211 |     }
212 |   }
213 |   const args = minimist(process.argv.slice(2), cliOpts)
214 | 
215 |   const commandsFile = args._[0]
216 |   if (!commandsFile) {
217 |     throw new Error('Missing commands filename')
218 |   }
219 | 
220 |   const outputFile = args._[1]
221 |   if (!outputFile) {
222 |     throw new Error('Missing output filename')
223 |   }
224 | 
225 |   const input = await readFile(commandsFile, 'utf8')
226 |   const actions = safeLoad(input) as Command[]
227 |   // An empty or non-list YAML document would otherwise fail later with an unhelpful iteration error.
228 |   if (!Array.isArray(actions)) {
229 |     throw new Error('Commands file must contain a list of commands')
230 |   }
231 | 
232 |   const browser = await puppeteer.launch({
233 |     headless: args.headless,
234 |     devtools: args.debug,
235 |     slowMo: args.delay
236 |   })
237 |   // Close the browser even when a command fails, so the launched browser is not left running.
238 |   try {
239 |     page = await browser.newPage()
240 |     if (args.debug) {
241 |       page.on('console', msg => console.log('PAGE LOG:', msg.text()))
242 |     }
243 |     await page.setViewport({
244 |       width: args.width,
245 |       height: args.height
246 |     })
247 | 
248 |     for (const action of actions) {
249 |       await doAction(action)
250 |     }
251 |   } finally {
252 |     await browser.close()
253 |   }
254 | 
255 |   const textFilename = tempy.file()
256 |   await writeFile(textFilename, sourceText, 'utf8')
257 | 
258 |   console.log('Converting text to audio...')
259 |   const audioFilename = tempy.file()
260 |   let ttsArgs = [
261 |     'node_modules/.bin/tts',
262 |     textFilename,
263 |     audioFilename
264 |   ]
265 |   // Options with defaults above are consumed by this script; every other option is passed through to tts.
266 |   const ownOptions = Object.keys(cliOpts.default)
267 |   for (const name in args) {
268 |     if (name === '_' || ownOptions.includes(name)) { continue }
269 |     // minimist yields an array for a repeated option and non-string values for
270 |     // numbers and flags; emit one string `--name value` pair per value.
271 |     const values = Array.isArray(args[name]) ? args[name] : [args[name]]
272 |     for (const value of values) {
273 |       ttsArgs.push(`--${name}`, String(value))
274 |     }
275 |   }
276 |   console.log(`  Running 'node ${ttsArgs.join(' ')}'`)
277 |   await execa('node', ttsArgs, {
278 |     stdout: 'inherit',
279 |     stderr: 'inherit'
280 |   })
281 | 
282 |   await copyFile(audioFilename, outputFile)
283 | 
284 |   console.log(`Done. Wrote file to ${outputFile}`)
285 | })().catch(err => {
286 |   // Report the failure and signal it through the exit code instead of
287 |   // leaving an unhandled promise rejection.
288 |   console.error(err)
289 |   process.exitCode = 1
290 | })
291 | 


--------------------------------------------------------------------------------
/packages/tts-cli/docs/options.md:
--------------------------------------------------------------------------------
  1 | # Options Reference
  2 | 
  3 | Options have a 2-hyphen prefix, e.g. `--format`. The value follows, either separated by a space or an equal sign -- `--format mp3` and `--format=mp3` are equally valid.
  4 | 
  5 | ## `--access-key` / `--secret-key`
  6 | 
  7 | **Supported: AWS**
  8 | 
  9 | The access key and secret key are the credentials that AWS uses to identify your account. If you do not want to put your credentials in a [configuration file](https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/loading-node-credentials-shared.html), or if you want to use credentials different than what are in your configuration file, you will need to specify them when you run tts-cli.
 10 | 
 11 | ```
 12 | $ tts test.txt test.mp3 --access-key ABCDEFG --secret-key hwl500CZygitV91n
 13 | ```
 14 | 
 15 | ## `--effect`
 16 | 
 17 | **Supported: GCP**
 18 | 
 19 | Adds an audio effect (also called an audio profile or a device profile) to the speech after synthesis. Can be specified multiple times -- effects are applied on top of each other in the order given.
 20 | 
 21 | ```
 22 | $ tts test.txt test.mp3 --service gcp --effect handset-class-device
 23 | ```
 24 | 
 25 | See the [GCP documentation](https://cloud.google.com/text-to-speech/docs/audio-profiles) for the available effects.
 26 | 
 27 | ## `--email`
 28 | 
 29 | **Supported: GCP**
 30 | 
 31 | Specifies the email address used to identify the account. This is the same as the `client_email` for your GCP project.
 32 | 
 33 | ```
 34 | $ tts test.txt test.mp3 --service gcp --email starting-account-kjd6bvn58@alert-vista-12345.iam.gserviceaccount.com --private-key-file .\key.pem
 35 | ```
 36 | 
 37 | ## `--engine`
 38 | 
 39 | **Supported: AWS**
 40 | 
 41 | Specifies the voice engine to use for generating the speech.
 42 | 
 43 | * `standard` -- uses concatenative synthesis
 44 | * `neural` -- uses a neural network
 45 | * `generative` -- uses generative AI
 46 | * `long-form` -- uses generative AI designed for longer content
 47 | 
 48 | ```
 49 | $ tts test.txt test.mp3 --engine neural
 50 | ```
 51 | 
 52 | See the [AWS documentation](https://docs.aws.amazon.com/polly/latest/dg/voice-engines-polly.html) for the available engines.
 53 | 
 54 | ## `--ffmpeg`
 55 | 
 56 | **Supported: AWS, GCP**
 57 | 
 58 | Specifies the location of the `ffmpeg` program on your machine. Ideally, the program would automatically be located (usually through the `PATH` environment variable), but you can specify it manually.
 59 | 
 60 | ```
 61 | $ tts test.txt test.mp3 --ffmpeg C:\ffmpeg\ffmpeg.exe
 62 | ```
 63 | 
 64 | ## `--format`
 65 | 
 66 | **Supported: AWS, GCP**
 67 | 
 68 | Specifies the audio format you want for the output file. Possible values:
 69 | 
 70 | * `mp3` (default)
 71 | * `ogg` -- [Vorbis](https://en.wikipedia.org/wiki/Vorbis) (AWS) or [Opus](https://en.wikipedia.org/wiki/Opus_(audio_format)) (GCP) audio wrapped inside an Ogg container.
 72 | * `ogg_vorbis` -- Deprecated, use `ogg` instead.
 73 | * `pcm` -- Audio in a linear PCM sequence (signed 16-bit, 1 channel mono, little-endian format). Audio frequency depends on the `--sample-rate` option. AWS returns raw audio; GCP includes a WAV file header.
 74 | 
 75 | ```
 76 | $ tts test.txt test.ogg --format ogg
 77 | ```
 78 | 
 79 | ## `--gain`
 80 | 
 81 | **Supported: GCP**
 82 | 
 83 | Volume gain (in dB), from -96.0 to 16.0. A value of 0.0 will play at normal native signal amplitude. A value of -6.0 will play at approximately half the amplitude. A value of +6.0 will play at approximately twice the amplitude.
 84 | 
 85 | ```
 86 | $ tts test.txt test.mp3 --service gcp --gain 6
 87 | ```
 88 | 
 89 | Note that negative gains must be specified using the equal-sign syntax, otherwise the value is interpreted as an option name:
 90 | 
 91 | ```
 92 | $ tts test.txt test.mp3 --service gcp --gain=-12
 93 | ```
 94 | 
 95 | ## `--gender`
 96 | 
 97 | **Supported: GCP**
 98 | 
 99 | Gender of the voice. Leave it unspecified if you don't care what gender the selected voice will have.
100 | 
101 | * `male` for a male voice
102 | * `female` for a female voice
103 | * `neutral` for a gender-neutral voice
104 | 
105 | ```
106 | $ tts test.txt test.mp3 --service gcp --gender female
107 | ```
108 | 
109 | ## `--language`
110 | 
111 | **Supported: AWS, GCP**
112 | 
113 | Language for the voice, expressed as a [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag (e.g. `en-US`, `es-419`, `cmn-tw`). This should not include a script tag (e.g. use `cmn-cn` rather than `cmn-Hant-cn`).
114 | 
115 | Defaults to `en-US` for GCP if not specified. There is no default for AWS.
116 | 
117 | > Note that the TTS service may choose a voice with a slightly different language code than the one selected; it may substitute a different region (e.g. using `en-US` rather than `en-CA` if there isn't a Canadian voice available), or even a different language, e.g. using `nb` (Norwegian Bokmal) instead of `no` (Norwegian).
118 | 
119 | ```
120 | $ tts test.txt test.mp3 --service gcp --language zh-CN
121 | ```
122 | 
123 | ## `--lexicon`
124 | 
125 | **Supported: AWS**
126 | 
127 | Applies a stored pronunciation lexicon. (See the AWS Polly documentation on [managing lexicons](https://docs.aws.amazon.com/polly/latest/dg/managing-lexicons.html).) Lexicons are applied only if the language of the lexicon is the same as the language of the voice.
128 | 
129 | Can be specified multiple times.
130 | 
131 | ```
132 | $ tts test.txt test.ogg --lexicon lexicon1 --lexicon lexicon2
133 | ```
134 | 
135 | ## `--pitch`
136 | 
137 | **Supported: GCP**
138 | 
139 | Changes the speaking pitch (in semitones), from -20.0 to 20.0.
140 | 
141 | ```
142 | $ tts test.txt test.mp3 --service gcp --pitch 10
143 | ```
144 | 
145 | Note that negative pitch must be specified using the equal-sign syntax, otherwise the value is interpreted as an option name:
146 | 
147 | ```
148 | $ tts test.txt test.mp3 --service gcp --pitch=-10
149 | ```
150 | 
151 | ## `--private-key` / `--private-key-file`
152 | 
153 | **Supported: GCP**
154 | 
155 | Specifies the private key (`--private-key`), or the file containing the private key (`--private-key-file`), used to make secure requests to Google Cloud. It should be either in [PEM format](http://how2ssl.com/articles/working_with_pem_files/) (beginning with "-----BEGIN PRIVATE KEY-----" and ending with "-----END PRIVATE KEY-----") or in [PKCS #12](https://en.wikipedia.org/wiki/PKCS_12) format.
156 | 
157 | You must also specify `--email` if you use either option.
158 | 
159 | For security and ease of use, `--private-key-file` is recommended over `--private-key`.
160 | 
161 | ```
162 | $ tts test.txt test.mp3 --service gcp --email foo@example.com --private-key-file .\key.pem
163 | ```
164 | 
165 | ```
166 | $ tts test.txt test.mp3 --service gcp --email foo@example.com --private-key "-----BEGIN PRIVATE KEY-----\nMIIEvQIBA......DAAMY=\n-----END PRIVATE KEY-----\n"
167 | ```
168 | 
169 | ## `--project-file`
170 | 
171 | **Supported: GCP**
172 | 
173 | Specifies the `.json` file with your project configuration.
174 | 
175 | When setting up a Google Cloud project, you should be able to download a project file that looks something like this:
176 | 
177 | ```json
178 | {
179 |   "type": "service_account",
180 |   "project_id": "alert-vista-895093",
181 |   "private_key_id": "ad386093c2ab",
182 |   "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBAD....AAMY=\n-----END PRIVATE KEY-----\n",
183 |   "client_email": "my-account-gj38dl@alert-vista-895093.iam.gserviceaccount.com",
184 |   "client_id": "6873947275063",
185 |   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
186 |   "token_uri": "https://oauth2.googleapis.com/token",
187 |   "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
188 |   "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/my-account-gj38dl%40alert-vista-895093.iam.gserviceaccount.com"
189 | }
190 | ```
191 | 
192 | If you save this file to your computer, you can then invoke tts-cli and use the `--project-file` option to point to that file.
193 | 
194 | ```
195 | $ tts test.txt test.mp3 --service gcp --project-file .\my-project.json
196 | ```
197 | 
198 | ## `--project-id`
199 | 
200 | **Supported: GCP**
201 | 
202 | Specifies the ID used to identify your project.
203 | 
204 | Usually you'll want to use `--project-file`, or `--email` + `--private-key-file`, to identify your project instead.
205 | 
206 | ```
207 | $ tts test.txt test.mp3 --service gcp --email foo@example.com --project-id grape-spaceship-123
208 | ```
209 | 
210 | ## `--region`
211 | 
212 | **Supported: AWS**
213 | 
214 | Specifies the [AWS region](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html) to send requests to. Using a region closer to your location may result in faster processing, but note that regions may vary in cost.
215 | 
216 | ```
217 | $ tts test.txt test.mp3 --region us-west-2
218 | ```
219 | 
220 | ## `--sample-rate`
221 | 
222 | **Supported: AWS, GCP**
223 | 
224 | Specifies the audio frequency (in Hz).
225 | 
226 | * Valid values for `mp3` and `ogg` formats are `8000`, `11025` (GCP only), `16000`, `22050` (default), `32000` (GCP only), `44100` (GCP only), and `48000` (GCP only).
227 | * Valid values for `pcm` are `8000`, `11025` (GCP only), `16000` (default), and `22050` (GCP only).
228 | 
229 | ```
230 | $ tts test.txt test.mp3 --sample-rate 8000
231 | ```
232 | 
233 | ## `--service`
234 | 
235 | **Supported: AWS, GCP**
236 | 
237 | Specifies which service to use:
238 | 
239 | * `aws` to use AWS Polly (default)
240 | * `gcp` to use Google Cloud Text-to-Speech
241 | 
242 | ```
243 | $ tts test.txt test.mp3 --service gcp
244 | ```
245 | 
246 | ## `--speed`
247 | 
248 | **Supported: GCP**
249 | 
250 | Specifies the speaking rate, from 0.25 to 4.0, where 1.0 is normal speed. Using 2.0 will result in speech that is twice as fast, while 0.5 will result in speech that is half as fast as normal.
251 | 
252 | ```
253 | $ tts test.txt test.mp3 --service gcp --speed 2
254 | ```
255 | 
256 | ## `--throttle`
257 | 
258 | **Supported: AWS, GCP**
259 | 
260 | Indicates how many simultaneous requests to make against the service. A higher number will send requests faster to AWS or GCP, but will use more of your bandwidth, and the service may reject your requests if you send too many at a time.
261 | 
262 | ```
263 | $ tts test.txt test.mp3 --throttle 2
264 | ```
265 | 
266 | ## `--type`
267 | 
268 | **Supported: AWS, GCP**
269 | 
270 | Specifies the type of input text.
271 | 
272 | * `text` indicates plain text (default).
273 | * `ssml` indicates an [SSML](https://www.w3.org/TR/speech-synthesis/)-formatted document. The document must be valid, well-formed SSML. Support and extensions to SSML elements vary by service; check the [AWS docs](https://docs.aws.amazon.com/polly/latest/dg/supported-ssml.html) and [GCP docs](https://cloud.google.com/text-to-speech/docs/ssml) for details.
274 | 
275 | ```
276 | $ tts test.ssml test.mp3 --type ssml
277 | ```
278 | 
279 | ## `--voice`
280 | 
281 | **Supported: AWS, GCP**
282 | 
283 | Indicates the voice to use for speech.
284 | 
285 | AWS has a [collection of voices](https://docs.aws.amazon.com/polly/latest/dg/voicelist.html) available. The default is `Joanna`.
286 | 
287 | ```
288 | $ tts test.ssml test.mp3 --service aws --voice Geraint
289 | ```
290 | 
291 | GCP will choose a voice based on the `--language` and `--gender` values (either specified by you or from the defaults); there is no default voice. You can still specify a voice name using this option -- see the list of [supported voices](https://cloud.google.com/text-to-speech/docs/voices).
292 | 
293 | ```
294 | $ tts test.ssml test.mp3 --service gcp --voice en-US-Standard-E
295 | ```
296 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/providers/gcp.spec.js:
--------------------------------------------------------------------------------
  1 | const TextToSpeechClient = require('@google-cloud/text-to-speech').TextToSpeechClient
  2 | const fs = require('fs')
  3 | const path = require('path')
  4 | const tempfile = require('tempfile')
  5 | 
  6 | describe('Google Cloud provider', () => {
  7 |   let fsStub
  8 |   let create
  9 |   let GoogleProvider
 10 |   let provider
 11 | 
 12 |   beforeEach(() => {
 13 |     ({ fs: fsStub, create, GoogleProvider } = require('../helpers').loadLib('providers/gcp'))
 14 |     provider = create({
 15 |       email: 'foo@example.com',
 16 |       privateKey: 'private key',
 17 |       projectFile: 'project-file.json',
 18 |       projectId: 'project ID'
 19 |     })
 20 |   })
 21 | 
 22 |   describe('create()', () => {
 23 |     it('should create a provider instance', () => {
 24 |       expect(provider).toEqual(jasmine.any(GoogleProvider))
 25 |     })
 26 | 
 27 |     it('should have an underlying Google Cloud object', () => {
 28 |       expect(provider.instance).toEqual(jasmine.any(TextToSpeechClient))
 29 |     })
 30 | 
 31 |     it('should use the email address from the options', () => {
 32 |       expect(provider.instance.auth.jsonContent.client_email).toBe('foo@example.com')
 33 |     })
 34 | 
 35 |     it('should use the private key from the options', () => {
 36 |       expect(provider.instance.auth.jsonContent.private_key).toBe('private key')
 37 |     })
 38 | 
 39 |     it('should use the project ID from the options', () => {
 40 |       provider.instance.getProjectId().then(id => {
 41 |         expect(id).toBe('project ID')
 42 |       })
 43 |     })
 44 | 
 45 |     it('should leave out the project file if not specified', () => {
 46 |       provider = create({
 47 |         email: 'foo@example.com',
 48 |         'private-key': 'private key'
 49 |       })
 50 |       expect(provider.instance.auth.keyFilename).toBeUndefined()
 51 |     })
 52 | 
 53 |     it('should convert the project file to an absolute path if relative', () => {
 54 |       expect(provider.instance.auth.keyFilename).toBe(
 55 |         path.resolve('project-file.json')
 56 |       )
 57 |     })
 58 | 
 59 |     it('should use the project file as-is if an absolute path', () => {
 60 |       provider = create({
 61 |         email: 'foo@example.com',
 62 |         privateKey: 'fake key',
 63 |         projectFile: path.resolve('project-file.json')
 64 |       })
 65 |       expect(provider.instance.auth.keyFilename).toBe(
 66 |         path.resolve('project-file.json')
 67 |       )
 68 |     })
 69 | 
 70 |     it('should work if email and private key are not specified', () => {
 71 |       const filename = tempfile()
 72 |       const fakeProject = {
 73 |         client_email: 'foo@example.com',
 74 |         private_key: 'fake key'
 75 |       }
 76 |       fs.writeFileSync(filename, JSON.stringify(fakeProject), 'utf8')
 77 |       expect(() => {
 78 |         create({ 'project-file': filename })
 79 |       }).not.toThrow()
 80 |     })
 81 |   })
 82 | 
 83 |   describe('buildPart()', () => {
 84 |     it('should return an object with a `synthesizer` property', () => {
 85 |       expect(provider.buildPart()).toEqual({
 86 |         synthesizer: jasmine.any(Function)
 87 |       })
 88 |     })
 89 |   })
 90 | 
 91 |   describe('generate()', () => {
 92 |     let task, testData, info, synthesizer
 93 | 
 94 |     beforeEach(() => {
 95 |       task = {
 96 |         title: 'Convert to audio (0/42)'
 97 |       }
 98 |       testData = {
 99 |         filename: tempfile(),
100 |         index: 6,
101 |         opts: {
102 |           effect: ['effect1', 'effect2'],
103 |           gain: -1.2,
104 |           gender: 'neutral',
105 |           language: 'en-US',
106 |           pitch: -9.8,
107 |           sampleRate: 16000,
108 |           speed: 4.2,
109 |           type: 'text',
110 |           voice: 'John'
111 |         },
112 |         response: 'fake audio data',
113 |         text: 'hello world',
114 |         url: 'http://example.com/'
115 |       }
116 |       synthesizer = jasmine.createSpy('synthesizer')
117 |       info = {
118 |         opts: testData.opts,
119 |         task,
120 |         tempfile: testData.filename,
121 |         text: testData.text,
122 |         synthesizer
123 |       }
124 |     })
125 | 
126 |     afterEach(done => {
127 |       fs.unlink(testData.filename, () => {
128 |         // ignore any errors
129 |         done()
130 |       })
131 |     })
132 | 
133 |     it('should call the synthesizer function', done => {
134 |       synthesizer.and.callFake(() => done())
135 |       provider.generate(info, 0, () => {})
136 |       expect(synthesizer).toHaveBeenCalled()
137 |     })
138 | 
139 |     describe('when everything works', () => {
140 |       beforeEach(() => {
141 |         synthesizer.and.callFake((req, opts, cb) => {
142 |           cb(null, { audioContent: testData.response })
143 |         })
144 |       })
145 | 
146 |       it('should update the task title', done => {
147 |         provider.generate(info, testData.index, () => {
148 |           expect(task.title).toMatch(`\\(${testData.index}/`)
149 |           done()
150 |         })
151 |       })
152 | 
153 |       it('should work with the MP3 format', done => {
154 |         testData.opts.format = 'mp3'
155 |         provider.generate(info, 0, () => {
156 |           const opts = synthesizer.calls.mostRecent().args[0]
157 |           expect(opts.audioConfig.audioEncoding).toBe('MP3')
158 |           done()
159 |         })
160 |       })
161 | 
162 |       it('should work with the OGG format', done => {
163 |         testData.opts.format = 'ogg'
164 |         provider.generate(info, 0, () => {
165 |           const opts = synthesizer.calls.mostRecent().args[0]
166 |           expect(opts.audioConfig.audioEncoding).toBe('OGG_OPUS')
167 |           done()
168 |         })
169 |       })
170 | 
171 |       it('should work with the PCM format', done => {
172 |         testData.opts.format = 'pcm'
173 |         provider.generate(info, 0, () => {
174 |           const opts = synthesizer.calls.mostRecent().args[0]
175 |           expect(opts.audioConfig.audioEncoding).toBe('LINEAR16')
176 |           done()
177 |         })
178 |       })
179 | 
180 |       it('should not use sample rate if not specified', done => {
181 |         delete info.opts.sampleRate
182 |         provider.generate(info, 0, () => {
183 |           const opts = synthesizer.calls.mostRecent().args[0]
184 |           expect(opts.audioConfig.sampleRateHertz).toBeUndefined()
185 |           done()
186 |         })
187 |       })
188 | 
189 |       it('should use the sample rate, when specified', done => {
190 |         provider.generate(info, 0, () => {
191 |           const opts = synthesizer.calls.mostRecent().args[0]
192 |           expect(opts.audioConfig.sampleRateHertz).toBe(testData.opts.sampleRate)
193 |           done()
194 |         })
195 |       })
196 | 
197 |       it('should use the given (plain) text', done => {
198 |         testData.opts.type = 'text'
199 |         provider.generate(info, 0, () => {
200 |           const opts = synthesizer.calls.mostRecent().args[0]
201 |           expect(opts.input.text).toBe(testData.text)
202 |           expect(opts.input.ssml).toBeUndefined()
203 |           done()
204 |         })
205 |       })
206 | 
207 |       it('should use the given (SSML) text', done => {
208 |         testData.opts.type = 'ssml'
209 |         provider.generate(info, 0, () => {
210 |           const opts = synthesizer.calls.mostRecent().args[0]
211 |           expect(opts.input.ssml).toBe(testData.text)
212 |           expect(opts.input.text).toBeUndefined()
213 |           done()
214 |         })
215 |       })
216 | 
217 |       it('should not use effects if not specified', done => {
218 |         delete info.opts.effect
219 |         provider.generate(info, 0, () => {
220 |           const opts = synthesizer.calls.mostRecent().args[0]
221 |           expect(opts.audioConfig.effectsProfileId).toBeUndefined()
222 |           done()
223 |         })
224 |       })
225 | 
226 |       it('should use the effects, when specified', done => {
227 |         provider.generate(info, 0, () => {
228 |           const opts = synthesizer.calls.mostRecent().args[0]
229 |           expect(opts.audioConfig.effectsProfileId).toEqual(testData.opts.effect)
230 |           done()
231 |         })
232 |       })
233 | 
234 |       it('should use the given volume gain', done => {
235 |         provider.generate(info, 0, () => {
236 |           const opts = synthesizer.calls.mostRecent().args[0]
237 |           expect(opts.audioConfig.volumeGainDb).toBe(testData.opts.gain)
238 |           done()
239 |         })
240 |       })
241 | 
242 |       it('should use the given gender', done => {
243 |         provider.generate(info, 0, () => {
244 |           const opts = synthesizer.calls.mostRecent().args[0]
245 |           expect(opts.voice.ssmlGender).toBe(testData.opts.gender.toUpperCase())
246 |           done()
247 |         })
248 |       })
249 | 
250 |       it('should leave out gender if not specified', done => {
251 |         delete testData.opts.gender
252 |         provider.generate(info, 0, () => {
253 |           const opts = synthesizer.calls.mostRecent().args[0]
254 |           expect(opts.voice.ssmlGender).toBeUndefined()
255 |           done()
256 |         })
257 |       })
258 | 
259 |       it('should use the given language', done => {
260 |         provider.generate(info, 0, () => {
261 |           const opts = synthesizer.calls.mostRecent().args[0]
262 |           expect(opts.voice.languageCode).toBe(testData.opts.language)
263 |           done()
264 |         })
265 |       })
266 | 
267 |       it('should use the given pitch', done => {
268 |         provider.generate(info, 0, () => {
269 |           const opts = synthesizer.calls.mostRecent().args[0]
270 |           expect(opts.audioConfig.pitch).toBe(testData.opts.pitch)
271 |           done()
272 |         })
273 |       })
274 | 
275 |       it('should use the given speed', done => {
276 |         provider.generate(info, 0, () => {
277 |           const opts = synthesizer.calls.mostRecent().args[0]
278 |           expect(opts.audioConfig.speakingRate).toBe(testData.opts.speed)
279 |           done()
280 |         })
281 |       })
282 | 
283 |       it('should use the given voice', done => {
284 |         provider.generate(info, 0, () => {
285 |           const opts = synthesizer.calls.mostRecent().args[0]
286 |           expect(opts.voice.name).toBe(testData.opts.voice)
287 |           done()
288 |         })
289 |       })
290 | 
291 |       it('should write the GCP response to the temp file', done => {
292 |         provider.generate(info, 0, () => {
293 |           expect(fsStub.writeFile).toHaveBeenCalledWith(
294 |             testData.filename,
295 |             testData.response,
296 |             'binary',
297 |             jasmine.any(Function)
298 |           )
299 |           done()
300 |         })
301 |       })
302 |     })
303 | 
304 |     describe('when GCP returns an error', () => {
305 |       beforeEach(() => {
306 |         synthesizer.and.callFake((req, opts, cb) => {
307 |           cb(new Error('testing GCP error'))
308 |         })
309 |       })
310 | 
311 |       it('should call back with the error', done => {
312 |         provider.generate(info, 0, (err) => {
313 |           expect(err.message).toBe('testing GCP error')
314 |           done()
315 |         })
316 |       })
317 |     })
318 | 
319 |     describe('when file writing fails', () => {
320 |       beforeEach(() => {
321 |         synthesizer.and.callFake((req, opts, cb) => {
322 |           cb(null, { audioContent: testData.response })
323 |         })
324 |         fsStub.writeFile.and.callFake((dest, data, opts, cb) => {
325 |           cb(new Error('testing write error'))
326 |         })
327 |       })
328 | 
329 |       it('should call back with the error', done => {
330 |         provider.generate(info, 0, (err) => {
331 |           expect(err.message).toBe('testing write error')
332 |           done()
333 |         })
334 |       })
335 |     })
336 |   })
337 | })
338 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/packages/tts-cli/test/text-chunk.spec.js:
--------------------------------------------------------------------------------
  1 | const { chunkText, splitIntoSentences } = require('../lib/text-chunk')
  2 | 
  3 | describe('chunkText()', () => {
  4 |   it('should split into sentences', () => {
  5 |     const text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam dictum dictum ligula at accumsan. Proin vel interdum ligula, dictum suscipit odio. Pellentesque vel enim aliquet, convallis sem a, aliquam nibh. Sed pretium a nulla non finibus.'
  6 |     const parts = chunkText(text, 65)
  7 |     expect(parts).toEqual([
  8 |       'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
  9 |       'Nullam dictum dictum ligula at accumsan.',
 10 |       'Proin vel interdum ligula, dictum suscipit odio.',
 11 |       'Pellentesque vel enim aliquet, convallis sem a, aliquam nibh.',
 12 |       'Sed pretium a nulla non finibus.'
 13 |     ])
 14 |   })
 15 | 
 16 |   it('should combine short sentences into a single chunk', () => {
 17 |     const text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam dictum dictum ligula at accumsan. Proin vel interdum ligula, dictum suscipit odio. Pellentesque vel enim aliquet, convallis sem a, aliquam nibh. Sed pretium a nulla non finibus. Vestibulum ex diam, feugiat sit amet pellentesque id, vestibulum quis turpis.'
 18 |     const parts = chunkText(text, 110)
 19 |     expect(parts).toEqual([
 20 |       'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam dictum dictum ligula at accumsan.',
 21 |       'Proin vel interdum ligula, dictum suscipit odio. Pellentesque vel enim aliquet, convallis sem a, aliquam nibh.',
 22 |       'Sed pretium a nulla non finibus. Vestibulum ex diam, feugiat sit amet pellentesque id, vestibulum quis turpis.'
 23 |     ])
 24 |   })
 25 | 
 26 |   it('should split long sentences into chunks', () => {
 27 |     const text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit nullam dictum dictum ligula at accumsan proin vel interdum ligula, dictum suscipit odio pellentesque vel enim aliquet, convallis sem a, aliquam nibh.'
 28 |     const parts = chunkText(text, 50)
 29 |     expect(parts).toEqual([
 30 |       'Lorem ipsum dolor sit amet, consectetur adipiscing',
 31 |       'elit nullam dictum dictum ligula at accumsan proin',
 32 |       'vel interdum ligula, dictum suscipit odio',
 33 |       'pellentesque vel enim aliquet, convallis sem a,',
 34 |       'aliquam nibh.'
 35 |     ])
 36 |   })
 37 | 
 38 |   it('should split words for abnormally small maximums', () => {
 39 |     const text = 'hello my world'
 40 |     const parts = chunkText(text, 2)
 41 |     expect(parts).toEqual(['he', 'll', 'o', 'my', 'wo', 'rl', 'd'])
 42 |   })
 43 | 
 44 |   it('should split abnormally long words', () => {
 45 |     const text = 'My favorite word is supercalifragilisticexpialidocious. It\'s quite atrocious.'
 46 |     const parts = chunkText(text, 10)
 47 |     expect(parts).toEqual([
 48 |       'My', 'favorite', 'word is su', 'percalifra', 'gilisticex', 'pialidocio', 'us.',
 49 |       'It\'s quite', 'atrocious.'
 50 |     ])
 51 |   })
 52 | 
 53 |   it('should split the Gettysburg Address into parts', () => {
 54 |     const text = `Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.
 55 | 
 56 |     Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
 57 | 
 58 |     But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth.`
 59 | 
 60 |     const parts = chunkText(text, 200)
 61 |     expect(parts).toEqual([
 62 |       'Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal.',
 63 |       'Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war.',
 64 |       'We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.',
 65 |       'But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground.',
 66 |       'The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract.',
 67 |       'The world will little note, nor long remember what we say here, but it can never forget what they did here.',
 68 |       'It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced.',
 69 |       'It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of',
 70 |       'devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the',
 71 |       'people, for the people, shall not perish from the earth.'
 72 |     ])
 73 |   })
 74 | 
 75 |   it('should work for long texts', () => {
 76 |     const text = new Array(200).fill(0).map(_ => 'This is a sentence.').join(' ')
 77 |     expect(() => {
 78 |       chunkText(text, 20)
 79 |     }).not.toThrow()
 80 |   })
 81 | })
 82 | 
 83 | describe('splitIntoSentences()', () => {
 84 |   const example = `
 85 |   Apache License
 86 |   Version 2.0, January 2004
 87 | http://www.apache.org/licenses/
 88 | 
 89 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 90 | 
 91 | 1. Definitions.
 92 | 
 93 | "License" shall mean the terms and conditions for use, reproduction,
 94 | and distribution as defined by Sections 1 through 9 of this document.
 95 | 
 96 | "Licensor" shall mean the copyright owner or entity authorized by
 97 | the copyright owner that is granting the License.
 98 | 
 99 | "Legal Entity" shall mean the union of the acting entity and all
100 | other entities that control, are controlled by, or are under common
101 | control with that entity. For the purposes of this definition,
102 | "control" means (i) the power, direct or indirect, to cause the
103 | direction or management of such entity, whether by contract or
104 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
105 | outstanding shares, or (iii) beneficial ownership of such entity.
106 | `
107 | 
108 |   it('should split the text into sentences', () => {
109 |     const parts = splitIntoSentences(example)
110 |     expect(parts).toEqual([
111 |       `Apache License
112 |   Version 2.0, January 2004
113 | http://www.apache.org/licenses/
114 | 
115 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
116 | 
117 | 1. Definitions.`,
118 |       `"License" shall mean the terms and conditions for use, reproduction,
119 | and distribution as defined by Sections 1 through 9 of this document.`,
120 |       `"Licensor" shall mean the copyright owner or entity authorized by
121 | the copyright owner that is granting the License.`,
122 |       `"Legal Entity" shall mean the union of the acting entity and all
123 | other entities that control, are controlled by, or are under common
124 | control with that entity.`,
125 |       `For the purposes of this definition,
126 | "control" means (i) the power, direct or indirect, to cause the
127 | direction or management of such entity, whether by contract or
128 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
129 | outstanding shares, or (iii) beneficial ownership of such entity.`
130 |     ])
131 |   })
132 | 
133 |   it('should split the text into sentences', () => {
134 |     const text = 'Lorem ipsum, dolor sed amat frequentor minimus. Second sentence.'
135 |     const parts = splitIntoSentences(text)
136 |     expect(parts).toEqual([
137 |       'Lorem ipsum, dolor sed amat frequentor minimus.',
138 |       'Second sentence.'
139 |     ])
140 |   })
141 | 
142 |   it('should work with difficult sentences (A)', () => {
143 |     const text = 'On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration.'
144 |     const parts = splitIntoSentences(text)
145 |     expect(parts).toEqual([
146 |       'On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S.',
147 |       'Millions attended the Inauguration.'
148 |     ])
149 |   })
150 | 
151 |   it('should work with difficult sentences (B)', () => {
152 |     const text = 'Sen. Barack Obama became the 44th President of the US. Millions attended.'
153 |     const parts = splitIntoSentences(text)
154 |     expect(parts).toEqual([
155 |       'Sen. Barack Obama became the 44th President of the US.',
156 |       'Millions attended.'
157 |     ])
158 |   })
159 | 
160 |   it('should work with difficult sentences (C)', () => {
161 |     const text = 'Barack Obama, previously Sen. of lorem ipsum, became the 44th President of the U.S. Millions attended.'
162 |     const parts = splitIntoSentences(text)
163 |     expect(parts).toEqual([
164 |       'Barack Obama, previously Sen. of lorem ipsum, became the 44th President of the U.S.',
165 |       'Millions attended.'
166 |     ])
167 |   })
168 | 
169 |   /**
170 |    * TODO: Sentence splitting needs better support of acronyms
171 |   it('should work with difficult sentences (D)', () => {
172 |     const text = 'Baril, a Richmond lawyer once nominated for a federal prosecutors job, endorsed a faith-based drug initiative in local jails patterned after the Henrico County jails therapeutic program called Project R.I.S.E. Just as important, he had a great foil across the net.'
173 |     const parts = splitIntoSentences(text)
174 |     expect(parts).toEqual([
175 |       'Baril, a Richmond lawyer once nominated for a federal prosecutors job, endorsed a faith-based drug initiative in local jails patterned after the Henrico County jails therapeutic program called Project R.I.S.E.',
176 |       'Just as important, he had a great foil across the net.'
177 |     ])
178 |   })
179 | 
180 |   it('should work with difficult sentences (E)', () => {
181 |     const text = 'Newsletter AIDs CARE, EDUCATION AND TRAINING Issue No. 7. Acet Home Care, which moves into the building in July, will share the offices with two other AIDS charities, P.A.L.S. (Portsmouth AIDS Link Support) and the Link Project.'
182 |     const parts = splitIntoSentences(text)
183 |     expect(parts).toEqual([
184 |       'Newsletter AIDs CARE, EDUCATION AND TRAINING Issue No. 7.',
185 |       'Acet Home Care, which moves into the building in July, will share the offices with two other AIDS charities, P.A.L.S. (Portsmouth AIDS Link Support) and the Link Project.'
186 |     ])
187 |   })
188 | 
189 |   it('should work with difficult sentences (F)', () => {
190 |     const text = 'Another is expanded hours of operation -- from fewer than five hours a day to 9:30 a.m. to 4 p.m. Monday through Saturday. Sunday remains closed.'
191 |     const parts = splitIntoSentences(text)
192 |     expect(parts).toEqual([
193 |       'Another is expanded hours of operation -- from fewer than five hours a day to 9:30 a.m. to 4 p.m. Monday through Saturday.',
194 |       'Sunday remains closed.'
195 |     ])
196 |   })
197 | 
198 |   it('should work with difficult sentences (G)', () => {
199 |     const text = 'Gold Wing Road Rider\'s Association - Coffee break, Guzzardo\'s Italian Villa, eat, 6 p.m.; ride, 7 p.m. Then at 9 p.m. go home.'
200 |     const parts = splitIntoSentences(text)
201 |     expect(parts).toEqual([
202 |       'Gold Wing Road Rider\'s Association - Coffee break, Guzzardo\'s Italian Villa, eat, 6 p.m.; ride, 7 p.m.',
203 |       'Then at 9 p.m. go home.'
204 |     ])
205 |   })
206 | 
207 |   it('should work with difficult sentences (H)', () => {
208 |     const text = 'It happened around 5:30 p.m. in the 500 block of W. 82nd St. Investigators say Terrence Taylor, 22, and Deontrell Sloan, 17, got into an argument over money during the game.'
209 |     const parts = splitIntoSentences(text)
210 |     expect(parts).toEqual([
211 |       'It happened around 5:30 p.m. in the 500 block of W. 82nd St. Investigators say Terrence Taylor, 22, and Deontrell Sloan, 17, got into an argument over money during the game.'
212 |     ])
213 |   })
214 |   */
215 | 
216 |   it('should work with difficult sentences (I)', () => {
217 |     const text = 'GARY Mayor Scott L. King has declared a \'cash crisis\' and has asked city department heads to put off all non-essential spending until June.'
218 |     const parts = splitIntoSentences(text)
219 |     expect(parts).toEqual([
220 |       'GARY Mayor Scott L. King has declared a \'cash crisis\' and has asked city department heads to put off all non-essential spending until June.'
221 |     ])
222 |   })
223 | 
224 |   it('should work with difficult sentences (J)', () => {
225 |     const text = 'HOWELL, Mich. - Blissfield was only nine outs away from ending the longest winning streak'
226 |     const parts = splitIntoSentences(text)
227 |     expect(parts).toEqual([
228 |       'HOWELL, Mich. - Blissfield was only nine outs away from ending the longest winning streak'
229 |     ])
230 |   })
231 | 
232 |   it('should work with difficult sentences (K)', () => {
233 |     const text = '33 FORT LAUDERDALE U.S. President George W Bush touted free trade as a means of strengthening democracy'
234 |     const parts = splitIntoSentences(text)
235 |     expect(parts).toEqual([
236 |       '33 FORT LAUDERDALE U.S. President George W Bush touted free trade as a means of strengthening democracy'
237 |     ])
238 |   })
239 | 
240 |   it('should work with difficult sentences (L)', () => {
241 |     const text = 'Mike Tyler rides his bike on Del. 1 near Lewes early last month'
242 |     const parts = splitIntoSentences(text)
243 |     expect(parts).toEqual([
244 |       'Mike Tyler rides his bike on Del. 1 near Lewes early last month'
245 |     ])
246 |   })
247 | 
248 |   /**
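249 |    * TODO: enable once a period directly followed by a capital letter (no space) is split (presumably unsupported for now)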
250 |   it('should not skip a dot in the middle of a word if followed by a capital letter', () => {
251 |     const text = 'Hello Barney.The bird in the word.'
252 |     const parts = splitIntoSentences(text)
253 |     expect(parts).toEqual([
254 |       'Hello Barney.',
255 |       'The bird in the word.'
256 |     ])
257 |   })
258 |   */
259 | 
260 |   it('should skip punctuation inside of brackets', () => {
261 |     const text = 'Lorem ipsum, dolor sed amat frequentor minimus with a sentence [example?] that should not (Though sometimes...) be two or more (but one!) sentences.'
262 |     const parts = splitIntoSentences(text)
263 |     expect(parts).toEqual([
264 |       'Lorem ipsum, dolor sed amat frequentor minimus with a sentence [example?] that should not (Though sometimes...) be two or more (but one!) sentences.'
265 |     ])
266 |   })
267 | 
268 |   it('should skip numbers', () => {
269 |     const text = '10 times 10 = 10.00^2. 13.000 14.50 and 14,000,000.50'
270 |     const parts = splitIntoSentences(text)
271 |     expect(parts).toEqual([
272 |       '10 times 10 = 10.00^2.',
273 |       '13.000 14.50 and 14,000,000.50'
274 |     ])
275 |   })
276 | 
277 |   it('should skip URLs and emails', () => {
278 |     const text = 'Search on http://google.com. Then send me an email: fabien@example.com or fabien@example.org'
279 |     const parts = splitIntoSentences(text)
280 |     expect(parts).toEqual([
281 |       'Search on http://google.com.',
282 |       'Then send me an email: fabien@example.com or fabien@example.org'
283 |     ])
284 |   })
285 | 
286 |   it('should skip phone numbers', () => {
287 |     const text = 'Call +44.3847838 for whatever.'
288 |     const parts = splitIntoSentences(text)
289 |     expect(parts).toEqual([
290 |       'Call +44.3847838 for whatever.'
291 |     ])
292 |   })
293 | 
294 |   it('should skip money with currency indication', () => {
295 |     const text = 'I paid €12.50 for that CD. Twelve dollars and fifty cent ($12.50). Ten pounds - £10.00 it is fine.'
296 |     const parts = splitIntoSentences(text)
297 |     expect(parts).toEqual([
298 |       'I paid €12.50 for that CD.',
299 |       'Twelve dollars and fifty cent ($12.50).',
300 |       'Ten pounds - £10.00 it is fine.'
301 |     ])
302 |   })
303 | 
304 |   it('should not end sentences at newlines/paragraphs', () => {
305 |     const text = 'The humble bundle sale\r\nDate: Monday-Fri starting 2015-01-01'
306 |     const parts = splitIntoSentences(text)
307 |     expect(parts).toEqual([
308 |       'The humble bundle sale\r\nDate: Monday-Fri starting 2015-01-01'
309 |     ])
310 |   })
311 | 
312 |   it('should work with question marks and exclamation marks', () => {
313 |     const text = 'Hello this is my first sentence? There is also a second! A third'
314 |     const parts = splitIntoSentences(text)
315 |     expect(parts).toEqual([
316 |       'Hello this is my first sentence?',
317 |       'There is also a second!',
318 |       'A third'
319 |     ])
320 |   })
321 | 
322 |   it('should skip keywords/code with a dot in it', () => {
323 |     const text = 'HELLO A.TOP IS NICE'
324 |     const parts = splitIntoSentences(text)
325 |     expect(parts).toEqual([
326 |       'HELLO A.TOP IS NICE'
327 |     ])
328 |   })
329 | 
330 |   it('should preserve newlines in sentences with lists', () => {
331 |     const text = 'First sentence... Another list: \n - green \n - blue \n - red'
332 |     const parts = splitIntoSentences(text)
333 |     expect(parts).toEqual([
334 |       'First sentence...',
335 |       'Another list: \n - green \n - blue \n - red'
336 |     ])
337 |   })
338 | 
339 |   it('should ignore multilines', () => {
340 |     const text = `How now brown cow.
341 | 
342 |     Peter Piper Picked a peck of pickled peppers. A peck of pickled peppers peter piper picked.`
343 |     const parts = splitIntoSentences(text)
344 |     expect(parts).toEqual([
345 |       'How now brown cow.',
346 |       'Peter Piper Picked a peck of pickled peppers.',
347 |       'A peck of pickled peppers peter piper picked.'
348 |     ])
349 |   })
350 | 
351 |   it('should ignore newlines in sentences without lists', () => {
352 |     const text = 'First sentence... Another sentence.\nThis is a new paragraph.'
353 |     const parts = splitIntoSentences(text)
354 |     expect(parts).toEqual([
355 |       'First sentence...',
356 |       'Another sentence.',
357 |       'This is a new paragraph.'
358 |     ])
359 |   })
360 | 
361 |   it('should not get a sentence from an empty string', () => {
362 |     const text = ''
363 |     const parts = splitIntoSentences(text)
364 |     expect(parts).toEqual([])
365 |   })
366 | 
367 |   it('should not get a sentence from a string of whitespace', () => {
368 |     const text = '            \n\n                 '
369 |     const parts = splitIntoSentences(text)
370 |     expect(parts).toEqual([])
371 |   })
372 | 
373 |   it('should not get a sentence from undefined', () => {
374 |     const parts = splitIntoSentences()
375 |     expect(parts).toEqual([])
376 |   })
377 | 
378 |   it('should not get a sentence from an array', () => {
379 |     const parts = splitIntoSentences([])
380 |     expect(parts).toEqual([])
381 |   })
382 | 
383 |   it('should not get a sentence from an object', () => {
384 |     const parts = splitIntoSentences({})
385 |     expect(parts).toEqual([])
386 |   })
387 | 
388 |   /**
389 |    * TODO: Sentence splitting needs better support of acronyms
390 |   it('should skip dotted abbreviations (A)', () => {
391 |     const text = 'Lorem ipsum, dolor sed amat frequentor minimus In I.C.T we have multiple challenges! There should only be two sentences.'
392 |     const parts = splitIntoSentences(text)
393 |     expect(parts).toEqual([
394 |       'Lorem ipsum, dolor sed amat frequentor minimus In I.C.T we have multiple challenges!',
395 |       'There should only be two sentences.'
396 |     ])
397 |   })
398 | 
399 |   it('should skip dotted abbreviations (B)', () => {
400 |     const text = 'From amat frequentor minimus hello there at 8 a.m. there p.m. should only be two sentences.'
401 |     const parts = splitIntoSentences(text)
402 |     expect(parts).toEqual([
403 |       'From amat frequentor minimus hello there at 8 a.m. there p.m. should only be two sentences.'
404 |     ])
405 |   })
406 | 
407 |   it('should skip dotted abbreviations (C)', () => {
408 |     const text = 'The school, called Booker T and Stevie Ray\'s Wrestling and Mixed Mart Arts Academy, will have an open house 2-6 p.m. Saturday.'
409 |     const parts = splitIntoSentences(text)
410 |     expect(parts).toEqual([
411 |       'The school, called Booker T and Stevie Ray\'s Wrestling and Mixed Mart Arts Academy, will have an open house 2-6 p.m. Saturday.'
412 |     ])
413 |   })
414 | 
415 |   it('should skip common abbreviations', () => {
416 |     const text = 'Fig. 2. displays currency rates i.e. something libsum. Currencies widely available (i.e. euro, dollar, pound), or alternatively (e.g. €, $, etc.)'
417 |     const parts = splitIntoSentences(text)
418 |     expect(parts).toEqual([
419 |       'Fig. 2. displays currency rates i.e. something libsum.',
420 |       'Currencies widely available (i.e. euro, dollar, pound), or alternatively (e.g. €, $, etc.)'
421 |     ])
422 |   })
423 |   */
424 | 
425 |   it('should skip two-word abbreviations (A)', () => {
426 |     const text = 'Claims 1–6 and 15–26 are rejected under pre-AIA 35 USC § 103(a) as being unpatentable over Chalana et al. (US 2012/0179503) in view of Oh (US 2013/0013993).'
427 |     const parts = splitIntoSentences(text)
428 |     expect(parts).toEqual([
429 |       'Claims 1–6 and 15–26 are rejected under pre-AIA 35 USC § 103(a) as being unpatentable over Chalana et al. (US 2012/0179503) in view of Oh (US 2013/0013993).'
430 |     ])
431 |   })
432 | 
433 |   it('should skip two-word abbreviations (B)', () => {
434 |     const text = 'Et al. is an abbreviation of the Latin loanphrase et alii, meaning and others. It is similar to etc. (short for et cetera, meaning and the rest), but whereas etc. applies to things, et al. applies to people.'
435 |     const parts = splitIntoSentences(text)
436 |     expect(parts).toEqual([
437 |       'Et al. is an abbreviation of the Latin loanphrase et alii, meaning and others.',
438 |       'It is similar to etc. (short for et cetera, meaning and the rest), but whereas etc. applies to things, et al. applies to people.'
439 |     ])
440 |   })
441 | 
442 |   it('should include ellipsis as ending if a capital letter follows', () => {
443 |     const text = 'First sentence... Another sentence'
444 |     const parts = splitIntoSentences(text)
445 |     expect(parts).toEqual([
446 |       'First sentence...',
447 |       'Another sentence'
448 |     ])
449 |   })
450 | })
451 | 


--------------------------------------------------------------------------------