`_)
11 | It's conditionally found in the testing section only so it's only a test_depend
12 | * Contributors: Tully Foote, Yuan "Forrest" Yu, y²
13 |
14 | 1.0.0 (2019-03-20)
15 | ------------------
16 |
--------------------------------------------------------------------------------
/tts/scripts/polly_node.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 |
if __name__ == '__main__':
    # Script entry point: hand control to main() of the tts.amazonpolly module.
    from tts.amazonpolly import main as run_polly_node
    run_polly_node()
20 |
--------------------------------------------------------------------------------
/tts/scripts/synthesizer_node.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 |
if __name__ == "__main__":
    # Script entry point: hand control to main() of the tts.synthesizer module.
    from tts.synthesizer import main as run_synthesizer_node
    run_synthesizer_node()
20 |
--------------------------------------------------------------------------------
/.github/workflows/automerge.yml:
--------------------------------------------------------------------------------
1 | name: Auto merge
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | master
7 | pull_request_review:
8 | types:
9 | - submitted
10 | check_suite:
11 | types:
12 | - completed
13 | status: {}
14 | jobs:
15 | # Automatically merge approved and green dependabot PRs.
16 | auto-merge-dependabot:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - uses: pascalgn/automerge-action@v0.13.1
20 | if: github.actor == 'dependabot[bot]' || github.actor == 'dependabot-preview[bot]'
21 | env:
22 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
23 | MERGE_LABELS: "dependencies"
24 | MERGE_METHOD: "squash" # Squash and merge
25 | MERGE_COMMIT_MESSAGE: "pull-request-title-and-description"
26 | MERGE_RETRY_SLEEP: "1200000" # Retry after 20m, enough time for check suites to run
27 | UPDATE_RETRIES: "6"
28 | UPDATE_METHOD: "rebase" # Rebase PR on base branch
29 | UPDATE_RETRY_SLEEP: "300000"
--------------------------------------------------------------------------------
/tts/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License").
4 | # You may not use this file except in compliance with the License.
5 | # A copy of the License is located at
6 | #
7 | # http://aws.amazon.com/apache2.0
8 | #
9 | # or in the "license" file accompanying this file. This file is distributed
10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11 | # express or implied. See the License for the specific language governing
12 | # permissions and limitations under the License.
13 |
14 | import os
15 | from distutils.core import setup
16 | from catkin_pkg.python_setup import generate_distutils_setup
17 |
18 |
19 | # ROS PACKAGING
20 | # using distutils : https://docs.python.org/2/distutils
21 | # fetch values from package.xml
# Package layout handed to generate_distutils_setup(): the single 'tts'
# package lives under src/, and ships its audio files and Polly model JSON.
setup_args = generate_distutils_setup(
    packages=['tts'],
    package_dir={'': 'src'},
    package_data={'': ['data/*.ogg', 'data/models/polly/2016-06-10/*.json']},
)
setup(**setup_args)
34 |
--------------------------------------------------------------------------------
/tts/launch/tts_polly.launch:
--------------------------------------------------------------------------------
1 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/tts/scripts/voicer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | """Usage:
17 |
18 | (assuming TTS action server has been started via `roslaunch tts tts_polly.launch`)
19 |
20 | Plain text::
21 |
22 | $ rosrun tts voicer.py 'Hello World'
23 |
24 | SSML::
25 |
26 | $ rosrun tts voicer.py \
27 | 'Mary has a little lamb.' \
28 | '{"text_type":"ssml"}'
29 | """
30 |
31 |
32 | import sys
33 | import actionlib
34 | import rospy
35 | from tts.msg import SpeechAction, SpeechGoal
36 |
37 |
if __name__ == '__main__':
    # Positional command-line arguments: first the text, then the metadata string.
    cli_args = sys.argv[1:]

    rospy.init_node('tts_action_client')
    client = actionlib.SimpleActionClient('tts', SpeechAction)
    client.wait_for_server()

    # Fall back to a canned sentence / empty metadata when arguments are omitted.
    goal = SpeechGoal()
    goal.text = cli_args[0] if cli_args else 'I got no idea.'
    goal.metadata = cli_args[1] if len(cli_args) > 1 else ''

    # Send the goal and block until the action server reports a result.
    client.send_goal(goal)
    client.wait_for_result()
    speech_result = client.get_result()
    print('\n' + speech_result.response)
51 |
--------------------------------------------------------------------------------
/tts/package.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | tts
4 | 1.0.2
5 | Package enabling a robot to speak with a human voice by providing a Text-To-Speech ROS service
6 | http://wiki.ros.org/tts
7 |
8 | AWS RoboMaker
9 | AWS RoboMaker
10 |
11 | Apache 2.0
12 |
13 | catkin
14 |
15 | actionlib
16 | actionlib_msgs
17 | message_generation
18 | rospy
19 | std_msgs
20 | python-boto3
21 | sound_play
22 | rosunit
23 | rostest
24 |
25 | actionlib
26 | actionlib_msgs
27 | rospy
28 | std_msgs
29 | sound_play
30 |
31 | actionlib
32 | actionlib_msgs
33 | rospy
34 | std_msgs
35 | message_runtime
36 | python-boto3
37 | sound_play
38 |
39 | rosunit
40 | rostest
41 | python-mock
42 |
43 |
--------------------------------------------------------------------------------
/tts/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8.3)
2 | project(tts)
3 |
4 | find_package(catkin REQUIRED COMPONENTS actionlib_msgs message_generation rospy rosunit std_msgs sound_play)
5 |
6 | catkin_python_setup()
7 |
8 | ################################################
9 | ## Declare ROS messages, services and actions ##
10 | ################################################
11 |
12 | ## Generate services in the 'srv' folder
13 | add_service_files(FILES Synthesizer.srv Polly.srv)
14 |
15 | ## Generate actions in the 'action' folder
16 | add_action_files(FILES Speech.action)
17 |
18 | ## Generate added messages and services with any dependencies listed here
19 | generate_messages(DEPENDENCIES actionlib_msgs std_msgs)
20 |
21 | ###################################
22 | ## catkin specific configuration ##
23 | ###################################
24 | ## The catkin_package macro generates cmake config files for your package
25 | ## Declare things to be passed to dependent projects
26 | ## LIBRARIES: libraries you create in this project that dependent projects also need
27 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need
28 | ## DEPENDS: system dependencies of this project that dependent projects also need
29 | catkin_package(
30 | LIBRARIES tts
31 | CATKIN_DEPENDS actionlib_msgs message_runtime rospy std_msgs
32 | )
33 |
34 | #############
35 | ## Install ##
36 | #############
37 |
38 | # all install targets should use catkin DESTINATION variables
39 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
40 |
41 | ## Mark executable scripts (Python etc.) for installation
42 | ## in contrast to setup.py, you can choose the destination
43 | install(PROGRAMS
44 | scripts/polly_node.py
45 | scripts/synthesizer_node.py
46 | scripts/tts_node.py
47 | scripts/voicer.py
48 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
49 | )
50 |
51 | install(DIRECTORY
52 | config
53 | launch
54 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
55 | )
56 |
57 | #############
58 | ## Testing ##
59 | #############
60 | if(CATKIN_ENABLE_TESTING)
61 | ## Add folders to be run by python nosetests
62 | catkin_add_nosetests(test/test_unit_synthesizer.py)
63 | catkin_add_nosetests(test/test_unit_polly.py)
64 |
65 | if(BUILD_AWS_TESTING)
66 | find_package(rostest REQUIRED COMPONENTS tts)
67 | add_rostest(test/integration_tests.test DEPENDENCIES ${tts_EXPORTED_TARGETS})
68 | endif()
69 | endif()
70 |
71 |
72 |
--------------------------------------------------------------------------------
/.github/workflows/build_and_test.yml:
--------------------------------------------------------------------------------
1 | name: Build & Test
2 | on:
3 | pull_request:
4 | push:
5 | branches:
6 | - master
7 | schedule:
8 | # Run every hour. This helps detect flakiness,
9 | # and broken external dependencies.
10 | - cron: '0 * * * *'
11 |
12 | jobs:
13 | build_and_test_master:
14 | name: Build and Test Master ROS ${{ matrix.ros_version }} ${{ matrix.ros_distro }}
15 | runs-on: ubuntu-latest
16 | strategy:
17 | fail-fast: false
18 | matrix:
19 | ros_distro: [kinetic, melodic]
20 | include:
21 | - ros_distro: kinetic
22 | ubuntu_distro: xenial
23 | - ros_distro: melodic
24 | ubuntu_distro: bionic
25 | container:
26 | image: rostooling/setup-ros-docker:ubuntu-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }}-ros-base-latest
27 | env:
28 | # Needed for the CMakeLists.txt setup
29 | ROS_DISTRO: ${{ matrix.ros_distro }}
30 | ROS_VERSION: 1
31 | steps:
32 | # Needed to access the vcs repos file from the workspace
33 | - name: Checkout source
34 | uses: actions/checkout@v2
35 | - name: Run action-ros-ci to build and test
36 | uses: ros-tooling/action-ros-ci@0.1.2
37 | with:
38 | target-ros1-distro: ${{ env.ROS_VERSION == '1' && matrix.ros_distro || '' }}
39 | target-ros2-distro: ${{ env.ROS_VERSION == '2' && matrix.ros_distro || '' }}
40 | package-name: tts
41 | vcs-repo-file-url: ''
42 | - name: Upload resulting colcon logs
43 | uses: actions/upload-artifact@v2.2.2
44 | with:
45 | name: colcon-logs-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }}
46 | path: ros_ws/log
47 | log_workflow_status_to_cloudwatch:
48 | runs-on: ubuntu-latest
49 | container:
50 | image: ubuntu:bionic
51 | needs:
52 | - build_and_test_master
53 | # Don't skip if prior steps failed, but don't run on a fork because it won't have access to AWS secrets
54 | if: ${{ always() && ! github.event.repository.fork && ! github.event.pull_request.head.repo.fork }}
55 | steps:
56 | - name: Configure AWS Credentials
57 | uses: aws-actions/configure-aws-credentials@v1
58 | with:
59 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
60 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
61 | aws-region: ${{ secrets.AWS_REGION }}
62 | - uses: ros-tooling/action-cloudwatch-metrics@0.0.5
63 | with:
64 | # Checks if any of the jobs have failed.
65 | #
66 | # needs.*.result returns the list of all job results as an
67 | # array, e.g. ['success', 'failure', 'success']
68 | # join() converts the array to a string 'successfailuresuccess'
69 | # contains() checks whether the string contains failure
70 | metric-value: ${{ ! contains(join(needs.*.result, ''), 'failure') && ! contains(join(needs.*.result, ''), 'cancelled') }}
71 |
--------------------------------------------------------------------------------
/.github/workflows/build_and_test_release_latest.yml:
--------------------------------------------------------------------------------
1 | name: Build & Test release-latest
2 | on:
3 | schedule:
4 | # Run every hour. This helps detect flakiness,
5 | # and broken external dependencies.
6 | - cron: '0 * * * *'
7 |
8 | jobs:
9 | build_and_test_release_latest:
10 | name: Build and Test Release Latest ROS ${{ matrix.ros_version }} ${{ matrix.ros_distro }}
11 | runs-on: ubuntu-latest
12 | strategy:
13 | fail-fast: false
14 | matrix:
15 | ros_distro: [kinetic, melodic]
16 | include:
17 | - ros_distro: kinetic
18 | ubuntu_distro: xenial
19 | - ros_distro: melodic
20 | ubuntu_distro: bionic
21 | container:
22 | image: rostooling/setup-ros-docker:ubuntu-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }}-ros-base-latest
23 | env:
24 | # Needed for the CMakeLists.txt setup
25 | ROS_DISTRO: ${{ matrix.ros_distro }}
26 | ROS_VERSION: 1
27 | steps:
28 | # Needed to access the vcs repos file from the workspace
29 | - name: Checkout source
30 | uses: actions/checkout@v2
31 | - name: Run action-ros-ci to build and test
32 | uses: ros-tooling/action-ros-ci@0.1.2
33 | with:
34 | target-ros1-distro: ${{ env.ROS_VERSION == '1' && matrix.ros_distro || '' }}
35 | target-ros2-distro: ${{ env.ROS_VERSION == '2' && matrix.ros_distro || '' }}
36 | package-name: tts
37 | # schedule runs against the default branch (master), so specify release-latest via repos file
38 | vcs-repo-file-url: "${{ github.workspace }}/.github/workflows/release_latest.repos"
39 | - name: Upload resulting colcon logs
40 | uses: actions/upload-artifact@v2.2.2
41 | with:
42 | name: colcon-logs-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }}
43 | path: ros_ws/log
44 | log_workflow_status_to_cloudwatch:
45 | runs-on: ubuntu-latest
46 | container:
47 | image: ubuntu:bionic
48 | needs:
49 | - build_and_test_release_latest
50 | # Don't skip if prior steps failed, but don't run on a fork because it won't have access to AWS secrets
51 | if: ${{ always() && ! github.event.repository.fork && ! github.event.pull_request.head.repo.fork }}
52 | steps:
53 | - name: Configure AWS Credentials
54 | uses: aws-actions/configure-aws-credentials@v1
55 | with:
56 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
57 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
58 | aws-region: ${{ secrets.AWS_REGION }}
59 | - uses: ros-tooling/action-cloudwatch-metrics@0.0.5
60 | with:
61 | metric-dimensions: >-
62 | [
63 | { "Name": "github.event_name", "Value": "${{ github.event_name }}" },
64 | { "Name": "github.ref", "Value": "release-latest" },
65 | { "Name": "github.repository", "Value": "${{ github.repository }}" }
66 | ]
67 | # Checks if any of the jobs have failed.
68 | #
69 | # needs.*.result returns the list of all job results as an
70 | # array, e.g. ['success', 'failure', 'success']
71 | # join() converts the array to a string 'successfailuresuccess'
72 | # contains() checks whether the string contains failure
73 | metric-value: ${{ ! contains(join(needs.*.result, ''), 'failure') && ! contains(join(needs.*.result, ''), 'cancelled') }}
74 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
4 | documentation, we greatly value feedback and contributions from our community.
5 |
6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
7 | information to effectively respond to your bug report or contribution.
8 |
9 |
10 | ## Reporting Bugs/Feature Requests
11 |
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 |
14 | When filing an issue, please check [existing open](https://github.com/aws-robotics/tts-ros1/issues), or [recently closed](https://github.com/aws-robotics/tts-ros1/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 |
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 |
22 |
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 |
26 | 1. You are working against the latest source on the *master* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 |
30 | To send us a pull request, please:
31 |
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 |
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 |
42 |
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-robotics/tts-ros1/labels/help%20wanted) issues is a great place to start.
45 |
46 |
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 |
52 |
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 |
56 |
57 | ## Licensing
58 |
59 | See the [LICENSE](https://github.com/aws-robotics/tts-ros1/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 |
61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 |
--------------------------------------------------------------------------------
/tts/scripts/tts_node.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | """A very simple Action Server that does TTS.
17 |
18 | It is a combination of a synthesizer and a player. Being an action server, it can be used in two different manners.
19 |
20 | 1. Play and wait for it to finish
21 | ---------------------------------
22 |
23 | A user can choose to be blocked until the audio playing is done. This is especially useful in interactive scenarios.
24 |
25 | Example::
26 |
27 | rospy.init_node('tts_action_client')
28 | client = actionlib.SimpleActionClient('tts', SpeechAction)
29 | client.wait_for_server()
30 | goal = SpeechGoal()
31 | goal.text = 'Let me ask you a question, please give me your answer.'
32 | client.send_goal(goal)
33 | client.wait_for_result()
34 |
35 | # start listening to a response or waiting for some input to continue the interaction
36 |
37 | 2. Play and forget
38 | ------------------
39 |
40 | A user can also choose not to wait::
41 |
42 | rospy.init_node('tts_action_client')
43 | client = actionlib.SimpleActionClient('tts', SpeechAction)
44 | client.wait_for_server()
45 | goal = SpeechGoal()
46 | goal.text = 'Let me talk, you can do something else in the meanwhile.'
47 | client.send_goal(goal)
48 |
49 | This is useful when the robot wants to do stuff while the audio is being played. For example, a robot may start to
50 | read some instructions and immediately get ready for any input.
51 | """
52 |
53 | import json
54 |
55 | import actionlib
56 | import rospy
57 | from tts.msg import SpeechAction, SpeechResult
58 | from tts.srv import Synthesizer
59 |
60 | from sound_play.libsoundplay import SoundClient
61 |
62 |
def play(filename):
    """Play the given wav or ogg file through a blocking sound_play client."""
    sound_client = SoundClient(blocking=True)
    sound_client.playWave(filename)
66 |
67 |
def do_synthesize(goal):
    """Send the goal's text and metadata to the 'synthesizer' service.

    Waits for the service to become available first, then returns whatever
    the service call returns.
    """
    service_name = 'synthesizer'
    rospy.wait_for_service(service_name)
    synthesizer_proxy = rospy.ServiceProxy(service_name, Synthesizer)
    return synthesizer_proxy(goal.text, goal.metadata)
73 |
74 |
def finish_with_result(s):
    """Respond to the client: mark the goal succeeded with ``s`` and log it.

    Uses the module-level ``server`` action server.
    """
    speech_result = SpeechResult(s)
    server.set_succeeded(speech_result)
    rospy.loginfo(speech_result)
80 |
81 |
def do_speak(goal):
    """The action handler: synthesize the goal, play the audio, respond.

    The synthesizer's ``result`` field is parsed as JSON. If it contains an
    'Audio File' entry that file is played; if it contains an 'Exception'
    entry the whole parsed response is logged and reported as an error.
    The goal is always finished through ``finish_with_result``.

    Note that although it responds to the client after the audio play is
    finished, a client can choose not to wait by not calling
    ``SimpleActionClient.wait_for_result()``.
    """
    rospy.loginfo('speech goal: {}'.format(goal))

    res = do_synthesize(goal)
    rospy.loginfo('synthesizer returns: {}'.format(res))

    try:
        r = json.loads(res.result)
    except (ValueError, TypeError) as e:
        # json.loads signals malformed input with ValueError (JSONDecodeError
        # is a subclass) and a non-string argument with TypeError; anything
        # else is a genuine bug and should not be masked as "bad JSON".
        s = 'Expecting JSON from synthesizer but got {}'.format(res.result)
        rospy.logerr('{}. Exception: {}'.format(s, e))
        finish_with_result(s)
        return

    result = ''

    if 'Audio File' in r:
        audio_file = r['Audio File']
        rospy.loginfo('Will play {}'.format(audio_file))
        play(audio_file)
        result = audio_file

    # Checked after playback on purpose: a response may carry both an audio
    # file and an exception, in which case the error report wins.
    if 'Exception' in r:
        result = '[ERROR] {}'.format(r)
        rospy.logerr(result)

    finish_with_result(result)
114 |
115 |
if __name__ == '__main__':
    rospy.init_node('tts_node')
    # auto_start is off, so the server only begins serving at start() below.
    server = actionlib.SimpleActionServer('tts', SpeechAction, execute_cb=do_speak, auto_start=False)
    server.start()
    rospy.spin()
121 |
--------------------------------------------------------------------------------
/tts/src/tts/data/models/polly/2016-06-10/examples-1.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "1.0",
3 | "examples": {
4 | "DeleteLexicon": [
5 | {
6 | "input": {
7 | "Name": "example"
8 | },
9 | "output": {
10 | },
11 | "comments": {
12 | "input": {
13 | },
14 | "output": {
15 | }
16 | },
17 | "description": "Deletes a specified pronunciation lexicon stored in an AWS Region.",
18 | "id": "to-delete-a-lexicon-1481922498332",
19 | "title": "To delete a lexicon"
20 | }
21 | ],
22 | "DescribeVoices": [
23 | {
24 | "input": {
25 | "LanguageCode": "en-GB"
26 | },
27 | "output": {
28 | "Voices": [
29 | {
30 | "Gender": "Female",
31 | "Id": "Emma",
32 | "LanguageCode": "en-GB",
33 | "LanguageName": "British English",
34 | "Name": "Emma"
35 | },
36 | {
37 | "Gender": "Male",
38 | "Id": "Brian",
39 | "LanguageCode": "en-GB",
40 | "LanguageName": "British English",
41 | "Name": "Brian"
42 | },
43 | {
44 | "Gender": "Female",
45 | "Id": "Amy",
46 | "LanguageCode": "en-GB",
47 | "LanguageName": "British English",
48 | "Name": "Amy"
49 | }
50 | ]
51 | },
52 | "comments": {
53 | "input": {
54 | },
55 | "output": {
56 | }
57 | },
58 | "description": "Returns the list of voices that are available for use when requesting speech synthesis. Displayed languages are those within the specified language code. If no language code is specified, voices for all available languages are displayed.",
59 | "id": "to-describe-available-voices-1482180557753",
60 | "title": "To describe available voices"
61 | }
62 | ],
63 | "GetLexicon": [
64 | {
65 | "input": {
66 | "Name": ""
67 | },
68 | "output": {
69 | "Lexicon": {
70 | "Content": "\r\n\r\n \r\n W3C\r\n World Wide Web Consortium\r\n \r\n",
71 | "Name": "example"
72 | },
73 | "LexiconAttributes": {
74 | "Alphabet": "ipa",
75 | "LanguageCode": "en-US",
76 | "LastModified": 1478542980.117,
77 | "LexemesCount": 1,
78 | "LexiconArn": "arn:aws:polly:us-east-1:123456789012:lexicon/example",
79 | "Size": 503
80 | }
81 | },
82 | "comments": {
83 | "input": {
84 | },
85 | "output": {
86 | }
87 | },
88 | "description": "Returns the content of the specified pronunciation lexicon stored in an AWS Region.",
89 | "id": "to-retrieve-a-lexicon-1481912870836",
90 | "title": "To retrieve a lexicon"
91 | }
92 | ],
93 | "ListLexicons": [
94 | {
95 | "input": {
96 | },
97 | "output": {
98 | "Lexicons": [
99 | {
100 | "Attributes": {
101 | "Alphabet": "ipa",
102 | "LanguageCode": "en-US",
103 | "LastModified": 1478542980.117,
104 | "LexemesCount": 1,
105 | "LexiconArn": "arn:aws:polly:us-east-1:123456789012:lexicon/example",
106 | "Size": 503
107 | },
108 | "Name": "example"
109 | }
110 | ]
111 | },
112 | "comments": {
113 | "input": {
114 | },
115 | "output": {
116 | }
117 | },
118 | "description": "Returns a list of pronunciation lexicons stored in an AWS Region.",
119 | "id": "to-list-all-lexicons-in-a-region-1481842106487",
120 | "title": "To list all lexicons in a region"
121 | }
122 | ],
123 | "PutLexicon": [
124 | {
125 | "input": {
126 | "Content": "file://example.pls",
127 | "Name": "W3C"
128 | },
129 | "output": {
130 | },
131 | "comments": {
132 | "input": {
133 | },
134 | "output": {
135 | }
136 | },
137 | "description": "Stores a pronunciation lexicon in an AWS Region.",
138 | "id": "to-save-a-lexicon-1482272584088",
139 | "title": "To save a lexicon"
140 | }
141 | ],
142 | "SynthesizeSpeech": [
143 | {
144 | "input": {
145 | "LexiconNames": [
146 | "example"
147 | ],
148 | "OutputFormat": "mp3",
149 | "SampleRate": "8000",
150 | "Text": "All Gaul is divided into three parts",
151 | "TextType": "text",
152 | "VoiceId": "Joanna"
153 | },
154 | "output": {
155 | "AudioStream": "TEXT",
156 | "ContentType": "audio/mpeg",
157 | "RequestCharacters": 37
158 | },
159 | "comments": {
160 | "input": {
161 | },
162 | "output": {
163 | }
164 | },
165 | "description": "Synthesizes plain text or SSML into a file of human-like speech.",
166 | "id": "to-synthesize-speech-1482186064046",
167 | "title": "To synthesize speech"
168 | }
169 | ]
170 | }
171 | }
172 |
--------------------------------------------------------------------------------
/tts/test/test_unit_synthesizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | from __future__ import print_function
17 |
18 | from mock import patch, MagicMock # python2 uses backport of unittest.mock(docs.python.org/3/library/unittest.mock.html)
19 | import unittest
20 |
21 |
22 | class TestSynthesizer(unittest.TestCase):
23 |
24 | def setUp(self):
25 | """important: import tts which is a relay package::
26 |
27 | devel/lib/python2.7/dist-packages/
28 | +-- tts
29 | | +-- __init__.py
30 | +-- ...
31 |
32 | per http://docs.ros.org/api/catkin/html/user_guide/setup_dot_py.html:
33 |
34 | A relay package is a folder with an __init__.py folder and nothing else.
35 | Importing this folder in python will execute the contents of __init__.py,
36 | which will in turn import the original python modules in the folder in
37 | the sourcespace using the python exec() function.
38 | """
39 | import tts
40 | self.assertIsNotNone(tts)
41 |
42 | def test_init(self):
43 | from tts.synthesizer import SpeechSynthesizer
44 | speech_synthesizer = SpeechSynthesizer()
45 | self.assertEqual('text', speech_synthesizer.default_text_type)
46 |
47 | @patch('tts.amazonpolly.AmazonPolly')
48 | def test_good_synthesis_with_mostly_default_args_using_polly_lib(self, polly_class_mock):
49 | polly_obj_mock = MagicMock()
50 | polly_class_mock.return_value = polly_obj_mock
51 |
52 | test_text = 'hello'
53 | test_metadata = '''
54 | {
55 | "output_path": "/tmp/test"
56 | }
57 | '''
58 | expected_polly_synthesize_args = {
59 | 'output_format': 'ogg_vorbis',
60 | 'voice_id': 'Joanna',
61 | 'sample_rate': '22050',
62 | 'text_type': 'text',
63 | 'text': test_text,
64 | 'output_path': "/tmp/test"
65 | }
66 |
67 | from tts.synthesizer import SpeechSynthesizer
68 | from tts.srv import SynthesizerRequest
69 | speech_synthesizer = SpeechSynthesizer(engine='POLLY_LIBRARY')
70 | request = SynthesizerRequest(text=test_text, metadata=test_metadata)
71 | response = speech_synthesizer._node_request_handler(request)
72 |
73 | self.assertGreater(polly_class_mock.call_count, 0)
74 | polly_obj_mock.synthesize.assert_called_with(**expected_polly_synthesize_args)
75 |
76 | self.assertEqual(response.result, polly_obj_mock.synthesize.return_value.result)
77 |
@patch('tts.amazonpolly.AmazonPolly')
def test_synthesis_with_bad_metadata_using_polly_lib(self, polly_class_mock):
    """Metadata that is not JSON yields a response whose result starts with 'Exception: '."""
    polly_class_mock.return_value = MagicMock()

    from tts.synthesizer import SpeechSynthesizer
    from tts.srv import SynthesizerRequest
    bad_request = SynthesizerRequest(text='hello', metadata='''I am no JSON''')
    handler = SpeechSynthesizer(engine='POLLY_LIBRARY')._node_request_handler
    response = handler(bad_request)

    reported_failure = response.result.startswith('Exception: ')
    self.assertTrue(reported_failure)
93 |
@patch('tts.amazonpolly.AmazonPolly')
def test_bad_engine(self, polly_class_mock):
    """Constructing a synthesizer with an unknown engine name raises BadEngineError.

    :param polly_class_mock: mock injected by the ``patch`` decorator; it is not inspected here.
    """
    from tts.synthesizer import SpeechSynthesizer

    # assertRaises fails the test both when nothing is raised and when the wrong type is raised,
    # which replaces the manual try/except + isinstance bookkeeping.
    with self.assertRaises(SpeechSynthesizer.BadEngineError):
        SpeechSynthesizer(engine='NON-EXIST ENGINE')
108 |
def test_cli_help_message(self):
    """Running ``synthesizer_node.py -h`` prints a help text that starts with 'Usage: '."""
    import os
    import subprocess
    import sys

    source_file_dir = os.path.dirname(os.path.abspath(__file__))
    synthesizer_path = os.path.join(source_file_dir, '..', 'scripts', 'synthesizer_node.py')

    # Use the interpreter that is running the tests; a bare 'python' may be missing or may be a
    # different interpreter than the one the package was built for.
    interpreter = sys.executable or 'python'
    raw_output = subprocess.check_output([interpreter, synthesizer_path, '-h'])

    # check_output returns bytes on Python 3, where str(bytes) would produce "b'Usage: ...'".
    help_text = raw_output.decode('utf-8', 'replace')
    self.assertTrue(help_text.startswith('Usage: '))
116 |
@patch('tts.synthesizer.SpeechSynthesizer')
def test_cli_engine_dispatching_1(self, speech_synthesizer_class_mock):
    """With no CLI options, main() builds a POLLY_SERVICE synthesizer and starts it with default names."""
    import sys
    bare_argv = ['synthesizer_node.py']
    with patch.object(sys, 'argv', bare_argv):
        from tts import synthesizer as synthesizer_module
        synthesizer_module.main()

    speech_synthesizer_class_mock.assert_called_with(engine='POLLY_SERVICE', polly_service_name='polly')
    start_mock = speech_synthesizer_class_mock.return_value.start
    start_mock.assert_called_with(node_name='synthesizer_node', service_name='synthesizer')
126 |
@patch('tts.synthesizer.SpeechSynthesizer')
def test_cli_engine_dispatching_2(self, speech_synthesizer_class_mock):
    """``-e POLLY_LIBRARY`` makes main() build the synthesizer with only the engine argument."""
    import sys
    library_argv = ['synthesizer_node.py', '-e', 'POLLY_LIBRARY']
    with patch.object(sys, 'argv', library_argv):
        import tts.synthesizer
        tts.synthesizer.main()

    speech_synthesizer_class_mock.assert_called_with(engine='POLLY_LIBRARY')
    start_call_count = speech_synthesizer_class_mock.return_value.start.call_count
    self.assertGreater(start_call_count, 0)
135 |
@patch('tts.synthesizer.SpeechSynthesizer')
def test_cli_engine_dispatching_3(self, speech_synthesizer_class_mock):
    """``-p apolly`` is forwarded as the polly service name of the default POLLY_SERVICE engine."""
    import sys
    service_argv = ['synthesizer_node.py', '-p', 'apolly']
    with patch.object(sys, 'argv', service_argv):
        import tts.synthesizer
        tts.synthesizer.main()

    speech_synthesizer_class_mock.assert_called_with(engine='POLLY_SERVICE', polly_service_name='apolly')
    start_call_count = speech_synthesizer_class_mock.return_value.start.call_count
    self.assertGreater(start_call_count, 0)
144 |
145 |
if __name__ == '__main__':
    # When executed directly, hand the TestSynthesizer case to rosunit under the 'tts' package
    # with the test name 'unittest-synthesizer'.
    import rosunit
    rosunit.unitrun('tts', 'unittest-synthesizer', TestSynthesizer)
149 |
--------------------------------------------------------------------------------
/tts/test/test_unit_polly.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | from __future__ import print_function
17 |
18 |
19 | from mock import patch, MagicMock # python2 uses backport of unittest.mock(docs.python.org/3/library/unittest.mock.html)
20 | import unittest
21 |
22 |
23 | class TestPolly(unittest.TestCase):
24 |
25 | def setUp(self):
26 | """important: import tts which is a relay package::
27 |
28 | devel/lib/python2.7/dist-packages/
29 | +-- tts
30 | | +-- __init__.py
31 | +-- ...
32 |
33 | per http://docs.ros.org/api/catkin/html/user_guide/setup_dot_py.html:
34 |
35 | A relay package is a folder with an __init__.py folder and nothing else.
36 | Importing this folder in python will execute the contents of __init__.py,
37 | which will in turn import the original python modules in the folder in
38 | the sourcespace using the python exec() function.
39 | """
40 | import tts
41 | self.assertIsNotNone(tts)
42 |
43 | @patch('tts.amazonpolly.Session')
44 | def test_init(self, boto3_session_class_mock):
45 | from tts.amazonpolly import AmazonPolly
46 | AmazonPolly()
47 |
48 | self.assertGreater(boto3_session_class_mock.call_count, 0)
49 | boto3_session_class_mock.return_value.client.assert_called_with('polly')
50 |
51 | @patch('tts.amazonpolly.Session')
52 | def test_defaults(self, boto3_session_class_mock):
53 | from tts.amazonpolly import AmazonPolly
54 | polly = AmazonPolly()
55 |
56 | self.assertGreater(boto3_session_class_mock.call_count, 0)
57 | boto3_session_class_mock.return_value.client.assert_called_with('polly')
58 |
59 | self.assertEqual('text', polly.default_text_type)
60 | self.assertEqual('ogg_vorbis', polly.default_output_format)
61 | self.assertEqual('Joanna', polly.default_voice_id)
62 | self.assertEqual('.', polly.default_output_folder)
63 | self.assertEqual('output', polly.default_output_file_basename)
64 |
65 | @patch('tts.amazonpolly.Session')
66 | def test_good_synthesis_with_default_args(self, boto3_session_class_mock):
67 | boto3_session_obj_mock = MagicMock()
68 | boto3_polly_obj_mock = MagicMock()
69 | boto3_polly_response_mock = MagicMock()
70 | audio_stream_mock = MagicMock()
71 | fake_audio_stream_data = 'I am audio.'
72 | fake_audio_content_type = 'super tts'
73 | fake_boto3_polly_response_metadata = {'foo': 'bar'}
74 |
75 | boto3_session_class_mock.return_value = boto3_session_obj_mock
76 | boto3_session_obj_mock.client.return_value = boto3_polly_obj_mock
77 | boto3_polly_obj_mock.synthesize_speech.return_value = boto3_polly_response_mock
78 | audio_stream_mock.read.return_value = fake_audio_stream_data
79 | d = {
80 | 'AudioStream': audio_stream_mock,
81 | 'ContentType': fake_audio_content_type,
82 | 'ResponseMetadata': fake_boto3_polly_response_metadata
83 | }
84 | boto3_polly_response_mock.__contains__.side_effect = d.__contains__
85 | boto3_polly_response_mock.__getitem__.side_effect = d.__getitem__
86 |
87 | from tts.amazonpolly import AmazonPolly
88 | polly_under_test = AmazonPolly()
89 |
90 | self.assertGreater(boto3_session_class_mock.call_count, 0)
91 | boto3_session_obj_mock.client.assert_called_with('polly')
92 |
93 | res = polly_under_test.synthesize(text='hello')
94 |
95 | expected_synthesize_speech_kwargs = {
96 | 'LexiconNames': [],
97 | 'OutputFormat': 'ogg_vorbis',
98 | 'SampleRate': '22050',
99 | 'SpeechMarkTypes': [],
100 | 'Text': 'hello',
101 | 'TextType': 'text',
102 | 'VoiceId': 'Joanna',
103 | }
104 | boto3_polly_obj_mock.synthesize_speech.assert_called_with(**expected_synthesize_speech_kwargs)
105 |
106 | from tts.srv import PollyResponse
107 | self.assertTrue(isinstance(res, PollyResponse))
108 |
109 | import json
110 | j = json.loads(res.result)
111 | observed_audio_file_content = open(j['Audio File']).read()
112 | self.assertEqual(fake_audio_stream_data, observed_audio_file_content)
113 |
114 | self.assertEqual(fake_audio_content_type, j['Audio Type'])
115 | self.assertEqual(str(fake_boto3_polly_response_metadata), j['Amazon Polly Response Metadata'])
116 |
117 | @patch('tts.amazonpolly.Session')
118 | def test_polly_raises(self, boto3_session_class_mock):
119 | boto3_session_obj_mock = MagicMock()
120 | boto3_polly_obj_mock = MagicMock()
121 | boto3_polly_response_mock = MagicMock()
122 | audio_stream_mock = MagicMock()
123 | fake_audio_stream_data = 'I am audio.'
124 | fake_audio_content_type = 'super voice'
125 | fake_boto3_polly_response_metadata = {'foo': 'bar'}
126 |
127 | boto3_session_class_mock.return_value = boto3_session_obj_mock
128 | boto3_session_obj_mock.client.return_value = boto3_polly_obj_mock
129 | boto3_polly_obj_mock.synthesize_speech.side_effect = RuntimeError('Amazon Polly Exception')
130 | audio_stream_mock.read.return_value = fake_audio_stream_data
131 | d = {
132 | 'AudioStream': audio_stream_mock,
133 | 'ContentType': fake_audio_content_type,
134 | 'ResponseMetadata': fake_boto3_polly_response_metadata
135 | }
136 | boto3_polly_response_mock.__contains__.side_effect = d.__contains__
137 | boto3_polly_response_mock.__getitem__.side_effect = d.__getitem__
138 |
139 | from tts.amazonpolly import AmazonPolly
140 | polly_under_test = AmazonPolly()
141 |
142 | self.assertGreater(boto3_session_class_mock.call_count, 0)
143 | boto3_session_obj_mock.client.assert_called_with('polly')
144 |
145 | res = polly_under_test.synthesize(text='hello')
146 |
147 | expected_synthesize_speech_kwargs = {
148 | 'LexiconNames': [],
149 | 'OutputFormat': 'ogg_vorbis',
150 | 'SampleRate': '22050',
151 | 'SpeechMarkTypes': [],
152 | 'Text': 'hello',
153 | 'TextType': 'text',
154 | 'VoiceId': 'Joanna',
155 | }
156 | boto3_polly_obj_mock.synthesize_speech.assert_called_with(**expected_synthesize_speech_kwargs)
157 |
158 | from tts.srv import PollyResponse
159 | self.assertTrue(isinstance(res, PollyResponse))
160 |
161 | import json
162 | j = json.loads(res.result)
163 | self.assertTrue('Exception' in j)
164 | self.assertTrue('Traceback' in j)
165 |
166 | @patch('tts.amazonpolly.AmazonPolly')
167 | def test_cli(self, amazon_polly_class_mock):
168 | import sys
169 | with patch.object(sys, 'argv', ['polly_node.py', '-n', 'polly-node']):
170 | from tts import amazonpolly
171 | amazonpolly.main()
172 | self.assertGreater(amazon_polly_class_mock.call_count, 0)
173 | amazon_polly_class_mock.return_value.start.assert_called_with(node_name='polly-node', service_name='polly')
174 |
175 |
if __name__ == '__main__':
    # When executed directly, hand the TestPolly case to rosunit under the 'tts' package
    # with the test name 'unittest-polly'.
    import rosunit
    rosunit.unitrun('tts', 'unittest-polly', TestPolly)
179 |
--------------------------------------------------------------------------------
/tts/src/tts/synthesizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | import os
17 | import time
18 | import json
19 | import rospy
20 | import hashlib
21 | from optparse import OptionParser
22 | from tts.srv import Synthesizer, SynthesizerResponse
23 |
24 |
class SpeechSynthesizer:
    """This class serves as a ROS service node that should be an entry point of a TTS task.

    Although the current implementation uses Amazon Polly as the synthesis engine, it is not hard to let it support
    more heterogeneous engines while keeping the API the same.

    In order to support a variety of engines, the SynthesizerRequest was designed with flexibility in mind. It
    has two fields: text and metadata. Both are strings. In most cases, a user can ignore the metadata and call
    the service with some plain text. If the use case needs any control or engine-specific feature, the extra
    information can be put into the JSON-form metadata. This class will use the information when calling the engine.

    The decoupling of the synthesizer and the actual synthesis engine will benefit the users in many ways.

    First, a user will be able to use a unified interface to do the TTS job and have the freedom to use different
    engines available with no or very little change from the client side.

    Second, by applying some design patterns, the synthesizer can choose an engine dynamically. For example, a user
    may prefer to use Amazon Polly but is also OK with an offline solution when network is not reliable.

    Third, engines can be complicated, thus difficult to use. As an example, Amazon Polly supports dozens of parameters
    and is able to accomplish nontrivial synthesis jobs, but the majority of users never need those features. This
    class provides a clean interface with two parameters only, so that it is much easier and more pleasant to use. If
    by any chance the advanced features are required, the user can always leverage the metadata field or even go to
    the backend engine directly.

    Also, from an engineering perspective, simple and decoupled modules are easier to maintain.

    This class supports two modes of using polly. It can either call a service node or use AmazonPolly as a library.

    Start the service node::

        $ rosrun tts synthesizer_node.py  # use default configuration
        $ rosrun tts synthesizer_node.py -e POLLY_LIBRARY  # will not call polly service node

    Call the service::

        $ rosservice call /synthesizer 'hello' ''
        $ rosservice call /synthesizer 'hello' '"{\"text_type\":\"ssml\"}"'
    """

    class PollyViaNode:
        """Engine that forwards each synthesis request to a Polly ROS service."""

        def __init__(self, polly_service_name='polly'):
            self.service_name = polly_service_name

        def __call__(self, **kwargs):
            """Wait for the service, then call it with the 'SynthesizeSpeech' action and ``kwargs``."""
            rospy.loginfo('will call service {}'.format(self.service_name))
            from tts.srv import Polly
            rospy.wait_for_service(self.service_name)
            polly = rospy.ServiceProxy(self.service_name, Polly)
            return polly(polly_action='SynthesizeSpeech', **kwargs)

    class PollyDirect:
        """Engine that uses ``tts.amazonpolly.AmazonPolly`` in-process as a library."""

        def __init__(self):
            pass

        def __call__(self, **kwargs):
            """Create an AmazonPolly object and return the result of its ``synthesize(**kwargs)``."""
            rospy.loginfo('will import amazonpolly.AmazonPolly')
            from tts.amazonpolly import AmazonPolly
            node = AmazonPolly()
            return node.synthesize(**kwargs)

    # Engine name (as given on the command line or to __init__) -> engine class.
    ENGINES = {
        'POLLY_SERVICE': PollyViaNode,
        'POLLY_LIBRARY': PollyDirect,
    }

    class BadEngineError(NameError):
        """Raised by __init__ when the requested engine name is not a key of ENGINES."""
        pass

    def __init__(self, engine='POLLY_SERVICE', polly_service_name='polly'):
        """Select and instantiate the synthesis engine.

        :param engine: one of the keys of ENGINES
        :param polly_service_name: name of the polly ROS service; only used by the 'POLLY_SERVICE' engine
        :raises SpeechSynthesizer.BadEngineError: if ``engine`` is not a known engine name
        """
        if engine not in self.ENGINES:
            msg = 'bad engine {} which is not one of {}'.format(engine, ', '.join(SpeechSynthesizer.ENGINES.keys()))
            raise SpeechSynthesizer.BadEngineError(msg)

        engine_kwargs = {'polly_service_name': polly_service_name} if engine == 'POLLY_SERVICE' else {}
        self.engine = self.ENGINES[engine](**engine_kwargs)

        self.default_text_type = 'text'
        self.default_voice_id = 'Joanna'
        self.default_output_format = 'ogg_vorbis'

    @staticmethod
    def _default_output_path(text):
        """Build the fallback output path ``/tmp/voice_<md5 of text>_<current time>``.

        :param text: the text to synthesize, as a text string or as bytes
        :return: an absolute file path
        """
        # hashlib only accepts bytes; a text string (Python 3 str, Python 2 unicode) must be encoded first.
        if not isinstance(text, bytes):
            text = text.encode('utf-8')
        tmp_filename = hashlib.md5(text).hexdigest()
        tmp_filepath = os.path.join(os.sep, 'tmp', 'voice_{}_{}'.format(tmp_filename, str(time.time())))
        return os.path.abspath(tmp_filepath)

    def _call_engine(self, **kw):
        """Call engine to do the job.

        If no output path is found from input, the audio file will be put into /tmp and the file name will have
        a prefix of the md5 hash of the text.

        :param kw: what AmazonPolly needs to synthesize
        :return: response from AmazonPolly
        """
        if 'output_path' not in kw:
            kw['output_path'] = self._default_output_path(kw['text'])
            rospy.loginfo('audio will be saved as {}'.format(kw['output_path']))

        return self.engine(**kw)

    def _parse_request_or_raise(self, request):
        """It will raise if request is malformed.

        Missing metadata entries are filled with defaults; the sample rate default is '16000' for the 'pcm'
        output format and '22050' otherwise.

        :param request: an instance of SynthesizerRequest
        :return: a dict
        """
        md = json.loads(request.metadata) if request.metadata else {}

        md['output_format'] = md.get('output_format', self.default_output_format)
        md['voice_id'] = md.get('voice_id', self.default_voice_id)
        md['sample_rate'] = md.get('sample_rate', '16000' if md['output_format'].lower() == 'pcm' else '22050')
        md['text_type'] = md.get('text_type', self.default_text_type)
        md['text'] = request.text

        return md

    def _node_request_handler(self, request):
        """The callback function for processing service request.

        It never raises. If anything unexpected happens, it will return a SynthesizerResponse with the exception.

        :param request: an instance of SynthesizerRequest
        :return: a SynthesizerResponse
        """
        rospy.loginfo(request)
        try:
            kws = self._parse_request_or_raise(request)
            res = self._call_engine(**kws).result

            return SynthesizerResponse(res)
        except Exception as e:
            # Service-callback boundary: report the failure to the caller instead of raising.
            return SynthesizerResponse('Exception: {}'.format(e))

    def start(self, node_name='synthesizer_node', service_name='synthesizer'):
        """The entry point of a ROS service node.

        :param node_name: name of ROS node
        :param service_name: name of ROS service
        :return: it doesn't return
        """
        rospy.init_node(node_name)

        service = rospy.Service(service_name, Synthesizer, self._node_request_handler)

        rospy.loginfo('{} running: {}'.format(node_name, service.uri))

        rospy.spin()
170 |
171 |
def main():
    """Parse the command-line options and start the synthesizer ROS service node.

    Options: -n/--node-name, -s/--service-name, -e/--engine and -p/--polly-service-name. The polly
    service name is passed to SpeechSynthesizer only when the engine is 'POLLY_SERVICE'.
    """
    usage = '''usage: %prog [options]
    '''

    parser = OptionParser(usage)

    # (short flag, long flag, dest, default, help text, metavar), registered in this order.
    option_specs = (
        ("-n", "--node-name", "node_name", 'synthesizer_node', "name of the ROS node", "NODE_NAME"),
        ("-s", "--service-name", "service_name", 'synthesizer', "name of the ROS service", "SERVICE_NAME"),
        ("-e", "--engine", "engine", 'POLLY_SERVICE', "name of the synthesis engine", "ENGINE"),
        ("-p", "--polly-service-name", "polly_service_name", 'polly', "name of the polly service",
         "POLLY_SERVICE_NAME"),
    )
    for short_flag, long_flag, dest, default, help_text, metavar in option_specs:
        parser.add_option(short_flag, long_flag, dest=dest, default=default, help=help_text, metavar=metavar)

    options, _ = parser.parse_args()

    synthesizer_kwargs = {'engine': options.engine}
    if options.engine == 'POLLY_SERVICE':
        synthesizer_kwargs['polly_service_name'] = options.polly_service_name

    synthesizer = SpeechSynthesizer(**synthesizer_kwargs)
    synthesizer.start(node_name=options.node_name, service_name=options.service_name)
203 |
204 |
if __name__ == "__main__":
    # Run the command-line entry point when this module is executed as a script.
    main()
207 |
--------------------------------------------------------------------------------
/tts/test/test_integration.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | from __future__ import print_function
17 |
18 | import sys
19 | import json
20 | import unittest
21 |
22 | import rospy
23 | import rostest
24 |
25 | from tts.srv import Polly
26 | from tts.srv import PollyResponse
27 | from tts.srv import Synthesizer
28 | from tts.srv import SynthesizerResponse
29 |
30 | # import tts which is a relay package, otherwise things don't work
31 | #
32 | # devel/lib/python2.7/dist-packages/
33 | # +-- tts
34 | # | +-- __init__.py
35 | # +-- ...
36 | #
37 | # per http://docs.ros.org/api/catkin/html/user_guide/setup_dot_py.html:
38 | #
39 | # A relay package is a folder with an __init__.py folder and nothing else.
40 | # Importing this folder in python will execute the contents of __init__.py,
41 | # which will in turn import the original python modules in the folder in
42 | # the sourcespace using the python exec() function.
43 |
44 |
45 | PKG = 'tts'
46 | NAME = 'amazonpolly'
47 |
48 |
49 | class TestPlainText(unittest.TestCase):
50 |
51 | def test_plain_text_to_wav_via_polly_node(self):
52 | rospy.wait_for_service('polly')
53 | polly = rospy.ServiceProxy('polly', Polly)
54 |
55 | test_text = 'Mary has a little lamb, little lamb, little lamb.'
56 | res = polly(polly_action='SynthesizeSpeech', text=test_text)
57 | self.assertIsNotNone(res)
58 | self.assertTrue(type(res) is PollyResponse)
59 |
60 | r = json.loads(res.result)
61 | self.assertIn('Audio Type', r, 'result should contain audio type')
62 | self.assertIn('Audio File', r, 'result should contain file path')
63 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata')
64 |
65 | audio_type = r['Audio Type']
66 | audio_file = r['Audio File']
67 | md = r['Amazon Polly Response Metadata']
68 | self.assertTrue("'HTTPStatusCode': 200," in md)
69 | self.assertEqual('audio/ogg', audio_type)
70 | self.assertTrue(audio_file.endswith('.ogg'))
71 |
72 | import subprocess
73 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT)
74 | import re
75 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE)
76 | self.assertIsNotNone(m)
77 |
78 | def test_plain_text_using_polly_class(self):
79 | from tts.amazonpolly import AmazonPolly
80 | polly = AmazonPolly()
81 | test_text = 'Mary has a little lamb, little lamb, little lamb.'
82 | res = polly.synthesize(text=test_text)
83 | self.assertIsNotNone(res)
84 | self.assertTrue(type(res) is PollyResponse)
85 |
86 | r = json.loads(res.result)
87 | self.assertIn('Audio Type', r, 'result should contain audio type')
88 | self.assertIn('Audio File', r, 'result should contain file path')
89 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata')
90 |
91 | audio_type = r['Audio Type']
92 | audio_file = r['Audio File']
93 | md = r['Amazon Polly Response Metadata']
94 | self.assertTrue("'HTTPStatusCode': 200," in md)
95 | self.assertEqual('audio/ogg', audio_type)
96 | self.assertTrue(audio_file.endswith('.ogg'))
97 |
98 | import subprocess
99 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT)
100 | import re
101 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE)
102 | self.assertIsNotNone(m)
103 |
104 | def test_plain_text_via_synthesizer_node(self):
105 | rospy.wait_for_service('synthesizer')
106 | speech_synthesizer = rospy.ServiceProxy('synthesizer', Synthesizer)
107 |
108 | text = 'Mary has a little lamb, little lamb, little lamb.'
109 | res = speech_synthesizer(text=text)
110 | self.assertIsNotNone(res)
111 | self.assertTrue(type(res) is SynthesizerResponse)
112 |
113 | r = json.loads(res.result)
114 | self.assertIn('Audio Type', r, 'result should contain audio type')
115 | self.assertIn('Audio File', r, 'result should contain file path')
116 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata')
117 |
118 | audio_type = r['Audio Type']
119 | audio_file = r['Audio File']
120 | md = r['Amazon Polly Response Metadata']
121 | self.assertTrue("'HTTPStatusCode': 200," in md)
122 | self.assertEqual('audio/ogg', audio_type)
123 | self.assertTrue(audio_file.endswith('.ogg'))
124 |
125 | import subprocess
126 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT)
127 | import re
128 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE)
129 | self.assertIsNotNone(m)
130 |
131 | def test_plain_text_to_mp3_via_polly_node(self):
132 | rospy.wait_for_service('polly')
133 | polly = rospy.ServiceProxy('polly', Polly)
134 |
135 | test_text = 'Mary has a little lamb, little lamb, little lamb.'
136 | res = polly(polly_action='SynthesizeSpeech', text=test_text, output_format='mp3')
137 | self.assertIsNotNone(res)
138 | self.assertTrue(type(res) is PollyResponse)
139 |
140 | r = json.loads(res.result)
141 | self.assertIn('Audio Type', r, 'result should contain audio type')
142 | self.assertIn('Audio File', r, 'result should contain file path')
143 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata')
144 |
145 | audio_type = r['Audio Type']
146 | audio_file = r['Audio File']
147 | md = r['Amazon Polly Response Metadata']
148 | self.assertTrue("'HTTPStatusCode': 200," in md)
149 | self.assertEqual('audio/mpeg', audio_type)
150 | self.assertTrue(audio_file.endswith('.mp3'))
151 |
152 | import subprocess
153 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT)
154 | import re
155 | m = re.search(r'.*MPEG.*layer III.*', o, flags=re.MULTILINE)
156 | self.assertIsNotNone(m)
157 |
158 | def test_simple_ssml_via_polly_node(self):
159 | rospy.wait_for_service('polly')
160 | polly = rospy.ServiceProxy('polly', Polly)
161 |
162 | text = 'Mary has a little lamb, little lamb, little lamb.'
163 | res = polly(polly_action='SynthesizeSpeech', text=text, text_type='ssml')
164 | self.assertIsNotNone(res)
165 | self.assertTrue(type(res) is PollyResponse)
166 |
167 | r = json.loads(res.result)
168 | self.assertIn('Audio Type', r, 'result should contain audio type')
169 | self.assertIn('Audio File', r, 'result should contain file path')
170 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata')
171 |
172 | audio_type = r['Audio Type']
173 | audio_file = r['Audio File']
174 | md = r['Amazon Polly Response Metadata']
175 | self.assertTrue("'HTTPStatusCode': 200," in md)
176 | self.assertEqual('audio/ogg', audio_type)
177 | self.assertTrue(audio_file.endswith('.ogg'))
178 |
179 | import subprocess
180 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT)
181 | import re
182 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE)
183 | self.assertIsNotNone(m)
184 |
185 | def test_simple_ssml_via_synthesizer_node(self):
186 | rospy.wait_for_service('synthesizer')
187 | speech_synthesizer = rospy.ServiceProxy('synthesizer', Synthesizer)
188 |
189 | text = 'Mary has a little lamb, little lamb, little lamb.'
190 | res = speech_synthesizer(text=text, metadata='''{"text_type":"ssml"}''')
191 | self.assertIsNotNone(res)
192 | self.assertTrue(type(res) is SynthesizerResponse)
193 |
194 | r = json.loads(res.result)
195 | self.assertIn('Audio Type', r, 'result should contain audio type')
196 | self.assertIn('Audio File', r, 'result should contain file path')
197 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata')
198 |
199 | audio_type = r['Audio Type']
200 | audio_file = r['Audio File']
201 | md = r['Amazon Polly Response Metadata']
202 | self.assertTrue("'HTTPStatusCode': 200," in md)
203 | self.assertEqual('audio/ogg', audio_type)
204 | self.assertTrue(audio_file.endswith('.ogg'))
205 |
206 | import subprocess
207 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT)
208 | import re
209 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE)
210 | self.assertIsNotNone(m)
211 |
212 |
if __name__ == '__main__':
    # When executed directly, run the TestPlainText case through rostest, forwarding the command line.
    rostest.rosrun(PKG, NAME, TestPlainText, sys.argv)
215 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
--------------------------------------------------------------------------------
/tts/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright 2018 Amazon.com, Inc. or its affiliates
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tts
2 |
3 |
4 | ## Overview
5 | The `tts` ROS node enables a robot to speak with a human voice by providing a Text-To-Speech service.
6 | Out of the box this package listens to a speech topic, submits text to the Amazon Polly cloud service to generate an audio stream file,
7 | retrieves the audio stream from Amazon Polly, and plays the audio stream via the default output device.
8 | The nodes can be configured to use different voices as well as custom lexicons and SSML tags which enable you to control aspects of speech,
9 | such as pronunciation, volume, pitch, speed rate, etc. A [sample ROS application] with this node,
10 | and more details on speech customization are available within the [Amazon Polly documentation].
11 |
12 | **Amazon Polly Summary**: Amazon Polly is a service that turns text into lifelike speech, allowing you to create applications that talk,
13 | and build entirely new categories of speech-enabled products. Amazon Polly is a Text-to-Speech service that uses advanced deep learning technologies to synthesize speech that sounds like a human voice.
14 | With dozens of lifelike voices across a variety of languages, you can select the ideal voice and build speech-enabled applications that work in many different countries.
15 |
16 | ### License
17 | The source code is released under the [Apache 2.0] license.
18 |
19 | **Author**: AWS RoboMaker
20 | **Affiliation**: [Amazon Web Services (AWS)]
21 |
22 | RoboMaker cloud extensions rely on third-party software licensed under open-source licenses and are provided for demonstration purposes only. Incorporation or use of RoboMaker cloud extensions in connection with your production workloads or commercial product(s) or devices may affect your legal rights or obligations under the applicable open-source licenses. License information for this repository can be found [here](https://github.com/aws-robotics/tts-ros1/blob/master/LICENSE). AWS does not provide support for this cloud extension. You are solely responsible for how you configure, deploy, and maintain this cloud extension in your workloads or commercial product(s) or devices.
23 |
24 | ### Supported ROS Distributions
25 | - Kinetic
26 | - Melodic
27 |
28 | ## Installation
29 |
30 | ### AWS Credentials
31 | You will need to create an AWS Account and configure the credentials to be able to communicate with AWS services. You may find [AWS Configuration and Credential Files] helpful.
32 |
33 | This node will require the following AWS account IAM role permissions:
34 | - `polly:SynthesizeSpeech`
35 |
36 | ### Dependencies
37 | In order to use the Text-To-Speech node with ROS kinetic you must update the version of boto3 that is installed on your system to at least version 1.9.0. You can do this by running the command:
38 |
39 | pip3 install -U boto3
40 |
41 | This step is required before the node will work properly because the version of boto3 is not new enough for the features required by this node.
42 |
43 | ### Building from Source
44 |
45 | To build from source you'll need to create a new workspace, clone and checkout the latest release branch of this repository, install all the dependencies, and compile. If you need the latest development features you can clone from the `master` branch instead of the latest release branch. While we guarantee the release branches are stable, __the `master` should be considered to have an unstable build__ due to ongoing development.
46 |
47 | - Install build tool: please refer to `colcon` [installation guide](https://colcon.readthedocs.io/en/released/user/installation.html)
48 |
49 | - Create a ROS workspace and a source directory
50 |
51 | mkdir -p ~/ros-workspace/src
52 |
53 | - Clone the package into the source directory.
54 |
55 | cd ~/ros-workspace/src
56 | git clone https://github.com/aws-robotics/tts-ros1.git -b release-latest
57 |
58 | - Install dependencies
59 |
60 | cd ~/ros-workspace
61 | sudo apt-get update && rosdep update
62 | rosdep install --from-paths src --ignore-src -r -y
63 |
64 | _Note: If building the master branch instead of a release branch you may need to also checkout and build the master branches of the packages this package depends on._
65 |
66 | - Build the packages
67 |
68 | cd ~/ros-workspace && colcon build
69 |
70 | - Configure ROS library Path
71 |
72 | source ~/ros-workspace/install/setup.bash
73 |
74 | - Build and run the unit tests
75 |
76 | colcon test --packages-select tts && colcon test-result --all
77 |
78 | ### Testing in Containers/Virtual Machines
79 |
80 | Even if your container or virtual machine does not have an audio device, you can still test TTS by leveraging an audio server.
81 |
82 | The following is an example setup on a MacBook with PulseAudio as the audio server.
83 | If you are new to PulseAudio, you may want to read the [PulseAudio Documentation].
84 |
85 | **Step 1: Start PulseAudio on your laptop**
86 |
87 | After installation, start the audio server with *module-native-protocol-tcp* loaded:
88 |
89 | pulseaudio --load=module-native-protocol-tcp --exit-idle-time=-1 --log-target=stderr -v
90 |
91 | Note the extra arguments `-v` and `--log-target` are used for easier troubleshooting.
92 |
93 | **Step 2: Run TTS nodes in container**
94 |
95 | In your container, make sure you set the right environment variables.
96 | For example, you can start the container using `docker run -it -e PULSE_SERVER=docker.for.mac.localhost ubuntu:16.04`.
97 |
98 | Then you will be able to run ROS nodes in the container and hear the audio from your laptop speakers.
99 |
100 | **Troubleshooting**
101 |
102 | If your laptop has multiple audio output devices, make sure the right one has the right volume.
103 | This command will give you a list of output devices and tell you which one has been selected:
104 |
105 | pacmd list-sinks | grep -E '(index:|name:|product.name)'
106 |
107 | ## Launch Files
108 | An example launch file called `sample_application.launch` is provided.
109 |
110 |
111 | ## Usage
112 |
113 | ### Run the node
114 | - **Plain text**
115 | - `roslaunch tts sample_application.launch`
116 | - `rosrun tts voicer.py 'Hello World'`
117 |
118 | - **SSML**
119 | - `roslaunch tts sample_application.launch`
120 | - `rosrun tts voicer.py 'Mary has a little lamb.' '{"text_type":"ssml"}'`
121 |
122 |
123 | ## Configuration File and Parameters
124 | | Parameter Name | Type | Description |
125 | | -------------- | ---- | ----------- |
126 | | polly_action | *string* | Currently only one action named `SynthesizeSpeech` is supported. |
127 | | text | *string* | The text to be synthesized. It can be plain text or SSML. See also `text_type`. |
128 | | text_type | *string* | A user can choose from `text` and `ssml`. Default: `text`. |
129 | | voice_id | *string* | The list of supported voices can be found on [official Amazon Polly document]. Default: Joanna |
130 | | output_format | *string* | Valid formats are `ogg_vorbis`, `mp3` and `pcm`. Default: `ogg_vorbis` |
131 | | output_path | *string* | The audio data will be saved as a local file for playback and reuse/inspection purposes. This parameter is to provide a preferred path to save the file. Default: `.` |
132 | | sample_rate | *string* | Note `16000` is a valid sample rate for all supported formats. Default: `16000`. |
133 |
134 |
135 | ## Performance and Benchmark Results
136 | We evaluated the performance of this node by running the following scenario on a Raspberry Pi 3 Model B:
137 | - Launch a baseline graph containing the talker and listener nodes from the [roscpp_tutorials package](https://wiki.ros.org/roscpp_tutorials), plus two additional nodes that collect CPU and memory usage statistics. Allow the nodes to run for 60 seconds.
138 | - Launch the nodes `polly_node`, `synthesizer_node` and `tts_node` by using the launch file `sample_application.launch` as described above. At the same time, perform several calls to the action `tts/action/Speech.action` using the `voicer.py` script described above, by running the following script in the background:
139 |
140 | ```bash
141 | rosrun tts voicer.py 'Amazon Polly is a Text-to-Speech (TTS) cloud service' '{"text_type":"ssml"}' ; sleep 1
142 | rosrun tts voicer.py 'that converts text into lifelike speech' '{"text_type":"ssml"}' ; sleep 1
143 | rosrun tts voicer.py 'You can use Amazon Polly to develop applications that increase engagement and accessibility' '{"text_type":"ssml"}' ; sleep 1
144 | rosrun tts voicer.py 'Amazon Polly supports multiple languages and includes a variety of lifelike voices' '{"text_type":"ssml"}' ; sleep 1
145 | rosrun tts voicer.py 'so you can build speech-enabled applications that work in multiple locations' '{"text_type":"ssml"}' ; sleep 1
146 | rosrun tts voicer.py 'and use the ideal voice for your customers' '{"text_type":"ssml"}' ; sleep 1
147 | ```
148 |
149 | - Allow the nodes to run for 180 seconds.
150 | - Terminate the `polly_node`, `synthesizer_node` and `tts_node` nodes, and allow the remaining nodes to run for 60 seconds.
151 |
152 | The following graph shows the CPU usage during that scenario. The 1 minute average CPU usage starts at 16.75% during the launch of the baseline graph, and stabilizes at 6%. When we launch the Polly nodes around second 85, the 1 minute average CPU increases up to a peak of 22.25% and stabilizes around 20%. After we stop making requests with the script `voicer.py` around second 206 the 1 minute average CPU usage moves to around 12%, and decreases gradually, and goes down again to 2.5 % after we stop the Polly nodes at the end of the scenario.
153 |
154 | 
155 |
156 | The following graph shows the memory usage during that scenario. We start with a memory usage of around 227 MB that increases to around 335 MB (+47.58%) when we launch the Polly nodes around second 85, and gets to a peak of 361 MB (+59% relative to the initial value) while we are calling the script `voicer.py`. The memory usage goes back to the initial values after stopping the Polly nodes.
157 |
158 | 
159 |
160 |
161 | ## Nodes
162 |
163 | ### polly
164 | Polly node is the engine for the synthesizing job. It provides user-friendly yet powerful APIs so a user doesn't have to deal with technical details of AWS service calls.
165 |
166 | #### Services
167 | - **`polly (tts/Polly)`**
168 |
169 | Call the service to use Amazon Polly to synthesize the audio.
170 |
171 | #### Reserved for future usage
172 | - `language_code (string, default: None)`
173 |
174 | A user doesn't have to provide a language code and this is reserved for future usage.
175 |
176 | - `lexicon_content (string, default: None)`
177 |
178 | - `lexicon_name (string, default: None)`
179 |
180 | - `lexicon_names (string[], default: empty)`
181 |
182 | - `speech_mark_types (string[], default: empty)`
183 |
184 | - `max_results (uint32, default: None)`
185 |
186 | - `next_token (string, default: None)`
187 |
188 | - `sns_topic_arn (string, default: None)`
189 |
190 | - `task_id (string, default: None)`
191 |
192 | - `task_status (string, default: None)`
193 |
194 | - `output_s3_bucket_name (string, default: None)`
195 |
196 | - `output_s3_key_prefix (string, default: None)`
197 |
198 | - `include_additional_language_codes (bool, default: None)`
199 |
200 | ### synthesizer node
201 |
202 | #### Services
203 | - **`synthesizer (tts/Synthesizer)`**
204 |
205 | Call the service to synthesize.
206 |
207 | #### Parameters
208 |
209 | - **`text (string)`**
210 |
211 | The text to be synthesized.
212 |
213 | - **`metadata (string, JSON format)`**
214 |
215 | Optional, for user to have control over how synthesis happens.
216 |
217 | ### tts node
218 |
219 | #### Action
220 |
221 | - **`speech`**
222 |
223 | #### Parameters
224 |
225 | - **`text (string)`**
226 |
227 | The text to be synthesized.
228 |
229 | - **`metadata (string, JSON format)`**
230 |
231 | Optional, for user to have control over how synthesis happens.
232 |
233 | [AWS Configuration and Credential Files]: https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html
234 | [Amazon Polly documentation]: https://docs.aws.amazon.com/polly/latest/dg/what-is.html
235 | [Amazon Web Services (AWS)]: https://aws.amazon.com/
236 | [Apache 2.0]: https://aws.amazon.com/apache-2-0/
237 | [Issue Tracker]: https://github.com/aws-robotics/tts-ros1/issues
238 | [PulseAudio Documentation]: https://www.freedesktop.org/wiki/Software/PulseAudio/Documentation/
239 | [official Amazon Polly document]: https://docs.aws.amazon.com/polly/latest/dg/voicelist.html
240 | [sample ROS application]: https://github.com/aws-robotics/aws-robomaker-sample-application-voiceinteraction
241 |
--------------------------------------------------------------------------------
/tts/src/tts/amazonpolly.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License").
6 | # You may not use this file except in compliance with the License.
7 | # A copy of the License is located at
8 | #
9 | # http://aws.amazon.com/apache2.0
10 | #
11 | # or in the "license" file accompanying this file. This file is distributed
12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 | # express or implied. See the License for the specific language governing
14 | # permissions and limitations under the License.
15 |
16 | import json
17 | import os
18 | import sys
19 | import wave
20 | import traceback
21 | import requests
22 | from boto3 import Session
23 | from botocore.credentials import CredentialProvider, RefreshableCredentials
24 | from botocore.session import get_session
25 | from botocore.exceptions import UnknownServiceError
26 | from contextlib import closing
27 | from optparse import OptionParser
28 |
29 | import rospy
30 | from tts.srv import Polly, PollyRequest, PollyResponse
31 |
32 |
def get_ros_param(param, default=None):
    """Look up a ROS parameter by name, falling back to a default value.

    The name is first resolved with ``rospy.search_param``. If no matching key
    is found, ``default`` is returned; otherwise the value is read with
    ``rospy.get_param``.

    :param param: name of the parameter to search for.
    :param default: value returned when the parameter is not found or the
        lookup raises.
    :return: the parameter value, or ``default``.

    Any exception raised during the lookup is logged as a warning (including
    the exception text) and ``default`` is returned instead of propagating.
    """
    try:
        key = rospy.search_param(param)
        return default if key is None else rospy.get_param(key, default)
    except Exception as e:
        # The message needs a third placeholder; without it the exception
        # passed to format() is silently dropped from the log line.
        rospy.logwarn('Failed to get ros param {}, will use default {}. Exception: {}'.format(param, default, e))
        return default
40 |
41 |
class AwsIotCredentialProvider(CredentialProvider):
    """Credential provider that fetches temporary credentials from an AWS IoT endpoint.

    Configuration is read from ROS parameters under the ``iot/`` prefix:
    ``certfile``, ``keyfile``, ``endpoint``, ``role``, ``thing_name`` and the
    optional ``connect_timeout_ms`` / ``total_timeout_ms``.
    """
    METHOD = 'aws-iot'
    CANONICAL_NAME = 'customIoTwithCertificate'

    # Defaults for the two timeout parameters, expressed in milliseconds.
    DEFAULT_AUTH_CONNECT_TIMEOUT_MS = 5000
    DEFAULT_AUTH_TOTAL_TIMEOUT_MS = 10000

    def __init__(self):
        super(AwsIotCredentialProvider, self).__init__()
        self.ros_param_prefix = 'iot/'

    def get_param(self, param, default=None):
        """Return the ROS parameter ``param`` looked up under the provider's prefix.

        :param param: parameter name without the ``iot/`` prefix.
        :param default: value returned when the parameter is unavailable.
        """
        return get_ros_param(self.ros_param_prefix + param, default)

    def retrieve_credentials(self):
        """Request credentials from the configured endpoint.

        Issues an HTTPS GET to ``https://<endpoint>/role-aliases/<role>/credentials``
        using the configured client certificate and key, and converts the
        ``credentials`` object of the JSON response into a metadata dict.

        :return: dict with ``access_key``, ``secret_key``, ``token`` and
            ``expiry_time``; or ``None`` when a required parameter is missing
            or the request/parsing fails (the failure is logged as a warning).
        """
        try:
            cert_file = self.get_param('certfile')
            key_file = self.get_param('keyfile')
            endpoint = self.get_param('endpoint')
            role_alias = self.get_param('role')
            connect_timeout = self.get_param('connect_timeout_ms', self.DEFAULT_AUTH_CONNECT_TIMEOUT_MS)
            total_timeout = self.get_param('total_timeout_ms', self.DEFAULT_AUTH_TOTAL_TIMEOUT_MS)
            thing_name = self.get_param('thing_name', '')

            # Without the full set of settings this provider is not configured.
            if any(v is None for v in (cert_file, key_file, endpoint, role_alias, thing_name)):
                return None

            # The thing-name header is only sent when a thing name is configured.
            headers = {'x-amzn-iot-thingname': thing_name} if len(thing_name) > 0 else None
            url = 'https://{}/role-aliases/{}/credentials'.format(endpoint, role_alias)

            # The parameters are in milliseconds, but requests expects its
            # (connect, read) timeout tuple in seconds. Divide by 1000.0 so the
            # division is floating-point on Python 2 as well.
            connect_timeout_s = connect_timeout / 1000.0
            read_timeout_s = (total_timeout - connect_timeout) / 1000.0
            timeout = (connect_timeout_s, read_timeout_s)  # see also: urllib3/util/timeout.py

            response = requests.get(url, cert=(cert_file, key_file), headers=headers, timeout=timeout)
            d = response.json()['credentials']

            rospy.loginfo('Credentials expiry time: {}'.format(d['expiration']))

            return {
                'access_key': d['accessKeyId'],
                'secret_key': d['secretAccessKey'],
                'token': d['sessionToken'],
                'expiry_time': d['expiration'],
            }
        except Exception as e:
            rospy.logwarn('Failed to fetch credentials from AWS IoT: {}'.format(e))
            return None

    def load(self):
        """Load refreshable credentials, or return ``None`` if none are available.

        :return: a ``RefreshableCredentials`` object that refreshes itself via
            ``retrieve_credentials``, or ``None`` when the initial retrieval
            yields nothing.
        """
        metadata = self.retrieve_credentials()
        if metadata is None:
            # retrieve_credentials() signals "not configured / failed" with
            # None, which create_from_metadata cannot turn into credentials.
            # NOTE(review): returning None is botocore's documented way for a
            # provider to say it has no credentials - confirm the resolver
            # then falls through to the next provider.
            return None
        return RefreshableCredentials.create_from_metadata(
            metadata,
            self.retrieve_credentials,
            'aws-iot-with-certificate'
        )
94 |
95 |
class AmazonPolly:
    """A TTS engine that can be used in two different ways.

    Usage
    -----

    1. It can run as a ROS service node.

    Start a polly node::

        $ rosrun tts polly_node.py

    Call the service from command line::

        $ rosservice call /polly SynthesizeSpeech 'hello polly' '' '' '' '' '' '' '' '' [] [] 0 '' '' '' '' '' '' false

    Call the service programmatically::

        from tts.srv import Polly
        rospy.wait_for_service('polly')
        polly = rospy.ServiceProxy('polly', Polly)
        res = polly(**kw)

    2. It can also be used as a normal python class::

        AmazonPolly().synthesize(text='hi polly')

    PollyRequest supports many parameters, but the majority of the users can safely ignore most of them and just
    use the vanilla version which involves only one argument, ``text``.

    If in some use cases more control is needed, SSML will come handy. SSML input must be wrapped in a
    ``<speak>`` element. Example::

        AmazonPolly().synthesize(
            text='<speak>Mary has a little lamb.</speak>',
            text_type='ssml'
        )

    A user can also control the voice, output format and so on. Example::

        AmazonPolly().synthesize(
            text='<speak>Mary has a little lamb.</speak>',
            text_type='ssml',
            voice_id='Joey',
            output_format='mp3',
            output_path='/tmp/blah'
        )


    Parameters
    ----------

    Among the parameters defined in Polly.srv, the following are supported while others are reserved for future.

    * polly_action : currently only ``SynthesizeSpeech`` is supported
    * text : the text to speak
    * text_type : can be either ``text`` (default) or ``ssml``
    * voice_id : any voice id supported by Amazon Polly, default is Joanna
    * output_format : ``ogg_vorbis`` (default), ``mp3`` or ``pcm``
    * output_path : where the audio file is saved
    * sample_rate : default is 16000 for pcm or 22050 for mp3 and ogg_vorbis

    The following are the reserved ones. Note that ``language_code`` is rarely needed (this may seem counter-intuitive).
    See official Amazon Polly documentation for details (link can be found below).

    * language_code
    * lexicon_content
    * lexicon_name
    * lexicon_names
    * speech_mark_types
    * max_results
    * next_token
    * sns_topic_arn
    * task_id
    * task_status
    * output_s3_bucket_name
    * output_s3_key_prefix
    * include_additional_language_codes


    Links
    -----

    Amazon Polly documentation: https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html

    """

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name=None):
        """Create the Polly client and set the defaults used when a request leaves fields empty.

        :param aws_access_key_id: optional explicit AWS access key id
        :param aws_secret_access_key: optional explicit AWS secret access key
        :param aws_session_token: optional explicit AWS session token
        :param region_name: AWS region; when ``None`` it is read from the ROS parameter
                            ``aws_client_configuration/region`` (default ``us-west-2``)
        """
        if region_name is None:
            region_name = get_ros_param('aws_client_configuration/region', default='us-west-2')

        self.polly = self._get_polly_client(aws_access_key_id, aws_secret_access_key, aws_session_token, region_name)
        self.default_text_type = 'text'
        self.default_voice_id = 'Joanna'
        self.default_output_format = 'ogg_vorbis'
        self.default_output_folder = '.'
        self.default_output_file_basename = 'output'

    def _get_polly_client(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None,
                          region_name=None, with_service_model_patch=False):
        """Note we get a new botocore session each time this function is called.
        This is to avoid potential problems caused by inner state of the session.

        :param with_service_model_patch: when True, point botocore's ``data_path`` at the
                                         service model files shipped next to this module
        :return: a boto3 Polly client
        :raises UnknownServiceError: if Polly is unknown to botocore even after the patch
        """
        botocore_session = get_session()

        if with_service_model_patch:
            # Older versions of botocore don't have polly. We can possibly fix it by pointing
            # botocore's data path at the polly service model files bundled with this package.
            current_dir = os.path.dirname(os.path.abspath(__file__))
            service_model_path = os.path.join(current_dir, 'data', 'models')
            botocore_session.set_config_variable('data_path', service_model_path)
            rospy.loginfo('patching service model data path: {}'.format(service_model_path))

        botocore_session.get_component('credential_provider').insert_after('boto-config', AwsIotCredentialProvider())

        botocore_session.user_agent_extra = self._generate_user_agent_suffix()

        session = Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key,
                          aws_session_token=aws_session_token, region_name=region_name,
                          botocore_session=botocore_session)

        try:
            return session.client("polly")
        except UnknownServiceError:
            # the first time we reach here, we try to fix the problem
            if not with_service_model_patch:
                return self._get_polly_client(aws_access_key_id, aws_secret_access_key, aws_session_token, region_name,
                                              with_service_model_patch=True)
            else:
                # we have tried our best, time to panic
                rospy.logerr('Amazon Polly is not available. Please install the latest boto3.')
                raise

    def _generate_user_agent_suffix(self):
        """Build the extra user-agent string from the ``exec_env``, ``robomaker_version``,
        ``rosdistro`` and ``rosversion`` ROS parameters.

        :return: a string of the form ``exec-env/<env> ros-<distro>/<version>``
        """
        exec_env = get_ros_param('exec_env', 'AWS_RoboMaker').strip()
        if 'AWS_RoboMaker' in exec_env:
            ver = get_ros_param('robomaker_version', None)
            if ver:
                # The parameter may come back as a number (e.g. 1.0), so stringify before stripping.
                exec_env += '-' + str(ver).strip()
        ros_distro = get_ros_param('rosdistro', 'Unknown_ROS_DISTRO').strip()
        ros_version = get_ros_param('rosversion', 'Unknown_ROS_VERSION').strip()
        return 'exec-env/{} ros-{}/{}'.format(exec_env, ros_distro, ros_version)

    def _pcm2wav(self, audio_data, wav_filename, sample_rate):
        """per Amazon Polly official doc, the pcm in a signed 16-bit, 1 channel (mono), little-endian format.

        :param audio_data: raw PCM bytes
        :param wav_filename: path of the WAV file to write
        :param sample_rate: sample rate in Hz; anything ``int()`` accepts
        :raises ValueError: if ``sample_rate`` is not a valid integer (no file is created in that case)
        """
        # Validate before opening so a bad sample rate doesn't leave an empty file behind.
        frame_rate = int(sample_rate)

        # `closing` guarantees the file handle is released even if writing fails.
        with closing(wave.open(wav_filename, 'w')) as wavf:
            wavf.setnchannels(1)  # 1 channel
            wavf.setsampwidth(2)  # 2 bytes == 16 bits
            wavf.setframerate(frame_rate)
            wavf.writeframes(audio_data)

    def _make_audio_file_fullpath(self, output_path, output_format):
        """Makes a full path for audio file based on given output path and format.

        If ``output_path`` doesn't have a path, current path is used.

        :param output_path: the output path received
        :param output_format: the audio format, e.g., mp3, ogg_vorbis, pcm
        :return: a full path for the output audio file. File ext will be constructed from audio format.
        :raises KeyError: if ``output_format`` is not one of pcm, mp3 or ogg_vorbis (case-insensitive)
        """
        head, tail = os.path.split(output_path)
        if not head:
            head = self.default_output_folder
        if not tail:
            tail = self.default_output_file_basename

        file_ext = {'pcm': '.wav', 'mp3': '.mp3', 'ogg_vorbis': '.ogg'}[output_format.lower()]
        if not tail.endswith(file_ext):
            tail += file_ext

        return os.path.realpath(os.path.join(head, tail))

    def _synthesize_speech_and_save(self, request):
        """Calls Amazon Polly and writes the returned audio data to a local file.

        To make it practical, three things will be returned in a JSON form string, which are audio file path,
        audio type and Amazon Polly response metadata.

        If the response carries no audio stream, audio file name will be an empty string and audio type
        will be "N/A".

        Please see https://boto3.readthedocs.io/reference/services/polly.html#Polly.Client.synthesize_speech
        for more details on Amazon Polly API.

        :param request: an instance of PollyRequest
        :return: a string in JSON form with three attributes, "Audio File", "Audio Type" and
                 "Amazon Polly Response Metadata".
        """
        kws = {
            'LexiconNames': request.lexicon_names if request.lexicon_names else [],
            'OutputFormat': request.output_format if request.output_format else self.default_output_format,
            'SampleRate': request.sample_rate,
            'SpeechMarkTypes': request.speech_mark_types if request.speech_mark_types else [],
            'Text': request.text,
            'TextType': request.text_type if request.text_type else self.default_text_type,
            'VoiceId': request.voice_id if request.voice_id else self.default_voice_id
        }

        if not kws['SampleRate']:
            kws['SampleRate'] = '16000' if kws['OutputFormat'].lower() == 'pcm' else '22050'

        rospy.loginfo('Amazon Polly Request: {}'.format(kws))
        response = self.polly.synthesize_speech(**kws)
        rospy.loginfo('Amazon Polly Response: {}'.format(response))

        if "AudioStream" in response:
            audiofile = self._make_audio_file_fullpath(request.output_path, kws['OutputFormat'])
            rospy.loginfo('will save audio as {}'.format(audiofile))

            with closing(response["AudioStream"]) as stream:
                if kws['OutputFormat'].lower() == 'pcm':
                    # raw PCM has no container; wrap it in a WAV header
                    self._pcm2wav(stream.read(), audiofile, kws['SampleRate'])
                else:
                    with open(audiofile, "wb") as f:
                        f.write(stream.read())

            audiotype = response['ContentType']
        else:
            audiofile = ''
            audiotype = 'N/A'

        return json.dumps({
            'Audio File': audiofile,
            'Audio Type': audiotype,
            'Amazon Polly Response Metadata': str(response['ResponseMetadata'])
        })

    def _dispatch(self, request):
        """Amazon Polly supports a number of APIs. This will call the right one based on the content of request.

        Currently "SynthesizeSpeech" is the only recognized action. Basically this method just delegates the work
        to ``self._synthesize_speech_and_save`` and returns the result as is. It will simply raise if a different
        action is passed in.

        :param request: an instance of PollyRequest
        :return: whatever returned by the delegate
        :raises RuntimeError: if ``request.polly_action`` is not a recognized action
        """
        actions = {
            'SynthesizeSpeech': self._synthesize_speech_and_save
            # ... more actions could go in here ...
        }

        if request.polly_action not in actions:
            raise RuntimeError('bad or unsupported Amazon Polly action: "' + request.polly_action + '".')

        return actions[request.polly_action](request)

    def _node_request_handler(self, request):
        """The callback function for processing service request.

        It never raises. If anything unexpected happens, it will return a PollyResponse with details of the exception.

        :param request: an instance of PollyRequest
        :return: a PollyResponse
        """
        rospy.loginfo('Amazon Polly Request: {}'.format(request))

        try:
            response = self._dispatch(request)
            rospy.loginfo('will return {}'.format(response))
            return PollyResponse(result=response)
        except Exception as e:
            current_dir = os.path.dirname(os.path.abspath(__file__))
            exc_type = sys.exc_info()[0]

            # not using `issubclass(exc_type, ConnectionError)` for the condition below because some versions
            # of urllib3 raises exception when doing `from requests.exceptions import ConnectionError`
            error_ogg_filename = 'connerror.ogg' if 'ConnectionError' in exc_type.__name__ else 'error.ogg'

            # On failure the response points at a canned error audio file next to this module.
            error_details = {
                'Audio File': os.path.join(current_dir, 'data', error_ogg_filename),
                'Audio Type': 'ogg',
                'Exception': {
                    'Type': str(exc_type),
                    'Module': exc_type.__module__,
                    'Name': exc_type.__name__,
                    'Value': str(e),
                },
                'Traceback': traceback.format_exc()
            }

            error_str = json.dumps(error_details)
            rospy.logerr(error_str)
            return PollyResponse(result=error_str)

    def synthesize(self, **kws):
        """Call this method if you want to use polly but don't want to start a node.

        :param kws: input as defined in Polly.srv
        :return: a PollyResponse whose ``result`` is a string in JSON form with detailed information,
                 success or failure
        """
        req = PollyRequest(polly_action='SynthesizeSpeech', **kws)
        return self._node_request_handler(req)

    def start(self, node_name='polly_node', service_name='polly'):
        """The entry point of a ROS service node.

        Details of the service API can be found in Polly.srv.

        :param node_name: name of ROS node
        :param service_name: name of ROS service
        :return: nothing; the call blocks in ``rospy.spin()``
        """
        rospy.init_node(node_name)

        service = rospy.Service(service_name, Polly, self._node_request_handler)

        rospy.loginfo('polly running: {}'.format(service.uri))

        rospy.spin()
404 |
405 |
def main():
    """Parse the command line and run the Polly service node.

    Recognized options are ``-n/--node-name`` and ``-s/--service-name``; both
    are forwarded to ``AmazonPolly.start``.
    """
    usage = '''usage: %prog [options]\n'''

    option_parser = OptionParser(usage)
    option_parser.add_option(
        "-n", "--node-name",
        dest="node_name",
        default='polly_node',
        help="name of the ROS node",
        metavar="NODE_NAME",
    )
    option_parser.add_option(
        "-s", "--service-name",
        dest="service_name",
        default='polly',
        help="name of the ROS service",
        metavar="SERVICE_NAME",
    )

    # Positional arguments are accepted by the parser but not used.
    parsed_options, _positional = option_parser.parse_args()

    engine = AmazonPolly()
    engine.start(node_name=parsed_options.node_name, service_name=parsed_options.service_name)
425 |
426 |
# Run the node only when this module is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
429 |
--------------------------------------------------------------------------------
/tts/src/tts/data/models/polly/2016-06-10/service-2.json:
--------------------------------------------------------------------------------
1 | {
2 | "version":"2.0",
3 | "metadata":{
4 | "apiVersion":"2016-06-10",
5 | "endpointPrefix":"polly",
6 | "protocol":"rest-json",
7 | "serviceFullName":"Amazon Polly",
8 | "serviceId":"Polly",
9 | "signatureVersion":"v4",
10 | "uid":"polly-2016-06-10"
11 | },
12 | "operations":{
13 | "DeleteLexicon":{
14 | "name":"DeleteLexicon",
15 | "http":{
16 | "method":"DELETE",
17 | "requestUri":"/v1/lexicons/{LexiconName}",
18 | "responseCode":200
19 | },
20 | "input":{"shape":"DeleteLexiconInput"},
21 | "output":{"shape":"DeleteLexiconOutput"},
22 | "errors":[
23 | {"shape":"LexiconNotFoundException"},
24 | {"shape":"ServiceFailureException"}
25 | ],
26 | "documentation":"Deletes the specified pronunciation lexicon stored in an AWS Region. A lexicon which has been deleted is not available for speech synthesis, nor is it possible to retrieve it using either the GetLexicon or ListLexicon APIs.
For more information, see Managing Lexicons.
"
27 | },
28 | "DescribeVoices":{
29 | "name":"DescribeVoices",
30 | "http":{
31 | "method":"GET",
32 | "requestUri":"/v1/voices",
33 | "responseCode":200
34 | },
35 | "input":{"shape":"DescribeVoicesInput"},
36 | "output":{"shape":"DescribeVoicesOutput"},
37 | "errors":[
38 | {"shape":"InvalidNextTokenException"},
39 | {"shape":"ServiceFailureException"}
40 | ],
41 | "documentation":"Returns the list of voices that are available for use when requesting speech synthesis. Each voice speaks a specified language, is either male or female, and is identified by an ID, which is the ASCII version of the voice name.
When synthesizing speech ( SynthesizeSpeech ), you provide the voice ID for the voice you want from the list of voices returned by DescribeVoices.
For example, you want your news reader application to read news in a specific language, but giving a user the option to choose the voice. Using the DescribeVoices operation you can provide the user with a list of available voices to select from.
You can optionally specify a language code to filter the available voices. For example, if you specify en-US, the operation returns a list of all available US English voices.
This operation requires permissions to perform the polly:DescribeVoices action.
"
42 | },
43 | "GetLexicon":{
44 | "name":"GetLexicon",
45 | "http":{
46 | "method":"GET",
47 | "requestUri":"/v1/lexicons/{LexiconName}",
48 | "responseCode":200
49 | },
50 | "input":{"shape":"GetLexiconInput"},
51 | "output":{"shape":"GetLexiconOutput"},
52 | "errors":[
53 | {"shape":"LexiconNotFoundException"},
54 | {"shape":"ServiceFailureException"}
55 | ],
56 | "documentation":"Returns the content of the specified pronunciation lexicon stored in an AWS Region. For more information, see Managing Lexicons.
"
57 | },
58 | "GetSpeechSynthesisTask":{
59 | "name":"GetSpeechSynthesisTask",
60 | "http":{
61 | "method":"GET",
62 | "requestUri":"/v1/synthesisTasks/{TaskId}",
63 | "responseCode":200
64 | },
65 | "input":{"shape":"GetSpeechSynthesisTaskInput"},
66 | "output":{"shape":"GetSpeechSynthesisTaskOutput"},
67 | "errors":[
68 | {"shape":"InvalidTaskIdException"},
69 | {"shape":"ServiceFailureException"},
70 | {"shape":"SynthesisTaskNotFoundException"}
71 | ],
72 | "documentation":"Retrieves a specific SpeechSynthesisTask object based on its TaskID. This object contains information about the given speech synthesis task, including the status of the task, and a link to the S3 bucket containing the output of the task.
"
73 | },
74 | "ListLexicons":{
75 | "name":"ListLexicons",
76 | "http":{
77 | "method":"GET",
78 | "requestUri":"/v1/lexicons",
79 | "responseCode":200
80 | },
81 | "input":{"shape":"ListLexiconsInput"},
82 | "output":{"shape":"ListLexiconsOutput"},
83 | "errors":[
84 | {"shape":"InvalidNextTokenException"},
85 | {"shape":"ServiceFailureException"}
86 | ],
87 | "documentation":"Returns a list of pronunciation lexicons stored in an AWS Region. For more information, see Managing Lexicons.
"
88 | },
89 | "ListSpeechSynthesisTasks":{
90 | "name":"ListSpeechSynthesisTasks",
91 | "http":{
92 | "method":"GET",
93 | "requestUri":"/v1/synthesisTasks",
94 | "responseCode":200
95 | },
96 | "input":{"shape":"ListSpeechSynthesisTasksInput"},
97 | "output":{"shape":"ListSpeechSynthesisTasksOutput"},
98 | "errors":[
99 | {"shape":"InvalidNextTokenException"},
100 | {"shape":"ServiceFailureException"}
101 | ],
102 | "documentation":"Returns a list of SpeechSynthesisTask objects ordered by their creation date. This operation can filter the tasks by their status, for example, allowing users to list only tasks that are completed.
"
103 | },
104 | "PutLexicon":{
105 | "name":"PutLexicon",
106 | "http":{
107 | "method":"PUT",
108 | "requestUri":"/v1/lexicons/{LexiconName}",
109 | "responseCode":200
110 | },
111 | "input":{"shape":"PutLexiconInput"},
112 | "output":{"shape":"PutLexiconOutput"},
113 | "errors":[
114 | {"shape":"InvalidLexiconException"},
115 | {"shape":"UnsupportedPlsAlphabetException"},
116 | {"shape":"UnsupportedPlsLanguageException"},
117 | {"shape":"LexiconSizeExceededException"},
118 | {"shape":"MaxLexemeLengthExceededException"},
119 | {"shape":"MaxLexiconsNumberExceededException"},
120 | {"shape":"ServiceFailureException"}
121 | ],
122 | "documentation":"Stores a pronunciation lexicon in an AWS Region. If a lexicon with the same name already exists in the region, it is overwritten by the new lexicon. Lexicon operations have eventual consistency, therefore, it might take some time before the lexicon is available to the SynthesizeSpeech operation.
For more information, see Managing Lexicons.
"
123 | },
124 | "StartSpeechSynthesisTask":{
125 | "name":"StartSpeechSynthesisTask",
126 | "http":{
127 | "method":"POST",
128 | "requestUri":"/v1/synthesisTasks",
129 | "responseCode":200
130 | },
131 | "input":{"shape":"StartSpeechSynthesisTaskInput"},
132 | "output":{"shape":"StartSpeechSynthesisTaskOutput"},
133 | "errors":[
134 | {"shape":"TextLengthExceededException"},
135 | {"shape":"InvalidS3BucketException"},
136 | {"shape":"InvalidS3KeyException"},
137 | {"shape":"InvalidSampleRateException"},
138 | {"shape":"InvalidSnsTopicArnException"},
139 | {"shape":"InvalidSsmlException"},
140 | {"shape":"LexiconNotFoundException"},
141 | {"shape":"ServiceFailureException"},
142 | {"shape":"MarksNotSupportedForFormatException"},
143 | {"shape":"SsmlMarksNotSupportedForTextTypeException"},
144 | {"shape":"LanguageNotSupportedException"}
145 | ],
146 | "documentation":"Allows the creation of an asynchronous synthesis task, by starting a new SpeechSynthesisTask. This operation requires all the standard information needed for speech synthesis, plus the name of an Amazon S3 bucket for the service to store the output of the synthesis task and two optional parameters (OutputS3KeyPrefix and SnsTopicArn). Once the synthesis task is created, this operation will return a SpeechSynthesisTask object, which will include an identifier of this task as well as the current status.
"
147 | },
148 | "SynthesizeSpeech":{
149 | "name":"SynthesizeSpeech",
150 | "http":{
151 | "method":"POST",
152 | "requestUri":"/v1/speech",
153 | "responseCode":200
154 | },
155 | "input":{"shape":"SynthesizeSpeechInput"},
156 | "output":{"shape":"SynthesizeSpeechOutput"},
157 | "errors":[
158 | {"shape":"TextLengthExceededException"},
159 | {"shape":"InvalidSampleRateException"},
160 | {"shape":"InvalidSsmlException"},
161 | {"shape":"LexiconNotFoundException"},
162 | {"shape":"ServiceFailureException"},
163 | {"shape":"MarksNotSupportedForFormatException"},
164 | {"shape":"SsmlMarksNotSupportedForTextTypeException"},
165 | {"shape":"LanguageNotSupportedException"}
166 | ],
167 | "documentation":"Synthesizes UTF-8 input, plain text or SSML, to a stream of bytes. SSML input must be valid, well-formed SSML. Some alphabets might not be available with all the voices (for example, Cyrillic might not be read at all by English voices) unless phoneme mapping is used. For more information, see How it Works.
"
168 | }
169 | },
170 | "shapes":{
171 | "Alphabet":{"type":"string"},
172 | "AudioStream":{
173 | "type":"blob",
174 | "streaming":true
175 | },
176 | "ContentType":{"type":"string"},
177 | "DateTime":{"type":"timestamp"},
178 | "DeleteLexiconInput":{
179 | "type":"structure",
180 | "required":["Name"],
181 | "members":{
182 | "Name":{
183 | "shape":"LexiconName",
184 | "documentation":"The name of the lexicon to delete. Must be an existing lexicon in the region.
",
185 | "location":"uri",
186 | "locationName":"LexiconName"
187 | }
188 | }
189 | },
190 | "DeleteLexiconOutput":{
191 | "type":"structure",
192 | "members":{
193 | }
194 | },
195 | "DescribeVoicesInput":{
196 | "type":"structure",
197 | "members":{
198 | "LanguageCode":{
199 | "shape":"LanguageCode",
200 | "documentation":" The language identification tag (ISO 639 code for the language name-ISO 3166 country code) for filtering the list of voices returned. If you don't specify this optional parameter, all available voices are returned.
",
201 | "location":"querystring",
202 | "locationName":"LanguageCode"
203 | },
204 | "IncludeAdditionalLanguageCodes":{
205 | "shape":"IncludeAdditionalLanguageCodes",
206 | "documentation":"Boolean value indicating whether to return any bilingual voices that use the specified language as an additional language. For instance, if you request all languages that use US English (es-US), and there is an Italian voice that speaks both Italian (it-IT) and US English, that voice will be included if you specify yes but not if you specify no.
",
207 | "location":"querystring",
208 | "locationName":"IncludeAdditionalLanguageCodes"
209 | },
210 | "NextToken":{
211 | "shape":"NextToken",
212 | "documentation":"An opaque pagination token returned from the previous DescribeVoices operation. If present, this indicates where to continue the listing.
",
213 | "location":"querystring",
214 | "locationName":"NextToken"
215 | }
216 | }
217 | },
218 | "DescribeVoicesOutput":{
219 | "type":"structure",
220 | "members":{
221 | "Voices":{
222 | "shape":"VoiceList",
223 | "documentation":"A list of voices with their properties.
"
224 | },
225 | "NextToken":{
226 | "shape":"NextToken",
227 | "documentation":"The pagination token to use in the next request to continue the listing of voices. NextToken is returned only if the response is truncated.
"
228 | }
229 | }
230 | },
231 | "ErrorMessage":{"type":"string"},
232 | "Gender":{
233 | "type":"string",
234 | "enum":[
235 | "Female",
236 | "Male"
237 | ]
238 | },
239 | "GetLexiconInput":{
240 | "type":"structure",
241 | "required":["Name"],
242 | "members":{
243 | "Name":{
244 | "shape":"LexiconName",
245 | "documentation":"Name of the lexicon.
",
246 | "location":"uri",
247 | "locationName":"LexiconName"
248 | }
249 | }
250 | },
251 | "GetLexiconOutput":{
252 | "type":"structure",
253 | "members":{
254 | "Lexicon":{
255 | "shape":"Lexicon",
256 | "documentation":"Lexicon object that provides name and the string content of the lexicon.
"
257 | },
258 | "LexiconAttributes":{
259 | "shape":"LexiconAttributes",
260 | "documentation":"Metadata of the lexicon, including phonetic alphabetic used, language code, lexicon ARN, number of lexemes defined in the lexicon, and size of lexicon in bytes.
"
261 | }
262 | }
263 | },
264 | "GetSpeechSynthesisTaskInput":{
265 | "type":"structure",
266 | "required":["TaskId"],
267 | "members":{
268 | "TaskId":{
269 | "shape":"TaskId",
270 | "documentation":"The Amazon Polly generated identifier for a speech synthesis task.
",
271 | "location":"uri",
272 | "locationName":"TaskId"
273 | }
274 | }
275 | },
276 | "GetSpeechSynthesisTaskOutput":{
277 | "type":"structure",
278 | "members":{
279 | "SynthesisTask":{
280 | "shape":"SynthesisTask",
281 | "documentation":"SynthesisTask object that provides information from the requested task, including output format, creation time, task status, and so on.
"
282 | }
283 | }
284 | },
285 | "IncludeAdditionalLanguageCodes":{"type":"boolean"},
286 | "InvalidLexiconException":{
287 | "type":"structure",
288 | "members":{
289 | "message":{"shape":"ErrorMessage"}
290 | },
291 | "documentation":"Amazon Polly can't find the specified lexicon. Verify that the lexicon's name is spelled correctly, and then try again.
",
292 | "error":{"httpStatusCode":400},
293 | "exception":true
294 | },
295 | "InvalidNextTokenException":{
296 | "type":"structure",
297 | "members":{
298 | "message":{"shape":"ErrorMessage"}
299 | },
300 | "documentation":"The NextToken is invalid. Verify that it's spelled correctly, and then try again.
",
301 | "error":{"httpStatusCode":400},
302 | "exception":true
303 | },
304 | "InvalidS3BucketException":{
305 | "type":"structure",
306 | "members":{
307 | "message":{"shape":"ErrorMessage"}
308 | },
309 | "documentation":"The provided Amazon S3 bucket name is invalid. Please check your input with S3 bucket naming requirements and try again.
",
310 | "error":{"httpStatusCode":400},
311 | "exception":true
312 | },
313 | "InvalidS3KeyException":{
314 | "type":"structure",
315 | "members":{
316 | "message":{"shape":"ErrorMessage"}
317 | },
318 | "documentation":"The provided Amazon S3 key prefix is invalid. Please provide a valid S3 object key name.
",
319 | "error":{"httpStatusCode":400},
320 | "exception":true
321 | },
322 | "InvalidSampleRateException":{
323 | "type":"structure",
324 | "members":{
325 | "message":{"shape":"ErrorMessage"}
326 | },
327 | "documentation":"The specified sample rate is not valid.
",
328 | "error":{"httpStatusCode":400},
329 | "exception":true
330 | },
331 | "InvalidSnsTopicArnException":{
332 | "type":"structure",
333 | "members":{
334 | "message":{"shape":"ErrorMessage"}
335 | },
336 | "documentation":"The provided SNS topic ARN is invalid. Please provide a valid SNS topic ARN and try again.
",
337 | "error":{"httpStatusCode":400},
338 | "exception":true
339 | },
340 | "InvalidSsmlException":{
341 | "type":"structure",
342 | "members":{
343 | "message":{"shape":"ErrorMessage"}
344 | },
345 | "documentation":"The SSML you provided is invalid. Verify the SSML syntax, spelling of tags and values, and then try again.
",
346 | "error":{"httpStatusCode":400},
347 | "exception":true
348 | },
349 | "InvalidTaskIdException":{
350 | "type":"structure",
351 | "members":{
352 | "message":{"shape":"ErrorMessage"}
353 | },
354 | "documentation":"The provided Task ID is not valid. Please provide a valid Task ID and try again.
",
355 | "error":{"httpStatusCode":400},
356 | "exception":true
357 | },
358 | "LanguageCode":{
359 | "type":"string",
360 | "enum":[
361 | "cmn-CN",
362 | "cy-GB",
363 | "da-DK",
364 | "de-DE",
365 | "en-AU",
366 | "en-GB",
367 | "en-GB-WLS",
368 | "en-IN",
369 | "en-US",
370 | "es-ES",
371 | "es-US",
372 | "fr-CA",
373 | "fr-FR",
374 | "is-IS",
375 | "it-IT",
376 | "ja-JP",
377 | "hi-IN",
378 | "ko-KR",
379 | "nb-NO",
380 | "nl-NL",
381 | "pl-PL",
382 | "pt-BR",
383 | "pt-PT",
384 | "ro-RO",
385 | "ru-RU",
386 | "sv-SE",
387 | "tr-TR"
388 | ]
389 | },
390 | "LanguageCodeList":{
391 | "type":"list",
392 | "member":{"shape":"LanguageCode"}
393 | },
394 | "LanguageName":{"type":"string"},
395 | "LanguageNotSupportedException":{
396 | "type":"structure",
397 | "members":{
398 | "message":{"shape":"ErrorMessage"}
399 | },
400 | "documentation":"The language specified is not currently supported by Amazon Polly in this capacity.
",
401 | "error":{"httpStatusCode":400},
402 | "exception":true
403 | },
404 | "LastModified":{"type":"timestamp"},
405 | "LexemesCount":{"type":"integer"},
406 | "Lexicon":{
407 | "type":"structure",
408 | "members":{
409 | "Content":{
410 | "shape":"LexiconContent",
411 | "documentation":"Lexicon content in string format. The content of a lexicon must be in PLS format.
"
412 | },
413 | "Name":{
414 | "shape":"LexiconName",
415 | "documentation":"Name of the lexicon.
"
416 | }
417 | },
418 | "documentation":"Provides lexicon name and lexicon content in string format. For more information, see Pronunciation Lexicon Specification (PLS) Version 1.0.
"
419 | },
420 | "LexiconArn":{"type":"string"},
421 | "LexiconAttributes":{
422 | "type":"structure",
423 | "members":{
424 | "Alphabet":{
425 | "shape":"Alphabet",
426 | "documentation":"Phonetic alphabet used in the lexicon. Valid values are ipa and x-sampa.
"
427 | },
428 | "LanguageCode":{
429 | "shape":"LanguageCode",
430 | "documentation":"Language code that the lexicon applies to. A lexicon with a language code such as \"en\" would be applied to all English languages (en-GB, en-US, en-AUS, en-WLS, and so on.
"
431 | },
432 | "LastModified":{
433 | "shape":"LastModified",
434 | "documentation":"Date lexicon was last modified (a timestamp value).
"
435 | },
436 | "LexiconArn":{
437 | "shape":"LexiconArn",
438 | "documentation":"Amazon Resource Name (ARN) of the lexicon.
"
439 | },
440 | "LexemesCount":{
441 | "shape":"LexemesCount",
442 | "documentation":"Number of lexemes in the lexicon.
"
443 | },
444 | "Size":{
445 | "shape":"Size",
446 | "documentation":"Total size of the lexicon, in characters.
"
447 | }
448 | },
449 | "documentation":"Contains metadata describing the lexicon such as the number of lexemes, language code, and so on. For more information, see Managing Lexicons.
"
450 | },
451 | "LexiconContent":{"type":"string"},
452 | "LexiconDescription":{
453 | "type":"structure",
454 | "members":{
455 | "Name":{
456 | "shape":"LexiconName",
457 | "documentation":"Name of the lexicon.
"
458 | },
459 | "Attributes":{
460 | "shape":"LexiconAttributes",
461 | "documentation":"Provides lexicon metadata.
"
462 | }
463 | },
464 | "documentation":"Describes the content of the lexicon.
"
465 | },
466 | "LexiconDescriptionList":{
467 | "type":"list",
468 | "member":{"shape":"LexiconDescription"}
469 | },
470 | "LexiconName":{
471 | "type":"string",
472 | "pattern":"[0-9A-Za-z]{1,20}",
473 | "sensitive":true
474 | },
475 | "LexiconNameList":{
476 | "type":"list",
477 | "member":{"shape":"LexiconName"},
478 | "max":5
479 | },
480 | "LexiconNotFoundException":{
481 | "type":"structure",
482 | "members":{
483 | "message":{"shape":"ErrorMessage"}
484 | },
485 | "documentation":"Amazon Polly can't find the specified lexicon. This could be caused by a lexicon that is missing, a misspelled lexicon name, or specifying a lexicon that is in a different region.
Verify that the lexicon exists, is in the region (see ListLexicons), and that its name is spelled correctly. Then try again.
",
486 | "error":{"httpStatusCode":404},
487 | "exception":true
488 | },
489 | "LexiconSizeExceededException":{
490 | "type":"structure",
491 | "members":{
492 | "message":{"shape":"ErrorMessage"}
493 | },
494 | "documentation":"The maximum size of the specified lexicon would be exceeded by this operation.
",
495 | "error":{"httpStatusCode":400},
496 | "exception":true
497 | },
498 | "ListLexiconsInput":{
499 | "type":"structure",
500 | "members":{
501 | "NextToken":{
502 | "shape":"NextToken",
503 | "documentation":"An opaque pagination token returned from previous ListLexicons operation. If present, indicates where to continue the list of lexicons.
",
504 | "location":"querystring",
505 | "locationName":"NextToken"
506 | }
507 | }
508 | },
509 | "ListLexiconsOutput":{
510 | "type":"structure",
511 | "members":{
512 | "Lexicons":{
513 | "shape":"LexiconDescriptionList",
514 | "documentation":"A list of lexicon names and attributes.
"
515 | },
516 | "NextToken":{
517 | "shape":"NextToken",
518 | "documentation":"The pagination token to use in the next request to continue the listing of lexicons. NextToken is returned only if the response is truncated.
"
519 | }
520 | }
521 | },
522 | "ListSpeechSynthesisTasksInput":{
523 | "type":"structure",
524 | "members":{
525 | "MaxResults":{
526 | "shape":"MaxResults",
527 | "documentation":"Maximum number of speech synthesis tasks returned in a List operation.
",
528 | "location":"querystring",
529 | "locationName":"MaxResults"
530 | },
531 | "NextToken":{
532 | "shape":"NextToken",
533 | "documentation":"The pagination token to use in the next request to continue the listing of speech synthesis tasks.
",
534 | "location":"querystring",
535 | "locationName":"NextToken"
536 | },
537 | "Status":{
538 | "shape":"TaskStatus",
539 | "documentation":"Status of the speech synthesis tasks returned in a List operation.
",
540 | "location":"querystring",
541 | "locationName":"Status"
542 | }
543 | }
544 | },
545 | "ListSpeechSynthesisTasksOutput":{
546 | "type":"structure",
547 | "members":{
548 | "NextToken":{
549 | "shape":"NextToken",
550 | "documentation":"An opaque pagination token returned from the previous List operation in this request. If present, this indicates where to continue the listing.
"
551 | },
552 | "SynthesisTasks":{
553 | "shape":"SynthesisTasks",
554 | "documentation":"List of SynthesisTask objects that provides information from the specified task in the list request, including output format, creation time, task status, and so on.
"
555 | }
556 | }
557 | },
558 | "MarksNotSupportedForFormatException":{
559 | "type":"structure",
560 | "members":{
561 | "message":{"shape":"ErrorMessage"}
562 | },
563 | "documentation":"Speech marks are not supported for the OutputFormat selected. Speech marks are only available for content in json format.
",
564 | "error":{"httpStatusCode":400},
565 | "exception":true
566 | },
567 | "MaxLexemeLengthExceededException":{
568 | "type":"structure",
569 | "members":{
570 | "message":{"shape":"ErrorMessage"}
571 | },
572 | "documentation":"The maximum size of the lexeme would be exceeded by this operation.
",
573 | "error":{"httpStatusCode":400},
574 | "exception":true
575 | },
576 | "MaxLexiconsNumberExceededException":{
577 | "type":"structure",
578 | "members":{
579 | "message":{"shape":"ErrorMessage"}
580 | },
581 | "documentation":"The maximum number of lexicons would be exceeded by this operation.
",
582 | "error":{"httpStatusCode":400},
583 | "exception":true
584 | },
585 | "MaxResults":{
586 | "type":"integer",
587 | "max":100,
588 | "min":1
589 | },
590 | "NextToken":{"type":"string"},
591 | "OutputFormat":{
592 | "type":"string",
593 | "enum":[
594 | "json",
595 | "mp3",
596 | "ogg_vorbis",
597 | "pcm"
598 | ]
599 | },
600 | "OutputS3BucketName":{
601 | "type":"string",
602 | "pattern":"^[a-z0-9][\\.\\-a-z0-9]{1,61}[a-z0-9]$"
603 | },
604 | "OutputS3KeyPrefix":{
605 | "type":"string",
606 | "pattern":"^[0-9a-zA-Z\\/\\!\\-_\\.\\*\\'\\(\\)]{0,800}$"
607 | },
608 | "OutputUri":{"type":"string"},
609 | "PutLexiconInput":{
610 | "type":"structure",
611 | "required":[
612 | "Name",
613 | "Content"
614 | ],
615 | "members":{
616 | "Name":{
617 | "shape":"LexiconName",
618 | "documentation":"Name of the lexicon. The name must follow the regular expression format [0-9A-Za-z]{1,20}. That is, the name is a case-sensitive alphanumeric string up to 20 characters long.
",
619 | "location":"uri",
620 | "locationName":"LexiconName"
621 | },
622 | "Content":{
623 | "shape":"LexiconContent",
624 | "documentation":"Content of the PLS lexicon as string data.
"
625 | }
626 | }
627 | },
628 | "PutLexiconOutput":{
629 | "type":"structure",
630 | "members":{
631 | }
632 | },
633 | "RequestCharacters":{"type":"integer"},
634 | "SampleRate":{"type":"string"},
635 | "ServiceFailureException":{
636 | "type":"structure",
637 | "members":{
638 | "message":{"shape":"ErrorMessage"}
639 | },
640 | "documentation":"An unknown condition has caused a service failure.
",
641 | "error":{"httpStatusCode":500},
642 | "exception":true,
643 | "fault":true
644 | },
645 | "Size":{"type":"integer"},
646 | "SnsTopicArn":{
647 | "type":"string",
648 | "pattern":"^arn:aws(-(cn|iso(-b)?|us-gov))?:sns:.*:\\w{12}:.+$"
649 | },
650 | "SpeechMarkType":{
651 | "type":"string",
652 | "enum":[
653 | "sentence",
654 | "ssml",
655 | "viseme",
656 | "word"
657 | ]
658 | },
659 | "SpeechMarkTypeList":{
660 | "type":"list",
661 | "member":{"shape":"SpeechMarkType"},
662 | "max":4
663 | },
664 | "SsmlMarksNotSupportedForTextTypeException":{
665 | "type":"structure",
666 | "members":{
667 | "message":{"shape":"ErrorMessage"}
668 | },
669 | "documentation":"SSML speech marks are not supported for plain text-type input.
",
670 | "error":{"httpStatusCode":400},
671 | "exception":true
672 | },
673 | "StartSpeechSynthesisTaskInput":{
674 | "type":"structure",
675 | "required":[
676 | "OutputFormat",
677 | "OutputS3BucketName",
678 | "Text",
679 | "VoiceId"
680 | ],
681 | "members":{
682 | "LexiconNames":{
683 | "shape":"LexiconNameList",
684 | "documentation":"List of one or more pronunciation lexicon names you want the service to apply during synthesis. Lexicons are applied only if the language of the lexicon is the same as the language of the voice.
"
685 | },
686 | "OutputFormat":{
687 | "shape":"OutputFormat",
688 | "documentation":"The format in which the returned output will be encoded. For audio stream, this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.
"
689 | },
690 | "OutputS3BucketName":{
691 | "shape":"OutputS3BucketName",
692 | "documentation":"Amazon S3 bucket name to which the output file will be saved.
"
693 | },
694 | "OutputS3KeyPrefix":{
695 | "shape":"OutputS3KeyPrefix",
696 | "documentation":"The Amazon S3 key prefix for the output speech file.
"
697 | },
698 | "SampleRate":{
699 | "shape":"SampleRate",
700 | "documentation":"The audio frequency specified in Hz.
The valid values for mp3 and ogg_vorbis are \"8000\", \"16000\", and \"22050\". The default value is \"22050\".
Valid values for pcm are \"8000\" and \"16000\". The default value is \"16000\".
"
701 | },
702 | "SnsTopicArn":{
703 | "shape":"SnsTopicArn",
704 | "documentation":"ARN for the SNS topic optionally used for providing status notification for a speech synthesis task.
"
705 | },
706 | "SpeechMarkTypes":{
707 | "shape":"SpeechMarkTypeList",
708 | "documentation":"The type of speech marks returned for the input text.
"
709 | },
710 | "Text":{
711 | "shape":"Text",
712 | "documentation":"The input text to synthesize. If you specify ssml as the TextType, follow the SSML format for the input text.
"
713 | },
714 | "TextType":{
715 | "shape":"TextType",
716 | "documentation":"Specifies whether the input text is plain text or SSML. The default value is plain text.
"
717 | },
718 | "VoiceId":{
719 | "shape":"VoiceId",
720 | "documentation":"Voice ID to use for the synthesis.
"
721 | },
722 | "LanguageCode":{
723 | "shape":"LanguageCode",
724 | "documentation":"Optional language code for the Speech Synthesis request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
If a bilingual voice is used and no language code is specified, Amazon Polly will use the default language of the bilingual voice. The default language for any voice is the one returned by the DescribeVoices operation for the LanguageCode parameter. For example, if no language code is specified, Aditi will use Indian English rather than Hindi.
"
725 | }
726 | }
727 | },
728 | "StartSpeechSynthesisTaskOutput":{
729 | "type":"structure",
730 | "members":{
731 | "SynthesisTask":{
732 | "shape":"SynthesisTask",
733 | "documentation":"SynthesisTask object that provides information and attributes about a newly submitted speech synthesis task.
"
734 | }
735 | }
736 | },
737 | "SynthesisTask":{
738 | "type":"structure",
739 | "members":{
740 | "TaskId":{
741 | "shape":"TaskId",
742 | "documentation":"The Amazon Polly generated identifier for a speech synthesis task.
"
743 | },
744 | "TaskStatus":{
745 | "shape":"TaskStatus",
746 | "documentation":"Current status of the individual speech synthesis task.
"
747 | },
748 | "TaskStatusReason":{
749 | "shape":"TaskStatusReason",
750 | "documentation":"Reason for the current status of a specific speech synthesis task, including errors if the task has failed.
"
751 | },
752 | "OutputUri":{
753 | "shape":"OutputUri",
754 | "documentation":"Pathway for the output speech file.
"
755 | },
756 | "CreationTime":{
757 | "shape":"DateTime",
758 | "documentation":"Timestamp for the time the synthesis task was started.
"
759 | },
760 | "RequestCharacters":{
761 | "shape":"RequestCharacters",
762 | "documentation":"Number of billable characters synthesized.
"
763 | },
764 | "SnsTopicArn":{
765 | "shape":"SnsTopicArn",
766 | "documentation":"ARN for the SNS topic optionally used for providing status notification for a speech synthesis task.
"
767 | },
768 | "LexiconNames":{
769 | "shape":"LexiconNameList",
770 | "documentation":"List of one or more pronunciation lexicon names you want the service to apply during synthesis. Lexicons are applied only if the language of the lexicon is the same as the language of the voice.
"
771 | },
772 | "OutputFormat":{
773 | "shape":"OutputFormat",
774 | "documentation":"The format in which the returned output will be encoded. For audio stream, this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.
"
775 | },
776 | "SampleRate":{
777 | "shape":"SampleRate",
778 | "documentation":"The audio frequency specified in Hz.
The valid values for mp3 and ogg_vorbis are \"8000\", \"16000\", and \"22050\". The default value is \"22050\".
Valid values for pcm are \"8000\" and \"16000\". The default value is \"16000\".
"
779 | },
780 | "SpeechMarkTypes":{
781 | "shape":"SpeechMarkTypeList",
782 | "documentation":"The type of speech marks returned for the input text.
"
783 | },
784 | "TextType":{
785 | "shape":"TextType",
786 | "documentation":"Specifies whether the input text is plain text or SSML. The default value is plain text.
"
787 | },
788 | "VoiceId":{
789 | "shape":"VoiceId",
790 | "documentation":"Voice ID to use for the synthesis.
"
791 | },
792 | "LanguageCode":{
793 | "shape":"LanguageCode",
794 | "documentation":"Optional language code for a synthesis task. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
If a bilingual voice is used and no language code is specified, Amazon Polly will use the default language of the bilingual voice. The default language for any voice is the one returned by the DescribeVoices operation for the LanguageCode parameter. For example, if no language code is specified, Aditi will use Indian English rather than Hindi.
"
795 | }
796 | },
797 | "documentation":"SynthesisTask object that provides information about a speech synthesis task.
"
798 | },
799 | "SynthesisTaskNotFoundException":{
800 | "type":"structure",
801 | "members":{
802 | "message":{"shape":"ErrorMessage"}
803 | },
804 | "documentation":"The Speech Synthesis task with requested Task ID cannot be found.
",
805 | "error":{"httpStatusCode":400},
806 | "exception":true
807 | },
808 | "SynthesisTasks":{
809 | "type":"list",
810 | "member":{"shape":"SynthesisTask"}
811 | },
812 | "SynthesizeSpeechInput":{
813 | "type":"structure",
814 | "required":[
815 | "OutputFormat",
816 | "Text",
817 | "VoiceId"
818 | ],
819 | "members":{
820 | "LexiconNames":{
821 | "shape":"LexiconNameList",
822 | "documentation":"List of one or more pronunciation lexicon names you want the service to apply during synthesis. Lexicons are applied only if the language of the lexicon is the same as the language of the voice. For information about storing lexicons, see PutLexicon.
"
823 | },
824 | "OutputFormat":{
825 | "shape":"OutputFormat",
826 | "documentation":" The format in which the returned output will be encoded. For audio stream, this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.
When pcm is used, the content returned is audio/pcm in a signed 16-bit, 1 channel (mono), little-endian format.
"
827 | },
828 | "SampleRate":{
829 | "shape":"SampleRate",
830 | "documentation":" The audio frequency specified in Hz.
The valid values for mp3 and ogg_vorbis are \"8000\", \"16000\", and \"22050\". The default value is \"22050\".
Valid values for pcm are \"8000\" and \"16000\". The default value is \"16000\".
"
831 | },
832 | "SpeechMarkTypes":{
833 | "shape":"SpeechMarkTypeList",
834 | "documentation":"The type of speech marks returned for the input text.
"
835 | },
836 | "Text":{
837 | "shape":"Text",
838 | "documentation":" Input text to synthesize. If you specify ssml as the TextType, follow the SSML format for the input text.
"
839 | },
840 | "TextType":{
841 | "shape":"TextType",
842 | "documentation":" Specifies whether the input text is plain text or SSML. The default value is plain text. For more information, see Using SSML.
"
843 | },
844 | "VoiceId":{
845 | "shape":"VoiceId",
846 | "documentation":" Voice ID to use for the synthesis. You can get a list of available voice IDs by calling the DescribeVoices operation.
"
847 | },
848 | "LanguageCode":{
849 | "shape":"LanguageCode",
850 | "documentation":"Optional language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
If a bilingual voice is used and no language code is specified, Amazon Polly will use the default language of the bilingual voice. The default language for any voice is the one returned by the DescribeVoices operation for the LanguageCode parameter. For example, if no language code is specified, Aditi will use Indian English rather than Hindi.
"
851 | }
852 | }
853 | },
854 | "SynthesizeSpeechOutput":{
855 | "type":"structure",
856 | "members":{
857 | "AudioStream":{
858 | "shape":"AudioStream",
859 | "documentation":" Stream containing the synthesized speech.
"
860 | },
861 | "ContentType":{
862 | "shape":"ContentType",
863 | "documentation":" Specifies the type of audio stream. This should reflect the OutputFormat parameter in your request.
-
If you request mp3 as the OutputFormat, the ContentType returned is audio/mpeg.
-
If you request ogg_vorbis as the OutputFormat, the ContentType returned is audio/ogg.
-
If you request pcm as the OutputFormat, the ContentType returned is audio/pcm in a signed 16-bit, 1 channel (mono), little-endian format.
-
If you request json as the OutputFormat, the ContentType returned is audio/json.
",
864 | "location":"header",
865 | "locationName":"Content-Type"
866 | },
867 | "RequestCharacters":{
868 | "shape":"RequestCharacters",
869 | "documentation":"Number of characters synthesized.
",
870 | "location":"header",
871 | "locationName":"x-amzn-RequestCharacters"
872 | }
873 | },
874 | "payload":"AudioStream"
875 | },
876 | "TaskId":{
877 | "type":"string",
878 | "max":128,
879 | "min":1
880 | },
881 | "TaskStatus":{
882 | "type":"string",
883 | "enum":[
884 | "scheduled",
885 | "inProgress",
886 | "completed",
887 | "failed"
888 | ]
889 | },
890 | "TaskStatusReason":{"type":"string"},
891 | "Text":{"type":"string"},
892 | "TextLengthExceededException":{
893 | "type":"structure",
894 | "members":{
895 | "message":{"shape":"ErrorMessage"}
896 | },
897 | "documentation":"The value of the \"Text\" parameter is longer than the accepted limits. For the SynthesizeSpeech API, the limit for input text is a maximum of 6000 characters total, of which no more than 3000 can be billed characters. For the StartSpeechSynthesisTask API, the maximum is 200,000 characters, of which no more than 100,000 can be billed characters. SSML tags are not counted as billed characters.
",
898 | "error":{"httpStatusCode":400},
899 | "exception":true
900 | },
901 | "TextType":{
902 | "type":"string",
903 | "enum":[
904 | "ssml",
905 | "text"
906 | ]
907 | },
908 | "UnsupportedPlsAlphabetException":{
909 | "type":"structure",
910 | "members":{
911 | "message":{"shape":"ErrorMessage"}
912 | },
913 | "documentation":"The alphabet specified by the lexicon is not a supported alphabet. Valid values are x-sampa and ipa.
",
914 | "error":{"httpStatusCode":400},
915 | "exception":true
916 | },
917 | "UnsupportedPlsLanguageException":{
918 | "type":"structure",
919 | "members":{
920 | "message":{"shape":"ErrorMessage"}
921 | },
922 | "documentation":"The language specified in the lexicon is unsupported. For a list of supported languages, see Lexicon Attributes.
",
923 | "error":{"httpStatusCode":400},
924 | "exception":true
925 | },
926 | "Voice":{
927 | "type":"structure",
928 | "members":{
929 | "Gender":{
930 | "shape":"Gender",
931 | "documentation":"Gender of the voice.
"
932 | },
933 | "Id":{
934 | "shape":"VoiceId",
935 | "documentation":"Amazon Polly assigned voice ID. This is the ID that you specify when calling the SynthesizeSpeech operation.
"
936 | },
937 | "LanguageCode":{
938 | "shape":"LanguageCode",
939 | "documentation":"Language code of the voice.
"
940 | },
941 | "LanguageName":{
942 | "shape":"LanguageName",
943 | "documentation":"Human readable name of the language in English.
"
944 | },
945 | "Name":{
946 | "shape":"VoiceName",
947 | "documentation":"Name of the voice (for example, Salli, Kendra, etc.). This provides a human readable voice name that you might display in your application.
"
948 | },
949 | "AdditionalLanguageCodes":{
950 | "shape":"LanguageCodeList",
951 | "documentation":"Additional codes for languages available for the specified voice in addition to its default language.
For example, the default language for Aditi is Indian English (en-IN) because it was first used for that language. Since Aditi is bilingual and fluent in both Indian English and Hindi, this parameter would show the code hi-IN.
"
952 | }
953 | },
954 | "documentation":"Description of the voice.
"
955 | },
956 | "VoiceId":{
957 | "type":"string",
958 | "enum":[
959 | "Geraint",
960 | "Gwyneth",
961 | "Mads",
962 | "Naja",
963 | "Hans",
964 | "Marlene",
965 | "Nicole",
966 | "Russell",
967 | "Amy",
968 | "Brian",
969 | "Emma",
970 | "Raveena",
971 | "Ivy",
972 | "Joanna",
973 | "Joey",
974 | "Justin",
975 | "Kendra",
976 | "Kimberly",
977 | "Matthew",
978 | "Salli",
979 | "Conchita",
980 | "Enrique",
981 | "Miguel",
982 | "Penelope",
983 | "Chantal",
984 | "Celine",
985 | "Lea",
986 | "Mathieu",
987 | "Dora",
988 | "Karl",
989 | "Carla",
990 | "Giorgio",
991 | "Mizuki",
992 | "Liv",
993 | "Lotte",
994 | "Ruben",
995 | "Ewa",
996 | "Jacek",
997 | "Jan",
998 | "Maja",
999 | "Ricardo",
1000 | "Vitoria",
1001 | "Cristiano",
1002 | "Ines",
1003 | "Carmen",
1004 | "Maxim",
1005 | "Tatyana",
1006 | "Astrid",
1007 | "Filiz",
1008 | "Vicki",
1009 | "Takumi",
1010 | "Seoyeon",
1011 | "Aditi",
1012 | "Zhiyu"
1013 | ]
1014 | },
1015 | "VoiceList":{
1016 | "type":"list",
1017 | "member":{"shape":"Voice"}
1018 | },
1019 | "VoiceName":{"type":"string"}
1020 | },
1021 | "documentation":"Amazon Polly is a web service that makes it easy to synthesize speech from text.
The Amazon Polly service provides API operations for synthesizing high-quality speech from plain text and Speech Synthesis Markup Language (SSML), along with managing pronunciation lexicons that enable you to get the best results for your application domain.
"
1022 | }
1023 |
--------------------------------------------------------------------------------