├── tts ├── src │ └── tts │ │ ├── __init__.py │ │ ├── data │ │ ├── error.ogg │ │ ├── connerror.ogg │ │ └── models │ │ │ └── polly │ │ │ └── 2016-06-10 │ │ │ ├── paginators-1.json │ │ │ ├── examples-1.json │ │ │ └── service-2.json │ │ ├── synthesizer.py │ │ └── amazonpolly.py ├── srv │ ├── Synthesizer.srv │ └── Polly.srv ├── action │ └── Speech.action ├── NOTICE.txt ├── test │ ├── integration_tests.test │ ├── test_unit_synthesizer.py │ ├── test_unit_polly.py │ └── test_integration.py ├── config │ └── sample_configuration.yaml ├── launch │ ├── sample_application.launch │ └── tts_polly.launch ├── CHANGELOG.rst ├── scripts │ ├── polly_node.py │ ├── synthesizer_node.py │ ├── voicer.py │ └── tts_node.py ├── setup.py ├── package.xml ├── CMakeLists.txt └── LICENSE.txt ├── .gitignore ├── .github ├── workflows │ ├── release_latest.repos │ ├── autoapprove.yml │ ├── automerge.yml │ ├── build_and_test.yml │ └── build_and_test_release_latest.yml ├── dependabot.yml └── PULL_REQUEST_TEMPLATE.md ├── CODE_OF_CONDUCT.md ├── .codecov.yml ├── CONTRIBUTING.md ├── LICENSE └── README.md /tts/src/tts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .idea/ 3 | -------------------------------------------------------------------------------- /tts/srv/Synthesizer.srv: -------------------------------------------------------------------------------- 1 | string text 2 | string metadata 3 | --- 4 | string result 5 | -------------------------------------------------------------------------------- /tts/src/tts/data/error.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-robotics/tts-ros1/HEAD/tts/src/tts/data/error.ogg -------------------------------------------------------------------------------- /tts/src/tts/data/connerror.ogg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-robotics/tts-ros1/HEAD/tts/src/tts/data/connerror.ogg -------------------------------------------------------------------------------- /.github/workflows/release_latest.repos: -------------------------------------------------------------------------------- 1 | repositories: 2 | tts-ros1: 3 | type: git 4 | url: https://github.com/aws-robotics/tts-ros1 5 | version: release-latest 6 | -------------------------------------------------------------------------------- /tts/action/Speech.action: -------------------------------------------------------------------------------- 1 | #goal definition 2 | string text 3 | string metadata 4 | --- 5 | #result definition 6 | string response 7 | --- 8 | #feedback 9 | string data 10 | -------------------------------------------------------------------------------- /tts/NOTICE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | This product includes software developed by 4 | Amazon Technologies, Inc (http://www.amazon.com/). 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "16:00" 8 | open-pull-requests-limit: 10 9 | 10 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /tts/src/tts/data/models/polly/2016-06-10/paginators-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "pagination": { 3 | "DescribeVoices": { 4 | "input_token": "NextToken", 5 | "output_token": "NextToken", 6 | "result_key": "Voices" 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /tts/test/integration_tests.test: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /tts/config/sample_configuration.yaml: -------------------------------------------------------------------------------- 1 | # This is the AWS Client Configuration used by the AWS service client in the Node. If given the node will load the 2 | # provided configuration when initializing the client. 3 | aws_client_configuration: 4 | # Specifies where you want the client to communicate. Examples include us-east-1 or us-west-1. You must ensure that 5 | # the service you want to use has an endpoint in the region you configure. 6 | region: "us-west-2" 7 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | ignore: 3 | - "**/test/*" 4 | status: 5 | # doc: https://docs.codecov.io/docs/commit-status 6 | project: 7 | default: 8 | # will use the coverage from the base commit (pull request base or parent commit) coverage to compare against. 9 | target: auto 10 | threshold: null 11 | # will use the pull request base if the commit is on a pull request. If not, the parent commit will be used. 12 | base: auto 13 | -------------------------------------------------------------------------------- /tts/srv/Polly.srv: -------------------------------------------------------------------------------- 1 | string polly_action 2 | string text 3 | string text_type 4 | string language_code 5 | string voice_id 6 | string output_format 7 | string output_path 8 | string sample_rate 9 | string lexicon_content 10 | string lexicon_name 11 | string[] lexicon_names 12 | string[] speech_mark_types 13 | uint32 max_results 14 | string next_token 15 | string sns_topic_arn 16 | string task_id 17 | string task_status 18 | string output_s3_bucket_name 19 | string output_s3_key_prefix 20 | bool include_additional_language_codes 21 | --- 22 | string result 23 | -------------------------------------------------------------------------------- /.github/workflows/autoapprove.yml: -------------------------------------------------------------------------------- 1 | name: Auto approve 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | # Auto-approve dependabot PRs since this repo requires at least one approving review. 7 | # Dependabot will automatically merge minor version upgrades 8 | # (see .dependabot/config.yml for more info). 9 | auto-approve-dependabot: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: hmarr/auto-approve-action@v2.0.0 13 | if: github.actor == 'dependabot[bot]' || github.actor == 'dependabot-preview[bot]' 14 | with: 15 | github-token: "${{ secrets.GITHUB_TOKEN }}" -------------------------------------------------------------------------------- /tts/launch/sample_application.launch: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /tts/CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 2 | Changelog for package tts 3 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 4 | 5 | 1.0.1 (2019-03-20) 6 | ------------------ 7 | * Merge pull request `#2 `_ from yyu/fix 8 | no assert_called() for older versions of mock 9 | * no assert_called() for older versions of mock 10 | * remove rostest from top level find_package (`#1 `_) 11 | It's conditionally found in the testing section only so it's only a test_depend 12 | * Contributors: Tully Foote, Yuan "Forrest" Yu, y² 13 | 14 | 1.0.0 (2019-03-20) 15 | ------------------ 16 | -------------------------------------------------------------------------------- /tts/scripts/polly_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | 17 | if __name__ == '__main__': 18 | import tts.amazonpolly 19 | tts.amazonpolly.main() 20 | -------------------------------------------------------------------------------- /tts/scripts/synthesizer_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | 17 | if __name__ == "__main__": 18 | import tts.synthesizer 19 | tts.synthesizer.main() 20 | -------------------------------------------------------------------------------- /.github/workflows/automerge.yml: -------------------------------------------------------------------------------- 1 | name: Auto merge 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | master 7 | pull_request_review: 8 | types: 9 | - submitted 10 | check_suite: 11 | types: 12 | - completed 13 | status: {} 14 | jobs: 15 | # Automatically merge approved and green dependabot PRs. 16 | auto-merge-dependabot: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: pascalgn/automerge-action@v0.13.1 20 | if: github.actor == 'dependabot[bot]' || github.actor == 'dependabot-preview[bot]' 21 | env: 22 | GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" 23 | MERGE_LABELS: "dependencies" 24 | MERGE_METHOD: "squash" # Sqush and merge 25 | MERGE_COMMIT_MESSAGE: "pull-request-title-and-description" 26 | MERGE_RETRY_SLEEP: "1200000" # Retry after 20m, enough time for check suites to run 27 | UPDATE_RETRIES: "6" 28 | UPDATE_METHOD: "rebase" # Rebase PR on base branch 29 | UPDATE_RETRY_SLEEP: "300000" -------------------------------------------------------------------------------- /tts/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://aws.amazon.com/apache2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 13 | 14 | import os 15 | from distutils.core import setup 16 | from catkin_pkg.python_setup import generate_distutils_setup 17 | 18 | 19 | # ROS PACKAGING 20 | # using distutils : https://docs.python.org/2/distutils 21 | # fetch values from package.xml 22 | setup_args = generate_distutils_setup( 23 | packages=[ 24 | 'tts', 25 | ], 26 | package_dir={ 27 | '': 'src', 28 | }, 29 | package_data={ 30 | '': ['data/*.ogg', 'data/models/polly/2016-06-10/*.json'] 31 | }, 32 | ) 33 | setup(**setup_args) 34 | -------------------------------------------------------------------------------- /tts/launch/tts_polly.launch: -------------------------------------------------------------------------------- 1 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /tts/scripts/voicer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | """Usage: 17 | 18 | (assuming TTS action server has been started via `roslaunch tts tts_polly.launch`) 19 | 20 | Plain text:: 21 | 22 | $ rosrun tts voicer.py 'Hello World' 23 | 24 | SSML:: 25 | 26 | $ rosrun tts voicer.py \ 27 | 'Mary has a little lamb.' \ 28 | '{"text_type":"ssml"}' 29 | """ 30 | 31 | 32 | import sys 33 | import actionlib 34 | import rospy 35 | from tts.msg import SpeechAction, SpeechGoal 36 | 37 | 38 | if __name__ == '__main__': 39 | rospy.init_node('tts_action_client') 40 | client = actionlib.SimpleActionClient('tts', SpeechAction) 41 | client.wait_for_server() 42 | 43 | goal = SpeechGoal() 44 | 45 | goal.text = sys.argv[1] if len(sys.argv) > 1 else 'I got no idea.' 46 | goal.metadata = sys.argv[2] if len(sys.argv) > 2 else '' 47 | 48 | client.send_goal(goal) 49 | client.wait_for_result() 50 | print('\n' + client.get_result().response) 51 | -------------------------------------------------------------------------------- /tts/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | tts 4 | 1.0.2 5 | Package enabling a robot to speak with a human voice by providing a Text-To-Speech ROS service 6 | http://wiki.ros.org/tts 7 | 8 | AWS RoboMaker 9 | AWS RoboMaker 10 | 11 | Apache 2.0 12 | 13 | catkin 14 | 15 | actionlib 16 | actionlib_msgs 17 | message_generation 18 | rospy 19 | std_msgs 20 | python-boto3 21 | sound_play 22 | rosunit 23 | rostest 24 | 25 | actionlib 26 | actionlib_msgs 27 | rospy 28 | std_msgs 29 | sound_play 30 | 31 | actionlib 32 | actionlib_msgs 33 | rospy 34 | std_msgs 35 | message_runtime 36 | python-boto3 37 | sound_play 38 | 39 | rosunit 40 | rostest 41 | python-mock 42 | 43 | -------------------------------------------------------------------------------- /tts/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(tts) 3 | 4 | find_package(catkin REQUIRED COMPONENTS actionlib_msgs message_generation rospy rosunit std_msgs sound_play) 5 | 6 | catkin_python_setup() 7 | 8 | ################################################ 9 | ## Declare ROS messages, services and actions ## 10 | ################################################ 11 | 12 | ## Generate services in the 'srv' folder 13 | add_service_files(FILES Synthesizer.srv Polly.srv) 14 | 15 | ## Generate actions in the 'action' folder 16 | add_action_files(FILES Speech.action) 17 | 18 | ## Generate added messages and services with any dependencies listed here 19 | generate_messages(DEPENDENCIES actionlib_msgs std_msgs) 20 | 21 | ################################### 22 | ## catkin specific configuration ## 23 | ################################### 24 | ## The catkin_package macro generates cmake config files for your package 25 | ## Declare things to be passed to dependent projects 26 | ## LIBRARIES: libraries you create in this project that dependent projects also need 27 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 28 | ## DEPENDS: system dependencies of this project that dependent projects also need 29 | catkin_package( 30 | LIBRARIES tts 31 | CATKIN_DEPENDS actionlib_msgs message_runtime rospy std_msgs 32 | ) 33 | 34 | ############# 35 | ## Install ## 36 | ############# 37 | 38 | # all install targets should use catkin DESTINATION variables 39 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 40 | 41 | ## Mark executable scripts (Python etc.) for installation 42 | ## in contrast to setup.py, you can choose the destination 43 | install(PROGRAMS 44 | scripts/polly_node.py 45 | scripts/synthesizer_node.py 46 | scripts/tts_node.py 47 | scripts/voicer.py 48 | DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 49 | ) 50 | 51 | install(DIRECTORY 52 | config 53 | launch 54 | DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 55 | ) 56 | 57 | ############# 58 | ## Testing ## 59 | ############# 60 | if(CATKIN_ENABLE_TESTING) 61 | ## Add folders to be run by python nosetests 62 | catkin_add_nosetests(test/test_unit_synthesizer.py) 63 | catkin_add_nosetests(test/test_unit_polly.py) 64 | 65 | if(BUILD_AWS_TESTING) 66 | find_package(rostest REQUIRED COMPONENTS tts) 67 | add_rostest(test/integration_tests.test DEPENDENCIES ${tts_EXPORTED_TARGETS}) 68 | endif() 69 | endif() 70 | 71 | 72 | -------------------------------------------------------------------------------- /.github/workflows/build_and_test.yml: -------------------------------------------------------------------------------- 1 | name: Build & Test 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | # Run every hour. This helps detect flakiness, 9 | # and broken external dependencies. 10 | - cron: '0 * * * *' 11 | 12 | jobs: 13 | build_and_test_master: 14 | name: Build and Test Master ROS ${{ matrix.ros_version }} ${{ matrix.ros_distro }} 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | ros_distro: [kinetic, melodic] 20 | include: 21 | - ros_distro: kinetic 22 | ubuntu_distro: xenial 23 | - ros_distro: melodic 24 | ubuntu_distro: bionic 25 | container: 26 | image: rostooling/setup-ros-docker:ubuntu-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }}-ros-base-latest 27 | env: 28 | # Needed for the CMakeLists.txt setup 29 | ROS_DISTRO: ${{ matrix.ros_distro }} 30 | ROS_VERSION: 1 31 | steps: 32 | # Needed to access the vcs repos file from the workspace 33 | - name: Checkout source 34 | uses: actions/checkout@v2 35 | - name: Run action-ros-ci to build and test 36 | uses: ros-tooling/action-ros-ci@0.1.2 37 | with: 38 | target-ros1-distro: ${{ env.ROS_VERSION == '1' && matrix.ros_distro || '' }} 39 | target-ros2-distro: ${{ env.ROS_VERSION == '2' && matrix.ros_distro || '' }} 40 | package-name: tts 41 | vcs-repo-file-url: '' 42 | - name: Upload resulting colcon logs 43 | uses: actions/upload-artifact@v2.2.2 44 | with: 45 | name: colcon-logs-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }} 46 | path: ros_ws/log 47 | log_workflow_status_to_cloudwatch: 48 | runs-on: ubuntu-latest 49 | container: 50 | image: ubuntu:bionic 51 | needs: 52 | - build_and_test_master 53 | # Don't skip if prior steps failed, but don't run on a fork because it won't have access to AWS secrets 54 | if: ${{ always() && ! github.event.repository.fork && ! github.event.pull_request.head.repo.fork }} 55 | steps: 56 | - name: Configure AWS Credentials 57 | uses: aws-actions/configure-aws-credentials@v1 58 | with: 59 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 60 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 61 | aws-region: ${{ secrets.AWS_REGION }} 62 | - uses: ros-tooling/action-cloudwatch-metrics@0.0.5 63 | with: 64 | # Checks if any of the jobs have failed. 65 | # 66 | # needs.*.result is returns the list of all success statuses as an 67 | # array, i.e. ['success', 'failure, 'success'] 68 | # join() converts the array to a string 'successfailuresuccess' 69 | # contains() checks whether the string contains failure 70 | metric-value: ${{ ! contains(join(needs.*.result, ''), 'failure') && ! contains(join(needs.*.result, ''), 'cancelled') }} 71 | -------------------------------------------------------------------------------- /.github/workflows/build_and_test_release_latest.yml: -------------------------------------------------------------------------------- 1 | name: Build & Test release-latest 2 | on: 3 | schedule: 4 | # Run every hour. This helps detect flakiness, 5 | # and broken external dependencies. 6 | - cron: '0 * * * *' 7 | 8 | jobs: 9 | build_and_test_release_latest: 10 | name: Build and Test Release Latest ROS ${{ matrix.ros_version }} ${{ matrix.ros_distro }} 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | ros_distro: [kinetic, melodic] 16 | include: 17 | - ros_distro: kinetic 18 | ubuntu_distro: xenial 19 | - ros_distro: melodic 20 | ubuntu_distro: bionic 21 | container: 22 | image: rostooling/setup-ros-docker:ubuntu-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }}-ros-base-latest 23 | env: 24 | # Needed for the CMakeLists.txt setup 25 | ROS_DISTRO: ${{ matrix.ros_distro }} 26 | ROS_VERSION: 1 27 | steps: 28 | # Needed to access the vcs repos file from the workspace 29 | - name: Checkout source 30 | uses: actions/checkout@v2 31 | - name: Run action-ros-ci to build and test 32 | uses: ros-tooling/action-ros-ci@0.1.2 33 | with: 34 | target-ros1-distro: ${{ env.ROS_VERSION == '1' && matrix.ros_distro || '' }} 35 | target-ros2-distro: ${{ env.ROS_VERSION == '2' && matrix.ros_distro || '' }} 36 | package-name: tts 37 | # schedule runs against the default branch (master), so specify release-latest via repos file 38 | vcs-repo-file-url: "${{ github.workspace }}/.github/workflows/release_latest.repos" 39 | - name: Upload resulting colcon logs 40 | uses: actions/upload-artifact@v2.2.2 41 | with: 42 | name: colcon-logs-${{ matrix.ubuntu_distro }}-ros-${{ matrix.ros_distro }} 43 | path: ros_ws/log 44 | log_workflow_status_to_cloudwatch: 45 | runs-on: ubuntu-latest 46 | container: 47 | image: ubuntu:bionic 48 | needs: 49 | - build_and_test_release_latest 50 | # Don't skip if prior steps failed, but don't run on a fork because it won't have access to AWS secrets 51 | if: ${{ always() && ! github.event.repository.fork && ! github.event.pull_request.head.repo.fork }} 52 | steps: 53 | - name: Configure AWS Credentials 54 | uses: aws-actions/configure-aws-credentials@v1 55 | with: 56 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 57 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 58 | aws-region: ${{ secrets.AWS_REGION }} 59 | - uses: ros-tooling/action-cloudwatch-metrics@0.0.5 60 | with: 61 | metric-dimensions: >- 62 | [ 63 | { "Name": "github.event_name", "Value": "${{ github.event_name }}" }, 64 | { "Name": "github.ref", "Value": "release-latest" }, 65 | { "Name": "github.repository", "Value": "${{ github.repository }}" } 66 | ] 67 | # Checks if any of the jobs have failed. 68 | # 69 | # needs.*.result is returns the list of all success statuses as an 70 | # array, i.e. ['success', 'failure, 'success'] 71 | # join() converts the array to a string 'successfailuresuccess' 72 | # contains() checks whether the string contains failure 73 | metric-value: ${{ ! contains(join(needs.*.result, ''), 'failure') && ! contains(join(needs.*.result, ''), 'cancelled') }} 74 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/aws-robotics/tts-ros1/issues), or [recently closed](https://github.com/aws-robotics/tts-ros1/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels ((enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/aws-robotics/tts-ros1/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/aws-robotics/tts-ros1/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /tts/scripts/tts_node.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | """A very simple Action Server that does TTS. 17 | 18 | It is a combination of a synthesizer and a player. Being an action server, it can be used in two different manners. 19 | 20 | 1. Play and wait for it to finish 21 | --------------------------------- 22 | 23 | A user can choose to be blocked until the audio playing is done. This is especially useful in interactive scenarios. 24 | 25 | Example:: 26 | 27 | rospy.init_node('tts_action_client') 28 | client = actionlib.SimpleActionClient('tts', SpeechAction) 29 | client.wait_for_server() 30 | goal = SpeechGoal() 31 | goal.text = 'Let me ask you a question, please give me your answer.' 32 | client.send_goal(goal) 33 | client.wait_for_result() 34 | 35 | # start listening to a response or waiting for some input to continue the interaction 36 | 37 | 2. Play and forget 38 | ------------------ 39 | 40 | A user can also choose not to wait:: 41 | 42 | rospy.init_node('tts_action_client') 43 | client = actionlib.SimpleActionClient('tts', SpeechAction) 44 | client.wait_for_server() 45 | goal = SpeechGoal() 46 | goal.text = 'Let me talk, you can to something else in the meanwhile.' 47 | client.send_goal(goal) 48 | 49 | This is useful when the robot wants to do stuff while the audio is being played. For example, a robot may start to 50 | read some instructions and immediately get ready for any input. 51 | """ 52 | 53 | import json 54 | 55 | import actionlib 56 | import rospy 57 | from tts.msg import SpeechAction, SpeechResult 58 | from tts.srv import Synthesizer 59 | 60 | from sound_play.libsoundplay import SoundClient 61 | 62 | 63 | def play(filename): 64 | """plays the wav or ogg file using sound_play""" 65 | SoundClient(blocking=True).playWave(filename) 66 | 67 | 68 | def do_synthesize(goal): 69 | """calls synthesizer service to do the job""" 70 | rospy.wait_for_service('synthesizer') 71 | synthesize = rospy.ServiceProxy('synthesizer', Synthesizer) 72 | return synthesize(goal.text, goal.metadata) 73 | 74 | 75 | def finish_with_result(s): 76 | """responds the client""" 77 | tts_server_result = SpeechResult(s) 78 | server.set_succeeded(tts_server_result) 79 | rospy.loginfo(tts_server_result) 80 | 81 | 82 | def do_speak(goal): 83 | """The action handler. 84 | 85 | Note that although it responds to client after the audio play is finished, a client can choose 86 | not to wait by not calling ``SimpleActionClient.waite_for_result()``. 87 | """ 88 | rospy.loginfo('speech goal: {}'.format(goal)) 89 | 90 | res = do_synthesize(goal) 91 | rospy.loginfo('synthesizer returns: {}'.format(res)) 92 | 93 | try: 94 | r = json.loads(res.result) 95 | except Exception as e: 96 | s = 'Expecting JSON from synthesizer but got {}'.format(res.result) 97 | rospy.logerr('{}. Exception: {}'.format(s, e)) 98 | finish_with_result(s) 99 | return 100 | 101 | result = '' 102 | 103 | if 'Audio File' in r: 104 | audio_file = r['Audio File'] 105 | rospy.loginfo('Will play {}'.format(audio_file)) 106 | play(audio_file) 107 | result = audio_file 108 | 109 | if 'Exception' in r: 110 | result = '[ERROR] {}'.format(r) 111 | rospy.logerr(result) 112 | 113 | finish_with_result(result) 114 | 115 | 116 | if __name__ == '__main__': 117 | rospy.init_node('tts_node') 118 | server = actionlib.SimpleActionServer('tts', SpeechAction, do_speak, False) 119 | server.start() 120 | rospy.spin() 121 | -------------------------------------------------------------------------------- /tts/src/tts/data/models/polly/2016-06-10/examples-1.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0", 3 | "examples": { 4 | "DeleteLexicon": [ 5 | { 6 | "input": { 7 | "Name": "example" 8 | }, 9 | "output": { 10 | }, 11 | "comments": { 12 | "input": { 13 | }, 14 | "output": { 15 | } 16 | }, 17 | "description": "Deletes a specified pronunciation lexicon stored in an AWS Region.", 18 | "id": "to-delete-a-lexicon-1481922498332", 19 | "title": "To delete a lexicon" 20 | } 21 | ], 22 | "DescribeVoices": [ 23 | { 24 | "input": { 25 | "LanguageCode": "en-GB" 26 | }, 27 | "output": { 28 | "Voices": [ 29 | { 30 | "Gender": "Female", 31 | "Id": "Emma", 32 | "LanguageCode": "en-GB", 33 | "LanguageName": "British English", 34 | "Name": "Emma" 35 | }, 36 | { 37 | "Gender": "Male", 38 | "Id": "Brian", 39 | "LanguageCode": "en-GB", 40 | "LanguageName": "British English", 41 | "Name": "Brian" 42 | }, 43 | { 44 | "Gender": "Female", 45 | "Id": "Amy", 46 | "LanguageCode": "en-GB", 47 | "LanguageName": "British English", 48 | "Name": "Amy" 49 | } 50 | ] 51 | }, 52 | "comments": { 53 | "input": { 54 | }, 55 | "output": { 56 | } 57 | }, 58 | "description": "Returns the list of voices that are available for use when requesting speech synthesis. Displayed languages are those within the specified language code. If no language code is specified, voices for all available languages are displayed.", 59 | "id": "to-describe-available-voices-1482180557753", 60 | "title": "To describe available voices" 61 | } 62 | ], 63 | "GetLexicon": [ 64 | { 65 | "input": { 66 | "Name": "" 67 | }, 68 | "output": { 69 | "Lexicon": { 70 | "Content": "\r\n\r\n \r\n W3C\r\n World Wide Web Consortium\r\n \r\n", 71 | "Name": "example" 72 | }, 73 | "LexiconAttributes": { 74 | "Alphabet": "ipa", 75 | "LanguageCode": "en-US", 76 | "LastModified": 1478542980.117, 77 | "LexemesCount": 1, 78 | "LexiconArn": "arn:aws:polly:us-east-1:123456789012:lexicon/example", 79 | "Size": 503 80 | } 81 | }, 82 | "comments": { 83 | "input": { 84 | }, 85 | "output": { 86 | } 87 | }, 88 | "description": "Returns the content of the specified pronunciation lexicon stored in an AWS Region.", 89 | "id": "to-retrieve-a-lexicon-1481912870836", 90 | "title": "To retrieve a lexicon" 91 | } 92 | ], 93 | "ListLexicons": [ 94 | { 95 | "input": { 96 | }, 97 | "output": { 98 | "Lexicons": [ 99 | { 100 | "Attributes": { 101 | "Alphabet": "ipa", 102 | "LanguageCode": "en-US", 103 | "LastModified": 1478542980.117, 104 | "LexemesCount": 1, 105 | "LexiconArn": "arn:aws:polly:us-east-1:123456789012:lexicon/example", 106 | "Size": 503 107 | }, 108 | "Name": "example" 109 | } 110 | ] 111 | }, 112 | "comments": { 113 | "input": { 114 | }, 115 | "output": { 116 | } 117 | }, 118 | "description": "Returns a list of pronunciation lexicons stored in an AWS Region.", 119 | "id": "to-list-all-lexicons-in-a-region-1481842106487", 120 | "title": "To list all lexicons in a region" 121 | } 122 | ], 123 | "PutLexicon": [ 124 | { 125 | "input": { 126 | "Content": "file://example.pls", 127 | "Name": "W3C" 128 | }, 129 | "output": { 130 | }, 131 | "comments": { 132 | "input": { 133 | }, 134 | "output": { 135 | } 136 | }, 137 | "description": "Stores a pronunciation lexicon in an AWS Region.", 138 | "id": "to-save-a-lexicon-1482272584088", 139 | "title": "To save a lexicon" 140 | } 141 | ], 142 | "SynthesizeSpeech": [ 143 | { 144 | "input": { 145 | "LexiconNames": [ 146 | "example" 147 | ], 148 | "OutputFormat": "mp3", 149 | "SampleRate": "8000", 150 | "Text": "All Gaul is divided into three parts", 151 | "TextType": "text", 152 | "VoiceId": "Joanna" 153 | }, 154 | "output": { 155 | "AudioStream": "TEXT", 156 | "ContentType": "audio/mpeg", 157 | "RequestCharacters": 37 158 | }, 159 | "comments": { 160 | "input": { 161 | }, 162 | "output": { 163 | } 164 | }, 165 | "description": "Synthesizes plain text or SSML into a file of human-like speech.", 166 | "id": "to-synthesize-speech-1482186064046", 167 | "title": "To synthesize speech" 168 | } 169 | ] 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /tts/test/test_unit_synthesizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | from __future__ import print_function 17 | 18 | from mock import patch, MagicMock # python2 uses backport of unittest.mock(docs.python.org/3/library/unittest.mock.html) 19 | import unittest 20 | 21 | 22 | class TestSynthesizer(unittest.TestCase): 23 | 24 | def setUp(self): 25 | """important: import tts which is a relay package:: 26 | 27 | devel/lib/python2.7/dist-packages/ 28 | +-- tts 29 | | +-- __init__.py 30 | +-- ... 31 | 32 | per http://docs.ros.org/api/catkin/html/user_guide/setup_dot_py.html: 33 | 34 | A relay package is a folder with an __init__.py folder and nothing else. 35 | Importing this folder in python will execute the contents of __init__.py, 36 | which will in turn import the original python modules in the folder in 37 | the sourcespace using the python exec() function. 38 | """ 39 | import tts 40 | self.assertIsNotNone(tts) 41 | 42 | def test_init(self): 43 | from tts.synthesizer import SpeechSynthesizer 44 | speech_synthesizer = SpeechSynthesizer() 45 | self.assertEqual('text', speech_synthesizer.default_text_type) 46 | 47 | @patch('tts.amazonpolly.AmazonPolly') 48 | def test_good_synthesis_with_mostly_default_args_using_polly_lib(self, polly_class_mock): 49 | polly_obj_mock = MagicMock() 50 | polly_class_mock.return_value = polly_obj_mock 51 | 52 | test_text = 'hello' 53 | test_metadata = ''' 54 | { 55 | "output_path": "/tmp/test" 56 | } 57 | ''' 58 | expected_polly_synthesize_args = { 59 | 'output_format': 'ogg_vorbis', 60 | 'voice_id': 'Joanna', 61 | 'sample_rate': '22050', 62 | 'text_type': 'text', 63 | 'text': test_text, 64 | 'output_path': "/tmp/test" 65 | } 66 | 67 | from tts.synthesizer import SpeechSynthesizer 68 | from tts.srv import SynthesizerRequest 69 | speech_synthesizer = SpeechSynthesizer(engine='POLLY_LIBRARY') 70 | request = SynthesizerRequest(text=test_text, metadata=test_metadata) 71 | response = speech_synthesizer._node_request_handler(request) 72 | 73 | self.assertGreater(polly_class_mock.call_count, 0) 74 | polly_obj_mock.synthesize.assert_called_with(**expected_polly_synthesize_args) 75 | 76 | self.assertEqual(response.result, polly_obj_mock.synthesize.return_value.result) 77 | 78 | @patch('tts.amazonpolly.AmazonPolly') 79 | def test_synthesis_with_bad_metadata_using_polly_lib(self, polly_class_mock): 80 | polly_obj_mock = MagicMock() 81 | polly_class_mock.return_value = polly_obj_mock 82 | 83 | test_text = 'hello' 84 | test_metadata = '''I am no JSON''' 85 | 86 | from tts.synthesizer import SpeechSynthesizer 87 | from tts.srv import SynthesizerRequest 88 | speech_synthesizer = SpeechSynthesizer(engine='POLLY_LIBRARY') 89 | request = SynthesizerRequest(text=test_text, metadata=test_metadata) 90 | response = speech_synthesizer._node_request_handler(request) 91 | 92 | self.assertTrue(response.result.startswith('Exception: ')) 93 | 94 | @patch('tts.amazonpolly.AmazonPolly') 95 | def test_bad_engine(self, polly_class_mock): 96 | polly_obj_mock = MagicMock() 97 | polly_class_mock.return_value = polly_obj_mock 98 | 99 | ex = None 100 | 101 | from tts.synthesizer import SpeechSynthesizer 102 | try: 103 | SpeechSynthesizer(engine='NON-EXIST ENGINE') 104 | except Exception as e: 105 | ex = e 106 | 107 | self.assertTrue(isinstance(ex, SpeechSynthesizer.BadEngineError)) 108 | 109 | def test_cli_help_message(self): 110 | import os 111 | source_file_dir = os.path.dirname(os.path.abspath(__file__)) 112 | synthersizer_path = os.path.join(source_file_dir, '..', 'scripts', 'synthesizer_node.py') 113 | import subprocess 114 | o = subprocess.check_output(['python', synthersizer_path, '-h']) 115 | self.assertTrue(str(o).startswith('Usage: ')) 116 | 117 | @patch('tts.synthesizer.SpeechSynthesizer') 118 | def test_cli_engine_dispatching_1(self, speech_synthesizer_class_mock): 119 | import sys 120 | with patch.object(sys, 'argv', ['synthesizer_node.py']): 121 | import tts.synthesizer 122 | tts.synthesizer.main() 123 | speech_synthesizer_class_mock.assert_called_with(engine='POLLY_SERVICE', polly_service_name='polly') 124 | speech_synthesizer_class_mock.return_value.start.assert_called_with(node_name='synthesizer_node', 125 | service_name='synthesizer') 126 | 127 | @patch('tts.synthesizer.SpeechSynthesizer') 128 | def test_cli_engine_dispatching_2(self, speech_synthesizer_class_mock): 129 | import sys 130 | with patch.object(sys, 'argv', ['synthesizer_node.py', '-e', 'POLLY_LIBRARY']): 131 | from tts import synthesizer 132 | synthesizer.main() 133 | speech_synthesizer_class_mock.assert_called_with(engine='POLLY_LIBRARY') 134 | self.assertGreater(speech_synthesizer_class_mock.return_value.start.call_count, 0) 135 | 136 | @patch('tts.synthesizer.SpeechSynthesizer') 137 | def test_cli_engine_dispatching_3(self, speech_synthesizer_class_mock): 138 | import sys 139 | with patch.object(sys, 'argv', ['synthesizer_node.py', '-p', 'apolly']): 140 | from tts import synthesizer 141 | synthesizer.main() 142 | speech_synthesizer_class_mock.assert_called_with(engine='POLLY_SERVICE', polly_service_name='apolly') 143 | self.assertGreater(speech_synthesizer_class_mock.return_value.start.call_count, 0) 144 | 145 | 146 | if __name__ == '__main__': 147 | import rosunit 148 | rosunit.unitrun('tts', 'unittest-synthesizer', TestSynthesizer) 149 | -------------------------------------------------------------------------------- /tts/test/test_unit_polly.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | from __future__ import print_function 17 | 18 | 19 | from mock import patch, MagicMock # python2 uses backport of unittest.mock(docs.python.org/3/library/unittest.mock.html) 20 | import unittest 21 | 22 | 23 | class TestPolly(unittest.TestCase): 24 | 25 | def setUp(self): 26 | """important: import tts which is a relay package:: 27 | 28 | devel/lib/python2.7/dist-packages/ 29 | +-- tts 30 | | +-- __init__.py 31 | +-- ... 32 | 33 | per http://docs.ros.org/api/catkin/html/user_guide/setup_dot_py.html: 34 | 35 | A relay package is a folder with an __init__.py folder and nothing else. 36 | Importing this folder in python will execute the contents of __init__.py, 37 | which will in turn import the original python modules in the folder in 38 | the sourcespace using the python exec() function. 39 | """ 40 | import tts 41 | self.assertIsNotNone(tts) 42 | 43 | @patch('tts.amazonpolly.Session') 44 | def test_init(self, boto3_session_class_mock): 45 | from tts.amazonpolly import AmazonPolly 46 | AmazonPolly() 47 | 48 | self.assertGreater(boto3_session_class_mock.call_count, 0) 49 | boto3_session_class_mock.return_value.client.assert_called_with('polly') 50 | 51 | @patch('tts.amazonpolly.Session') 52 | def test_defaults(self, boto3_session_class_mock): 53 | from tts.amazonpolly import AmazonPolly 54 | polly = AmazonPolly() 55 | 56 | self.assertGreater(boto3_session_class_mock.call_count, 0) 57 | boto3_session_class_mock.return_value.client.assert_called_with('polly') 58 | 59 | self.assertEqual('text', polly.default_text_type) 60 | self.assertEqual('ogg_vorbis', polly.default_output_format) 61 | self.assertEqual('Joanna', polly.default_voice_id) 62 | self.assertEqual('.', polly.default_output_folder) 63 | self.assertEqual('output', polly.default_output_file_basename) 64 | 65 | @patch('tts.amazonpolly.Session') 66 | def test_good_synthesis_with_default_args(self, boto3_session_class_mock): 67 | boto3_session_obj_mock = MagicMock() 68 | boto3_polly_obj_mock = MagicMock() 69 | boto3_polly_response_mock = MagicMock() 70 | audio_stream_mock = MagicMock() 71 | fake_audio_stream_data = 'I am audio.' 72 | fake_audio_content_type = 'super tts' 73 | fake_boto3_polly_response_metadata = {'foo': 'bar'} 74 | 75 | boto3_session_class_mock.return_value = boto3_session_obj_mock 76 | boto3_session_obj_mock.client.return_value = boto3_polly_obj_mock 77 | boto3_polly_obj_mock.synthesize_speech.return_value = boto3_polly_response_mock 78 | audio_stream_mock.read.return_value = fake_audio_stream_data 79 | d = { 80 | 'AudioStream': audio_stream_mock, 81 | 'ContentType': fake_audio_content_type, 82 | 'ResponseMetadata': fake_boto3_polly_response_metadata 83 | } 84 | boto3_polly_response_mock.__contains__.side_effect = d.__contains__ 85 | boto3_polly_response_mock.__getitem__.side_effect = d.__getitem__ 86 | 87 | from tts.amazonpolly import AmazonPolly 88 | polly_under_test = AmazonPolly() 89 | 90 | self.assertGreater(boto3_session_class_mock.call_count, 0) 91 | boto3_session_obj_mock.client.assert_called_with('polly') 92 | 93 | res = polly_under_test.synthesize(text='hello') 94 | 95 | expected_synthesize_speech_kwargs = { 96 | 'LexiconNames': [], 97 | 'OutputFormat': 'ogg_vorbis', 98 | 'SampleRate': '22050', 99 | 'SpeechMarkTypes': [], 100 | 'Text': 'hello', 101 | 'TextType': 'text', 102 | 'VoiceId': 'Joanna', 103 | } 104 | boto3_polly_obj_mock.synthesize_speech.assert_called_with(**expected_synthesize_speech_kwargs) 105 | 106 | from tts.srv import PollyResponse 107 | self.assertTrue(isinstance(res, PollyResponse)) 108 | 109 | import json 110 | j = json.loads(res.result) 111 | observed_audio_file_content = open(j['Audio File']).read() 112 | self.assertEqual(fake_audio_stream_data, observed_audio_file_content) 113 | 114 | self.assertEqual(fake_audio_content_type, j['Audio Type']) 115 | self.assertEqual(str(fake_boto3_polly_response_metadata), j['Amazon Polly Response Metadata']) 116 | 117 | @patch('tts.amazonpolly.Session') 118 | def test_polly_raises(self, boto3_session_class_mock): 119 | boto3_session_obj_mock = MagicMock() 120 | boto3_polly_obj_mock = MagicMock() 121 | boto3_polly_response_mock = MagicMock() 122 | audio_stream_mock = MagicMock() 123 | fake_audio_stream_data = 'I am audio.' 124 | fake_audio_content_type = 'super voice' 125 | fake_boto3_polly_response_metadata = {'foo': 'bar'} 126 | 127 | boto3_session_class_mock.return_value = boto3_session_obj_mock 128 | boto3_session_obj_mock.client.return_value = boto3_polly_obj_mock 129 | boto3_polly_obj_mock.synthesize_speech.side_effect = RuntimeError('Amazon Polly Exception') 130 | audio_stream_mock.read.return_value = fake_audio_stream_data 131 | d = { 132 | 'AudioStream': audio_stream_mock, 133 | 'ContentType': fake_audio_content_type, 134 | 'ResponseMetadata': fake_boto3_polly_response_metadata 135 | } 136 | boto3_polly_response_mock.__contains__.side_effect = d.__contains__ 137 | boto3_polly_response_mock.__getitem__.side_effect = d.__getitem__ 138 | 139 | from tts.amazonpolly import AmazonPolly 140 | polly_under_test = AmazonPolly() 141 | 142 | self.assertGreater(boto3_session_class_mock.call_count, 0) 143 | boto3_session_obj_mock.client.assert_called_with('polly') 144 | 145 | res = polly_under_test.synthesize(text='hello') 146 | 147 | expected_synthesize_speech_kwargs = { 148 | 'LexiconNames': [], 149 | 'OutputFormat': 'ogg_vorbis', 150 | 'SampleRate': '22050', 151 | 'SpeechMarkTypes': [], 152 | 'Text': 'hello', 153 | 'TextType': 'text', 154 | 'VoiceId': 'Joanna', 155 | } 156 | boto3_polly_obj_mock.synthesize_speech.assert_called_with(**expected_synthesize_speech_kwargs) 157 | 158 | from tts.srv import PollyResponse 159 | self.assertTrue(isinstance(res, PollyResponse)) 160 | 161 | import json 162 | j = json.loads(res.result) 163 | self.assertTrue('Exception' in j) 164 | self.assertTrue('Traceback' in j) 165 | 166 | @patch('tts.amazonpolly.AmazonPolly') 167 | def test_cli(self, amazon_polly_class_mock): 168 | import sys 169 | with patch.object(sys, 'argv', ['polly_node.py', '-n', 'polly-node']): 170 | from tts import amazonpolly 171 | amazonpolly.main() 172 | self.assertGreater(amazon_polly_class_mock.call_count, 0) 173 | amazon_polly_class_mock.return_value.start.assert_called_with(node_name='polly-node', service_name='polly') 174 | 175 | 176 | if __name__ == '__main__': 177 | import rosunit 178 | rosunit.unitrun('tts', 'unittest-polly', TestPolly) 179 | -------------------------------------------------------------------------------- /tts/src/tts/synthesizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | import os 17 | import time 18 | import json 19 | import rospy 20 | import hashlib 21 | from optparse import OptionParser 22 | from tts.srv import Synthesizer, SynthesizerResponse 23 | 24 | 25 | class SpeechSynthesizer: 26 | """This class serves as a ROS service node that should be an entry point of a TTS task. 27 | 28 | Although the current implementation uses Amazon Polly as the synthesis engine, it is not hard to let it support 29 | more heterogeneous engines while keeping the API the same. 30 | 31 | In order to support a variety of engines, the SynthesizerRequest was designed with flexibility in mind. It 32 | has two fields: text and metadata. Both are strings. In most cases, a user can ignore the metadata and call 33 | the service with some plain text. If the use case needs any control or engine-specific feature, the extra 34 | information can be put into the JSON-form metadata. This class will use the information when calling the engine. 35 | 36 | The decoupling of the synthesizer and the actual synthesis engine will benefit the users in many ways. 37 | 38 | First, a user will be able to use a unified interface to do the TTS job and have the freedom to use different 39 | engines available with no or very little change from the client side. 40 | 41 | Second, by applying some design patterns, the synthesizer can choose an engine dynamically. For example, a user 42 | may prefer to use Amazon Polly but is also OK with an offline solution when network is not reliable. 43 | 44 | Third, engines can be complicated, thus difficult to use. As an example, Amazon Polly supports dozens of parameters 45 | and is able to accomplish nontrivial synthesis jobs, but majority of the users never need those features. This 46 | class provides a clean interface with two parameters only, so that it is much easier and pleasant to use. If by 47 | any chance the advanced features are required, the user can always leverage the metadata field or even go to the 48 | backend engine directly. 49 | 50 | Also, from an engineering perspective, simple and decoupled modules are easier to maintain. 51 | 52 | This class supports two modes of using polly. It can either call a service node or use AmazonPolly as a library. 53 | 54 | Start the service node:: 55 | 56 | $ rosrun tts synthesizer_node.py # use default configuration 57 | $ rosrun tts synthesizer_node.py -e POLLY_LIBRARY # will not call polly service node 58 | 59 | Call the service:: 60 | 61 | $ rosservice call /synthesizer 'hello' '' 62 | $ rosservice call /synthesizer 'hello' '"{\"text_type\":\"ssml\"}"' 63 | """ 64 | 65 | class PollyViaNode: 66 | def __init__(self, polly_service_name='polly'): 67 | self.service_name = polly_service_name 68 | 69 | def __call__(self, **kwargs): 70 | rospy.loginfo('will call service {}'.format(self.service_name)) 71 | from tts.srv import Polly 72 | rospy.wait_for_service(self.service_name) 73 | polly = rospy.ServiceProxy(self.service_name, Polly) 74 | return polly(polly_action='SynthesizeSpeech', **kwargs) 75 | 76 | class PollyDirect: 77 | def __init__(self): 78 | pass 79 | 80 | def __call__(self, **kwargs): 81 | rospy.loginfo('will import amazonpolly.AmazonPolly') 82 | from tts.amazonpolly import AmazonPolly 83 | node = AmazonPolly() 84 | return node.synthesize(**kwargs) 85 | 86 | ENGINES = { 87 | 'POLLY_SERVICE': PollyViaNode, 88 | 'POLLY_LIBRARY': PollyDirect, 89 | } 90 | 91 | class BadEngineError(NameError): 92 | pass 93 | 94 | def __init__(self, engine='POLLY_SERVICE', polly_service_name='polly'): 95 | if engine not in self.ENGINES: 96 | msg = 'bad engine {} which is not one of {}'.format(engine, ', '.join(SpeechSynthesizer.ENGINES.keys())) 97 | raise SpeechSynthesizer.BadEngineError(msg) 98 | 99 | engine_kwargs = {'polly_service_name': polly_service_name} if engine == 'POLLY_SERVICE' else {} 100 | self.engine = self.ENGINES[engine](**engine_kwargs) 101 | 102 | self.default_text_type = 'text' 103 | self.default_voice_id = 'Joanna' 104 | self.default_output_format = 'ogg_vorbis' 105 | 106 | def _call_engine(self, **kw): 107 | """Call engine to do the job. 108 | 109 | If no output path is found from input, the audio file will be put into /tmp and the file name will have 110 | a prefix of the md5 hash of the text. 111 | 112 | :param kw: what AmazonPolly needs to synthesize 113 | :return: response from AmazonPolly 114 | """ 115 | if 'output_path' not in kw: 116 | tmp_filename = hashlib.md5(kw['text']).hexdigest() 117 | tmp_filepath = os.path.join(os.sep, 'tmp', 'voice_{}_{}'.format(tmp_filename, str(time.time()))) 118 | kw['output_path'] = os.path.abspath(tmp_filepath) 119 | rospy.loginfo('audio will be saved as {}'.format(kw['output_path'])) 120 | 121 | return self.engine(**kw) 122 | 123 | def _parse_request_or_raise(self, request): 124 | """It will raise if request is malformed. 125 | 126 | :param request: an instance of SynthesizerRequest 127 | :return: a dict 128 | """ 129 | md = json.loads(request.metadata) if request.metadata else {} 130 | 131 | md['output_format'] = md.get('output_format', self.default_output_format) 132 | md['voice_id'] = md.get('voice_id', self.default_voice_id) 133 | md['sample_rate'] = md.get('sample_rate', '16000' if md['output_format'].lower() == 'pcm' else '22050') 134 | md['text_type'] = md.get('text_type', self.default_text_type) 135 | md['text'] = request.text 136 | 137 | return md 138 | 139 | def _node_request_handler(self, request): 140 | """The callback function for processing service request. 141 | 142 | It never raises. If anything unexpected happens, it will return a SynthesizerResponse with the exception. 143 | 144 | :param request: an instance of SynthesizerRequest 145 | :return: a SynthesizerResponse 146 | """ 147 | rospy.loginfo(request) 148 | try: 149 | kws = self._parse_request_or_raise(request) 150 | res = self._call_engine(**kws).result 151 | 152 | return SynthesizerResponse(res) 153 | except Exception as e: 154 | return SynthesizerResponse('Exception: {}'.format(e)) 155 | 156 | def start(self, node_name='synthesizer_node', service_name='synthesizer'): 157 | """The entry point of a ROS service node. 158 | 159 | :param node_name: name of ROS node 160 | :param service_name: name of ROS service 161 | :return: it doesn't return 162 | """ 163 | rospy.init_node(node_name) 164 | 165 | service = rospy.Service(service_name, Synthesizer, self._node_request_handler) 166 | 167 | rospy.loginfo('{} running: {}'.format(node_name, service.uri)) 168 | 169 | rospy.spin() 170 | 171 | 172 | def main(): 173 | usage = '''usage: %prog [options] 174 | ''' 175 | 176 | parser = OptionParser(usage) 177 | 178 | parser.add_option("-n", "--node-name", dest="node_name", default='synthesizer_node', 179 | help="name of the ROS node", 180 | metavar="NODE_NAME") 181 | parser.add_option("-s", "--service-name", dest="service_name", default='synthesizer', 182 | help="name of the ROS service", 183 | metavar="SERVICE_NAME") 184 | parser.add_option("-e", "--engine", dest="engine", default='POLLY_SERVICE', 185 | help="name of the synthesis engine", 186 | metavar="ENGINE") 187 | parser.add_option("-p", "--polly-service-name", dest="polly_service_name", default='polly', 188 | help="name of the polly service", 189 | metavar="POLLY_SERVICE_NAME") 190 | 191 | (options, args) = parser.parse_args() 192 | 193 | node_name = options.node_name 194 | service_name = options.service_name 195 | engine = options.engine 196 | polly_service_name = options.polly_service_name 197 | 198 | if engine == 'POLLY_SERVICE': 199 | synthesizer = SpeechSynthesizer(engine=engine, polly_service_name=polly_service_name) 200 | else: 201 | synthesizer = SpeechSynthesizer(engine=engine) 202 | synthesizer.start(node_name=node_name, service_name=service_name) 203 | 204 | 205 | if __name__ == "__main__": 206 | main() 207 | -------------------------------------------------------------------------------- /tts/test/test_integration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | from __future__ import print_function 17 | 18 | import sys 19 | import json 20 | import unittest 21 | 22 | import rospy 23 | import rostest 24 | 25 | from tts.srv import Polly 26 | from tts.srv import PollyResponse 27 | from tts.srv import Synthesizer 28 | from tts.srv import SynthesizerResponse 29 | 30 | # import tts which is a relay package, otherwise things don't work 31 | # 32 | # devel/lib/python2.7/dist-packages/ 33 | # +-- tts 34 | # | +-- __init__.py 35 | # +-- ... 36 | # 37 | # per http://docs.ros.org/api/catkin/html/user_guide/setup_dot_py.html: 38 | # 39 | # A relay package is a folder with an __init__.py folder and nothing else. 40 | # Importing this folder in python will execute the contents of __init__.py, 41 | # which will in turn import the original python modules in the folder in 42 | # the sourcespace using the python exec() function. 43 | 44 | 45 | PKG = 'tts' 46 | NAME = 'amazonpolly' 47 | 48 | 49 | class TestPlainText(unittest.TestCase): 50 | 51 | def test_plain_text_to_wav_via_polly_node(self): 52 | rospy.wait_for_service('polly') 53 | polly = rospy.ServiceProxy('polly', Polly) 54 | 55 | test_text = 'Mary has a little lamb, little lamb, little lamb.' 56 | res = polly(polly_action='SynthesizeSpeech', text=test_text) 57 | self.assertIsNotNone(res) 58 | self.assertTrue(type(res) is PollyResponse) 59 | 60 | r = json.loads(res.result) 61 | self.assertIn('Audio Type', r, 'result should contain audio type') 62 | self.assertIn('Audio File', r, 'result should contain file path') 63 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata') 64 | 65 | audio_type = r['Audio Type'] 66 | audio_file = r['Audio File'] 67 | md = r['Amazon Polly Response Metadata'] 68 | self.assertTrue("'HTTPStatusCode': 200," in md) 69 | self.assertEqual('audio/ogg', audio_type) 70 | self.assertTrue(audio_file.endswith('.ogg')) 71 | 72 | import subprocess 73 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT) 74 | import re 75 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE) 76 | self.assertIsNotNone(m) 77 | 78 | def test_plain_text_using_polly_class(self): 79 | from tts.amazonpolly import AmazonPolly 80 | polly = AmazonPolly() 81 | test_text = 'Mary has a little lamb, little lamb, little lamb.' 82 | res = polly.synthesize(text=test_text) 83 | self.assertIsNotNone(res) 84 | self.assertTrue(type(res) is PollyResponse) 85 | 86 | r = json.loads(res.result) 87 | self.assertIn('Audio Type', r, 'result should contain audio type') 88 | self.assertIn('Audio File', r, 'result should contain file path') 89 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata') 90 | 91 | audio_type = r['Audio Type'] 92 | audio_file = r['Audio File'] 93 | md = r['Amazon Polly Response Metadata'] 94 | self.assertTrue("'HTTPStatusCode': 200," in md) 95 | self.assertEqual('audio/ogg', audio_type) 96 | self.assertTrue(audio_file.endswith('.ogg')) 97 | 98 | import subprocess 99 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT) 100 | import re 101 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE) 102 | self.assertIsNotNone(m) 103 | 104 | def test_plain_text_via_synthesizer_node(self): 105 | rospy.wait_for_service('synthesizer') 106 | speech_synthesizer = rospy.ServiceProxy('synthesizer', Synthesizer) 107 | 108 | text = 'Mary has a little lamb, little lamb, little lamb.' 109 | res = speech_synthesizer(text=text) 110 | self.assertIsNotNone(res) 111 | self.assertTrue(type(res) is SynthesizerResponse) 112 | 113 | r = json.loads(res.result) 114 | self.assertIn('Audio Type', r, 'result should contain audio type') 115 | self.assertIn('Audio File', r, 'result should contain file path') 116 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata') 117 | 118 | audio_type = r['Audio Type'] 119 | audio_file = r['Audio File'] 120 | md = r['Amazon Polly Response Metadata'] 121 | self.assertTrue("'HTTPStatusCode': 200," in md) 122 | self.assertEqual('audio/ogg', audio_type) 123 | self.assertTrue(audio_file.endswith('.ogg')) 124 | 125 | import subprocess 126 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT) 127 | import re 128 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE) 129 | self.assertIsNotNone(m) 130 | 131 | def test_plain_text_to_mp3_via_polly_node(self): 132 | rospy.wait_for_service('polly') 133 | polly = rospy.ServiceProxy('polly', Polly) 134 | 135 | test_text = 'Mary has a little lamb, little lamb, little lamb.' 136 | res = polly(polly_action='SynthesizeSpeech', text=test_text, output_format='mp3') 137 | self.assertIsNotNone(res) 138 | self.assertTrue(type(res) is PollyResponse) 139 | 140 | r = json.loads(res.result) 141 | self.assertIn('Audio Type', r, 'result should contain audio type') 142 | self.assertIn('Audio File', r, 'result should contain file path') 143 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata') 144 | 145 | audio_type = r['Audio Type'] 146 | audio_file = r['Audio File'] 147 | md = r['Amazon Polly Response Metadata'] 148 | self.assertTrue("'HTTPStatusCode': 200," in md) 149 | self.assertEqual('audio/mpeg', audio_type) 150 | self.assertTrue(audio_file.endswith('.mp3')) 151 | 152 | import subprocess 153 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT) 154 | import re 155 | m = re.search(r'.*MPEG.*layer III.*', o, flags=re.MULTILINE) 156 | self.assertIsNotNone(m) 157 | 158 | def test_simple_ssml_via_polly_node(self): 159 | rospy.wait_for_service('polly') 160 | polly = rospy.ServiceProxy('polly', Polly) 161 | 162 | text = 'Mary has a little lamb, little lamb, little lamb.' 163 | res = polly(polly_action='SynthesizeSpeech', text=text, text_type='ssml') 164 | self.assertIsNotNone(res) 165 | self.assertTrue(type(res) is PollyResponse) 166 | 167 | r = json.loads(res.result) 168 | self.assertIn('Audio Type', r, 'result should contain audio type') 169 | self.assertIn('Audio File', r, 'result should contain file path') 170 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata') 171 | 172 | audio_type = r['Audio Type'] 173 | audio_file = r['Audio File'] 174 | md = r['Amazon Polly Response Metadata'] 175 | self.assertTrue("'HTTPStatusCode': 200," in md) 176 | self.assertEqual('audio/ogg', audio_type) 177 | self.assertTrue(audio_file.endswith('.ogg')) 178 | 179 | import subprocess 180 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT) 181 | import re 182 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE) 183 | self.assertIsNotNone(m) 184 | 185 | def test_simple_ssml_via_synthesizer_node(self): 186 | rospy.wait_for_service('synthesizer') 187 | speech_synthesizer = rospy.ServiceProxy('synthesizer', Synthesizer) 188 | 189 | text = 'Mary has a little lamb, little lamb, little lamb.' 190 | res = speech_synthesizer(text=text, metadata='''{"text_type":"ssml"}''') 191 | self.assertIsNotNone(res) 192 | self.assertTrue(type(res) is SynthesizerResponse) 193 | 194 | r = json.loads(res.result) 195 | self.assertIn('Audio Type', r, 'result should contain audio type') 196 | self.assertIn('Audio File', r, 'result should contain file path') 197 | self.assertIn('Amazon Polly Response Metadata', r, 'result should contain metadata') 198 | 199 | audio_type = r['Audio Type'] 200 | audio_file = r['Audio File'] 201 | md = r['Amazon Polly Response Metadata'] 202 | self.assertTrue("'HTTPStatusCode': 200," in md) 203 | self.assertEqual('audio/ogg', audio_type) 204 | self.assertTrue(audio_file.endswith('.ogg')) 205 | 206 | import subprocess 207 | o = subprocess.check_output(['file', audio_file], stderr=subprocess.STDOUT) 208 | import re 209 | m = re.search(r'.*Ogg data, Vorbis audi.*', o, flags=re.MULTILINE) 210 | self.assertIsNotNone(m) 211 | 212 | 213 | if __name__ == '__main__': 214 | rostest.rosrun(PKG, NAME, TestPlainText, sys.argv) 215 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | -------------------------------------------------------------------------------- /tts/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2018 Amazon.com, Inc. or its affiliates 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tts 2 | 3 | 4 | ## Overview 5 | The `tts` ROS node enables a robot to speak with a human voice by providing a Text-To-Speech service. 6 | Out of the box this package listens to a speech topic, submits text to the Amazon Polly cloud service to generate an audio stream file, 7 | retrieves the audio stream from Amazon Polly, and plays the audio stream via the default output device. 8 | The nodes can be configured to use different voices as well as custom lexicons and SSML tags which enable you to control aspects of speech, 9 | such as pronunciation, volume, pitch, speed rate, etc. A [sample ROS application] with this node, 10 | and more details on speech customization are available within the [Amazon Polly documentation]. 11 | 12 | **Amazon Polly Summary**: Amazon Polly is a service that turns text into lifelike speech, allowing you to create applications that talk, 13 | and build entirely new categories of speech-enabled products. Amazon Polly is a Text-to-Speech service that uses advanced deep learning technologies to synthesize speech that sounds like a human voice. 14 | With dozens of lifelike voices across a variety of languages, you can select the ideal voice and build speech-enabled applications that work in many different countries. 15 | 16 | ### License 17 | The source code is released under an [Apache 2.0]. 18 | 19 | **Author**: AWS RoboMaker
20 | **Affiliation**: [Amazon Web Services (AWS)]
21 | 22 | RoboMaker cloud extensions rely on third-party software licensed under open-source licenses and are provided for demonstration purposes only. Incorporation or use of RoboMaker cloud extensions in connection with your production workloads or commercial product(s) or devices may affect your legal rights or obligations under the applicable open-source licenses. License information for this repository can be found [here](https://github.com/aws-robotics/tts-ros1/blob/master/LICENSE). AWS does not provide support for this cloud extension. You are solely responsible for how you configure, deploy, and maintain this cloud extension in your workloads or commercial product(s) or devices. 23 | 24 | ### Supported ROS Distributions 25 | - Kinetic 26 | - Melodic 27 | 28 | ## Installation 29 | 30 | ### AWS Credentials 31 | You will need to create an AWS Account and configure the credentials to be able to communicate with AWS services. You may find [AWS Configuration and Credential Files] helpful. 32 | 33 | This node will require the following AWS account IAM role permissions: 34 | - `polly:SynthesizeSpeech` 35 | 36 | ### Dependencies 37 | In order to use the Text-To-Speech node with ROS kinetic you must update the version of boto3 that is installed on your system to at least version 1.9.0. You can do this by running the command: 38 | 39 | pip3 install -U boto3 40 | 41 | This step is required before the node will work properly because the version of boto3 is not new enough for the features required by this node. 42 | 43 | ### Building from Source 44 | 45 | To build from source you'll need to create a new workspace, clone and checkout the latest release branch of this repository, install all the dependencies, and compile. If you need the latest development features you can clone from the `master` branch instead of the latest release branch. While we guarantee the release branches are stable, __the `master` should be considered to have an unstable build__ due to ongoing development. 46 | 47 | - Install build tool: please refer to `colcon` [installation guide](https://colcon.readthedocs.io/en/released/user/installation.html) 48 | 49 | - Create a ROS workspace and a source directory 50 | 51 | mkdir -p ~/ros-workspace/src 52 | 53 | - Clone the package into the source directory . 54 | 55 | cd ~/ros-workspace/src 56 | git clone https://github.com/aws-robotics/tts-ros1.git -b release-latest 57 | 58 | - Install dependencies 59 | 60 | cd ~/ros-workspace 61 | sudo apt-get update && rosdep update 62 | rosdep install --from-paths src --ignore-src -r -y 63 | 64 | _Note: If building the master branch instead of a release branch you may need to also checkout and build the master branches of the packages this package depends on._ 65 | 66 | - Build the packages 67 | 68 | cd ~/ros-workspace && colcon build 69 | 70 | - Configure ROS library Path 71 | 72 | source ~/ros-workspace/install/setup.bash 73 | 74 | - Build and run the unit tests 75 | 76 | colcon test --packages-select tts && colcon test-result --all 77 | 78 | ### Testing in Containers/Virtual Machines 79 | 80 | Even if your container or virtual machine does not have audio device, you can still test TTS by leveraging an audio server. 81 | 82 | The following is an example setup on a MacBook with PulseAudio as the audio server. 83 | If you are new to PulseAudio, you may want to read the [PulseAudio Documentation]. 84 | 85 | **Step 1: Start PulseAudio on your laptop** 86 | 87 | After installation, start the audio server with *module-native-protocol-tcp* loaded: 88 | 89 | pulseaudio --load=module-native-protocol-tcp --exit-idle-time=-1 --log-target=stderr -v 90 | 91 | Note the extra arguments `-v` and `--log-target` are used for easier troubleshooting. 92 | 93 | **Step 2: Run TTS nodes in container** 94 | 95 | In your container, make sure you set the right environment variables. 96 | For example, you can start the container using `docker run -it -e PULSE_SERVER=docker.for.mac.localhost ubuntu:16.04`. 97 | 98 | Then you will be able to run ROS nodes in the container and hear the audio from your laptop speakers. 99 | 100 | **Troubleshooting** 101 | 102 | If your laptop has multiple audio output devices, make sure the right one has the right volume. 103 | This command will give you a list of output devices and tell you which one has been selected: 104 | 105 | pacmd list-sinks | grep -E '(index:|name:|product.name)' 106 | 107 | ## Launch Files 108 | An example launch file called `sample_application.launch` is provided. 109 | 110 | 111 | ## Usage 112 | 113 | ### Run the node 114 | - **Plain text** 115 | - `roslaunch tts sample_application.launch` 116 | - `rosrun tts voicer.py 'Hello World'` 117 | 118 | - **SSML** 119 | - `roslaunch tts sample_application.launch` 120 | - `rosrun tts voicer.py 'Mary has a little lamb.' '{"text_type":"ssml"}'` 121 | 122 | 123 | ## Configuration File and Parameters 124 | | Parameter Name | Type | Description | 125 | | -------------- | ---- | ----------- | 126 | | polly_action | *string* | Currently only one action named `SynthesizeSpeech` is supported. | 127 | | text | *string* | The text to be synthesized. It can be plain text or SSML. See also `text_type`. | 128 | | text_type | *string* | A user can choose from `text` and `ssml`. Default: `text`. | 129 | | voice_id | *string* | The list of supported voices can be found on [official Amazon Polly document]. Default: Joanna | 130 | | output_format | *string* | Valid formats are `ogg_vorbis`, `mp3` and `pcm`. Default: `ogg_vorbis` | 131 | | output_path | *string* | The audio data will be saved as a local file for playback and reuse/inspection purposes. This parameter is to provide a preferred path to save the file. Default: `.` | 132 | | sample_rate | *string* | Note `16000` is a valid sample rate for all supported formats. Default: `16000`. | 133 | 134 | 135 | ## Performance and Benchmark Results 136 | We evaluated the performance of this node by runnning the followning scenario on a Raspberry Pi 3 Model B: 137 | - Launch a baseline graph containing the talker and listener nodes from the [roscpp_tutorials package](https://wiki.ros.org/roscpp_tutorials), plus two additional nodes that collect CPU and memory usage statistics. Allow the nodes to run for 60 seconds. 138 | - Launch the nodes `polly_node`, `synthesizer_node` and `tts_node` by using the launch file `sample_application.launch` as described above. At the same time, perform several calls to the action `tts/action/Speech.action` using the `voicer.py` script descried above, by running the following script in the background: 139 | 140 | ```bash 141 | rosrun tts voicer.py 'Amazon Polly is a Text-to-Speech (TTS) cloud service' '{"text_type":"ssml"}' ; sleep 1 142 | rosrun tts voicer.py 'that converts text into lifelike speech' '{"text_type":"ssml"}' ; sleep 1 143 | rosrun tts voicer.py 'You can use Amazon Polly to develop applications that increase engagement and accessibility' '{"text_type":"ssml"}' ; sleep 1 144 | rosrun tts voicer.py 'Amazon Polly supports multiple languages and includes a variety of lifelike voices' '{"text_type":"ssml"}' ; sleep 1 145 | rosrun tts voicer.py 'so you can build speech-enabled applications that work in multiple locations' '{"text_type":"ssml"}' ; sleep 1 146 | rosrun tts voicer.py 'and use the ideal voice for your customers' '{"text_type":"ssml"}' ; sleep 1 147 | ``` 148 | 149 | - Allow the nodes to run for 180 seconds. 150 | - Terminate the `polly_node`, `synthesizer_node` and `tts_node` nodes, and allow the reamaining nodes to run for 60 seconds. 151 | 152 | The following graph shows the CPU usage during that scenario. The 1 minute average CPU usage starts at 16.75% during the launch of the baseline graph, and stabilizes at 6%. When we launch the Polly nodes around second 85, the 1 minute average CPU increases up to a peak of 22.25% and stabilizes around 20%. After we stop making requests with the script `voicer.py` around second 206 the 1 minute average CPU usage moves to around 12%, and decreases gradually, and goes down again to 2.5 % after we stop the Polly nodes at the end of the scenario. 153 | 154 | ![cpu](wiki/images/cpu.svg) 155 | 156 | The following graph shows the memory usage during that scenario. We start with a memory usage of around 227 MB that increases to around 335 MB (+47.58%) when we lanch the Polly nodes around second 85, and gets to a peak of 361 MB (+59% wrt. initial value) while we are calling the script `voicer.py`. The memory usage goes back to the initial values after stopping the Polly nodes. 157 | 158 | ![memory](wiki/images/memory.svg) 159 | 160 | 161 | ## Nodes 162 | 163 | ### polly 164 | Polly node is the engine for the synthesizing job. It provides user-friendly yet powerful APIs so a user doesn't have to deal with technical details of AWS service calls. 165 | 166 | #### Services 167 | - **`polly (tts/Polly)`** 168 | 169 | Call the service to use Amazon Polly to synthesize the audio. 170 | 171 | #### Reserved for future usage 172 | - `language_code (string, default: None)` 173 | 174 | A user doesn't have to provide a language code and this is reserved for future usage. 175 | 176 | - `lexicon_content (string, default: None)` 177 | 178 | - `lexicon_name (string, default: None)` 179 | 180 | - `lexicon_names (string[], default: empty)` 181 | 182 | - `speech_mark_types (string[], default: empty)` 183 | 184 | - `max_results (uint32, default: None)` 185 | 186 | - `next_token (string, default: None)` 187 | 188 | - `sns_topic_arn (string, default: None)` 189 | 190 | - `task_id (string, default: None)` 191 | 192 | - `task_status (string, default: iNone)` 193 | 194 | - `output_s3_bucket_name (string, default: None)` 195 | 196 | - `output_s3_key_prefix (string, default: None)` 197 | 198 | - `include_additional_language_codes (bool, default: None)` 199 | 200 | ### synthesizer node 201 | 202 | #### Services 203 | - **`synthesizer (tts/Synthesizer)`** 204 | 205 | Call the service to synthesize. 206 | 207 | #### Parameters 208 | 209 | - **`text (string)`** 210 | 211 | The text to be synthesized. 212 | 213 | - **`metadata (string, JSON format)`** 214 | 215 | Optional, for user to have control over how synthesis happens. 216 | 217 | ### tts node 218 | 219 | #### Action 220 | 221 | - **`speech`** 222 | 223 | #### Parameters 224 | 225 | - **`text (string)`** 226 | 227 | The text to be synthesized. 228 | 229 | - **`metadata (string, JSON format)`** 230 | 231 | Optional, for user to have control over how synthesis happens. 232 | 233 | [AWS Configuration and Credential Files]: https://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html 234 | [Amazon Polly documentation]: https://docs.aws.amazon.com/polly/latest/dg/what-is.html 235 | [Amazon Web Services (AWS)]: https://aws.amazon.com/ 236 | [Apache 2.0]: https://aws.amazon.com/apache-2-0/ 237 | [Issue Tracker]: https://github.com/aws-robotics/tts-ros1/issues 238 | [PulseAudio Documentation]: https://www.freedesktop.org/wiki/Software/PulseAudio/Documentation/ 239 | [official Amazon Polly document]: https://docs.aws.amazon.com/polly/latest/dg/voicelist.html 240 | [sample ROS application]: https://github.com/aws-robotics/aws-robomaker-sample-application-voiceinteraction 241 | -------------------------------------------------------------------------------- /tts/src/tts/amazonpolly.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2018, Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"). 6 | # You may not use this file except in compliance with the License. 7 | # A copy of the License is located at 8 | # 9 | # http://aws.amazon.com/apache2.0 10 | # 11 | # or in the "license" file accompanying this file. This file is distributed 12 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 | # express or implied. See the License for the specific language governing 14 | # permissions and limitations under the License. 15 | 16 | import json 17 | import os 18 | import sys 19 | import wave 20 | import traceback 21 | import requests 22 | from boto3 import Session 23 | from botocore.credentials import CredentialProvider, RefreshableCredentials 24 | from botocore.session import get_session 25 | from botocore.exceptions import UnknownServiceError 26 | from contextlib import closing 27 | from optparse import OptionParser 28 | 29 | import rospy 30 | from tts.srv import Polly, PollyRequest, PollyResponse 31 | 32 | 33 | def get_ros_param(param, default=None): 34 | try: 35 | key = rospy.search_param(param) 36 | return default if key is None else rospy.get_param(key, default) 37 | except Exception as e: 38 | rospy.logwarn('Failed to get ros param {}, will use default {}. Exception: '.format(param, default, e)) 39 | return default 40 | 41 | 42 | class AwsIotCredentialProvider(CredentialProvider): 43 | METHOD = 'aws-iot' 44 | CANONICAL_NAME = 'customIoTwithCertificate' 45 | 46 | DEFAULT_AUTH_CONNECT_TIMEOUT_MS = 5000 47 | DEFAULT_AUTH_TOTAL_TIMEOUT_MS = 10000 48 | 49 | def __init__(self): 50 | super(AwsIotCredentialProvider, self).__init__() 51 | self.ros_param_prefix = 'iot/' 52 | 53 | def get_param(self, param, default=None): 54 | return get_ros_param(self.ros_param_prefix + param, default) 55 | 56 | def retrieve_credentials(self): 57 | try: 58 | cert_file = self.get_param('certfile') 59 | key_file = self.get_param('keyfile') 60 | endpoint = self.get_param('endpoint') 61 | role_alias = self.get_param('role') 62 | connect_timeout = self.get_param('connect_timeout_ms', self.DEFAULT_AUTH_CONNECT_TIMEOUT_MS) 63 | total_timeout = self.get_param('total_timeout_ms', self.DEFAULT_AUTH_TOTAL_TIMEOUT_MS) 64 | thing_name = self.get_param('thing_name', '') 65 | 66 | if any(v is None for v in (cert_file, key_file, endpoint, role_alias, thing_name)): 67 | return None 68 | 69 | headers = {'x-amzn-iot-thingname': thing_name} if len(thing_name) > 0 else None 70 | url = 'https://{}/role-aliases/{}/credentials'.format(endpoint, role_alias) 71 | timeout = (connect_timeout, total_timeout - connect_timeout) # see also: urllib3/util/timeout.py 72 | 73 | response = requests.get(url, cert=(cert_file, key_file), headers=headers, timeout=timeout) 74 | d = response.json()['credentials'] 75 | 76 | rospy.loginfo('Credentials expiry time: {}'.format(d['expiration'])) 77 | 78 | return { 79 | 'access_key': d['accessKeyId'], 80 | 'secret_key': d['secretAccessKey'], 81 | 'token': d['sessionToken'], 82 | 'expiry_time': d['expiration'], 83 | } 84 | except Exception as e: 85 | rospy.logwarn('Failed to fetch credentials from AWS IoT: {}'.format(e)) 86 | return None 87 | 88 | def load(self): 89 | return RefreshableCredentials.create_from_metadata( 90 | self.retrieve_credentials(), 91 | self.retrieve_credentials, 92 | 'aws-iot-with-certificate' 93 | ) 94 | 95 | 96 | class AmazonPolly: 97 | """A TTS engine that can be used in two different ways. 98 | 99 | Usage 100 | ----- 101 | 102 | 1. It can run as a ROS service node. 103 | 104 | Start a polly node:: 105 | 106 | $ rosrun tts polly_node.py 107 | 108 | Call the service from command line:: 109 | 110 | $ rosservice call /polly SynthesizeSpeech 'hello polly' '' '' '' '' '' '' '' '' [] [] 0 '' '' '' '' '' '' false 111 | 112 | Call the service programmatically:: 113 | 114 | from tts.srv import Polly 115 | rospy.wait_for_service('polly') 116 | polly = rospy.ServiceProxy('polly', Polly) 117 | res = polly(**kw) 118 | 119 | 2. It can also be used as a normal python class:: 120 | 121 | AmazonPolly().synthesize(text='hi polly') 122 | 123 | PollyRequest supports many parameters, but the majority of the users can safely ignore most of them and just 124 | use the vanilla version which involves only one argument, ``text``. 125 | 126 | If in some use cases more control is needed, SSML will come handy. Example:: 127 | 128 | AmazonPolly().synthesize( 129 | text='Mary has a little lamb.', 130 | text_type='ssml' 131 | ) 132 | 133 | A user can also control the voice, output format and so on. Example:: 134 | 135 | AmazonPolly().synthesize( 136 | text='Mary has a little lamb.', 137 | text_type='ssml', 138 | voice_id='Joey', 139 | output_format='mp3', 140 | output_path='/tmp/blah' 141 | ) 142 | 143 | 144 | Parameters 145 | ---------- 146 | 147 | Among the parameters defined in Polly.srv, the following are supported while others are reserved for future. 148 | 149 | * polly_action : currently only ``SynthesizeSpeech`` is supported 150 | * text : the text to speak 151 | * text_type : can be either ``text`` (default) or ``ssml`` 152 | * voice_id : any voice id supported by Amazon Polly, default is Joanna 153 | * output_format : ogg (default), mp3 or pcm 154 | * output_path : where the audio file is saved 155 | * sample_rate : default is 16000 for pcm or 22050 for mp3 and ogg 156 | 157 | The following are the reserved ones. Note that ``language_code`` is rarely needed (this may seem counter-intuitive). 158 | See official Amazon Polly documentation for details (link can be found below). 159 | 160 | * language_code 161 | * lexicon_content 162 | * lexicon_name 163 | * lexicon_names 164 | * speech_mark_types 165 | * max_results 166 | * next_token 167 | * sns_topic_arn 168 | * task_id 169 | * task_status 170 | * output_s3_bucket_name 171 | * output_s3_key_prefix 172 | * include_additional_language_codes 173 | 174 | 175 | Links 176 | ----- 177 | 178 | Amazon Polly documentation: https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html 179 | 180 | """ 181 | 182 | def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, region_name=None): 183 | if region_name is None: 184 | region_name = get_ros_param('aws_client_configuration/region', default='us-west-2') 185 | 186 | self.polly = self._get_polly_client(aws_access_key_id, aws_secret_access_key, aws_session_token, region_name) 187 | self.default_text_type = 'text' 188 | self.default_voice_id = 'Joanna' 189 | self.default_output_format = 'ogg_vorbis' 190 | self.default_output_folder = '.' 191 | self.default_output_file_basename = 'output' 192 | 193 | def _get_polly_client(self, aws_access_key_id=None, aws_secret_access_key=None, aws_session_token=None, 194 | region_name=None, with_service_model_patch=False): 195 | """Note we get a new botocore session each time this function is called. 196 | This is to avoid potential problems caused by inner state of the session. 197 | """ 198 | botocore_session = get_session() 199 | 200 | if with_service_model_patch: 201 | # Older versions of botocore don't have polly. We can possibly fix it by appending 202 | # extra path with polly service model files to the search path. 203 | current_dir = os.path.dirname(os.path.abspath(__file__)) 204 | service_model_path = os.path.join(current_dir, 'data', 'models') 205 | botocore_session.set_config_variable('data_path', service_model_path) 206 | rospy.loginfo('patching service model data path: {}'.format(service_model_path)) 207 | 208 | botocore_session.get_component('credential_provider').insert_after('boto-config', AwsIotCredentialProvider()) 209 | 210 | botocore_session.user_agent_extra = self._generate_user_agent_suffix() 211 | 212 | session = Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key, 213 | aws_session_token=aws_session_token, region_name=region_name, 214 | botocore_session=botocore_session) 215 | 216 | try: 217 | return session.client("polly") 218 | except UnknownServiceError: 219 | # the first time we reach here, we try to fix the problem 220 | if not with_service_model_patch: 221 | return self._get_polly_client(aws_access_key_id, aws_secret_access_key, aws_session_token, region_name, 222 | with_service_model_patch=True) 223 | else: 224 | # we have tried our best, time to panic 225 | rospy.logerr('Amazon Polly is not available. Please install the latest boto3.') 226 | raise 227 | 228 | def _generate_user_agent_suffix(self): 229 | exec_env = get_ros_param('exec_env', 'AWS_RoboMaker').strip() 230 | if 'AWS_RoboMaker' in exec_env: 231 | ver = get_ros_param('robomaker_version', None) 232 | if ver: 233 | exec_env += '-' + ver.strip() 234 | ros_distro = get_ros_param('rosdistro', 'Unknown_ROS_DISTRO').strip() 235 | ros_version = get_ros_param('rosversion', 'Unknown_ROS_VERSION').strip() 236 | return 'exec-env/{} ros-{}/{}'.format(exec_env, ros_distro, ros_version) 237 | 238 | def _pcm2wav(self, audio_data, wav_filename, sample_rate): 239 | """per Amazon Polly official doc, the pcm in a signed 16-bit, 1 channel (mono), little-endian format.""" 240 | wavf = wave.open(wav_filename, 'w') 241 | wavf.setframerate(int(sample_rate)) 242 | wavf.setnchannels(1) # 1 channel 243 | wavf.setsampwidth(2) # 2 bytes == 16 bits 244 | wavf.writeframes(audio_data) 245 | wavf.close() 246 | 247 | def _make_audio_file_fullpath(self, output_path, output_format): 248 | """Makes a full path for audio file based on given output path and format. 249 | 250 | If ``output_path`` doesn't have a path, current path is used. 251 | 252 | :param output_path: the output path received 253 | :param output_format: the audio format, e.g., mp3, ogg_vorbis, pcm 254 | :return: a full path for the output audio file. File ext will be constructed from audio format. 255 | """ 256 | head, tail = os.path.split(output_path) 257 | if not head: 258 | head = self.default_output_folder 259 | if not tail: 260 | tail = self.default_output_file_basename 261 | 262 | file_ext = {'pcm': '.wav', 'mp3': '.mp3', 'ogg_vorbis': '.ogg'}[output_format.lower()] 263 | if not tail.endswith(file_ext): 264 | tail += file_ext 265 | 266 | return os.path.realpath(os.path.join(head, tail)) 267 | 268 | def _synthesize_speech_and_save(self, request): 269 | """Calls Amazon Polly and writes the returned audio data to a local file. 270 | 271 | To make it practical, three things will be returned in a JSON form string, which are audio file path, 272 | audio type and Amazon Polly response metadata. 273 | 274 | If the Amazon Polly call fails, audio file name will be an empty string and audio type will be "N/A". 275 | 276 | Please see https://boto3.readthedocs.io/reference/services/polly.html#Polly.Client.synthesize_speech 277 | for more details on Amazon Polly API. 278 | 279 | :param request: an instance of PollyRequest 280 | :return: a string in JSON form with two attributes, "Audio File" and "Amazon Polly Response". 281 | """ 282 | kws = { 283 | 'LexiconNames': request.lexicon_names if request.lexicon_names else [], 284 | 'OutputFormat': request.output_format if request.output_format else self.default_output_format, 285 | 'SampleRate': request.sample_rate, 286 | 'SpeechMarkTypes': request.speech_mark_types if request.speech_mark_types else [], 287 | 'Text': request.text, 288 | 'TextType': request.text_type if request.text_type else self.default_text_type, 289 | 'VoiceId': request.voice_id if request.voice_id else self.default_voice_id 290 | } 291 | 292 | if not kws['SampleRate']: 293 | kws['SampleRate'] = '16000' if kws['OutputFormat'].lower() == 'pcm' else '22050' 294 | 295 | rospy.loginfo('Amazon Polly Request: {}'.format(kws)) 296 | response = self.polly.synthesize_speech(**kws) 297 | rospy.loginfo('Amazon Polly Response: {}'.format(response)) 298 | 299 | if "AudioStream" in response: 300 | audiofile = self._make_audio_file_fullpath(request.output_path, kws['OutputFormat']) 301 | rospy.loginfo('will save audio as {}'.format(audiofile)) 302 | 303 | with closing(response["AudioStream"]) as stream: 304 | if kws['OutputFormat'].lower() == 'pcm': 305 | self._pcm2wav(stream.read(), audiofile, kws['SampleRate']) 306 | else: 307 | with open(audiofile, "wb") as f: 308 | f.write(stream.read()) 309 | 310 | audiotype = response['ContentType'] 311 | else: 312 | audiofile = '' 313 | audiotype = 'N/A' 314 | 315 | return json.dumps({ 316 | 'Audio File': audiofile, 317 | 'Audio Type': audiotype, 318 | 'Amazon Polly Response Metadata': str(response['ResponseMetadata']) 319 | }) 320 | 321 | def _dispatch(self, request): 322 | """Amazon Polly supports a number of APIs. This will call the right one based on the content of request. 323 | 324 | Currently "SynthesizeSpeech" is the only recognized action. Basically this method just delegates the work 325 | to ``self._synthesize_speech_and_save`` and returns the result as is. It will simply raise if a different 326 | action is passed in. 327 | 328 | :param request: an instance of PollyRequest 329 | :return: whatever returned by the delegate 330 | """ 331 | actions = { 332 | 'SynthesizeSpeech': self._synthesize_speech_and_save 333 | # ... more actions could go in here ... 334 | } 335 | 336 | if request.polly_action not in actions: 337 | raise RuntimeError('bad or unsupported Amazon Polly action: "' + request.polly_action + '".') 338 | 339 | return actions[request.polly_action](request) 340 | 341 | def _node_request_handler(self, request): 342 | """The callback function for processing service request. 343 | 344 | It never raises. If anything unexpected happens, it will return a PollyResponse with details of the exception. 345 | 346 | :param request: an instance of PollyRequest 347 | :return: a PollyResponse 348 | """ 349 | rospy.loginfo('Amazon Polly Request: {}'.format(request)) 350 | 351 | try: 352 | response = self._dispatch(request) 353 | rospy.loginfo('will return {}'.format(response)) 354 | return PollyResponse(result=response) 355 | except Exception as e: 356 | current_dir = os.path.dirname(os.path.abspath(__file__)) 357 | exc_type = sys.exc_info()[0] 358 | 359 | # not using `issubclass(exc_type, ConnectionError)` for the condition below because some versions 360 | # of urllib3 raises exception when doing `from requests.exceptions import ConnectionError` 361 | error_ogg_filename = 'connerror.ogg' if 'ConnectionError' in exc_type.__name__ else 'error.ogg' 362 | 363 | error_details = { 364 | 'Audio File': os.path.join(current_dir, 'data', error_ogg_filename), 365 | 'Audio Type': 'ogg', 366 | 'Exception': { 367 | 'Type': str(exc_type), 368 | 'Module': exc_type.__module__, 369 | 'Name': exc_type.__name__, 370 | 'Value': str(e), 371 | }, 372 | 'Traceback': traceback.format_exc() 373 | } 374 | 375 | error_str = json.dumps(error_details) 376 | rospy.logerr(error_str) 377 | return PollyResponse(result=error_str) 378 | 379 | def synthesize(self, **kws): 380 | """Call this method if you want to use polly but don't want to start a node. 381 | 382 | :param kws: input as defined in Polly.srv 383 | :return: a string in JSON form with detailed information, success or failure 384 | """ 385 | req = PollyRequest(polly_action='SynthesizeSpeech', **kws) 386 | return self._node_request_handler(req) 387 | 388 | def start(self, node_name='polly_node', service_name='polly'): 389 | """The entry point of a ROS service node. 390 | 391 | Details of the service API can be found in Polly.srv. 392 | 393 | :param node_name: name of ROS node 394 | :param service_name: name of ROS service 395 | :return: it doesn't return 396 | """ 397 | rospy.init_node(node_name) 398 | 399 | service = rospy.Service(service_name, Polly, self._node_request_handler) 400 | 401 | rospy.loginfo('polly running: {}'.format(service.uri)) 402 | 403 | rospy.spin() 404 | 405 | 406 | def main(): 407 | usage = '''usage: %prog [options] 408 | ''' 409 | 410 | parser = OptionParser(usage) 411 | 412 | parser.add_option("-n", "--node-name", dest="node_name", default='polly_node', 413 | help="name of the ROS node", 414 | metavar="NODE_NAME") 415 | parser.add_option("-s", "--service-name", dest="service_name", default='polly', 416 | help="name of the ROS service", 417 | metavar="SERVICE_NAME") 418 | 419 | (options, args) = parser.parse_args() 420 | 421 | node_name = options.node_name 422 | service_name = options.service_name 423 | 424 | AmazonPolly().start(node_name=node_name, service_name=service_name) 425 | 426 | 427 | if __name__ == "__main__": 428 | main() 429 | -------------------------------------------------------------------------------- /tts/src/tts/data/models/polly/2016-06-10/service-2.json: -------------------------------------------------------------------------------- 1 | { 2 | "version":"2.0", 3 | "metadata":{ 4 | "apiVersion":"2016-06-10", 5 | "endpointPrefix":"polly", 6 | "protocol":"rest-json", 7 | "serviceFullName":"Amazon Polly", 8 | "serviceId":"Polly", 9 | "signatureVersion":"v4", 10 | "uid":"polly-2016-06-10" 11 | }, 12 | "operations":{ 13 | "DeleteLexicon":{ 14 | "name":"DeleteLexicon", 15 | "http":{ 16 | "method":"DELETE", 17 | "requestUri":"/v1/lexicons/{LexiconName}", 18 | "responseCode":200 19 | }, 20 | "input":{"shape":"DeleteLexiconInput"}, 21 | "output":{"shape":"DeleteLexiconOutput"}, 22 | "errors":[ 23 | {"shape":"LexiconNotFoundException"}, 24 | {"shape":"ServiceFailureException"} 25 | ], 26 | "documentation":"

Deletes the specified pronunciation lexicon stored in an AWS Region. A lexicon which has been deleted is not available for speech synthesis, nor is it possible to retrieve it using either the GetLexicon or ListLexicon APIs.

For more information, see Managing Lexicons.

" 27 | }, 28 | "DescribeVoices":{ 29 | "name":"DescribeVoices", 30 | "http":{ 31 | "method":"GET", 32 | "requestUri":"/v1/voices", 33 | "responseCode":200 34 | }, 35 | "input":{"shape":"DescribeVoicesInput"}, 36 | "output":{"shape":"DescribeVoicesOutput"}, 37 | "errors":[ 38 | {"shape":"InvalidNextTokenException"}, 39 | {"shape":"ServiceFailureException"} 40 | ], 41 | "documentation":"

Returns the list of voices that are available for use when requesting speech synthesis. Each voice speaks a specified language, is either male or female, and is identified by an ID, which is the ASCII version of the voice name.

When synthesizing speech ( SynthesizeSpeech ), you provide the voice ID for the voice you want from the list of voices returned by DescribeVoices.

For example, you want your news reader application to read news in a specific language, but giving a user the option to choose the voice. Using the DescribeVoices operation you can provide the user with a list of available voices to select from.

You can optionally specify a language code to filter the available voices. For example, if you specify en-US, the operation returns a list of all available US English voices.

This operation requires permissions to perform the polly:DescribeVoices action.

" 42 | }, 43 | "GetLexicon":{ 44 | "name":"GetLexicon", 45 | "http":{ 46 | "method":"GET", 47 | "requestUri":"/v1/lexicons/{LexiconName}", 48 | "responseCode":200 49 | }, 50 | "input":{"shape":"GetLexiconInput"}, 51 | "output":{"shape":"GetLexiconOutput"}, 52 | "errors":[ 53 | {"shape":"LexiconNotFoundException"}, 54 | {"shape":"ServiceFailureException"} 55 | ], 56 | "documentation":"

Returns the content of the specified pronunciation lexicon stored in an AWS Region. For more information, see Managing Lexicons.

" 57 | }, 58 | "GetSpeechSynthesisTask":{ 59 | "name":"GetSpeechSynthesisTask", 60 | "http":{ 61 | "method":"GET", 62 | "requestUri":"/v1/synthesisTasks/{TaskId}", 63 | "responseCode":200 64 | }, 65 | "input":{"shape":"GetSpeechSynthesisTaskInput"}, 66 | "output":{"shape":"GetSpeechSynthesisTaskOutput"}, 67 | "errors":[ 68 | {"shape":"InvalidTaskIdException"}, 69 | {"shape":"ServiceFailureException"}, 70 | {"shape":"SynthesisTaskNotFoundException"} 71 | ], 72 | "documentation":"

Retrieves a specific SpeechSynthesisTask object based on its TaskID. This object contains information about the given speech synthesis task, including the status of the task, and a link to the S3 bucket containing the output of the task.

" 73 | }, 74 | "ListLexicons":{ 75 | "name":"ListLexicons", 76 | "http":{ 77 | "method":"GET", 78 | "requestUri":"/v1/lexicons", 79 | "responseCode":200 80 | }, 81 | "input":{"shape":"ListLexiconsInput"}, 82 | "output":{"shape":"ListLexiconsOutput"}, 83 | "errors":[ 84 | {"shape":"InvalidNextTokenException"}, 85 | {"shape":"ServiceFailureException"} 86 | ], 87 | "documentation":"

Returns a list of pronunciation lexicons stored in an AWS Region. For more information, see Managing Lexicons.

" 88 | }, 89 | "ListSpeechSynthesisTasks":{ 90 | "name":"ListSpeechSynthesisTasks", 91 | "http":{ 92 | "method":"GET", 93 | "requestUri":"/v1/synthesisTasks", 94 | "responseCode":200 95 | }, 96 | "input":{"shape":"ListSpeechSynthesisTasksInput"}, 97 | "output":{"shape":"ListSpeechSynthesisTasksOutput"}, 98 | "errors":[ 99 | {"shape":"InvalidNextTokenException"}, 100 | {"shape":"ServiceFailureException"} 101 | ], 102 | "documentation":"

Returns a list of SpeechSynthesisTask objects ordered by their creation date. This operation can filter the tasks by their status, for example, allowing users to list only tasks that are completed.

" 103 | }, 104 | "PutLexicon":{ 105 | "name":"PutLexicon", 106 | "http":{ 107 | "method":"PUT", 108 | "requestUri":"/v1/lexicons/{LexiconName}", 109 | "responseCode":200 110 | }, 111 | "input":{"shape":"PutLexiconInput"}, 112 | "output":{"shape":"PutLexiconOutput"}, 113 | "errors":[ 114 | {"shape":"InvalidLexiconException"}, 115 | {"shape":"UnsupportedPlsAlphabetException"}, 116 | {"shape":"UnsupportedPlsLanguageException"}, 117 | {"shape":"LexiconSizeExceededException"}, 118 | {"shape":"MaxLexemeLengthExceededException"}, 119 | {"shape":"MaxLexiconsNumberExceededException"}, 120 | {"shape":"ServiceFailureException"} 121 | ], 122 | "documentation":"

Stores a pronunciation lexicon in an AWS Region. If a lexicon with the same name already exists in the region, it is overwritten by the new lexicon. Lexicon operations have eventual consistency, therefore, it might take some time before the lexicon is available to the SynthesizeSpeech operation.

For more information, see Managing Lexicons.

" 123 | }, 124 | "StartSpeechSynthesisTask":{ 125 | "name":"StartSpeechSynthesisTask", 126 | "http":{ 127 | "method":"POST", 128 | "requestUri":"/v1/synthesisTasks", 129 | "responseCode":200 130 | }, 131 | "input":{"shape":"StartSpeechSynthesisTaskInput"}, 132 | "output":{"shape":"StartSpeechSynthesisTaskOutput"}, 133 | "errors":[ 134 | {"shape":"TextLengthExceededException"}, 135 | {"shape":"InvalidS3BucketException"}, 136 | {"shape":"InvalidS3KeyException"}, 137 | {"shape":"InvalidSampleRateException"}, 138 | {"shape":"InvalidSnsTopicArnException"}, 139 | {"shape":"InvalidSsmlException"}, 140 | {"shape":"LexiconNotFoundException"}, 141 | {"shape":"ServiceFailureException"}, 142 | {"shape":"MarksNotSupportedForFormatException"}, 143 | {"shape":"SsmlMarksNotSupportedForTextTypeException"}, 144 | {"shape":"LanguageNotSupportedException"} 145 | ], 146 | "documentation":"

Allows the creation of an asynchronous synthesis task, by starting a new SpeechSynthesisTask. This operation requires all the standard information needed for speech synthesis, plus the name of an Amazon S3 bucket for the service to store the output of the synthesis task and two optional parameters (OutputS3KeyPrefix and SnsTopicArn). Once the synthesis task is created, this operation will return a SpeechSynthesisTask object, which will include an identifier of this task as well as the current status.

" 147 | }, 148 | "SynthesizeSpeech":{ 149 | "name":"SynthesizeSpeech", 150 | "http":{ 151 | "method":"POST", 152 | "requestUri":"/v1/speech", 153 | "responseCode":200 154 | }, 155 | "input":{"shape":"SynthesizeSpeechInput"}, 156 | "output":{"shape":"SynthesizeSpeechOutput"}, 157 | "errors":[ 158 | {"shape":"TextLengthExceededException"}, 159 | {"shape":"InvalidSampleRateException"}, 160 | {"shape":"InvalidSsmlException"}, 161 | {"shape":"LexiconNotFoundException"}, 162 | {"shape":"ServiceFailureException"}, 163 | {"shape":"MarksNotSupportedForFormatException"}, 164 | {"shape":"SsmlMarksNotSupportedForTextTypeException"}, 165 | {"shape":"LanguageNotSupportedException"} 166 | ], 167 | "documentation":"

Synthesizes UTF-8 input, plain text or SSML, to a stream of bytes. SSML input must be valid, well-formed SSML. Some alphabets might not be available with all the voices (for example, Cyrillic might not be read at all by English voices) unless phoneme mapping is used. For more information, see How it Works.

" 168 | } 169 | }, 170 | "shapes":{ 171 | "Alphabet":{"type":"string"}, 172 | "AudioStream":{ 173 | "type":"blob", 174 | "streaming":true 175 | }, 176 | "ContentType":{"type":"string"}, 177 | "DateTime":{"type":"timestamp"}, 178 | "DeleteLexiconInput":{ 179 | "type":"structure", 180 | "required":["Name"], 181 | "members":{ 182 | "Name":{ 183 | "shape":"LexiconName", 184 | "documentation":"

The name of the lexicon to delete. Must be an existing lexicon in the region.

", 185 | "location":"uri", 186 | "locationName":"LexiconName" 187 | } 188 | } 189 | }, 190 | "DeleteLexiconOutput":{ 191 | "type":"structure", 192 | "members":{ 193 | } 194 | }, 195 | "DescribeVoicesInput":{ 196 | "type":"structure", 197 | "members":{ 198 | "LanguageCode":{ 199 | "shape":"LanguageCode", 200 | "documentation":"

The language identification tag (ISO 639 code for the language name-ISO 3166 country code) for filtering the list of voices returned. If you don't specify this optional parameter, all available voices are returned.

", 201 | "location":"querystring", 202 | "locationName":"LanguageCode" 203 | }, 204 | "IncludeAdditionalLanguageCodes":{ 205 | "shape":"IncludeAdditionalLanguageCodes", 206 | "documentation":"

Boolean value indicating whether to return any bilingual voices that use the specified language as an additional language. For instance, if you request all languages that use US English (es-US), and there is an Italian voice that speaks both Italian (it-IT) and US English, that voice will be included if you specify yes but not if you specify no.

", 207 | "location":"querystring", 208 | "locationName":"IncludeAdditionalLanguageCodes" 209 | }, 210 | "NextToken":{ 211 | "shape":"NextToken", 212 | "documentation":"

An opaque pagination token returned from the previous DescribeVoices operation. If present, this indicates where to continue the listing.

", 213 | "location":"querystring", 214 | "locationName":"NextToken" 215 | } 216 | } 217 | }, 218 | "DescribeVoicesOutput":{ 219 | "type":"structure", 220 | "members":{ 221 | "Voices":{ 222 | "shape":"VoiceList", 223 | "documentation":"

A list of voices with their properties.

" 224 | }, 225 | "NextToken":{ 226 | "shape":"NextToken", 227 | "documentation":"

The pagination token to use in the next request to continue the listing of voices. NextToken is returned only if the response is truncated.

" 228 | } 229 | } 230 | }, 231 | "ErrorMessage":{"type":"string"}, 232 | "Gender":{ 233 | "type":"string", 234 | "enum":[ 235 | "Female", 236 | "Male" 237 | ] 238 | }, 239 | "GetLexiconInput":{ 240 | "type":"structure", 241 | "required":["Name"], 242 | "members":{ 243 | "Name":{ 244 | "shape":"LexiconName", 245 | "documentation":"

Name of the lexicon.

", 246 | "location":"uri", 247 | "locationName":"LexiconName" 248 | } 249 | } 250 | }, 251 | "GetLexiconOutput":{ 252 | "type":"structure", 253 | "members":{ 254 | "Lexicon":{ 255 | "shape":"Lexicon", 256 | "documentation":"

Lexicon object that provides name and the string content of the lexicon.

" 257 | }, 258 | "LexiconAttributes":{ 259 | "shape":"LexiconAttributes", 260 | "documentation":"

Metadata of the lexicon, including phonetic alphabetic used, language code, lexicon ARN, number of lexemes defined in the lexicon, and size of lexicon in bytes.

" 261 | } 262 | } 263 | }, 264 | "GetSpeechSynthesisTaskInput":{ 265 | "type":"structure", 266 | "required":["TaskId"], 267 | "members":{ 268 | "TaskId":{ 269 | "shape":"TaskId", 270 | "documentation":"

The Amazon Polly generated identifier for a speech synthesis task.

", 271 | "location":"uri", 272 | "locationName":"TaskId" 273 | } 274 | } 275 | }, 276 | "GetSpeechSynthesisTaskOutput":{ 277 | "type":"structure", 278 | "members":{ 279 | "SynthesisTask":{ 280 | "shape":"SynthesisTask", 281 | "documentation":"

SynthesisTask object that provides information from the requested task, including output format, creation time, task status, and so on.

" 282 | } 283 | } 284 | }, 285 | "IncludeAdditionalLanguageCodes":{"type":"boolean"}, 286 | "InvalidLexiconException":{ 287 | "type":"structure", 288 | "members":{ 289 | "message":{"shape":"ErrorMessage"} 290 | }, 291 | "documentation":"

Amazon Polly can't find the specified lexicon. Verify that the lexicon's name is spelled correctly, and then try again.

", 292 | "error":{"httpStatusCode":400}, 293 | "exception":true 294 | }, 295 | "InvalidNextTokenException":{ 296 | "type":"structure", 297 | "members":{ 298 | "message":{"shape":"ErrorMessage"} 299 | }, 300 | "documentation":"

The NextToken is invalid. Verify that it's spelled correctly, and then try again.

", 301 | "error":{"httpStatusCode":400}, 302 | "exception":true 303 | }, 304 | "InvalidS3BucketException":{ 305 | "type":"structure", 306 | "members":{ 307 | "message":{"shape":"ErrorMessage"} 308 | }, 309 | "documentation":"

The provided Amazon S3 bucket name is invalid. Please check your input with S3 bucket naming requirements and try again.

", 310 | "error":{"httpStatusCode":400}, 311 | "exception":true 312 | }, 313 | "InvalidS3KeyException":{ 314 | "type":"structure", 315 | "members":{ 316 | "message":{"shape":"ErrorMessage"} 317 | }, 318 | "documentation":"

The provided Amazon S3 key prefix is invalid. Please provide a valid S3 object key name.

", 319 | "error":{"httpStatusCode":400}, 320 | "exception":true 321 | }, 322 | "InvalidSampleRateException":{ 323 | "type":"structure", 324 | "members":{ 325 | "message":{"shape":"ErrorMessage"} 326 | }, 327 | "documentation":"

The specified sample rate is not valid.

", 328 | "error":{"httpStatusCode":400}, 329 | "exception":true 330 | }, 331 | "InvalidSnsTopicArnException":{ 332 | "type":"structure", 333 | "members":{ 334 | "message":{"shape":"ErrorMessage"} 335 | }, 336 | "documentation":"

The provided SNS topic ARN is invalid. Please provide a valid SNS topic ARN and try again.

", 337 | "error":{"httpStatusCode":400}, 338 | "exception":true 339 | }, 340 | "InvalidSsmlException":{ 341 | "type":"structure", 342 | "members":{ 343 | "message":{"shape":"ErrorMessage"} 344 | }, 345 | "documentation":"

The SSML you provided is invalid. Verify the SSML syntax, spelling of tags and values, and then try again.

", 346 | "error":{"httpStatusCode":400}, 347 | "exception":true 348 | }, 349 | "InvalidTaskIdException":{ 350 | "type":"structure", 351 | "members":{ 352 | "message":{"shape":"ErrorMessage"} 353 | }, 354 | "documentation":"

The provided Task ID is not valid. Please provide a valid Task ID and try again.

", 355 | "error":{"httpStatusCode":400}, 356 | "exception":true 357 | }, 358 | "LanguageCode":{ 359 | "type":"string", 360 | "enum":[ 361 | "cmn-CN", 362 | "cy-GB", 363 | "da-DK", 364 | "de-DE", 365 | "en-AU", 366 | "en-GB", 367 | "en-GB-WLS", 368 | "en-IN", 369 | "en-US", 370 | "es-ES", 371 | "es-US", 372 | "fr-CA", 373 | "fr-FR", 374 | "is-IS", 375 | "it-IT", 376 | "ja-JP", 377 | "hi-IN", 378 | "ko-KR", 379 | "nb-NO", 380 | "nl-NL", 381 | "pl-PL", 382 | "pt-BR", 383 | "pt-PT", 384 | "ro-RO", 385 | "ru-RU", 386 | "sv-SE", 387 | "tr-TR" 388 | ] 389 | }, 390 | "LanguageCodeList":{ 391 | "type":"list", 392 | "member":{"shape":"LanguageCode"} 393 | }, 394 | "LanguageName":{"type":"string"}, 395 | "LanguageNotSupportedException":{ 396 | "type":"structure", 397 | "members":{ 398 | "message":{"shape":"ErrorMessage"} 399 | }, 400 | "documentation":"

The language specified is not currently supported by Amazon Polly in this capacity.

", 401 | "error":{"httpStatusCode":400}, 402 | "exception":true 403 | }, 404 | "LastModified":{"type":"timestamp"}, 405 | "LexemesCount":{"type":"integer"}, 406 | "Lexicon":{ 407 | "type":"structure", 408 | "members":{ 409 | "Content":{ 410 | "shape":"LexiconContent", 411 | "documentation":"

Lexicon content in string format. The content of a lexicon must be in PLS format.

" 412 | }, 413 | "Name":{ 414 | "shape":"LexiconName", 415 | "documentation":"

Name of the lexicon.

" 416 | } 417 | }, 418 | "documentation":"

Provides lexicon name and lexicon content in string format. For more information, see Pronunciation Lexicon Specification (PLS) Version 1.0.

" 419 | }, 420 | "LexiconArn":{"type":"string"}, 421 | "LexiconAttributes":{ 422 | "type":"structure", 423 | "members":{ 424 | "Alphabet":{ 425 | "shape":"Alphabet", 426 | "documentation":"

Phonetic alphabet used in the lexicon. Valid values are ipa and x-sampa.

" 427 | }, 428 | "LanguageCode":{ 429 | "shape":"LanguageCode", 430 | "documentation":"

Language code that the lexicon applies to. A lexicon with a language code such as \"en\" would be applied to all English languages (en-GB, en-US, en-AUS, en-WLS, and so on.

" 431 | }, 432 | "LastModified":{ 433 | "shape":"LastModified", 434 | "documentation":"

Date lexicon was last modified (a timestamp value).

" 435 | }, 436 | "LexiconArn":{ 437 | "shape":"LexiconArn", 438 | "documentation":"

Amazon Resource Name (ARN) of the lexicon.

" 439 | }, 440 | "LexemesCount":{ 441 | "shape":"LexemesCount", 442 | "documentation":"

Number of lexemes in the lexicon.

" 443 | }, 444 | "Size":{ 445 | "shape":"Size", 446 | "documentation":"

Total size of the lexicon, in characters.

" 447 | } 448 | }, 449 | "documentation":"

Contains metadata describing the lexicon such as the number of lexemes, language code, and so on. For more information, see Managing Lexicons.

" 450 | }, 451 | "LexiconContent":{"type":"string"}, 452 | "LexiconDescription":{ 453 | "type":"structure", 454 | "members":{ 455 | "Name":{ 456 | "shape":"LexiconName", 457 | "documentation":"

Name of the lexicon.

" 458 | }, 459 | "Attributes":{ 460 | "shape":"LexiconAttributes", 461 | "documentation":"

Provides lexicon metadata.

" 462 | } 463 | }, 464 | "documentation":"

Describes the content of the lexicon.

" 465 | }, 466 | "LexiconDescriptionList":{ 467 | "type":"list", 468 | "member":{"shape":"LexiconDescription"} 469 | }, 470 | "LexiconName":{ 471 | "type":"string", 472 | "pattern":"[0-9A-Za-z]{1,20}", 473 | "sensitive":true 474 | }, 475 | "LexiconNameList":{ 476 | "type":"list", 477 | "member":{"shape":"LexiconName"}, 478 | "max":5 479 | }, 480 | "LexiconNotFoundException":{ 481 | "type":"structure", 482 | "members":{ 483 | "message":{"shape":"ErrorMessage"} 484 | }, 485 | "documentation":"

Amazon Polly can't find the specified lexicon. This could be caused by a lexicon that is missing, its name is misspelled or specifying a lexicon that is in a different region.

Verify that the lexicon exists, is in the region (see ListLexicons) and that you spelled its name is spelled correctly. Then try again.

", 486 | "error":{"httpStatusCode":404}, 487 | "exception":true 488 | }, 489 | "LexiconSizeExceededException":{ 490 | "type":"structure", 491 | "members":{ 492 | "message":{"shape":"ErrorMessage"} 493 | }, 494 | "documentation":"

The maximum size of the specified lexicon would be exceeded by this operation.

", 495 | "error":{"httpStatusCode":400}, 496 | "exception":true 497 | }, 498 | "ListLexiconsInput":{ 499 | "type":"structure", 500 | "members":{ 501 | "NextToken":{ 502 | "shape":"NextToken", 503 | "documentation":"

An opaque pagination token returned from previous ListLexicons operation. If present, indicates where to continue the list of lexicons.

", 504 | "location":"querystring", 505 | "locationName":"NextToken" 506 | } 507 | } 508 | }, 509 | "ListLexiconsOutput":{ 510 | "type":"structure", 511 | "members":{ 512 | "Lexicons":{ 513 | "shape":"LexiconDescriptionList", 514 | "documentation":"

A list of lexicon names and attributes.

" 515 | }, 516 | "NextToken":{ 517 | "shape":"NextToken", 518 | "documentation":"

The pagination token to use in the next request to continue the listing of lexicons. NextToken is returned only if the response is truncated.

" 519 | } 520 | } 521 | }, 522 | "ListSpeechSynthesisTasksInput":{ 523 | "type":"structure", 524 | "members":{ 525 | "MaxResults":{ 526 | "shape":"MaxResults", 527 | "documentation":"

Maximum number of speech synthesis tasks returned in a List operation.

", 528 | "location":"querystring", 529 | "locationName":"MaxResults" 530 | }, 531 | "NextToken":{ 532 | "shape":"NextToken", 533 | "documentation":"

The pagination token to use in the next request to continue the listing of speech synthesis tasks.

", 534 | "location":"querystring", 535 | "locationName":"NextToken" 536 | }, 537 | "Status":{ 538 | "shape":"TaskStatus", 539 | "documentation":"

Status of the speech synthesis tasks returned in a List operation

", 540 | "location":"querystring", 541 | "locationName":"Status" 542 | } 543 | } 544 | }, 545 | "ListSpeechSynthesisTasksOutput":{ 546 | "type":"structure", 547 | "members":{ 548 | "NextToken":{ 549 | "shape":"NextToken", 550 | "documentation":"

An opaque pagination token returned from the previous List operation in this request. If present, this indicates where to continue the listing.

" 551 | }, 552 | "SynthesisTasks":{ 553 | "shape":"SynthesisTasks", 554 | "documentation":"

List of SynthesisTask objects that provides information from the specified task in the list request, including output format, creation time, task status, and so on.

" 555 | } 556 | } 557 | }, 558 | "MarksNotSupportedForFormatException":{ 559 | "type":"structure", 560 | "members":{ 561 | "message":{"shape":"ErrorMessage"} 562 | }, 563 | "documentation":"

Speech marks are not supported for the OutputFormat selected. Speech marks are only available for content in json format.

", 564 | "error":{"httpStatusCode":400}, 565 | "exception":true 566 | }, 567 | "MaxLexemeLengthExceededException":{ 568 | "type":"structure", 569 | "members":{ 570 | "message":{"shape":"ErrorMessage"} 571 | }, 572 | "documentation":"

The maximum size of the lexeme would be exceeded by this operation.

", 573 | "error":{"httpStatusCode":400}, 574 | "exception":true 575 | }, 576 | "MaxLexiconsNumberExceededException":{ 577 | "type":"structure", 578 | "members":{ 579 | "message":{"shape":"ErrorMessage"} 580 | }, 581 | "documentation":"

The maximum number of lexicons would be exceeded by this operation.

", 582 | "error":{"httpStatusCode":400}, 583 | "exception":true 584 | }, 585 | "MaxResults":{ 586 | "type":"integer", 587 | "max":100, 588 | "min":1 589 | }, 590 | "NextToken":{"type":"string"}, 591 | "OutputFormat":{ 592 | "type":"string", 593 | "enum":[ 594 | "json", 595 | "mp3", 596 | "ogg_vorbis", 597 | "pcm" 598 | ] 599 | }, 600 | "OutputS3BucketName":{ 601 | "type":"string", 602 | "pattern":"^[a-z0-9][\\.\\-a-z0-9]{1,61}[a-z0-9]$" 603 | }, 604 | "OutputS3KeyPrefix":{ 605 | "type":"string", 606 | "pattern":"^[0-9a-zA-Z\\/\\!\\-_\\.\\*\\'\\(\\)]{0,800}$" 607 | }, 608 | "OutputUri":{"type":"string"}, 609 | "PutLexiconInput":{ 610 | "type":"structure", 611 | "required":[ 612 | "Name", 613 | "Content" 614 | ], 615 | "members":{ 616 | "Name":{ 617 | "shape":"LexiconName", 618 | "documentation":"

Name of the lexicon. The name must follow the regular express format [0-9A-Za-z]{1,20}. That is, the name is a case-sensitive alphanumeric string up to 20 characters long.

", 619 | "location":"uri", 620 | "locationName":"LexiconName" 621 | }, 622 | "Content":{ 623 | "shape":"LexiconContent", 624 | "documentation":"

Content of the PLS lexicon as string data.

" 625 | } 626 | } 627 | }, 628 | "PutLexiconOutput":{ 629 | "type":"structure", 630 | "members":{ 631 | } 632 | }, 633 | "RequestCharacters":{"type":"integer"}, 634 | "SampleRate":{"type":"string"}, 635 | "ServiceFailureException":{ 636 | "type":"structure", 637 | "members":{ 638 | "message":{"shape":"ErrorMessage"} 639 | }, 640 | "documentation":"

An unknown condition has caused a service failure.

", 641 | "error":{"httpStatusCode":500}, 642 | "exception":true, 643 | "fault":true 644 | }, 645 | "Size":{"type":"integer"}, 646 | "SnsTopicArn":{ 647 | "type":"string", 648 | "pattern":"^arn:aws(-(cn|iso(-b)?|us-gov))?:sns:.*:\\w{12}:.+$" 649 | }, 650 | "SpeechMarkType":{ 651 | "type":"string", 652 | "enum":[ 653 | "sentence", 654 | "ssml", 655 | "viseme", 656 | "word" 657 | ] 658 | }, 659 | "SpeechMarkTypeList":{ 660 | "type":"list", 661 | "member":{"shape":"SpeechMarkType"}, 662 | "max":4 663 | }, 664 | "SsmlMarksNotSupportedForTextTypeException":{ 665 | "type":"structure", 666 | "members":{ 667 | "message":{"shape":"ErrorMessage"} 668 | }, 669 | "documentation":"

SSML speech marks are not supported for plain text-type input.

", 670 | "error":{"httpStatusCode":400}, 671 | "exception":true 672 | }, 673 | "StartSpeechSynthesisTaskInput":{ 674 | "type":"structure", 675 | "required":[ 676 | "OutputFormat", 677 | "OutputS3BucketName", 678 | "Text", 679 | "VoiceId" 680 | ], 681 | "members":{ 682 | "LexiconNames":{ 683 | "shape":"LexiconNameList", 684 | "documentation":"

List of one or more pronunciation lexicon names you want the service to apply during synthesis. Lexicons are applied only if the language of the lexicon is the same as the language of the voice.

" 685 | }, 686 | "OutputFormat":{ 687 | "shape":"OutputFormat", 688 | "documentation":"

The format in which the returned output will be encoded. For audio stream, this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.

" 689 | }, 690 | "OutputS3BucketName":{ 691 | "shape":"OutputS3BucketName", 692 | "documentation":"

Amazon S3 bucket name to which the output file will be saved.

" 693 | }, 694 | "OutputS3KeyPrefix":{ 695 | "shape":"OutputS3KeyPrefix", 696 | "documentation":"

The Amazon S3 key prefix for the output speech file.

" 697 | }, 698 | "SampleRate":{ 699 | "shape":"SampleRate", 700 | "documentation":"

The audio frequency specified in Hz.

The valid values for mp3 and ogg_vorbis are \"8000\", \"16000\", and \"22050\". The default value is \"22050\".

Valid values for pcm are \"8000\" and \"16000\" The default value is \"16000\".

" 701 | }, 702 | "SnsTopicArn":{ 703 | "shape":"SnsTopicArn", 704 | "documentation":"

ARN for the SNS topic optionally used for providing status notification for a speech synthesis task.

" 705 | }, 706 | "SpeechMarkTypes":{ 707 | "shape":"SpeechMarkTypeList", 708 | "documentation":"

The type of speech marks returned for the input text.

" 709 | }, 710 | "Text":{ 711 | "shape":"Text", 712 | "documentation":"

The input text to synthesize. If you specify ssml as the TextType, follow the SSML format for the input text.

" 713 | }, 714 | "TextType":{ 715 | "shape":"TextType", 716 | "documentation":"

Specifies whether the input text is plain text or SSML. The default value is plain text.

" 717 | }, 718 | "VoiceId":{ 719 | "shape":"VoiceId", 720 | "documentation":"

Voice ID to use for the synthesis.

" 721 | }, 722 | "LanguageCode":{ 723 | "shape":"LanguageCode", 724 | "documentation":"

Optional language code for the Speech Synthesis request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).

If a bilingual voice is used and no language code is specified, Amazon Polly will use the default language of the bilingual voice. The default language for any voice is the one returned by the DescribeVoices operation for the LanguageCode parameter. For example, if no language code is specified, Aditi will use Indian English rather than Hindi.

" 725 | } 726 | } 727 | }, 728 | "StartSpeechSynthesisTaskOutput":{ 729 | "type":"structure", 730 | "members":{ 731 | "SynthesisTask":{ 732 | "shape":"SynthesisTask", 733 | "documentation":"

SynthesisTask object that provides information and attributes about a newly submitted speech synthesis task.

" 734 | } 735 | } 736 | }, 737 | "SynthesisTask":{ 738 | "type":"structure", 739 | "members":{ 740 | "TaskId":{ 741 | "shape":"TaskId", 742 | "documentation":"

The Amazon Polly generated identifier for a speech synthesis task.

" 743 | }, 744 | "TaskStatus":{ 745 | "shape":"TaskStatus", 746 | "documentation":"

Current status of the individual speech synthesis task.

" 747 | }, 748 | "TaskStatusReason":{ 749 | "shape":"TaskStatusReason", 750 | "documentation":"

Reason for the current status of a specific speech synthesis task, including errors if the task has failed.

" 751 | }, 752 | "OutputUri":{ 753 | "shape":"OutputUri", 754 | "documentation":"

Pathway for the output speech file.

" 755 | }, 756 | "CreationTime":{ 757 | "shape":"DateTime", 758 | "documentation":"

Timestamp for the time the synthesis task was started.

" 759 | }, 760 | "RequestCharacters":{ 761 | "shape":"RequestCharacters", 762 | "documentation":"

Number of billable characters synthesized.

" 763 | }, 764 | "SnsTopicArn":{ 765 | "shape":"SnsTopicArn", 766 | "documentation":"

ARN for the SNS topic optionally used for providing status notification for a speech synthesis task.

" 767 | }, 768 | "LexiconNames":{ 769 | "shape":"LexiconNameList", 770 | "documentation":"

List of one or more pronunciation lexicon names you want the service to apply during synthesis. Lexicons are applied only if the language of the lexicon is the same as the language of the voice.

" 771 | }, 772 | "OutputFormat":{ 773 | "shape":"OutputFormat", 774 | "documentation":"

The format in which the returned output will be encoded. For audio stream, this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.

" 775 | }, 776 | "SampleRate":{ 777 | "shape":"SampleRate", 778 | "documentation":"

The audio frequency specified in Hz.

The valid values for mp3 and ogg_vorbis are \"8000\", \"16000\", and \"22050\". The default value is \"22050\".

Valid values for pcm are \"8000\" and \"16000\" The default value is \"16000\".

" 779 | }, 780 | "SpeechMarkTypes":{ 781 | "shape":"SpeechMarkTypeList", 782 | "documentation":"

The type of speech marks returned for the input text.

" 783 | }, 784 | "TextType":{ 785 | "shape":"TextType", 786 | "documentation":"

Specifies whether the input text is plain text or SSML. The default value is plain text.

" 787 | }, 788 | "VoiceId":{ 789 | "shape":"VoiceId", 790 | "documentation":"

Voice ID to use for the synthesis.

" 791 | }, 792 | "LanguageCode":{ 793 | "shape":"LanguageCode", 794 | "documentation":"

Optional language code for a synthesis task. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).

If a bilingual voice is used and no language code is specified, Amazon Polly will use the default language of the bilingual voice. The default language for any voice is the one returned by the DescribeVoices operation for the LanguageCode parameter. For example, if no language code is specified, Aditi will use Indian English rather than Hindi.

" 795 | } 796 | }, 797 | "documentation":"

SynthesisTask object that provides information about a speech synthesis task.

" 798 | }, 799 | "SynthesisTaskNotFoundException":{ 800 | "type":"structure", 801 | "members":{ 802 | "message":{"shape":"ErrorMessage"} 803 | }, 804 | "documentation":"

The Speech Synthesis task with requested Task ID cannot be found.

", 805 | "error":{"httpStatusCode":400}, 806 | "exception":true 807 | }, 808 | "SynthesisTasks":{ 809 | "type":"list", 810 | "member":{"shape":"SynthesisTask"} 811 | }, 812 | "SynthesizeSpeechInput":{ 813 | "type":"structure", 814 | "required":[ 815 | "OutputFormat", 816 | "Text", 817 | "VoiceId" 818 | ], 819 | "members":{ 820 | "LexiconNames":{ 821 | "shape":"LexiconNameList", 822 | "documentation":"

List of one or more pronunciation lexicon names you want the service to apply during synthesis. Lexicons are applied only if the language of the lexicon is the same as the language of the voice. For information about storing lexicons, see PutLexicon.

" 823 | }, 824 | "OutputFormat":{ 825 | "shape":"OutputFormat", 826 | "documentation":"

The format in which the returned output will be encoded. For audio stream, this will be mp3, ogg_vorbis, or pcm. For speech marks, this will be json.

When pcm is used, the content returned is audio/pcm in a signed 16-bit, 1 channel (mono), little-endian format.

" 827 | }, 828 | "SampleRate":{ 829 | "shape":"SampleRate", 830 | "documentation":"

The audio frequency specified in Hz.

The valid values for mp3 and ogg_vorbis are \"8000\", \"16000\", and \"22050\". The default value is \"22050\".

Valid values for pcm are \"8000\" and \"16000\" The default value is \"16000\".

" 831 | }, 832 | "SpeechMarkTypes":{ 833 | "shape":"SpeechMarkTypeList", 834 | "documentation":"

The type of speech marks returned for the input text.

" 835 | }, 836 | "Text":{ 837 | "shape":"Text", 838 | "documentation":"

Input text to synthesize. If you specify ssml as the TextType, follow the SSML format for the input text.

" 839 | }, 840 | "TextType":{ 841 | "shape":"TextType", 842 | "documentation":"

Specifies whether the input text is plain text or SSML. The default value is plain text. For more information, see Using SSML.

" 843 | }, 844 | "VoiceId":{ 845 | "shape":"VoiceId", 846 | "documentation":"

Voice ID to use for the synthesis. You can get a list of available voice IDs by calling the DescribeVoices operation.

" 847 | }, 848 | "LanguageCode":{ 849 | "shape":"LanguageCode", 850 | "documentation":"

Optional language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).

If a bilingual voice is used and no language code is specified, Amazon Polly will use the default language of the bilingual voice. The default language for any voice is the one returned by the DescribeVoices operation for the LanguageCode parameter. For example, if no language code is specified, Aditi will use Indian English rather than Hindi.

" 851 | } 852 | } 853 | }, 854 | "SynthesizeSpeechOutput":{ 855 | "type":"structure", 856 | "members":{ 857 | "AudioStream":{ 858 | "shape":"AudioStream", 859 | "documentation":"

Stream containing the synthesized speech.

" 860 | }, 861 | "ContentType":{ 862 | "shape":"ContentType", 863 | "documentation":"

Specifies the type audio stream. This should reflect the OutputFormat parameter in your request.

", 864 | "location":"header", 865 | "locationName":"Content-Type" 866 | }, 867 | "RequestCharacters":{ 868 | "shape":"RequestCharacters", 869 | "documentation":"

Number of characters synthesized.

", 870 | "location":"header", 871 | "locationName":"x-amzn-RequestCharacters" 872 | } 873 | }, 874 | "payload":"AudioStream" 875 | }, 876 | "TaskId":{ 877 | "type":"string", 878 | "max":128, 879 | "min":1 880 | }, 881 | "TaskStatus":{ 882 | "type":"string", 883 | "enum":[ 884 | "scheduled", 885 | "inProgress", 886 | "completed", 887 | "failed" 888 | ] 889 | }, 890 | "TaskStatusReason":{"type":"string"}, 891 | "Text":{"type":"string"}, 892 | "TextLengthExceededException":{ 893 | "type":"structure", 894 | "members":{ 895 | "message":{"shape":"ErrorMessage"} 896 | }, 897 | "documentation":"

The value of the \"Text\" parameter is longer than the accepted limits. For the SynthesizeSpeech API, the limit for input text is a maximum of 6000 characters total, of which no more than 3000 can be billed characters. For the StartSpeechSynthesisTask API, the maximum is 200,000 characters, of which no more than 100,000 can be billed characters. SSML tags are not counted as billed characters.

", 898 | "error":{"httpStatusCode":400}, 899 | "exception":true 900 | }, 901 | "TextType":{ 902 | "type":"string", 903 | "enum":[ 904 | "ssml", 905 | "text" 906 | ] 907 | }, 908 | "UnsupportedPlsAlphabetException":{ 909 | "type":"structure", 910 | "members":{ 911 | "message":{"shape":"ErrorMessage"} 912 | }, 913 | "documentation":"

The alphabet specified by the lexicon is not a supported alphabet. Valid values are x-sampa and ipa.

", 914 | "error":{"httpStatusCode":400}, 915 | "exception":true 916 | }, 917 | "UnsupportedPlsLanguageException":{ 918 | "type":"structure", 919 | "members":{ 920 | "message":{"shape":"ErrorMessage"} 921 | }, 922 | "documentation":"

The language specified in the lexicon is unsupported. For a list of supported languages, see Lexicon Attributes.

", 923 | "error":{"httpStatusCode":400}, 924 | "exception":true 925 | }, 926 | "Voice":{ 927 | "type":"structure", 928 | "members":{ 929 | "Gender":{ 930 | "shape":"Gender", 931 | "documentation":"

Gender of the voice.

" 932 | }, 933 | "Id":{ 934 | "shape":"VoiceId", 935 | "documentation":"

Amazon Polly assigned voice ID. This is the ID that you specify when calling the SynthesizeSpeech operation.

" 936 | }, 937 | "LanguageCode":{ 938 | "shape":"LanguageCode", 939 | "documentation":"

Language code of the voice.

" 940 | }, 941 | "LanguageName":{ 942 | "shape":"LanguageName", 943 | "documentation":"

Human readable name of the language in English.

" 944 | }, 945 | "Name":{ 946 | "shape":"VoiceName", 947 | "documentation":"

Name of the voice (for example, Salli, Kendra, etc.). This provides a human readable voice name that you might display in your application.

" 948 | }, 949 | "AdditionalLanguageCodes":{ 950 | "shape":"LanguageCodeList", 951 | "documentation":"

Additional codes for languages available for the specified voice in addition to its default language.

For example, the default language for Aditi is Indian English (en-IN) because it was first used for that language. Since Aditi is bilingual and fluent in both Indian English and Hindi, this parameter would show the code hi-IN.

" 952 | } 953 | }, 954 | "documentation":"

Description of the voice.

" 955 | }, 956 | "VoiceId":{ 957 | "type":"string", 958 | "enum":[ 959 | "Geraint", 960 | "Gwyneth", 961 | "Mads", 962 | "Naja", 963 | "Hans", 964 | "Marlene", 965 | "Nicole", 966 | "Russell", 967 | "Amy", 968 | "Brian", 969 | "Emma", 970 | "Raveena", 971 | "Ivy", 972 | "Joanna", 973 | "Joey", 974 | "Justin", 975 | "Kendra", 976 | "Kimberly", 977 | "Matthew", 978 | "Salli", 979 | "Conchita", 980 | "Enrique", 981 | "Miguel", 982 | "Penelope", 983 | "Chantal", 984 | "Celine", 985 | "Lea", 986 | "Mathieu", 987 | "Dora", 988 | "Karl", 989 | "Carla", 990 | "Giorgio", 991 | "Mizuki", 992 | "Liv", 993 | "Lotte", 994 | "Ruben", 995 | "Ewa", 996 | "Jacek", 997 | "Jan", 998 | "Maja", 999 | "Ricardo", 1000 | "Vitoria", 1001 | "Cristiano", 1002 | "Ines", 1003 | "Carmen", 1004 | "Maxim", 1005 | "Tatyana", 1006 | "Astrid", 1007 | "Filiz", 1008 | "Vicki", 1009 | "Takumi", 1010 | "Seoyeon", 1011 | "Aditi", 1012 | "Zhiyu" 1013 | ] 1014 | }, 1015 | "VoiceList":{ 1016 | "type":"list", 1017 | "member":{"shape":"Voice"} 1018 | }, 1019 | "VoiceName":{"type":"string"} 1020 | }, 1021 | "documentation":"

Amazon Polly is a web service that makes it easy to synthesize speech from text.

The Amazon Polly service provides API operations for synthesizing high-quality speech from plain text and Speech Synthesis Markup Language (SSML), along with managing pronunciations lexicons that enable you to get the best results for your application domain.

" 1022 | } 1023 | --------------------------------------------------------------------------------