├── gtts ├── tests │ ├── __init__.py │ ├── input_files │ │ ├── test_cli_test_utf8.txt │ │ └── test_cli_test_ascii.txt │ ├── test_lang.py │ ├── test_utils.py │ ├── test_tts.py │ └── test_cli.py ├── version.py ├── tokenizer │ ├── __init__.py │ ├── symbols.py │ ├── tests │ │ ├── test_pre_processors.py │ │ ├── test_tokenizer_cases.py │ │ └── test_core.py │ ├── pre_processors.py │ ├── tokenizer_cases.py │ └── core.py ├── __init__.py ├── langs.py ├── lang.py ├── utils.py ├── cli.py └── tts.py ├── news └── .gitignore ├── docs ├── changelog.rst ├── contributing.rst ├── license.rst ├── Makefile ├── index.rst ├── cli.rst ├── conf.py ├── module.rst └── tokenizer.rst ├── .mypy.ini ├── MANIFEST.in ├── pytest.ini ├── .readthedocs.yml ├── setup.py ├── pyproject.toml ├── .github ├── workflows │ ├── autolock.yml │ ├── test.yml │ └── publish.yml └── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature.md │ └── bug.md ├── .gitignore ├── LICENSE ├── CONTRIBUTING.rst ├── setup.cfg ├── README.md ├── scripts └── gen_langs.py └── CHANGELOG.rst /gtts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /news/.gitignore: -------------------------------------------------------------------------------- 1 | !.gitignore 2 | -------------------------------------------------------------------------------- /gtts/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.2.3' 2 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | .. 
include:: ../LICENSE 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include CHANGELOG.rst 3 | include CONTRIBUTING.rst 4 | include LICENSE 5 | include pytest.ini 6 | -------------------------------------------------------------------------------- /gtts/tests/input_files/test_cli_test_utf8.txt: -------------------------------------------------------------------------------- 1 | 这是一个三岁的小孩 2 | 在讲述她从一系列照片里看到的东西。 3 | 对这个世界, 她也许还有很多要学的东西, 4 | 但在一个重要的任务上, 她已经是专家了: 5 | 去理解她所看到的东西。 6 | -------------------------------------------------------------------------------- /gtts/tokenizer/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -* 2 | from .core import RegexBuilder, PreProcessorRegex, PreProcessorSub, Tokenizer # noqa: F401 3 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | maxversion = 4.6.11 3 | 4 | markers = 5 | net: marks tests that call use the net (using the URL endpoint, deselect with '-m "not net"') 6 | -------------------------------------------------------------------------------- /gtts/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .version import __version__ # noqa: F401 3 | from .tts import gTTS, gTTSError 4 | 5 | __all__ = ['gTTS', 'gTTSError'] 6 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Build PDF as extra 2 | formats: 3 | - pdf 4 | 5 | python: 6 | version: 3.7 7 | pip_install: true 8 | extra_requirements: 9 | - docs 10 | -------------------------------------------------------------------------------- /gtts/tests/input_files/test_cli_test_ascii.txt: -------------------------------------------------------------------------------- 1 | Can you make pink a little more pinkish can you make pink a little more pinkish, nor can you make the font bigger? 2 | How much will it cost the website doesn't have the theme i was going for. 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from codecs import open 3 | import site 4 | 5 | # PEP517 6 | site.ENABLE_USER_SITE = True 7 | 8 | exec(open('gtts/version.py').read()) 9 | 10 | setup( 11 | version=__version__, # type: ignore # noqa: F821 12 | test_suite='gtts.tests', 13 | ) 14 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | 4 | [tool.towncrier] 5 | package = "gtts" 6 | filename = "CHANGELOG.rst" 7 | directory = "news/" 8 | underlines = ["-", "~", "_"] 9 | title_format = "{version} ({project_date})" 10 | issue_format = "`#{issue} `_" 11 | -------------------------------------------------------------------------------- /gtts/tokenizer/symbols.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ABBREVIATIONS = [ 4 | 'dr', 'jr', 'mr', 5 | 'mrs', 'ms', 'msgr', 6 | 'prof', 'sr', 'st'] 7 | 8 | SUB_PAIRS = [ 9 | ('Esq.', 'Esquire') 10 | ] 11 | 12 | ALL_PUNC = u"?!?!.,¡()[]¿…‥،;:—。,、:\n" 13 | 14 | TONE_MARKS = u"?!?!" 15 | 16 | PERIOD_COMMA = u".," 17 | 18 | COLON = u":" 19 | -------------------------------------------------------------------------------- /.github/workflows/autolock.yml: -------------------------------------------------------------------------------- 1 | name: 'Lock Inactive Issues' 2 | 3 | on: 4 | schedule: 5 | - cron: '0 * * * *' 6 | 7 | jobs: 8 | lock: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: dessant/lock-threads@v2 12 | with: 13 | github-token: ${{ github.token }} 14 | issue-lock-inactive-days: '15' 15 | process-only: 'issues' -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Bug Report 4 | url: https://github.com/pndurette/gTTS/issues/new?template=bug.md 5 | about: Create a report to help us improve gTTS! 6 | - name: Feature Request 7 | url: https://github.com/pndurette/gTTS/issues/new?template=feature.md 8 | about: Suggest a new feature for gTTS! 9 | - name: Questions and discussions! 10 | url: https://github.com/pndurette/gTTS/discussions 11 | about: "Place to ask questions and get help about gTTS!!" 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature.md: -------------------------------------------------------------------------------- 1 | ## Prerequisites 2 | * [ ] Did you make sure a similar [issue](../) didn't exist? 3 | * [ ] Did you update gTTS to the latest? (`pip install --upgrade gTTS`) 4 | 5 | ## Proposed Behaviour 6 | 7 | 8 | ``` 9 | code 10 | ``` 11 | 12 | ## Context 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = gTTS 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /gtts/tests/test_lang.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | from gtts.lang import tts_langs, _extra_langs, _fallback_deprecated_lang 4 | from gtts.langs import _main_langs 5 | 6 | """Test language list""" 7 | 8 | 9 | def test_main_langs(): 10 | """Fetch languages successfully""" 11 | # Safe to assume 'en' (English) will always be there 12 | scraped_langs = _main_langs() 13 | assert 'en' in scraped_langs 14 | 15 | 16 | def test_deprecated_lang(): 17 | """Test language deprecation fallback""" 18 | with pytest.deprecated_call(): 19 | assert _fallback_deprecated_lang('en-gb') == 'en' 20 | 21 | 22 | if __name__ == '__main__': 23 | pytest.main(['-x', __file__]) 24 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | gTTS 2 | ===== 3 | 4 | :class:`gTTS` (*Google Text-to-Speech*), a Python library and CLI tool to interface with Google Translate's text-to-speech API. Writes spoken ``mp3`` data to a file, a file-like object (bytestring) for further audio manipulation, or ``stdout``. It features flexible pre-processing and tokenizing. 5 | 6 | Installation 7 | ------------ 8 | 9 | .. code-block:: bash 10 | 11 | pip install gTTS 12 | 13 | .. toctree:: 14 | :maxdepth: 3 15 | :caption: Documentation 16 | 17 | cli 18 | module 19 | tokenizer 20 | 21 | .. 
toctree:: 22 | :maxdepth: 2 23 | :caption: Project 24 | 25 | license 26 | contributing 27 | changelog 28 | 29 | Misc 30 | ---- 31 | 32 | * :ref:`genindex` 33 | * :ref:`modindex` 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eggs/ 2 | .mypy_cache/ 3 | pip-wheel-metadata/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env*/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # Installer logs 29 | pip-log.txt 30 | pip-delete-this-directory.txt 31 | 32 | # Unit test / coverage reports 33 | .pytest_cache/ 34 | htmlcov/ 35 | .tox/ 36 | .coverage 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | 41 | # Translations 42 | *.mo 43 | 44 | # Mr Developer 45 | .mr.developer.cfg 46 | .project 47 | .pydevproject 48 | 49 | # Rope 50 | .ropeproject 51 | 52 | # Django stuff: 53 | *.log 54 | *.pot 55 | 56 | # Sphinx documentation 57 | docs/_build/ 58 | 59 | # Other 60 | .vscode/ 61 | .DS_Store 62 | -------------------------------------------------------------------------------- /gtts/tokenizer/tests/test_pre_processors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | from gtts.tokenizer.pre_processors import tone_marks, end_of_line, abbreviations, word_sub 4 | 5 | 6 | class TestPreProcessors(unittest.TestCase): 7 | def test_tone_marks(self): 8 | _in = "lorem!ipsum?" 9 | _out = "lorem! ipsum? " 10 | self.assertEqual(tone_marks(_in), _out) 11 | 12 | def test_end_of_line(self): 13 | _in = """test- 14 | ing""" 15 | _out = "testing" 16 | self.assertEqual(end_of_line(_in), _out) 17 | 18 | def test_abbreviations(self): 19 | _in = "jr. sr. dr." 20 | _out = "jr sr dr" 21 | self.assertEqual(abbreviations(_in), _out) 22 | 23 | def test_word_sub(self): 24 | _in = "Esq. Bacon" 25 | _out = "Esquire Bacon" 26 | self.assertEqual(word_sub(_in), _out) 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | ## Prerequisites 2 | * [ ] Did you make sure a similar [issue](../) didn't exist? 3 | * [ ] Did you update gTTS to the latest? 
(`pip install --upgrade gTTS`) 4 | 5 | ## Current Behaviour (steps to reproduce) 6 | 7 | 8 | ``` 9 | code 10 | ``` 11 | 12 | ## Expected Behaviour 13 | 14 | 15 | ``` 16 | code 17 | ``` 18 | 19 | ## Context 20 | 21 | 22 | 23 | 24 | ## Environment 25 | 26 | * gTTS version: 27 | * Operating System version: -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | tags-ignore: 5 | - 'v*' 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | jobs: 11 | test: 12 | name: Unit 13 | 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ['2.x', 3.6, 3.7, 3.8, 3.9] 19 | os: [ubuntu-latest, macOS-latest, windows-latest] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | 24 | - uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | 28 | - name: Install 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install -e .[tests] 32 | 33 | - name: Unit Tests 34 | run: pytest -v -s gtts/ --cov=gtts --cov-config=setup.cfg --cov-report=xml 35 | env: 36 | TEST_LANGS: all 37 | 38 | - name: Upload Coverage Report 39 | uses: codecov/codecov-action@v1.0.14 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2014-2021 Pierre Nicolas Durette 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 3 | push: 4 | tags: 5 | - 'v*' 6 | 7 | jobs: 8 | package: 9 | name: Package 10 | 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ['3.x'] 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: actions/setup-python@v2 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: Install 24 | run: | 25 | python -m pip install --upgrade pip 26 | pip install -e .[tests] 27 | 28 | - name: Unit Tests 29 | run: pytest -v -s gtts/ 30 | env: 31 | TEST_LANGS: all 32 | 33 | - name: Install Tools 34 | run: pip install --upgrade setuptools wheel twine 35 | 36 | - name: Package (wheel) 37 | run: python setup.py bdist_wheel 38 | 39 | - name: Package (sdist) 40 | if: matrix.python-version == '3.x' 41 | run: python setup.py sdist 42 | 43 | - name: Publish 44 | env: 45 | TWINE_USERNAME: __token__ 46 | TWINE_PASSWORD: ${{ secrets.pypi_token }} 47 | run: twine upload dist/* 48 | -------------------------------------------------------------------------------- /gtts/langs.py: -------------------------------------------------------------------------------- 1 | # Note: this file is generated 2 | _langs = { 3 | "af": "Afrikaans", 4 | "ar": "Arabic", 5 | "bg": "Bulgarian", 6 | "bn": "Bengali", 7 | "bs": "Bosnian", 8 | "ca": "Catalan", 9 | "cs": "Czech", 10 | "cy": "Welsh", 11 | "da": "Danish", 12 | "de": "German", 13 | "el": "Greek", 14 | "en": "English", 15 | "eo": "Esperanto", 16 | "es": "Spanish", 17 | "et": "Estonian", 18 | "fi": "Finnish", 19 | "fr": "French", 20 | "gu": "Gujarati", 21 | "hi": "Hindi", 22 | "hr": "Croatian", 23 | "hu": "Hungarian", 24 | "hy": "Armenian", 25 | "id": "Indonesian", 26 | "is": "Icelandic", 27 | "it": "Italian", 28 | "ja": "Japanese", 29 | "jw": "Javanese", 30 | "km": "Khmer", 31 | "kn": "Kannada", 32 | "ko": "Korean", 33 | "la": "Latin", 34 | "lv": "Latvian", 35 | "mk": "Macedonian", 36 | "ml": "Malayalam", 37 | "mr": "Marathi", 38 | "my": "Myanmar (Burmese)", 39 | "ne": "Nepali", 40 | "nl": "Dutch", 41 | "no": "Norwegian", 42 | "pl": "Polish", 43 | "pt": "Portuguese", 44 | "ro": "Romanian", 45 | "ru": "Russian", 46 | "si": "Sinhala", 47 | "sk": "Slovak", 48 | "sq": "Albanian", 49 | "sr": "Serbian", 50 | "su": "Sundanese", 51 | "sv": "Swedish", 52 | "sw": "Swahili", 53 | "ta": "Tamil", 54 | "te": "Telugu", 55 | "th": "Thai", 56 | "tl": "Filipino", 57 | "tr": "Turkish", 58 | "uk": "Ukrainian", 59 | "ur": "Urdu", 60 | "vi": "Vietnamese", 61 | "zh-CN": "Chinese" 62 | } 63 | 64 | def _main_langs(): 65 | return _langs 66 | -------------------------------------------------------------------------------- /gtts/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | from gtts.utils import _minimize, _len, _clean_tokens, _translate_url 4 | 5 | delim = ' ' 6 | Lmax = 10 7 | 8 | 9 | def test_ascii(): 10 | _in = "Bacon ipsum dolor sit amet" 11 | _out = ["Bacon", "ipsum", "dolor sit", "amet"] 12 | assert _minimize(_in, delim, Lmax) == _out 13 | 14 | 15 | def test_ascii_no_delim(): 16 | _in = "Baconipsumdolorsitametflankcornedbee" 17 | _out = ["Baconipsum", "dolorsitam", "etflankcor", "nedbee"] 18 | assert _minimize(_in, delim, Lmax) == _out 19 | 20 | 21 | def test_unicode(): 22 | _in = u"这是一个三岁的小孩在讲述他从一系列照片里看到的东西。" 23 | 
_out = [u"这是一个三岁的小孩在", u"讲述他从一系列照片里", u"看到的东西。"] 24 | assert _minimize(_in, delim, Lmax) == _out 25 | 26 | 27 | def test_startwith_delim(): 28 | _in = delim + "test" 29 | _out = ["test"] 30 | assert _minimize(_in, delim, Lmax) == _out 31 | 32 | 33 | def test_len_ascii(): 34 | text = "Bacon ipsum dolor sit amet flank corned beef." 35 | assert _len(text) == 45 36 | 37 | 38 | def test_len_unicode(): 39 | text = u"但在一个重要的任务上" 40 | assert _len(text) == 10 41 | 42 | 43 | def test_only_space_and_punc(): 44 | _in = [",(:)?", "\t ", "\n"] 45 | _out = [] 46 | assert _clean_tokens(_in) == _out 47 | 48 | 49 | def test_strip(): 50 | _in = [" Bacon ", "& ", "ipsum\r", "."] 51 | _out = ["Bacon", "&", "ipsum"] 52 | assert _clean_tokens(_in) == _out 53 | 54 | 55 | def test_translate_url(): 56 | _in = {"tld": "qwerty", "path": "asdf"} 57 | _out = "https://translate.google.qwerty/asdf" 58 | assert _translate_url(**_in) == _out 59 | 60 | 61 | if __name__ == '__main__': 62 | pytest.main(['-x', __file__]) 63 | -------------------------------------------------------------------------------- /gtts/tokenizer/tests/test_tokenizer_cases.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | from gtts.tokenizer.tokenizer_cases import tone_marks, period_comma, colon, other_punctuation, legacy_all_punctuation 4 | from gtts.tokenizer import Tokenizer, symbols 5 | 6 | 7 | class TestPreTokenizerCases(unittest.TestCase): 8 | def test_tone_marks(self): 9 | t = Tokenizer([tone_marks]) 10 | _in = "Lorem? Ipsum!" 11 | _out = ['Lorem?', 'Ipsum!'] 12 | self.assertEqual(t.run(_in), _out) 13 | 14 | def test_period_comma(self): 15 | t = Tokenizer([period_comma]) 16 | _in = "Hello, it's 24.5 degrees in the U.K. today. $20,000,000." 17 | _out = ['Hello', "it's 24.5 degrees in the U.K. today", '$20,000,000.'] 18 | self.assertEqual(t.run(_in), _out) 19 | 20 | def test_colon(self): 21 | t = Tokenizer([colon]) 22 | _in = "It's now 6:30 which means: morning missing:space" 23 | _out = ["It's now 6:30 which means", ' morning missing', 'space'] 24 | self.assertEqual(t.run(_in), _out) 25 | 26 | def test_other_punctuation(self): 27 | # String of the unique 'other punctuations' 28 | other_punc_str = ''.join( 29 | set(symbols.ALL_PUNC) - 30 | set(symbols.TONE_MARKS) - 31 | set(symbols.PERIOD_COMMA) - 32 | set(symbols.COLON)) 33 | 34 | t = Tokenizer([other_punctuation]) 35 | self.assertEqual(len(t.run(other_punc_str)) - 1, len(other_punc_str)) 36 | 37 | def test_legacy_all_punctuation(self): 38 | t = Tokenizer([legacy_all_punctuation]) 39 | self.assertEqual(len(t.run(symbols.ALL_PUNC)) - 40 | 1, len(symbols.ALL_PUNC)) 41 | 42 | 43 | if __name__ == '__main__': 44 | unittest.main() 45 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | Command-line (:mod:`gtts-cli`) 2 | ============================== 3 | 4 | After installing the package, the ``gtts-cli`` tool becomes available:: 5 | 6 | $ gtts-cli 7 | 8 | .. 
click:: gtts.cli:tts_cli 9 | :prog: gtts-cli 10 | :show-nested: 11 | 12 | Examples 13 | -------- 14 | 15 | List available languages:: 16 | 17 | $ gtts-cli --all 18 | 19 | Read 'hello' to ``hello.mp3``:: 20 | 21 | $ gtts-cli 'hello' --output hello.mp3 22 | 23 | Read "c'est la vie" in French to ``cestlavie.mp3``:: 24 | 25 | $ gtts-cli "c'est la vie" --lang fr --output cestlavie.mp3 26 | 27 | Read '你好' to ``你好.mp3`` (in Mandarin, using google.cn):: 28 | 29 | $ gtts-cli '你好' --tld cn --lang zh-cn --output 你好.mp3 30 | 31 | Read 'slow' slowly to ``slow.mp3``:: 32 | 33 | $ gtts-cli 'slow' --slow --output slow.mp3 34 | 35 | Read 'hello' to ``stdout``:: 36 | 37 | $ gtts-cli 'hello' 38 | 39 | Read ``stdin`` to ``hello.mp3`` via ``<text>`` or ``<file>``:: 40 | 41 | $ echo -n 'hello' | gtts-cli - --output hello.mp3 42 | $ echo -n 'hello' | gtts-cli --file - --output hello.mp3 43 | 44 | Read 'no check' to ``nocheck.mp3`` without language checking:: 45 | 46 | $ gtts-cli 'no check' --lang zh --nocheck --output nocheck.mp3 47 | 48 | .. note:: Using ``--nocheck`` can speed up execution. It exists mostly, however, to force a ``<lang>`` language tag that might not be documented but would work with the API, such as specific regional sub-tags of documented tags (examples for 'en': 'en-gb', 'en-au', etc.). 49 | 50 | Playing sound directly 51 | ---------------------- 52 | 53 | You can pipe the output of ``gtts-cli`` into any media player that supports ``stdin``. For example, using the ``play`` command from `SoX `_:: 54 | 55 | $ gtts-cli 'hello' | play -t mp3 - 56 | 57 | -------------------------------------------------------------------------------- /gtts/tokenizer/pre_processors.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts.tokenizer import PreProcessorRegex, PreProcessorSub, symbols 3 | import re 4 | 5 | 6 | def tone_marks(text): 7 | """Add a space after tone-modifying punctuation. 8 | 9 | Because the `tone_marks` tokenizer case will split after a tone-modifying 10 | punctuation mark, make sure there's whitespace after. 11 | 12 | """ 13 | return PreProcessorRegex( 14 | search_args=symbols.TONE_MARKS, 15 | search_func=lambda x: u"(?<={})".format(x), 16 | repl=' ').run(text) 17 | 18 | 19 | def end_of_line(text): 20 | """Re-form words cut by end-of-line hyphens. 21 | 22 | Remove a hyphen followed by a newline. 23 | 24 | """ 25 | return PreProcessorRegex( 26 | search_args=u'-', 27 | search_func=lambda x: u"{}\n".format(x), 28 | repl='').run(text) 29 | 30 | 31 | def abbreviations(text): 32 | """Remove periods after an abbreviation from a list of known 33 | abbreviations that can be spoken the same without that period. This 34 | prevents having to handle tokenization of that period. 35 | 36 | Note: 37 | Could potentially remove the ending period of a sentence. 38 | 39 | Note: 40 | Abbreviations that Google Translate can't pronounce without 41 | (or even with) a period should be added as a word substitution with a 42 | :class:`PreProcessorSub` pre-processor. Ex.: 'Esq.', 'Esquire'.
43 | 44 | """ 45 | return PreProcessorRegex( 46 | search_args=symbols.ABBREVIATIONS, 47 | search_func=lambda x: r"(?<={})(?=\.).".format(x), 48 | repl='', flags=re.IGNORECASE).run(text) 49 | 50 | 51 | def word_sub(text): 52 | """Word-for-word substitutions.""" 53 | return PreProcessorSub( 54 | sub_pairs=symbols.SUB_PAIRS).run(text) 55 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Reporting Issues 5 | ---------------- 6 | 7 | On the Github issues_ page. Thanks! 8 | 9 | Submitting Patches 10 | ------------------ 11 | 12 | 1. **Fork**. Follow `PEP 8 `_! 13 | 2. **Write/Update tests** (see below). 14 | 3. **Document**. Docstrings follow the `Google Python Style Guide`_ (docs by Sphinx_). 15 | You can 'test' documentation:: 16 | 17 | $ pip install .[docs] 18 | $ cd docs && make html # generated in docs/_build/html/ 19 | 20 | 4. **Open Pull Request**. To the ``master`` branch. 21 | 5. **Changelog**. This project uses towncrier_ for managing the changelog. Please consider 22 | creating one or more 'news fragments' in the ``/news/`` directory and adding them to 23 | your PR, in the style of ``<issue#>.<type>`` where 'type' is one of: 24 | 'feature', 'bugfix', 'doc', 'removal' or 'misc'. 25 | 26 | See towncrier_ (New Fragments) for more details. Example:: 27 | 28 | $ echo 'Fixed a thing!' > news/1234.bugfix 29 | 30 | .. note:: | Please don't hesitate to contribute! While good tests, docs and structure are 31 | | encouraged, I do welcome great ideas over absolute conformity to the above! 32 | | Thanks! ❤️ 33 | 34 | Testing 35 | ------- 36 | 37 | | Testing is done with the ``unittest`` framework. 38 | | As a rule, the ``./tests/test_<module>.py`` file tests the ``<module>`` module. 39 | 40 | To run all tests (testing only language 'en' and generating an html coverage 41 | report in ``gtts/htmlcov/``):: 42 | 43 | $ pip install .[tests] 44 | $ TEST_LANGS=en pytest -v -s gtts/ --cov=gtts --cov-report=html 45 | 46 | .. _repo: https://github.com/pndurette/gTTS/ 47 | .. _issues: https://github.com/pndurette/gTTS/issues 48 | 49 | .. _Google Python Style Guide: http://google.github.io/styleguide/pyguide.html#Comments 50 | .. _Sphinx: http://www.sphinx-doc.org/ 51 | ..
_towncrier: https://github.com/hawkowl/towncrier 52 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = gTTS 3 | description = gTTS (Google Text-to-Speech), a Python library and CLI tool to interface with Google Translate text-to-speech API 4 | author = Pierre Nicolas Durette 5 | author_email = pndurette@gmail.com 6 | url = https://github.com/pndurette/gTTS 7 | license = MIT 8 | keywords = 9 | gtts 10 | text to speech 11 | Google Translate 12 | TTS 13 | classifiers = 14 | Environment :: Console 15 | Intended Audience :: Developers 16 | License :: OSI Approved :: MIT License 17 | Operating System :: MacOS 18 | Operating System :: Unix 19 | Operating System :: POSIX 20 | Operating System :: POSIX :: Linux 21 | Operating System :: Microsoft :: Windows 22 | Programming Language :: Python :: 2.7 23 | Programming Language :: Python :: 3.6 24 | Programming Language :: Python :: 3.7 25 | Programming Language :: Python :: 3.8 26 | Programming Language :: Python :: 3.9 27 | Topic :: Software Development :: Libraries 28 | Topic :: Multimedia :: Sound/Audio :: Speech 29 | license_file = LICENSE 30 | long_description = file: README.md 31 | long_description_content_type = text/markdown 32 | 33 | [options] 34 | python_requires = >= 2.7 35 | include_package_data = True 36 | packages = find: 37 | install_requires = 38 | six 39 | click 40 | requests 41 | 42 | [options.extras_require] 43 | tests = 44 | pytest == 4.6.11 45 | pytest-cov 46 | flake8 47 | testfixtures 48 | mock 49 | six 50 | docs = 51 | sphinx 52 | sphinx-autobuild 53 | sphinx_rtd_theme 54 | sphinx-click 55 | towncrier 56 | 57 | [options.entry_points] 58 | console_scripts = 59 | gtts-cli = gtts.cli:tts_cli 60 | 61 | [flake8] 62 | max-line-length = 132 63 | exclude = .git,__pycache__,.eggs/,doc/,docs/,build/,dist/,archive/ 64 | ignore = W605, W503, W504 65 | 66 | [coverage:run] 67 | cover_pylib = false 68 | omit = 69 | */site-packages/* 70 | gtts/tests/* 71 | gtts/tokenizer/tests/* 72 | 73 | [coverage:report] 74 | exclude_lines = 75 | pragma: no cover 76 | def __repr__ 77 | log.debug 78 | log.warning 79 | -------------------------------------------------------------------------------- /gtts/tokenizer/tokenizer_cases.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts.tokenizer import RegexBuilder, symbols 3 | 4 | 5 | def tone_marks(): 6 | """Keep tone-modifying punctuation by matching following character. 7 | 8 | Assumes the `tone_marks` pre-processor was run for cases where there might 9 | not be any space after a tone-modifying punctuation mark. 10 | """ 11 | return RegexBuilder( 12 | pattern_args=symbols.TONE_MARKS, 13 | pattern_func=lambda x: u"(?<={}).".format(x)).regex 14 | 15 | 16 | def period_comma(): 17 | """Period and comma case. 18 | 19 | Match if not preceded by "." and only if followed by space. 20 | Won't cut in the middle/after dotted abbreviations; won't cut numbers. 21 | 22 | Note: 23 | Won't match if a dotted abbreviation ends a sentence. 24 | 25 | Note: 26 | Won't match the end of a sentence if not followed by a space. 27 | 28 | """ 29 | return RegexBuilder( 30 | pattern_args=symbols.PERIOD_COMMA, 31 | pattern_func=lambda x: r"(? 
6 | 7 | [![PyPI version](https://img.shields.io/pypi/v/gTTS.svg)](https://pypi.org/project/gTTS/) 8 | [![Python versions](https://img.shields.io/pypi/pyversions/gTTS.svg)](https://pypi.org/project/gTTS/) 9 | [![Tests workflow](https://github.com/pndurette/gTTS/workflows/Tests/badge.svg)](https://github.com/pndurette/gTTS/actions) 10 | [![codecov](https://codecov.io/gh/pndurette/gTTS/branch/master/graph/badge.svg)](https://codecov.io/gh/pndurette/gTTS) 11 | [![Commits Since](https://img.shields.io/github/commits-since/pndurette/gTTS/latest.svg)](https://github.com/pndurette/gTTS/commits/) 12 | [![PyPi Downloads](http://pepy.tech/badge/gtts)](http://pepy.tech/project/gtts) 13 | [![Buy me a Coffee](https://img.shields.io/badge/buy%20me%20a-coffee-orange)](https://www.buymeacoffee.com/pndurette) 14 | 15 | ## Features 16 | 17 | - Customizable speech-specific sentence tokenizer that allows for unlimited lengths of text to be read, all while keeping proper intonation, abbreviations, decimals and more; 18 | - Customizable text pre-processors which can, for example, provide pronunciation corrections; 19 | 20 | ### Installation 21 | 22 | $ pip install gTTS 23 | 24 | ### Quickstart 25 | 26 | Command Line: 27 | 28 | $ gtts-cli 'hello' --output hello.mp3 29 | 30 | Module: 31 | 32 | >>> from gtts import gTTS 33 | >>> tts = gTTS('hello') 34 | >>> tts.save('hello.mp3') 35 | 36 | See for documentation and examples. 37 | 38 | ### Disclaimer 39 | 40 | This project is *not* affiliated with Google or Google Cloud. Breaking upstream changes *can* occur without notice. This project is leveraging the undocumented [Google Translate](https://translate.google.com) speech functionality and is *different* from [Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech/). 41 | 42 | ### Project 43 | 44 | - [Questions & community](https://github.com/pndurette/gTTS/discussions) 45 | - [Changelog](CHANGELOG.rst) 46 | - [Contributing](CONTRIBUTING.rst) 47 | 48 | ### Licence 49 | 50 | [The MIT License (MIT)](LICENSE) Copyright © 2014-2021 Pierre Nicolas Durette & [Contributors](https://github.com/pndurette/gTTS/graphs/contributors) 51 | -------------------------------------------------------------------------------- /gtts/lang.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts.langs import _main_langs 3 | from warnings import warn 4 | import logging 5 | 6 | __all__ = ['tts_langs'] 7 | 8 | # Logger 9 | log = logging.getLogger(__name__) 10 | log.addHandler(logging.NullHandler()) 11 | 12 | 13 | def tts_langs(): 14 | """Languages Google Text-to-Speech supports. 15 | 16 | Returns: 17 | dict: A dictionary of the type `{ '': ''}` 18 | 19 | Where `` is an IETF language tag such as `en` or `zh-TW`, 20 | and `` is the full English name of the language, such as 21 | `English` or `Chinese (Mandarin/Taiwan)`. 22 | 23 | The dictionary returned combines languages from two origins: 24 | 25 | - Languages fetched from Google Translate (pre-generated in :mod:`gtts.langs`) 26 | - Languages that are undocumented variations that were observed to work and 27 | present different dialects or accents. 28 | 29 | """ 30 | langs = dict() 31 | langs.update(_main_langs()) 32 | langs.update(_extra_langs()) 33 | log.debug("langs: {}".format(langs)) 34 | return langs 35 | 36 | 37 | def _extra_langs(): 38 | """Define extra languages. 39 | 40 | Returns: 41 | dict: A dictionnary of extra languages manually defined. 
42 | 43 | Variations of the ones generated in `_main_langs`, 44 | observed to provide different dialects or accents or 45 | just simply accepted by the Google Translate Text-to-Speech API. 46 | 47 | """ 48 | return { 49 | # Chinese 50 | 'zh-TW': 'Chinese (Mandarin/Taiwan)', 51 | 'zh': 'Chinese (Mandarin)' 52 | } 53 | 54 | 55 | def _fallback_deprecated_lang(lang): 56 | """Languages Google Text-to-Speech used to support. 57 | 58 | Language tags that don't work anymore, but that can 59 | fall back to a more general language code to maintain 60 | compatibility. 61 | 62 | Args: 63 | lang (string): The language tag. 64 | 65 | Returns: 66 | string: The language tag, as-is if not deprecated, 67 | or a fallback if one exists. 68 | 69 | Example: 70 | ``en-GB`` returns ``en``. 71 | ``en-gb`` returns ``en``. 72 | 73 | """ 74 | 75 | deprecated = { 76 | # '': [] 77 | 'en': ['en-us', 'en-ca', 'en-uk', 'en-gb', 'en-au', 'en-gh', 'en-in', 78 | 'en-ie', 'en-nz', 'en-ng', 'en-ph', 'en-za', 'en-tz'], 79 | 'fr': ['fr-ca', 'fr-fr'], 80 | 'pt': ['pt-br', 'pt-pt'], 81 | 'es': ['es-es', 'es-us'], 82 | 'zh-CN': ['zh-cn'], 83 | 'zh-TW': ['zh-tw'], 84 | } 85 | 86 | for fallback_lang, deprecated_langs in deprecated.items(): 87 | if lang.lower() in deprecated_langs: 88 | msg = ( 89 | "'{}' has been deprecated, falling back to '{}'. " 90 | "This fallback will be removed in a future version." 91 | ).format(lang, fallback_lang) 92 | 93 | warn(msg, DeprecationWarning) 94 | log.warning(msg) 95 | 96 | return fallback_lang 97 | 98 | return lang -------------------------------------------------------------------------------- /gtts/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts.tokenizer.symbols import ALL_PUNC as punc 3 | from string import whitespace as ws 4 | import re 5 | 6 | _ALL_PUNC_OR_SPACE = re.compile(u"^[{}]*$".format(re.escape(punc + ws))) 7 | """Regex that matches if an entire line is only comprised 8 | of whitespace and punctuation 9 | 10 | """ 11 | 12 | 13 | def _minimize(the_string, delim, max_size): 14 | """Recursively split a string in the largest chunks 15 | possible from the highest position of a delimiter all the way 16 | to a maximum size 17 | 18 | Args: 19 | the_string (string): The string to split. 20 | delim (string): The delimiter to split on. 21 | max_size (int): The maximum size of a chunk. 22 | 23 | Returns: 24 | list: the minimized string in tokens 25 | 26 | Every chunk size will be at minimum ``the_string[0:idx]`` where ``idx`` 27 | is the highest index of ``delim`` found in ``the_string``; and at maximum 28 | ``the_string[0:max_size]`` if no ``delim`` was found in ``the_string``. 29 | In the latter case, the split will occur at ``the_string[max_size]`` 30 | which can be any character. The function runs itself again on the rest of 31 | ``the_string`` (``the_string[idx:]``) until no chunk is larger than 32 | ``max_size``. 33 | 34 | """ 35 | # Remove `delim` from start of `the_string` 36 | # i.e. prevent a recursive infinite loop on `the_string[0:0]` 37 | # if `the_string` starts with `delim` and is larger than `max_size` 38 | if the_string.startswith(delim): 39 | the_string = the_string[_len(delim):] 40 | 41 | if _len(the_string) > max_size: 42 | try: 43 | # Find the highest index of `delim` in `the_string[0:max_size]` 44 | # i.e.
`the_string` will be cut in half on `delim` index 45 | idx = the_string.rindex(delim, 0, max_size) 46 | except ValueError: 47 | # `delim` not found in `the_string`, index becomes `max_size` 48 | # i.e. `the_string` will be cut in half arbitrarily on `max_size` 49 | idx = max_size 50 | # Call itself again for `the_string[idx:]` 51 | return [the_string[:idx]] + \ 52 | _minimize(the_string[idx:], delim, max_size) 53 | else: 54 | return [the_string] 55 | 56 | 57 | def _len(text): 58 | """Same as ``len(text)`` for a string but that decodes 59 | ``text`` first in Python 2.x 60 | 61 | Args: 62 | text (string): String to get the size of. 63 | 64 | Returns: 65 | int: The size of the string. 66 | """ 67 | try: 68 | # Python 2 69 | return len(unicode(text)) 70 | except NameError: # pragma: no cover 71 | # Python 3 72 | return len(text) 73 | 74 | 75 | def _clean_tokens(tokens): 76 | """Clean a list of strings 77 | 78 | Args: 79 | tokens (list): A list of strings (tokens) to clean. 80 | 81 | Returns: 82 | list: Stripped strings ``tokens`` without the original elements 83 | that only consisted of whitespace and/or punctuation characters. 84 | 85 | """ 86 | return [t.strip() for t in tokens if not _ALL_PUNC_OR_SPACE.match(t)] 87 | 88 | 89 | def _translate_url(tld="com", path=""): 90 | """Generates a Google Translate URL 91 | 92 | Args: 93 | tld (string): Top-level domain for the Google Translate host, 94 | i.e ``https://translate.google.``. Default is ``com``. 95 | path: (string): A path to append to the Google Translate host, 96 | i.e ``https://translate.google.com/``. Default is ``""``. 97 | 98 | Returns: 99 | string: A Google Translate URL `https://translate.google./path` 100 | """ 101 | _GOOGLE_TTS_URL = "https://translate.google.{}/{}" 102 | return _GOOGLE_TTS_URL.format(tld, path) -------------------------------------------------------------------------------- /scripts/gen_langs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts.utils import _translate_url 3 | from bs4 import BeautifulSoup 4 | import requests 5 | import logging 6 | import js2py 7 | import json 8 | import sys 9 | import re 10 | 11 | # Logger 12 | log = logging.getLogger(__name__) 13 | log.addHandler(logging.NullHandler()) 14 | 15 | # This file is used to generate the language dict (as a module) 16 | # Needs cleaning up, very much WIP 17 | # Usage: 18 | # * Install gTTS 19 | # * $ python gen_langs.py /langs.py 20 | 21 | 22 | def _get_data_by_key(js_list): 23 | """JavaScript function to generate the languages. 24 | 25 | A payload with the languages is passed to a JavaScript function. 26 | Instead of parsing that payload (combersome), we 'overload' that 27 | function to return what we want. 28 | 29 | """ 30 | 31 | js_function = r""" 32 | function AF_initDataCallback(args) { 33 | return { key: args['key'], data: args['data'] }; 34 | }; 35 | """ 36 | 37 | data_by_key = {} 38 | for js in js_list: 39 | js_code = js_function + js 40 | py_eval = js2py.eval_js(js_code) 41 | data_by_key[py_eval['key']] = py_eval['data'] 42 | 43 | return data_by_key 44 | 45 | 46 | def _fetch_langs(tld="com"): 47 | """Fetch (scrape) languages from Google Translate. 48 | 49 | Google Translate loads a JavaScript Array of 'languages codes' that can 50 | be spoken. We intersect this list with all the languages Google Translate 51 | provides to get the ones that support text-to-speech. 
52 | 53 | Args: 54 | tld (string): Top-level domain for the Google Translate host 55 | to fetch languages from. i.e `https://translate.google.`. 56 | The language names obtained will be in a language locale of the TLD 57 | (e.g. ``tld=fr`` will retrieve the French names of the languages). 58 | Default is ``com``. 59 | 60 | Returns: 61 | dict: A dictionnary of languages from Google Translate 62 | 63 | """ 64 | 65 | URL_BASE = _translate_url(tld) 66 | 67 | headers = { 68 | 'User-Agent': 69 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " 70 | "AppleWebKit/605.1.15 (KHTML, like Gecko) " 71 | "Version/14.0 Safari/605.1.15" 72 | } 73 | 74 | page = requests.get(URL_BASE, headers=headers) 75 | soup = BeautifulSoup(page.content, 'html.parser') 76 | 77 | scripts = soup.find_all(name='script', string=re.compile(r"^AF_initDataCallback")) 78 | scripts = [s.text for s in scripts] 79 | 80 | data_by_key = _get_data_by_key(scripts) 81 | 82 | # Get all languages (ds:3) 83 | # data for 'ds:3' is 84 | # [ 85 | # [['hi', 'Hindi'], ['ps', 'Pashto'], ... ]], 86 | # [['hi', 'Hindi'], ['ps', 'Pashto'], ... ]] 87 | # ] 88 | # (Note: list[0] and list[1] are identical) 89 | all_langs_raw = data_by_key["ds:3"] 90 | 91 | # Get languages codes that have TTS (ds:6) 92 | # data for 'ds:6' is 93 | # [ 94 | # [['af', 200], ['ar', 200], ...] 95 | # ] 96 | tts_langs_raw = data_by_key["ds:6"] 97 | tts_langs = [lang[0] for lang in tts_langs_raw[0]] 98 | 99 | # Create language dict (and filter only TTS-enabled langs) 100 | # langs = { lang[0], lang[1] for lang in all_langs_raw[0] } 101 | 102 | langs = {k: v for k, v in all_langs_raw[0] if k in tts_langs} 103 | return langs 104 | 105 | 106 | if __name__ == "__main__": 107 | """Language list generation 'main' 108 | 109 | CLI to generate the language list as a dict in 110 | an importable python file/module 111 | 112 | Usage: 113 | python ./scripts/gen_langs.py ./gTTS/gtts/langs.py 114 | 115 | """ 116 | 117 | lang_file_path = sys.argv[1] 118 | with open(lang_file_path, 'w') as f: 119 | langs = _fetch_langs() 120 | 121 | py_content = f"""# Note: this file is generated 122 | _langs = {json.dumps(langs, indent=4, sort_keys=True)} 123 | 124 | def _main_langs(): 125 | return _langs 126 | """ 127 | 128 | f.write(py_content) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/stable/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'gTTS' 23 | copyright = '2014-2021 Pierre Nicolas Durette' 24 | author = 'Pierre Nicolas Durette' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.viewcode', 44 | 'sphinx_click.ext', 45 | 'sphinx.ext.napoleon', 46 | ] 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = '.rst' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # The language for content autogenerated by Sphinx. Refer to documentation 61 | # for a list of supported languages. 62 | # 63 | # This is also used if you do content translation via gettext catalogs. 64 | # Usually you set "language" from the command line for these cases. 65 | language = None 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | # This pattern also affects html_static_path and html_extra_path . 70 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 71 | 72 | # The name of the Pygments (syntax highlighting) style to use. 73 | pygments_style = 'sphinx' 74 | 75 | 76 | # -- Options for HTML output ------------------------------------------------- 77 | 78 | # The theme to use for HTML and HTML Help pages. See the documentation for 79 | # a list of builtin themes. 80 | # 81 | html_theme = 'sphinx_rtd_theme' 82 | 83 | # Theme options are theme-specific and customize the look and feel of a theme 84 | # further. For a list of options available for each theme, see the 85 | # documentation. 86 | # 87 | # html_theme_options = {} 88 | 89 | # Add any paths that contain custom static files (such as style sheets) here, 90 | # relative to this directory. They are copied after the builtin static files, 91 | # so a file named "default.css" will overwrite the builtin "default.css". 92 | html_static_path = ['_static'] 93 | 94 | # Custom sidebar templates, must be a dictionary that maps document names 95 | # to template names. 96 | # 97 | # The default sidebars (for documents that don't match any pattern) are 98 | # defined by theme itself. Builtin themes are using these templates by 99 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 100 | # 'searchbox.html']``. 101 | # 102 | # html_sidebars = {} 103 | 104 | 105 | # -- Options for HTMLHelp output --------------------------------------------- 106 | 107 | # Output file base name for HTML help builder. 108 | htmlhelp_basename = 'gTTSdoc' 109 | 110 | 111 | # -- Options for LaTeX output ------------------------------------------------ 112 | 113 | latex_elements = { 114 | # The paper size ('letterpaper' or 'a4paper'). 
115 | # 116 | # 'papersize': 'letterpaper', 117 | 118 | # The font size ('10pt', '11pt' or '12pt'). 119 | # 120 | # 'pointsize': '10pt', 121 | 122 | # Additional stuff for the LaTeX preamble. 123 | # 124 | # 'preamble': '', 125 | 126 | # Latex figure (float) alignment 127 | # 128 | # 'figure_align': 'htbp', 129 | } 130 | 131 | # Grouping the document tree into LaTeX files. List of tuples 132 | # (source start file, target name, title, 133 | # author, documentclass [howto, manual, or own class]). 134 | latex_documents = [ 135 | (master_doc, 'gTTS.tex', 'gTTS Documentation', 136 | 'Pierre-Nick Durette', 'manual'), 137 | ] 138 | 139 | 140 | # -- Options for manual page output ------------------------------------------- 141 | 142 | # One entry per manual page. List of tuples 143 | # (source start file, name, description, authors, manual section). 144 | man_pages = [ 145 | (master_doc, 'gtts', 'gTTS Documentation', 146 | [author], 1) 147 | ] 148 | 149 | 150 | # -- Options for Texinfo output ---------------------------------------------- 151 | 152 | # Grouping the document tree into Texinfo files. List of tuples 153 | # (source start file, target name, title, author, 154 | # dir menu entry, description, category) 155 | texinfo_documents = [ 156 | (master_doc, 'gTTS', 'gTTS Documentation', 157 | author, 'gTTS', 'One line description of project.', 158 | 'Miscellaneous'), 159 | ] 160 | 161 | 162 | # -- Extension configuration ------------------------------------------------- 163 | -------------------------------------------------------------------------------- /docs/module.rst: -------------------------------------------------------------------------------- 1 | Module (:mod:`gtts`) 2 | ==================== 3 | 4 | .. contents:: :local: 5 | :depth: 2 6 | 7 | gTTS (:class:`gtts.gTTS`) 8 | ------------------------- 9 | 10 | .. automodule:: gtts.tts 11 | :members: 12 | 13 | Languages (:mod:`gtts.lang`) 14 | ---------------------------- 15 | 16 | .. note:: The easiest way to get a list of available languages is to print them 17 | with ``gtts-cli --all`` 18 | 19 | .. automodule:: gtts.lang 20 | :members: 21 | 22 | Localized 'accents' 23 | ------------------- 24 | 25 | For a given language, Google Translate text-to-speech can speak in different 26 | local 'accents' depending on the Google domain (``google.<tld>``) of the request, 27 | with some examples shown in the table below. 28 | 29 | .. note:: This is an **incomplete** list. Try different combinations of language codes and 30 | `known localized Google domains `_. Feel 31 | free to add new combinations to this list via a Pull Request!
32 | 33 | +---------------------------+--------------------------+----------------------------+ 34 | | Local accent | Language code (``lang``) | Top-level domain (``tld``) | 35 | +===========================+==========================+============================+ 36 | | English (Australia) | ``en`` | ``com.au`` | 37 | +---------------------------+--------------------------+----------------------------+ 38 | | English (United Kingdom) | ``en`` | ``co.uk`` | 39 | +---------------------------+--------------------------+----------------------------+ 40 | | English (United States) | ``en`` | ``com`` (default) | 41 | +---------------------------+--------------------------+----------------------------+ 42 | | English (Canada) | ``en`` | ``ca`` | 43 | +---------------------------+--------------------------+----------------------------+ 44 | | English (India) | ``en`` | ``co.in`` | 45 | +---------------------------+--------------------------+----------------------------+ 46 | | English (Ireland) | ``en`` | ``ie`` | 47 | +---------------------------+--------------------------+----------------------------+ 48 | | English (South Africa) | ``en`` | ``co.za`` | 49 | +---------------------------+--------------------------+----------------------------+ 50 | | French (Canada) | ``fr`` | ``ca`` | 51 | +---------------------------+--------------------------+----------------------------+ 52 | | French (France) | ``fr`` | ``fr`` | 53 | +---------------------------+--------------------------+----------------------------+ 54 | | Mandarin (China Mainland) | ``zh-CN`` | any | 55 | +---------------------------+--------------------------+----------------------------+ 56 | | Mandarin (Taiwan) | ``zh-TW`` | any | 57 | +---------------------------+--------------------------+----------------------------+ 58 | | Portuguese (Brazil) | ``pt`` | ``com.br`` | 59 | +---------------------------+--------------------------+----------------------------+ 60 | | Portuguese (Portugal) | ``pt`` | ``pt`` | 61 | +---------------------------+--------------------------+----------------------------+ 62 | | Spanish (Mexico) | ``es`` | ``com.mx`` | 63 | +---------------------------+--------------------------+----------------------------+ 64 | | Spanish (Spain) | ``es`` | ``es`` | 65 | +---------------------------+--------------------------+----------------------------+ 66 | | Spanish (United States) | ``es`` | ``com`` (default) | 67 | +---------------------------+--------------------------+----------------------------+ 68 | 69 | 70 | Examples 71 | -------- 72 | 73 | Write 'hello' in English to ``hello.mp3``:: 74 | 75 | >>> from gtts import gTTS 76 | >>> tts = gTTS('hello', lang='en') 77 | >>> tts.save('hello.mp3') 78 | 79 | Write 'hello' in Australian English to ``hello.mp3``:: 80 | 81 | >>> from gtts import gTTS 82 | >>> tts = gTTS('hello', lang='en', tld='com.au') 83 | >>> tts.save('hello.mp3') 84 | 85 | Write 'hello bonjour' in English then French to ``hello_bonjour.mp3``:: 86 | 87 | >>> from gtts import gTTS 88 | >>> tts_en = gTTS('hello', lang='en') 89 | >>> tts_fr = gTTS('bonjour', lang='fr') 90 | >>> 91 | >>> with open('hello_bonjour.mp3', 'wb') as f: 92 | ... tts_en.write_to_fp(f) 93 | ... tts_fr.write_to_fp(f) 94 | 95 | Playing sound directly 96 | ---------------------- 97 | 98 | There's quite a few libraries that do this. 
Write 'hello' to a file-like object 99 | to do further manipulation::: 100 | 101 | >>> from gtts import gTTS 102 | >>> from io import BytesIO 103 | >>> 104 | >>> mp3_fp = BytesIO() 105 | >>> tts = gTTS('hello', lang='en') 106 | >>> tts.write_to_fp(mp3_fp) 107 | >>> 108 | >>> # Load `mp3_fp` as an mp3 file in 109 | >>> # the audio library of your choice 110 | 111 | .. note:: See `Issue #26 `_ for 112 | a discussion and examples of direct playback using various methods. 113 | 114 | 115 | Logging 116 | ------- 117 | 118 | :mod:`gtts` does logging using the standard Python logging module. The following loggers are available: 119 | 120 | ``gtts.tts`` 121 | Logger used for the :class:`gTTS` class 122 | 123 | ``gtts.lang`` 124 | Logger used for the :mod:`lang` module (language fetching) 125 | 126 | ``gtts`` 127 | Upstream logger for all of the above 128 | 129 | -------------------------------------------------------------------------------- /gtts/tests/test_tts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import pytest 4 | from mock import Mock 5 | from six.moves import urllib 6 | 7 | from gtts.tts import gTTS, gTTSError 8 | from gtts.langs import _main_langs 9 | from gtts.lang import _extra_langs 10 | 11 | # Testing all languages takes some time. 12 | # Set TEST_LANGS envvar to choose languages to test. 13 | # * 'main': Languages extracted from the Web 14 | # * 'extra': Languagee set in Languages.EXTRA_LANGS 15 | # * 'all': All of the above 16 | # * : Languages tags list to test 17 | # Unset TEST_LANGS to test everything ('all') 18 | # See: langs_dict() 19 | 20 | 21 | """Construct a dict of suites of languages to test. 22 | { '' : } 23 | 24 | ex.: { 'fetch' : {'en': 'English', 'fr': 'French'}, 25 | 'extra' : {'en': 'English', 'fr': 'French'} } 26 | ex.: { 'environ' : ['en', 'fr'] } 27 | """ 28 | env = os.environ.get('TEST_LANGS') 29 | if not env or env == 'all': 30 | langs = _main_langs() 31 | langs.update(_extra_langs()) 32 | elif env == 'main': 33 | langs = _main_langs() 34 | elif env == 'extra': 35 | langs = _extra_langs() 36 | else: 37 | env_langs = {l: l for l in env.split(',') if l} 38 | langs = env_langs 39 | 40 | 41 | @pytest.mark.net 42 | @pytest.mark.parametrize('lang', langs.keys(), ids=list(langs.values())) 43 | def test_TTS(tmp_path, lang): 44 | """Test all supported languages and file save""" 45 | 46 | text = "This is a test" 47 | """Create output .mp3 file successfully""" 48 | for slow in (False, True): 49 | filename = tmp_path / 'test_{}_.mp3'.format(lang) 50 | # Create gTTS and save 51 | tts = gTTS(text=text, lang=lang, slow=slow, lang_check=False) 52 | tts.save(filename) 53 | 54 | # Check if files created is > 1.5 55 | assert filename.stat().st_size > 1500 56 | 57 | 58 | @pytest.mark.net 59 | def test_unsupported_language_check(): 60 | """Raise ValueError on unsupported language (with language check)""" 61 | lang = 'xx' 62 | text = "Lorem ipsum" 63 | check = True 64 | with pytest.raises(ValueError): 65 | gTTS(text=text, lang=lang, lang_check=check) 66 | 67 | 68 | def test_empty_string(): 69 | """Raise AssertionError on empty string""" 70 | text = "" 71 | with pytest.raises(AssertionError): 72 | gTTS(text=text) 73 | 74 | 75 | def test_no_text_parts(tmp_path): 76 | """Raises AssertionError on no content to send to API (no text_parts)""" 77 | text = " ..,\n" 78 | with pytest.raises(AssertionError): 79 | filename = tmp_path / 'no_content.txt' 80 | tts = gTTS(text=text) 81 | tts.save(filename) 82 | 
83 | 84 | # Test write_to_fp()/save() cases not covered elsewhere in this file 85 | 86 | def test_bad_fp_type(): 87 | """Raise TypeError if fp is not a file-like object (no .write())""" 88 | # Create gTTS and save 89 | tts = gTTS(text='test') 90 | with pytest.raises(TypeError): 91 | tts.write_to_fp(5) 92 | 93 | 94 | @pytest.mark.net 95 | def test_save(tmp_path): 96 | """Save .mp3 file successfully""" 97 | filename = tmp_path / 'save.mp3' 98 | # Create gTTS and save 99 | tts = gTTS(text='test') 100 | tts.save(filename) 101 | 102 | # Check if file created is > 2k 103 | assert filename.stat().st_size > 2000 104 | 105 | 106 | @pytest.mark.net 107 | def test_get_bodies(): 108 | """get request bodies list""" 109 | tts = gTTS(text='test', tld='com', lang='en') 110 | body = tts.get_bodies()[0] 111 | assert 'test' in body 112 | # \"en\" url-encoded 113 | assert '%5C%22en%5C%22' in body 114 | 115 | 116 | def test_msg(): 117 | """Test gTTsError internal exception handling 118 | Set exception message successfully""" 119 | error1 = gTTSError('test') 120 | assert 'test' == error1.msg 121 | 122 | error2 = gTTSError() 123 | assert error2.msg is None 124 | 125 | 126 | def test_infer_msg(): 127 | """Infer message sucessfully based on context""" 128 | 129 | # Without response: 130 | 131 | # Bad TLD 132 | ttsTLD = Mock(tld='invalid') 133 | errorTLD = gTTSError(tts=ttsTLD) 134 | assert errorTLD.msg == "Failed to connect. Probable cause: Host 'https://translate.google.invalid/' is not reachable" 135 | 136 | # With response: 137 | 138 | # 403 139 | tts403 = Mock() 140 | response403 = Mock(status_code=403, reason='aaa') 141 | error403 = gTTSError(tts=tts403, response=response403) 142 | assert error403.msg == "403 (aaa) from TTS API. Probable cause: Bad token or upstream API changes" 143 | 144 | # 200 (and not lang_check) 145 | tts200 = Mock(lang='xx', lang_check=False) 146 | response404 = Mock(status_code=200, reason='bbb') 147 | error200 = gTTSError(tts=tts200, response=response404) 148 | assert error200.msg == "200 (bbb) from TTS API. Probable cause: No audio stream in response. Unsupported language 'xx'" 149 | 150 | # >= 500 151 | tts500 = Mock() 152 | response500 = Mock(status_code=500, reason='ccc') 153 | error500 = gTTSError(tts=tts500, response=response500) 154 | assert error500.msg == "500 (ccc) from TTS API. Probable cause: Uptream API error. Try again later." 155 | 156 | # Unknown (ex. 100) 157 | tts100 = Mock() 158 | response100 = Mock(status_code=100, reason='ddd') 159 | error100 = gTTSError(tts=tts100, response=response100) 160 | assert error100.msg == "100 (ddd) from TTS API. Probable cause: Unknown" 161 | 162 | 163 | @pytest.mark.net 164 | def test_WebRequest(tmp_path): 165 | """Test Web Requests""" 166 | 167 | text = "Lorem ipsum" 168 | 169 | """Raise gTTSError on unsupported language (without language check)""" 170 | lang = 'xx' 171 | check = False 172 | 173 | with pytest.raises(gTTSError): 174 | filename = tmp_path / 'xx.txt' 175 | # Create gTTS 176 | tts = gTTS(text=text, lang=lang, lang_check=check) 177 | tts.save(filename) 178 | 179 | 180 | if __name__ == '__main__': 181 | pytest.main(['-x', __file__]) 182 | -------------------------------------------------------------------------------- /docs/tokenizer.rst: -------------------------------------------------------------------------------- 1 | .. 
module:: gtts.tokenizer 2 | 3 | Pre-processing and tokenizing 4 | ============================= 5 | 6 | The :mod:`gtts.tokenizer` module powers the default pre-processing and tokenizing features of ``gTTS`` and provides tools to easily expand them. :class:`gtts.tts.gTTS` takes two arguments ``pre_processor_funcs`` (list of functions) and ``tokenizer_func`` (function). See: `Pre-processing`_, `Tokenizing`_. 7 | 8 | .. contents:: :local: 9 | :depth: 2 10 | 11 | Definitions 12 | ----------- 13 | 14 | Pre-processor: 15 | Function that takes text and returns text. Its goal is to modify text (for example correcting pronounciation), and/or to prepare text for proper tokenization (for example enuring spacing after certain characters). 16 | 17 | Tokenizer: 18 | Function that takes text and returns it split into a list of `tokens` (strings). 19 | In the ``gTTS`` context, its goal is to cut the text into smaller segments that do not exceed the maximum character size allowed for each TTS API request, while making the speech sound natural and continuous. 20 | It does so by splitting text where speech would naturaly pause (for example on ".") while handling where it should not (for example on "10.5" or "U.S.A."). Such rules are called `tokenizer cases`, which it takes a list of. 21 | 22 | Tokenizer case: 23 | Function that defines one of the specific cases used by :class:`gtts.tokenizer.core.Tokenizer`. More specefically, it returns a ``regex`` object that describes what to look for for a particular case. :class:`gtts.tokenizer.core.Tokenizer` then creates its main `regex` pattern by joining all `tokenizer cases` with "|". 24 | 25 | 26 | Pre-processing 27 | -------------- 28 | 29 | You can pass a list of any function to :class:`gtts.tts.gTTS`'s ``pre_processor_funcs`` attribute to act as pre-processor (as long as it takes a string and returns a string). 30 | 31 | By default, :class:`gtts.tts.gTTS` takes a list of the following pre-processors, applied in order:: 32 | 33 | [ 34 | pre_processors.tone_marks, 35 | pre_processors.end_of_line, 36 | pre_processors.abbreviations, 37 | pre_processors.word_sub 38 | ] 39 | 40 | .. automodule:: gtts.tokenizer.pre_processors 41 | :members: 42 | 43 | Customizing & Examples 44 | ~~~~~~~~~~~~~~~~~~~~~~ 45 | 46 | This module provides two classes to help build pre-processors: 47 | 48 | * :class:`gtts.tokenizer.core.PreProcessorRegex` (for `regex`-based replacing, as would ``re.sub`` use) 49 | * :class:`gtts.tokenizer.core.PreProcessorSub` (for word-for-word replacements). 50 | 51 | The ``run(text)`` method of those objects returns the processed text. 52 | 53 | Speech corrections (word substitution) 54 | ______________________________________ 55 | 56 | The default substitutions are defined by the :attr:`gtts.tokenizer.symbols.SUB_PAIRS` list. Add a custom one by appending to it: 57 | 58 | :: 59 | 60 | >>> from gtts.tokenizer import pre_processors 61 | >>> import gtts.tokenizer.symbols 62 | >>> 63 | >>> gtts.tokenizer.symbols.SUB_PAIRS.append( 64 | ... ('sub.', 'submarine') 65 | ... ) 66 | >>> test_text = "Have you seen the Queen's new sub.?" 67 | >>> pre_processors.word_sub(test_text) 68 | "Have you seen the Queen's new submarine?" 69 | 70 | Abbreviations 71 | _____________ 72 | 73 | The default abbreviations are defined by the :attr:`gtts.tokenizer.symbols.ABBREVIATIONS` list. Add a custom one to it to add a new abbreviation to remove the period from. 
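For example (a minimal sketch; assumes ``bros`` is not already in the default list)::

    >>> import gtts.tokenizer.symbols
    >>>
    >>> gtts.tokenizer.symbols.ABBREVIATIONS.append('bros')

Subsequent calls to :func:`gtts.tokenizer.pre_processors.abbreviations` should then drop the period after "bros." when pre-processing text.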
*Note: the default list already includes an extensive list of English abbreviations that Google Translate will read even without the period.* 74 | 75 | See :mod:`gtts.tokenizer.pre_processors` for more examples. 76 | 77 | Tokenizing 78 | ---------- 79 | 80 | You can pass any function to :class:`gtts.tts.gTTS`'s ``tokenizer_func`` attribute to act as tokenizer (as long as it takes a string and returns a list of strings). 81 | 82 | By default, :class:`gTTS` takes the :class:`gtts.tokenizer.core.Tokenizer`'s :func:`gtts.tokenizer.core.Tokenizer.run()`, initialized with default `tokenizer cases`:: 83 | 84 | Tokenizer([ 85 | tokenizer_cases.tone_marks, 86 | tokenizer_cases.period_comma, 87 | tokenizer_cases.other_punctuation 88 | ]).run 89 | 90 | The available `tokenizer cases` are as follows: 91 | 92 | .. automodule:: gtts.tokenizer.tokenizer_cases 93 | :members: 94 | 95 | Customizing & Examples 96 | ~~~~~~~~~~~~~~~~~~~~~~ 97 | 98 | A `tokenizer case` is a function that returns a compiled `regex` object to be used in a ``re.split()`` context. 99 | 100 | :class:`gtts.tokenizer.core.Tokenizer` takes a list of `tokenizer cases` and joins their pattern with "|" in one single pattern. 101 | 102 | This module provides a class to help build tokenizer cases: :class:`gtts.tokenizer.core.RegexBuilder`. See :class:`gtts.tokenizer.core.RegexBuilder` and :mod:`gtts.tokenizer.tokenizer_cases` for examples. 103 | 104 | Using a 3rd-party tokenizer 105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | Even though :class:`gtts.tokenizer.core.Tokenizer` works well in this context, there are way more advanced tokenizers and tokenzing techniques. As long as you can restrict the lenght of output tokens, you can use any tokenizer you'd like, such as the ones in `NLTK `_. 108 | 109 | Minimizing 110 | ---------- 111 | 112 | The Google Translate text-to-speech API accepts a maximum of **100 characters**. 113 | 114 | If after tokenization any of the tokens is larger than 100 characters, it will be split in two: 115 | 116 | * On the last space character that is closest to, but before the 100th character; 117 | * Between the 100th and 101st characters if there's no space. 118 | 119 | gtts.tokenizer module reference (:mod:`gtts.tokenizer`) 120 | ------------------------------------------------------- 121 | 122 | .. autoclass:: gtts.tokenizer.core.RegexBuilder 123 | :members: 124 | :undoc-members: 125 | 126 | .. autoclass:: gtts.tokenizer.core.PreProcessorRegex 127 | :members: 128 | :undoc-members: 129 | 130 | .. autoclass:: gtts.tokenizer.core.PreProcessorSub 131 | :members: 132 | :undoc-members: 133 | 134 | .. autoclass:: gtts.tokenizer.core.Tokenizer 135 | :members: 136 | :undoc-members: 137 | 138 | .. autoattribute:: gtts.tokenizer.symbols.ABBREVIATIONS 139 | .. autoattribute:: gtts.tokenizer.symbols.SUB_PAIRS 140 | .. autoattribute:: gtts.tokenizer.symbols.ALL_PUNC 141 | .. 
autoattribute:: gtts.tokenizer.symbols.TONE_MARKS 142 | -------------------------------------------------------------------------------- /gtts/cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts import gTTS, gTTSError, __version__ 3 | from gtts.lang import tts_langs 4 | import click 5 | import logging 6 | import logging.config 7 | 8 | # Click settings 9 | CONTEXT_SETTINGS = { 10 | 'help_option_names': ['-h', '--help'] 11 | } 12 | 13 | # Logger settings 14 | LOGGER_SETTINGS = { 15 | 'version': 1, 16 | 'formatters': { 17 | 'default': { 18 | 'format': '%(name)s - %(levelname)s - %(message)s' 19 | } 20 | }, 21 | 'handlers': { 22 | 'console': { 23 | 'class': 'logging.StreamHandler', 24 | 'formatter': 'default' 25 | } 26 | }, 27 | 'loggers': { 28 | 'gtts': { 29 | 'handlers': ['console'], 30 | 'level': 'WARNING' 31 | } 32 | } 33 | } 34 | 35 | # Logger 36 | logging.config.dictConfig(LOGGER_SETTINGS) 37 | log = logging.getLogger('gtts') 38 | 39 | 40 | def sys_encoding(): 41 | """Charset to use for --file |- (stdin)""" 42 | return 'utf8' 43 | 44 | 45 | def validate_text(ctx, param, text): 46 | """Validation callback for the argument. 47 | Ensures (arg) and (opt) are mutually exclusive 48 | """ 49 | if not text and 'file' not in ctx.params: 50 | # No and no 51 | raise click.BadParameter( 52 | " or -f/--file required") 53 | if text and 'file' in ctx.params: 54 | # Both and 55 | raise click.BadParameter( 56 | " and -f/--file can't be used together") 57 | return text 58 | 59 | 60 | def validate_lang(ctx, param, lang): 61 | """Validation callback for the option. 62 | Ensures is a supported language unless the flag is set 63 | """ 64 | if ctx.params['nocheck']: 65 | return lang 66 | 67 | try: 68 | if lang not in tts_langs(): 69 | raise click.UsageError( 70 | "'%s' not in list of supported languages.\n" 71 | "Use --all to list languages or " 72 | "add --nocheck to disable language check." % lang) 73 | else: 74 | # The language is valid. 75 | # No need to let gTTS re-validate. 76 | ctx.params['nocheck'] = True 77 | except RuntimeError as e: 78 | # Only case where the flag can be False 79 | # Non-fatal. gTTS will try to re-validate. 80 | log.debug(str(e), exc_info=True) 81 | 82 | return lang 83 | 84 | 85 | def print_languages(ctx, param, value): 86 | """Callback for flag. 87 | Prints formatted sorted list of supported languages and exits 88 | """ 89 | if not value or ctx.resilient_parsing: 90 | return 91 | 92 | try: 93 | langs = tts_langs() 94 | langs_str_list = sorted("{}: {}".format(k, langs[k]) for k in langs) 95 | click.echo(' ' + '\n '.join(langs_str_list)) 96 | except RuntimeError as e: # pragma: no cover 97 | log.debug(str(e), exc_info=True) 98 | raise click.ClickException("Couldn't fetch language list.") 99 | ctx.exit() 100 | 101 | 102 | def set_debug(ctx, param, debug): 103 | """Callback for flag. 104 | Sets logger level to DEBUG 105 | """ 106 | if debug: 107 | log.setLevel(logging.DEBUG) 108 | return 109 | 110 | 111 | @click.command(context_settings=CONTEXT_SETTINGS) 112 | @click.argument('text', metavar='', required=False, callback=validate_text) 113 | @click.option( 114 | '-f', 115 | '--file', 116 | metavar='', 117 | # For py2.7/unicode. 
If encoding not None Click uses io.open 118 | type=click.File(encoding=sys_encoding()), 119 | help="Read from instead of .") 120 | @click.option( 121 | '-o', 122 | '--output', 123 | metavar='', 124 | type=click.File(mode='wb'), 125 | help="Write to instead of stdout.") 126 | @click.option( 127 | '-s', 128 | '--slow', 129 | default=False, 130 | is_flag=True, 131 | help="Read more slowly.") 132 | @click.option( 133 | '-l', 134 | '--lang', 135 | metavar='', 136 | default='en', 137 | show_default=True, 138 | callback=validate_lang, 139 | help="IETF language tag. Language to speak in. List documented tags with --all.") 140 | @click.option( 141 | '-t', 142 | '--tld', 143 | metavar='', 144 | default='com', 145 | show_default=True, 146 | is_eager=True, # Prioritize to ensure it gets set before 147 | help="Top-level domain for the Google host, i.e https://translate.google.") 148 | @click.option( 149 | '--nocheck', 150 | default=False, 151 | is_flag=True, 152 | is_eager=True, # Prioritize to ensure it gets set before 153 | help="Disable strict IETF language tag checking. Allow undocumented tags.") 154 | @click.option( 155 | '--all', 156 | default=False, 157 | is_flag=True, 158 | is_eager=True, 159 | expose_value=False, 160 | callback=print_languages, 161 | help="Print all documented available IETF language tags and exit.") 162 | @click.option( 163 | '--debug', 164 | default=False, 165 | is_flag=True, 166 | is_eager=True, # Prioritize to see debug logs of callbacks 167 | expose_value=False, 168 | callback=set_debug, 169 | help="Show debug information.") 170 | @click.version_option(version=__version__) 171 | def tts_cli(text, file, output, slow, tld, lang, nocheck): 172 | """ Read to mp3 format using Google Translate's Text-to-Speech API 173 | (set or --file to - for standard input) 174 | """ 175 | 176 | # stdin for 177 | if text == '-': 178 | text = click.get_text_stream('stdin').read() 179 | 180 | # stdout (when no ) 181 | if not output: 182 | output = click.get_binary_stream('stdout') 183 | 184 | # input (stdin on '-' is handled by click.File) 185 | if file: 186 | try: 187 | text = file.read() 188 | except UnicodeDecodeError as e: # pragma: no cover 189 | log.debug(str(e), exc_info=True) 190 | raise click.FileError( 191 | file.name, 192 | " must be encoded using '%s'." 
% 193 | sys_encoding()) 194 | 195 | # TTS 196 | try: 197 | tts = gTTS( 198 | text=text, 199 | lang=lang, 200 | slow=slow, 201 | tld=tld, 202 | lang_check=not nocheck) 203 | tts.write_to_fp(output) 204 | except (ValueError, AssertionError) as e: 205 | raise click.UsageError(str(e)) 206 | except gTTSError as e: 207 | raise click.ClickException(str(e)) 208 | -------------------------------------------------------------------------------- /gtts/tests/test_cli.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | import re 4 | import os 5 | from click.testing import CliRunner 6 | from gtts.cli import tts_cli 7 | 8 | # Need to look into gTTS' log output to test proper instantiation 9 | # - Use testfixtures.LogCapture() b/c TestCase.assertLogs() needs py3.4+ 10 | # - Clear 'gtts' logger handlers (set in gtts.cli) to reduce test noise 11 | import logging 12 | from testfixtures import LogCapture 13 | logger = logging.getLogger('gtts') 14 | logger.handlers = [] 15 | 16 | 17 | """Test options and arguments""" 18 | 19 | 20 | def runner(args, input=None): 21 | return CliRunner().invoke(tts_cli, args, input) 22 | 23 | 24 | def runner_debug(args, input=None): 25 | return CliRunner().invoke(tts_cli, args + ['--debug'], input) 26 | 27 | 28 | # tests 29 | def test_text_no_text_or_file(): 30 | """One of (arg) and should be set""" 31 | result = runner_debug([]) 32 | 33 | assert " required" in result.output 34 | assert result.exit_code != 0 35 | 36 | 37 | def test_text_text_and_file(tmp_path): 38 | """ (arg) and should not be set together""" 39 | filename = tmp_path / 'test_and_file.txt' 40 | filename.touch() 41 | 42 | result = runner_debug(['--file', str(filename), 'test']) 43 | 44 | assert " can't be used together" in result.output 45 | assert result.exit_code != 0 46 | 47 | 48 | def test_text_empty(tmp_path): 49 | """Exit on no text to speak (via )""" 50 | filename = tmp_path / 'text_empty.txt' 51 | filename.touch() 52 | 53 | result = runner_debug(['--file', str(filename)]) 54 | 55 | assert "No text to speak" in result.output 56 | assert result.exit_code != 0 57 | 58 | 59 | # tests 60 | def test_file_not_exists(): 61 | """ should exist""" 62 | result = runner_debug(['--file', 'notexist.txt', 'test']) 63 | 64 | assert "No such file or directory" in result.output 65 | assert result.exit_code != 0 66 | 67 | 68 | # tests 69 | @pytest.mark.net 70 | def test_all(): 71 | """Option should return a list of languages""" 72 | result = runner(['--all']) 73 | 74 | # One or more of " xy: name" (\n optional to match the last) 75 | # Ex. 
" xx: xxxxx\n xx-yy: xxxxx\n xx: xxxxx" 76 | 77 | assert re.match(r"^(?:\s{2}(\w{2}|\w{2}-\w{2}): .+\n?)+$", result.output) 78 | assert result.exit_code == 0 79 | 80 | 81 | # tests 82 | @pytest.mark.net 83 | def test_lang_not_valid(): 84 | """Invalid should display an error""" 85 | result = runner(['--lang', 'xx', 'test']) 86 | 87 | assert "xx' not in list of supported languages" in result.output 88 | assert result.exit_code != 0 89 | 90 | 91 | @pytest.mark.net 92 | def test_lang_nocheck(): 93 | """Invalid (with ) should display an error message from gtts""" 94 | with LogCapture() as lc: 95 | result = runner_debug(['--lang', 'xx', '--nocheck', 'test']) 96 | 97 | log = str(lc) 98 | 99 | assert 'lang: xx' in log 100 | assert 'lang_check: False' in log 101 | assert "Unsupported language 'xx'" in result.output 102 | assert result.exit_code != 0 103 | 104 | # Param set tests 105 | @pytest.mark.net 106 | def test_params_set(): 107 | """Options should set gTTS instance arguments (read from debug log)""" 108 | with LogCapture() as lc: 109 | result = runner_debug(['--lang', 'fr', '--tld', 'es', '--slow', '--nocheck', 'test']) 110 | 111 | log = str(lc) 112 | 113 | assert 'lang: fr' in log 114 | assert 'tld: es' in log 115 | assert 'lang_check: False' in log 116 | assert 'slow: True' in log 117 | assert 'text: test' in log 118 | assert result.exit_code == 0 119 | 120 | 121 | # Test all input methods 122 | pwd = os.path.dirname(__file__) 123 | 124 | # Text for stdin ('-' for or ) 125 | textstdin = """stdin 126 | test 127 | 123""" 128 | 129 | # Text for stdin ('-' for or ) (Unicode) 130 | textstdin_unicode = u"""你吃饭了吗? 131 | 你最喜欢哪部电影? 132 | 我饿了,我要去做饭了。""" 133 | 134 | # Text for and 135 | text = """Can you make pink a little more pinkish can you make pink a little more pinkish, nor can you make the font bigger? 
136 | How much will it cost the website doesn't have the theme i was going for.""" 137 | 138 | textfile_ascii = os.path.join(pwd, 'input_files', 'test_cli_test_ascii.txt') 139 | 140 | # Text for and (Unicode) 141 | text_unicode = u"""这是一个三岁的小孩 142 | 在讲述她从一系列照片里看到的东西。 143 | 对这个世界, 她也许还有很多要学的东西, 144 | 但在一个重要的任务上, 她已经是专家了: 145 | 去理解她所看到的东西。""" 146 | 147 | textfile_utf8 = os.path.join(pwd, 'input_files', 'test_cli_test_utf8.txt') 148 | 149 | """ 150 | Method that mimics's LogCapture's __str__ method to make 151 | the string in the comprehension a unicode literal for P2.7 152 | https://github.com/Simplistix/testfixtures/blob/32c87902cb111b7ede5a6abca9b597db551c88ef/testfixtures/logcapture.py#L149 153 | """ 154 | 155 | 156 | def logcapture_str(lc): 157 | if not lc.records: 158 | return 'No logging captured' 159 | 160 | return '\n'.join([u"%s %s\n %s" % r for r in lc.actual()]) 161 | 162 | 163 | @pytest.mark.net 164 | def test_stdin_text(): 165 | with LogCapture() as lc: 166 | result = runner_debug(['-'], textstdin) 167 | log = logcapture_str(lc) 168 | 169 | assert 'text: %s' % textstdin in log 170 | assert result.exit_code == 0 171 | 172 | 173 | @pytest.mark.net 174 | def test_stdin_text_unicode(): 175 | with LogCapture() as lc: 176 | result = runner_debug(['-'], textstdin_unicode) 177 | log = logcapture_str(lc) 178 | 179 | assert u'text: %s' % textstdin_unicode in log 180 | assert result.exit_code == 0 181 | 182 | 183 | @pytest.mark.net 184 | def test_stdin_file(): 185 | with LogCapture() as lc: 186 | result = runner_debug(['--file', '-'], textstdin) 187 | log = logcapture_str(lc) 188 | 189 | assert 'text: %s' % textstdin in log 190 | assert result.exit_code == 0 191 | 192 | 193 | @pytest.mark.net 194 | def test_stdin_file_unicode(): 195 | with LogCapture() as lc: 196 | result = runner_debug(['--file', '-'], textstdin_unicode) 197 | log = logcapture_str(lc) 198 | 199 | assert 'text: %s' % textstdin_unicode in log 200 | assert result.exit_code == 0 201 | 202 | 203 | @pytest.mark.net 204 | def test_text(): 205 | with LogCapture() as lc: 206 | result = runner_debug([text]) 207 | log = logcapture_str(lc) 208 | 209 | assert "text: %s" % text in log 210 | assert result.exit_code == 0 211 | 212 | 213 | @pytest.mark.net 214 | def test_text_unicode(): 215 | with LogCapture() as lc: 216 | result = runner_debug([text_unicode]) 217 | log = logcapture_str(lc) 218 | 219 | assert "text: %s" % text_unicode in log 220 | assert result.exit_code == 0 221 | 222 | 223 | @pytest.mark.net 224 | def test_file_ascii(): 225 | with LogCapture() as lc: 226 | result = runner_debug(['--file', textfile_ascii]) 227 | log = logcapture_str(lc) 228 | 229 | assert "text: %s" % text in log 230 | assert result.exit_code == 0 231 | 232 | 233 | @pytest.mark.net 234 | def test_file_utf8(): 235 | with LogCapture() as lc: 236 | result = runner_debug(['--file', textfile_utf8]) 237 | log = logcapture_str(lc) 238 | 239 | assert "text: %s" % text_unicode in log 240 | assert result.exit_code == 0 241 | 242 | 243 | @pytest.mark.net 244 | def test_stdout(): 245 | result = runner(['test']) 246 | 247 | # The MP3 encoding (LAME 3.99.5) used to leave a signature in the raw output 248 | # This no longer appears to be the case 249 | assert result.exit_code == 0 250 | 251 | 252 | @pytest.mark.net 253 | def test_file(tmp_path): 254 | filename = tmp_path / 'out.mp3' 255 | 256 | result = runner(['test', '--output', str(filename)]) 257 | 258 | # Check if files created is > 2k 259 | assert filename.stat().st_size > 2000 260 | assert result.exit_code == 0 
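# Illustrative sketch (not part of the original suite): the --version flag
# is wired through click.version_option in gtts/cli.py, so the reported
# version should include gtts.__version__. No network access assumed.
def test_version_sketch():
    from gtts import __version__

    result = runner(['--version'])

    assert __version__ in result.output
    assert result.exit_code == 0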
261 | 262 | 263 | if __name__ == '__main__': 264 | pytest.main(['-x', __file__]) 265 | -------------------------------------------------------------------------------- /gtts/tokenizer/core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | 4 | 5 | class RegexBuilder(): 6 | r"""Builds regex using arguments passed into a pattern template. 7 | 8 | Builds a regex object for which the pattern is made from an argument 9 | passed into a template. If more than one argument is passed (iterable), 10 | each pattern is joined by "|" (regex alternation 'or') to create a 11 | single pattern. 12 | 13 | Args: 14 | pattern_args (iteratable): String element(s) to be each passed to 15 | ``pattern_func`` to create a regex pattern. Each element is 16 | ``re.escape``'d before being passed. 17 | pattern_func (callable): A 'template' function that should take a 18 | string and return a string. It should take an element of 19 | ``pattern_args`` and return a valid regex pattern group string. 20 | flags: ``re`` flag(s) to compile with the regex. 21 | 22 | Example: 23 | To create a simple regex that matches on the characters "a", "b", 24 | or "c", followed by a period:: 25 | 26 | >>> rb = RegexBuilder('abc', lambda x: "{}\.".format(x)) 27 | 28 | Looking at ``rb.regex`` we get the following compiled regex:: 29 | 30 | >>> print(rb.regex) 31 | 'a\.|b\.|c\.' 32 | 33 | The above is fairly simple, but this class can help in writing more 34 | complex repetitive regex, making them more readable and easier to 35 | create by using existing data structures. 36 | 37 | Example: 38 | To match the character following the words "lorem", "ipsum", "meili" 39 | or "koda":: 40 | 41 | >>> words = ['lorem', 'ipsum', 'meili', 'koda'] 42 | >>> rb = RegexBuilder(words, lambda x: "(?<={}).".format(x)) 43 | 44 | Looking at ``rb.regex`` we get the following compiled regex:: 45 | 46 | >>> print(rb.regex) 47 | '(?<=lorem).|(?<=ipsum).|(?<=meili).|(?<=koda).' 48 | 49 | """ 50 | 51 | def __init__(self, pattern_args, pattern_func, flags=0): 52 | self.pattern_args = pattern_args 53 | self.pattern_func = pattern_func 54 | self.flags = flags 55 | 56 | # Compile 57 | self.regex = self._compile() 58 | 59 | def _compile(self): 60 | alts = [] 61 | for arg in self.pattern_args: 62 | arg = re.escape(arg) 63 | alt = self.pattern_func(arg) 64 | alts.append(alt) 65 | 66 | pattern = '|'.join(alts) 67 | return re.compile(pattern, self.flags) 68 | 69 | def __repr__(self): # pragma: no cover 70 | return str(self.regex) 71 | 72 | 73 | class PreProcessorRegex(): 74 | r"""Regex-based substitution text pre-processor. 75 | 76 | Runs a series of regex substitutions (``re.sub``) from each ``regex`` of a 77 | :class:`gtts.tokenizer.core.RegexBuilder` with an extra ``repl`` 78 | replacement parameter. 79 | 80 | Args: 81 | search_args (iteratable): String element(s) to be each passed to 82 | ``search_func`` to create a regex pattern. Each element is 83 | ``re.escape``'d before being passed. 84 | search_func (callable): A 'template' function that should take a 85 | string and return a string. It should take an element of 86 | ``search_args`` and return a valid regex search pattern string. 87 | repl (string): The common replacement passed to the ``sub`` method for 88 | each ``regex``. Can be a raw string (the case of a regex 89 | backreference, for example) 90 | flags: ``re`` flag(s) to compile with each `regex`. 91 | 92 | Example: 93 | Add "!" 
after the words "lorem" or "ipsum", while ignoring case:: 94 | 95 | >>> import re 96 | >>> words = ['lorem', 'ipsum'] 97 | >>> pp = PreProcessorRegex(words, 98 | ... lambda x: "({})".format(x), r'\\1!', 99 | ... re.IGNORECASE) 100 | 101 | In this case, the regex is a group and the replacement uses its 102 | backreference ``\\1`` (as a raw string). Looking at ``pp`` we get the 103 | following list of search/replacement pairs:: 104 | 105 | >>> print(pp) 106 | (re.compile('(lorem)', re.IGNORECASE), repl='\1!'), 107 | (re.compile('(ipsum)', re.IGNORECASE), repl='\1!') 108 | 109 | It can then be run on any string of text:: 110 | 111 | >>> pp.run("LOREM ipSuM") 112 | "LOREM! ipSuM!" 113 | 114 | See :mod:`gtts.tokenizer.pre_processors` for more examples. 115 | 116 | """ 117 | 118 | def __init__(self, search_args, search_func, repl, flags=0): 119 | self.repl = repl 120 | 121 | # Create regex list 122 | self.regexes = [] 123 | for arg in search_args: 124 | rb = RegexBuilder([arg], search_func, flags) 125 | self.regexes.append(rb.regex) 126 | 127 | def run(self, text): 128 | """Run each regex substitution on ``text``. 129 | 130 | Args: 131 | text (string): the input text. 132 | 133 | Returns: 134 | string: text after all substitutions have been sequentially 135 | applied. 136 | 137 | """ 138 | for regex in self.regexes: 139 | text = regex.sub(self.repl, text) 140 | return text 141 | 142 | def __repr__(self): # pragma: no cover 143 | subs_strs = [] 144 | for r in self.regexes: 145 | subs_strs.append("({}, repl='{}')".format(r, self.repl)) 146 | return ", ".join(subs_strs) 147 | 148 | 149 | class PreProcessorSub(): 150 | r"""Simple substitution text preprocessor. 151 | 152 | Performs string-for-string substitution from list a find/replace pairs. 153 | It abstracts :class:`gtts.tokenizer.core.PreProcessorRegex` with a default 154 | simple substitution regex. 155 | 156 | Args: 157 | sub_pairs (list): A list of tuples of the style 158 | ``(, )`` 159 | ignore_case (bool): Ignore case during search. Defaults to ``True``. 160 | 161 | Example: 162 | Replace all occurences of "Mac" to "PC" and "Firefox" to "Chrome":: 163 | 164 | >>> sub_pairs = [('Mac', 'PC'), ('Firefox', 'Chrome')] 165 | >>> pp = PreProcessorSub(sub_pairs) 166 | 167 | Looking at the ``pp``, we get the following list of 168 | search (regex)/replacement pairs:: 169 | 170 | >>> print(pp) 171 | (re.compile('Mac', re.IGNORECASE), repl='PC'), 172 | (re.compile('Firefox', re.IGNORECASE), repl='Chrome') 173 | 174 | It can then be run on any string of text:: 175 | 176 | >>> pp.run("I use firefox on my mac") 177 | "I use Chrome on my PC" 178 | 179 | See :mod:`gtts.tokenizer.pre_processors` for more examples. 180 | 181 | """ 182 | 183 | def __init__(self, sub_pairs, ignore_case=True): 184 | def search_func(x): 185 | return u"{}".format(x) 186 | 187 | flags = re.I if ignore_case else 0 188 | 189 | # Create pre-processor list 190 | self.pre_processors = [] 191 | for sub_pair in sub_pairs: 192 | pattern, repl = sub_pair 193 | pp = PreProcessorRegex([pattern], search_func, repl, flags) 194 | self.pre_processors.append(pp) 195 | 196 | def run(self, text): 197 | """Run each substitution on ``text``. 198 | 199 | Args: 200 | text (string): the input text. 201 | 202 | Returns: 203 | string: text after all substitutions have been sequentially 204 | applied. 
205 | 206 | """ 207 | for pp in self.pre_processors: 208 | text = pp.run(text) 209 | return text 210 | 211 | def __repr__(self): # pragma: no cover 212 | return ", ".join([str(pp) for pp in self.pre_processors]) 213 | 214 | 215 | class Tokenizer(): 216 | r"""An extensible but simple generic rule-based tokenizer. 217 | 218 | A generic and simple string tokenizer that takes a list of functions 219 | (called `tokenizer cases`) returning ``regex`` objects and joins them by 220 | "|" (regex alternation 'or') to create a single regex to use with the 221 | standard ``regex.split()`` function. 222 | 223 | ``regex_funcs`` is a list of any function that can return a ``regex`` 224 | (from ``re.compile()``) object, such as a 225 | :class:`gtts.tokenizer.core.RegexBuilder` instance (and its ``regex`` 226 | attribute). 227 | 228 | See the :mod:`gtts.tokenizer.tokenizer_cases` module for examples. 229 | 230 | Args: 231 | regex_funcs (list): List of compiled ``regex`` objects. Each 232 | functions's pattern will be joined into a single pattern and 233 | compiled. 234 | flags: ``re`` flag(s) to compile with the final regex. Defaults to 235 | ``re.IGNORECASE`` 236 | 237 | Note: 238 | When the ``regex`` objects obtained from ``regex_funcs`` are joined, 239 | their individual ``re`` flags are ignored in favour of ``flags``. 240 | 241 | Raises: 242 | TypeError: When an element of ``regex_funcs`` is not a function, or 243 | a function that does not return a compiled ``regex`` object. 244 | 245 | Warning: 246 | Joined ``regex`` patterns can easily interfere with one another in 247 | unexpected ways. It is recommanded that each tokenizer case operate 248 | on distinct or non-overlapping chracters/sets of characters 249 | (For example, a tokenizer case for the period (".") should also 250 | handle not matching/cutting on decimals, instead of making that 251 | a seperate tokenizer case). 252 | 253 | Example: 254 | A tokenizer with a two simple case (*Note: these are bad cases to 255 | tokenize on, this is simply a usage example*):: 256 | 257 | >>> import re, RegexBuilder 258 | >>> 259 | >>> def case1(): 260 | ... return re.compile("\,") 261 | >>> 262 | >>> def case2(): 263 | ... return RegexBuilder('abc', lambda x: "{}\.".format(x)).regex 264 | >>> 265 | >>> t = Tokenizer([case1, case2]) 266 | 267 | Looking at ``case1().pattern``, we get:: 268 | 269 | >>> print(case1().pattern) 270 | '\\,' 271 | 272 | Looking at ``case2().pattern``, we get:: 273 | 274 | >>> print(case2().pattern) 275 | 'a\\.|b\\.|c\\.' 276 | 277 | Finally, looking at ``t``, we get them combined:: 278 | 279 | >>> print(t) 280 | 're.compile('\\,|a\\.|b\\.|c\\.', re.IGNORECASE) 281 | from: [, ]' 282 | 283 | It can then be run on any string of text:: 284 | 285 | >>> t.run("Hello, my name is Linda a. Call me Lin, b. I'm your friend") 286 | ['Hello', ' my name is Linda ', ' Call me Lin', ' ', " I'm your friend"] 287 | 288 | """ 289 | 290 | def __init__(self, regex_funcs, flags=re.IGNORECASE): 291 | self.regex_funcs = regex_funcs 292 | self.flags = flags 293 | 294 | try: 295 | # Combine 296 | self.total_regex = self._combine_regex() 297 | except (TypeError, AttributeError) as e: # pragma: no cover 298 | raise TypeError( 299 | "Tokenizer() expects a list of functions returning " 300 | "regular expression objects (i.e. re.compile). 
" + str(e)) 301 | 302 | def _combine_regex(self): 303 | alts = [] 304 | for func in self.regex_funcs: 305 | alts.append(func()) 306 | 307 | pattern = '|'.join(alt.pattern for alt in alts) 308 | return re.compile(pattern, self.flags) 309 | 310 | def run(self, text): 311 | """Tokenize `text`. 312 | 313 | Args: 314 | text (string): the input text to tokenize. 315 | 316 | Returns: 317 | list: A list of strings (token) split according to the tokenizer cases. 318 | 319 | """ 320 | return self.total_regex.split(text) 321 | 322 | def __repr__(self): # pragma: no cover 323 | return str(self.total_regex) + " from: " + str(self.regex_funcs) 324 | -------------------------------------------------------------------------------- /gtts/tts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from gtts.tokenizer import pre_processors, Tokenizer, tokenizer_cases 3 | from gtts.utils import _minimize, _len, _clean_tokens, _translate_url 4 | from gtts.lang import tts_langs, _fallback_deprecated_lang 5 | 6 | from six.moves import urllib 7 | try: 8 | from urllib.parse import quote 9 | import urllib3 10 | except ImportError: 11 | from urllib import quote 12 | import urllib2 13 | import requests 14 | import logging 15 | import json 16 | import re 17 | import base64 18 | 19 | __all__ = ['gTTS', 'gTTSError'] 20 | 21 | # Logger 22 | log = logging.getLogger(__name__) 23 | log.addHandler(logging.NullHandler()) 24 | 25 | 26 | class Speed: 27 | """Read Speed 28 | 29 | The Google TTS Translate API supports two speeds: 30 | Slow: True 31 | Normal: None 32 | """ 33 | SLOW = True 34 | NORMAL = None 35 | 36 | 37 | class gTTS: 38 | """gTTS -- Google Text-to-Speech. 39 | 40 | An interface to Google Translate's Text-to-Speech API. 41 | 42 | Args: 43 | text (string): The text to be read. 44 | tld (string): Top-level domain for the Google Translate host, 45 | i.e `https://translate.google.`. Different Google domains 46 | can produce different localized 'accents' for a given 47 | language. This is also useful when ``google.com`` might be blocked 48 | within a network but a local or different Google host 49 | (e.g. ``google.cn``) is not. Default is ``com``. 50 | lang (string, optional): The language (IETF language tag) to 51 | read the text in. Default is ``en``. 52 | slow (bool, optional): Reads text more slowly. Defaults to ``False``. 53 | lang_check (bool, optional): Strictly enforce an existing ``lang``, 54 | to catch a language error early. If set to ``True``, 55 | a ``ValueError`` is raised if ``lang`` doesn't exist. 56 | Setting ``lang_check`` to ``False`` skips Web requests 57 | (to validate language) and therefore speeds up instanciation. 58 | Default is ``True``. 59 | pre_processor_funcs (list): A list of zero or more functions that are 60 | called to transform (pre-process) text before tokenizing. Those 61 | functions must take a string and return a string. Defaults to:: 62 | 63 | [ 64 | pre_processors.tone_marks, 65 | pre_processors.end_of_line, 66 | pre_processors.abbreviations, 67 | pre_processors.word_sub 68 | ] 69 | 70 | tokenizer_func (callable): A function that takes in a string and 71 | returns a list of string (tokens). 
Defaults to:: 72 | 73 | Tokenizer([ 74 | tokenizer_cases.tone_marks, 75 | tokenizer_cases.period_comma, 76 | tokenizer_cases.colon, 77 | tokenizer_cases.other_punctuation 78 | ]).run 79 | 80 | See Also: 81 | :doc:`Pre-processing and tokenizing ` 82 | 83 | Raises: 84 | AssertionError: When ``text`` is ``None`` or empty; when there's nothing 85 | left to speak after pre-precessing, tokenizing and cleaning. 86 | ValueError: When ``lang_check`` is ``True`` and ``lang`` is not supported. 87 | RuntimeError: When ``lang_check`` is ``True`` but there's an error loading 88 | the languages dictionary. 89 | 90 | """ 91 | 92 | GOOGLE_TTS_MAX_CHARS = 100 # Max characters the Google TTS API takes at a time 93 | GOOGLE_TTS_HEADERS = { 94 | "Referer": "http://translate.google.com/", 95 | "User-Agent": 96 | "Mozilla/5.0 (Windows NT 10.0; WOW64) " 97 | "AppleWebKit/537.36 (KHTML, like Gecko) " 98 | "Chrome/47.0.2526.106 Safari/537.36", 99 | "Content-Type": "application/x-www-form-urlencoded;charset=utf-8" 100 | } 101 | GOOGLE_TTS_RPC = "jQ1olc" 102 | 103 | def __init__( 104 | self, 105 | text, 106 | tld='com', 107 | lang='en', 108 | slow=False, 109 | lang_check=True, 110 | pre_processor_funcs=[ 111 | pre_processors.tone_marks, 112 | pre_processors.end_of_line, 113 | pre_processors.abbreviations, 114 | pre_processors.word_sub 115 | ], 116 | tokenizer_func=Tokenizer([ 117 | tokenizer_cases.tone_marks, 118 | tokenizer_cases.period_comma, 119 | tokenizer_cases.colon, 120 | tokenizer_cases.other_punctuation 121 | ]).run 122 | ): 123 | 124 | # Debug 125 | for k, v in dict(locals()).items(): 126 | if k == 'self': 127 | continue 128 | log.debug("%s: %s", k, v) 129 | 130 | # Text 131 | assert text, 'No text to speak' 132 | self.text = text 133 | 134 | # Translate URL top-level domain 135 | self.tld = tld 136 | 137 | # Language 138 | self.lang_check = lang_check 139 | self.lang = lang 140 | 141 | if self.lang_check: 142 | # Fallback lang in case it is deprecated 143 | self.lang = _fallback_deprecated_lang(lang) 144 | 145 | try: 146 | langs = tts_langs() 147 | if self.lang not in langs: 148 | raise ValueError("Language not supported: %s" % lang) 149 | except RuntimeError as e: 150 | log.debug(str(e), exc_info=True) 151 | log.warning(str(e)) 152 | 153 | # Read speed 154 | if slow: 155 | self.speed = Speed.SLOW 156 | else: 157 | self.speed = Speed.NORMAL 158 | 159 | # Pre-processors and tokenizer 160 | self.pre_processor_funcs = pre_processor_funcs 161 | self.tokenizer_func = tokenizer_func 162 | 163 | def _tokenize(self, text): 164 | # Pre-clean 165 | text = text.strip() 166 | 167 | # Apply pre-processors 168 | for pp in self.pre_processor_funcs: 169 | log.debug("pre-processing: %s", pp) 170 | text = pp(text) 171 | 172 | if _len(text) <= self.GOOGLE_TTS_MAX_CHARS: 173 | return _clean_tokens([text]) 174 | 175 | # Tokenize 176 | log.debug("tokenizing: %s", self.tokenizer_func) 177 | tokens = self.tokenizer_func(text) 178 | 179 | # Clean 180 | tokens = _clean_tokens(tokens) 181 | 182 | # Minimize 183 | min_tokens = [] 184 | for t in tokens: 185 | min_tokens += _minimize(t, ' ', self.GOOGLE_TTS_MAX_CHARS) 186 | 187 | # Filter empty tokens, post-minimize 188 | tokens = [t for t in min_tokens if t] 189 | 190 | return min_tokens 191 | 192 | def _prepare_requests(self): 193 | """Created the TTS API the request(s) without sending them. 194 | 195 | Returns: 196 | list: ``requests.PreparedRequests_``. `_``. 
197 | """ 198 | # TTS API URL 199 | translate_url = _translate_url(tld=self.tld, path="_/TranslateWebserverUi/data/batchexecute") 200 | 201 | text_parts = self._tokenize(self.text) 202 | log.debug("text_parts: %s", str(text_parts)) 203 | log.debug("text_parts: %i", len(text_parts)) 204 | assert text_parts, 'No text to send to TTS API' 205 | 206 | prepared_requests = [] 207 | for idx, part in enumerate(text_parts): 208 | data = self._package_rpc(part) 209 | 210 | log.debug("data-%i: %s", idx, data) 211 | 212 | # Request 213 | r = requests.Request(method='POST', 214 | url=translate_url, 215 | data=data, 216 | headers=self.GOOGLE_TTS_HEADERS) 217 | 218 | # Prepare request 219 | prepared_requests.append(r.prepare()) 220 | 221 | return prepared_requests 222 | 223 | def _package_rpc(self, text): 224 | parameter = [text, self.lang, self.speed, "null"] 225 | escaped_parameter = json.dumps(parameter, separators=(',', ':')) 226 | 227 | rpc = [[[self.GOOGLE_TTS_RPC, escaped_parameter, None, "generic"]]] 228 | espaced_rpc = json.dumps(rpc, separators=(',', ':')) 229 | return "f.req={}&".format(quote(espaced_rpc)) 230 | 231 | def get_bodies(self): 232 | """Get TTS API request bodies(s) that would be sent to the TTS API. 233 | 234 | Returns: 235 | list: A list of TTS API request bodiess to make. 236 | """ 237 | return [pr.body for pr in self._prepare_requests()] 238 | 239 | def stream(self): 240 | """Do the TTS API request(s) and stream bytes 241 | 242 | Raises: 243 | :class:`gTTSError`: When there's an error with the API request. 244 | TypeError: When ``fp`` is not a file-like object that takes bytes. 245 | 246 | """ 247 | # When disabling ssl verify in requests (for proxies and firewalls), 248 | # urllib3 prints an insecure warning on stdout. We disable that. 249 | try: 250 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) 251 | except: 252 | pass 253 | 254 | 255 | 256 | prepared_requests = self._prepare_requests() 257 | for idx, pr in enumerate(prepared_requests): 258 | try: 259 | with requests.Session() as s: 260 | # Send request 261 | r = s.send(request=pr, 262 | proxies=urllib.request.getproxies(), 263 | verify=False) 264 | 265 | log.debug("headers-%i: %s", idx, r.request.headers) 266 | log.debug("url-%i: %s", idx, r.request.url) 267 | log.debug("status-%i: %s", idx, r.status_code) 268 | 269 | r.raise_for_status() 270 | except requests.exceptions.HTTPError as e: # pragma: no cover 271 | # Request successful, bad response 272 | log.debug(str(e)) 273 | raise gTTSError(tts=self, response=r) 274 | except requests.exceptions.RequestException as e: # pragma: no cover 275 | # Request failed 276 | log.debug(str(e)) 277 | raise gTTSError(tts=self) 278 | 279 | # Write 280 | for line in r.iter_lines(chunk_size=1024): 281 | decoded_line = line.decode('utf-8') 282 | if 'jQ1olc' in decoded_line: 283 | audio_search = re.search(r'jQ1olc","\[\\"(.*)\\"]', decoded_line) 284 | if audio_search: 285 | as_bytes = audio_search.group(1).encode('ascii') 286 | yield base64.b64decode(as_bytes) 287 | else: 288 | # Request successful, good response, 289 | # no audio stream in response 290 | raise gTTSError(tts=self, response=r) 291 | log.debug("part-%i created", idx) 292 | 293 | def write_to_fp(self, fp): 294 | """Do the TTS API request(s) and write bytes to a file-like object. 295 | 296 | Args: 297 | fp (file object): Any file-like object to write the ``mp3`` to. 298 | 299 | Raises: 300 | :class:`gTTSError`: When there's an error with the API request. 
301 | TypeError: When ``fp`` is not a file-like object that takes bytes. 302 | 303 | """ 304 | 305 | try: 306 | for idx, decoded in enumerate(self.stream()): 307 | fp.write(decoded) 308 | log.debug("part-%i written to %s", idx, fp) 309 | except (AttributeError, TypeError) as e: 310 | raise TypeError( 311 | "'fp' is not a file-like object or it does not take bytes: %s" % 312 | str(e)) 313 | 314 | def save(self, savefile): 315 | """Do the TTS API request and write result to file. 316 | 317 | Args: 318 | savefile (string): The path and file name to save the ``mp3`` to. 319 | 320 | Raises: 321 | :class:`gTTSError`: When there's an error with the API request. 322 | 323 | """ 324 | with open(str(savefile), 'wb') as f: 325 | self.write_to_fp(f) 326 | log.debug("Saved to %s", savefile) 327 | 328 | 329 | class gTTSError(Exception): 330 | """Exception that uses context to present a meaningful error message""" 331 | 332 | def __init__(self, msg=None, **kwargs): 333 | self.tts = kwargs.pop('tts', None) 334 | self.rsp = kwargs.pop('response', None) 335 | if msg: 336 | self.msg = msg 337 | elif self.tts is not None: 338 | self.msg = self.infer_msg(self.tts, self.rsp) 339 | else: 340 | self.msg = None 341 | super(gTTSError, self).__init__(self.msg) 342 | 343 | def infer_msg(self, tts, rsp=None): 344 | """Attempt to guess what went wrong by using known 345 | information (e.g. http response) and observed behaviour 346 | 347 | """ 348 | cause = "Unknown" 349 | 350 | if rsp is None: 351 | premise = "Failed to connect" 352 | 353 | if tts.tld != 'com': 354 | host = _translate_url(tld=tts.tld) 355 | cause = "Host '{}' is not reachable".format(host) 356 | 357 | else: 358 | # rsp should be 359 | # http://docs.python-requests.org/en/master/api/ 360 | status = rsp.status_code 361 | reason = rsp.reason 362 | 363 | premise = "{:d} ({}) from TTS API".format(status, reason) 364 | 365 | if status == 403: 366 | cause = "Bad token or upstream API changes" 367 | elif status == 200 and not tts.lang_check: 368 | cause = "No audio stream in response. Unsupported language '%s'" % self.tts.lang 369 | elif status >= 500: 370 | cause = "Uptream API error. Try again later." 371 | 372 | return "{}. Probable cause: {}".format(premise, cause) 373 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | .. NOTE: You should *NOT* be adding new change log entries to this file, this 2 | file is managed by towncrier. You *may* edit previous change logs to 3 | fix problems like typo corrections or such. 4 | 5 | To add a new change log entry, please see CONTRIBUTING.rst 6 | 7 | Changelog 8 | ========= 9 | 10 | .. towncrier release notes start 11 | 12 | 2.2.3 (2021-06-17) 13 | ------------------ 14 | 15 | Features 16 | ~~~~~~~~ 17 | 18 | - Added Bulgarian language support (`#302 `_) 19 | 20 | 21 | 2.2.2 (2021-02-03) 22 | ------------------ 23 | 24 | Features 25 | ~~~~~~~~ 26 | 27 | - Adds a language fallback feature for deprecated languages to maintain compatiblity (e.g. ``en-us`` becomes ``en``). Fallback can be disabled with ``lang_check=False`` or ``--nocheck`` for the cli (`#267 `_) 28 | 29 | 30 | Bugfixes 31 | ~~~~~~~~ 32 | 33 | - Fix Python 2.7 compatiblity (!). Python 2 is long gone, but the cut wasn't clearly communicated for gTTS, so it was restored. Python 2 support will be completely removed in the next major release. 
(`#255 `_) 34 | - Language code case sensitivity is maintained throughout (`#267 `_) 35 | 36 | 37 | Deprecations and Removals 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | 40 | - The following list of 'hyphenated' language codes no longer work and have been removed: ``en-us``, ``en-ca``, ``en-uk``, ``en-gb``, ``en-au``, ``en-gh``, ``en-in``, ``en-ie``, ``en-nz``, ``en-ng``, ``en-ph``, ``en-za``, ``en-tz``, ``fr-ca``, ``fr-fr``, ``pt-br``, ``pt-pt``, ``es-es``, ``es-us``, ``zh-cn``, ``zh-tw`` (`#267 `_) 41 | - Removed the ``gtts.get_url()`` method (outdated since ``2.1.0``) (`#270 `_) 42 | 43 | 44 | 2.2.1 (2020-11-15) 45 | ------------------ 46 | 47 | Bugfixes 48 | ~~~~~~~~ 49 | 50 | - ``_package_rpc()`` was erroneously packaging the entire text instead of tokenized part (`#252 `_) 51 | 52 | 53 | Improved Documentation 54 | ~~~~~~~~~~~~~~~~~~~~~~ 55 | 56 | - Removes reference to automatic retrieval of languages (`#250 `_) 57 | 58 | 59 | Misc 60 | ~~~~ 61 | 62 | - `#251 `_ 63 | 64 | 65 | 2.2.0 (2020-11-14) 66 | ------------------ 67 | 68 | Features 69 | ~~~~~~~~ 70 | 71 | - Switch to the newer Google TTS API (thanks to `@Boudewijn26! `_). See `his great writeup `_ for more on the methodology and why this was necessary. (`#226 `_, `#232 `_, `#236 `_, `#241 `_) 72 | 73 | 74 | Deprecations and Removals 75 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 76 | 77 | - Removed automatic language download from the main code, which has become too unreliable & slow. 78 | Languages will still be fetched but a pre-generated list will be shipped with ``gTTS``. (`#233 `_, `#241 `_, `#242 `_, `#243 `_) 79 | - Because languages are now pre-generated, removed custom TLD support for language URL (which allowed to get language **names** in other than English) (`#245 `_) 80 | 81 | 82 | Misc 83 | ~~~~ 84 | 85 | - `#245 `_ 86 | 87 | 88 | 2.1.2 (2020-11-10) 89 | ------------------ 90 | 91 | Features 92 | ~~~~~~~~ 93 | 94 | - Update `gTTS-token` to `1.1.4` (`#238 `_) 95 | 96 | 97 | Bugfixes 98 | ~~~~~~~~ 99 | 100 | - Fixed an issue where some tokens could be empty after minimization (`#229 `_, `#239 `_) 101 | 102 | 103 | Improved Documentation 104 | ~~~~~~~~~~~~~~~~~~~~~~ 105 | 106 | - Grammar, spelling and example fixes (`#227 `_) 107 | 108 | 109 | Misc 110 | ~~~~ 111 | 112 | - `#218 `_, `#230 `_, `#231 `_, `#239 `_ 113 | 114 | 115 | 2.1.1 (2020-01-25) 116 | ------------------ 117 | 118 | Bugfixes 119 | ~~~~~~~~ 120 | 121 | - Debug mode now uses a copy of locals() to prevent RuntimeError (`#213 `_) 122 | 123 | 124 | 2.1.0 (2020-01-01) 125 | ------------------ 126 | 127 | Features 128 | ~~~~~~~~ 129 | 130 | - The ``gtts`` module 131 | 132 | - Added the ability to customize the Google Translate URL hostname. 133 | This is useful when ``google.com`` might be blocked within a network but 134 | a local or different Google host (e.g. ``google.cn``) is not 135 | (`#143 `_, `#203 `_): 136 | 137 | - New ``gTTS()`` parameter ``tld`` to specify the top-level 138 | domain to use for the Google hostname, i.e ``https://translate.google.`` 139 | (default: ``com``). 140 | - Languages are also now fetched using the same customized hostname. 141 | 142 | - Pre-generated TTS API request URLs can now be obtained instead of 143 | writing an ``mp3`` file to disk (for example to be used in an 144 | external program): 145 | 146 | - New ``get_urls()`` method returns the list of URLs generated by ``gTTS``, 147 | which can be used in lieu of ``write_to_fp()`` or ``save()``. 
148 | 149 | - The ``gtts-cli`` command-line tool 150 | 151 | - New ``--tld`` option to match the new ``gtts`` customizable hostname (`#200 `_, `#207 `_) 152 | 153 | - Other 154 | 155 | - Added Python 3.8 support (`#204 `_) 156 | 157 | 158 | Bugfixes 159 | ~~~~~~~~ 160 | 161 | - Changed default word-for-word pre-processor (``('M.', 'Monsieur')``) which would substitute any 'm.' for 'monsieur' (e.g. 'them.' became 'themonsieur') (`#197 `_) 162 | 163 | 164 | Improved Documentation 165 | ~~~~~~~~~~~~~~~~~~~~~~ 166 | 167 | - Added examples for newer features (`#205 `_, `#207 `_) 168 | 169 | 170 | Misc 171 | ~~~~ 172 | 173 | - `#204 `_, `#205 `_, `#207 `_ 174 | 175 | 176 | 2.0.4 (2019-08-29) 177 | ------------------ 178 | 179 | Features 180 | ~~~~~~~~ 181 | 182 | - gTTS is now built as a wheel package (Python 2 & 3) (`#181 `_) 183 | 184 | 185 | Improved Documentation 186 | ~~~~~~~~~~~~~~~~~~~~~~ 187 | 188 | - Fixed bad example in docs (`#163 `_, `#166 `_) 189 | 190 | 191 | Misc 192 | ~~~~ 193 | 194 | - `#164 `_, `#171 `_, `#173 `_, `#185 `_ 195 | 196 | 197 | 2.0.3 (2018-12-15) 198 | ------------------ 199 | 200 | Features 201 | ~~~~~~~~ 202 | 203 | - Added new tokenizer case for ':' preventing cut in the middle of a time notation (`#135 `_) 204 | 205 | 206 | Misc 207 | ~~~~ 208 | 209 | - `#159 `_ 210 | 211 | 212 | 2.0.2 (2018-12-09) 213 | ------------------ 214 | 215 | Features 216 | ~~~~~~~~ 217 | 218 | - Added Python 3.7 support, modernization of packaging, testing and CI (`#126 `_) 219 | 220 | 221 | Bugfixes 222 | ~~~~~~~~ 223 | 224 | - Fixed language retrieval/validation broken from new Google Translate page (`#156 `_) 225 | 226 | 227 | 2.0.1 (2018-06-20) 228 | ------------------ 229 | 230 | Bugfixes 231 | ~~~~~~~~ 232 | 233 | - Fixed an UnicodeDecodeError when installing gTTS if system locale was not 234 | utf-8 (`#120 `_) 235 | 236 | 237 | Improved Documentation 238 | ~~~~~~~~~~~~~~~~~~~~~~ 239 | 240 | - Added *Pre-processing and tokenizing > Minimizing* section about the API's 241 | 100 characters limit and how larger tokens are handled (`#121 242 | `_) 243 | 244 | 245 | Misc 246 | ~~~~ 247 | 248 | - `#122 `_ 249 | 250 | 251 | 2.0.0 (2018-04-30) 252 | ------------------ 253 | (`#108 `_) 254 | 255 | Features 256 | ~~~~~~~~ 257 | 258 | - The ``gtts`` module 259 | 260 | - New logger ("gtts") replaces all occurrences of ``print()`` 261 | - Languages list is now obtained automatically (``gtts.lang``) 262 | (`#91 `_, 263 | `#94 `_, 264 | `#106 `_) 265 | - Added a curated list of language sub-tags that 266 | have been observed to provide different dialects or accents 267 | (e.g. "en-gb", "fr-ca") 268 | - New ``gTTS()`` parameter ``lang_check`` to disable language 269 | checking. 270 | - ``gTTS()`` now delegates the ``text`` tokenizing to the 271 | API request methods (i.e. ``write_to_fp()``, ``save()``), 272 | allowing ``gTTS`` instances to be modified/reused 273 | - Rewrote tokenizing and added pre-processing (see below) 274 | - New ``gTTS()`` parameters ``pre_processor_funcs`` and 275 | ``tokenizer_func`` to configure pre-processing and tokenizing 276 | (or use a 3rd party tokenizer) 277 | - Error handling: 278 | 279 | - Added new exception ``gTTSError`` raised on API request errors. 
280 | It attempts to guess what went wrong based on known information 281 | and observed behaviour 282 | (`#60 `_, 283 | `#106 `_) 284 | - ``gTTS.write_to_fp()`` and ``gTTS.save()`` also raise ``gTTSError`` 285 | on `gtts_token` error 286 | - ``gTTS.write_to_fp()`` raises ``TypeError`` when ``fp`` is not a 287 | file-like object or one that doesn't take bytes 288 | - ``gTTS()`` raises ``ValueError`` on unsupported languages 289 | (and ``lang_check`` is ``True``) 290 | - More fine-grained error handling throughout (e.g. 291 | `request failed` vs. `request successful with a bad response`) 292 | 293 | - Tokenizer (and new pre-processors): 294 | 295 | - Rewrote and greatly expanded tokenizer (``gtts.tokenizer``) 296 | - Smarter token 'cleaning' that will remove tokens that only contain 297 | characters that can't be spoken (i.e. punctuation and whitespace) 298 | - Decoupled token minimizing from tokenizing, making the latter usable 299 | in other contexts 300 | - New flexible speech-centric text pre-processing 301 | - New flexible full-featured regex-based tokenizer 302 | (``gtts.tokenizer.core.Tokenizer``) 303 | - New ``RegexBuilder``, ``PreProcessorRegex`` and ``PreProcessorSub`` classes 304 | to make writing regex-powered text `pre-processors` and `tokenizer cases` 305 | easier 306 | - Pre-processors: 307 | 308 | - Re-form words cut by end-of-line hyphens 309 | - Remove periods after a (customizable) list of known abbreviations 310 | (e.g. "jr", "sr", "dr") that can be spoken the same without a period 311 | - Perform speech corrections by doing word-for-word replacements 312 | from a (customizable) list of tuples 313 | 314 | - Tokenizing: 315 | 316 | - Keep punctuation that modify the inflection of speech (e.g. "?", "!") 317 | - Don't split in the middle of numbers (e.g. "10.5", "20,000,000") 318 | (`#101 `_) 319 | - Don't split on "dotted" abbreviations and accronyms (e.g. "U.S.A") 320 | - Added Chinese comma (","), ellipsis ("…") to punctuation list 321 | to tokenize on (`#86 `_) 322 | 323 | - The ``gtts-cli`` command-line tool 324 | 325 | - Rewrote cli as first-class citizen module (``gtts.cli``), 326 | powered by `Click `_ 327 | - Windows support using `setuptool`'s `entry_points` 328 | - Better support for Unicode I/O in Python 2 329 | - All arguments are now pre-validated 330 | - New ``--nocheck`` flag to skip language pre-checking 331 | - New ``--all`` flag to list all available languages 332 | - Either the ``--file`` option or the ```` argument can be set to 333 | "-" to read from ``stdin`` 334 | - The ``--debug`` flag uses logging and doesn't pollute ``stdout`` 335 | anymore 336 | 337 | 338 | Bugfixes 339 | ~~~~~~~~ 340 | 341 | - ``_minimize()``: Fixed an infinite recursion loop that would occur 342 | when a token started with the miminizing delimiter (i.e. a space) 343 | (`#86 `_) 344 | - ``_minimize()``: Handle the case where a token of more than 100 345 | characters did not contain a space (e.g. in Chinese). 
346 | - Fixed an issue that fused multiline text together if the total number of 347 | characters was less than 100 348 | - Fixed ``gtts-cli`` Unicode errors in Python 2.7 (famous last words) 349 | (`#78 `_, 350 | `#93 `_, 351 | `#96 `_) 352 | 353 | 354 | Deprecations and Removals 355 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 356 | 357 | - Dropped Python 3.3 support 358 | - Removed ``debug`` parameter of ``gTTS`` (in favour of logger) 359 | - ``gtts-cli``: Changed long option name of ``-o`` to ``--output`` 360 | instead of ``--destination`` 361 | - ``gTTS()`` will raise a ``ValueError`` rather than an ``AssertionError`` 362 | on unsupported language 363 | 364 | 365 | Improved Documentation 366 | ~~~~~~~~~~~~~~~~~~~~~~ 367 | 368 | - Rewrote all documentation files as reStructuredText 369 | - Comprehensive documentation writen for `Sphinx `_, published to http://gtts.readthedocs.io 370 | - Changelog built with `towncrier `_ 371 | 372 | Misc 373 | ~~~~ 374 | 375 | - Major test re-work 376 | - Language tests can read a ``TEST_LANGS`` enviromment variable so 377 | not all language tests are run every time. 378 | - Added `AppVeyor `_ CI for Windows 379 | - `PEP 8 `_ compliance 380 | 381 | 382 | 1.2.2 (2017-08-15) 383 | ------------------ 384 | 385 | Misc 386 | ~~~~ 387 | 388 | - Update LICENCE, add to manifest (`#77 `_) 389 | 390 | 391 | 1.2.1 (2017-08-02) 392 | ------------------ 393 | 394 | Features 395 | ~~~~~~~~ 396 | 397 | - Add Unicode punctuation to the tokenizer (such as for Chinese and Japanese) 398 | (`#75 `_) 399 | 400 | 401 | Bugfixes 402 | ~~~~~~~~ 403 | 404 | - Fix > 100 characters non-ASCII split, ``unicode()`` for Python 2 (`#71 405 | `_, `#73 406 | `_, `#75 407 | `_) 408 | 409 | 410 | 1.2.0 (2017-04-15) 411 | ------------------ 412 | 413 | Features 414 | ~~~~~~~~ 415 | 416 | - Option for slower read speed (``slow=True`` for ``gTTS()``, ``--slow`` for 417 | ``gtts-cli``) (`#40 `_, `#41 418 | `_, `#64 419 | `_, `#67 420 | `_) 421 | - System proxy settings are passed transparently to all http requests (`#45 422 | `_, `#68 423 | `_) 424 | - Silence SSL warnings from urllib3 (`#69 425 | `_) 426 | 427 | 428 | Bugfixes 429 | ~~~~~~~~ 430 | 431 | - The text to read is now cut in proper chunks in Python 2 unicode. This 432 | broke reading for many languages such as Russian. 433 | - Disabled SSL verify on http requests to accommodate certain firewalls 434 | and proxies. 
428 | Bugfixes
429 | ~~~~~~~~
430 |
431 | - The text to read is now cut in proper chunks in Python 2 unicode
432 |   (improper chunking broke reading for many languages such as Russian)
433 | - Disabled SSL verify on HTTP requests to accommodate certain firewalls
434 |   and proxies.
435 | - Better Python 2/3 support in general (`#9 `_,
436 |   `#48 `_, `#68
437 |   `_)
438 |
439 |
440 | Deprecations and Removals
441 | ~~~~~~~~~~~~~~~~~~~~~~~~~
442 |
443 | - 'pt-br' : 'Portuguese (Brazil)' (it was the same as 'pt' and not Brazilian)
444 |   (`#69 `_)
445 |
446 |
447 | 1.1.8 (2017-01-15)
448 | ------------------
449 |
450 | Features
451 | ~~~~~~~~
452 |
453 | - Added ``stdin`` support via the '-' ``text`` argument to ``gtts-cli`` (`#56
454 |   `_)
455 |
456 |
457 | 1.1.7 (2016-12-14)
458 | ------------------
459 |
460 | Features
461 | ~~~~~~~~
462 |
463 | - Added utf-8 support to ``gtts-cli`` (`#52
464 |   `_)
465 |
466 |
467 | 1.1.6 (2016-07-20)
468 | ------------------
469 |
470 | Features
471 | ~~~~~~~~
472 |
473 | - Added 'bn' : 'Bengali' (`#39 `_,
474 |   `#44 `_)
475 |
476 |
477 | Deprecations and Removals
478 | ~~~~~~~~~~~~~~~~~~~~~~~~~
479 |
480 | - 'ht' : 'Haitian Creole' (removed by Google) (`#43
481 |   `_)
482 |
483 |
484 | 1.1.5 (2016-05-13)
485 | ------------------
486 |
487 | Bugfixes
488 | ~~~~~~~~
489 |
490 | - Fixed HTTP 403s by updating the client argument to reflect new API usage
491 |   (`#32 `_, `#33
492 |   `_)
493 |
494 |
495 | 1.1.4 (2016-02-22)
496 | ------------------
497 |
498 | Features
499 | ~~~~~~~~
500 |
501 | - Spun off token calculation to `gTTS-Token
502 |   `_ (`#23
503 |   `_, `#29
504 |   `_)
505 |
506 |
507 | 1.1.3 (2016-01-24)
508 | ------------------
509 |
510 | Bugfixes
511 | ~~~~~~~~
512 |
513 | - ``gtts-cli`` works with Python 3 (`#20
514 |   `_)
515 | - Better support for non-ASCII characters (`#21
516 |   `_, `#22
517 |   `_)
518 |
519 |
520 | Misc
521 | ~~~~
522 |
523 | - Moved the gTTS token out to its own module (`#19 `_)
524 |
525 |
526 | 1.1.2 (2016-01-13)
527 | ------------------
528 |
529 | Features
530 | ~~~~~~~~
531 |
532 | - Added gTTS token (``tk`` URL parameter) calculation (`#14
533 |   `_, `#15
534 |   `_, `#17
535 |   `_)
536 |
537 |
538 | 1.0.7 (2015-10-07)
539 | ------------------
540 |
541 | Features
542 | ~~~~~~~~
543 |
544 | - Added ``stdout`` support to ``gtts-cli``; text is now an argument rather
545 |   than an option (`#10 `_)
546 |
547 |
548 | 1.0.6 (2015-07-30)
549 | ------------------
550 |
551 | Features
552 | ~~~~~~~~
553 |
554 | - Raise an exception on bad HTTP response (4xx or 5xx) (`#8
555 |   `_)
556 |
557 |
558 | Bugfixes
559 | ~~~~~~~~
560 |
561 | - Added the ``client=t`` parameter for the API HTTP request (`#8
562 |   `_)
563 |
564 |
565 | 1.0.5 (2015-07-15)
566 | ------------------
567 |
568 | Features
569 | ~~~~~~~~
570 |
571 | - ``write_to_fp()`` to write to a file-like object (`#6
572 |   `_)
573 |
574 |
575 | 1.0.4 (2015-05-11)
576 | ------------------
577 |
578 | Features
579 | ~~~~~~~~
580 |
581 | - Added Languages: `zh-yue` : 'Chinese (Cantonese)', `en-uk` : 'English (United
582 |   Kingdom)', `pt-br` : 'Portuguese (Brazil)', `es-es` : 'Spanish (Spain)',
583 |   `es-us` : 'Spanish (United States)', `zh-cn` : 'Chinese (Mandarin/China)',
584 |   `zh-tw` : 'Chinese (Mandarin/Taiwan)' (`#4
585 |   `_)
586 |
587 |
588 | Bugfixes
589 | ~~~~~~~~
590 |
591 | - ``gtts-cli`` prints the version and pretty-prints available languages;
592 |   language codes are now case-insensitive (`#4 `_)
593 |
594 |
595 | 1.0.3 (2014-11-21)
596 | ------------------
597 |
598 | Features
599 | ~~~~~~~~
600 |
601 | - Added Languages: 'en-us' : 'English (United States)', 'en-au' : 'English
602 |   (Australia)' (`#3 `_)
603 |
604 |
605 | 1.0.2 (2014-05-15)
606 | ------------------
607 |
608 | Features
609 | ~~~~~~~~
610 |
611 | - Python 3 support
612 |
613 |
614 | 1.0.1 (2014-05-15)
615 | ------------------
616 |
617 | Misc
618 | ~~~~
619 |
620 | - SemVer versioning, CI changes
621 |
622 |
623 | 1.0 (2014-05-08)
624 | ----------------
625 |
626 | Features
627 | ~~~~~~~~
628 |
629 | - Initial release
630 |
631 |
632 |
--------------------------------------------------------------------------------