├── spear-tts.png ├── spear_tts_pytorch ├── __init__.py ├── data.py ├── distributed.py ├── attend.py ├── trainer.py └── spear_tts_pytorch.py ├── setup.py ├── LICENSE ├── .github └── workflows │ └── python-publish.yml ├── .gitignore └── README.md /spear-tts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucidrains/spear-tts-pytorch/HEAD/spear-tts.png -------------------------------------------------------------------------------- /spear_tts_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | from spear_tts_pytorch.spear_tts_pytorch import ( 2 | TextToSemantic, 3 | SpeechSpeechPretrainWrapper, 4 | SemanticToTextWrapper, 5 | TextToSemanticWrapper, 6 | SemanticToTextDatasetGenerator 7 | ) 8 | 9 | from spear_tts_pytorch.trainer import ( 10 | SpeechSpeechPretrainer, 11 | SemanticToTextTrainer, 12 | TextToSemanticTrainer 13 | ) 14 | 15 | from spear_tts_pytorch.data import ( 16 | GeneratedAudioTextDataset, 17 | MockDataset 18 | ) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name = 'spear-tts-pytorch', 5 | packages = find_packages(exclude=[]), 6 | version = '0.4.8', 7 | license='MIT', 8 | description = 'Spear-TTS - Pytorch', 9 | author = 'Phil Wang', 10 | author_email = 'lucidrains@gmail.com', 11 | long_description_content_type = 'text/markdown', 12 | url = 'https://github.com/lucidrains/spear-tts-pytorch', 13 | keywords = [ 14 | 'artificial intelligence', 15 | 'deep learning', 16 | 'transformers', 17 | 'attention mechanism', 18 | 'text-to-speech' 19 | ], 20 | install_requires=[ 21 | 'audiolm-pytorch>=1.2.8', 22 | 'beartype', 23 | 'einops>=0.6.1', 24 | 'rotary-embedding-torch>=0.3.0', 25 | 'torch>=1.6', 26 | 'tqdm', 27 | 'x-clip>=0.12.2' 28 | ], 29 | classifiers=[ 30 | 'Development Status :: 4 - Beta', 31 | 'Intended Audience :: Developers', 32 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 33 | 'License :: OSI Approved :: MIT License', 34 | 'Programming Language :: Python :: 3.6', 35 | ], 36 | ) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Phil Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | 2 | 3 | # This workflow will upload a Python Package using Twine when a release is created 4 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 5 | 6 | # This workflow uses actions that are not certified by GitHub. 7 | # They are provided by a third-party and are governed by 8 | # separate terms of service, privacy policy, and support 9 | # documentation. 10 | 11 | name: Upload Python Package 12 | 13 | on: 14 | release: 15 | types: [published] 16 | 17 | jobs: 18 | deploy: 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Set up Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: '3.x' 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install build 32 | - name: Build package 33 | run: python -m build 34 | - name: Publish package 35 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 36 | with: 37 | user: __token__ 38 | password: ${{ secrets.PYPI_API_TOKEN }} 39 | -------------------------------------------------------------------------------- /spear_tts_pytorch/data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import torch 4 | from torch.utils.data import Dataset 5 | 6 | from beartype import beartype 7 | 8 | # mock dataset 9 | 10 | class MockDataset(Dataset): 11 | def __init__(self, length: int): 12 | self.length = length 13 | 14 | def __len__(self): 15 | return self.length 16 | 17 | def __getitem__(self, ind): 18 | return torch.randn(1024) 19 | 20 | # generated audio-text dataset 21 | 22 | class GeneratedAudioTextDataset(Dataset): 23 | @beartype 24 | def __init__( 25 | self, 26 | folder: str, 27 | delimiter_id: int = -1 28 | ): 29 | self.folder = Path(folder) 30 | assert self.folder.exists() and self.folder.is_dir() 31 | self.paths = list(self.folder.glob('*.pt')) 32 | self.delimiter_id = delimiter_id 33 | 34 | def __len__(self): 35 | return len(self.paths) 36 | 37 | def __getitem__(self, ind): 38 | path = self.paths[ind] 39 | tensor = torch.load(str(path)) 40 | 41 | delimiter_mask = tensor == self.delimiter_id 42 | assert delimiter_mask.any(), f'delimeter (