├── .gitignore
├── archisound
│   ├── __init__.py
│   └── archisound.py
├── setup.py
├── LICENSE
├── .pre-commit-config.yaml
├── .github
│   └── workflows
│       └── python-publish.yml
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
__pycache__
.mypy_cache
--------------------------------------------------------------------------------
/archisound/__init__.py:
--------------------------------------------------------------------------------
from .archisound import ArchiSound
--------------------------------------------------------------------------------
/archisound/archisound.py:
--------------------------------------------------------------------------------
import torch.nn as nn
from transformers import AutoModel

# Pinned Hugging Face Hub revisions for each released checkpoint.
REVISION = {
    "autoencoder1d-AT-v1": "57b6cde1969208d10fdd3e813708c1abe49f25c1",
    "dmae1d-ATC64-v1": "07885065867977af43b460bb9c1422bdc90c29a0",
    "dmae1d-ATC64-v2": "3ffeea68d4c069777055fce9ac77bbb67eec1d68",
    "dmae1d-ATC32-v3": "3d43b811b83fa395d5ccd6cf58b796b85fddd1d2",
    "adapter-A-v1": "2ee66467450389917eab027526ea31c66d4b7edb",
}


class ArchiSound:
    @staticmethod
    def from_pretrained(name: str = "", **kwargs) -> nn.Module:
        # Caller-provided kwargs take precedence over the pinned revision.
        default_kwargs = dict(revision=REVISION[name])
        return AutoModel.from_pretrained(
            f"archinetai/{name}", trust_remote_code=True, **{**default_kwargs, **kwargs}
        )
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import find_packages, setup

setup(
    name="archisound",
    packages=find_packages(exclude=[]),
    version="0.0.6",
    license="MIT",
    description="ArchiSound",
    long_description_content_type="text/markdown",
    author="Flavio Schneider",
    author_email="archinetai@protonmail.com",
    url="https://github.com/archinetai/archisound",
    keywords=["artificial intelligence", "deep learning"],
    install_requires=[
        "torch>=1.6",
        "data-science-types>=0.2",
        "transformers",
        "audio-diffusion-pytorch",
        "audio-encoders-pytorch",
    ],
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.6",
    ],
)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 archinet.ai

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: end-of-file-fixer
      - id: trailing-whitespace

  # Formats code correctly
  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black
        args: [
          '--experimental-string-processing'
        ]

  # Sorts imports
  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: isort (python)
        args: ["--profile", "black"]

  # Checks unused imports, line lengths, etc.
  - repo: https://gitlab.com/pycqa/flake8
    rev: 4.0.0
    hooks:
      - id: flake8
        args: [
          '--per-file-ignores=__init__.py:F401',
          '--max-line-length=88',
          '--ignore=E1,W1,E2,W2,E4,W4,E5,W5' # Handled by black
        ]

  # Checks types
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: 'v0.971'
    hooks:
      - id: mypy
        additional_dependencies: [data-science-types>=0.2, torch>=1.6]
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build
      - name: Build package
        run: python -m build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# ArchiSound

A collection of pre-trained audio models in PyTorch from [`audio-encoders-pytorch`](https://github.com/archinetai/audio-encoders-pytorch) and [`audio-diffusion-pytorch`](https://github.com/archinetai/audio-diffusion-pytorch).

## Install
```bash
pip install archisound
```

[![PyPI - Python Version](https://img.shields.io/pypi/v/archisound?style=flat&colorA=black&colorB=black)](https://pypi.org/project/archisound/)


## Autoencoders

* [`dmae1d-ATC32-v3`](https://huggingface.co/archinetai/dmae1d-ATC32-v3/tree/main)
**Usage and Info**

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC32-v3")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x) # [1, 32, 512]
y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 86M |
| Compression Factor | 32x |
| Downsampling Factor | 512x |
| Bottleneck Type | Tanh |

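The two factors in the table describe different views of the same latent: the downsampling factor compares lengths along the time axis, while the compression factor compares total element counts (channels × length). The shapes in the snippet above are consistent with both; as a quick arithmetic check:

```py
x_shape = (1, 2, 2**18)  # input from the example above: [batch, channels, samples]
z_shape = (1, 32, 512)   # latent returned by encode(): [batch, channels, frames]

downsampling = x_shape[-1] / z_shape[-1]  # 262144 / 512   -> 512x
compression = (2 * 2**18) / (32 * 512)    # 524288 / 16384 -> 32x
print(downsampling, compression)          # 512.0 32.0
```
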


* [`dmae1d-ATC64-v2`](https://huggingface.co/archinetai/dmae1d-ATC64-v2/tree/main)
**Usage and Info**

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v2")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x) # [1, 32, 256]
y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 185M |
| Compression Factor | 64x |
| Downsampling Factor | 1024x |
| Bottleneck Type | Tanh |

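Because the decoder is diffusion-based, `num_steps` sets how many sampling steps are used to reconstruct the waveform; more steps take longer and generally improve reconstruction quality. The values below are only a sketch to experiment with, not recommended settings:

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v2")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x)

# Fewer steps decode faster; more steps usually reconstruct more faithfully.
draft = autoencoder.decode(z, num_steps=10)
final = autoencoder.decode(z, num_steps=50)
```
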


* [`autoencoder1d-AT-v1`](https://huggingface.co/archinetai/autoencoder1d-AT-v1/tree/main)
**Usage and Info**

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("autoencoder1d-AT-v1")

x = torch.randn(1, 2, 2**18) # [1, 2, 262144]
z = autoencoder.encode(x) # [1, 32, 8192]
y = autoencoder.decode(z) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 20.7M |
| Compression Factor | 2x |
| Downsampling Factor | 32x |
| Bottleneck Type | Tanh |
| Known Limitations | Slight blurriness in high frequency spectrogram reconstruction |

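The same round trip works on real audio loaded from disk. A minimal sketch, assuming `torchaudio` is installed (it is not an `archisound` dependency) and that `song.wav` is a hypothetical stereo 48kHz file; the length is trimmed to a multiple of the 32x downsampling factor as a precaution:

```py
import torch
import torchaudio  # assumed extra dependency, used only for file I/O
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("autoencoder1d-AT-v1")

wave, sr = torchaudio.load("song.wav")           # hypothetical stereo 48kHz file
wave = wave[..., : (wave.shape[-1] // 32) * 32]  # trim to a multiple of 32 samples
wave = wave[None]                                # add batch dim -> [1, 2, num_samples]

with torch.no_grad():
    z = autoencoder.encode(wave)
    reconstruction = autoencoder.decode(z)

torchaudio.save("reconstruction.wav", reconstruction[0], sr)
```
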


* [`dmae1d-ATC64-v1`](https://huggingface.co/archinetai/dmae1d-ATC64-v1/tree/main)
**Usage and Info**

A diffusion-based autoencoder with a high compression ratio. Requires `audio_diffusion_pytorch==0.0.92`.

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v1")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x) # [1, 32, 256]
y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 234.2M |
| Compression Factor | 64x |
| Downsampling Factor | 1024x |
| Bottleneck Type | Tanh |

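## Loading Details

`ArchiSound.from_pretrained` is a thin wrapper around `transformers.AutoModel.from_pretrained` with `trust_remote_code=True`, and it pins each model name to a fixed revision on the Hugging Face Hub (see `archisound/archisound.py`). Extra keyword arguments are forwarded and take precedence over the pinned defaults, so they can be used, for example, to select another revision or a local cache directory (illustrative values below):

```py
from archisound import ArchiSound

# kwargs are passed through to AutoModel.from_pretrained and override the defaults.
autoencoder = ArchiSound.from_pretrained(
    "dmae1d-ATC32-v3",
    revision="main",           # override the pinned commit (at your own risk)
    cache_dir="./checkpoints", # hypothetical local cache directory
)
```
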
--------------------------------------------------------------------------------