├── .gitignore
├── archisound
│   ├── __init__.py
│   └── archisound.py
├── setup.py
├── LICENSE
├── .pre-commit-config.yaml
├── .github
│   └── workflows
│       └── python-publish.yml
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
.mypy_cache
--------------------------------------------------------------------------------
/archisound/__init__.py:
--------------------------------------------------------------------------------
from .archisound import ArchiSound
--------------------------------------------------------------------------------
/archisound/archisound.py:
--------------------------------------------------------------------------------
import torch.nn as nn
from transformers import AutoModel

# Pinned Hugging Face Hub commit revisions for each published checkpoint, so that
# from_pretrained always loads a known-good version of the remote model code.
REVISION = {
    "autoencoder1d-AT-v1": "57b6cde1969208d10fdd3e813708c1abe49f25c1",
    "dmae1d-ATC64-v1": "07885065867977af43b460bb9c1422bdc90c29a0",
    "dmae1d-ATC64-v2": "3ffeea68d4c069777055fce9ac77bbb67eec1d68",
    "dmae1d-ATC32-v3": "3d43b811b83fa395d5ccd6cf58b796b85fddd1d2",
    "adapter-A-v1": "2ee66467450389917eab027526ea31c66d4b7edb",
}


class ArchiSound:
    @staticmethod
    def from_pretrained(name: str = "", **kwargs) -> nn.Module:
        # `name` must be one of the keys of REVISION above.
        default_kwargs = dict(revision=REVISION[name])
        # trust_remote_code is needed because the model classes are defined in the
        # archinetai/* Hub repositories; caller kwargs are merged last and win.
        return AutoModel.from_pretrained(
            f"archinetai/{name}",
            trust_remote_code=True,
            **{**default_kwargs, **kwargs},
        )
--------------------------------------------------------------------------------
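The merge `**{**default_kwargs, **kwargs}` above places caller-supplied keyword arguments after the pinned defaults, so they take precedence. A minimal sketch of what that allows, assuming the `archinetai/*` repositories referenced in the code are reachable on the Hugging Face Hub (the `revision="main"` override is purely illustrative):

```py
from archisound import ArchiSound

# Default behaviour: load the checkpoint at the commit pinned in REVISION.
autoencoder = ArchiSound.from_pretrained("dmae1d-ATC32-v3")

# Because caller kwargs are merged last, they can override the pinned revision
# or pass any other AutoModel.from_pretrained argument (e.g. cache_dir).
autoencoder_latest = ArchiSound.from_pretrained("dmae1d-ATC32-v3", revision="main")
```
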
/setup.py:
--------------------------------------------------------------------------------
from setuptools import find_packages, setup

setup(
    name="archisound",
    packages=find_packages(exclude=[]),
    version="0.0.6",
    license="MIT",
    description="ArchiSound",
    long_description_content_type="text/markdown",
    author="Flavio Schneider",
    author_email="archinetai@protonmail.com",
    url="https://github.com/archinetai/archisound",
    keywords=["artificial intelligence", "deep learning"],
    install_requires=[
        "torch>=1.6",
        "data-science-types>=0.2",
        "transformers",
        "audio-diffusion-pytorch",
        "audio-encoders-pytorch",
    ],
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.6",
    ],
)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 archinet.ai

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v2.3.0
  hooks:
  - id: end-of-file-fixer
  - id: trailing-whitespace

# Formats code
- repo: https://github.com/psf/black
  rev: 22.3.0
  hooks:
  - id: black
    args: ['--experimental-string-processing']

# Sorts imports
- repo: https://github.com/pycqa/isort
  rev: 5.10.1
  hooks:
  - id: isort
    name: isort (python)
    args: ["--profile", "black"]

# Checks for unused imports, line lengths, etc.
- repo: https://gitlab.com/pycqa/flake8
  rev: 4.0.0
  hooks:
  - id: flake8
    args: [
      '--per-file-ignores=__init__.py:F401',
      '--max-line-length=88',
      '--ignore=E1,W1,E2,W2,E4,W4,E5,W5'  # Handled by black
    ]

# Checks types
- repo: https://github.com/pre-commit/mirrors-mypy
  rev: 'v0.971'
  hooks:
  - id: mypy
    additional_dependencies: [data-science-types>=0.2, torch>=1.6]
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build
      - name: Build package
        run: python -m build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ArchiSound

A collection of pre-trained audio models in PyTorch from [`audio-encoders-pytorch`](https://github.com/archinetai/audio-encoders-pytorch) and [`audio-diffusion-pytorch`](https://github.com/archinetai/audio-diffusion-pytorch).

## Install
```bash
pip install archisound
```

[PyPI](https://pypi.org/project/archisound/)

## Autoencoders

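The model names in the list below are exactly the keys of the `REVISION` table in `archisound/archisound.py`, so the available checkpoints can also be listed programmatically; a minimal sketch:

```py
from archisound.archisound import REVISION

print(list(REVISION.keys()))
# ['autoencoder1d-AT-v1', 'dmae1d-ATC64-v1', 'dmae1d-ATC64-v2', 'dmae1d-ATC32-v3', 'adapter-A-v1']
```
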
* [`dmae1d-ATC32-v3`](https://huggingface.co/archinetai/dmae1d-ATC32-v3/tree/main)
Usage and Info

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC32-v3")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x) # [1, 32, 512]
y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 86M |
| Compression Factor | 32x |
| Downsampling Factor | 512x |
| Bottleneck Type | Tanh |

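The factors in the table can be sanity-checked against the tensor shapes in the example above; this is plain arithmetic on the documented shapes, not an API call:

```py
# dmae1d-ATC32-v3: input [1, 2, 2**18] -> latent [1, 32, 512]
in_channels, in_length = 2, 2**18
latent_channels, latent_length = 32, 512

downsampling_factor = in_length // latent_length  # 262144 // 512 = 512 (512x)
compression_factor = (in_channels * in_length) // (latent_channels * latent_length)  # 32 (32x)
print(downsampling_factor, compression_factor)  # 512 32
```
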
* [`dmae1d-ATC64-v2`](https://huggingface.co/archinetai/dmae1d-ATC64-v2/tree/main)
Usage and Info

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v2")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x) # [1, 32, 256]
y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 185M |
| Compression Factor | 64x |
| Downsampling Factor | 1024x |
| Bottleneck Type | Tanh |

* [`autoencoder1d-AT-v1`](https://huggingface.co/archinetai/autoencoder1d-AT-v1/tree/main)
Usage and Info

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("autoencoder1d-AT-v1")

x = torch.randn(1, 2, 2**18) # [1, 2, 262144]
z = autoencoder.encode(x) # [1, 32, 8192]
y = autoencoder.decode(z) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 20.7M |
| Compression Factor | 2x |
| Downsampling Factor | 32x |
| Bottleneck Type | Tanh |
| Known Limitations | Slight blurriness in high frequency spectrogram reconstruction |

* [`dmae1d-ATC64-v1`](https://huggingface.co/archinetai/dmae1d-ATC64-v1/tree/main)
Usage and Info

A diffusion-based autoencoder with a high compression ratio. Requires `audio_diffusion_pytorch==0.0.92`.

```py
import torch
from archisound import ArchiSound

autoencoder = ArchiSound.from_pretrained("dmae1d-ATC64-v1")

x = torch.randn(1, 2, 2**18)
z = autoencoder.encode(x) # [1, 32, 256]
y = autoencoder.decode(z, num_steps=20) # [1, 2, 262144]
```

| Info | |
| ------------- | ------------- |
| Input type | Audio (stereo @ 48kHz) |
| Number of parameters | 234.2M |
| Compression Factor | 64x |
| Downsampling Factor | 1024x |
| Bottleneck Type | Tanh |

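Since `from_pretrained` returns a regular `nn.Module`, the usual PyTorch inference conventions apply to any of the models above. A minimal sketch, assuming the same `encode`/`decode` signatures as the examples (device selection and `eval()` are generic PyTorch usage, not requirements of this package):

```py
import torch
from archisound import ArchiSound

device = "cuda" if torch.cuda.is_available() else "cpu"
autoencoder = ArchiSound.from_pretrained("dmae1d-ATC32-v3").to(device).eval()

with torch.no_grad():  # inference only; no gradient tracking needed
    x = torch.randn(1, 2, 2**18, device=device)  # stereo input, as in the examples above
    z = autoencoder.encode(x)
    y = autoencoder.decode(z, num_steps=20)
```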
--------------------------------------------------------------------------------