├── VERSION
├── docs
    └── filters.md
├── .pre-commit-config.yaml
├── audiolab
    ├── av
    │   ├── templates
    │   │   ├── container.txt
    │   │   ├── filter.txt
    │   │   ├── format.txt
    │   │   ├── info.txt
    │   │   └── codec.txt
    │   ├── container.py
    │   ├── layout.py
    │   ├── typing.py
    │   ├── utils.py
    │   ├── __init__.py
    │   ├── codec.py
    │   ├── filter.py
    │   ├── format.py
    │   ├── graph.py
    │   ├── frame.py
    │   └── lhotse.py
    ├── reader
    │   ├── backend
    │   │   ├── __init__.py
    │   │   ├── backend.py
    │   │   ├── wave.py
    │   │   ├── soundfile.py
    │   │   └── pyav.py
    │   ├── __init__.py
    │   ├── stream_reader.py
    │   ├── reader.py
    │   └── info.py
    ├── writer
    │   ├── backend
    │   │   ├── __init__.py
    │   │   ├── backend.py
    │   │   ├── wave.py
    │   │   ├── soundfile.py
    │   │   └── pyav.py
    │   ├── __init__.py
    │   └── writer.py
    ├── pipe.py
    ├── __init__.py
    └── cli.py
├── .github
    └── workflows
    │   ├── ruff.yml
    │   └── release.yml
├── tests
    ├── __init__.py
    ├── layout_test.py
    ├── container_test.py
    ├── graph_test.py
    ├── codec_test.py
    ├── filter_test.py
    ├── pipe_test.py
    ├── format_test.py
    ├── writer_test.py
    ├── reader_test.py
    └── frame_test.py
├── pyproject.toml
├── .gitignore
├── README.md
└── LICENSE


/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.1
2 | 


--------------------------------------------------------------------------------
/docs/filters.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 |   rev: v0.14.4
4 |   hooks:
5 |     - id: ruff-check
6 |       args: [ --fix ]
7 |     - id: ruff-format
8 | 


--------------------------------------------------------------------------------
/audiolab/av/templates/container.txt:
--------------------------------------------------------------------------------
1 | {{ format.name }} ({{ format.long_name }})
2 | 
3 | Supported Extensions:
4 | {%- for extension in format.extensions %}
5 |   - {{ extension }}
6 | {%- endfor %}
7 | 


--------------------------------------------------------------------------------
/audiolab/av/templates/filter.txt:
--------------------------------------------------------------------------------
 1 | {{ description }}
 2 | 
 3 | Args:
 4 | {%- for option in options %}
 5 |     {{ option.name }} ({{ option.type }})
 6 |     {%- if option.help %}{{ ": " + option.help }}{% endif %}
 7 |     {%- if option.default != "" %} (default {{ option.default }}){% endif %}
 8 | {%- endfor %}
 9 | 
10 | See Also:
11 |     - Run `ffmpeg -h filter={{ name }}` for all CLI options
12 |     - [{{ name }}](https://ffmpeg.org/ffmpeg-filters.html#{{ name }})
13 | 


--------------------------------------------------------------------------------
/audiolab/av/templates/format.txt:
--------------------------------------------------------------------------------
 1 | {{ format.name }} (np.{{ dtype }}, {{ "Planar" if format.is_planar else "Packed" }})
 2 | 
 3 | Supported DeCodecs:
 4 | {%- set codecs = decodecs | sort(attribute="name") %}
 5 | {%- for codec in codecs %}
 6 |   - {{ codec.canonical_name }} ({{ codec.long_name }})
 7 | {%- endfor %}
 8 | 
 9 | Supported EnCodecs:
10 | {%- set codecs = encodecs | sort(attribute="name") %}
11 | {%- for codec in codecs %}
12 |   - {{ codec.name }} ({{ codec.long_name }})
13 | {%- endfor %}
14 | 


--------------------------------------------------------------------------------
/audiolab/av/templates/info.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | Input File     : {{ name }}
 3 | Channels       : {{ channels }}
 4 | Sample Rate    : {{ rate }}
 5 | Precision      : {{ precision }}-bit
 6 | Duration       : {{ duration }} = {{ samples }} samples ~ {{ cdda_sectors }} CDDA sectors
 7 | File Size      : {{ size }}
 8 | Bit Rate       : {{ bit_rate }}
 9 | Sample Encoding: {{ codec }}
10 | {%- if metadata %}
11 | Comments       :
12 |   {%- for key, value in metadata.items() %}
13 |     {{ key }}: {{ value }}
14 |   {%- endfor %}
15 | {%- endif %}
16 | 


--------------------------------------------------------------------------------
/.github/workflows/ruff.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: ruff
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ master ]
 9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   ruff:
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - uses: actions/checkout@v4
17 |       - uses: astral-sh/ruff-action@v3
18 |         with:
19 |           args: "format --check --diff"
20 | 


--------------------------------------------------------------------------------
/audiolab/av/templates/codec.txt:
--------------------------------------------------------------------------------
 1 | {{ codec.canonical_name }} ({{ codec.long_name }})
 2 | 
 3 | Supported Formats:
 4 | {%- set formats = codec.audio_formats | sort(attribute="name") %}
 5 | {%- for format in formats %}
 6 |   {%- set name = format.name %}
 7 |   {%- set dtype = np.dtype(format_dtypes[name]) %}
 8 |   {%- set layout = "Planar" if format.is_planar else "Packed" %}
 9 |   - {{ name }} (np.{{ dtype }}, {{ layout }})
10 | {%- endfor %}
11 | {% if codec.audio_rates %}
12 | Supported Rates (Hz):
13 | {%- set rates = codec.audio_rates | sort %}
14 |   {%- for rate in rates %}
15 |   - {{ rate }}
16 |   {%- endfor %}
17 | {%- endif %}
18 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Create Release
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       version:
 7 |         description: "Build version (e.g. 0.0.1)"
 8 |         required: true
 9 | 
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Same checkout major version as the ruff workflow.
      - uses: actions/checkout@v4
      - name: Publish
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
          # Hand the user-supplied version to the script through the environment
          # instead of interpolating it into the shell source, so a crafted
          # input cannot inject commands.
          VERSION: ${{ inputs.version }}
        run: |
          echo "$VERSION" > VERSION
          python -m venv .venv
          source .venv/bin/activate
          python -m pip install -U build setuptools twine wheel
          python -m build
          python -m twine upload dist/*
26 | 


--------------------------------------------------------------------------------
/audiolab/reader/backend/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from audiolab.reader.backend.backend import Backend
16 | from audiolab.reader.backend.pyav import PyAV as pyav
17 | from audiolab.reader.backend.soundfile import SoundFile as soundfile
18 | from audiolab.reader.backend.wave import Wave as wave
19 | 
20 | __all__ = ["Backend", "pyav", "soundfile", "wave"]
21 | 


--------------------------------------------------------------------------------
/audiolab/writer/backend/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from audiolab.writer.backend.backend import Backend
16 | from audiolab.writer.backend.pyav import PyAV as pyav
17 | from audiolab.writer.backend.soundfile import SoundFile as soundfile
18 | from audiolab.writer.backend.wave import Wave as wave
19 | 
20 | __all__ = ["Backend", "pyav", "soundfile", "wave"]
21 | 


--------------------------------------------------------------------------------
/tests/layout_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytest
16 | 
17 | from audiolab.av.layout import AudioLayout, audio_layouts
18 | 
19 | 
class TestLayout:
    """Every entry of ``audio_layouts`` must be reachable through the ``AudioLayout`` enum."""

    @pytest.mark.parametrize("name, layout", audio_layouts.items())
    def test_layout_name(self, name, layout):
        member = AudioLayout[name]
        # The member wraps the layout object registered under the same name ...
        assert member.value == layout
        # ... and attribute access on the member reaches the wrapped layout.
        expected_channels = len(layout.channels)
        assert member.nb_channels == expected_channels
26 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=68.0", "wheel"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "audiolab"
 7 | authors = [{ name = "Zhendong Peng", email = "pzd17@tsinghua.org.cn" }]
 8 | description = "AudioLab"
 9 | readme = "README.md"
10 | license = {file = "LICENSE"}
# audiolab/av/utils.py imports importlib.resources.files, which was added in Python 3.9.
requires-python = ">=3.9"
dynamic = ["version"]
classifiers = [
  "Programming Language :: Python :: 3",
  "Operating System :: OS Independent",
]
dependencies = [
  "av",
  "click",
  "humanize",
  "jinja2",
  # numpy is imported directly by the package, so declare it instead of
  # relying on it arriving as a transitive dependency.
  "numpy",
  "smart_open",
  "soundfile",
]
25 | 
26 | [project.scripts]
27 | audi = "audiolab.cli:main"
28 | 
29 | [project.urls]
30 | Homepage = "https://github.com/pengzhendong/audiolab"
31 | Documentation = "https://github.com/pengzhendong/audiolab#readme"
32 | BugTracker = "https://github.com/pengzhendong/audiolab/issues"
33 | 
34 | [tool.ruff]
35 | line-length = 120
36 | 
37 | [tool.setuptools.dynamic]
38 | version = { file = "VERSION" }
39 | 
40 | [tool.setuptools.package-data]
41 | audiolab = ["av/templates/*"]
42 | 


--------------------------------------------------------------------------------
/audiolab/writer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Any, Optional
16 | 
17 | import numpy as np
18 | 
19 | from audiolab.av.typing import Dtype
20 | from audiolab.writer.writer import Writer
21 | 
22 | 
def save_audio(file: Any, frame: np.ndarray, rate: int, dtype: Optional[Dtype] = None, format: str = "WAV"):
    """Write a single array to ``file`` and close the writer.

    Args:
        file: Destination handed unchanged to ``Writer``.
        frame: Audio samples to write.
        rate: Sample rate passed to ``Writer``.
        dtype: Optional sample dtype passed to ``Writer``.
        format: Output format name passed to ``Writer`` (default ``"WAV"``).
    """
    writer = Writer(file, rate, dtype, format)
    try:
        writer.write(frame)
    finally:
        # Close the backend even when the write fails, so the underlying
        # resource is not left open until interpreter exit.
        writer.close()
27 | 
28 | 
29 | __all__ = ["Writer", "save_audio"]
30 | 


--------------------------------------------------------------------------------
/tests/container_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytest
16 | 
17 | from audiolab.av.container import ContainerFormat, container_formats, extension_formats
18 | 
19 | 
class TestContainer:
    """Consistency checks between ``ContainerFormat``, ``container_formats`` and ``extension_formats``."""

    @pytest.mark.parametrize("name, format", container_formats.items())
    def test_input_container(self, name, format):
        member = ContainerFormat[name]
        assert member.value == format
        # Each extension advertised by the format must map back to this format name.
        for ext in member.extensions:
            assert ext in extension_formats
            names_for_ext = extension_formats[ext]
            assert name in names_for_ext
28 | 


--------------------------------------------------------------------------------
/audiolab/writer/writer.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Any, Optional
16 | 
17 | import numpy as np
18 | import soundfile as sf
19 | 
20 | from audiolab.av.typing import Dtype
21 | from audiolab.writer.backend import pyav, soundfile
22 | 
23 | 
class Writer:
    """Front end that picks a writer backend from the requested format.

    Formats reported by ``soundfile.available_formats()`` are handled by the
    soundfile backend; every other format goes to the PyAV backend.

    The writer can be used as a context manager, in which case ``close()`` is
    called on exit even if writing raised::

        with Writer(file, rate) as writer:
            writer.write(frame)
    """

    def __init__(self, file: Any, rate: int, dtype: Optional[Dtype] = None, format: str = "WAV"):
        """Create the backend for ``format`` and forward all arguments to it.

        Args:
            file: Destination passed unchanged to the backend.
            rate: Sample rate passed to the backend.
            dtype: Optional sample dtype passed to the backend.
            format: Output format name; compared case-insensitively against
                soundfile's formats, but passed to the backend as given.
        """
        backend = soundfile if format.upper() in sf.available_formats() else pyav
        self.backend = backend(file, rate, dtype, format)

    def __enter__(self) -> "Writer":
        """Return the writer itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the backend; exceptions from the ``with`` body are not suppressed."""
        self.close()

    def write(self, frame: np.ndarray):
        """Forward ``frame`` to the backend's ``write``."""
        self.backend.write(frame)

    def close(self):
        """Forward to the backend's ``close``."""
        self.backend.close()
34 | 


--------------------------------------------------------------------------------
/audiolab/writer/backend/backend.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import atexit
16 | from io import BytesIO
17 | from typing import Any, Optional
18 | 
19 | import numpy as np
20 | 
21 | from audiolab.av.typing import Dtype
22 | 
23 | 
class Backend:
    """Common state and close bookkeeping shared by the writer backends."""

    def __init__(self, file: Any, sample_rate: int, dtype: Optional[Dtype] = None, format: str = "WAV"):
        """Store the output parameters and register ``close`` to run at interpreter exit.

        Args:
            file: Output destination; kept as given.
            sample_rate: Sample rate; kept as given.
            dtype: Optional dtype; normalised with ``np.dtype`` when provided.
            format: Output format name; kept as given.
        """
        self.file = file
        self.sample_rate = sample_rate
        self.dtype = np.dtype(dtype) if dtype is not None else None
        self.format = format

        self.is_closed = False
        # atexit keeps a reference to the bound method, so an unclosed backend
        # is still closed when the interpreter shuts down.
        atexit.register(self.close)

    def close(self):
        """Mark the backend closed; the first call rewinds an in-memory ``BytesIO`` target."""
        if self.is_closed:
            return
        if isinstance(self.file, BytesIO):
            # Rewind so the caller can read the buffer from the start.
            self.file.seek(0)
        self.is_closed = True
41 | 


--------------------------------------------------------------------------------
/audiolab/av/container.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from collections import defaultdict
16 | from typing import Dict, Set
17 | 
18 | import av
19 | 
20 | from audiolab.av.typing import ContainerFormatEnum
21 | from audiolab.av.utils import get_template
22 | 
23 | """
24 | $ ffmpeg -formats
25 | """
# name -> av.ContainerFormat for every format name PyAV reports as available.
container_formats: Dict[str, av.ContainerFormat] = {name: av.ContainerFormat(name) for name in av.formats_available}
# file extension -> names of all formats that list that extension.
extension_formats: Dict[str, Set[str]] = defaultdict(set)
for name, format in container_formats.items():
    for extension in format.extensions:
        extension_formats[extension].add(name)
# Enum with one member per format name; each member wraps its av.ContainerFormat.
ContainerFormat = ContainerFormatEnum("ContainerFormat", container_formats)


# Give every enum member a docstring rendered from the "container" template.
template = get_template("container")
for name, format in container_formats.items():
    member = getattr(ContainerFormat, name)
    member.__doc__ = template.render(format=format)
38 | 


--------------------------------------------------------------------------------
/audiolab/av/layout.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Dict
16 | 
17 | import av
18 | 
19 | from audiolab.av.typing import AudioLayoutEnum
20 | 
21 | """
22 | $ ffmpeg -layouts
23 | """
# Layout names grouped under an integer key.
# NOTE(review): the keys look like channel counts ("downmix" under 0 may be a placeholder) — confirm.
standard_channel_layouts = {
    0: ["downmix"],
    1: ["mono"],
    2: ["stereo"],
    3: ["2.1", "3.0", "3.0(back)"],
    4: ["4.0", "quad", "quad(side)", "3.1"],
    5: ["5.0", "5.0(side)", "4.1"],
    6: ["5.1", "6.0", "6.0(front)", "hexagonal", "5.1(side)", "3.1.2"],
    7: ["7.0", "7.0(front)", "6.1", "6.1(back)", "6.1(front)"],
    8: ["7.1", "7.1(wide)", "7.1(wide-side)", "cube", "octagonal", "5.1.2"],
    10: ["5.1.4", "7.1.2"],
    12: ["7.1.4", "7.2.3"],
    14: ["9.1.4"],
    16: ["hexadecagonal"],
    24: ["22.2"],
}

# Flatten the groups into name -> av.AudioLayout, keeping the table's order.
audio_layouts: Dict[str, av.AudioLayout] = dict(
    (layout_name, av.AudioLayout(layout_name))
    for group in standard_channel_layouts.values()
    for layout_name in group
)
# Enum with one member per layout name; each member wraps its av.AudioLayout.
AudioLayout = AudioLayoutEnum("AudioLayout", audio_layouts)
45 | 


--------------------------------------------------------------------------------
/tests/graph_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | import pytest
17 | 
18 | from audiolab.av import aformat
19 | from audiolab.av.graph import Graph
20 | from audiolab.av.utils import generate_ndarray
21 | 
22 | 
class TestGraph:
    """Push/pull round trip through a ``Graph`` that converts 48 kHz float32 to 16 kHz int16."""

    @pytest.fixture
    def sample_rate(self):
        return 48000

    def test_push_pull(self, sample_rate):
        duration = 0.5
        target_rate = 16000
        graph = Graph(
            rate=sample_rate,
            dtype=np.float32,
            layout="mono",
            filters=[aformat(dtype=np.int16, rate=target_rate)],
            frame_size=1024,
        )
        assert graph.rate == sample_rate

        graph.push(generate_ndarray(1, int(sample_rate * duration), np.float32))

        pulled = []
        for chunk, chunk_rate in graph.pull(True, True):
            # Every pulled chunk must already be at the filter's output rate.
            assert chunk_rate == 16000
            pulled.append(chunk)
        merged = np.concatenate(pulled, axis=1)
        assert merged.shape[1] == 16000 * duration
43 | 


--------------------------------------------------------------------------------
/audiolab/av/typing.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from enum import Enum
16 | from typing import Dict, Tuple, Union
17 | 
18 | import av
19 | import numpy as np
20 | 
21 | 
class BaseEnum(Enum):
    """Enum whose members wrap an object and forward unknown attributes to it.

    Each member stores the object it was created with as its value. Any
    attribute that is not found on the member itself is looked up on that
    wrapped object.
    """

    def __new__(cls, value):
        """Create a member whose value is ``value`` itself."""
        obj = object.__new__(cls)
        obj._value_ = value
        return obj

    def __getattr__(self, attr):
        """Forward attributes missing on the member to the wrapped value.

        Raises:
            AttributeError: if the member has no value yet, or the wrapped
                value does not have ``attr``.
        """
        # __getattr__ only runs after normal lookup has failed. Reading the
        # value through ``self.value`` would re-enter this method when
        # ``_value_`` is missing (for example on an instance that has not been
        # fully built) and recurse until RecursionError, so read it with the
        # default lookup, which never calls back into __getattr__.
        try:
            wrapped = object.__getattribute__(self, "_value_")
        except AttributeError:
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {attr!r}") from None
        return getattr(wrapped, attr)
30 | 
31 | 
class AudioFormatEnum(BaseEnum):
    """Marker base for the audio-format enum (presumably built elsewhere in the package)."""


class AudioLayoutEnum(BaseEnum):
    """Marker base for the audio-layout enum."""


class CodecEnum(BaseEnum):
    """Marker base for the codec enums (presumably built elsewhere in the package)."""


class ContainerFormatEnum(BaseEnum):
    """Marker base for the container-format enum."""
46 | 
47 | 
# Largest value representable by numpy's unsigned 32-bit integer type.
UINT32_MAX = np.iinfo(np.uint32).max
# Union aliases for annotations: each accepts a plain Python/numpy spelling
# or the corresponding PyAV object.
AudioFormat = Union[str, av.AudioFormat]
AudioFrame = Union[np.ndarray, av.AudioFrame, Tuple[np.ndarray, int]]
AudioLayout = Union[int, str, av.AudioLayout]
Codec = Union[str, av.Codec]
ContainerFormat = Union[str, av.ContainerFormat]
Dtype = Union[str, type, np.dtype]
# NOTE(review): presumably a filter name, optionally followed by an argument
# string and/or an options dict — confirm against the filter helpers.
Filter = Union[str, Tuple[str, str], Tuple[str, Dict[str, str]], Tuple[str, str, Dict[str, str]]]
# NOTE(review): presumably a time value in seconds — confirm at call sites.
Seconds = float
57 | 


--------------------------------------------------------------------------------
/tests/codec_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import pytest
16 | 
17 | from audiolab.av.codec import Decodec, Encodec, decodecs, encodecs
18 | from audiolab.av.format import get_codecs
19 | 
20 | 
class TestCodec:
    """Checks that the ``Decodec``/``Encodec`` enums mirror the ``decodecs``/``encodecs`` tables."""

    @staticmethod
    def _assert_audio_codec(member, codec, mode):
        # Checks shared by decoders and encoders; only the mode differs.
        assert member.value == codec
        assert member.mode == mode
        assert member.type == "audio"
        assert member.audio_formats is not None
        for sample_format in member.audio_formats:
            assert member.name in get_codecs(sample_format.name, mode)

    @pytest.mark.parametrize("name, codec", decodecs.items())
    def test_decoder_codec(self, name, codec):
        member = Decodec[name]
        assert member.is_decoder
        self._assert_audio_codec(member, codec, "r")

    @pytest.mark.parametrize("name, codec", encodecs.items())
    def test_encoder_codec(self, name, codec):
        member = Encodec[name]
        assert member.is_encoder
        self._assert_audio_codec(member, codec, "w")
43 | 


--------------------------------------------------------------------------------
/tests/filter_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | import pytest
17 | from av.filter import filters_available
18 | 
19 | from audiolab.av import aformat, filter
20 | from audiolab.av.format import format_dtypes, get_format
21 | 
22 | 
class TestFilter:
    """Checks the generated filter helpers and the keyword mapping of ``aformat``."""

    @pytest.mark.parametrize("name", filters_available)
    def test_filter(self, name):
        # Called without arguments, a helper returns (name, None, {}).
        helper = getattr(filter, name)
        returned_name, args, kwargs = helper()
        assert returned_name == name
        assert args is None
        assert kwargs == {}

    def test_aformat(self):
        # dtype may be given as a numpy dtype object or as its name.
        for is_planar in (True, False):
            for dtype in format_dtypes.values():
                expected = {"sample_fmts": get_format(dtype, is_planar).name}
                np_dtype = np.dtype(dtype)
                assert aformat(dtype=np_dtype, is_planar=is_planar)[2] == expected
                assert aformat(dtype=np_dtype.name, is_planar=is_planar)[2] == expected

        for rate in (8000, 16000, 24000, 48000):
            assert aformat(rate=rate)[2] == {"sample_rates": str(rate)}

        assert aformat(to_mono=False)[2] == {}
        assert aformat(to_mono=True)[2] == {"channel_layouts": "mono"}
43 | 


--------------------------------------------------------------------------------
/audiolab/av/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
import logging
import sys
from importlib.resources import files

import numpy as np
from jinja2 import Environment, FileSystemLoader, Template
from numpy.random import randint, uniform
22 | 
23 | loader = FileSystemLoader(files("audiolab.av").joinpath("templates"))
24 | 
25 | 
def generate_ndarray(nb_channels: int, samples: int, dtype: np.dtype, always_2d: bool = True) -> np.ndarray:
    """Create a random ``(nb_channels, samples)`` array of the given dtype.

    Integer dtypes are drawn with ``randint`` between the dtype's min and max
    (upper bound exclusive); all other dtypes are drawn with ``uniform(-1, 1)``
    and then cast to ``dtype``.

    Args:
        nb_channels: Size of the first axis.
        samples: Size of the second axis.
        dtype: Target dtype of the result.
        always_2d: When False, the result is passed through ``squeeze()``.

    Returns:
        The generated array.
    """
    shape = (nb_channels, samples)
    if np.dtype(dtype).kind in "iu":
        limits = np.iinfo(dtype)
        data = randint(limits.min, limits.max, size=shape, dtype=dtype)
    else:
        data = uniform(-1, 1, size=shape).astype(dtype)
    if always_2d:
        return data
    return data.squeeze()
32 | 
33 | 
34 | def get_template(name: str) -> str:  # NOTE(review): returns whatever Environment.get_template yields, not a plain str - confirm annotation
35 |     return Environment(loader=loader).get_template(f"{name}.txt")  # new Environment per call; resolves "<name>.txt" via the module-level loader
36 | 
37 | 
38 | def get_logger(name, level=logging.INFO):
39 |     logger = logging.getLogger(name)
40 |     logger.setLevel(level)
41 |     if not logger.handlers:
42 |         logger.propagate = False
43 |         handler = logging.StreamHandler(sys.stderr)
44 |         formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s - %(message)s")
45 |         handler.setFormatter(formatter)
46 |         logger.addHandler(handler)
47 |     return logger
48 | 


--------------------------------------------------------------------------------
/tests/pipe_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | import pytest
17 | 
18 | from audiolab.av.filter import atempo
19 | from audiolab.av.utils import generate_ndarray
20 | from audiolab.pipe import AudioPipe
21 | 
22 | 
23 | class TestPipe:  # streaming test for AudioPipe with an atempo filter
24 |     @pytest.fixture
25 |     def nb_channels(self):  # fixture: channel count of the generated chunks
26 |         return 1
27 | 
28 |     @pytest.fixture
29 |     def rate(self):  # fixture: sample rate passed to AudioPipe as in_rate
30 |         return 16000
31 | 
32 |     @pytest.fixture
33 |     def duration(self):  # fixture: one chunk holds int(rate * duration) samples
34 |         return 0.5
35 | 
36 |     def test_audio_pipe(self, nb_channels, rate, duration):
37 |         num_chunks = 5
38 |         num_samples = int(rate * duration * num_chunks)  # total samples pushed into each pipe
39 |         for ratio in (0.9, 1.1):  # atempo argument below and above 1
40 |             for always_2d in (True, False):
41 |                 pipe = AudioPipe(in_rate=rate, filters=[atempo(ratio)], always_2d=always_2d)
42 |                 frames = []
43 |                 for idx in range(num_chunks):
44 |                     pipe.push(generate_ndarray(nb_channels, int(rate * duration), np.int16))
45 |                     for frame, _ in pipe.pull(partial=idx == num_chunks - 1):  # partial=True only after the last push
46 |                         frames.append(frame)
47 |                 audio = np.concatenate(frames, axis=1 if always_2d else 0)  # sample axis is 1 for 2-D frames, 0 otherwise
48 |                 assert np.isclose(audio.shape[1 if always_2d else 0] / rate * ratio, num_samples / rate, atol=0.05)
49 | 


--------------------------------------------------------------------------------
/audiolab/reader/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Any, Iterator, List, Optional, Union
16 | 
17 | from audiolab.av import aformat
18 | from audiolab.av.graph import Graph
19 | from audiolab.av.typing import UINT32_MAX, AudioFrame
20 | from audiolab.reader.backend import Backend
21 | from audiolab.reader.info import Info
22 | from audiolab.reader.reader import Reader
23 | from audiolab.reader.stream_reader import StreamReader
24 | 
25 | 
26 | def info(file: Any, forced_decoding: bool = False, backends: Optional[List[Backend]] = None) -> Info:
27 |     """
28 |     Get the information of an audio file.
29 | 
30 |     Args:
31 |         file: The input audio file, audio url, path to audio file, bytes of audio data, etc.
32 |         forced_decoding: Whether to force decoding of the audio file to get the duration.
33 |         backends: The list of backends to use to get the information.
34 |     Returns:
35 |         An ``Info`` object constructed from ``file`` with the given options.
36 |     """
37 |     return Info(file, forced_decoding=forced_decoding, backends=backends)
38 | 
39 | 
40 | def load_audio(file: Any, **kwargs) -> Union[Iterator[AudioFrame], AudioFrame]:
41 |     reader = Reader(file, **kwargs)
42 |     if reader.frame_size < UINT32_MAX:
43 |         return iter(reader)
44 |     else:
45 |         return next(iter(reader))
46 | 
47 | 
48 | __all__ = ["Graph", "Reader", "StreamReader", "aformat", "load_audio"]
49 | 


--------------------------------------------------------------------------------
/tests/format_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | import pytest
17 | 
18 | from audiolab.av.format import (
19 |     AudioFormat,
20 |     audio_formats,
21 |     format_dtypes,
22 |     get_dtype,
23 |     get_format,
24 | )
25 | 
26 | 
27 | class TestFormat:  # checks AudioFormat, get_format and get_dtype against the module-level tables
28 |     @pytest.mark.parametrize("name, format", audio_formats.items())
29 |     def test_format(self, name, format):  # each AudioFormat member wraps the matching audio_formats entry
30 |         _format = AudioFormat[name]
31 |         assert _format.value == format
32 | 
33 |     @pytest.mark.parametrize("name, dtype", format_dtypes.items())
34 |     def test_get_format(self, name, dtype):
35 |         format = AudioFormat[name].value
36 |         is_planar = name.endswith("p")  # a trailing "p" in the format name is treated as planar here
37 |         assert get_format(name) == format
38 |         if is_planar:  # restricting available_formats to the other layout must select that layout
39 |             assert get_format(dtype, available_formats=[format.packed]).name == format.packed.name
40 |         else:
41 |             assert get_format(dtype, available_formats=[format.planar]).name == format.planar.name
42 |         assert get_format(np.dtype(dtype), is_planar) == format  # lookup by np.dtype object
43 |         assert get_format(np.dtype(dtype).name, is_planar) == format  # lookup by dtype name string
44 | 
45 |     @pytest.mark.parametrize("name, dtype", format_dtypes.items())
46 |     def test_get_dtype(self, name, dtype):  # get_dtype accepts both the format name and the format object
47 |         format = AudioFormat[name].value
48 |         assert get_dtype(name) == np.dtype(dtype)
49 |         assert get_dtype(format) == np.dtype(dtype)
50 | 


--------------------------------------------------------------------------------
/audiolab/writer/backend/wave.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import wave
16 | from typing import Any, Optional
17 | 
18 | import numpy as np
19 | 
20 | from audiolab.av.frame import clip
21 | from audiolab.av.typing import Dtype
22 | from audiolab.writer.backend.backend import Backend
23 | 
24 | _dtype_to_bytes = {"uint8": 1, "int16": 2, "int32": 4}
25 | 
26 | 
27 | class Wave(Backend):
28 |     def __init__(self, file: Any, sample_rate: int, dtype: Optional[Dtype] = None):
29 |         super().__init__(file, sample_rate, dtype)
30 |         self.wave = None
31 |         self.num_channels = None
32 | 
33 |     def open(self):
34 |         self.wave = wave.open(self.file, "w")
35 |         self.wave.setframerate(self.sample_rate)
36 |         self.wave.setnchannels(self.num_channels)
37 |         sampwidth = _dtype_to_bytes[self.dtype.name]
38 |         self.wave.setsampwidth(sampwidth)
39 | 
40 |     def write(self, frame: np.ndarray):
41 |         if self.dtype is None:
42 |             self.dtype = frame.dtype
43 |         frame = np.atleast_2d(clip(frame, self.dtype))
44 |         if self.num_channels is None:
45 |             self.num_channels = frame.shape[0]
46 |         if self.wave is None:
47 |             self.open()
48 |         self.wave.writeframes(frame.tobytes())
49 | 
50 |     def close(self):
51 |         if self.wave is not None and not self.is_closed:
52 |             self.wave.close()
53 |             super().close()
54 | 


--------------------------------------------------------------------------------
/audiolab/writer/backend/soundfile.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from functools import cached_property
16 | from typing import Any, Optional
17 | 
18 | import numpy as np
19 | import soundfile as sf
20 | 
21 | from audiolab.av.frame import clip
22 | from audiolab.av.typing import Dtype
23 | from audiolab.writer.backend.backend import Backend
24 | 
25 | _dtype_to_subtype = {"int16": "PCM_16", "int32": "PCM_32", "float32": "FLOAT", "float64": "DOUBLE"}
26 | 
27 | 
28 | class SoundFile(Backend):  # writer backend that delegates to the soundfile package
29 |     def __init__(self, file: Any, sample_rate: int, dtype: Optional[Dtype] = None, format: str = "WAV"):
30 |         super().__init__(file, sample_rate, dtype, format)
31 |         self.sf = None  # sf.SoundFile handle, created by open()
32 |         self.num_channels = None  # set from the first frame in write()
33 | 
34 |     @cached_property
35 |     def subtype(self) -> str:  # computed on first access, then cached for the instance
36 |         if self.dtype is None:
37 |             return sf.default_subtype(self.format)
38 |         subtype = _dtype_to_subtype[self.dtype.name]  # KeyError for dtypes missing from the table
39 |         # assert subtype in sf.available_subtypes(self.format)
40 |         assert sf.check_format(self.format, subtype)  # NOTE(review): assert is skipped under python -O
41 |         return subtype
42 | 
43 |     def open(self):  # called lazily from write(), after dtype and num_channels are known
44 |         self.sf = sf.SoundFile(self.file, "w", self.sample_rate, self.num_channels, self.subtype, format=self.format)
45 | 
46 |     def write(self, frame: np.ndarray):
47 |         if self.dtype is None:  # fall back to the dtype of the first frame
48 |             self.dtype = frame.dtype
49 |         frame = np.atleast_2d(clip(frame, self.dtype))  # 1-D input becomes a single row
50 |         if self.num_channels is None:
51 |             self.num_channels = frame.shape[0]  # axis 0 is the channel axis
52 |         if self.sf is None:
53 |             self.open()
54 |         # (num_channels, num_samples) => (num_samples, num_channels)
55 |         self.sf.write(frame.T)
56 | 
57 |     def close(self):  # does nothing if the file was never opened or is_closed is already set
58 |         if self.sf is not None and not self.is_closed:
59 |             self.sf.close()
60 |             super().close()
61 | 


--------------------------------------------------------------------------------
/audiolab/pipe.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Iterator, List, Optional, Tuple
16 | 
17 | import numpy as np
18 | 
19 | from audiolab.av.frame import pad
20 | from audiolab.av.typing import AudioFormat, Dtype, Filter
21 | from audiolab.reader import Graph, aformat
22 | 
23 | 
24 | class AudioPipe:
25 |     def __init__(
26 |         self,
27 |         in_rate: int,
28 |         filters: Optional[List[Filter]] = None,
29 |         dtype: Optional[Dtype] = None,
30 |         is_planar: bool = False,
31 |         format: Optional[AudioFormat] = None,
32 |         out_rate: Optional[int] = None,
33 |         to_mono: bool = False,
34 |         frame_size: Optional[int] = 1024,
35 |         fill_value: Optional[float] = None,
36 |         always_2d: bool = True,
37 |     ):
38 |         self.in_rate = in_rate
39 |         self.graph = None
40 |         if not all([dtype is None, format is None, out_rate is None, to_mono is None]):
41 |             filters = filters or []
42 |             filters.append(aformat(dtype, is_planar, format, out_rate, to_mono))
43 |         self.filters = filters
44 |         self.frame_size = frame_size
45 |         self.fill_value = fill_value
46 |         self.always_2d = always_2d
47 | 
48 |     def push(self, frame: np.ndarray):
49 |         if self.graph is None:
50 |             self.graph = Graph(
51 |                 rate=self.in_rate,
52 |                 dtype=frame.dtype,
53 |                 channels=frame.shape[0],
54 |                 filters=self.filters,
55 |                 frame_size=self.frame_size,
56 |                 return_ndarray=True,
57 |             )
58 |         self.graph.push(frame)
59 | 
60 |     def pull(self, partial: bool = False) -> Iterator[Tuple[np.ndarray, int]]:
61 |         for frame, rate in self.graph.pull(partial=partial):
62 |             if self.fill_value is not None:
63 |                 frame = pad(frame, self.frame_size, self.fill_value)
64 |             yield frame if self.always_2d else frame.squeeze(), rate
65 | 


--------------------------------------------------------------------------------
/tests/writer_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from io import BytesIO
16 | 
17 | import numpy as np
18 | import pytest
19 | 
20 | from audiolab.av.utils import generate_ndarray
21 | from audiolab.reader import info
22 | from audiolab.writer import save_audio
23 | 
24 | 
25 | class TestWriter:  # round-trip test: save_audio into a BytesIO, then inspect it with info()
26 |     @pytest.fixture
27 |     def nb_channels(self):  # fixture: channel count of the generated audio
28 |         return 1
29 | 
30 |     @pytest.fixture
31 |     def rate(self):  # fixture: sample rate passed to save_audio
32 |         return 16000
33 | 
34 |     @pytest.fixture
35 |     def duration(self):  # fixture: the generated audio holds int(rate * duration) samples
36 |         return 0.5
37 | 
38 |     # def test_writer(self, nb_channels, rate, duration):
39 |     #     for always_2d in (True, False):
40 |     #         bytes_io = BytesIO()
41 |     #         # always int16 for pcm_s16le even if dtype of ndarray is float32
42 |     #         ndarray = generate_ndarray(nb_channels, int(rate * duration), np.int16, always_2d)
43 |     #         writer = Writer(bytes_io, rate)
44 |     #         writer.write(ndarray)
45 |     #         writer.close()
46 | 
47 |     #         _info = info(bytes_io)
48 |     #         assert _info.channels == nb_channels
49 |     #         assert "signed 16" in _info.codec.lower()
50 |     #         assert _info.duration == duration
51 |     #         assert _info.precision == 16
52 |     #         assert _info.rate == rate
53 | 
54 |     def test_save_audio(self, nb_channels, rate, duration):
55 |         for always_2d in (True, False):  # cover both 2-D and squeezed input arrays
56 |             bytes_io = BytesIO()
57 |             ndarray = generate_ndarray(nb_channels, int(rate * duration), np.int16, always_2d)
58 |             save_audio(bytes_io, ndarray, rate, format="webm")
59 | 
60 |             _info = info(bytes_io)  # read the metadata back from the in-memory buffer
61 |             assert _info.channels == nb_channels
62 |             assert _info.codec == "Opus"
63 |             assert np.isclose(_info.duration, duration + 0.014, atol=0.001)  # Pre-skip / Encoder Delay for opus
64 |             assert _info.precision == 32  # always float32 for opus
65 |             assert _info.rate == 48000  # always 48k for opus
66 | 


--------------------------------------------------------------------------------
/audiolab/av/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Optional, Union
16 | 
17 | import av
18 | import numpy as np
19 | 
20 | from audiolab.av import filter
21 | from audiolab.av.codec import Decodec, Encodec, canonical_names, decodecs, encodecs
22 | from audiolab.av.container import ContainerFormat, container_formats, extension_formats
23 | from audiolab.av.format import AudioFormat, audio_formats, get_codecs, get_dtype, get_format
24 | from audiolab.av.frame import clip, from_ndarray, split_audio_frame, to_ndarray
25 | from audiolab.av.graph import Graph
26 | from audiolab.av.layout import AudioLayout, audio_layouts, standard_channel_layouts
27 | from audiolab.av.lhotse import AudioCache, load_url
28 | 
29 | 
30 | def aformat(
31 |     dtype: Optional[Union[str, type, np.dtype]] = None,
32 |     is_planar: bool = False,
33 |     format: Optional[Union[str, av.AudioFormat]] = None,
34 |     rate: Optional[int] = None,
35 |     to_mono: bool = False,
36 | ):
37 |     kwargs = {}
38 |     if dtype is not None:
39 |         kwargs["sample_fmts"] = get_format(dtype, is_planar).name
40 |     if format is not None:
41 |         kwargs["sample_fmts"] = format.name if isinstance(format, av.AudioFormat) else format
42 |     if rate is not None:
43 |         kwargs["sample_rates"] = rate
44 |     if to_mono:
45 |         kwargs["channel_layouts"] = "mono"
46 |     return filter.aformat(**kwargs)
47 | 
48 | 
49 | __all__ = [
50 |     "AudioCache",
51 |     "AudioFormat",
52 |     "AudioLayout",
53 |     "ContainerFormat",
54 |     "Decodec",
55 |     "Encodec",
56 |     "Filter",
57 |     "Graph",
58 |     "aformat",
59 |     "audio_formats",
60 |     "audio_layouts",
61 |     "canonical_names",
62 |     "clip",
63 |     "container_formats",
64 |     "decodecs",
65 |     "encodecs",
66 |     "extension_formats",
67 |     "from_ndarray",
68 |     "get_codecs",
69 |     "get_dtype",
70 |     "get_format",
71 |     "load_url",
72 |     "split_audio_frame",
73 |     "standard_channel_layouts",
74 |     "to_ndarray",
75 | ]
76 | 


--------------------------------------------------------------------------------
/audiolab/reader/backend/backend.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from functools import cached_property
17 | from io import BytesIO
18 | from typing import Any, Iterator, Optional
19 | 
20 | import numpy as np
21 | 
22 | from audiolab.av import standard_channel_layouts
23 | from audiolab.av.typing import UINT32_MAX, Seconds
24 | 
25 | 
26 | class Backend:  # reader base class; duration, num_channels, sample_rate, seek and read are not defined in this class
27 |     def __init__(self, file: Any, frame_size: Optional[int] = None, forced_decoding: bool = False):
28 |         self.file = file
29 |         self.frame_size = UINT32_MAX if frame_size is None else min(frame_size, UINT32_MAX)  # None maps to UINT32_MAX; larger values are capped
30 |         self.forced_decoding = forced_decoding
31 | 
32 |     @cached_property
33 |     def bit_rate(self) -> Optional[int]:  # NOTE(review): the true division below yields a float despite the int annotation
34 |         bit_rate = None
35 |         if self.size is not None:
36 |             if self.duration is not None and self.duration > 0:  # skip missing or non-positive durations
37 |                 bit_rate = self.size * 8 / self.duration  # size in bits divided by duration
38 |         return bit_rate
39 | 
40 |     @cached_property
41 |     def is_planar(self) -> bool:  # this base implementation always reports False
42 |         return False
43 | 
44 |     @cached_property
45 |     def layout(self) -> str:  # first entry of standard_channel_layouts for this channel count
46 |         layouts = standard_channel_layouts[self.num_channels]
47 |         return layouts[0]
48 | 
49 |     @cached_property
50 |     def metadata(self) -> dict:  # this base implementation has no metadata
51 |         return {}
52 | 
53 |     @cached_property
54 |     def name(self) -> str:  # "<none>" for BytesIO input, otherwise the file object itself is returned
55 |         return "<none>" if isinstance(self.file, BytesIO) else self.file
56 | 
57 |     @cached_property
58 |     def size(self) -> Optional[int]:  # st_size for an existing str path, buffer length for BytesIO, else None
59 |         if isinstance(self.file, str):
60 |             if os.path.exists(self.file):
61 |                 return os.stat(self.file).st_size
62 |         elif isinstance(self.file, BytesIO):
63 |             return len(self.file.getbuffer())
64 |         return None
65 | 
66 |     def load_audio(self, offset: Seconds = 0, duration: Optional[Seconds] = None) -> Iterator[np.ndarray]:
67 |         self.seek(int(offset * self.sample_rate))  # offset converted from seconds to a sample index
68 |         frames = UINT32_MAX if duration is None else int(duration * self.sample_rate)  # remaining samples to yield
69 |         while frames > 0:
70 |             frame_size = min(frames, self.frame_size)  # never request more than what is still wanted
71 |             ndarray = self.read(frame_size)
72 |             if ndarray is None:  # read() returning None ends the iteration
73 |                 break
74 |             frames -= ndarray.shape[1]  # assumes read() returns (channels, samples) - TODO confirm in subclasses
75 |             yield ndarray
76 | 


--------------------------------------------------------------------------------
/audiolab/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from __future__ import annotations
16 | 
17 | from base64 import b64encode
18 | from io import BytesIO
19 | from pathlib import Path
20 | from typing import Optional, Tuple, Union
21 | 
22 | import numpy as np
23 | 
24 | from audiolab.av import (
25 |     AudioCache,
26 |     clip,
27 |     from_ndarray,
28 |     get_dtype,
29 |     get_format,
30 |     split_audio_frame,
31 |     to_ndarray,
32 | )
33 | from audiolab.av.typing import Dtype
34 | from audiolab.pipe import AudioPipe
35 | from audiolab.reader import Reader, StreamReader, aformat, info, load_audio
36 | from audiolab.writer import Writer, save_audio
37 | 
38 | 
39 | def encode(
40 |     audio: Union[str, Path, np.ndarray],
41 |     rate: Optional[int] = None,
42 |     dtype: Optional[Dtype] = None,
43 |     to_mono: bool = False,
44 |     make_wav: bool = True,
45 |     format: str = "WAV",
46 | ) -> Tuple[str, int]:
47 |     """
48 |     Encode audio as a base64 string (the samples are first passed through ``clip`` with int16).
49 | 
50 |     Args:
51 |         audio: The file path to an audio file or a numpy array.
52 |         rate: The sample rate; forwarded to ``load_audio`` for paths, used as-is for arrays.
53 |         dtype: Forwarded to ``load_audio``; only used when ``audio`` is a path.
54 |         to_mono: Forwarded to ``load_audio``; only used when ``audio`` is a path.
55 |         make_wav: If True, save via ``save_audio`` and return a ``data:audio/<format>;base64,`` URI.
56 |         format: The container format passed to ``save_audio`` and echoed in the data URI.
57 |     Returns:
58 |         The base64 text (data URI, or the raw array bytes when ``make_wav`` is False) and the sample rate.
59 |     """
60 |     if isinstance(audio, (str, Path)):
61 |         audio, rate = load_audio(audio, dtype=dtype, rate=rate, to_mono=to_mono)  # unpacks the result as (samples, rate)
62 | 
63 |     audio = clip(audio, np.int16)  # NOTE(review): int16 is used regardless of the ``dtype`` argument - confirm intent
64 |     if make_wav:
65 |         bytestream = BytesIO()
66 |         save_audio(bytestream, audio, rate, format=format)
67 |         audio = b64encode(bytestream.getvalue()).decode("ascii")
68 |         audio = f"data:audio/{format};base64,{audio}"
69 |     else:
70 |         audio = np.ascontiguousarray(audio)  # contiguous copy before encoding the array buffer
71 |         audio = b64encode(audio).decode("ascii")
72 |     return audio, rate
73 | 
74 | 
75 | __all__ = [
76 |     "AudioCache",
77 |     "AudioPipe",
78 |     "Reader",
79 |     "StreamReader",
80 |     "Writer",
81 |     "aformat",
82 |     "encode",
83 |     "from_ndarray",
84 |     "get_dtype",
85 |     "get_format",
86 |     "info",
87 |     "load_audio",
88 |     "save_audio",
89 |     "split_audio_frame",
90 |     "to_ndarray",
91 | ]
92 | 


--------------------------------------------------------------------------------
/audiolab/av/codec.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from collections import defaultdict
16 | from typing import Dict, Set
17 | 
18 | import av
19 | import numpy as np
20 | from av import codecs_available
21 | from av.codec.codec import UnknownCodecError
22 | 
23 | from audiolab.av.format import format_dtypes
24 | from audiolab.av.typing import CodecEnum
25 | from audiolab.av.utils import get_template
26 | 
27 | """
28 | $ ffmpeg -codecs
29 | $ ffmpeg -decoders
30 | $ ffmpeg -encoders
31 | """
32 | 
33 | 
34 | class CodecManager:
35 |     def __init__(self):
36 |         self.canonical_names: Dict[str, Set[str]] = defaultdict(set)
37 |         self.decodecs: Dict[str, av.Codec] = {}
38 |         self.encodecs: Dict[str, av.Codec] = {}
39 | 
40 |         for codec in codecs_available:
41 |             try:
42 |                 decoder_codec = av.Codec(codec)
43 |                 if decoder_codec.type != "audio":
44 |                     continue
45 |                 if decoder_codec.audio_formats is not None:
46 |                     canonical_name = decoder_codec.canonical_name
47 |                     codec_name = decoder_codec.name
48 |                     self.canonical_names[canonical_name].add(codec_name)
49 |                     if codec_name not in self.decodecs:
50 |                         self.decodecs[codec_name] = decoder_codec
51 | 
52 |                 encoder_codec = av.Codec(codec, "w")
53 |                 if encoder_codec.audio_formats is not None:
54 |                     canonical_name = encoder_codec.canonical_name
55 |                     codec_name = encoder_codec.name
56 |                     self.canonical_names[canonical_name].add(codec_name)
57 |                     if codec_name not in self.encodecs:
58 |                         self.encodecs[codec_name] = encoder_codec
59 |             except UnknownCodecError:
60 |                 pass
61 | 
62 |         self.Decodec = CodecEnum("Decodec", self.decodecs)
63 |         self.Encodec = CodecEnum("Encodec", self.encodecs)
64 | 
65 |         template = get_template("codec")
66 |         for name, codec in self.decodecs.items():
67 |             getattr(self.Decodec, name).__doc__ = template.render(codec=codec, format_dtypes=format_dtypes, np=np)
68 |         for name, codec in self.encodecs.items():
69 |             getattr(self.Encodec, name).__doc__ = template.render(codec=codec, format_dtypes=format_dtypes, np=np)
70 | 
71 | 
72 | _codec_manager = CodecManager()
73 | canonical_names = _codec_manager.canonical_names
74 | decodecs = _codec_manager.decodecs
75 | encodecs = _codec_manager.encodecs
76 | Decodec = _codec_manager.Decodec
77 | Encodec = _codec_manager.Encodec
78 | 


--------------------------------------------------------------------------------
/audiolab/av/filter.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Any, Callable, Dict, List
16 | 
17 | from av import filter
18 | from av.option import OptionType
19 | 
20 | from audiolab.av.utils import get_template
21 | 
22 | """
23 | $ ffmpeg -filters
24 | """
25 | 
26 | 
class FilterManager:
    """Lazily creates one helper function per filter in ``av.filter.filters_available``.

    Each helper returns a ``(name, args, kwargs)`` tuple describing a filter
    invocation. The helpers are generated on the first attribute lookup that
    falls through to :meth:`__getattr__` and are stored in this module's
    globals under the filter's name.
    """

    def __init__(self):
        # Per-filter metadata (name, description, options) keyed by filter name.
        self._filter_data: Dict[str, Dict[str, Any]] = {}
        # Set to True once the helper functions have been generated.
        self._initialized: bool = False

    def _generate_filter_data(self) -> None:
        """Collect name, description and option metadata for every available filter."""
        for name in filter.filters_available:
            _filter = filter.Filter(name)
            options = []
            # ``options`` may be None for a filter; treat that as "no options".
            for opt in _filter.options or ():
                try:
                    opt_type = opt.type
                except ValueError:
                    # Reading the option type can fail with ValueError; fall back to STRING.
                    opt_type = OptionType.STRING
                options.append(
                    {
                        "name": opt.name,
                        "type": opt_type,
                        "default": opt.default,
                        # NOTE(review): the help text of options named "temp" is replaced;
                        # presumably the upstream text does not render cleanly - confirm.
                        "help": opt.help if opt.name != "temp" else "set temperature °C",
                    }
                )
            self._filter_data[name] = {"name": _filter.name, "description": _filter.description, "options": options}

    def _create_filter_function(self, name: str):
        """Return a function that packs its arguments into a ``(name, args, kwargs)`` tuple.

        ``args`` is stringified unless it is None, and every keyword value is
        stringified as well.
        """

        def filter_func(args=None, **kwargs):
            return (name, None if args is None else str(args), {k: str(v) for k, v in kwargs.items()})

        filter_func.__name__ = name
        return filter_func

    def _initialize_filters(self) -> None:
        """Generate all helper functions once and publish them in the module globals."""
        if self._initialized:
            return

        self._generate_filter_data()
        # The template does not depend on the filter, so look it up once.
        template = get_template("filter")
        for name in filter.filters_available:
            filter_func = self._create_filter_function(name)
            data = self._filter_data[name]
            filter_func.__doc__ = template.render(
                name=data["name"], description=data["description"], options=data["options"]
            )
            globals()[name] = filter_func

        self._initialized = True

    def __getattr__(self, name: str) -> Callable:
        """Resolve ``name`` to a generated filter helper.

        Raises:
            AttributeError: if ``name`` is private/dunder or is not a known
                filter, so ``hasattr`` and ``getattr(..., default)`` behave
                normally instead of receiving ``None``.
        """
        # Private and dunder names are never filters. Rejecting them up front also
        # prevents infinite recursion when ``_initialized`` itself is missing
        # (e.g. on an instance created without running ``__init__``).
        if name.startswith("_"):
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
        self._initialize_filters()
        if name in self._filter_data:
            return globals()[name]
        raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")

    @property
    def filters(self) -> List[str]:
        """Names of all filters reported by ``av.filter.filters_available``."""
        return filter.filters_available
81 | 
82 | 
# Module-wide manager instance; `filters` is read once here from its `filters` property.
_filter_manager = FilterManager()
filters = _filter_manager.filters
85 | 
86 | 
def __getattr__(name: str) -> Callable:
    """Module-level attribute hook that resolves filter helpers by name.

    Called only for names that are not already module globals; the lookup is
    delegated to ``_filter_manager``.

    Raises:
        AttributeError: if ``name`` is private/dunder or does not resolve to a
            filter helper. Returning ``None`` instead would make
            ``from module import misspelled_name`` succeed silently and would
            hand ``None`` to lookups of ``__all__``.
    """
    # Private and dunder probes are never filters; do not trigger filter
    # generation for them.
    if name.startswith("_"):
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    func = getattr(_filter_manager, name, None)
    if func is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return func
89 | 


--------------------------------------------------------------------------------
/audiolab/reader/backend/wave.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import wave
16 | from functools import cached_property
17 | from typing import Any, Optional
18 | 
19 | import numpy as np
20 | from av.codec import Codec
21 | 
22 | from audiolab.av.typing import Seconds
23 | from audiolab.reader.backend.backend import Backend
24 | 
25 | _bits_to_codec = {8: "pcm_u8le", 16: "pcm_s16le", 24: "pcm_s32le", 32: "pcm_s32le"}
26 | _bits_to_dtype = {8: np.uint8, 16: np.int16, 24: np.int32, 32: np.int32}
27 | 
28 | 
class Wave(Backend):
    """Reader backend that decodes WAV data with the standard-library ``wave`` module.

    Decoded audio is returned as arrays of shape ``(num_channels, num_frames)``.
    """

    def __init__(self, file: Any, frame_size: Optional[int] = None, forced_decoding: bool = False):
        super().__init__(file, frame_size, forced_decoding)
        self.wave = wave.open(file)

    @cached_property
    def bits_per_sample(self) -> int:
        """Sample width in bits (sample width in bytes times 8)."""
        return self.wave.getsampwidth() * 8

    @cached_property
    def codec(self) -> str:
        """Long name of the FFmpeg codec matching the file's bit depth."""
        return Codec(_bits_to_codec[self.bits_per_sample]).long_name

    @cached_property
    def duration(self) -> Optional[Seconds]:
        """Duration in seconds, or None when the number of frames is unknown."""
        if self.num_frames is None:
            return None
        return Seconds(self.num_frames / self.sample_rate)

    @cached_property
    def dtype(self) -> np.dtype:
        """Numpy type used for decoded samples of this bit depth."""
        return _bits_to_dtype[self.bits_per_sample]

    @cached_property
    def format(self) -> str:
        return "WAV"

    @cached_property
    def num_channels(self) -> int:
        return self.wave.getnchannels()

    @cached_property
    def num_frames(self) -> Optional[int]:
        """Number of frames per channel.

        With ``forced_decoding`` the whole file is decoded and the frames are
        counted; otherwise the header value is used and a value at or above the
        int32 maximum is reported as None (unknown).
        """
        if self.forced_decoding:
            # Count from the start of the data, then put the reader back where it was.
            position = self.wave.tell()
            self.wave.rewind()
            try:
                frames = self.read(np.iinfo(np.int32).max)
            finally:
                self.wave.setpos(position)
            # read() returns (channels, frames), so the frame count is axis 1;
            # None means that no frame could be decoded.
            return 0 if frames is None else frames.shape[1]
        num_frames = self.wave.getnframes()
        if num_frames >= np.iinfo(np.int32).max:
            # NOTE(review): presumably a placeholder header value for an unknown length - confirm.
            return None
        return num_frames

    @cached_property
    def sample_rate(self) -> int:
        return self.wave.getframerate()

    @cached_property
    def seekable(self) -> bool:
        return True

    def frombuffer(self, buffer: bytes) -> np.ndarray:
        """Convert interleaved PCM bytes into an array of shape ``(num_channels, num_frames)``."""
        if self.bits_per_sample == 24:
            # No 24-bit numpy type exists: assemble each little-endian 3-byte
            # sample into an int32 and sign-extend it.
            frames = np.frombuffer(buffer, np.uint8)
            frames = (
                (frames[2::3].astype(np.int32) << 16)
                | (frames[1::3].astype(np.int32) << 8)
                | frames[0::3].astype(np.int32)
            )
            frames[frames > 0x7FFFFF] -= 0x1000000
        else:
            frames = np.frombuffer(buffer, self.dtype)
        return frames.reshape(-1, self.num_channels).T

    def read(self, nframes: int) -> Optional[np.ndarray]:
        """Read up to ``nframes`` frames; return None when no complete frame is left."""
        buffer = self.wave.readframes(nframes)
        # A truncated file can end in the middle of a frame; drop the incomplete
        # tail so that the reshape in frombuffer() cannot fail.
        frame_bytes = self.wave.getsampwidth() * self.num_channels
        usable = len(buffer) - len(buffer) % frame_bytes
        if usable == 0:
            return None
        return self.frombuffer(buffer[:usable])

    def seek(self, offset: int):
        """Move the read position to frame ``offset``."""
        self.wave.setpos(offset)
98 | 


--------------------------------------------------------------------------------
/audiolab/writer/backend/pyav.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Any, Optional, Tuple
16 | 
17 | import av
18 | import numpy as np
19 | from av.codec.codec import UnknownCodecError
20 | 
21 | from audiolab.av import from_ndarray
22 | from audiolab.av.format import dtype_formats
23 | from audiolab.av.frame import clip
24 | from audiolab.av.layout import standard_channel_layouts
25 | from audiolab.av.typing import ContainerFormat, Dtype
26 | from audiolab.writer.backend.backend import Backend
27 | 
28 | 
class PyAV(Backend):
    """Writer backend that encodes and muxes audio through a PyAV output container.

    The output stream is created lazily on the first :meth:`write`, once the
    number of channels (and, if not given, the dtype) is known from the data.
    """

    def __init__(self, file: Any, sample_rate: int, dtype: Optional[Dtype] = None, format: ContainerFormat = "WAV"):
        super().__init__(file, sample_rate, dtype, format)
        self.container = av.open(self.file, "w", format=self.format)
        self.num_channels = None
        self.stream = None

    def open(self):
        """Create the audio stream using the first standard layout for ``num_channels``."""
        kwargs = {"layout": standard_channel_layouts[self.num_channels][0]}
        audio_codec, audio_format = self.guess_codec_format()
        if audio_format is not None:
            kwargs["format"] = audio_format
        self.stream = self.container.add_stream(audio_codec, self.sample_rate, **kwargs)

    @staticmethod
    def _matching_format(codec: str, prefix: str) -> Optional[str]:
        """Return the first sample format of encoder ``codec`` whose name starts with ``prefix``."""
        try:
            audio_formats = av.Codec(codec, "w").audio_formats
        except UnknownCodecError:
            return None
        # ``audio_formats`` can be None for a codec; treat that as "no formats".
        for audio_format in audio_formats or ():
            if audio_format.name.startswith(prefix):
                return audio_format.name
        return None

    def guess_codec_format(self) -> Tuple[str, Optional[str]]:
        """Pick a codec and sample format able to store ``self.dtype``.

        Returns:
            ``(codec_name, format_name)``. ``format_name`` is None when no dtype
            was requested, in which case the container's default codec is used.

        Raises:
            ValueError: if no codec supported by the container offers a sample
                format for the requested dtype.
        """
        default_codec = self.container.default_audio_codec
        if self.dtype is None:
            return default_codec, None

        dtype_format = dtype_formats[self.dtype]
        # Try the container's default codec first, then the remaining codecs with
        # "pcm_*" (excluding the "*law" variants) ahead of everything else.
        supported_codecs = self.container.supported_codecs
        codecs = sorted(supported_codecs, key=lambda x: (not x.startswith("pcm_") or x.endswith("law"), x))
        for codec in [default_codec, *codecs]:
            audio_format = self._matching_format(codec, dtype_format)
            if audio_format is not None:
                return codec, audio_format
        raise ValueError(f"no audio codec for container format {self.format!r} supports dtype {self.dtype}")

    def write(self, frame: np.ndarray):
        """Encode ``frame`` and mux the resulting packets.

        The dtype defaults to the dtype of the first frame. The frame is
        clipped to that dtype and promoted to at least two dimensions; its
        first axis is taken as the channel count when the stream is created.
        """
        if self.dtype is None:
            self.dtype = frame.dtype
        frame = np.atleast_2d(clip(frame, self.dtype))
        if self.num_channels is None:
            self.num_channels = frame.shape[0]
        if self.stream is None:
            self.open()
        frame = from_ndarray(frame, self.stream.format.name, self.stream.layout, self.stream.rate)
        for packet in self.stream.encode(frame):
            self.container.mux(packet)

    def close(self):
        """Flush the encoder (if a stream exists) and close the container."""
        if self.is_closed:
            return
        try:
            # Nothing was written if the stream was never created, so there is
            # nothing to flush.
            if self.stream is not None:
                try:
                    for packet in self.stream.encode():
                        self.container.mux(packet)
                except ValueError:
                    # Deliberate best-effort flush kept from the original code.
                    # NOTE(review): presumably raised when the encoder cannot be flushed - confirm.
                    pass
        finally:
            # Always release the container, even if flushing failed.
            try:
                self.container.close()
            finally:
                super().close()
87 | 


--------------------------------------------------------------------------------
/audiolab/av/format.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from functools import lru_cache
 16 | from typing import Dict, Iterator, Literal, Optional, Set
 17 | 
 18 | import av
 19 | import numpy as np
 20 | from av import Codec, codecs_available
 21 | from av.codec.codec import UnknownCodecError
 22 | 
 23 | from audiolab.av import typing
 24 | from audiolab.av.utils import get_template
 25 | 
 26 | """
 27 | $ ffmpeg -sample_fmts
 28 | """
# Sample format name -> numpy dtype string. Names ending in "p" are the planar
# variants and share the dtype of their packed counterpart.
format_dtypes = {
    "dbl": "f8",
    "dblp": "f8",
    "flt": "f4",
    "fltp": "f4",
    "s16": "i2",
    "s16p": "i2",
    "s32": "i4",
    "s32p": "i4",
    "s64": "i8",
    "s64p": "i8",
    "u8": "u1",
    "u8p": "u1",
}
# Reverse lookup keyed by np.dtype; only names without the "p" suffix are kept.
dtype_formats = {np.dtype(dtype): name for name, dtype in format_dtypes.items() if not name.endswith("p")}
# One av.AudioFormat object per known sample format name.
audio_formats: Dict[str, av.AudioFormat] = {name: av.AudioFormat(name) for name in format_dtypes.keys()}
# Enum over the formats above; members are looked up by format name.
AudioFormat = typing.AudioFormatEnum("AudioFormat", audio_formats)
 46 | 
 47 | 
 48 | @lru_cache(maxsize=None)
 49 | def get_codecs(format: typing.AudioFormat, mode: Literal["r", "w"] = "r") -> Set[str]:
 50 |     codecs = set()
 51 |     if isinstance(format, av.AudioFormat):
 52 |         format = format.name
 53 |     for codec in codecs_available:
 54 |         try:
 55 |             codec = Codec(codec, mode)
 56 |             formats = codec.audio_formats
 57 |             if codec.type != "audio" or formats is None:
 58 |                 continue
 59 |             if format in set(format.name for format in formats):
 60 |                 codecs.add(codec.name)
 61 |         except UnknownCodecError:
 62 |             pass
 63 |     return codecs
 64 | 
 65 | 
 66 | @lru_cache(maxsize=None)
 67 | def get_dtype(format: typing.AudioFormat) -> np.dtype:
 68 |     if isinstance(format, av.AudioFormat):
 69 |         format = format.name
 70 |     return np.dtype(format_dtypes[format])
 71 | 
 72 | 
 73 | def get_format(
 74 |     dtype: typing.Dtype,
 75 |     is_planar: Optional[bool] = None,
 76 |     available_formats: Optional[Iterator[typing.AudioFormat]] = None,
 77 | ) -> av.AudioFormat:
 78 |     if isinstance(dtype, str) and dtype not in format_dtypes or isinstance(dtype, type):
 79 |         dtype = np.dtype(dtype)
 80 |     if isinstance(dtype, np.dtype):
 81 |         dtype = dtype_formats[dtype]
 82 |         if is_planar is not None:
 83 |             dtype = dtype + ("p" if is_planar else "")
 84 |         else:
 85 |             assert available_formats is not None
 86 |             available_formats = [
 87 |                 format.name if isinstance(format, typing.AudioFormat) else format for format in available_formats
 88 |             ]
 89 |             if dtype not in available_formats:
 90 |                 dtype = dtype.rstrip("p") if dtype.endswith("p") else dtype + "p"
 91 |     return AudioFormat[dtype].value
 92 | 
 93 | 
# At import time, render the "format" template for every sample format and
# attach the result as the docstring of the matching AudioFormat member.
template = get_template("format")
for name, format in audio_formats.items():
    # Codecs supporting this sample format, looked up with mode "r" and mode "w".
    decodecs = get_codecs(name, "r")
    encodecs = get_codecs(name, "w")
    dtype = get_dtype(name)
    getattr(AudioFormat, name).__doc__ = template.render(
        format=format, decodecs=decodecs, encodecs=encodecs, dtype=dtype
    )
102 | 


--------------------------------------------------------------------------------
/audiolab/reader/backend/soundfile.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from functools import cached_property
 16 | from typing import Any, Optional
 17 | 
 18 | import numpy as np
 19 | import soundfile as sf
 20 | 
 21 | from audiolab.av.frame import clip
 22 | from audiolab.av.typing import Dtype, Seconds
 23 | from audiolab.reader.backend.backend import Backend
 24 | 
# Subtype name -> bits per sample. Subtypes missing here are reported as None
# by SoundFile.bits_per_sample.
_subtype_to_bits = {
    "PCM_S8": 8,
    "PCM_U8": 8,
    "PCM_16": 16,
    "PCM_24": 24,
    "PCM_32": 32,
    "FLOAT": 32,
    "DOUBLE": 64,
    "ULAW": 8,
    "ALAW": 8,
    "DWVW_12": 12,
    "DWVW_16": 16,
    "DWVW_24": 24,
    "DPCM_8": 8,
    "DPCM_16": 16,
    "ALAC_16": 16,
    "ALAC_20": 20,
    "ALAC_24": 24,
    "ALAC_32": 32,
}

# Subtype name -> numpy type of decoded samples. SoundFile.dtype falls back to
# np.float64 for subtypes missing here.
_subtype_to_dtype = {
    "PCM_S8": np.int8,
    "PCM_U8": np.uint8,
    "PCM_16": np.int16,
    "PCM_24": np.int32,
    "PCM_32": np.int32,
    "FLOAT": np.float32,
    "DOUBLE": np.float64,
}

# dtypes handed to the soundfile read call unchanged; SoundFile.read reads any
# other dtype as float64 and converts afterwards.
_supported_dtypes = (np.int16, np.int32, np.float32, np.float64)
 57 | 
 58 | 
 59 | class SoundFile(Backend):
 60 |     def __init__(self, file: Any, frame_size: Optional[int] = None, forced_decoding: bool = False):
 61 |         super().__init__(file, frame_size, forced_decoding)
 62 |         self.sf = sf.SoundFile(file)
 63 | 
 64 |     @cached_property
 65 |     def bits_per_sample(self) -> Optional[int]:
 66 |         return _subtype_to_bits.get(self.sf.subtype, None)
 67 | 
 68 |     @cached_property
 69 |     def codec(self) -> str:
 70 |         return sf.available_subtypes()[self.sf.subtype]
 71 | 
 72 |     @cached_property
 73 |     def duration(self) -> Optional[Seconds]:
 74 |         if self.num_frames is None:
 75 |             return None
 76 |         return Seconds(self.num_frames / self.sample_rate)
 77 | 
 78 |     @cached_property
 79 |     def dtype(self) -> np.dtype:
 80 |         return _subtype_to_dtype.get(self.sf.subtype, np.float64)
 81 | 
 82 |     @cached_property
 83 |     def format(self) -> str:
 84 |         return self.sf.format
 85 | 
 86 |     @cached_property
 87 |     def num_channels(self) -> int:
 88 |         return self.sf.channels
 89 | 
 90 |     @cached_property
 91 |     def num_frames(self) -> Optional[int]:
 92 |         if self.forced_decoding:
 93 |             num_frames = 0
 94 |             pos = self.sf.tell()
 95 |             try:
 96 |                 frames = self.sf.read()
 97 |                 num_frames = frames.shape[0]
 98 |             except sf.LibsndfileError:
 99 |                 self.sf = sf.SoundFile(self.file)
100 |             self.seek(pos)
101 |         else:
102 |             num_frames = self.sf.frames
103 |             if num_frames >= np.iinfo(np.int32).max:
104 |                 num_frames = None
105 |         return num_frames
106 | 
107 |     @cached_property
108 |     def metadata(self) -> dict:
109 |         return self.sf.copy_metadata()
110 | 
111 |     @cached_property
112 |     def sample_rate(self) -> int:
113 |         return self.sf.samplerate
114 | 
115 |     @cached_property
116 |     def seekable(self) -> bool:
117 |         return self.sf.seekable()
118 | 
119 |     def read(self, nframes: int, dtype: Optional[Dtype] = None) -> Optional[np.ndarray]:
120 |         if dtype is None:
121 |             dtype = self.dtype
122 |         frames = self.sf.read(nframes, dtype=dtype if dtype in _supported_dtypes else np.float64)
123 |         return np.atleast_2d(clip(frames, dtype).T) if frames.shape[0] > 0 else None
124 | 
125 |     def seek(self, offset: int):
126 |         self.sf.seek(offset)
127 | 


--------------------------------------------------------------------------------
/tests/reader_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from io import BytesIO
16 | 
17 | import numpy as np
18 | import pytest
19 | 
20 | from audiolab.av.filter import aresample, atempo
21 | from audiolab.av.utils import generate_ndarray
22 | from audiolab.reader import Reader, aformat, load_audio
23 | from audiolab.writer import save_audio
24 | 
25 | 
class TestReader:
    """Round-trip tests: audio is written with ``save_audio`` and read back again."""

    @pytest.fixture
    def nb_channels(self):
        return 1

    @pytest.fixture
    def rate(self):
        return 16000

    @pytest.fixture
    def duration(self):
        return 0.5

    def test_reader(self, nb_channels, rate, duration):
        """Reader exposes the stream properties of the written audio."""
        frame_size = 1024
        for always_2d in (True, False):
            bytes_io = BytesIO()
            ndarray = generate_ndarray(nb_channels, int(rate * duration), np.int16, always_2d)
            save_audio(bytes_io, ndarray, rate=rate)

            reader = Reader(bytes_io, frame_size=frame_size, always_2d=always_2d)
            assert reader.channels == nb_channels
            assert "signed 16" in reader.codec.lower()
            assert reader.duration == duration
            assert reader.precision == 16
            assert reader.rate == rate

    def test_load_audio(self, nb_channels, rate, duration):
        """load_audio returns exactly the requested slice at the original rate."""
        for always_2d in (True, False):
            for offset in (0.0, 0.1, 0.2):
                for _duration in (None, 0.1, 0.2, 0.3):
                    bytes_io = BytesIO()
                    ndarray = generate_ndarray(nb_channels, int(rate * duration), np.int16, always_2d)
                    save_audio(bytes_io, ndarray, rate=rate)

                    if _duration is None:
                        _duration = duration - offset
                    _duration = min(_duration, duration - offset)

                    # Keep the returned rate in its own name: rebinding the `rate`
                    # fixture would turn the rate assertion into a tautology.
                    audio, loaded_rate = load_audio(bytes_io, offset=offset, duration=_duration, always_2d=always_2d)
                    assert loaded_rate == rate
                    assert audio.dtype == np.int16
                    if always_2d:
                        assert audio.shape == (nb_channels, int(rate * _duration))
                        ndarray = ndarray[:, int(offset * rate) : int((offset + _duration) * rate)]
                    else:
                        assert audio.ndim == 1
                        assert audio.shape[0] == int(rate * _duration)
                        ndarray = ndarray[int(offset * rate) : int((offset + _duration) * rate)]
                    assert np.allclose(ndarray, audio)

    def test_load_audio_with_filters(self, nb_channels, rate, duration):
        """Filters change tempo, sample rate, dtype and channel count as requested."""
        target_rate = 8000
        for ratio in (0.9, 1.1):
            bytes_io = BytesIO()
            ndarray = generate_ndarray(nb_channels, int(rate * duration), np.int16)
            save_audio(bytes_io, ndarray, rate=rate)

            audio, out_rate = load_audio(bytes_io, filters=[atempo(ratio), aresample(target_rate)])
            assert audio.dtype == np.int16
            assert audio.shape[0] == nb_channels
            assert out_rate == target_rate
            assert np.isclose(audio.shape[1] / out_rate, duration / ratio, atol=0.05)

        # The input is written at the fixture rate, so this exercises a real
        # resampling step down to `target_rate`.
        bytes_io = BytesIO()
        ndarray = generate_ndarray(2, int(rate * duration), np.int16)
        save_audio(bytes_io, ndarray, rate=rate)

        audio, out_rate = load_audio(bytes_io, filters=[aformat(dtype=np.float32, rate=target_rate, to_mono=True)])
        assert audio.dtype == np.float32
        assert out_rate == target_rate
        assert audio.shape[0] == 1
        # Resampling may add or drop a few samples at the edges, so compare durations with a tolerance.
        assert np.isclose(audio.shape[1] / out_rate, duration, atol=0.05)


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # UV
 98 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #uv.lock
102 | 
103 | # poetry
104 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
106 | #   commonly ignored for libraries.
107 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108 | #poetry.lock
109 | 
110 | # pdm
111 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112 | #pdm.lock
113 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114 | #   in version control.
115 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116 | .pdm.toml
117 | .pdm-python
118 | .pdm-build/
119 | 
120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121 | __pypackages__/
122 | 
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 | 
127 | # SageMath parsed files
128 | *.sage.py
129 | 
130 | # Environments
131 | .env
132 | .venv
133 | env/
134 | venv/
135 | ENV/
136 | env.bak/
137 | venv.bak/
138 | 
139 | # Spyder project settings
140 | .spyderproject
141 | .spyproject
142 | 
143 | # Rope project settings
144 | .ropeproject
145 | 
146 | # mkdocs documentation
147 | /site
148 | 
149 | # mypy
150 | .mypy_cache/
151 | .dmypy.json
152 | dmypy.json
153 | 
154 | # Pyre type checker
155 | .pyre/
156 | 
157 | # pytype static type analyzer
158 | .pytype/
159 | 
160 | # Cython debug symbols
161 | cython_debug/
162 | 
163 | # PyCharm
164 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
167 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
168 | #.idea/
169 | 
170 | # PyPI configuration file
171 | .pypirc
172 | 


--------------------------------------------------------------------------------
/audiolab/av/graph.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import errno
 16 | from fractions import Fraction
 17 | from typing import List, Optional
 18 | 
 19 | import av
 20 | import numpy as np
 21 | from av import filter
 22 | 
 23 | from audiolab.av.format import get_format
 24 | from audiolab.av.frame import from_ndarray, to_ndarray
 25 | from audiolab.av.layout import standard_channel_layouts
 26 | from audiolab.av.typing import UINT32_MAX, AudioFormat, AudioFrame, AudioLayout, Dtype, Filter
 27 | 
 28 | 
 29 | class Graph(filter.Graph):
 30 |     def __init__(
 31 |         self,
 32 |         template: Optional[av.AudioStream] = None,
 33 |         rate: Optional[int] = None,
 34 |         dtype: Optional[Dtype] = None,
 35 |         is_planar: bool = False,
 36 |         format: Optional[AudioFormat] = None,
 37 |         layout: Optional[AudioLayout] = None,
 38 |         channels: Optional[int] = None,
 39 |         time_base: Optional[Fraction] = None,
 40 |         filters: Optional[List[Filter]] = None,
 41 |         frame_size: Optional[int] = None,
 42 |         return_ndarray: bool = True,
 43 |     ):
 44 |         if template is not None:
 45 |             rate = template.sample_rate if rate is None else rate
 46 |             format = template.format if format is None else format
 47 |             layout = template.layout.name if layout is None else layout
 48 |             channels = template.channels if channels is None else channels
 49 |             time_base = template.time_base if time_base is None else time_base
 50 |         format = get_format(dtype, is_planar) if format is None else format
 51 |         format = format.name if isinstance(format, av.AudioFormat) else format
 52 |         time_base = Fraction(1, rate) if time_base is None else time_base
 53 |         if layout is None:
 54 |             layout = standard_channel_layouts[channels][0]
 55 |         abuffer = super().add_abuffer(None, rate, format, layout, channels, time_base=time_base)
 56 | 
 57 |         nodes = [abuffer]
 58 |         if filters is not None:
 59 |             for _filter in filters:
 60 |                 name, args, kwargs = (
 61 |                     (_filter, None, {})
 62 |                     if isinstance(_filter, str)
 63 |                     else ((*_filter, {}) if len(_filter) == 2 else _filter)
 64 |                 )
 65 |                 nodes.append(super().add(name, args, **kwargs))
 66 |         nodes.append(super().add("abuffersink"))
 67 |         super().link_nodes(*nodes).configure()
 68 | 
 69 |         self.frame_size = None
 70 |         if frame_size is not None and frame_size > 0:
 71 |             self.frame_size = min(frame_size, UINT32_MAX)
 72 |             super().set_audio_frame_size(self.frame_size)
 73 | 
 74 |         self.rate = rate
 75 |         self.format = format
 76 |         self.layout = layout
 77 |         self.return_ndarray = return_ndarray
 78 | 
 79 |     def push(self, frame: AudioFrame):
 80 |         if isinstance(frame, tuple):
 81 |             frame, rate = frame
 82 |             assert rate == self.rate
 83 |         if isinstance(frame, np.ndarray):
 84 |             frame = from_ndarray(frame, self.format, self.layout, self.rate)
 85 |         super().push(frame)
 86 | 
 87 |     def pull(self, partial: bool = False, return_ndarray: Optional[bool] = None) -> AudioFrame:
 88 |         if partial:
 89 |             super().push(None)
 90 |         while True:
 91 |             try:
 92 |                 frame = super().pull()
 93 |                 if return_ndarray is None:
 94 |                     return_ndarray = self.return_ndarray
 95 |                 yield (to_ndarray(frame), frame.rate) if return_ndarray else frame
 96 |             except av.EOFError:
 97 |                 break
 98 |             except av.FFmpegError as e:
 99 |                 if e.errno != errno.EAGAIN:
100 |                     raise
101 |                 break
102 | 


--------------------------------------------------------------------------------
/tests/frame_test.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import numpy as np
16 | from av.audio.frame import format_dtypes
17 | from numpy.random import randint
18 | 
19 | from audiolab.av.format import AudioFormat, get_dtype
20 | from audiolab.av.frame import clip, from_ndarray, split_audio_frame, to_ndarray
21 | from audiolab.av.layout import AudioLayout
22 | from audiolab.av.utils import generate_ndarray
23 | 
24 | 
class TestFrame:
    """Tests for the ndarray <-> ``av.AudioFrame`` helpers in ``audiolab.av.frame``."""

    def test_clip(self):
        """Converted data stays in the destination range and converts back to the source."""
        conversions = (
            (np.uint8, np.int16),
            (np.int16, np.int32),
            (np.int16, np.float32),
            (np.int32, np.float64),
            (np.float32, np.float64),
        )
        for src_dtype, dst_dtype in conversions:
            source = generate_ndarray(1, 7, src_dtype)
            converted = clip(source, dst_dtype)
            restored = clip(converted, src_dtype)

            lowest, highest = converted.min(), converted.max()
            if np.dtype(dst_dtype).kind in ("i", "u"):
                assert lowest >= np.iinfo(dst_dtype).min and highest <= np.iinfo(dst_dtype).max
            else:
                assert lowest >= -1.0 and highest <= 1.0

            # Integer round trips may be off by one step; float ones must match closely.
            if np.dtype(src_dtype).kind in ("i", "u"):
                assert np.max(np.abs(source - restored)) <= 1
            else:
                assert np.allclose(source, restored, rtol=1e-5, atol=1e-8)

    def test_from_to_ndarray(self):
        """``from_ndarray`` followed by ``to_ndarray`` returns the original samples."""
        sample_rates = (8000, 16000, 24000, 48000)
        for layout_name in ("mono", "stereo", "2.1", "3.0"):
            layout = AudioLayout[layout_name].value
            nb_channels = layout.nb_channels
            for format_name in format_dtypes.keys():
                format = AudioFormat[format_name].value
                dtype = get_dtype(format)
                for rate in sample_rates:
                    expected = generate_ndarray(nb_channels, rate, dtype)
                    frame = from_ndarray(expected, format, layout, rate)
                    assert frame.format.name == format.name
                    assert frame.layout.name == layout.name
                    assert frame.rate == rate
                    assert np.allclose(to_ndarray(frame), expected)

    def test_split_audio_frame(self):
        """Both halves keep the frame's metadata and carry the expected pts and sample counts."""
        pts = 0
        sample_rates = (8000, 16000, 24000, 48000)
        for layout_name in ("mono", "stereo", "2.1", "3.0"):
            layout = AudioLayout[layout_name].value
            nb_channels = layout.nb_channels
            for format_name in format_dtypes.keys():
                format = AudioFormat[format_name].value
                dtype = get_dtype(format)
                for rate in sample_rates:
                    frames = int(randint(1, 10) * rate)
                    offset = min(int(randint(0, 10) * rate), frames)
                    samples = generate_ndarray(nb_channels, frames, dtype)
                    frame = from_ndarray(samples, format, layout, rate, pts=pts)
                    left, right = split_audio_frame(frame, offset)

                    if offset <= 0:
                        assert left is None
                    else:
                        assert left.rate == rate
                        assert left.format.name == format.name
                        assert left.layout.name == layout.name
                        assert left.pts == pts
                        assert left.samples == offset

                    if offset >= frames:
                        assert right is None
                    else:
                        assert right.rate == rate
                        assert right.format.name == format.name
                        assert right.layout.name == layout.name
                        assert right.pts == pts + offset
                        assert right.samples == frames - offset
95 | 


--------------------------------------------------------------------------------
/audiolab/av/frame.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from fractions import Fraction
 16 | from typing import Optional, Tuple
 17 | 
 18 | import av
 19 | import numpy as np
 20 | 
 21 | from audiolab.av.format import get_dtype
 22 | from audiolab.av.typing import AudioFormat, AudioLayout, Dtype
 23 | from audiolab.av.utils import get_logger
 24 | 
 25 | logger = get_logger(__name__)
 26 | 
 27 | 
 28 | def clip(ndarray: np.ndarray, dtype: Dtype) -> np.ndarray:
 29 |     if any(dim == 0 for dim in ndarray.shape):
 30 |         return ndarray
 31 |     src_dtype = ndarray.dtype
 32 |     dst_dtype = np.dtype(dtype)
 33 |     if src_dtype.kind != "f" and src_dtype == dst_dtype:
 34 |         return ndarray
 35 | 
 36 |     if src_dtype.kind == "f":
 37 |         min_value, max_value = ndarray.min(), ndarray.max()
 38 |         if min_value < -1.0 or max_value > 1.0:
 39 |             logger.warning("Cliping %s ndarray from: %g ~ %g to -1.0 ~ 1.0", src_dtype, min_value, max_value)
 40 |             ndarray = np.clip(ndarray, -1.0, 1.0)
 41 |     else:
 42 |         ndarray = ndarray.astype(np.float64)
 43 |         if src_dtype.kind == "u":
 44 |             ndarray = ndarray / np.iinfo(src_dtype).max * 2 - 1
 45 |         elif src_dtype.kind == "i":
 46 |             ndarray = ndarray / np.iinfo(src_dtype).max
 47 | 
 48 |     if dst_dtype.kind in ("u", "i"):
 49 |         max_value = np.float64(np.iinfo(dst_dtype).max)
 50 |         if dst_dtype.kind == "u":
 51 |             ndarray = (ndarray + 1) * 0.5 * max_value
 52 |         else:
 53 |             ndarray = ndarray * max_value
 54 |     return np.asarray(ndarray, dtype=dst_dtype)
 55 | 
 56 | 
 57 | def from_ndarray(
 58 |     ndarray: np.ndarray,
 59 |     format: AudioFormat,
 60 |     layout: AudioLayout,
 61 |     rate: int,
 62 |     pts: Optional[int] = None,
 63 |     time_base: Optional[Fraction] = None,
 64 | ) -> av.AudioFrame:
 65 |     ndarray = np.atleast_2d(ndarray)
 66 |     if isinstance(format, str):
 67 |         format = av.AudioFormat(format)
 68 |     if format.is_packed:
 69 |         # [num_channels, num_samples] => [1, num_channels * num_samples]
 70 |         ndarray = ndarray.T.reshape(1, -1)
 71 |     if isinstance(layout, str):
 72 |         layout = av.AudioLayout(layout)
 73 | 
 74 |     dtype = get_dtype(format)
 75 |     ndarray = clip(ndarray, dtype)
 76 |     ndarray = np.ascontiguousarray(ndarray)
 77 |     frame = av.AudioFrame.from_ndarray(ndarray, format.name, layout)
 78 |     frame.rate = rate
 79 |     if pts is not None:
 80 |         frame.pts = pts
 81 |     if time_base is not None:
 82 |         frame.time_base = time_base
 83 |     return frame
 84 | 
 85 | 
 86 | def to_ndarray(frame: av.AudioFrame) -> np.ndarray:
 87 |     # packed: [num_channels, num_samples]
 88 |     # planar: [1, num_channels * num_samples]
 89 |     ndarray = frame.to_ndarray()
 90 |     if frame.format.is_packed:
 91 |         ndarray = ndarray.reshape(-1, frame.layout.nb_channels).T
 92 |     return ndarray
 93 | 
 94 | 
 95 | def split_audio_frame(frame: av.AudioFrame, offset: int) -> Tuple[av.AudioFrame, av.AudioFrame]:
 96 |     if offset <= 0:
 97 |         return None, frame
 98 |     # number of samples per channel
 99 |     if offset >= frame.samples:
100 |         return frame, None
101 | 
102 |     ndarray = to_ndarray(frame)
103 |     left, right = ndarray[:, :offset], ndarray[:, offset:]
104 |     if frame.format.is_packed:
105 |         left, right = left.T.reshape(1, -1), right.T.reshape(1, -1)
106 |     left = av.AudioFrame.from_ndarray(left, frame.format.name, frame.layout)
107 |     right = av.AudioFrame.from_ndarray(right, frame.format.name, frame.layout)
108 |     left.rate, right.rate = frame.rate, frame.rate
109 |     if frame.pts is not None:
110 |         left.pts, right.pts = frame.pts, frame.pts + offset
111 |     if frame.time_base is not None:
112 |         left.time_base, right.time_base = frame.time_base, frame.time_base
113 |     return left, right
114 | 
115 | 
def pad(frame: np.ndarray, frame_size: int, fill_value: float = 0) -> np.ndarray:
    """
    Right-pad ``frame`` with ``fill_value`` up to ``frame_size`` samples.

    The length is taken from axis 0 for 1-D input and from axis 1 otherwise.
    A frame that is already long enough is returned unchanged.
    """
    is_1d = frame.ndim == 1
    length = frame.shape[0] if is_1d else frame.shape[1]
    missing = frame_size - length
    if missing <= 0:
        return frame
    widths = (0, missing) if is_1d else ((0, 0), (0, missing))
    return np.pad(frame, widths, constant_values=fill_value)
124 | 


--------------------------------------------------------------------------------
/audiolab/reader/stream_reader.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from io import BytesIO
 16 | from typing import Iterator, List, Optional
 17 | 
 18 | import av
 19 | from av import AudioCodecContext
 20 | 
 21 | from audiolab.av import aformat
 22 | from audiolab.av.graph import Graph
 23 | from audiolab.av.typing import AudioFormat, AudioFrame, Dtype, Filter
 24 | 
 25 | 
 26 | class StreamReader:
 27 |     def __init__(
 28 |         self,
 29 |         filters: Optional[List[Filter]] = None,
 30 |         dtype: Optional[Dtype] = None,
 31 |         is_planar: bool = False,
 32 |         format: Optional[AudioFormat] = None,
 33 |         rate: Optional[int] = None,
 34 |         to_mono: bool = False,
 35 |         frame_size: Optional[int] = 1024,
 36 |     ):
 37 |         """
 38 |         Create a StreamReader object.
 39 | 
 40 |         Args:
 41 |             filters: The filters to apply to the audio stream.
 42 |             dtype: The data type of the output audio frames.
 43 |             is_planar: Whether the output audio frames are planar.
 44 |             format: The format of the output audio frames.
 45 |             rate: The sample rate of the output audio frames.
 46 |             to_mono: Whether to convert the output audio frames to mono.
 47 |             frame_size: The frame size of the audio frames.
 48 |         """
 49 |         self._codec_context = None
 50 |         self._graph = None
 51 |         self.bytes_io = BytesIO()
 52 |         self.bytes_per_decode_attempt = 0
 53 |         if not all([dtype is None, format is None, rate is None, to_mono is None]):
 54 |             filters = filters or []
 55 |             filters.append(aformat(dtype, is_planar, format, rate, to_mono))
 56 |         self.filters = filters
 57 |         self.frame_size = frame_size
 58 |         self.offset = None
 59 |         self.packet = None
 60 | 
 61 |     @property
 62 |     def codec_context(self) -> Optional[AudioCodecContext]:
 63 |         if self._codec_context is None:
 64 |             if self.packet is None:
 65 |                 return None
 66 |             self._codec_context = self.packet.stream.codec_context
 67 |         return self._codec_context
 68 | 
 69 |     @property
 70 |     def graph(self) -> Optional[Graph]:
 71 |         if self._graph is None:
 72 |             if self.packet is None:
 73 |                 return None
 74 |             self._graph = Graph(self.packet.stream, filters=self.filters, frame_size=self.frame_size)
 75 |         return self._graph
 76 | 
 77 |     def push(self, frame: bytes):
 78 |         self.bytes_io.write(frame)
 79 |         self.bytes_per_decode_attempt += len(frame)
 80 | 
 81 |     def pull(self, partial: bool = False) -> Iterator[AudioFrame]:
 82 |         if partial or self.bytes_per_decode_attempt * 2 >= self.frame_size:
 83 |             self.bytes_per_decode_attempt = 0
 84 |             try:
 85 |                 self.bytes_io.seek(0)
 86 |                 container = av.open(self.bytes_io, metadata_encoding="latin1")
 87 |                 for packet in container.demux():
 88 |                     self.packet = packet
 89 |                     if self.packet.pts is None and not partial:
 90 |                         continue
 91 |                     # o: current frame
 92 |                     # pts: self.offset, frame.pts, packet.pts
 93 |                     # +---+---+---+---+---+
 94 |                     # | x | x | x | o |   |
 95 |                     # +---+---+---+---+---+
 96 |                     #             ↑
 97 |                     #             pts
 98 |                     if self.offset is not None and (self.packet.pts is None or self.offset > self.packet.pts):
 99 |                         continue
100 |                     for frame in self.codec_context.decode(packet):
101 |                         self.offset = frame.pts + int(frame.samples / packet.stream.rate / packet.stream.time_base)
102 |                         self.graph.push(frame)
103 |                         yield from self.graph.pull()
104 |                     yield from self.graph.pull(partial=partial)
105 |             except (av.EOFError, av.InvalidDataError, av.OSError, av.PermissionError):
106 |                 pass
107 | 
108 |     def reset(self):
109 |         self._codec_context = None
110 |         self._graph = None
111 |         self.bytes_io = BytesIO()
112 |         self.bytes_per_decode_attempt = 0
113 |         self.offset = None
114 |         self.packet = None
115 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # audiolab
  2 | 
  3 | [![PyPI](https://img.shields.io/pypi/v/audiolab)](https://pypi.org/project/audiolab/)
  4 | [![License](https://img.shields.io/github/license/pengzhendong/audiolab)](LICENSE)
  5 | 
  6 | A Python library for audio processing built on top of [soundfile](https://python-soundfile.readthedocs.io) and [PyAV](https://github.com/PyAV-Org/PyAV) (bindings for FFmpeg). audiolab provides a simple and efficient interface for loading, processing, and saving audio files.
  7 | 
  8 | ## Features
  9 | 
 10 | - Load audio from multiple sources: local paths, HTTP URLs, bytes, and BytesIO streams
 11 | - Load audio files in various formats (WAV, MP3, FLAC, AAC, etc.)
 12 | - Save audio files in different container formats
 13 | - Support for audio streaming and real-time processing
 14 | - Command-line interface for audio file inspection
 15 | - Support for audio transformations and filtering
 16 | 
 17 | ## Installation
 18 | 
 19 | ```bash
 20 | pip install audiolab
 21 | ```
 22 | 
 23 | ## Quick Start
 24 | 
 25 | ### Load an audio file
 26 | 
 27 | ```python
 28 | from audiolab import load_audio
 29 | 
 30 | # Load audio from 7 to 30 seconds (duration: 23s) and convert to 16kHz mono
 31 | audio, rate = load_audio("audio.wav", offset=7, duration=23, rate=16000, to_mono=True)
 32 | print(f"Sample rate: {rate} Hz")
 33 | print(f"Audio shape: {audio.shape}")
 34 | ```
 35 | 
 36 | ### Save an audio file
 37 | 
 38 | ```python
 39 | import numpy as np
 40 | from audiolab import save_audio
 41 | 
 42 | # Create a simple sine wave
 43 | rate = 44100
 44 | duration = 5
 45 | t = np.linspace(0, duration, rate * duration)
 46 | audio = np.sin(2 * np.pi * 440 * t)
 47 | 
 48 | # Save as WAV file
 49 | save_audio("tone.wav", audio, rate)
 50 | ```
 51 | 
 52 | ### Get audio file information
 53 | 
 54 | ```python
 55 | from audiolab import info
 56 | 
 57 | # Get information about an audio file
 58 | print(info("audio.wav"))
 59 | ```
 60 | 
 61 | ### Command-line usage
 62 | 
 63 | ```bash
 64 | # Get information about an audio file
 65 | audi audio.wav
 66 | # Get audio information from URL
 67 | audi https://modelscope.cn/datasets/pengzhendong/filesamples/resolve/master/audio/m4a/sample1.m4a
 68 | 
 69 | # Show only specific information
 70 | audi -r -c audio.wav  # Show sample rate and channels only
 71 | audi -d audio.wav     # Show duration in hours, minutes and seconds
 72 | audi -D audio.wav     # Show duration in seconds
 73 | ```
 74 | 
 75 | #### CLI Options
 76 | 
 77 | - `-f, --forced-decoding`          Forced decoding the audio file to get the duration
 78 | - `-t, --show-file-type`           Show detected file-type
 79 | - `-r, --show-sample-rate`         Show sample-rate
 80 | - `-c, --show-channels`            Show number of channels
 81 | - `-s, --show-samples`             Show number of samples (N/A if unavailable)
 82 | - `-d, --show-duration-hms`        Show duration in hours, minutes and seconds (N/A if unavailable)
 83 | - `-D, --show-duration-seconds`    Show duration in seconds (N/A if unavailable)
 84 | - `-b, --show-bits-per-sample`     Show number of bits per sample (N/A if not applicable)
 85 | - `-B, --show-bitrate`             Show the bitrate averaged over the whole file (N/A if unavailable)
 86 | - `-p, --show-precision`           Show estimated sample precision in bits
 87 | - `-e, --show-encoding`            Show the name of the audio encoding
 88 | - `-a, --show-comments`            Show file comments (annotations) if available
 89 | - `--help`                         Show this message and exit
 90 | 
 91 | If no specific options are selected, all information will be displayed by default.
 92 | 
 93 | ## API Overview
 94 | 
 95 | ### Core Functions
 96 | 
 97 | - `load_audio()`: Load audio from file
 98 | - `save_audio()`: Save audio to file
 99 | - `info()`: Get information about an audio file
100 | - `encode()`: Transform audio to PCM bytestring
101 | 
102 | ### Classes
103 | 
104 | - `Reader`: Read audio files with advanced options
105 | - `StreamReader`: Read audio streams
106 | - `Writer`: Write audio files with custom parameters
107 | 
108 | ## Advanced Usage
109 | 
110 | ### Apply filters during loading
111 | 
112 | ```python
113 | from audiolab import info, load_audio
114 | from audiolab.av.filter import aresample, asetrate, atempo
115 | 
116 | # Speed perturbation
117 | filters = [atempo(1.5)]
118 | audio, rate = load_audio("audio.wav", filters=filters)
119 | 
120 | # Pitch perturbation
121 | ratio = 1.5
122 | rate = info("audio.wav").rate
123 | filters = [asetrate(rate * ratio), atempo(1 / ratio), aresample(rate)]
124 | audio, rate = load_audio("audio.wav", filters=filters)
125 | ```
126 | 
127 | ### Streaming processing
128 | 
129 | ```python
130 | import numpy as np
131 | from audiolab.av.filter import atempo
132 | from audiolab import AudioPipe, Reader, save_audio
133 | 
134 | frames = []
135 | reader = Reader("audio.wav")
136 | pipe = AudioPipe(in_rate=reader.rate, filters=[atempo(2)])
137 | for frame, _ in reader:
138 |     pipe.push(frame)
139 |     for frame, _ in pipe.pull():
140 |         frames.append(frame)
141 | for frame, _ in pipe.pull(True):
142 |     frames.append(frame)
143 | save_audio("output.wav", np.concatenate(frames, axis=1), reader.rate)
144 | ```
145 | 
146 | ## License
147 | 
148 | [Apache License 2.0](LICENSE)
149 | 


--------------------------------------------------------------------------------
/audiolab/reader/reader.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from functools import cached_property, partial
 16 | from io import BytesIO
 17 | from typing import Any, Iterator, List, Optional
 18 | 
 19 | from audiolab.av import aformat, load_url
 20 | from audiolab.av.frame import pad
 21 | from audiolab.av.graph import Graph
 22 | from audiolab.av.typing import UINT32_MAX, AudioFrame, Dtype, Filter, Seconds
 23 | from audiolab.reader.backend import pyav, soundfile
 24 | from audiolab.reader.info import Info
 25 | 
 26 | 
 27 | class Reader(Info):
 28 |     def __init__(
 29 |         self,
 30 |         file: Any,
 31 |         offset: Seconds = 0.0,
 32 |         duration: Optional[Seconds] = None,
 33 |         filters: Optional[List[Filter]] = None,
 34 |         dtype: Optional[Dtype] = None,
 35 |         rate: Optional[int] = None,
 36 |         to_mono: bool = False,
 37 |         frame_size: Optional[int] = None,
 38 |         cache_url: bool = False,
 39 |         always_2d: bool = True,
 40 |         fill_value: Optional[float] = None,
 41 |         backends: Optional[List[str]] = None,
 42 |     ):
 43 |         """
 44 |         Create a Reader object.
 45 | 
 46 |         Args:
 47 |             file: The audio file, audio url, path to audio file, bytes of audio data, etc.
 48 |             offset: The offset of the audio to load.
 49 |             duration: The duration of the audio to load.
 50 |             filters: The filters to apply to the audio.
 51 |             dtype: The data type of the audio frames.
 52 |             rate: The sample rate of the audio frames.
 53 |             to_mono: Whether to convert the audio frames to mono.
 54 |             frame_size: The frame size of the audio frames.
 55 |             cache_url: Whether to cache the audio file.
 56 |             always_2d: Whether to return 2d ndarrays even if the audio frame is mono.
 57 |             fill_value: The fill value to pad the audio to the frame size.
 58 |             backends: The backends to use.
 59 |         """
 60 |         if isinstance(file, bytes):
 61 |             file = BytesIO(file)
 62 |         elif isinstance(file, str) and "://" in file:
 63 |             if cache_url:
 64 |                 file = load_url(file, cache=True)
 65 |             elif offset == 0 and duration is None:
 66 |                 file = load_url(file, cache=False)
 67 | 
 68 |         super().__init__(file, frame_size, backends=backends)
 69 |         if isinstance(self.backend, soundfile):
 70 |             self.backend.read = partial(self.backend.read, dtype=dtype)
 71 |         self.filters = [] if filters is None else filters
 72 |         if not self.is_passthrough(dtype, rate, to_mono):
 73 |             self.filters.append(aformat(dtype, rate=rate, to_mono=to_mono))
 74 | 
 75 |         self.graph = None
 76 |         if len(self.filters) > 0:
 77 |             if isinstance(self.backend, pyav):
 78 |                 self.backend.build_graph = partial(self.backend.build_graph, filters=self.filters)
 79 |             else:
 80 |                 self.graph = Graph(
 81 |                     rate=self.rate,
 82 |                     dtype=self.dtype,
 83 |                     is_planar=self.backend.is_planar,
 84 |                     channels=self.num_channels,
 85 |                     filters=self.filters,
 86 |                     frame_size=self.frame_size,
 87 |                 )
 88 |         self.offset = offset
 89 |         self._duration = duration
 90 |         self.always_2d = always_2d
 91 |         self.fill_value = fill_value
 92 | 
 93 |     @cached_property
 94 |     def frame_size(self) -> int:
 95 |         return self.backend.frame_size
 96 | 
 97 |     def __iter__(self) -> Iterator[AudioFrame]:
 98 |         for frame in self.backend.load_audio(self.offset, self._duration):
 99 |             if self.graph is None:
100 |                 rate = self.rate
101 |                 if isinstance(self.backend, pyav):
102 |                     frame, rate = frame
103 |                 if self.fill_value is not None:
104 |                     frame = pad(frame, self.frame_size, self.fill_value)
105 |                 yield frame if self.always_2d else frame.squeeze(), rate
106 |             else:
107 |                 self.graph.push(frame)
108 |                 yield from self.pull()
109 |         if self.graph is not None:
110 |             yield from self.pull(partial=True)
111 | 
112 |     def is_passthrough(self, dtype: Optional[Dtype] = None, rate: Optional[int] = None, to_mono: bool = False) -> bool:
113 |         passthrough = dtype is None or dtype == self.dtype
114 |         passthrough = passthrough and (rate is None or self.rate == rate)
115 |         passthrough = passthrough and not (to_mono and self.num_channels > 1)
116 |         passthrough = passthrough and self.frame_size >= UINT32_MAX
117 |         passthrough = passthrough and len(self.filters) == 0
118 |         return passthrough
119 | 
120 |     def pull(self, partial: bool = False) -> AudioFrame:
121 |         for frame in self.graph.pull(partial=partial):
122 |             frame, rate = frame
123 |             if self.fill_value is not None:
124 |                 frame = pad(frame, self.frame_size, self.fill_value)
125 |             yield frame if self.always_2d else frame.squeeze(), rate
126 | 


--------------------------------------------------------------------------------
/audiolab/cli.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from io import BytesIO
 16 | from typing import Any
 17 | 
 18 | import click
 19 | 
 20 | import audiolab
 21 | from audiolab.reader.backend import pyav
 22 | from audiolab.reader.info import Info
 23 | 
 24 | 
 25 | @click.command()
 26 | @click.argument("audio-files", nargs=-1)
 27 | @click.option(
 28 |     "-f",
 29 |     "--forced-decoding",
 30 |     is_flag=True,
 31 |     help="Forced decoding the audio file to get the duration",
 32 | )
 33 | @click.option("-t", "--show-file-type", is_flag=True, help="Show detected file-type")
 34 | @click.option("-r", "--show-sample-rate", is_flag=True, help="Show sample-rate")
 35 | @click.option("-c", "--show-channels", is_flag=True, help="Show number of channels")
 36 | @click.option(
 37 |     "-s",
 38 |     "--show-samples",
 39 |     is_flag=True,
 40 |     help="Show number of samples (N/A if unavailable)",
 41 | )
 42 | @click.option(
 43 |     "-d",
 44 |     "--show-duration-hms",
 45 |     is_flag=True,
 46 |     help="Show duration in hours, minutes and seconds (N/A if unavailable)",
 47 | )
 48 | @click.option(
 49 |     "-D",
 50 |     "--show-duration-seconds",
 51 |     is_flag=True,
 52 |     help="Show duration in seconds (N/A if unavailable)",
 53 | )
 54 | @click.option(
 55 |     "-b",
 56 |     "--show-bits-per-sample",
 57 |     is_flag=True,
 58 |     help="Show number of bits per sample (N/A if not applicable)",
 59 | )
 60 | @click.option(
 61 |     "-B",
 62 |     "--show-bitrate",
 63 |     is_flag=True,
 64 |     help="Show the bitrate averaged over the whole file (N/A if unavailable)",
 65 | )
 66 | @click.option(
 67 |     "-p",
 68 |     "--show-precision",
 69 |     is_flag=True,
 70 |     help="Show estimated sample precision in bits",
 71 | )
 72 | @click.option("-e", "--show-encoding", is_flag=True, help="Show the name of the audio encoding")
 73 | @click.option(
 74 |     "-a",
 75 |     "--show-comments",
 76 |     is_flag=True,
 77 |     help="Show file comments (annotations) if available",
 78 | )
 79 | def main(
 80 |     audio_files: Any,
 81 |     forced_decoding: bool = False,
 82 |     show_file_type: bool = False,
 83 |     show_sample_rate: bool = False,
 84 |     show_channels: bool = False,
 85 |     show_samples: bool = False,
 86 |     show_duration_hms: bool = False,
 87 |     show_duration_seconds: bool = False,
 88 |     show_bits_per_sample: bool = False,
 89 |     show_bitrate: bool = False,
 90 |     show_precision: bool = False,
 91 |     show_encoding: bool = False,
 92 |     show_comments: bool = False,
 93 | ):
 94 |     """
 95 |     Print the information of audio files.
 96 | 
 97 |     Args:
 98 | 
 99 |         AUDIO_FILES: The audio files, audio urls, paths to audio files, or stdin.
100 |     """
101 |     # If no files are provided, use stdin
102 |     if not audio_files:
103 |         # Create a file object for stdin
104 |         #   cat audio.wav | audi
105 |         #   audi < audio.wav
106 |         stdin_file = click.File(mode="rb").convert("-", None, None)
107 |         bytesio = BytesIO(stdin_file.read())
108 |         audio_files = [bytesio]
109 | 
110 |     # Initialize total duration and show_any flag
111 |     total_duration = 0.0
112 |     show_any = any(
113 |         [
114 |             show_file_type,
115 |             show_sample_rate,
116 |             show_channels,
117 |             show_samples,
118 |             show_duration_hms,
119 |             show_duration_seconds,
120 |             show_bits_per_sample,
121 |             show_bitrate,
122 |             show_precision,
123 |             show_encoding,
124 |             show_comments,
125 |         ]
126 |     )
127 | 
128 |     # Process each audio file
129 |     for audio_file in audio_files:
130 |         # ffmpeg -i audio.flac -f wav - | > audio.wav
131 |         info = audiolab.info(audio_file, forced_decoding, backends=[pyav])
132 |         # If no specific options are selected, show all information (default behavior)
133 |         if not show_any:
134 |             print(info)
135 |             # Accumulate total duration
136 |             total_duration += info.duration or 0.0
137 | 
138 |         # Display information based on selected options
139 |         if show_file_type:
140 |             print(info.format)
141 |         if show_sample_rate:
142 |             print(info.sample_rate)
143 |         if show_channels:
144 |             print(info.channels)
145 |         if show_samples:
146 |             print(info.num_samples or 0)
147 |         if show_duration_hms:
148 |             print(Info.format_duration(info.duration))
149 |         if show_duration_seconds:
150 |             print(info.duration or 0)
151 |         if show_bits_per_sample:
152 |             print(info.precision)
153 |         if show_bitrate:
154 |             print(Info.format_bit_rate(info.bit_rate))
155 |         if show_precision:
156 |             print(info.precision)
157 |         if show_encoding:
158 |             print(info.codec)
159 |         if show_comments:
160 |             if info.metadata:
161 |                 for key, value in info.metadata.items():
162 |                     print(f"{key}: {value}")
163 | 
164 |     # Print total duration if any files were processed and any options were selected
165 |     if len(audio_files) > 1 and not show_any:
166 |         print(f"\nTotal duration of {len(audio_files)} files: {Info.format_duration(total_duration)}")
167 | 


--------------------------------------------------------------------------------
/audiolab/reader/backend/pyav.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from functools import cached_property
 16 | from typing import Any, Iterator, List, Optional
 17 | 
 18 | import av
 19 | from av import time_base
 20 | from av.codec import Codec
 21 | from av.error import EOFError
 22 | from av.format import Flags
 23 | 
 24 | from audiolab.av import split_audio_frame
 25 | from audiolab.av.format import get_dtype
 26 | from audiolab.av.graph import Graph
 27 | from audiolab.av.typing import UINT32_MAX, AudioFormat, AudioFrame, Filter, Seconds
 28 | from audiolab.reader.backend.backend import Backend
 29 | 
 30 | 
 31 | class PyAV(Backend):
 32 |     def __init__(self, file: Any, frame_size: Optional[int] = None, forced_decoding: bool = False):
 33 |         super().__init__(file, frame_size, forced_decoding)
 34 |         self.container = av.open(file, metadata_errors="ignore")
 35 |         self.stream = self.container.streams.audio[0]
 36 |         self.dtype = get_dtype(self.stream.format)
 37 |         self.graph = None
 38 | 
 39 |     @cached_property
 40 |     def bits_per_sample(self) -> int:
 41 |         return self.stream.format.bits
 42 | 
 43 |     @cached_property
 44 |     def bit_rate(self) -> Optional[int]:
 45 |         bit_rate = None
 46 |         if self.stream.bit_rate is not None:
 47 |             bit_rate = self.stream.bit_rate
 48 |         elif self.container.bit_rate is not None:
 49 |             bit_rate = self.container.bit_rate
 50 |         if bit_rate in (0, None):
 51 |             bit_rate = super().bit_rate
 52 |         return bit_rate
 53 | 
 54 |     @cached_property
 55 |     def codec(self) -> Codec:
 56 |         return self.stream.codec.long_name
 57 | 
 58 |     @cached_property
 59 |     def format(self) -> str:
 60 |         return self.container.format.name
 61 | 
 62 |     @cached_property
 63 |     def duration(self) -> Optional[Seconds]:
 64 |         if self.forced_decoding:
 65 |             num_frames = 0
 66 |             for frame in self.container.decode(self.stream):
 67 |                 num_frames += frame.samples
 68 |             duration = num_frames / self.stream.rate
 69 |         else:
 70 |             duration = None
 71 |             if self.stream.duration is not None:
 72 |                 duration = self.stream.duration * self.stream.time_base
 73 |             elif self.container.duration is not None:
 74 |                 duration = self.container.duration / time_base
 75 |         return None if duration is None else Seconds(duration)
 76 | 
 77 |     @cached_property
 78 |     def is_planar(self) -> bool:
 79 |         return self.stream.format.is_planar
 80 | 
 81 |     @cached_property
 82 |     def name(self) -> str:
 83 |         return self.container.name
 84 | 
 85 |     @cached_property
 86 |     def num_channels(self) -> int:
 87 |         return self.stream.channels
 88 | 
 89 |     @cached_property
 90 |     def num_frames(self) -> Optional[int]:
 91 |         if self.duration is None:
 92 |             return None
 93 |         return int(self.duration * self.stream.rate)
 94 | 
 95 |     @cached_property
 96 |     def metadata(self) -> dict:
 97 |         return {**self.container.metadata, **self.stream.metadata}
 98 | 
 99 |     @cached_property
100 |     def sample_rate(self) -> int:
101 |         return self.stream.sample_rate
102 | 
103 |     @cached_property
104 |     def size(self) -> Optional[int]:
105 |         size = super().size
106 |         if size is None:
107 |             size = self.container.size
108 |         return size
109 | 
110 |     @cached_property
111 |     def seekable(self) -> bool:
112 |         flags = Flags(self.container.format.flags)
113 |         generic_index = Flags.generic_index in flags
114 |         seek_to_pts = Flags.seek_to_pts in flags
115 |         byte_seek = Flags.no_byte_seek not in flags
116 |         return generic_index or seek_to_pts or byte_seek
117 | 
118 |     def build_graph(self, format: AudioFormat, filters: Optional[List[Filter]] = None):
119 |         if self.graph is None:
120 |             self.dtype = get_dtype(format)
121 |             self.graph = Graph(
122 |                 rate=self.sample_rate,
123 |                 dtype=self.dtype,
124 |                 is_planar=self.is_planar,
125 |                 channels=self.num_channels,
126 |                 filters=filters,
127 |                 frame_size=self.frame_size,
128 |             )
129 | 
130 |     def load_audio(self, offset: Seconds = 0, duration: Optional[Seconds] = None) -> Iterator[AudioFrame]:
131 |         self.seek(int(offset / self.stream.time_base))
132 |         frames = UINT32_MAX if duration is None else int(duration * self.sample_rate)
133 |         while frames > 0:
134 |             frame = self.read()
135 |             if frame is None:
136 |                 break
137 |             frame, _ = split_audio_frame(frame, frames)
138 |             frames -= frame.samples
139 |             self.build_graph(frame.format)
140 |             self.graph.push(frame)
141 |             yield from self.graph.pull()
142 |         yield from self.graph.pull(partial=True)
143 | 
144 |     def read(self) -> Optional[AudioFrame]:
145 |         try:
146 |             return next(self.container.decode(self.stream))
147 |         except (EOFError, StopIteration):
148 |             return None
149 | 
150 |     def seek(self, offset: int):
151 |         if offset > 0:
152 |             self.container.seek(offset, any_frame=True, stream=self.stream)
153 | 


--------------------------------------------------------------------------------
/audiolab/av/lhotse.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2021 Piotr Żelasko
  2 | # From https://github.com/lhotse-speech/lhotse/blob/master/lhotse/caching.py
  3 | #      https://github.com/lhotse-speech/lhotse/blob/master/lhotse/utils.py
  4 | #
  5 | # Licensed under the Apache License, Version 2.0 (the "License");
  6 | # you may not use this file except in compliance with the License.
  7 | # You may obtain a copy of the License at
  8 | #
  9 | #     http://www.apache.org/licenses/LICENSE-2.0
 10 | #
 11 | # Unless required by applicable law or agreed to in writing, software
 12 | # distributed under the License is distributed on an "AS IS" BASIS,
 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | # See the License for the specific language governing permissions and
 15 | # limitations under the License.
 16 | 
 17 | from io import BytesIO
 18 | from threading import Lock
 19 | from typing import Callable, Dict, Optional
 20 | 
 21 | from smart_open import open as sm_open
 22 | 
 23 | from audiolab.av.utils import get_logger
 24 | 
# Module-level logger; SmartOpen.setup uses it to warn when transport_params are overwritten.
logger = get_logger(__name__)
 26 | 
 27 | 
 28 | class AudioCache:
 29 |     """
 30 |     Cache of 'bytes' objects with audio data.
 31 |     It is used to cache the "command" type audio inputs.
 32 | 
 33 |     The cache size is limited to max 100 elements and 500MB of audio.
 34 | 
 35 |     A global dict `__cache_dict` (static member variable of class AudioCache)
 36 |     is holding the wavs as 'bytes' arrays.
 37 |     The key is the 'source' identifier (i.e. the command for loading the data).
 38 | 
 39 |     Thread-safety is ensured by a threading.Lock guard.
 40 |     """
 41 | 
 42 |     max_cache_memory: int = 500 * 1e6  # 500 MB
 43 |     max_cache_elements: int = 100  # 100 audio files
 44 | 
 45 |     __cache_dict: Dict[str, bytes] = {}
 46 |     __cache_memory: int = 0
 47 |     __lock: Lock = Lock()
 48 | 
 49 |     @classmethod
 50 |     def try_cache(cls, key: str) -> Optional[bytes]:
 51 |         """
 52 |         Test if 'key' is in the chache. If yes return the bytes array,
 53 |         otherwise return None.
 54 |         """
 55 | 
 56 |         with cls.__lock:
 57 |             if key in cls.__cache_dict:
 58 |                 return cls.__cache_dict[key]
 59 |             else:
 60 |                 return None
 61 | 
 62 |     @classmethod
 63 |     def add_to_cache(cls, key: str, value: bytes):
 64 |         """
 65 |         Add the new (key,value) pair to cache.
 66 |         Possibly free some elements before adding the new pair.
 67 |         The oldest elements are removed first.
 68 |         """
 69 | 
 70 |         if len(value) > cls.max_cache_memory:
 71 |             return
 72 | 
 73 |         with cls.__lock:
 74 |             # limit cache elements and memory
 75 |             while (
 76 |                 len(cls.__cache_dict) >= cls.max_cache_elements
 77 |                 or len(value) + cls.__cache_memory > cls.max_cache_memory
 78 |             ):
 79 |                 # remove oldest elements from cache
 80 |                 # (dict pairs are sorted according to insertion order)
 81 |                 removed_key = next(iter(cls.__cache_dict))
 82 |                 removed_value = cls.__cache_dict.pop(removed_key)
 83 |                 cls.__cache_memory -= len(removed_value)
 84 | 
 85 |             # store the new (key,value) pair
 86 |             cls.__cache_dict[key] = value
 87 |             cls.__cache_memory += len(value)
 88 | 
 89 |     @property
 90 |     def cache_memory(cls) -> int:
 91 |         """
 92 |         Return size of AudioCache values in bytes.
 93 |         """
 94 |         return cls.__cache_memory
 95 | 
 96 |     @classmethod
 97 |     def clear_cache(cls) -> None:
 98 |         """
 99 |         Clear the cache, remove the data.
100 |         """
101 |         with cls.__lock:
102 |             cls.__cache_dict.clear()
103 |             cls.__cache_memory = 0
104 | 
105 | 
class SmartOpen:
    """Thin class-level wrapper around ``smart_open.open``.

    The opener and its ``transport_params`` are stored as class attributes, so
    they behave like a singleton shared by every caller.

    ``SmartOpen.setup`` performs the initial configuration: it records the
    ``transport_params`` to share between calls and binds the ``smart_open``
    opener. If ``setup`` was never called, ``SmartOpen.open`` calls it on first
    use with the parameters it was given.

    Sharing pre-built transport parameters (for example one S3
    ``session.client``) instead of letting smart_open create them on every
    call is the intended performance benefit.
    """

    transport_params: Optional[Dict] = None
    smart_open: Optional[Callable] = None

    @classmethod
    def setup(cls, transport_params: Optional[dict] = None):
        """Store ``transport_params`` and bind the opener, warning on overwrite."""
        previous = cls.transport_params
        overwriting = previous is not None and previous != transport_params
        if overwriting:
            logger.warning(
                "SmartOpen.setup second call overwrites existing transport_params with new version\t\n%s\t\nvs\t\n%s",
                previous,
                transport_params,
            )
        cls.transport_params = transport_params
        cls.smart_open = sm_open

    @classmethod
    def open(cls, uri, mode="rb", transport_params=None, **kwargs):
        """Open ``uri``; per-call ``transport_params`` win over the shared ones."""
        if cls.smart_open is None:
            cls.setup(transport_params=transport_params)
        # A falsy per-call value (None, {}) falls back to the shared setting.
        effective_params = transport_params or cls.transport_params
        opener = cls.smart_open
        return opener(uri, mode=mode, transport_params=effective_params, **kwargs)
147 | 
148 | 
def load_url(url: str, cache: bool = False) -> BytesIO:
    """
    Fetch the audio behind a URL and wrap its content in a BytesIO.

    Args:
        url (str): The URL of the audio file.
        cache (bool): When True, look the URL up in AudioCache first and store
            freshly downloaded bytes there.
    Returns:
        A BytesIO holding the audio bytes.
    """
    if cache:
        cached = AudioCache.try_cache(url)
        if cached is not None:
            return BytesIO(cached)

    with SmartOpen.open(url, "rb") as stream:
        payload = stream.read()

    if cache:
        AudioCache.add_to_cache(url, payload)
    return BytesIO(payload)
166 | 


--------------------------------------------------------------------------------
/audiolab/reader/info.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2025 Zhendong Peng (pzd17@tsinghua.org.cn)
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | from functools import cached_property
 16 | from io import BytesIO
 17 | from typing import Any, List, Optional, Union
 18 | 
 19 | import numpy as np
 20 | from av.codec import Codec
 21 | from humanize import naturalsize
 22 | 
 23 | from audiolab.av.typing import Seconds
 24 | from audiolab.av.utils import get_template
 25 | from audiolab.reader.backend import pyav, soundfile, wave
 26 | 
# Maps the backend names accepted by Info(backends=...) to the backend objects
# imported from audiolab.reader.backend; "av"/"pyav" and "sf"/"soundfile" are aliases.
_backends = {
    "av": pyav,
    "pyav": pyav,
    "sf": soundfile,
    "soundfile": soundfile,
    "wave": wave,
}
 34 | 
 35 | 
 36 | class Info:
 37 |     def __init__(
 38 |         self,
 39 |         file: Any,
 40 |         frame_size: Optional[int] = None,
 41 |         forced_decoding: bool = False,
 42 |         backends: Optional[List[str]] = None,
 43 |     ):
 44 |         self.file = file
 45 |         if backends is None:
 46 |             backends = ["soundfile", "pyav"]
 47 | 
 48 |         for idx, backend in enumerate(backends):
 49 |             pos = file.tell() if isinstance(file, BytesIO) else 0
 50 |             try:
 51 |                 backend = _backends.get(backend, pyav)
 52 |                 self.backend = backend(file, frame_size, forced_decoding)
 53 |                 break
 54 |             except Exception as e:
 55 |                 if isinstance(file, BytesIO):
 56 |                     file.seek(pos)
 57 |                 if idx == len(backends) - 1:
 58 |                     raise e
 59 | 
 60 |     @cached_property
 61 |     def bits_per_sample(self) -> int:
 62 |         return self.backend.bits_per_sample
 63 | 
 64 |     @property
 65 |     def bit_rate(self) -> Optional[int]:
 66 |         return self.backend.bit_rate
 67 | 
 68 |     @cached_property
 69 |     def codec(self) -> Union[Codec, str]:
 70 |         return self.backend.codec
 71 | 
 72 |     @cached_property
 73 |     def duration(self) -> Optional[Seconds]:
 74 |         return self.backend.duration
 75 | 
 76 |     @cached_property
 77 |     def dtype(self) -> np.dtype:
 78 |         return self.backend.dtype
 79 | 
 80 |     @cached_property
 81 |     def format(self) -> str:
 82 |         return self.backend.format
 83 | 
 84 |     @cached_property
 85 |     def layout(self) -> str:
 86 |         return self.backend.layout
 87 | 
 88 |     @cached_property
 89 |     def name(self) -> str:
 90 |         return self.backend.name
 91 | 
 92 |     @property
 93 |     def num_channels(self) -> int:
 94 |         return self.backend.num_channels
 95 | 
 96 |     @property
 97 |     def num_frames(self) -> int:
 98 |         return self.backend.num_frames
 99 | 
100 |     @property
101 |     def metadata(self) -> int:
102 |         return self.backend.metadata
103 | 
104 |     @property
105 |     def sample_rate(self) -> int:
106 |         return self.backend.sample_rate
107 | 
108 |     @property
109 |     def seekable(self) -> bool:
110 |         return self.backend.seekable
111 | 
112 |     @property
113 |     def size(self) -> int:
114 |         return self.backend.size
115 | 
116 |     @property
117 |     def cdda_sectors(self) -> Optional[float]:
118 |         if self.duration is None:
119 |             return None
120 |         return round(self.duration * 75, 2)
121 | 
122 |     @property
123 |     def channels(self) -> int:
124 |         return self.num_channels
125 | 
126 |     @property
127 |     def num_samples(self) -> int:
128 |         # Number of audio samples (per channel).
129 |         return self.num_frames
130 | 
131 |     @property
132 |     def rate(self) -> int:
133 |         return self.sample_rate
134 | 
135 |     @property
136 |     def samplerate(self) -> int:
137 |         return self.sample_rate
138 | 
139 |     @property
140 |     def samples(self) -> int:
141 |         return self.backend.num_frames
142 | 
143 |     @property
144 |     def precision(self) -> int:
145 |         return self.bits_per_sample
146 | 
147 |     @staticmethod
148 |     def rstrip_zeros(s: Optional[Union[int, float, str]]) -> str:
149 |         if s is None:
150 |             return "N/A"
151 |         if not isinstance(s, str):
152 |             s = str(s)
153 |         return " ".join(x.rstrip("0").rstrip(".") for x in s.split())
154 | 
155 |     @staticmethod
156 |     def format_bit_rate(bit_rate: Union[int, None]) -> str:
157 |         if bit_rate is None or bit_rate <= 0:
158 |             return "N/A"
159 |         bit_rate = naturalsize(bit_rate).rstrip("B")
160 |         return Info.rstrip_zeros(bit_rate) + "bps"
161 | 
162 |     @staticmethod
163 |     def format_duration(duration: Union[Seconds, None]) -> str:
164 |         if duration is None:
165 |             return "N/A"
166 |         hours, rest = divmod(duration, 3600)
167 |         minutes, seconds = divmod(rest, 60)
168 |         return f"{int(hours):02d}:{int(minutes):02d}:{seconds:06.3f}"
169 | 
170 |     @staticmethod
171 |     def format_name(name: str, format: str) -> str:
172 |         if name.upper().endswith(format.upper()):
173 |             return f"'{name}'"
174 |         if name in ("<none>", "<stdin>"):
175 |             return f"{name} ({format})"
176 |         return f"'{name}' ({format})"
177 | 
178 |     @staticmethod
179 |     def format_size(size: int) -> str:
180 |         if size not in (-1, -38, -78, None):
181 |             size = naturalsize(size)
182 |         return Info.rstrip_zeros(size)
183 | 
184 |     def __str__(self):
185 |         return get_template("info").render(
186 |             name=Info.format_name(self.name, self.format),
187 |             channels=self.channels,
188 |             rate=self.rate,
189 |             precision=self.precision,
190 |             duration=Info.format_duration(self.duration),
191 |             samples="N/A" if self.samples is None else self.samples,
192 |             cdda_sectors=Info.rstrip_zeros(self.cdda_sectors),
193 |             size=Info.format_size(self.size),
194 |             bit_rate=Info.format_bit_rate(self.bit_rate),
195 |             codec=self.codec,
196 |             metadata=self.metadata,
197 |         )
198 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------