├── .github └── workflows │ └── test_unittests.yml ├── .gitignore ├── .nojekyll ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── cli.html ├── cmds.html ├── index.html ├── read.html └── write.html ├── examples ├── check_codecs.py ├── get_info.py └── readwrite.py ├── pdoc └── config.mako ├── pytest.ini ├── setup.py ├── stempeg ├── __init__.py ├── cli.py ├── cmds.py ├── data │ ├── The Easton Ellises - Falcon 69.stem.mp4 │ └── default_metadata.json ├── read.py ├── test.sh └── write.py └── tests ├── test_random.py ├── test_read.py └── test_write.py /.github/workflows/test_unittests.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | # thanks for @mpariente for copying this workflow 3 | # see: https://help.github.com/en/actions/reference/events-that-trigger-workflows 4 | # Trigger the workflow on push or pull request 5 | on: [push, pull_request] 6 | 7 | jobs: 8 | src-test: 9 | name: conda-tests 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | ffmpeg-version: ["4.3", "5.0", "6.0", "7.0"] 14 | 15 | # Timeout: https://stackoverflow.com/a/59076067/4521646 16 | timeout-minutes: 10 17 | defaults: 18 | run: 19 | shell: bash -l {0} 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Setup Miniconda 23 | uses: conda-incubator/setup-miniconda@v2 24 | with: 25 | activate-environment: stempeg 26 | auto-update-conda: true 27 | auto-activate-base: false 28 | python-version: 3.11 29 | - name: Install dependencies FFMPEG ${{ matrix.ffmpeg-version }} 30 | env: 31 | FFMPEG_INSTALL: ${{ matrix.pytorch-version }} 32 | run: | 33 | sudo apt-get -y install gpac 34 | conda install -c conda-forge ffmpeg==${{ matrix.ffmpeg-version }} 35 | python -m pip install -e .['tests'] 36 | python --version 37 | pip --version 38 | python -m pip list 39 | - name: Conda list 40 | run: conda list 41 | - name: Run tests 42 | run: | 43 | py.test tests -v -------------------------------------------------------------------------------- 
/.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | *.wav 3 | .pytest_cache/ 4 | miniconda 5 | .env/ 6 | envffmpeg/ 7 | .vscode/ 8 | .circleci/ 9 | 10 | 11 | #####=== OSX ===##### 12 | .DS_Store 13 | .AppleDouble 14 | .LSOverride 15 | 16 | # Icon must end with two \r 17 | Icon 18 | 19 | # Thumbnails 20 | ._* 21 | 22 | # Files that might appear in the root of a volume 23 | .DocumentRevisions-V100 24 | .fseventsd 25 | .Spotlight-V100 26 | .TemporaryItems 27 | .Trashes 28 | .VolumeIcon.icns 29 | 30 | # Directories potentially created on remote AFP share 31 | .AppleDB 32 | .AppleDesktop 33 | Network Trash Folder 34 | Temporary Items 35 | .apdisk 36 | 37 | # Byte-compiled / optimized / DLL files 38 | __pycache__/ 39 | *.py[cod] 40 | *$py.class 41 | 42 | # C extensions 43 | *.so 44 | 45 | # Distribution / packaging 46 | .Python 47 | env/ 48 | build/ 49 | develop-eggs/ 50 | dist/ 51 | downloads/ 52 | eggs/ 53 | .eggs/ 54 | lib/ 55 | lib64/ 56 | parts/ 57 | sdist/ 58 | var/ 59 | wheels/ 60 | *.egg-info/ 61 | .installed.cfg 62 | *.egg 63 | 64 | # PyInstaller 65 | # Usually these files are written by a python script from a template 66 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
67 | *.manifest 68 | *.spec 69 | 70 | # Installer logs 71 | pip-log.txt 72 | pip-delete-this-directory.txt 73 | 74 | # Unit test / coverage reports 75 | htmlcov/ 76 | .tox/ 77 | .coverage 78 | .coverage.* 79 | .cache 80 | nosetests.xml 81 | coverage.xml 82 | *.cover 83 | .hypothesis/ 84 | 85 | # Translations 86 | *.mo 87 | *.pot 88 | 89 | # Django stuff: 90 | *.log 91 | local_settings.py 92 | 93 | # Flask stuff: 94 | instance/ 95 | .webassets-cache 96 | 97 | # Scrapy stuff: 98 | .scrapy 99 | 100 | # Sphinx documentation 101 | docs/_build/ 102 | 103 | # PyBuilder 104 | target/ 105 | 106 | # Jupyter Notebook 107 | .ipynb_checkpoints 108 | 109 | # pyenv 110 | .python-version 111 | 112 | # celery beat schedule file 113 | celerybeat-schedule 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # dotenv 119 | .env 120 | 121 | # virtualenv 122 | .venv 123 | venv/ 124 | ENV/ 125 | 126 | # Spyder project settings 127 | .spyderproject 128 | .spyproject 129 | 130 | # Rope project settings 131 | .ropeproject 132 | 133 | # mkdocs documentation 134 | /site 135 | 136 | # mypy 137 | .mypy_cache/ 138 | -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faroit/stempeg/a612e0436dfcb4219c36d818e06a391c65c485fa/.nojekyll -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017, 2018, 2019 Fabian-Robert Stöter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to 
permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include stempeg/data/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stempeg = stems + ffmpeg 2 | 3 | 4 | [![Build Status](https://github.com/faroit/stempeg/workflows/CI/badge.svg)](https://github.com/faroit/stempeg/actions?query=workflow%3ACI+branch%3Amaster+event%3Apush) 5 | [![Latest Version](https://img.shields.io/pypi/v/stempeg.svg)](https://pypi.python.org/pypi/stempeg) 6 | [![Supported Python versions](https://img.shields.io/pypi/pyversions/stempeg.svg)](https://pypi.python.org/pypi/stempeg) 7 | 8 | Python package to read and write [STEM](https://www.native-instruments.com/en/specials/stems/) audio files. 9 | Technically, stems are audio containers that combine multiple audio streams and metadata in a single audio file. This makes it ideal to playback multitrack audio, where users can select the audio sub-stream during playback (e.g. supported by VLC). 
10 | 11 | Under the hood, _stempeg_ uses [ffmpeg](https://www.ffmpeg.org/) for reading and writing multistream audio, optionally [MP4Box](https://github.com/gpac/gpac) is used to create STEM files that are compatible with Native Instruments hardware and software. 12 | 13 | #### Features 14 | 15 | - robust and fast interface for ffmpeg to read and write any supported format from/to numpy. 16 | - reading supports seeking and duration. 17 | - control container and codec as well as bitrate when compressed audio is written. 18 | - store multi-track audio within audio formats by aggregating streams into channels (concatenation of pairs of 19 | stereo channels). 20 | - support for internal ffmpeg resampling during read and write. 21 | - create mp4 stems compatible with Native Instruments Traktor. 22 | - using multiprocessing to speed up reading substreams and write multiple files. 23 | 24 | ## Installation 25 | 26 | ### 1. Installation of ffmpeg Library 27 | 28 | _stempeg_ relies on [ffmpeg](https://www.ffmpeg.org/) (>= 3.2 is suggested). 29 | 30 | The installation of ffmpeg differs among operating systems. If you use [anaconda](https://anaconda.org/anaconda/python) you can install ffmpeg on Windows/Mac/Linux using the following command: 31 | 32 | ``` 33 | conda install -c conda-forge ffmpeg 34 | ``` 35 | 36 | Note that for better quality encoding it is recommended to install ffmpeg with `libfdk-aac` codec support as follows: 37 | 38 | * _MacOS_: use homebrew: `brew install ffmpeg --with-fdk-aac` 39 | * _Ubuntu/Debian Linux_: See installation script [here](https://gist.github.com/rafaelbiriba/7f2d7c6f6c3d6ae2a5cb). 40 | * _Docker_: `docker pull jrottenberg/ffmpeg` 41 | 42 | ### 1a. (optional) Installation of MP4Box 43 | 44 | If you plan to write stem files with full compatibility with Native Instruments Traktor DJ hardware and software, you need to install [MP4Box](https://github.com/gpac/gpac). 
45 | 46 | * _MacOS_: use homebrew: `brew install gpac` 47 | * _Ubuntu/Debian Linux_: `apt-get install gpac` 48 | 49 | Further installation instructions for all operating systems can be found [here](https://gpac.wp.imt.fr/downloads/). 50 | 51 | ### 2. Installation of the _stempeg_ package 52 | 53 | A) Installation via PyPI using pip 54 | 55 | ``` 56 | pip install stempeg 57 | ``` 58 | 59 | B) Installation via conda 60 | 61 | ``` 62 | conda install -c conda-forge stempeg 63 | ``` 64 | 65 | ## Usage 66 | 67 | ![stempeg_scheme](https://user-images.githubusercontent.com/72940/102477776-16960a00-405d-11eb-9389-1ea9263cf99d.png) 68 | 69 | ### Reading audio 70 | 71 | Stempeg can read multi-stream and single stream audio files, thus, it can replace your normal audio loaders for 1d or 2d (mono/stereo) arrays. 72 | 73 | By default [`read_stems`](https://faroit.com/stempeg/read.html#stempeg.read.read_stems) assumes that multiple substreams can exist (default `reader=stempeg.StreamsReader()`). 74 | To support multi-stream, even when the audio container doesn't support multiple streams 75 | (e.g. WAV), streams can be mapped to multiple pairs of channels. In that 76 | case, `reader=stempeg.ChannelsReader()`, can be passed. Also see: 77 | [`stempeg.ChannelsWriter`](https://faroit.com/stempeg/write.html#stempeg.write.ChannelsWriter). 78 | 79 | ```python 80 | import stempeg 81 | S, rate = stempeg.read_stems(stempeg.example_stem_path()) 82 | ``` 83 | 84 | `S` is a numpy tensor that includes the time domain signals scaled to `[-1..1]`. The shape is `(stems, samples, channels)`. A detailed documentation of `read_stems` can [be viewed here](https://faroit.com/stempeg/read.html#stempeg.read.read_stems). Note, a small stems excerpt from [The Easton Ellises](https://www.heise.de/ct/artikel/c-t-Remix-Wettbewerb-The-Easton-Ellises-2542427.html#englisch), licensed under Creative Commons CC BY-NC-SA 3.0 is included and can be accessed using `stempeg.example_stem_path()`. 
85 | 86 | #### Reading individual streams 87 | 88 | Individual substreams of the stem file can be read by passing the corresponding stem id (starting from 0): 89 | 90 | ```python 91 | S, rate = stempeg.read_stems(stempeg.example_stem_path(), stem_id=[0, 1]) 92 | ``` 93 | 94 | #### Read excerpts (set seek position) 95 | 96 | Excerpts from the stem instead of the full file can be read by providing start (`start`) and duration (`duration`) in seconds to `read_stems`: 97 | 98 | ```python 99 | S, _ = stempeg.read_stems(stempeg.example_stem_path(), start=1, duration=1.5) 100 | # read from second 1.0 to second 2.5 101 | ``` 102 | 103 | ### Writing audio 104 | 105 | As seen in the flow chart above, stempeg supports multiple ways to write multi-track audio. 106 | 107 | #### Write multi-channel audio 108 | 109 | [`stempeg.write_audio`](http://faroit.com/stempeg/write.html#stempeg.write.write_audio) can be used for single-stream, multi-channel audio files. 110 | Stempeg wraps a number of ffmpeg parameters to resample the output sample rate and adjust the audio codec, if necessary. 111 | 112 | ```python 113 | stempeg.write_audio(path="out.mp4", data=S, sample_rate=44100.0, output_sample_rate=48000.0, codec='aac', bitrate=256000) 114 | ``` 115 | 116 | #### Writing multi-stream audio 117 | 118 | Writing stem files from a numpy tensor can be done with: 119 | 120 | ```python 121 | stempeg.write_stems(path="output.stem.mp4", data=S, sample_rate=44100, writer=stempeg.StreamsWriter()) 122 | ``` 123 | 124 | As seen in the flow chart above, stempeg supports multiple ways to write multi-stream audio. 125 | Each of the methods has a different number of parameters. To select a method, one of the following settings can be passed: 126 | 127 | * `stempeg.FilesWriter` 128 | Stems will be saved into multiple files. For the naming, 129 | `basename(path)` is ignored and just the 130 | parent of `path` and its `extension` is used. 
131 | * `stempeg.ChannelsWriter` 132 | Stems will be saved as multiple channels. 133 | * `stempeg.StreamsWriter` **(default)**. 134 | Stems will be saved into a single multi-stream file. 135 | * `stempeg.NIStemsWriter` 136 | Stems will be saved into a single multistream audio. 137 | Additionally Native Instruments Stems compatible 138 | Metadata is added. This requires the installation of 139 | `MP4Box`. 140 | 141 | > :warning: __Warning__: Muxing stems using _ffmpeg_ leads to multi-stream files not compatible with Native Instrument Hardware or Software. Please use [MP4Box](https://github.com/gpac/gpac) if you use the `stempeg.NIStemsWriter()` 142 | 143 | For more information on writing stems, see [`stempeg.write_stems`](https://faroit.com/stempeg/write.html#stempeg.write.write_stems). 144 | For an example that documents the advanced features of the writer, see [readwrite.py](/examples/readwrite.py). 145 | 146 | ### Use the command line tools 147 | 148 | _stempeg_ provides a convenient cli tool to convert a stem to multiple wavfiles. The `-s` switch sets the start, the `-t` switch sets the duration. 149 | 150 | ```bash 151 | stem2wav "The Easton Ellises - Falcon 69.stem.mp4" -s 1.0 -t 2.5 152 | ``` 153 | 154 | ## F.A.Q 155 | 156 | #### How can I improve the reading performance? 157 | 158 | If `read_stems` is called repeatedly, it always does two system calls: one for getting the file info and one for the actual reading. To speed this up, you could provide the `Info` object to `read_stems` if the number of streams, the number of channels and the sample rate are identical. 159 | 160 | ```python 161 | file_path = stempeg.example_stem_path() 162 | info = stempeg.Info(file_path) 163 | S, _ = stempeg.read_stems(file_path, info=info) 164 | ``` 165 | 166 | #### How can the quality of the encoded stems be increased 167 | 168 | For __Encoding__ it is recommended to use the Fraunhofer AAC encoder (`libfdk_aac`) which is not included in the default ffmpeg builds. 
Note that the conda version currently does _not_ include `fdk-aac`. If `libfdk_aac` is not installed _stempeg_ will use the default `aac` codec which will result in slightly inferior audio quality. 169 | -------------------------------------------------------------------------------- /docs/cli.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | stempeg.cli API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module stempeg.cli

23 |
24 |
25 |
26 | 27 | Expand source code 28 | Browse git 29 | 30 |
import argparse
 31 | from . import __version__
 32 | 
 33 | from .read import Info, read_stems
 34 | from .write import write_stems
 35 | from .write import FilesWriter
 36 | 
 37 | from os import path as op
 38 | import os
 39 | 
 40 | 
 41 | def cli(inargs=None):
 42 |     """
 43 |     Commandline interface for receiving stem files
 44 |     """
 45 | 
 46 |     parser = argparse.ArgumentParser()
 47 | 
 48 |     parser.add_argument(
 49 |         '--version', '-V',
 50 |         action='version',
 51 |         version='%%(prog)s %s' % __version__
 52 |     )
 53 | 
 54 |     parser.add_argument(
 55 |         'filename',
 56 |         metavar="filename",
 57 |         help="Input STEM file"
 58 |     )
 59 | 
 60 |     parser.add_argument(
 61 |         '--extension',
 62 |         metavar='extension',
 63 |         type=str,
 64 |         default='.wav',
 65 |         help="Output extension"
 66 |     )
 67 | 
 68 |     parser.add_argument(
 69 |         '--id',
 70 |         metavar='id',
 71 |         type=int,
 72 |         nargs='+',
 73 |         help="A list of stem_ids"
 74 |     )
 75 | 
 76 |     parser.add_argument(
 77 |         '-s',
 78 |         type=float,
 79 |         nargs='?',
 80 |         help="start offset in seconds"
 81 |     )
 82 | 
 83 |     parser.add_argument(
 84 |         '-t',
 85 |         type=float,
 86 |         nargs='?',
 87 |         help="read duration"
 88 |     )
 89 | 
 90 |     parser.add_argument(
 91 |         'outdir',
 92 |         metavar='outdir',
 93 |         nargs='?',
 94 |         help="Output folder"
 95 |     )
 96 | 
 97 |     args = parser.parse_args(inargs)
 98 |     stem2files(
 99 |         args.filename,
100 |         args.outdir,
101 |         args.extension,
102 |         args.id,
103 |         args.s,
104 |         args.t
105 |     )
106 | 
107 | 
def stem2files(
    stems_file,
    outdir=None,
    extension="wav",
    idx=None,
    start=None,
    duration=None,
):
    """Read a stem file and write its stems to separate files.

    Args:
        stems_file (str): path of the input stem file.
        outdir (str, optional): output directory, created when missing.
            Defaults to the directory of `stems_file`.
        extension (str): output extension, passed to `write_stems` as the
            second element of the path tuple.
        idx (list(int), optional): forwarded to `read_stems` as `stem_id`.
        start (float, optional): forwarded to `read_stems` as `start`.
        duration (float, optional): forwarded to `read_stems` as
            `duration`.
    """
    info = Info(stems_file)
    S, sr = read_stems(stems_file, stem_id=idx, start=start, duration=duration)

    rootpath, filename = op.split(stems_file)

    # "song.stem.mp4" -> "song": drop the extension, then everything from
    # the first ".stem" on (split is a no-op when ".stem" is absent).
    basename = op.splitext(filename)[0].split(".stem")[0]

    if outdir is not None:
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(outdir, exist_ok=True)
        rootpath = outdir

    titles = info.title_streams
    if len(set(titles)) == len(titles):
        # titles are unique, so they can serve as stem names
        stem_names = titles
        if isinstance(idx, (list, tuple)):
            try:
                # keep names aligned with the subset of stems that was read
                # NOTE(review): assumes title_streams is indexable by
                # stem id -- confirm against `Info`.
                stem_names = [titles[i] for i in idx]
            except (IndexError, TypeError):
                stem_names = titles
    else:
        # titles contain duplicates
        # lets not use the metadata
        stem_names = None

    write_stems(
        (op.join(rootpath, basename), extension),
        S,
        sample_rate=sr,
        writer=FilesWriter(
            multiprocess=True,
            output_sample_rate=sr,
            stem_names=stem_names
        )
    )
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |

Functions

156 |
157 |
158 | def cli(inargs=None) 159 |
160 |
161 |

Commandline interface for receiving stem files

162 |
163 | 164 | Expand source code 165 | Browse git 166 | 167 |
def cli(inargs=None):
    """
    Commandline interface for receiving stem files

    Builds the argument parser, parses `inargs` and forwards the parsed
    values to `stem2files`.

    Args:
        inargs (list(str), optional): arguments to parse, handed to
            `ArgumentParser.parse_args` unchanged.
    """
    # (names, options) pairs. They are registered in this order so the two
    # positionals keep their places: `filename` first, then `outdir`.
    argument_specs = [
        (('--version', '-V'), {
            'action': 'version',
            'version': '%%(prog)s %s' % __version__,
        }),
        (('filename',), {
            'metavar': "filename",
            'help': "Input STEM file",
        }),
        (('--extension',), {
            'metavar': 'extension',
            'type': str,
            'default': '.wav',
            'help': "Output extension",
        }),
        (('--id',), {
            'metavar': 'id',
            'type': int,
            'nargs': '+',
            'help': "A list of stem_ids",
        }),
        (('-s',), {
            'type': float,
            'nargs': '?',
            'help': "start offset in seconds",
        }),
        (('-t',), {
            'type': float,
            'nargs': '?',
            'help': "read duration",
        }),
        (('outdir',), {
            'metavar': 'outdir',
            'nargs': '?',
            'help': "Output folder",
        }),
    ]

    parser = argparse.ArgumentParser()
    for names, options in argument_specs:
        parser.add_argument(*names, **options)

    parsed = parser.parse_args(inargs)
    stem2files(
        parsed.filename,
        parsed.outdir,
        parsed.extension,
        parsed.id,
        parsed.s,
        parsed.t
    )
232 |
233 |
234 |
235 | def stem2files(stems_file, outdir=None, extension='wav', idx=None, start=None, duration=None) 236 |
237 |
238 |
239 |
240 | 241 | Expand source code 242 | Browse git 243 | 244 |
def stem2files(
    stems_file,
    outdir=None,
    extension="wav",
    idx=None,
    start=None,
    duration=None,
):
    """Read a stem file and write its stems to separate files.

    Args:
        stems_file (str): path of the input stem file.
        outdir (str, optional): output directory, created when missing.
            Defaults to the directory of `stems_file`.
        extension (str): output extension, passed to `write_stems` as the
            second element of the path tuple.
        idx (list(int), optional): forwarded to `read_stems` as `stem_id`.
        start (float, optional): forwarded to `read_stems` as `start`.
        duration (float, optional): forwarded to `read_stems` as
            `duration`.
    """
    info = Info(stems_file)
    S, sr = read_stems(stems_file, stem_id=idx, start=start, duration=duration)

    rootpath, filename = op.split(stems_file)

    # "song.stem.mp4" -> "song": drop the extension, then everything from
    # the first ".stem" on (split is a no-op when ".stem" is absent).
    basename = op.splitext(filename)[0].split(".stem")[0]

    if outdir is not None:
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(outdir, exist_ok=True)
        rootpath = outdir

    titles = info.title_streams
    if len(set(titles)) == len(titles):
        # titles are unique, so they can serve as stem names
        stem_names = titles
        if isinstance(idx, (list, tuple)):
            try:
                # keep names aligned with the subset of stems that was read
                # NOTE(review): assumes title_streams is indexable by
                # stem id -- confirm against `Info`.
                stem_names = [titles[i] for i in idx]
            except (IndexError, TypeError):
                stem_names = titles
    else:
        # titles contain duplicates
        # lets not use the metadata
        stem_names = None

    write_stems(
        (op.join(rootpath, basename), extension),
        S,
        sample_rate=sr,
        writer=FilesWriter(
            multiprocess=True,
            output_sample_rate=sr,
            stem_names=stem_names
        )
    )
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 | 310 |
311 | 314 | 315 | -------------------------------------------------------------------------------- /docs/cmds.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | stempeg.cmds API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module stempeg.cmds

23 |
24 |
25 |
26 | 27 | Expand source code 28 | Browse git 29 | 30 |
import re
 31 | import subprocess as sp
 32 | import logging
 33 | 
 34 | FFMPEG_PATH = None
 35 | FFPROBE_PATH = None
 36 | MP4BOX_PATH = None
 37 | 
 38 | 
 39 | def find_cmd(cmd):
 40 |     try:
 41 |         from shutil import which
 42 |         return which(cmd)
 43 |     except ImportError:
 44 |         import os
 45 |         for path in os.environ["PATH"].split(os.pathsep):
 46 |             if os.access(os.path.join(path, cmd), os.X_OK):
 47 |                 return path
 48 | 
 49 |     return None
 50 | 
 51 | 
 52 | def ffmpeg_and_ffprobe_exists():
 53 |     global FFMPEG_PATH, FFPROBE_PATH
 54 |     if FFMPEG_PATH is None:
 55 |         FFMPEG_PATH = find_cmd("ffmpeg")
 56 | 
 57 |     if FFPROBE_PATH is None:
 58 |         FFPROBE_PATH = find_cmd("ffprobe")
 59 | 
 60 |     return FFMPEG_PATH is not None and FFPROBE_PATH is not None
 61 | 
 62 | 
 63 | def mp4box_exists():
 64 |     global MP4BOX_PATH
 65 |     if MP4BOX_PATH is None:
 66 |         MP4BOX_PATH = find_cmd("MP4Box")
 67 | 
 68 |     return MP4BOX_PATH is not None
 69 | 
 70 | 
 71 | if not ffmpeg_and_ffprobe_exists():
 72 |     raise RuntimeError(
 73 |         'ffmpeg or ffprobe could not be found! '
 74 |         'Please install them before using stempeg. '
 75 |         'See: https://github.com/faroit/stempeg'
 76 |     )
 77 | 
 78 | 
 79 | def check_available_aac_encoders():
 80 |     """Returns the available AAC encoders
 81 | 
 82 |     Returns:
 83 |         list(str): List of available encoder codecs from ffmpeg
 84 | 
 85 |     """
 86 |     cmd = [
 87 |         FFMPEG_PATH,
 88 |         '-v', 'error',
 89 |         '-codecs'
 90 |     ]
 91 | 
 92 |     output = sp.check_output(cmd)
 93 |     aac_codecs = [
 94 |         x for x in
 95 |         output.splitlines() if "AAC (Advanced Audio Coding)" in str(x)
 96 |     ][0]
 97 |     hay = aac_codecs.decode('ascii')
 98 |     match = re.findall(r'\(encoders: ([^\)]*) \)', hay)
 99 |     if match:
100 |         return match[0].split(" ")
101 |     else:
102 |         return None
103 | 
104 | 
def get_aac_codec():
    """Pick the AAC encoder name to hand to ffmpeg.

    `libfdk_aac` is chosen when `check_available_aac_encoders` lists it.
    Otherwise `aac` is returned; a warning is logged when encoders were
    listed but `libfdk_aac` is not among them.

    Returns:
        str: ffmpeg aac codec name
    """
    encoders = check_available_aac_encoders()
    if encoders is None:
        return 'aac'

    if 'libfdk_aac' in encoders:
        return 'libfdk_aac'

    logging.warning(
        "For the better audio quality, install `libfdk_aac` codec."
    )
    return 'aac'
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |

Functions

133 |
134 |
135 | def check_available_aac_encoders() 136 |
137 |
138 |

Returns the available AAC encoders

139 |

Returns

140 |

list(str): List of available encoder codecs from ffmpeg

141 |
142 | 143 | Expand source code 144 | Browse git 145 | 146 |
def check_available_aac_encoders():
    """Returns the available AAC encoders

    Runs ``ffmpeg -v error -codecs`` and parses the ``(encoders: ...)``
    list of the "AAC (Advanced Audio Coding)" line.

    Returns:
        list(str): List of available encoder codecs from ffmpeg, or `None`
            if the listing has no AAC line or that line names no encoders.

    """
    cmd = [
        FFMPEG_PATH,
        '-v', 'error',
        '-codecs'
    ]

    output = sp.check_output(cmd)
    # check_output returns bytes, so match on bytes; an ffmpeg build without
    # an AAC entry yields None instead of an IndexError.
    aac_line = next(
        (
            line for line in output.splitlines()
            if b"AAC (Advanced Audio Coding)" in line
        ),
        None
    )
    if aac_line is None:
        return None

    hay = aac_line.decode('ascii', errors='replace')
    match = re.search(r'\(encoders: ([^\)]*) \)', hay)
    if match is None:
        return None

    return match.group(1).split(" ")
170 |
171 |
172 |
173 | def ffmpeg_and_ffprobe_exists() 174 |
175 |
176 |
177 |
178 | 179 | Expand source code 180 | Browse git 181 | 182 |
def ffmpeg_and_ffprobe_exists():
    """Look up ffmpeg and ffprobe and remember where they are.

    The results of `find_cmd` are stored in the module globals
    `FFMPEG_PATH` and `FFPROBE_PATH`; a lookup is only made while the
    respective global is still `None`.

    Returns:
        bool: `True` if both globals are set, `False` otherwise.
    """
    global FFMPEG_PATH, FFPROBE_PATH
    FFMPEG_PATH = find_cmd("ffmpeg") if FFMPEG_PATH is None else FFMPEG_PATH
    FFPROBE_PATH = (
        find_cmd("ffprobe") if FFPROBE_PATH is None else FFPROBE_PATH
    )

    return not (FFMPEG_PATH is None or FFPROBE_PATH is None)
191 |
192 |
193 |
194 | def find_cmd(cmd) 195 |
196 |
197 |
198 |
199 | 200 | Expand source code 201 | Browse git 202 | 203 |
def find_cmd(cmd):
    """Locate an executable on the search path.

    Args:
        cmd (str): name of the executable, e.g. ``"ffmpeg"``.

    Returns:
        str or None: path of the executable, `None` if it was not found.
    """
    try:
        from shutil import which
    except ImportError:
        # No shutil.which available: scan PATH by hand.
        import os
        for directory in os.environ.get("PATH", "").split(os.pathsep):
            candidate = os.path.join(directory, cmd)
            # return the executable itself, not the directory holding it,
            # and do not mistake a searchable directory for a command
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate
        return None

    return which(cmd)
214 |
215 |
216 |
217 | def get_aac_codec() 218 |
219 |
220 |

Checks codec and warns if libfdk_aac codec 221 | is not available.

222 |

Returns

223 |
224 |
str
225 |
ffmpeg aac codec name
226 |
227 |
228 | 229 | Expand source code 230 | Browse git 231 | 232 |
def get_aac_codec():
    """Pick the AAC encoder name to hand to ffmpeg.

    `libfdk_aac` is chosen when `check_available_aac_encoders` lists it.
    Otherwise `aac` is returned; a warning is logged when encoders were
    listed but `libfdk_aac` is not among them.

    Returns:
        str: ffmpeg aac codec name
    """
    encoders = check_available_aac_encoders()
    if encoders is None:
        return 'aac'

    if 'libfdk_aac' in encoders:
        return 'libfdk_aac'

    logging.warning(
        "For the better audio quality, install `libfdk_aac` codec."
    )
    return 'aac'
252 |
253 |
254 |
255 | def mp4box_exists() 256 |
257 |
258 |
259 |
260 | 261 | Expand source code 262 | Browse git 263 | 264 |
def mp4box_exists():
    """Look up MP4Box and remember where it is.

    The result of `find_cmd` is stored in the module global `MP4BOX_PATH`;
    a lookup is only made while that global is still `None`.

    Returns:
        bool: `True` if `MP4BOX_PATH` is set, `False` otherwise.
    """
    global MP4BOX_PATH
    MP4BOX_PATH = find_cmd("MP4Box") if MP4BOX_PATH is None else MP4BOX_PATH

    return not (MP4BOX_PATH is None)
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 | 299 |
300 | 303 | 304 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | stempeg API documentation 8 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 |
22 |
23 |

Package stempeg

24 |
25 |
26 |

Stempeg is a python package to read and write STEM files. 27 | Technically, stems are audio containers that combine multiple audio streams and metadata in a single audio file. This makes it ideal to playback multitrack audio, where users can select the audio sub-stream during playback (e.g. supported by VLC).

28 |

Under the hood, stempeg uses ffmpeg for reading and writing multistream audio, optionally MP4Box is used to create STEM files that are compatible with Native Instruments hardware and software.

29 | 33 |

stempeg_scheme

34 |

Please checkout the Github repository for more information.

35 |
36 | 37 | Expand source code 38 | Browse git 39 | 40 |
# flake8: noqa
 41 | """
 42 | Stempeg is a python package to read and write [STEM](https://www.native-instruments.com/en/specials/stems/) files.
 43 | Technically, stems are audio containers that combine multiple audio streams and metadata in a single audio file. This makes it ideal to playback multitrack audio, where users can select the audio sub-stream during playback (e.g. supported by VLC). 
 44 | 
 45 | Under the hood, _stempeg_ uses [ffmpeg](https://www.ffmpeg.org/) for reading and writing multistream audio, optionally [MP4Box](https://github.com/gpac/gpac) is used to create STEM files that are compatible with Native Instruments hardware and software.
 46 | 
 47 | - `stempeg.read`: reading audio tensors and metadata.
 48 | - `stempeg.write`: writing audio tensors.
 49 | 
 50 | ![stempeg_scheme](https://user-images.githubusercontent.com/72940/102477776-16960a00-405d-11eb-9389-1ea9263cf99d.png)
 51 | 
 52 | Please checkout [the Github repository](https://github.com/faroit/stempeg) for more information.
 53 | """
 54 | 
 55 | from .read import read_stems
 56 | from .read import Info
 57 | from .read import StreamsReader, ChannelsReader
 58 | from .write import write_stems
 59 | from .write import write_audio
 60 | from .write import FilesWriter, StreamsWriter, ChannelsWriter, NIStemsWriter
 61 | 
 62 | from .cmds import check_available_aac_encoders
 63 | 
 64 | import re
 65 | import os
 66 | import subprocess as sp
 67 | from os import path as op
 68 | import argparse
 69 | import pkg_resources
 70 | 
 71 | __version__ = "0.2.2"
 72 | 
 73 | 
 74 | def example_stem_path():
 75 |     """Get the path to an included stem file.
 76 | 
 77 |     Returns
 78 |     -------
 79 |     filename : str
 80 |         Path to the stem file
 81 |     """
 82 |     return pkg_resources.resource_filename(
 83 |         __name__,
 84 |         'data/The Easton Ellises - Falcon 69.stem.mp4'
 85 |     )
 86 | 
 87 | 
 88 | def default_metadata():
 89 |     """Get the path to included stems metadata.
 90 | 
 91 |     Returns
 92 |     -------
 93 |     filename : str
 94 |         Path to the json file
 95 |     """
 96 |     return pkg_resources.resource_filename(
 97 |         __name__,
 98 |         'data/default_metadata.json'
 99 |     )
100 | 
101 | 
def ffmpeg_version():
    """Returns the available ffmpeg version

    Runs ``ffmpeg -version`` and parses the first line that contains
    "ffmpeg version ".

    Returns
    ----------
    version : str
        version number as string, `None` if no version could be parsed
    """

    cmd = [
        'ffmpeg',
        '-version'
    ]

    output = sp.check_output(cmd)
    # check_output returns bytes, so match on bytes; a missing version line
    # yields None instead of an IndexError.
    version_line = next(
        (
            line for line in output.splitlines()
            if b"ffmpeg version " in line
        ),
        None
    )
    if version_line is None:
        return None

    hay = version_line.decode('ascii', errors='replace')
    # `[a-zA-Z]?` skips a one-letter prefix such as the "n" in "n4.3".
    # `\w?` would also match a digit and swallow the first digit of a
    # multi-digit major version ("10.2" -> "0.2").
    match = re.search(
        r'ffmpeg version [a-zA-Z]?(\d+\.)?(\d+\.)?(\*|\d+)', hay
    )
    if match is None:
        return None

    # optional groups that did not take part in the match are None
    return "".join(part or "" for part in match.groups())
127 |
128 |
129 |
130 |

Sub-modules

131 |
132 |
stempeg.cli
133 |
134 |
135 |
136 |
stempeg.cmds
137 |
138 |
139 |
140 |
stempeg.read
141 |
142 |

Reading module to load stems into numpy tensors.

143 |
144 |
stempeg.write
145 |
146 |

Writing module to save stems to disk.

147 |
148 |
149 |
150 |
151 |
152 |
153 |

Functions

154 |
155 |
156 | def default_metadata() 157 |
158 |
159 |

Get the path to included stems metadata.

160 |

Returns

161 |
162 |
filename : str
163 |
Path to the json file
164 |
165 |
166 | 167 | Expand source code 168 | Browse git 169 | 170 |
def default_metadata():
    """Return the location of the stems metadata bundled with the package.

    Returns
    -------
    filename : str
        Path to the json file
    """
    # resource name is relative to this package
    resource = 'data/default_metadata.json'
    return pkg_resources.resource_filename(__name__, resource)
182 |
183 |
184 |
185 | def example_stem_path() 186 |
187 |
188 |

Get the path to an included stem file.

189 |

Returns

190 |
191 |
filename : str
192 |
Path to the stem file
193 |
194 |
195 | 196 | Expand source code 197 | Browse git 198 | 199 |
def example_stem_path():
    """Return the location of the stem file bundled with the package.

    Returns
    -------
    filename : str
        Path to the stem file
    """
    # resource name is relative to this package
    resource = 'data/The Easton Ellises - Falcon 69.stem.mp4'
    return pkg_resources.resource_filename(__name__, resource)
211 |
212 |
213 |
214 | def ffmpeg_version() 215 |
216 |
217 |

Returns the available ffmpeg version

218 |

Returns

219 |
220 |
version : str
221 |
version number as string
222 |
223 |
224 | 225 | Expand source code 226 | Browse git 227 | 228 |
def ffmpeg_version():
    """Returns the available ffmpeg version

    Runs ``ffmpeg -version`` and parses the version number from the first
    output line containing ``"ffmpeg version "``.

    Returns
    -------
    version : str or None
        version number as string (up to three numeric components,
        e.g. ``"4.3.1"``), or `None` if the output has no such line or
        the version on it cannot be parsed (e.g. builds named after a
        git revision).
    """
    output = sp.check_output(['ffmpeg', '-version'])

    # decode leniently: only the ascii version banner is of interest and
    # other bytes of the output must not make the lookup fail
    for line in output.decode('ascii', errors='replace').splitlines():
        if "ffmpeg version " not in line:
            continue
        # only the first banner line is considered
        match = re.search(
            r'ffmpeg version \w?(\d+\.)?(\d+\.)?(\*|\d+)', line
        )
        if match is None:
            return None
        # optional groups that did not take part in the match are None
        return "".join(part or "" for part in match.groups())

    # no banner line at all
    return None
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 | 283 |
284 | 287 | 288 | -------------------------------------------------------------------------------- /docs/read.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | stempeg.read API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 |
21 |
22 |

Module stempeg.read

23 |
24 |
25 |

Writing module to load stems into numpy tensors.

26 |
27 | 28 | Expand source code 29 | Browse git 30 | 31 |
# flake8: noqa
  32 | """
  33 | Writing module to load stems into numpy tensors.
  34 | 
  35 | 
  36 | """
  37 | from stempeg.write import FilesWriter
  38 | import numpy as np
  39 | import warnings
  40 | import ffmpeg
  41 | import pprint
  42 | from multiprocessing import Pool
  43 | import atexit
  44 | from functools import partial
  45 | import datetime as dt
  46 | 
  47 | class Reader(object):
  48 |     """Base class for reader
  49 | 
  50 |     Holds reader options
  51 |     """
  52 | 
  53 |     def __init__(self):
  54 |         pass
  55 | 
  56 | 
  57 | class StreamsReader(Reader):
  58 |     """Holding configuration for streams
  59 | 
  60 |     This is the default reader. Nothing to be hold
  61 |     """
  62 | 
  63 |     def __init__(self):
  64 |         pass
  65 | 
  66 | 
  67 | class ChannelsReader(Reader):
  68 |     """Using multichannels to multiplex to stems
  69 | 
  70 |     stems will be extracted from multichannel-pairs
  71 |     e.g. 8 channels will be converted to 4 stereo pairs
  72 | 
  73 | 
  74 |     Args:
  75 |         from_channels: int
  76 |             number of channels, defaults to `2`.
  77 |     """
  78 | 
  79 |     def __init__(self, nb_channels=2):
  80 |         self.nb_channels = nb_channels
  81 | 
  82 | 
  83 | def _read_ffmpeg(
  84 |     filename,
  85 |     sample_rate,
  86 |     channels,
  87 |     start,
  88 |     duration,
  89 |     dtype,
  90 |     ffmpeg_format,
  91 |     stem_idx
  92 | ):
  93 |     """Loading data using ffmpeg and numpy
  94 | 
  95 |     Args:
  96 |         filename (str): filename path
  97 |         sample_rate (int): sample rate
  98 |         channels (int): metadata info object needed to
  99 |             know the channel configuration in advance
 100 |         start (float): start position in seconds
 101 |         duration (float): duration in seconds
 102 |         dtype (numpy.dtype): Type of audio array to be casted into
 103 |         stem_idx (int): stream id
 104 |         ffmpeg_format (str): ffmpeg intermediate format encoding.
 105 |             Choose "f32le" for best compatibility
 106 | 
 107 |     Returns:
 108 |         (array_like): numpy audio array
 109 |     """
 110 |     output_kwargs = {'format': ffmpeg_format, 'ar': sample_rate}
 111 |     if duration is not None:
 112 |         output_kwargs['t'] = str(dt.timedelta(seconds=duration))
 113 |     if start is not None:
 114 |         output_kwargs['ss'] = str(dt.timedelta(seconds=start))
 115 | 
 116 |     output_kwargs['map'] = '0:' + str(stem_idx)
 117 |     process = (
 118 |         ffmpeg
 119 |         .input(filename)
 120 |         .output('pipe:', **output_kwargs)
 121 |         .run_async(pipe_stdout=True, pipe_stderr=True))
 122 |     buffer, _ = process.communicate()
 123 | 
 124 |     # decode to raw pcm format
 125 |     if ffmpeg_format == "f64le":
 126 |         # PCM 64 bit float
 127 |         numpy_dtype = '<f8'
 128 |     elif ffmpeg_format == "f32le":
 129 |         # PCM 32 bit float
 130 |         numpy_dtype = '<f4'
 131 |     elif ffmpeg_format == "s16le":
 132 |         # PCM 16 bit signed int
 133 |         numpy_dtype = '<i2'
 134 |     else:
 135 |         raise NotImplementedError("ffmpeg format is not supported")
 136 | 
 137 |     waveform = np.frombuffer(buffer, dtype=numpy_dtype).reshape(-1, channels)
 138 | 
 139 |     if not waveform.dtype == np.dtype(dtype):
 140 |         # cast to target/output dtype
 141 |         waveform = waveform.astype(dtype, order='C')
 142 |         # when coming from integer, apply normalization t0 [-1.0, 1.0]
 143 |         if np.issubdtype(numpy_dtype, np.integer):
 144 |             waveform = waveform / (np.iinfo(numpy_dtype).max + 1.0)
 145 |     return waveform
 146 | 
 147 | def read_stems(
 148 |     filename,
 149 |     start=None,
 150 |     duration=None,
 151 |     stem_id=None,
 152 |     always_3d=False,
 153 |     dtype=np.float64,
 154 |     ffmpeg_format="f32le",
 155 |     info=None,
 156 |     sample_rate=None,
 157 |     reader=StreamsReader(),
 158 |     multiprocess=False
 159 | ):
 160 |     """Read stems into numpy tensor
 161 | 
 162 |     This function can read both, multi-stream and single stream audio files.
 163 |     If used for reading normal audio, the output is a 1d or 2d (mono/stereo)
 164 |     array. When multiple streams are read, the output is a 3d array.
 165 | 
 166 |     An option stems_from_multichannel was added to load stems that are
 167 |     aggregated into multichannel audio (concatenation of pairs of
 168 |     stereo channels), see more info on audio `stempeg.write.write_stems`.
 169 | 
 170 |     By default `read_stems` assumes that multiple substreams were used to
 171 |     save the stem file (`reader=stempeg.StreamsReader()`). To support
 172 |     multistream files on audio formats that do not support multiple streams
 173 |     (e.g. WAV), streams can be mapped to multiple pairs of channels. In that
 174 |     case, `stempeg.ChannelsReader()`, can be passed. Also see:
 175 |     `stempeg.write.ChannelsWriter`.
 176 | 
 177 | 
 178 |     Args:
 179 |         filename (str): filename of the audio file to load data from.
 180 |         start (float): Start offset to load from in seconds.
 181 |         duration (float): Duration to load in seconds.
 182 |         stem_id (int, optional): substream id,
 183 |             defauls to `None` (all substreams are loaded).
 184 |         always_3d (bool, optional): By default, reading a
 185 |             single-stream audio file will return a
 186 |             two-dimensional array.  With ``always_3d=True``, audio data is
 187 |             always returned as a three-dimensional array, even if the audio
 188 |             file has only one stream.
 189 |         dtype (np.dtype, optional): Numpy data type to use, default to `np.float32`.
 190 |         info (Info, Optional): Pass ffmpeg `Info` object to reduce number
 191 |             of os calls on file.
 192 |             This can be used e.g. the sample rate and length of a track is
 193 |             already known in advance. Useful for ML training where the
 194 |             info objects can be pre-processed, thus audio loading can
 195 |             be speed up.
 196 |         sample_rate (float, optional): Sample rate of returned audio.
 197 |             Defaults to `None` which results in
 198 |             the sample rate returned from the mixture.
 199 |         reader (Reader): Holds parameters for the reading method.
 200 |             One of the following:
 201 |                 `StreamsReader(...)`
 202 |                     Read from a single multistream audio (default).
 203 |                 `ChannelsReader(...)`
 204 |                     Read/demultiplexed from multiple channels.
 205 |         multiprocess (bool): Applys multi-processing for reading
 206 |             substreams in parallel to speed up reading. Defaults to `True`
 207 | 
 208 |     Returns:
 209 |         stems (array_like):
 210 |             stems tensor of `shape=(stem x samples x channels)`
 211 |         rate (float):
 212 |             sample rate
 213 | 
 214 |     Shape:
 215 |         - Output: `[S, T, C']`, with
 216 |             `S`, if the file has multiple streams and,
 217 |             `C` is the audio has multiple channels.
 218 | 
 219 |     >>> audio, sample_rate = stempeg.read_stems("test.stem.mp4")
 220 |     >>> audio.shape
 221 |     [5, 220500, 2]
 222 |     >>> sample_rate
 223 |     44100
 224 |     """
 225 |     if multiprocess:
 226 |         _pool = Pool()
 227 |         atexit.register(_pool.close)
 228 |     else:
 229 |         _pool = None
 230 | 
 231 |     if not isinstance(filename, str):
 232 |         filename = filename.decode()
 233 | 
 234 |     # use ffprobe to get info object (samplerate, lengths)
 235 |     try:
 236 |         if info is None:
 237 |             metadata = Info(filename)
 238 |         else:
 239 |             metadata = info
 240 | 
 241 |         ffmpeg.probe(filename)
 242 |     except ffmpeg._run.Error as e:
 243 |         raise Warning(
 244 |             'An error occurs with ffprobe (see ffprobe output below)\n\n{}'
 245 |             .format(e.stderr.decode()))
 246 | 
 247 |     # check number of audio streams in file
 248 |     if 'streams' not in metadata.info or metadata.nb_audio_streams == 0:
 249 |         raise Warning('No audio stream found.')
 250 | 
 251 |     # using ChannelReader would ignore substreams
 252 |     if isinstance(reader, ChannelsReader):
 253 |         if metadata.nb_audio_streams != 1:
 254 |             raise Warning(
 255 |                 'stempeg.ChannelsReader() only processes the first substream.'
 256 |             )
 257 |         else:
 258 |             if metadata.audio_streams[0][
 259 |                 'channels'
 260 |             ] % reader.nb_channels != 0:
 261 |                 raise Warning('Stems should be encoded as multi-channel.')
 262 |             else:
 263 |                 substreams = 0
 264 |     else:
 265 |         if stem_id is not None:
 266 |             substreams = stem_id
 267 |         else:
 268 |             substreams = metadata.audio_stream_idx()
 269 | 
 270 |     if not isinstance(substreams, list):
 271 |         substreams = [substreams]
 272 | 
 273 |     # if not, get sample rate from mixture
 274 |     if sample_rate is None:
 275 |         sample_rate = metadata.sample_rate(0)
 276 | 
 277 |     _chans = metadata.channels_streams
 278 |     # check if all substreams have the same number of channels
 279 |     if len(set(_chans)) == 1:
 280 |         channels = min(_chans)
 281 |     else:
 282 |         raise RuntimeError("Stems do not have the same number of channels per substream")
 283 | 
 284 |     # set channels to minimum channel per stream
 285 |     stems = []
 286 | 
 287 |     if _pool:
 288 |         results = _pool.map_async(
 289 |             partial(
 290 |                 _read_ffmpeg,
 291 |                 filename,
 292 |                 sample_rate,
 293 |                 channels,
 294 |                 start,
 295 |                 duration,
 296 |                 dtype,
 297 |                 ffmpeg_format
 298 |             ),
 299 |             substreams,
 300 |             callback=stems.extend
 301 |         )
 302 |         results.wait()
 303 |         _pool.terminate()
 304 |     else:
 305 |         stems = [
 306 |             _read_ffmpeg(
 307 |                 filename,
 308 |                 sample_rate,
 309 |                 channels,
 310 |                 start,
 311 |                 duration,
 312 |                 dtype,
 313 |                 ffmpeg_format,
 314 |                 stem_idx
 315 |             )
 316 |             for stem_idx in substreams
 317 |         ]
 318 |     stem_durations = np.array([t.shape[0] for t in stems])
 319 |     if not (stem_durations == stem_durations[0]).all():
 320 |         warnings.warning("Stems differ in length and were shortend")
 321 |         min_length = np.min(stem_durations)
 322 |         stems = [t[:min_length, :] for t in stems]
 323 | 
 324 |     # aggregate list of stems to numpy tensor
 325 |     stems = np.array(stems)
 326 | 
 327 |     # If ChannelsReader is used, demultiplex from channels
 328 |     if isinstance(reader, (ChannelsReader)) and stems.shape[-1] > 1:
 329 |         stems = stems.transpose(1, 0, 2)
 330 |         stems = stems.reshape(
 331 |             stems.shape[0], stems.shape[1], -1, reader.nb_channels
 332 |         )
 333 |         stems = stems.transpose(2, 0, 3, 1)[..., 0]
 334 | 
 335 |     if not always_3d:
 336 |         stems = np.squeeze(stems)
 337 |     return stems, sample_rate
 338 | 
 339 | 
 340 | class Info(object):
 341 |     """Audio properties that hold a number of metadata.
 342 | 
 343 |     The object is created when can be used when `read_stems` is called.
 344 |     This is can be passed, to `read_stems` to reduce loading time.
 345 |     """
 346 | 
 347 |     def __init__(self, filename):
 348 |         super(Info, self).__init__()
 349 |         self.info = ffmpeg.probe(filename)
 350 |         self.audio_streams = [
 351 |             stream for stream in self.info['streams']
 352 |             if stream['codec_type'] == 'audio'
 353 |         ]
 354 | 
 355 |     @property
 356 |     def nb_audio_streams(self):
 357 |         """Returns the number of audio substreams"""
 358 |         return len(self.audio_streams)
 359 | 
 360 |     @property
 361 |     def nb_samples_streams(self):
 362 |         """Returns a list of number of samples for each substream"""
 363 |         return [self.samples(k) for k, stream in enumerate(self.audio_streams)]
 364 | 
 365 |     @property
 366 |     def channels_streams(self):
 367 |         """Returns the number of channels per substream"""
 368 |         return [
 369 |             self.channels(k) for k, stream in enumerate(self.audio_streams)
 370 |         ]
 371 | 
 372 |     @property
 373 |     def duration_streams(self):
 374 |         """Returns a list of durations (in s) for all substreams"""
 375 |         return [
 376 |             self.duration(k) for k, stream in enumerate(self.audio_streams)
 377 |         ]
 378 | 
 379 |     @property
 380 |     def title_streams(self):
 381 |         """Returns stream titles for all substreams"""
 382 |         return [
 383 |             stream['tags'].get('handler_name')
 384 |             for stream in self.audio_streams
 385 |         ]
 386 | 
 387 |     def audio_stream_idx(self):
 388 |         """Returns audio substream indices"""
 389 |         return [s['index'] for s in self.audio_streams]
 390 | 
 391 |     def samples(self, idx):
 392 |         """Returns the number of samples for a stream index"""
 393 |         return int(self.audio_streams[idx]['duration_ts'])
 394 | 
 395 |     def duration(self, idx):
 396 |         """Returns the duration (in seconds) for a stream index"""
 397 |         return float(self.audio_streams[idx]['duration'])
 398 | 
 399 |     def title(self, idx):
 400 |         """Return the `handler_name` metadata for a given stream index"""
 401 |         return self.audio_streams[idx]['tags']['handler_name']
 402 | 
 403 |     def rate(self, idx):
 404 |         # deprecated from older stempeg version
 405 |         return self.sample_rate(idx)
 406 | 
 407 |     def sample_rate(self, idx):
 408 |         """Return sample rate for a given substream"""
 409 |         return int(self.audio_streams[idx]['sample_rate'])
 410 | 
 411 |     def channels(self, idx):
 412 |         """Returns the number of channels for a gvien substream"""
 413 |         return int(self.audio_streams[idx]['channels'])
 414 | 
 415 |     def __repr__(self):
 416 |         """Print stream information"""
 417 |         return pprint.pformat(self.audio_streams)
418 |
419 |
420 |
421 |
422 |
423 |
424 |
425 |

Functions

426 |
427 |
428 | def read_stems(filename, start=None, duration=None, stem_id=None, always_3d=False, dtype=numpy.float64, ffmpeg_format='f32le', info=None, sample_rate=None, reader=<stempeg.read.StreamsReader object>, multiprocess=False) 429 |
430 |
431 |

Read stems into numpy tensor

432 |

This function can read both, multi-stream and single stream audio files. 433 | If used for reading normal audio, the output is a 1d or 2d (mono/stereo) 434 | array. When multiple streams are read, the output is a 3d array.

435 |

An option stems_from_multichannel was added to load stems that are 436 | aggregated into multichannel audio (concatenation of pairs of 437 | stereo channels), see more info on audio write_stems().

438 |

By default read_stems() assumes that multiple substreams were used to 439 | save the stem file (reader=stempeg.StreamsReader()). To support 440 | multistream files on audio formats that do not support multiple streams 441 | (e.g. WAV), streams can be mapped to multiple pairs of channels. In that 442 | case, stempeg.ChannelsReader(), can be passed. Also see: 443 | ChannelsWriter.

444 |

Args

445 |
446 |
filename : str
447 |
filename of the audio file to load data from.
448 |
start : float
449 |
Start offset to load from in seconds.
450 |
duration : float
451 |
Duration to load in seconds.
452 |
stem_id : int, optional
453 |
substream id, 454 | defaults to None (all substreams are loaded).
455 |
always_3d : bool, optional
456 |
By default, reading a 457 | single-stream audio file will return a 458 | two-dimensional array. 459 | With always_3d=True, audio data is 460 | always returned as a three-dimensional array, even if the audio 461 | file has only one stream.
462 |
dtype : np.dtype, optional
463 |
Numpy data type to use, defaults to np.float64.
464 |
info : Info, Optional
465 |
Pass ffmpeg Info object to reduce number 466 | of os calls on file. 467 | This can be used e.g. the sample rate and length of a track is 468 | already known in advance. Useful for ML training where the 469 | info objects can be pre-processed, thus audio loading can 470 | be speed up.
471 |
sample_rate : float, optional
472 |
Sample rate of returned audio. 473 | Defaults to None which results in 474 | the sample rate returned from the mixture.
475 |
reader : Reader
476 |
Holds parameters for the reading method. 477 | One of the following: 478 | StreamsReader(…) 479 | Read from a single multistream audio (default). 480 | ChannelsReader(…) 481 | Read/demultiplexed from multiple channels.
482 |
multiprocess : bool
483 |
Applies multi-processing for reading 484 | substreams in parallel to speed up reading. Defaults to False
485 |
486 |

Returns

487 |

stems (array_like): 488 | stems tensor of shape=(stem x samples x channels) 489 | rate (float): 490 | sample rate

491 |

Shape

492 |
    493 |
  • Output: [S, T, C], where the 494 | S axis is present if the file has multiple streams and the 495 | C axis is present if the audio has multiple channels.
  • 496 |
497 |
>>> audio, sample_rate = stempeg.read_stems("test.stem.mp4")
 498 | >>> audio.shape
 499 | [5, 220500, 2]
 500 | >>> sample_rate
 501 | 44100
 502 | 
503 |
504 | 505 | Expand source code 506 | Browse git 507 | 508 |
def read_stems(
    filename,
    start=None,
    duration=None,
    stem_id=None,
    always_3d=False,
    dtype=np.float64,
    ffmpeg_format="f32le",
    info=None,
    sample_rate=None,
    reader=StreamsReader(),
    multiprocess=False
):
    """Read stems into numpy tensor

    This function can read both, multi-stream and single stream audio files.
    If used for reading normal audio, the output is a 1d or 2d (mono/stereo)
    array. When multiple streams are read, the output is a 3d array.

    By default `read_stems` assumes that multiple substreams were used to
    save the stem file (`reader=stempeg.StreamsReader()`). To support
    multistream files on audio formats that do not support multiple streams
    (e.g. WAV), streams can be mapped to multiple pairs of channels. In that
    case, `stempeg.ChannelsReader()`, can be passed. Also see:
    `stempeg.write.ChannelsWriter` and `stempeg.write.write_stems`.


    Args:
        filename (str or bytes): filename of the audio file to load data
            from. A value that is not a `str` is decoded with `.decode()`.
        start (float, optional): Start offset to load from in seconds.
        duration (float, optional): Duration to load in seconds.
        stem_id (int or list of int, optional): substream id(s),
            defaults to `None` (all substreams are loaded). Not used
            together with `ChannelsReader`.
        always_3d (bool, optional): By default, reading a
            single-stream audio file will return a
            two-dimensional array.  With ``always_3d=True``, audio data is
            always returned as a three-dimensional array, even if the audio
            file has only one stream.
        dtype (np.dtype, optional): Numpy data type to use,
            defaults to `np.float64`.
        ffmpeg_format (str, optional): raw pcm format used to transfer the
            audio from ffmpeg, one of "f64le", "f32le" or "s16le".
            Defaults to "f32le".
        info (Info, Optional): Pass an already created `Info` object
            instead of creating a new one from the file.
            This can be used e.g. the sample rate and length of a track is
            already known in advance. Useful for ML training where the
            info objects can be pre-processed. The file is still probed
            once to check that it is readable.
        sample_rate (float, optional): Sample rate of returned audio.
            Defaults to `None` which results in
            the sample rate of the first audio stream.
        reader (Reader): Holds parameters for the reading method.
            One of the following:
                `StreamsReader(...)`
                    Read from a single multistream audio (default).
                `ChannelsReader(...)`
                    Read/demultiplexed from multiple channels.
        multiprocess (bool): Applies multi-processing for reading
            substreams in parallel to speed up reading.
            Defaults to `False`.

    Returns:
        stems (array_like):
            stems tensor of `shape=(stem x samples x channels)`
        rate (float):
            sample rate

    Raises:
        Warning: if ffprobe fails, if no audio stream is found or if the
            file does not fit the requirements of `ChannelsReader`.
        RuntimeError: if the substreams differ in their number of channels.

    Shape:
        - Output: `[S, T, C]`. Unless `always_3d` is set, axes of
            length one are removed, e.g. `S` for a single stream.

    >>> audio, sample_rate = stempeg.read_stems("test.stem.mp4")
    >>> audio.shape
    (5, 220500, 2)
    >>> sample_rate
    44100
    """
    if not isinstance(filename, str):
        filename = filename.decode()

    # use ffprobe to get info object (samplerate, lengths)
    try:
        if info is None:
            # Info() runs ffprobe itself; a second probe would only repeat
            # the same call
            metadata = Info(filename)
        else:
            metadata = info
            # the passed info was not created here, so check that the
            # file can actually be probed
            ffmpeg.probe(filename)
    except ffmpeg._run.Error as e:
        raise Warning(
            'An error occurs with ffprobe (see ffprobe output below)\n\n{}'
            .format(e.stderr.decode()))

    # check number of audio streams in file
    if 'streams' not in metadata.info or metadata.nb_audio_streams == 0:
        raise Warning('No audio stream found.')

    # using ChannelReader would ignore substreams
    if isinstance(reader, ChannelsReader):
        if metadata.nb_audio_streams != 1:
            raise Warning(
                'stempeg.ChannelsReader() only processes the first substream.'
            )
        else:
            if metadata.audio_streams[0][
                'channels'
            ] % reader.nb_channels != 0:
                raise Warning('Stems should be encoded as multi-channel.')
            else:
                substreams = 0
    else:
        if stem_id is not None:
            substreams = stem_id
        else:
            substreams = metadata.audio_stream_idx()

    if not isinstance(substreams, list):
        substreams = [substreams]

    # if not, get sample rate from mixture
    if sample_rate is None:
        sample_rate = metadata.sample_rate(0)

    _chans = metadata.channels_streams
    # check if all substreams have the same number of channels
    if len(set(_chans)) == 1:
        channels = min(_chans)
    else:
        raise RuntimeError("Stems do not have the same number of channels per substream")

    if multiprocess:
        # the pool only lives for this call: it is created after all
        # checks passed and terminated when the block is left.
        # `map` keeps the order of `substreams` and re-raises errors of
        # the workers instead of silently returning nothing.
        with Pool() as pool:
            stems = pool.map(
                partial(
                    _read_ffmpeg,
                    filename,
                    sample_rate,
                    channels,
                    start,
                    duration,
                    dtype,
                    ffmpeg_format
                ),
                substreams
            )
    else:
        stems = [
            _read_ffmpeg(
                filename,
                sample_rate,
                channels,
                start,
                duration,
                dtype,
                ffmpeg_format,
                stem_idx
            )
            for stem_idx in substreams
        ]
    stem_durations = np.array([t.shape[0] for t in stems])
    if not (stem_durations == stem_durations[0]).all():
        warnings.warn("Stems differ in length and were shortend")
        # cut all stems to the length of the shortest one
        min_length = np.min(stem_durations)
        stems = [t[:min_length, :] for t in stems]

    # aggregate list of stems to numpy tensor
    stems = np.array(stems)

    # If ChannelsReader is used, demultiplex from channels
    if isinstance(reader, (ChannelsReader)) and stems.shape[-1] > 1:
        stems = stems.transpose(1, 0, 2)
        # split the channel axis into groups of `nb_channels`
        stems = stems.reshape(
            stems.shape[0], stems.shape[1], -1, reader.nb_channels
        )
        stems = stems.transpose(2, 0, 3, 1)[..., 0]

    if not always_3d:
        stems = np.squeeze(stems)
    return stems, sample_rate
699 |
700 |
701 |
702 |
703 |
704 |

Classes

705 |
706 |
707 | class ChannelsReader 708 | (nb_channels=2) 709 |
710 |
711 |

Using multichannels to multiplex to stems

712 |

stems will be extracted from multichannel-pairs 713 | e.g. 8 channels will be converted to 4 stereo pairs

714 |

Args

715 |
716 |
nb_channels
717 |
int 718 | number of channels, defaults to 2.
719 |
720 |
721 | 722 | Expand source code 723 | Browse git 724 | 725 |
class ChannelsReader(Reader):
    """Reader configuration for stems multiplexed into channels.

    Stems are taken from groups of channels of a multichannel file,
    e.g. 8 channels will be converted to 4 stereo pairs.


    Args:
        nb_channels: int
            number of channels per stem, defaults to `2`.
    """

    def __init__(self, nb_channels=2):
        # size of each channel group that forms one stem
        self.nb_channels = nb_channels
739 |
740 |

Ancestors

741 | 744 |
745 |
746 | class Info 747 | (filename) 748 |
749 |
750 |

Audio properties that hold a number of metadata.

751 |

The object is created when read_stems() is called. 752 | It can also be passed to read_stems() to reduce loading time.

753 |
754 | 755 | Expand source code 756 | Browse git 757 | 758 |
class Info(object):
    """Audio properties that hold a number of metadata.

    The object is created when `read_stems` is called without `info`.
    It can also be created in advance and passed to `read_stems`
    to reduce loading time.

    All `idx` arguments are positions in `audio_streams` (the audio
    streams of the file in the order reported by ffprobe).
    """

    def __init__(self, filename):
        super(Info, self).__init__()
        # complete ffprobe result of the file
        self.info = ffmpeg.probe(filename)
        # only the audio streams of the ffprobe result
        self.audio_streams = [
            stream for stream in self.info['streams']
            if stream['codec_type'] == 'audio'
        ]

    @property
    def nb_audio_streams(self):
        """Returns the number of audio substreams"""
        return len(self.audio_streams)

    @property
    def nb_samples_streams(self):
        """Returns a list of number of samples for each substream"""
        return [self.samples(k) for k in range(self.nb_audio_streams)]

    @property
    def channels_streams(self):
        """Returns the number of channels per substream"""
        return [self.channels(k) for k in range(self.nb_audio_streams)]

    @property
    def duration_streams(self):
        """Returns a list of durations (in s) for all substreams"""
        return [self.duration(k) for k in range(self.nb_audio_streams)]

    @property
    def title_streams(self):
        """Returns stream titles for all substreams

        The title is `None` for streams without a `handler_name` tag,
        including streams that carry no tags at all.
        """
        return [
            stream.get('tags', {}).get('handler_name')
            for stream in self.audio_streams
        ]

    def audio_stream_idx(self):
        """Returns audio substream indices"""
        return [s['index'] for s in self.audio_streams]

    def samples(self, idx):
        """Returns the number of samples for a stream index

        This is the `duration_ts` value reported for the stream.
        """
        return int(self.audio_streams[idx]['duration_ts'])

    def duration(self, idx):
        """Returns the duration (in seconds) for a stream index"""
        return float(self.audio_streams[idx]['duration'])

    def title(self, idx):
        """Return the `handler_name` metadata for a given stream index

        Raises a `KeyError` if the stream has no such tag.
        """
        return self.audio_streams[idx]['tags']['handler_name']

    def rate(self, idx):
        """Return sample rate for a given substream

        Deprecated alias of `sample_rate`, kept from older stempeg
        versions.
        """
        return self.sample_rate(idx)

    def sample_rate(self, idx):
        """Return sample rate for a given substream"""
        return int(self.audio_streams[idx]['sample_rate'])

    def channels(self, idx):
        """Returns the number of channels for a given substream"""
        return int(self.audio_streams[idx]['channels'])

    def __repr__(self):
        """Print stream information"""
        return pprint.pformat(self.audio_streams)
836 |
837 |

Instance variables

838 |
839 |
var channels_streams
840 |
841 |

Returns the number of channels per substream

842 |
843 | 844 | Expand source code 845 | Browse git 846 | 847 |
@property
def channels_streams(self):
    """Channel counts of all audio substreams, in stream order."""
    counts = []
    for position, _ in enumerate(self.audio_streams):
        counts.append(self.channels(position))
    return counts
853 |
854 |
855 |
var duration_streams
856 |
857 |

Returns a list of durations (in s) for all substreams

858 |
859 | 860 | Expand source code 861 | Browse git 862 | 863 |
@property
def duration_streams(self):
    """Durations (in s) of all audio substreams, in stream order."""
    durations = []
    for position, _ in enumerate(self.audio_streams):
        durations.append(self.duration(position))
    return durations
869 |
870 |
871 |
var nb_audio_streams
872 |
873 |

Returns the number of audio substreams

874 |
875 | 876 | Expand source code 877 | Browse git 878 | 879 |
@property
def nb_audio_streams(self):
    """Count of audio substreams."""
    streams = self.audio_streams
    return len(streams)
883 |
884 |
885 |
var nb_samples_streams
886 |
887 |

Returns a list of number of samples for each substream

888 |
889 | 890 | Expand source code 891 | Browse git 892 | 893 |
@property
def nb_samples_streams(self):
    """Sample counts of all audio substreams, in stream order."""
    counts = []
    for position, _ in enumerate(self.audio_streams):
        counts.append(self.samples(position))
    return counts
897 |
898 |
899 |
var title_streams
900 |
901 |

Returns stream titles for all substreams

902 |
903 | 904 | Expand source code 905 | Browse git 906 | 907 |
@property
def title_streams(self):
    """Collect the `handler_name` tag of every audio substream.

    An entry is `None` when a substream's tags carry no `handler_name`.
    """
    titles = []
    for substream in self.audio_streams:
        tags = substream['tags']
        titles.append(tags.get('handler_name'))
    return titles
914 |
915 |
916 |
917 |

Methods

918 |
919 |
920 | def audio_stream_idx(self) 921 |
922 |
923 |

Returns audio substream indices

924 |
925 | 926 | Expand source code 927 | Browse git 928 | 929 |
def audio_stream_idx(self):
    """List the `index` entry of every audio substream."""
    indices = []
    for substream in self.audio_streams:
        indices.append(substream['index'])
    return indices
932 |
933 |
934 |
935 | def channels(self, idx) 936 |
937 |
938 |

Returns the number of channels for a given substream

939 |
940 | 941 | Expand source code 942 | Browse git 943 | 944 |
def channels(self, idx):
    """Number of channels of the substream at `idx`, converted to int."""
    substream = self.audio_streams[idx]
    channel_count = substream['channels']
    return int(channel_count)
947 |
948 |
949 |
950 | def duration(self, idx) 951 |
952 |
953 |

Returns the duration (in seconds) for a stream index

954 |
955 | 956 | Expand source code 957 | Browse git 958 | 959 |
def duration(self, idx):
    """Duration (in seconds) of the substream at `idx`, as a float."""
    substream = self.audio_streams[idx]
    seconds = substream['duration']
    return float(seconds)
962 |
963 |
964 |
965 | def rate(self, idx) 966 |
967 |
968 |
969 |
970 | 971 | Expand source code 972 | Browse git 973 | 974 |
def rate(self, idx):
    """Return the sample rate for a given substream.

    Alias that forwards to `sample_rate`.
    """
    # deprecated from older stempeg version
    return self.sample_rate(idx)
977 |
978 |
979 |
980 | def sample_rate(self, idx) 981 |
982 |
983 |

Return sample rate for a given substream

984 |
985 | 986 | Expand source code 987 | Browse git 988 | 989 |
def sample_rate(self, idx):
    """Sample rate of the substream at `idx`, converted to int."""
    substream = self.audio_streams[idx]
    raw_rate = substream['sample_rate']
    return int(raw_rate)
992 |
993 |
994 |
995 | def samples(self, idx) 996 |
997 |
998 |

Returns the number of samples for a stream index

999 |
1000 | 1001 | Expand source code 1002 | Browse git 1003 | 1004 |
def samples(self, idx):
    """Number of samples of the substream at `idx`.

    Read from the substream's `duration_ts` entry and converted to int.
    """
    substream = self.audio_streams[idx]
    duration_ts = substream['duration_ts']
    return int(duration_ts)
1007 |
1008 |
1009 |
1010 | def title(self, idx) 1011 |
1012 |
1013 |

Return the handler_name metadata for a given stream index

1014 |
1015 | 1016 | Expand source code 1017 | Browse git 1018 | 1019 |
def title(self, idx):
    """Return the `handler_name` metadata for a given stream index.

    Mirrors `title_streams`: when the substream's tags carry no
    `handler_name`, `None` is returned instead of raising `KeyError`.
    """
    tags = self.audio_streams[idx]['tags']
    # .get keeps single-stream lookup consistent with `title_streams`
    return tags.get('handler_name')
1022 |
1023 |
1024 |
1025 |
1026 |
1027 | class Reader 1028 |
1029 |
1030 |

Base class for reader

1031 |

Holds reader options

1032 |
1033 | 1034 | Expand source code 1035 | Browse git 1036 | 1037 |
class Reader(object):
    """Base class shared by all readers.

    Intended to hold reader options; the base itself stores nothing.
    """

    def __init__(self):
        """Create a reader without any state."""
        return None
1045 |
1046 |

Subclasses

1047 | 1051 |
1052 |
1053 | class StreamsReader 1054 |
1055 |
1056 |

Holding configuration for streams

1057 |

This is the default reader. Nothing to be held

1058 |
1059 | 1060 | Expand source code 1061 | Browse git 1062 | 1063 |
class StreamsReader(Reader):
    """Reader configuration for streams

    This is the default reader. It holds no options of its own.
    """

    def __init__(self):
        # Nothing to configure for this reader.
        return None
1071 |
1072 |

Ancestors

1073 | 1076 |
1077 |
1078 |
1079 |
1080 | 1128 |
1129 | 1132 | 1133 | 1134 | -------------------------------------------------------------------------------- /examples/check_codecs.py: -------------------------------------------------------------------------------- 1 | """Opens a stem file prints stream info 2 | """ 3 | import stempeg 4 | 5 | 6 | if __name__ == '__main__': 7 | # read stems 8 | Info = stempeg.check_available_aac_encoders() 9 | print(Info) 10 | -------------------------------------------------------------------------------- /examples/get_info.py: -------------------------------------------------------------------------------- 1 | """Opens a stem file prints stream info 2 | """ 3 | import argparse 4 | import stempeg 5 | 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument( 10 | 'input', 11 | ) 12 | args = parser.parse_args() 13 | 14 | # read stems 15 | i = stempeg.Info(args.input) 16 | -------------------------------------------------------------------------------- /examples/readwrite.py: -------------------------------------------------------------------------------- 1 | """Opens a stem file and saves (re-encodes) back to a stem file 2 | """ 3 | import argparse 4 | import stempeg 5 | import subprocess as sp 6 | import numpy as np 7 | from os import path as op 8 | 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument( 13 | 'input', 14 | ) 15 | args = parser.parse_args() 16 | 17 | # load stems 18 | stems, rate = stempeg.read_stems(args.input) 19 | 20 | # load stems, 21 | # resample to 96000 Hz, 22 | # use multiprocessing 23 | stems, rate = stempeg.read_stems( 24 | args.input, 25 | sample_rate=96000, 26 | multiprocess=True 27 | ) 28 | 29 | # --> stems now has `shape=(stem x samples x channels)`` 30 | 31 | # save stems from tensor as multi-stream mp4 32 | stempeg.write_stems( 33 | "test.stem.m4a", 34 | stems, 35 | sample_rate=96000 36 | ) 37 | 38 | # save stems as dict for convenience 39 | stems = { 40 | "mix": 
stems[0], 41 | "drums": stems[1], 42 | "bass": stems[2], 43 | "other": stems[3], 44 | "vocals": stems[4], 45 | } 46 | # keys will be automatically used 47 | 48 | # from dict as files 49 | stempeg.write_stems( 50 | "test.stem.m4a", 51 | data=stems, 52 | sample_rate=96000 53 | ) 54 | 55 | # `write_stems` is a preset for the following settings 56 | # here the output signal is resampled to 44100 Hz and AAC codec is used 57 | stempeg.write_stems( 58 | "test.stem.m4a", 59 | stems, 60 | sample_rate=96000, 61 | writer=stempeg.StreamsWriter( 62 | codec="aac", 63 | output_sample_rate=44100, 64 | bitrate="256000", 65 | stem_names=['mix', 'drums', 'bass', 'other', 'vocals'] 66 | ) 67 | ) 68 | 69 | # Native Instruments compatible stems 70 | stempeg.write_stems( 71 | "test_traktor.stem.m4a", 72 | stems, 73 | sample_rate=96000, 74 | writer=stempeg.NIStemsWriter( 75 | stems_metadata=[ 76 | {"color": "#009E73", "name": "Drums"}, 77 | {"color": "#D55E00", "name": "Bass"}, 78 | {"color": "#CC79A7", "name": "Other"}, 79 | {"color": "#56B4E9", "name": "Vocals"} 80 | ] 81 | ) 82 | ) 83 | 84 | # lets write as multistream opus (supports only 48000 khz) 85 | stempeg.write_stems( 86 | "test.stem.opus", 87 | stems, 88 | sample_rate=96000, 89 | writer=stempeg.StreamsWriter( 90 | output_sample_rate=48000, 91 | codec="opus" 92 | ) 93 | ) 94 | 95 | # writing to wav requires to convert streams to multichannel 96 | stempeg.write_stems( 97 | "test.wav", 98 | stems, 99 | sample_rate=96000, 100 | writer=stempeg.ChannelsWriter( 101 | output_sample_rate=48000 102 | ) 103 | ) 104 | 105 | # # stempeg also supports to load merged-multichannel streams using 106 | stems, rate = stempeg.read_stems( 107 | "test.wav", 108 | reader=stempeg.ChannelsReader(nb_channels=2) 109 | ) 110 | 111 | # mp3 does not support multiple channels, 112 | # therefore we have to use `stempeg.FilesWriter` 113 | # outputs are named ["output/0.mp3", "output/1.mp3"] 114 | # for named files, provide a dict or use `stem_names` 115 | # 
also apply multiprocessing 116 | stempeg.write_stems( 117 | ("output", ".mp3"), 118 | stems, 119 | sample_rate=rate, 120 | writer=stempeg.FilesWriter( 121 | multiprocess=True, 122 | output_sample_rate=48000, 123 | stem_names=["mix", "drums", "bass", "other", "vocals"] 124 | ) 125 | ) 126 | -------------------------------------------------------------------------------- /pdoc/config.mako: -------------------------------------------------------------------------------- 1 | <%! 2 | # Template configuration. Copy over in your template directory 3 | # (used with `--template-dir`) and adapt as necessary. 4 | # Note, defaults are loaded from this distribution file, so your 5 | # config.mako only needs to contain values you want overridden. 6 | # You can also run pdoc with `--config KEY=VALUE` to override 7 | # individual values. 8 | html_lang = 'en' 9 | show_inherited_members = False 10 | extract_module_toc_into_sidebar = True 11 | list_class_variables_in_index = True 12 | sort_identifiers = True 13 | show_type_annotations = True 14 | # Show collapsed source code block next to each item. 15 | # Disabling this can improve rendering speed of large modules. 16 | show_source_code = True 17 | # If set, format links to objects in online source code repository 18 | # according to this template. Supported keywords for interpolation 19 | # are: commit, path, start_line, end_line. 20 | git_link_template = 'https://github.com/faroit/stempeg/blob/{commit}/{path}#L{start_line}-L{end_line}' 21 | #git_link_template = 'https://gitlab.com/USER/PROJECT/blob/{commit}/{path}#L{start_line}-L{end_line}' 22 | #git_link_template = 'https://bitbucket.org/USER/PROJECT/src/{commit}/{path}#lines-{start_line}:{end_line}' 23 | #git_link_template = 'https://CGIT_HOSTNAME/PROJECT/tree/{path}?id={commit}#n{start-line}' 24 | # A prefix to use for every HTML hyperlink in the generated documentation. 25 | # No prefix results in all links being relative. 
26 | link_prefix = '' 27 | # Enable syntax highlighting for code/source blocks by including Highlight.js 28 | syntax_highlighting = True 29 | # Set the style keyword such as 'atom-one-light' or 'github-gist' 30 | # Options: https://github.com/highlightjs/highlight.js/tree/master/src/styles 31 | # Demo: https://highlightjs.org/static/demo/ 32 | hljs_style = 'github' 33 | # If set, insert Google Analytics tracking code. Value is GA 34 | # tracking id (UA-XXXXXX-Y). 35 | google_analytics = '' 36 | # If set, insert Google Custom Search search bar widget above the sidebar index. 37 | # The whitespace-separated tokens represent arbitrary extra queries (at least one 38 | # must match) passed to regular Google search. Example: 39 | #google_search_query = 'inurl:github.com/USER/PROJECT site:PROJECT.github.io site:PROJECT.website' 40 | google_search_query = '' 41 | # Enable offline search using Lunr.js. For explanation of 'fuzziness' parameter, which is 42 | # added to every query word, see: https://lunrjs.com/guides/searching.html#fuzzy-matches 43 | # If 'index_docstrings' is False, a shorter index is built, indexing only 44 | # the full object reference names. 45 | #lunr_search = {'fuzziness': 1, 'index_docstrings': True} 46 | lunr_search = None 47 | # If set, render LaTeX math syntax within \(...\) (inline equations), 48 | # or within \[...\] or $$...$$ or `.. math::` (block equations) 49 | # as nicely-formatted math formulas using MathJax. 50 | # Note: in Python docstrings, either all backslashes need to be escaped (\\) 51 | # or you need to use raw r-strings. 
52 | latex_math = False 53 | %> -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | ignore:.*U.*mode is deprecated:DeprecationWarning -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Always prefer setuptools over distutils 2 | from setuptools import setup, find_packages 3 | # To use a consistent encoding 4 | from codecs import open 5 | from os import path 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # Get the long description from the README file 10 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 11 | long_description = f.read() 12 | 13 | # Arguments marked as "Required" below must be included for upload to PyPI. 14 | # Fields marked as "Optional" may be commented out. 
15 | 16 | setup( 17 | name="stempeg", 18 | version="0.2.4", 19 | description="Read and write stem/multistream audio files", 20 | long_description=long_description, 21 | long_description_content_type="text/markdown", 22 | url="http://github.com/faroit/stempeg", 23 | author="Fabian-Robert Stoeter", 24 | author_email="mail@faroit.com", 25 | classifiers=[ 26 | "Development Status :: 4 - Beta", 27 | "Environment :: Console", 28 | "Intended Audience :: Telecommunications Industry", 29 | "Intended Audience :: Science/Research", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | "Programming Language :: Python :: 3.11", 33 | "Topic :: Multimedia :: Sound/Audio :: Analysis", 34 | "Topic :: Multimedia :: Sound/Audio :: Sound Synthesis", 35 | ], 36 | zip_safe=True, 37 | keywords="stems audio reader", 38 | packages=find_packages(exclude=["tests"]), 39 | # Dependencies, this installs the entire Python scientific 40 | # computations stack 41 | install_requires=["numpy>=1.6", "ffmpeg-python>=0.2.0"], 42 | extras_require={ 43 | "tests": [ 44 | "pytest", 45 | ], 46 | }, 47 | entry_points={ 48 | "console_scripts": [ 49 | "stem2files=stempeg.cli:cli", 50 | ] 51 | }, 52 | project_urls={ # Optional 53 | "Bug Reports": "https://github.com/faroit/stempeg/issues", 54 | }, 55 | include_package_data=True, 56 | ) 57 | -------------------------------------------------------------------------------- /stempeg/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | """ 3 | Stempeg is a python package to read and write [STEM](https://www.native-instruments.com/en/specials/stems/) files. 4 | Technically, stems are audio containers that combine multiple audio streams and metadata in a single audio file. This makes it ideal to playback multitrack audio, where users can select the audio sub-stream during playback (e.g. supported by VLC). 
5 | 6 | Under the hood, _stempeg_ uses [ffmpeg](https://www.ffmpeg.org/) for reading and writing multistream audio, optionally [MP4Box](https://github.com/gpac/gpac) is used to create STEM files that are compatible with Native Instruments hardware and software. 7 | 8 | - `stempeg.read`: reading audio tensors and metadata. 9 | - `stempeg.write`: writing audio tensors. 10 | 11 | ![stempeg_scheme](https://user-images.githubusercontent.com/72940/102477776-16960a00-405d-11eb-9389-1ea9263cf99d.png) 12 | 13 | Please checkout [the Github repository](https://github.com/faroit/stempeg) for more information. 14 | """ 15 | 16 | from .read import read_stems 17 | from .read import Info 18 | from .read import StreamsReader, ChannelsReader 19 | from .write import write_stems 20 | from .write import write_audio 21 | from .write import FilesWriter, StreamsWriter, ChannelsWriter, NIStemsWriter 22 | 23 | from .cmds import check_available_aac_encoders 24 | 25 | import re 26 | import os 27 | import subprocess as sp 28 | from os import path as op 29 | import argparse 30 | import pkg_resources 31 | 32 | __version__ = "0.2.2" 33 | 34 | 35 | def example_stem_path(): 36 | """Get the path to an included stem file. 37 | 38 | Returns 39 | ------- 40 | filename : str 41 | Path to the stem file 42 | """ 43 | return pkg_resources.resource_filename( 44 | __name__, 45 | 'data/The Easton Ellises - Falcon 69.stem.mp4' 46 | ) 47 | 48 | 49 | def default_metadata(): 50 | """Get the path to included stems metadata. 
51 | 52 | Returns 53 | ------- 54 | filename : str 55 | Path to the json file 56 | """ 57 | return pkg_resources.resource_filename( 58 | __name__, 59 | 'data/default_metadata.json' 60 | ) 61 | 62 | 63 | def ffmpeg_version(): 64 | """Returns the available ffmpeg version 65 | 66 | Returns 67 | ---------- 68 | version : str 69 | version number as string 70 | """ 71 | 72 | cmd = [ 73 | 'ffmpeg', 74 | '-version' 75 | ] 76 | 77 | output = sp.check_output(cmd) 78 | aac_codecs = [ 79 | x for x in 80 | output.splitlines() if "ffmpeg version " in str(x) 81 | ][0] 82 | hay = aac_codecs.decode('ascii') 83 | match = re.findall(r'ffmpeg version \w?(\d+\.)?(\d+\.)?(\*|\d+)', hay) 84 | if match: 85 | return "".join(match[0]) 86 | else: 87 | return None 88 | 89 | 90 | -------------------------------------------------------------------------------- /stempeg/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from . import __version__ 3 | 4 | from .read import Info, read_stems 5 | from .write import write_stems 6 | from .write import FilesWriter 7 | 8 | from os import path as op 9 | import os 10 | 11 | 12 | def cli(inargs=None): 13 | """ 14 | Commandline interface for receiving stem files 15 | """ 16 | 17 | parser = argparse.ArgumentParser() 18 | 19 | parser.add_argument( 20 | '--version', '-V', 21 | action='version', 22 | version='%%(prog)s %s' % __version__ 23 | ) 24 | 25 | parser.add_argument( 26 | 'filename', 27 | metavar="filename", 28 | help="Input STEM file" 29 | ) 30 | 31 | parser.add_argument( 32 | '--extension', 33 | metavar='extension', 34 | type=str, 35 | default='.wav', 36 | help="Output extension" 37 | ) 38 | 39 | parser.add_argument( 40 | '--id', 41 | metavar='id', 42 | type=int, 43 | nargs='+', 44 | help="A list of stem_ids" 45 | ) 46 | 47 | parser.add_argument( 48 | '-s', 49 | type=float, 50 | nargs='?', 51 | help="start offset in seconds" 52 | ) 53 | 54 | parser.add_argument( 55 | '-t', 56 | type=float, 57 
| nargs='?', 58 | help="read duration" 59 | ) 60 | 61 | parser.add_argument( 62 | 'outdir', 63 | metavar='outdir', 64 | nargs='?', 65 | help="Output folder" 66 | ) 67 | 68 | args = parser.parse_args(inargs) 69 | stem2files( 70 | args.filename, 71 | args.outdir, 72 | args.extension, 73 | args.id, 74 | args.s, 75 | args.t 76 | ) 77 | 78 | 79 | def stem2files( 80 | stems_file, 81 | outdir=None, 82 | extension="wav", 83 | idx=None, 84 | start=None, 85 | duration=None, 86 | ): 87 | info = Info(stems_file) 88 | S, sr = read_stems(stems_file, stem_id=idx, start=start, duration=duration) 89 | 90 | rootpath, filename = op.split(stems_file) 91 | 92 | basename = op.splitext(filename)[0] 93 | if ".stem" in basename: 94 | basename = basename.split(".stem")[0] 95 | 96 | if outdir is not None: 97 | if not op.exists(outdir): 98 | os.makedirs(outdir) 99 | 100 | rootpath = outdir 101 | 102 | if len(set(info.title_streams)) == len(info.title_streams): 103 | # titles contain duplicates 104 | # lets not use the metadata 105 | stem_names = info.title_streams 106 | else: 107 | stem_names = None 108 | 109 | write_stems( 110 | (op.join(rootpath, basename), extension), 111 | S, 112 | sample_rate=sr, 113 | writer=FilesWriter( 114 | multiprocess=True, 115 | output_sample_rate=sr, 116 | stem_names=stem_names 117 | ) 118 | ) 119 | -------------------------------------------------------------------------------- /stempeg/cmds.py: -------------------------------------------------------------------------------- 1 | import re 2 | import subprocess as sp 3 | import logging 4 | 5 | FFMPEG_PATH = None 6 | FFPROBE_PATH = None 7 | MP4BOX_PATH = None 8 | 9 | 10 | def find_cmd(cmd): 11 | try: 12 | from shutil import which 13 | return which(cmd) 14 | except ImportError: 15 | import os 16 | for path in os.environ["PATH"].split(os.pathsep): 17 | if os.access(os.path.join(path, cmd), os.X_OK): 18 | return path 19 | 20 | return None 21 | 22 | 23 | def ffmpeg_and_ffprobe_exists(): 24 | global FFMPEG_PATH, 
FFPROBE_PATH 25 | if FFMPEG_PATH is None: 26 | FFMPEG_PATH = find_cmd("ffmpeg") 27 | 28 | if FFPROBE_PATH is None: 29 | FFPROBE_PATH = find_cmd("ffprobe") 30 | 31 | return FFMPEG_PATH is not None and FFPROBE_PATH is not None 32 | 33 | 34 | def mp4box_exists(): 35 | global MP4BOX_PATH 36 | if MP4BOX_PATH is None: 37 | MP4BOX_PATH = find_cmd("MP4Box") 38 | 39 | return MP4BOX_PATH is not None 40 | 41 | 42 | if not ffmpeg_and_ffprobe_exists(): 43 | raise RuntimeError( 44 | 'ffmpeg or ffprobe could not be found! ' 45 | 'Please install them before using stempeg. ' 46 | 'See: https://github.com/faroit/stempeg' 47 | ) 48 | 49 | 50 | def check_available_aac_encoders(): 51 | """Returns the available AAC encoders 52 | 53 | Returns: 54 | list(str): List of available encoder codecs from ffmpeg 55 | 56 | """ 57 | cmd = [ 58 | FFMPEG_PATH, 59 | '-v', 'error', 60 | '-codecs' 61 | ] 62 | 63 | output = sp.check_output(cmd) 64 | aac_codecs = [ 65 | x for x in 66 | output.splitlines() if "AAC (Advanced Audio Coding)" in str(x) 67 | ][0] 68 | hay = aac_codecs.decode('ascii') 69 | match = re.findall(r'\(encoders: ([^\)]*) \)', hay) 70 | if match: 71 | return match[0].split(" ") 72 | else: 73 | return None 74 | 75 | 76 | def get_aac_codec(): 77 | """Checks codec and warns if `libfdk_aac` codec 78 | is not available. 79 | 80 | Returns: 81 | str: ffmpeg aac codec name 82 | """ 83 | avail = check_available_aac_encoders() 84 | if avail is not None: 85 | if 'libfdk_aac' in avail: 86 | codec = 'libfdk_aac' 87 | else: 88 | logging.warning( 89 | "For the better audio quality, install `libfdk_aac` codec." 
90 | ) 91 | codec = 'aac' 92 | else: 93 | codec = 'aac' 94 | 95 | return codec 96 | -------------------------------------------------------------------------------- /stempeg/data/The Easton Ellises - Falcon 69.stem.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/faroit/stempeg/a612e0436dfcb4219c36d818e06a391c65c485fa/stempeg/data/The Easton Ellises - Falcon 69.stem.mp4 -------------------------------------------------------------------------------- /stempeg/data/default_metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "mastering_dsp": { 3 | "compressor": { 4 | "ratio": 3, 5 | "output_gain": 0.5, 6 | "enabled": false, 7 | "release": 0.300000011920929, 8 | "attack": 0.003000000026077032, 9 | "input_gain": 0.5, 10 | "threshold": 0, 11 | "hp_cutoff": 300, 12 | "dry_wet": 50 13 | }, 14 | "limiter": { 15 | "release": 0.05000000074505806, 16 | "threshold": 0, 17 | "ceiling": -0.3499999940395355, 18 | "enabled": false 19 | } 20 | }, 21 | "version": 1, 22 | "stems": [ 23 | { 24 | "color": "#009E73", 25 | "name": "drums" 26 | }, 27 | { 28 | "color": "#D55E00", 29 | "name": "bass" 30 | }, 31 | { 32 | "color": "#CC79A7", 33 | "name": "other" 34 | }, 35 | { 36 | "color": "#56B4E9", 37 | "name": "vocals" 38 | } 39 | ] 40 | } -------------------------------------------------------------------------------- /stempeg/read.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | """ 3 | Writing module to load stems into numpy tensors. 
4 | 5 | 6 | """ 7 | from stempeg.write import FilesWriter 8 | import numpy as np 9 | import warnings 10 | import ffmpeg 11 | import pprint 12 | from multiprocessing import Pool 13 | import atexit 14 | from functools import partial 15 | import datetime as dt 16 | 17 | class Reader(object): 18 | """Base class for reader 19 | 20 | Holds reader options 21 | """ 22 | 23 | def __init__(self): 24 | pass 25 | 26 | 27 | class StreamsReader(Reader): 28 | """Holding configuration for streams 29 | 30 | This is the default reader. Nothing to be hold 31 | """ 32 | 33 | def __init__(self): 34 | pass 35 | 36 | 37 | class ChannelsReader(Reader): 38 | """Using multichannels to multiplex to stems 39 | 40 | stems will be extracted from multichannel-pairs 41 | e.g. 8 channels will be converted to 4 stereo pairs 42 | 43 | 44 | Args: 45 | from_channels: int 46 | number of channels, defaults to `2`. 47 | """ 48 | 49 | def __init__(self, nb_channels=2): 50 | self.nb_channels = nb_channels 51 | 52 | 53 | def _read_ffmpeg( 54 | filename, 55 | sample_rate, 56 | channels, 57 | start, 58 | duration, 59 | dtype, 60 | ffmpeg_format, 61 | stem_idx 62 | ): 63 | """Loading data using ffmpeg and numpy 64 | 65 | Args: 66 | filename (str): filename path 67 | sample_rate (int): sample rate 68 | channels (int): metadata info object needed to 69 | know the channel configuration in advance 70 | start (float): start position in seconds 71 | duration (float): duration in seconds 72 | dtype (numpy.dtype): Type of audio array to be casted into 73 | stem_idx (int): stream id 74 | ffmpeg_format (str): ffmpeg intermediate format encoding. 
75 | Choose "f32le" for best compatibility 76 | 77 | Returns: 78 | (array_like): numpy audio array 79 | """ 80 | output_kwargs = {'format': ffmpeg_format, 'ar': sample_rate} 81 | if duration is not None: 82 | output_kwargs['t'] = str(dt.timedelta(seconds=duration)) 83 | if start is not None: 84 | output_kwargs['ss'] = str(dt.timedelta(seconds=start)) 85 | 86 | output_kwargs['map'] = '0:' + str(stem_idx) 87 | process = ( 88 | ffmpeg 89 | .input(filename) 90 | .output('pipe:', **output_kwargs) 91 | .run_async(pipe_stdout=True, pipe_stderr=True)) 92 | buffer, _ = process.communicate() 93 | 94 | # decode to raw pcm format 95 | if ffmpeg_format == "f64le": 96 | # PCM 64 bit float 97 | numpy_dtype = '>> audio, sample_rate = stempeg.read_stems("test.stem.mp4") 190 | >>> audio.shape 191 | [5, 220500, 2] 192 | >>> sample_rate 193 | 44100 194 | """ 195 | if multiprocess: 196 | _pool = Pool() 197 | atexit.register(_pool.close) 198 | else: 199 | _pool = None 200 | 201 | if not isinstance(filename, str): 202 | filename = filename.decode() 203 | 204 | # use ffprobe to get info object (samplerate, lengths) 205 | try: 206 | if info is None: 207 | metadata = Info(filename) 208 | else: 209 | metadata = info 210 | 211 | ffmpeg.probe(filename) 212 | except ffmpeg._run.Error as e: 213 | raise Warning( 214 | 'An error occurs with ffprobe (see ffprobe output below)\n\n{}' 215 | .format(e.stderr.decode())) 216 | 217 | # check number of audio streams in file 218 | if 'streams' not in metadata.info or metadata.nb_audio_streams == 0: 219 | raise Warning('No audio stream found.') 220 | 221 | # using ChannelReader would ignore substreams 222 | if isinstance(reader, ChannelsReader): 223 | if metadata.nb_audio_streams != 1: 224 | raise Warning( 225 | 'stempeg.ChannelsReader() only processes the first substream.' 
226 | ) 227 | else: 228 | if metadata.audio_streams[0][ 229 | 'channels' 230 | ] % reader.nb_channels != 0: 231 | raise Warning('Stems should be encoded as multi-channel.') 232 | else: 233 | substreams = 0 234 | else: 235 | if stem_id is not None: 236 | substreams = stem_id 237 | else: 238 | substreams = metadata.audio_stream_idx() 239 | 240 | if not isinstance(substreams, list): 241 | substreams = [substreams] 242 | 243 | # if not, get sample rate from mixture 244 | if sample_rate is None: 245 | sample_rate = metadata.sample_rate(0) 246 | 247 | _chans = metadata.channels_streams 248 | # check if all substreams have the same number of channels 249 | if len(set(_chans)) == 1: 250 | channels = min(_chans) 251 | else: 252 | raise RuntimeError("Stems do not have the same number of channels per substream") 253 | 254 | # set channels to minimum channel per stream 255 | stems = [] 256 | 257 | if _pool: 258 | results = _pool.map_async( 259 | partial( 260 | _read_ffmpeg, 261 | filename, 262 | sample_rate, 263 | channels, 264 | start, 265 | duration, 266 | dtype, 267 | ffmpeg_format 268 | ), 269 | substreams, 270 | callback=stems.extend 271 | ) 272 | results.wait() 273 | _pool.terminate() 274 | else: 275 | stems = [ 276 | _read_ffmpeg( 277 | filename, 278 | sample_rate, 279 | channels, 280 | start, 281 | duration, 282 | dtype, 283 | ffmpeg_format, 284 | stem_idx 285 | ) 286 | for stem_idx in substreams 287 | ] 288 | stem_durations = np.array([t.shape[0] for t in stems]) 289 | if not (stem_durations == stem_durations[0]).all(): 290 | warnings.warning("Stems differ in length and were shortend") 291 | min_length = np.min(stem_durations) 292 | stems = [t[:min_length, :] for t in stems] 293 | 294 | # aggregate list of stems to numpy tensor 295 | stems = np.array(stems) 296 | 297 | # If ChannelsReader is used, demultiplex from channels 298 | if isinstance(reader, (ChannelsReader)) and stems.shape[-1] > 1: 299 | stems = stems.transpose(1, 0, 2) 300 | stems = stems.reshape( 301 | 
stems.shape[0], stems.shape[1], -1, reader.nb_channels 302 | ) 303 | stems = stems.transpose(2, 0, 3, 1)[..., 0] 304 | 305 | if not always_3d: 306 | stems = np.squeeze(stems) 307 | return stems, sample_rate 308 | 309 | 310 | class Info(object): 311 | """Audio properties that hold a number of metadata. 312 | 313 | The object is created when can be used when `read_stems` is called. 314 | This is can be passed, to `read_stems` to reduce loading time. 315 | """ 316 | 317 | def __init__(self, filename): 318 | super(Info, self).__init__() 319 | self.info = ffmpeg.probe(filename) 320 | self.audio_streams = [ 321 | stream for stream in self.info['streams'] 322 | if stream['codec_type'] == 'audio' 323 | ] 324 | 325 | @property 326 | def nb_audio_streams(self): 327 | """Returns the number of audio substreams""" 328 | return len(self.audio_streams) 329 | 330 | @property 331 | def nb_samples_streams(self): 332 | """Returns a list of number of samples for each substream""" 333 | return [self.samples(k) for k, stream in enumerate(self.audio_streams)] 334 | 335 | @property 336 | def channels_streams(self): 337 | """Returns the number of channels per substream""" 338 | return [ 339 | self.channels(k) for k, stream in enumerate(self.audio_streams) 340 | ] 341 | 342 | @property 343 | def duration_streams(self): 344 | """Returns a list of durations (in s) for all substreams""" 345 | return [ 346 | self.duration(k) for k, stream in enumerate(self.audio_streams) 347 | ] 348 | 349 | @property 350 | def title_streams(self): 351 | """Returns stream titles for all substreams""" 352 | return [ 353 | stream['tags'].get('handler_name') 354 | for stream in self.audio_streams 355 | ] 356 | 357 | def audio_stream_idx(self): 358 | """Returns audio substream indices""" 359 | return [s['index'] for s in self.audio_streams] 360 | 361 | def samples(self, idx): 362 | """Returns the number of samples for a stream index""" 363 | return int(self.audio_streams[idx]['duration_ts']) 364 | 365 | def 
duration(self, idx): 366 | """Returns the duration (in seconds) for a stream index""" 367 | return float(self.audio_streams[idx]['duration']) 368 | 369 | def title(self, idx): 370 | """Return the `handler_name` metadata for a given stream index""" 371 | return self.audio_streams[idx]['tags']['handler_name'] 372 | 373 | def rate(self, idx): 374 | # deprecated from older stempeg version 375 | return self.sample_rate(idx) 376 | 377 | def sample_rate(self, idx): 378 | """Return sample rate for a given substream""" 379 | return int(self.audio_streams[idx]['sample_rate']) 380 | 381 | def channels(self, idx): 382 | """Returns the number of channels for a gvien substream""" 383 | return int(self.audio_streams[idx]['channels']) 384 | 385 | def __repr__(self): 386 | """Print stream information""" 387 | return pprint.pformat(self.audio_streams) 388 | -------------------------------------------------------------------------------- /stempeg/test.sh: -------------------------------------------------------------------------------- 1 | parallel -j 1 --bar CUDA_VISIBLE_DEVICES=0 python train.py \ 2 | --root /mnt/disks/tdb/tdbva \ 3 | --model-output gs://umx-pro-results/umx-pro-lr{1}-wd{2} \ 4 | --log-output gs://umx-pro-logs \ 5 | --dataset custom \ 6 | --log-steps 500 \ 7 | --batch-size 16 \ 8 | --targets vocals \ 9 | --sources vocals other \ 10 | --seed 42 \ 11 | --augmentations gain \ 12 | --nb-parallel-calls 1 \ 13 | --lr-decay-patience 20000 \ 14 | --patience 20000 \ 15 | --statistics-steps 500 \ 16 | --hidden-size 1024 \ 17 | --steps 20000 \ 18 | --lr {1} \ 19 | --weight-decay {2} \ 20 | ::: 0.001 0.004 0.008 0.012 0.016 0.020 \ 21 | ::: 0.00001 \ -------------------------------------------------------------------------------- /stempeg/write.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | """ 3 | Writing module to save stems to disk. 
4 | 5 | """ 6 | 7 | import base64 8 | import json 9 | import logging 10 | import tempfile as tmp 11 | import warnings 12 | from itertools import chain 13 | from multiprocessing import Pool 14 | from pathlib import Path 15 | import subprocess as sp 16 | import atexit 17 | 18 | import ffmpeg 19 | import numpy as np 20 | 21 | import stempeg 22 | 23 | from .cmds import FFMPEG_PATH, mp4box_exists, get_aac_codec, find_cmd 24 | 25 | 26 | def _build_channel_map(nb_stems, nb_channels, stem_names=None): 27 | """Creates an ffmpeg complex filter string 28 | 29 | The filter is designed to multiplex multiple stems into 30 | multiple channels. 31 | 32 | In the case of single channel stems a filter is created that maps 33 | nb_channels = nb_stems 34 | In the case of stereo stems, the filter maps 35 | nb_channels = nb_stems * 2 36 | 37 | Args: 38 | nb_stems: int 39 | Number of stems. 40 | nb_channels: int 41 | Number of channels. 42 | stem_names: list(str) 43 | List of stem names, should match number of stems. 44 | 45 | Returns: 46 | complex_filter: str 47 | """ 48 | 49 | if stem_names is None: 50 | stem_names = [ 51 | "Stem_" % str(i + 1) for i in range(nb_stems) 52 | ] 53 | 54 | if nb_stems != len(stem_names): 55 | raise RuntimeError("Please provide a stem names for each stream") 56 | 57 | if nb_channels == 1: 58 | return ( 59 | [ 60 | '-filter_complex', 61 | # set merging 62 | ';'.join( 63 | "[a:0]pan=mono| c0=c%d[a%d]" % ( 64 | idx, idx 65 | ) 66 | for idx in range(nb_stems) 67 | ), 68 | ] 69 | ) + list( 70 | chain.from_iterable( 71 | [ 72 | [ 73 | '-map', 74 | "[a%d]" % idx, 75 | # add title tag (e.g. displayed by VLC) 76 | "-metadata:s:a:%d" % idx, 77 | "title=%s" % stem_names[idx], 78 | # add handler tag (e.g. 
read by ffmpeg < 4.1) 79 | "-metadata:s:a:%d" % idx, 80 | "handler=%s" % stem_names[idx], 81 | # add handler tag for ffmpeg >= 4.1 82 | "-metadata:s:a:%d" % idx, 83 | "handler_name=%s" % stem_names[idx] 84 | ] 85 | for idx in range(nb_stems) 86 | ] 87 | ) 88 | ) 89 | elif nb_channels == 2: 90 | return ( 91 | [ 92 | '-filter_complex', 93 | # set merging 94 | ';'.join( 95 | "[a:0]pan=stereo| c0=c%d | c1=c%d[a%d]" % ( 96 | idx * 2, 97 | idx * 2 + 1, 98 | idx 99 | ) 100 | for idx in range(nb_stems) 101 | ), 102 | ] 103 | ) + list( 104 | chain.from_iterable( 105 | [ 106 | [ 107 | '-map', 108 | "[a%d]" % idx, 109 | # add title tag (e.g. displayed by VLC) 110 | "-metadata:s:a:%d" % idx, 111 | "title=%s" % stem_names[idx], 112 | # add handler tag (e.g. read by ffmpeg -i) 113 | "-metadata:s:a:%d" % idx, 114 | "handler=%s" % stem_names[idx], 115 | # add handler tag for ffmpeg >= 4.1 116 | "-metadata:s:a:%d" % idx, 117 | "handler_name=%s" % stem_names[idx] 118 | ] 119 | for idx in range(nb_stems) 120 | ] 121 | ) 122 | ) 123 | else: 124 | raise NotImplementedError("Stempeg only support mono or stereo stems") 125 | 126 | 127 | class Writer(object): 128 | """Base template class for writer 129 | 130 | Takes tensor and writes back to disk 131 | """ 132 | 133 | def __init__(self): 134 | pass 135 | 136 | def __call__(self, data, path, sample_rate): 137 | """forward path 138 | 139 | Args: 140 | data (array): stems tensor of shape `(stems, samples, channel)` 141 | path (str): path with extension 142 | sample_rate (float): audio sample rate 143 | """ 144 | pass 145 | 146 | 147 | class FilesWriter(Writer): 148 | r"""Save Stems as multiple files 149 | 150 | Takes stems tensor and write into multiple files. 151 | 152 | Args: 153 | codec: str 154 | Specifies ffmpeg codec being used. Defaults to `None` which 155 | automatically selects default codec for each container 156 | bitrate: int, optional 157 | Bitrate in Bits per second. 
class FilesWriter(Writer):
    r"""Save Stems as multiple files

    Takes stems tensor and write into multiple files, one file per stem.

    Args:
        codec: str
            Specifies ffmpeg codec being used. Defaults to `None` which
            automatically selects default codec for each container
        bitrate: int, optional
            Bitrate in Bits per second. Defaults to `None`
        output_sample_rate: float, optional
            Sample rate of the written files. Defaults to `44100`.
            Pass `None` to write with the `sample_rate` given at call time.
        stem_names: List(str)
            List of stem names to be used for writing. Defaults to `None` which
            results in stem names to be enumerated: `['Stem_0', 'Stem_1', ...]`
        multiprocess: bool
            Enable multiprocessing when writing files.
            Can speed up writing of large files. Defaults to `False`.
        synchronous: bool
            When multiprocessing is enabled, wait for all files to be written
            before the call returns. Defaults to `True`.
    """
    def __init__(
        self,
        codec=None,
        bitrate=None,
        output_sample_rate=44100,
        stem_names=None,
        multiprocess=False,
        synchronous=True
    ):
        self.codec = codec
        self.bitrate = bitrate
        self.output_sample_rate = output_sample_rate
        self.stem_names = stem_names
        self.synchronous = synchronous
        if multiprocess:
            self._pool = Pool()
            # make sure the pool stops accepting work at interpreter exit
            atexit.register(self._pool.close)
        else:
            self._pool = None
        # pending `AsyncResult` objects of the pool, consumed by `join`
        self._tasks = []

    def join(self, timeout=200):
        """Wait for all pending tasks to be finished.

        Exceptions raised while writing a file in a worker process are
        re-raised here.

        Args:
            timeout (Optional): int
                Maximum number of seconds to wait for each task. `None`
                waits without limit.

        Raises:
            multiprocessing.TimeoutError: if a task did not finish in time.
        """
        while self._tasks:
            task = self._tasks.pop()
            # `get` waits at most `timeout` seconds and propagates worker
            # errors; a preceding `get()` without timeout would block forever
            task.get(timeout=timeout)

    def __call__(
        self,
        data,
        path,
        sample_rate
    ):
        """
        Args:
            data: array_like
                stems tensor of shape `(stems, samples, channel)`
            path: str or tuple(str, str)
                path with extension of output folder. Note that the basename
                of the path will be ignored. Wildcard can be used.
                Example: `path=/stems/*.wav` writes
                `/stems/Stem_0.wav`, `/stems/Stem_1.wav` ..
                Alternatively a tuple can be used:
                Example: `path=("/stems", ".wav")`
            sample_rate: float
                audio sample rate
        """
        nb_stems = data.shape[0]

        # Resolve the defaults into locals: storing them on `self` would make
        # the values of the first call stick for every following call.
        output_sample_rate = self.output_sample_rate
        if output_sample_rate is None:
            output_sample_rate = sample_rate

        stem_names = self.stem_names
        if stem_names is None:
            stem_names = ["Stem_" + str(k) for k in range(nb_stems)]

        if isinstance(path, tuple):
            folder, suffix = path[0], path[1]
        else:
            # only the folder and the extension of `path` are used
            p = Path(path)
            folder, suffix = p.parent, p.suffix

        for idx in range(nb_stems):
            stem_filepath = str(Path(folder, stem_names[idx] + suffix))
            if self._pool is not None:
                task = self._pool.apply_async(
                    write_audio,
                    (
                        stem_filepath,
                        data[idx],
                        sample_rate,
                        output_sample_rate,
                        self.codec,
                        self.bitrate
                    )
                )
                self._tasks.append(task)
            else:
                write_audio(
                    path=stem_filepath,
                    data=data[idx],
                    sample_rate=sample_rate,
                    output_sample_rate=output_sample_rate,
                    codec=self.codec,
                    bitrate=self.bitrate
                )
        if self.synchronous and self._pool is not None:
            self.join()
class ChannelsWriter(Writer):
    """Write stems using multichannel audio

    This Writer multiplexes stems into channels. Note, that
    the used container would need support for multichannel audio.
    E.g. `wav` works but `mp3` won't.

    Args:
        codec (str): Specifies ffmpeg codec being used.
            Defaults to `None` which automatically selects default
            codec for each container
        bitrate (int): Bitrate in Bits per second. Defaults to None
        output_sample_rate (float, optional): Optionally, applies
            resampling, if different to `sample_rate`.
            Defaults to `None` which uses the `sample_rate` given at
            call time.
    """
    def __init__(
        self,
        codec=None,
        bitrate=None,
        output_sample_rate=None
    ):
        self.codec = codec
        self.bitrate = bitrate
        self.output_sample_rate = output_sample_rate

    def __call__(
        self,
        data,
        path,
        sample_rate
    ):
        """
        For more than one stem, stems will be reshaped
        into the channel dimension, assuming we have
        stereo channels:
            (stems, samples, 2)->(nb_samples=samples, nb_channels=stems*2)
        mono channels:
            (stems, samples, 1)-> (nb_samples=samples, nb_channels=stems)

        Args:
            data (array): stems tensor of shape `(stems, samples, channel)`.
            path (str): path with extension.
            sample_rate (float): audio sample rate.
        """
        # Resolve the default into a local: storing it on `self` would make
        # the sample rate of the first call stick for all following calls.
        output_sample_rate = self.output_sample_rate
        if output_sample_rate is None:
            output_sample_rate = sample_rate

        nb_stems, nb_samples, nb_channels = data.shape

        # (stems, samples, channels) -> (samples, stems, channels)
        data = data.transpose(1, 0, 2)
        # aggregate stem and channels; the explicit size (instead of -1)
        # also works for zero-length audio
        data = data.reshape(nb_samples, nb_stems * nb_channels)

        # a single channel is passed on as a 1d signal; only the channel
        # axis is dropped so that a one-sample signal keeps its sample axis
        if data.shape[1] == 1:
            data = data[:, 0]

        write_audio(
            path=path,
            data=data,
            sample_rate=sample_rate,
            output_sample_rate=output_sample_rate,
            codec=self.codec,
            bitrate=self.bitrate
        )
class StreamsWriter(Writer):
    """Write stems using multi-stream audio.

    This writer saves the audio into a multistream format. Note,
    that the container needs to have support for multistream audio.
    E.g. supported formats are mp4, ogg.

    The `stem_names` are inserted into the metadata.
    Note that this writer converts to substreams using a
    temporary wav file written to disk. Therefore, writing can be slow.

    Args:
        codec (str): Specifies ffmpeg codec being used.
            Defaults to `None` which automatically selects default
            codec for each container
        bitrate (int): Bitrate in Bits per second. Defaults to None
        output_sample_rate (float): Optionally, applies
            resampling, if different to `sample_rate`.
            Defaults to `None` which uses the `sample_rate` given at
            call time.
        stem_names (list(str)): list of stem names that
            match the number of stems. Defaults to `None` which
            enumerates the stems: `['Stem 0', 'Stem 1', ...]`.
    """
    def __init__(
        self,
        codec=None,
        bitrate=None,
        output_sample_rate=None,
        stem_names=None
    ):
        self.codec = codec
        self.bitrate = bitrate
        self.output_sample_rate = output_sample_rate
        self.stem_names = stem_names

    def __call__(
        self,
        data,
        path,
        sample_rate,
    ):
        """
        Args:
            data (array): stems tensor of shape `(stems, samples, channel)`
            path (str): path with extension
            sample_rate (float): audio sample rate

        Raises:
            RuntimeError: if the number of stem names does not match the
                number of stems, or if the ffmpeg call fails.
            NotImplementedError: if stems are neither mono nor stereo.
        """
        nb_stems, nb_samples, nb_channels = data.shape

        # Resolve the defaults into locals: storing them on `self` would make
        # the values of the first call stick for every following call.
        output_sample_rate = self.output_sample_rate
        if output_sample_rate is None:
            output_sample_rate = sample_rate

        stem_names = self.stem_names
        if stem_names is None:
            stem_names = ["Stem " + str(k) for k in range(nb_stems)]

        # validate the layout before any audio is written to disk
        channel_map = _build_channel_map(
            nb_stems=nb_stems,
            nb_channels=nb_channels,
            stem_names=stem_names
        )

        # (stems, samples, channels) -> (samples, stems, channels)
        data = data.transpose(1, 0, 2)
        # aggregate stem and channels
        data = data.reshape(nb_samples, nb_stems * nb_channels)

        # stems as multistream file (real stems)
        # create temporary file and merge afterwards; the context manager
        # removes the file again, also when ffmpeg fails
        with tmp.NamedTemporaryFile(suffix='.wav') as tempfile:
            # write audio to temporary file
            write_audio(
                path=tempfile.name,
                data=data,
                sample_rate=sample_rate,
                output_sample_rate=output_sample_rate,
                codec='pcm_s16le'
            )

            # create the output folder if it does not exist yet
            Path(path).parent.mkdir(parents=True, exist_ok=True)

            # convert tempfile to multistem file, each stem occupies
            # `nb_channels` consecutive channels of the temporary file
            cmd = [
                FFMPEG_PATH,
                '-y',
                '-acodec', 'pcm_s16le',
                '-i', tempfile.name
            ]
            cmd += channel_map
            cmd += ['-vn']
            if self.codec is not None:
                cmd += ['-c:a', self.codec]
            cmd += [
                '-ar', "%d" % output_sample_rate,
                '-strict', '-2',
                '-loglevel', 'error'
            ]
            if self.bitrate is not None:
                cmd += ['-ab', str(self.bitrate)]
            cmd += [str(path)]

            try:
                sp.check_call(cmd)
            except sp.CalledProcessError as err:
                raise RuntimeError(err) from None
class NIStemsWriter(Writer):
    """Write stems using native instruments stems format

    This writer is similar to `StreamsWriter` except that certain defaults
    and metadata are adjusted to increase compatibility with Native Instruments
    Stems format. This writer should be used when users want to play back stems
    eg. using Traktor DJ.

    By definition, this format only supports _five_ audio streams where
    stream index 0 is the mixture.

    This writer creates intermediate temporary files, which can result in slow
    writing. Therefore, `StreamsWriter` should be used in all cases where
    Traktor compatibility is not necessary.

    Process is originally created by Native Instrument as shown here:
    https://github.com/axeldelafosse/stemgen/blob/909d9422af0738457303962262f99072a808d0c1/ni-stem/_internal.py#L38

    Args:
        default_metadata (Dict): Metadata to be injected into the mp4 substream.
            Defaults to `None` which loads the json file referenced by
            `stempeg.default_metadata()`. A passed dictionary is not modified.
        stems_metadata: List
            Set dictionary of track names and colors
            `[{'name': str, 'color': str (hex)}, ...]`
            Defaults to `None` which keeps the `'stems'` entry of
            the metadata.
        codec: str
            Specifies ffmpeg codec being used. Defaults to `aac`, in which
            case the codec is selected by `get_aac_codec()`.
        bitrate: int
            Bitrate in Bits per second. Defaults to 256000
        output_sample_rate Optional: float
            Sample rate of the written streams. Defaults to 44100.

    Raises:
        RuntimeError: if MP4Box is not available.
    """
    def __init__(
        self,
        default_metadata=None,
        stems_metadata=None,
        codec='aac',
        bitrate=256000,
        output_sample_rate=44100
    ):
        if not mp4box_exists():
            raise RuntimeError(
                'MP4Box could not be found! '
                'Please install them before using NIStemsWriter().'
                'See: https://github.com/faroit/stempeg'
            )
        self.mp4boxcli = find_cmd("MP4Box")
        self.bitrate = bitrate
        self.default_metadata = default_metadata
        self.stems_metadata = stems_metadata
        self.output_sample_rate = output_sample_rate
        self._suffix = '.m4a'  # internal suffix for temporary files
        if codec == 'aac':
            self.codec = get_aac_codec()
        else:
            self.codec = codec

    def __call__(
        self,
        data,
        path,
        sample_rate
    ):
        """
        Args:
            data: array
                stems tensor of shape `(5, samples, 2)`
            path: str
                path with extension
            sample_rate: float
                audio sample rate

        Raises:
            RuntimeError: if `data` is not a stack of five stereo stems or
                if the MP4Box call fails.
        """
        if data.ndim != 3:
            raise RuntimeError("Please pass multiple stems")

        # exactly two channels; a modulo test would also let 0 or 4 pass
        if data.shape[2] != 2:
            raise RuntimeError("Only stereo stems are supported")

        if data.shape[0] != 5:
            raise RuntimeError(
                "NI Stems requires 5 streams, where stream 0 is the mixture."
            )

        if data.shape[1] % 1024 != 0:
            logging.warning(
                "Number of samples does not divide by 1024, be aware that "
                "the AAC encoder add silence to the input signal"
            )

        # add metadata for NI compatibility
        if self.default_metadata is None:
            with open(stempeg.default_metadata()) as f:
                metadata = json.load(f)
        else:
            # shallow copy, so that replacing the 'stems' entry below does
            # not modify the dictionary owned by the caller
            metadata = dict(self.default_metadata)

        # replace stems metadata from dict
        if self.stems_metadata is not None:
            metadata['stems'] = self.stems_metadata

        # write m4a files to temporary folder
        with tmp.TemporaryDirectory() as tempdir:
            # one file per stem, named by stem index: 0.m4a, 1.m4a, ...
            write_stems(
                Path(tempdir, 'tmp' + self._suffix),
                data,
                sample_rate=sample_rate,
                writer=FilesWriter(
                    codec=self.codec,
                    bitrate=self.bitrate,
                    output_sample_rate=self.output_sample_rate,
                    stem_names=[str(k) for k in range(data.shape[0])]
                )
            )

            callArgs = [self.mp4boxcli]
            # stream 0 (the mixture) is added first, followed by the
            # output path
            callArgs.extend(
                [
                    "-add",
                    str(Path(tempdir, '0' + self._suffix + '#ID=Z')),
                    str(path)
                ]
            )
            # the remaining stems are added with the `disable` flag
            for s in range(1, data.shape[0]):
                callArgs.extend(
                    [
                        "-add",
                        str(Path(
                            tempdir,
                            str(s) + self._suffix + "#ID=Z:disable"
                        ))
                    ]
                )
            callArgs.extend(
                [
                    '-brand', 'M4A:0', '-rb', 'isom', '-rb', 'iso2',
                    "-udta",
                    # the metadata is embedded as base64 encoded json
                    "0:type=stem:src=base64," + base64.b64encode(
                        json.dumps(metadata).encode()
                    ).decode(),
                    "-quiet"
                ]
            )
            try:
                sp.check_call(callArgs)
            except sp.CalledProcessError as err:
                raise RuntimeError(err) from None
607 | 608 | Args: 609 | path (str): Output file name. 610 | Extension sets container (and default codec). 611 | data (array_like): Audio tensor. The data shape is formatted as 612 | `shape=(samples, channels)` or `(samples,)`. 613 | sample_rate (float): Samplerate. Defaults to 44100.0 Hz. 614 | output_sample_rate (float): Applies resampling, if different 615 | to `sample_rate`. Defaults to `None` which uses `sample_rate`. 616 | codec (str): Specifies ffmpeg codec being used. 617 | Defaults to `None` which automatically selects default 618 | codec for each container 619 | bitrate (int): Bitrate in Bits per second. Defaults to None 620 | """ 621 | 622 | # check if path is available and creat it 623 | Path(path).parent.mkdir(parents=True, exist_ok=True) 624 | 625 | if output_sample_rate is None: 626 | output_sample_rate = sample_rate 627 | 628 | if data.ndim == 1: 629 | nb_channels = 1 630 | elif data.ndim == 2: 631 | nb_channels = data.shape[-1] 632 | else: 633 | raise RuntimeError("Number of channels not supported") 634 | 635 | input_kwargs = {'ar': sample_rate, 'ac': nb_channels} 636 | output_kwargs = {'ar': output_sample_rate, 'strict': '-2'} 637 | if bitrate: 638 | output_kwargs['audio_bitrate'] = bitrate 639 | if codec is not None: 640 | output_kwargs['codec'] = codec 641 | process = ( 642 | ffmpeg 643 | .input('pipe:', format='f32le', **input_kwargs) 644 | .output(path, **output_kwargs) 645 | .overwrite_output() 646 | .run_async(pipe_stdin=True, pipe_stderr=True, quiet=True)) 647 | try: 648 | process.stdin.write(data.astype('>> stempeg.write_stems( 727 | >>> "test.stem.m4a", 728 | >>> data=stems, 729 | >>> sample_rate=44100.0 730 | >>> ) 731 | 732 | ## Example 2: Advanced Example 733 | 734 | Writing a dictionary as a bunch of MP3s, 735 | instead of a single file. 736 | We use `stempeg.FilesWriter`, outputs are named 737 | ["output/mix.mp3", "output/drums.mp3", ...], 738 | we pass `stem_names`; also apply multiprocessing. 
739 | 740 | >>> stems = { 741 | >>> "mix": stems[0], "drums": stems[1], 742 | >>> "bass": stems[2], "other": stems[3], 743 | >>> "vocals": stems[4], 744 | >>> } 745 | >>> stempeg.write_stems( 746 | >>> ("output", ".mp3"), 747 | >>> stems, 748 | >>> sample_rate=rate, 749 | >>> writer=stempeg.FilesWriter( 750 | >>> multiprocess=True, 751 | >>> output_sample_rate=48000, 752 | >>> stem_names=["mix", "drums", "bass", "other", "vocals"] 753 | >>> ) 754 | >>> ) 755 | 756 | 757 | 758 | """ 759 | # check if ffmpeg installed 760 | if int(stempeg.ffmpeg_version()[0]) < 3: 761 | warnings.warning( 762 | "Writing stems with FFMPEG version < 3 is unsupported", 763 | UserWarning 764 | ) 765 | 766 | if isinstance(data, dict): 767 | keys = data.keys() 768 | values = data.values() 769 | data = np.array(list(values)) 770 | stem_names = list(keys) 771 | if not isinstance(writer, (ChannelsWriter)): 772 | writer.stem_names = stem_names 773 | 774 | if data.ndim != 3: 775 | raise RuntimeError("Input tensor dimension should be 3d") 776 | 777 | return writer( 778 | path=path, 779 | data=data, 780 | sample_rate=sample_rate 781 | ) 782 | -------------------------------------------------------------------------------- /tests/test_random.py: -------------------------------------------------------------------------------- 1 | import stempeg 2 | import numpy as np 3 | import pytest 4 | 5 | 6 | @pytest.fixture(params=[1024, 2048, 100000]) 7 | def nb_samples(request): 8 | return request.param 9 | 10 | 11 | def test_shape(nb_samples): 12 | R = np.random.random((5, nb_samples, 2)) 13 | stempeg.write_stems("./random.stem.m4a", R, writer=stempeg.StreamsWriter()) 14 | S, rate = stempeg.read_stems( 15 | "./random.stem.m4a" 16 | ) 17 | 18 | assert S.shape[0] == R.shape[0] 19 | assert S.shape[2] == R.shape[2] 20 | assert S.shape[1] % 1024 == 0 21 | -------------------------------------------------------------------------------- /tests/test_read.py: 
def test_duration(start, duration):
    """Read an excerpt and check that its length matches `duration`.

    Combinations whose (non-zero) `start` is not smaller than the shortest
    stream duration are not read at all.
    """
    fp = stempeg.example_stem_path()
    info = stempeg.Info(fp)

    # guard clause: skip start offsets outside of the shortest stream
    if start and not (start < min(info.duration_streams)):
        return

    S, rate = stempeg.read_stems(
        fp,
        start=start,
        duration=duration
    )
    if duration is None:
        return
    assert S.shape[1] == int(duration * rate)
pytest.param( 79 | "AAC", "http://samples.ffmpeg.org/A-codecs/AAC/ct_nero-heaac.mp4", 80 | marks=pytest.mark.xfail 81 | ), 82 | pytest.param( 83 | "OGG", "http://samples.ffmpeg.org/A-codecs/vorbis/ffvorbis_crash.ogm", 84 | marks=pytest.mark.xfail 85 | ), 86 | ], 87 | ) 88 | def test_ffmpeg_format(format, path): 89 | Sint, _ = stempeg.read_stems( 90 | path, 91 | dtype=np.float32, 92 | ffmpeg_format="s16le" 93 | ) 94 | 95 | Sfloat, _ = stempeg.read_stems( 96 | path, 97 | dtype=np.float32, 98 | ffmpeg_format="f32le" 99 | ) 100 | assert np.allclose(Sint, Sfloat) 101 | 102 | 103 | def test_info(): 104 | fp = stempeg.example_stem_path() 105 | info = stempeg.Info(fp) 106 | S, rate = stempeg.read_stems(fp, info=info) 107 | -------------------------------------------------------------------------------- /tests/test_write.py: -------------------------------------------------------------------------------- 1 | from stempeg.write import ChannelsWriter 2 | import stempeg 3 | import numpy as np 4 | import pytest 5 | import tempfile as tmp 6 | import subprocess as sp 7 | import json 8 | import os 9 | import codecs 10 | 11 | 12 | @pytest.fixture(params=[1, 4]) 13 | def nb_stems(request): 14 | return request.param 15 | 16 | 17 | @pytest.fixture(params=[1, 2]) 18 | def nb_channels(request): 19 | return request.param 20 | 21 | 22 | @pytest.fixture(params=[4096, 4096*10]) 23 | def nb_samples(request): 24 | return request.param 25 | 26 | 27 | @pytest.fixture 28 | def audio(request, nb_stems, nb_samples, nb_channels): 29 | return np.random.random((nb_stems, nb_samples, nb_channels)) 30 | 31 | 32 | @pytest.fixture(params=["m4a"]) 33 | def multistream_format(request): 34 | return request.param 35 | 36 | 37 | @pytest.fixture(params=["m4a", "wav", "flac"]) 38 | def multichannel_format(request): 39 | return request.param 40 | 41 | 42 | @pytest.fixture(params=["mp3", "m4a", "wav", "flac"]) 43 | def multifile_format(request): 44 | return request.param 45 | 46 | 47 | def 
test_multistream_containers(audio, multistream_format, nb_stems): 48 | if nb_stems > 1: 49 | with tmp.NamedTemporaryFile( 50 | delete=False, 51 | suffix='.' + multistream_format 52 | ) as tempfile: 53 | stem_names = [str(k) for k in range(nb_stems)] 54 | stempeg.write_stems( 55 | tempfile.name, 56 | audio, 57 | sample_rate=44100, 58 | writer=stempeg.StreamsWriter( 59 | codec='aac', 60 | stem_names=stem_names 61 | ) 62 | ) 63 | loaded_audio, rate = stempeg.read_stems( 64 | tempfile.name, 65 | always_3d=True 66 | ) 67 | assert audio.shape == loaded_audio.shape 68 | if multistream_format == "m4a": 69 | info = stempeg.Info(tempfile.name) 70 | loaded_stem_names = info.title_streams 71 | # check if titles could be extracted 72 | assert all( 73 | [a == b for a, b in zip(stem_names, loaded_stem_names)] 74 | ) 75 | 76 | 77 | def test_multichannel_containers(audio, nb_channels, multichannel_format): 78 | with tmp.NamedTemporaryFile( 79 | delete=False, 80 | suffix='.' + multichannel_format 81 | ) as tempfile: 82 | stempeg.write_stems( 83 | tempfile.name, 84 | audio, 85 | sample_rate=44100, 86 | writer=ChannelsWriter() 87 | ) 88 | loaded_audio, rate = stempeg.read_stems( 89 | tempfile.name, 90 | always_3d=True, 91 | reader=stempeg.ChannelsReader(nb_channels=nb_channels) 92 | ) 93 | assert audio.shape == loaded_audio.shape 94 | 95 | 96 | def test_multifileformats(audio, multifile_format, nb_stems): 97 | with tmp.NamedTemporaryFile( 98 | delete=False, 99 | suffix='.' + multifile_format 100 | ) as tempfile: 101 | stem_names = [str(k) for k in range(nb_stems)] 102 | stempeg.write_stems( 103 | tempfile.name, 104 | audio, 105 | sample_rate=44100, 106 | writer=stempeg.FilesWriter(stem_names=stem_names) 107 | ) 108 | 109 | 110 | def test_channels(audio, multichannel_format): 111 | if audio.ndim == 1: 112 | with tmp.NamedTemporaryFile( 113 | delete=False, 114 | suffix='.' 
def ordered(obj):
    """Return an order-independent representation of a json-like object.

    Dictionaries become sorted lists of `(key, value)` pairs and lists are
    sorted, both recursively. Any other object is returned unchanged.
    """
    if isinstance(obj, dict):
        pairs = [(key, ordered(value)) for key, value in obj.items()]
        pairs.sort()
        return pairs
    if isinstance(obj, list):
        items = [ordered(item) for item in obj]
        items.sort()
        return items
    return obj
json.load(f) 186 | 187 | try: 188 | fileObj = codecs.open(udtaFile, encoding="utf-8") 189 | fileObj.seek(8) 190 | l_metadata = json.load(fileObj) 191 | except json.decoder.JSONDecodeError: 192 | with open(udtaFile) as json_file: 193 | l_metadata = json.load(json_file) 194 | 195 | assert ordered(l_metadata) == ordered(d_metadata) 196 | --------------------------------------------------------------------------------