├── .github └── workflows │ ├── build.yml │ ├── lint.yml │ └── test.yml ├── .gitignore ├── .ocamlformat ├── LICENSE ├── README.md ├── bench ├── read │ ├── bench.py │ ├── bench_read.sh │ ├── dune │ ├── mp3.svg │ ├── perf.ml │ └── wav.svg └── stft │ ├── dune │ ├── perf.ml │ ├── perf.py │ ├── stft_comparison_float32.svg │ ├── stft_comparison_float64.svg │ └── wav_stereo_44100hz_1s.wav ├── doc ├── dune └── index.mld ├── dune-project ├── soundml.opam ├── soundml_logo.svg ├── src ├── aformat.ml ├── aformat.mli ├── audio.ml ├── audio.mli ├── dune ├── effects │ ├── dune │ ├── filter │ │ ├── filter.ml │ │ ├── filter.mli │ │ ├── fir.ml │ │ ├── fir.mli │ │ ├── highpass.ml │ │ ├── highpass.mli │ │ ├── iir.ml │ │ ├── iir.mli │ │ ├── lowpass.ml │ │ └── lowpass.mli │ └── time │ │ ├── config │ │ ├── discover.ml │ │ └── dune │ │ ├── rubberband_stubs.cxx │ │ ├── time.ml │ │ └── time.mli ├── feature │ ├── dune │ └── spectral │ │ ├── spectral.ml │ │ └── spectral.mli ├── filterbank.ml ├── filterbank.mli ├── io │ ├── cio │ │ ├── README.md │ │ ├── common.hxx │ │ ├── read.hxx │ │ └── write.hxx │ ├── config │ │ ├── discover.ml │ │ └── dune │ ├── dune │ ├── io.ml │ ├── io.mli │ └── soundml_io.cxx ├── soundml.ml ├── transform.ml ├── transform.mli ├── types.ml ├── types.mli ├── utils.ml ├── utils.mli ├── window.ml └── window.mli └── test ├── README.md ├── dune ├── generate_audio.sh ├── generate_vectors.py ├── test_aformat.ml ├── test_filter.ml ├── test_filterbank.ml ├── test_stft.ml ├── test_time.ml ├── test_timeseries.ml ├── test_utils.ml ├── test_window.ml ├── test_write.ml ├── tutils.ml ├── tutils.mli ├── vutils.ml └── vutils.mli /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # Inspired by https://github.com/savonet/build-and-test-ocaml-module/blob/main/action.yml 2 | # Thanks for the savonet team for building such an amazing software :p 3 | name: Build 4 | 5 | on: 6 | push: 7 | branches: 8 | - 'main' 9 | pull_request: 10 | types: [opened, synchronize, reopened] 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest] # , macos-latest, windows-latest] 18 | ocaml-compiler: 19 | - "5.3.0" # Latest stable 20 | - "5.2.0" 21 | 22 | runs-on: ${{ matrix.os }} 23 | 24 | steps: 25 | - name: Checkout code 26 | uses: actions/checkout@v4 27 | 28 | - name: Set up OCaml ${{ matrix.ocaml-compiler }} on ${{ matrix.os }} 29 | uses: ocaml/setup-ocaml@v3 30 | with: 31 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 32 | dune-cache: true 33 | 34 | # temporary, waiting for the conf-soxr package to be published to opam repo 35 | - name: Install libsoxr-dev 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install -y libsoxr-dev 39 | shell: bash 40 | 41 | - name: Install SoundML 42 | run: opam install . --yes --confirm-level=unsafe-yes 43 | shell: bash -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint & Format 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'main' 7 | pull_request: 8 | types: [opened, synchronize, reopened] 9 | 10 | jobs: 11 | lint-fmt: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up OCaml for linting 19 | uses: ocaml/setup-ocaml@v3 20 | with: 21 | ocaml-compiler: "5.2" 22 | dune-cache: true 23 | - name: Run OCaml Lint & Format Check 24 | uses: ocaml/setup-ocaml/lint-fmt@v2 -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # Inspired by https://github.com/savonet/build-and-test-ocaml-module/blob/main/action.yml 2 | # Thanks for the savonet team for building such an amazing software :p 3 | 4 | name: Test 5 | 6 | on: 7 | push: 8 | branches: 9 | - 'main' 10 | pull_request: 11 | types: [opened, synchronize, reopened] 12 | 13 | jobs: 14 | test: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest] # , macos-latest, windows-latest] 19 | ocaml-compiler: 20 | - "5.3.0" # Latest stable 21 | - "5.2.0" 22 | include: 23 | - os: ubuntu-latest 24 | ocaml-compiler: "5.3.0" 25 | send-coverage: true 26 | 27 | runs-on: ${{ matrix.os }} 28 | 29 | steps: 30 | - name: Checkout code 31 | uses: actions/checkout@v4 32 | 33 | - name: Set up OCaml ${{ matrix.ocaml-compiler }} on ${{ matrix.os }} 34 | uses: ocaml/setup-ocaml@v3 35 | with: 36 | ocaml-compiler: ${{ matrix.ocaml-compiler }} 37 | dune-cache: true 38 | 39 | - uses: actions/setup-python@v5 40 | with: 41 | python-version: '3.12.3' 42 | - run: pip install --upgrade pip 43 | - run: pip install numpy librosa==0.11.0 44 | 45 | # temporary, waiting for the conf-soxr package to be published to opam repo 46 | - name: Install libsoxr-dev 47 | run: | 48 | sudo apt-get update 49 | sudo apt-get install -y libsoxr-dev 50 | shell: bash 51 | 52 | - name: Install FFmpeg CLI (to generate test data) 53 | run: sudo apt-get install -y ffmpeg 54 | shell: bash 55 | 56 | - name: Install SoundML dependencies (with test) 57 | run: opam install . --deps-only --with-test --yes --confirm-level=unsafe-yes --verbose 58 | shell: bash 59 | 60 | - name: Run tests 61 | if: matrix.send-coverage != true 62 | run: | 63 | opam exec -- dune runtest 64 | shell: bash 65 | 66 | - name: Run tests (and send converage to coveralls) 67 | if: matrix.send-coverage == true 68 | run: | 69 | opam exec -- dune runtest --instrument-with bisect_ppx --force 70 | opam exec -- bisect-ppx-report send-to Coveralls 71 | shell: bash 72 | env: 73 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} 74 | PULL_REQUEST_NUMBER: ${{ github.event.number }} 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | dune.lock 3 | dataset/ 4 | cache/ 5 | report/ 6 | *.pth 7 | .venv 8 | .vscode 9 | wav/ 10 | mp3/ -------------------------------------------------------------------------------- /.ocamlformat: -------------------------------------------------------------------------------- 1 | profile = ocamlformat 2 | wrap-comments = true 3 | version = 0.27.0 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | SoundML Logo 4 | 5 |

SoundML

6 |

A little and very high level library to perform basic operations on audio files in the OCaml language

7 | 8 |

· Documentation · Report Bug · Request Feature ·

9 | 10 | [![Build](https://github.com/gabyfle/SoundML/actions/workflows/build.yml/badge.svg)](https://github.com/gabyfle/SoundML/actions/workflows/build.yml) 11 | [![Test](https://github.com/gabyfle/SoundML/actions/workflows/test.yml/badge.svg)](https://github.com/gabyfle/SoundML/actions/workflows/test.yml) 12 | [![Coverage Status](https://coveralls.io/repos/github/gabyfle/SoundML/badge.svg)](https://coveralls.io/github/gabyfle/SoundML) 13 |
14 | 15 | ## About the Project 16 | 17 | > [!WARNING] 18 | > The project is still in development and is not yet ready for use. 19 | 20 | ## Features 21 | - A fast I/O for interacting with audio files 22 | - Feature extraction 23 | - Audio effects 24 | - Time stretching and pitch shifting 25 | - Filtering 26 | - IIR filters (Generic, Lowpass, Highpass) 27 | - Generic FIR filter implementation 28 | 29 | ## License 30 | 31 | Distributed under the Apache License Version 2.0. See LICENSE for more information. 32 | 33 | ## References 34 | 35 | - **McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto** (2015). *librosa: Audio and music signal analysis in python.* In Proceedings of the 14th python in science conference, pp. 18-25. 36 | 37 | - **Bellanger, M.** (2022). *Traitement numérique du signal. 10e édition.* Dunod. 38 | 39 | - **Wang, L., Zhao, J., & Mortier, R.** (2022). *OCaml Scientific Computing*. Springer International Publishing eBooks. DOI: [10.1007/978-3-030-97645-3](https://doi.org/10.1007/978-3-030-97645-3) 40 | 41 | - **Zoelzer, U.** (2002). *Dafx: Digital Audio Effects*. DOI: [10.1002/9781119991298](https://doi.org/10.1002/9781119991298) 42 | 43 | - **Müller, M.** (2015). *Fundamentals of Music Processing*. Cambridge International Law Journal. DOI: [10.1007/978-3-319-21945-5](https://doi.org/10.1007/978-3-319-21945-5) 44 | 45 | ## Acknowledgements 46 | 47 | * Logo generated with DALL-E by OpenAI 48 | -------------------------------------------------------------------------------- /bench/read/bench.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import time 6 | import argparse 7 | import librosa 8 | import numpy as np 9 | 10 | MIB_DIVISOR = 1024.0 * 1024.0 11 | 12 | 13 | def find_audio_files(root_dir, extension, max_files): 14 | filepaths = [] 15 | extension = extension.lower() 16 | 17 | count = 0 18 | try: 19 | for dirpath, _, filenames in os.walk(root_dir, topdown=True, onerror=None): 20 | relevant_filenames = [f for f in filenames if f.lower().endswith(extension)] 21 | for filename in relevant_filenames: 22 | if count < max_files: 23 | filepaths.append(os.path.join(dirpath, filename)) 24 | count += 1 25 | else: 26 | return filepaths 27 | if count >= max_files: 28 | break 29 | 30 | except OSError: 31 | pass 32 | 33 | return filepaths 34 | 35 | 36 | def get_file_size(filename) -> float: 37 | if not os.path.isfile(filename): 38 | return -1.0 39 | size = os.path.getsize(filename) 40 | if size <= 0: 41 | return 0.0 42 | return size / MIB_DIVISOR 43 | 44 | 45 | def benchmark_read(filename, target_sr) -> tuple[float, float]: 46 | size = get_file_size(filename) 47 | if size is None or size <= 0.0: 48 | return 0.0, 0.0 49 | 50 | sample_rate = target_sr if target_sr is not None and target_sr > 0 else None 51 | 52 | try: 53 | start_time = time.perf_counter() 54 | _audio, _sr = librosa.load( 55 | filename, sr=sample_rate, mono=False, dtype=np.float32 56 | ) 57 | end_time = time.perf_counter() 58 | duration = end_time - start_time 59 | if not isinstance(_audio, np.ndarray) or _audio.size == 0: 60 | return -1.0, -1.0 61 | 62 | return duration, size 63 | except FileNotFoundError: 64 | return -1.0, -1.0 65 | 66 | 67 | def run_benchmark(root_dir, sample_rate, extension, max_files): 68 | all_files = find_audio_files(root_dir, extension, max_files) 69 | nfound = len(all_files) 70 | 71 | if nfound == 0: 72 | sys.exit(0) 73 | 74 | warmup_count = min(5, nfound) 75 | if warmup_count > 0: 76 | warmup_files = all_files[:warmup_count] 77 | for f in warmup_files: 78 | _ = benchmark_read(f, sample_rate) 79 | 80 | total_time = 0.0 81 | total_size = 0.0 82 | 83 | files_to_process = all_files 84 | 85 | for filename in files_to_process: 86 | duration, size = benchmark_read(filename, sample_rate) 87 | 88 | if duration > 0 and size > 0: 89 | total_time += duration 90 | total_size += size 91 | 92 | if total_time > 0 and total_size > 0: 93 | avg_speed = total_size / total_time 94 | print(f"{avg_speed:.5f}") 95 | 96 | 97 | def main(): 98 | parser = argparse.ArgumentParser() 99 | parser.add_argument("root_directory", help=argparse.SUPPRESS) 100 | parser.add_argument("sample_rate", type=int, help=argparse.SUPPRESS) 101 | parser.add_argument("format", help=argparse.SUPPRESS) 102 | parser.add_argument("max_files", type=int, help=argparse.SUPPRESS) 103 | 104 | if len(sys.argv) != 5: 105 | print( 106 | f"Usage: {sys.argv[0]} ", 107 | file=sys.stderr, 108 | ) 109 | sys.exit(1) 110 | 111 | args = parser.parse_args() 112 | if not os.path.isdir(args.root_directory): 113 | sys.exit(1) 114 | 115 | if args.sample_rate < 0: 116 | sys.exit(1) 117 | 118 | if args.max_files <= 0: 119 | sys.exit(1) 120 | 121 | ext = args.format.lstrip(".") 122 | 123 | run_benchmark(args.root_directory, args.sample_rate, ext, args.max_files) 124 | 125 | 126 | if __name__ == "__main__": 127 | main() 128 | -------------------------------------------------------------------------------- /bench/read/bench_read.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | OCAML_CMD="dune exec ./bench/read/perf.exe" 4 | PYTHON_CMD="python3 bench.py" 5 | CACHE_CLEAR_CMD="sync; echo 3 > /proc/sys/vm/drop_caches" 6 | 7 | if [ "$#" -ne 6 ]; then 8 | echo "Usage: $0 " 9 | echo " : 'ocaml' or 'python'" 10 | exit 1 11 | fi 12 | 13 | MODE=$1 14 | shift 15 | 16 | NUM_ITERATIONS=$1 17 | ROOT_DIR=$2 18 | SAMPLE_RATE=$3 19 | FORMAT=$4 20 | MAX_FILES=$5 21 | 22 | if [[ "$MODE" != "ocaml" && "$MODE" != "python" ]]; then 23 | echo "Error: must be either 'ocaml' or 'python'. You provided '$MODE'." 24 | echo "Usage: $0 " 25 | exit 1 26 | fi 27 | 28 | if ! [[ "$NUM_ITERATIONS" =~ ^[1-9][0-9]*$ ]]; then 29 | echo "Error: ('$NUM_ITERATIONS') must be a positive integer." >&2 30 | exit 1 31 | fi 32 | 33 | BENCH_CMD="" 34 | if [[ "$MODE" == "ocaml" ]]; then 35 | BENCH_CMD="$OCAML_CMD" 36 | elif [[ "$MODE" == "python" ]]; then 37 | BENCH_CMD="$PYTHON_CMD" 38 | fi 39 | 40 | results_array=() 41 | valid_run_count=0 42 | 43 | echo "Starting reading test for: $MODE" 44 | echo "Command to run: $BENCH_CMD \"$ROOT_DIR\" \"$SAMPLE_RATE\" \"$FORMAT\" \"$MAX_FILES\"" 45 | echo "Number of iterations: $NUM_ITERATIONS" 46 | echo "--------------------------------------------------" 47 | 48 | for (( i=1; i<=NUM_ITERATIONS; i++ )); do 49 | echo "Iteration $i / $NUM_ITERATIONS" 50 | if sudo bash -c "$CACHE_CLEAR_CMD"; then # ensure that files aren't cached by the system 51 | sleep 1.5 52 | else 53 | echo "Error: Failed to clear cache. Probably error with sudo." >&2 54 | exit 1 55 | fi 56 | result=$( $BENCH_CMD "$ROOT_DIR" "$SAMPLE_RATE" "$FORMAT" "$MAX_FILES" ) 57 | exit_status=$? 58 | 59 | if [ $exit_status -ne 0 ]; then 60 | continue 61 | fi 62 | 63 | if [[ "$result" =~ ^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$ ]]; then 64 | echo "Result: $result MiB/s" 65 | results_array+=("$result") 66 | ((valid_run_count++)) 67 | fi 68 | sleep 1 69 | done 70 | 71 | echo "--------------------------------------------------" 72 | 73 | num_results=${#results_array[@]} 74 | 75 | if [ "$num_results" -eq 0 ]; then 76 | exit 1 77 | fi 78 | stats=$(printf "%s\n" "${results_array[@]}" | awk ' 79 | NF == 0 { next } 80 | { 81 | if ($1 ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/) { 82 | sum += $1; 83 | sumsq += $1*$1; 84 | count++; 85 | } 86 | } 87 | END { 88 | if (count > 0) { 89 | mean = sum / count; 90 | if (count > 1) { 91 | variance = (sumsq - (sum*sum)/count) / (count-1); 92 | if (variance < 1e-12) variance = 0; 93 | stdev = sqrt(variance); 94 | } else { 95 | stdev = 0; # Standard deviation is undefined/0 for a single point 96 | } 97 | printf "%.6f %.6f %d", mean, stdev, count; 98 | } else { 99 | print "NaN NaN 0"; 100 | } 101 | } 102 | ') 103 | read -r mean stdev count <<< "$stats" 104 | 105 | if [[ -z "$mean" || -z "$stdev" || -z "$count" || "$count" -eq 0 ]]; then 106 | echo "Error: Failed to calculate statistics. Awk output: '$stats'" >&2 107 | exit 1 108 | fi 109 | 110 | echo "Performance Test Summary ($MODE):" 111 | echo "-------------------------" 112 | echo "Command: $BENCH_CMD \"$ROOT_DIR\" \"$SAMPLE_RATE\" \"$FORMAT\" \"$MAX_FILES\"" 113 | printf "Mean Speed (MiB/s): %.6f\n" "$mean" 114 | if [ "$count" -gt 1 ]; then 115 | printf "Std Dev Speed: %.6f\n" "$stdev" 116 | else 117 | printf "Std Dev Speed: N/A (requires >= 2 data points)\n" 118 | fi 119 | 120 | exit 0 121 | -------------------------------------------------------------------------------- /bench/read/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name perf) 3 | (libraries soundml)) 4 | -------------------------------------------------------------------------------- /bench/read/perf.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | open Unix 3 | 4 | let mb_divisor = 1024. *. 1024. 5 | 6 | let is_ext_file filename ext = 7 | String.lowercase_ascii (Filename.extension filename) = ext 8 | 9 | let find_ext_files root_dir ext = 10 | let rec find acc dir = 11 | try 12 | let dh = opendir dir in 13 | try 14 | let rec loop acc = 15 | match readdir dh with 16 | | exception End_of_file -> 17 | closedir dh ; acc 18 | | "." | ".." -> 19 | loop acc 20 | | entry -> ( 21 | let full_path = Filename.concat dir entry in 22 | try 23 | match (stat full_path).st_kind with 24 | | S_REG when is_ext_file full_path ext -> 25 | loop (full_path :: acc) 26 | | S_DIR -> 27 | loop (find acc full_path) 28 | | _ -> 29 | loop acc 30 | with Unix_error (_, _, _) -> loop acc ) 31 | in 32 | loop acc 33 | with ex -> 34 | closedir dh ; 35 | eprintf "\nError reading directory '%s': %s\n%!" dir 36 | (Printexc.to_string ex) ; 37 | acc 38 | with Unix_error (e, _, p) -> 39 | eprintf "\nError opening directory '%s': %s\n%!" p (error_message e) ; 40 | acc 41 | in 42 | find [] root_dir 43 | 44 | let get_file_size filename = 45 | try 46 | let stats = stat filename in 47 | if stats.st_kind = S_REG then Ok (float_of_int stats.st_size /. mb_divisor) 48 | else Error (sprintf "Not a regular file: %s" filename) 49 | with 50 | | Unix_error (e, _, _) -> 51 | Error (sprintf "Cannot stat file '%s': %s" filename (error_message e)) 52 | | Sys_error msg -> 53 | Error (sprintf "System error statting '%s': %s" filename msg) 54 | 55 | let benchmark_read kind filename sample_rate = 56 | match get_file_size filename with 57 | | Error msg -> 58 | Error (filename, msg) 59 | | Ok size_mb -> ( 60 | if size_mb <= 0.0 then Error (filename, "Incorrect file size") 61 | else 62 | try 63 | let res_typ = 64 | match sample_rate with 0 -> Io.NONE | _ -> Io.SOXR_HQ 65 | in 66 | let start_time = Unix.gettimeofday () in 67 | let _audio = 68 | Soundml.Io.read ~res_typ ~sample_rate ~mono:false kind filename 69 | in 70 | let end_time = Unix.gettimeofday () in 71 | let duration = end_time -. start_time in 72 | Ok (duration, size_mb) 73 | with ex -> Error (filename, Printexc.to_string ex) ) 74 | 75 | let run_benchmark root sample_rate extension max_files = 76 | let kind = Bigarray.Float32 in 77 | let all_files = find_ext_files root extension in 78 | let all_files = 79 | List.filteri (fun i _ -> if i >= max_files then false else true) all_files 80 | in 81 | let total_files = List.length all_files in 82 | if total_files = 0 then exit 0 ; 83 | let warmup_count = min 5 total_files in 84 | ( if warmup_count > 0 then 85 | let warmup_files = List.filteri (fun i _ -> i < warmup_count) all_files in 86 | List.iter 87 | (fun f -> 88 | match benchmark_read kind f sample_rate with 89 | | Ok _ -> 90 | () 91 | | Error _ -> 92 | () ) 93 | warmup_files ) ; 94 | let total_time = ref 0.0 in 95 | let total_size = ref 0.0 in 96 | List.iter 97 | (fun filename -> 98 | match benchmark_read kind filename sample_rate with 99 | | Ok (duration, size_mb) -> 100 | total_time := !total_time +. duration ; 101 | total_size := !total_size +. size_mb 102 | | Error _ -> 103 | () ) 104 | all_files ; 105 | if !total_time > 0.0 && !total_size > 0.0 then 106 | let avg_speed = !total_size /. !total_time in 107 | printf "%.5f\n" avg_speed 108 | 109 | let () = 110 | if Array.length Sys.argv <> 5 then 111 | eprintf "Usage: %s \n" 112 | Sys.argv.(0) 113 | else 114 | let root_dir = Sys.argv.(1) in 115 | let sample_rate = int_of_string Sys.argv.(2) in 116 | let extension = Sys.argv.(3) in 117 | let max_files = int_of_string Sys.argv.(4) in 118 | if not (Sys.file_exists root_dir && Sys.is_directory root_dir) then 119 | eprintf "Can't read directory: %s.\n" root_dir 120 | else 121 | try run_benchmark root_dir sample_rate extension max_files 122 | with ex -> 123 | eprintf "An unexpected error occurred: %s\n" (Printexc.to_string ex) ; 124 | exit 1 125 | -------------------------------------------------------------------------------- /bench/stft/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name perf) 3 | (libraries core core_bench soundml)) 4 | -------------------------------------------------------------------------------- /bench/stft/perf.ml: -------------------------------------------------------------------------------- 1 | open! Core 2 | open! Core_bench 3 | open Soundml 4 | 5 | let path = Sys_unix.getcwd () ^ "/bench/stft/wav_stereo_44100hz_1s.wav" 6 | 7 | let f32audio = Audio.data @@ Io.read ~res_typ:Io.NONE Bigarray.Float32 path 8 | 9 | let f64audio = Audio.data @@ Io.read ~res_typ:Io.NONE Bigarray.Float64 path 10 | 11 | let main () = 12 | Command_unix.run 13 | (Bench.make_command 14 | [ Bench.Test.create ~name:"float32" (fun () -> 15 | ignore (Transform.stft Types.B32 f32audio) ) 16 | ; Bench.Test.create ~name:"float64" (fun () -> 17 | ignore (Transform.stft Types.B64 f64audio) ) ] ) 18 | 19 | let () = main () 20 | -------------------------------------------------------------------------------- /bench/stft/perf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import librosa 3 | import numpy as np 4 | import os 5 | 6 | AUDIO_FILE_PATH = os.path.join(os.getcwd(), "bench/stft/wav_stereo_44100hz_1s.wav") 7 | 8 | STFT_CONFIGURATIONS = [ 9 | {"n_fft": 2048, "win_length": 2048, "hop_length": 512, "window_type": "hann"} 10 | ] 11 | 12 | y_f32, sr_f32 = librosa.load(AUDIO_FILE_PATH, sr=None, mono=True, dtype=np.float32) 13 | y_f64, sr_f64 = librosa.load(AUDIO_FILE_PATH, sr=None, mono=True, dtype=np.float64) 14 | SIGNAL_LENGTH = len(y_f32) 15 | 16 | @pytest.mark.parametrize("config", STFT_CONFIGURATIONS) 17 | @pytest.mark.parametrize("precision", ["float32", "float64"]) 18 | def test_librosa_stft(benchmark, config, precision): 19 | if precision == "float32": 20 | audio_data = y_f32 21 | elif precision == "float64": 22 | audio_data = y_f64 23 | else: 24 | raise ValueError("Invalid precision") 25 | 26 | result = benchmark( 27 | librosa.stft, 28 | y=audio_data, 29 | n_fft=config["n_fft"], 30 | hop_length=config["hop_length"], 31 | win_length=config["win_length"], 32 | window=config["window_type"], 33 | center=False 34 | ) 35 | assert result is not None 36 | -------------------------------------------------------------------------------- /bench/stft/wav_stereo_44100hz_1s.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gabyfle/SoundML/95f3d969cc225781b22d8ffa86c4d5e531fb0db8/bench/stft/wav_stereo_44100hz_1s.wav -------------------------------------------------------------------------------- /doc/dune: -------------------------------------------------------------------------------- 1 | (documentation 2 | (package soundml) 3 | (mld_files index)) 4 | -------------------------------------------------------------------------------- /doc/index.mld: -------------------------------------------------------------------------------- 1 | {0 SoundML} 2 | 3 | {%html: 4 |
5 | SoundML logo 6 |
7 | %} 8 | 9 | {!Soundml} is an OCaml library that enables audio processing. It provides tools to read, write, extract features and manipulate audio as time series data. 10 | The library is built on top of the Owl library and provides a high-level API to work with audio data. It's still in an experimental stage. 11 | 12 | 13 | {1 API Documentation} 14 | 15 | To see the developer API documentation for SoundML, you can visit the following link: {{!Soundml}here}. 16 | 17 | {1 Author} 18 | 19 | {{:https://gabyfle.dev}Gabriel "gabyfle" Santamaria} -------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.18) 2 | 3 | (name soundml) 4 | 5 | (generate_opam_files true) 6 | 7 | (source 8 | (github gabyfle/SoundML)) 9 | 10 | (authors "Gabriel Santamaria ") 11 | 12 | (maintainers "Gabriel Santamaria ") 13 | 14 | (license Apache-2.0) 15 | 16 | (documentation https://soundml.gabyfle.dev) 17 | 18 | (package 19 | (name soundml) 20 | (synopsis "An OCaml library to embed sound processing in your applications") 21 | (description 22 | "SoundML is a library built on top of Owl to analyse sounds files. It can read, write audio, extract various features from audio files and much more.") 23 | (depends 24 | (ocaml 25 | (>= 5.2.0)) 26 | dune 27 | (conf-sndfile :build) 28 | (conf-rubberband :build) 29 | ; Samplerate is already a requirement of rubberband 30 | (conf-samplerate :build) 31 | (dune-configurator :build) 32 | (owl ; Version 1.2 is required to have the DC transform 33 | (>= 1.2)) 34 | (odoc :with-doc) 35 | (alcotest :with-test) 36 | (yojson :with-test) 37 | (bisect_ppx 38 | (and 39 | :with-test 40 | (>= 2.5.0)))) 41 | (tags 42 | (sound spectrogram fourier digital-filters))) 43 | -------------------------------------------------------------------------------- /soundml.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | synopsis: "An OCaml library to embed sound processing in your applications" 4 | description: 5 | "SoundML is a library built on top of Owl to analyse sounds files. It can read, write audio, extract various features from audio files and much more." 6 | maintainer: ["Gabriel Santamaria "] 7 | authors: ["Gabriel Santamaria "] 8 | license: "Apache-2.0" 9 | tags: ["sound" "spectrogram" "fourier" "digital-filters"] 10 | homepage: "https://github.com/gabyfle/SoundML" 11 | doc: "https://soundml.gabyfle.dev" 12 | bug-reports: "https://github.com/gabyfle/SoundML/issues" 13 | depends: [ 14 | "ocaml" {>= "5.2.0"} 15 | "dune" {>= "3.18"} 16 | "conf-sndfile" {build} 17 | "conf-rubberband" {build} 18 | "conf-samplerate" {build} 19 | "dune-configurator" {build} 20 | "owl" {>= "1.2"} 21 | "odoc" {with-doc} 22 | "alcotest" {with-test} 23 | "yojson" {with-test} 24 | "bisect_ppx" {with-test & >= "2.5.0"} 25 | ] 26 | build: [ 27 | ["dune" "subst"] {dev} 28 | [ 29 | "dune" 30 | "build" 31 | "-p" 32 | name 33 | "-j" 34 | jobs 35 | "@install" 36 | "@runtest" {with-test} 37 | "@doc" {with-doc} 38 | ] 39 | ] 40 | dev-repo: "git+https://github.com/gabyfle/SoundML.git" 41 | x-maintenance-intent: ["(latest)"] 42 | -------------------------------------------------------------------------------- /src/aformat.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023-2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | (** 22 | The {!Aformat} (audio format) module is an abstraction over the different supported audio format from libsndfile. *) 23 | 24 | type ftype = 25 | | WAV 26 | | AIFF 27 | | AU 28 | | RAW 29 | | PAF 30 | | SVX 31 | | NIST 32 | | VOC 33 | | IRCAM 34 | | W64 35 | | MAT4 36 | | MAT5 37 | | PVF 38 | | XI 39 | | HTK 40 | | SDS 41 | | AVR 42 | | WAVEX 43 | | SD2 44 | | FLAC 45 | | CAF 46 | | WVE 47 | | OGG 48 | | MPC2K 49 | | RF64 50 | | MP3 51 | 52 | type subtype = 53 | | PCM_S8 54 | | PCM_16 55 | | PCM_24 56 | | PCM_32 57 | | PCM_U8 58 | | FLOAT 59 | | DOUBLE 60 | | ULAW 61 | | ALAW 62 | | IMA_ADPCM 63 | | MS_ADPCM 64 | | GSM610 65 | | VOX_ADPCM 66 | | NMS_ADPCM_16 67 | | NMS_ADPCM_24 68 | | NMS_ADPCM_32 69 | | G721_32 70 | | G723_24 71 | | G723_40 72 | | DVW_12 73 | | DVW_16 74 | | DVW_24 75 | | DVW_N 76 | | DPCM_8 77 | | DPCM_16 78 | | VORBIS 79 | | OPUS 80 | | ALAC_16 81 | | ALAC_20 82 | | ALAC_24 83 | | ALAC_32 84 | | MPEG_LAYER_I 85 | | MPEG_LAYER_II 86 | | MPEG_LAYER_III 87 | 88 | type endianness = FILE | LITTLE | BIG | CPU 89 | 90 | (** The type for an audio format specification. *) 91 | type t = {ftype: ftype; sub: subtype; endian: endianness} 92 | 93 | val create : 94 | ?subtype:subtype -> ?endian:endianness -> ftype -> (t, string) result 95 | (** 96 | [create ?subtype ?endian ftype] creates a new audio format representation based on the given format specifications.s 97 | 98 | {2 Parameters} 99 | @param subtype is the subtype of the audio file. If not specified, it'll be set to a default value according to the file type. 100 | @param endian is the endianness of the audio file. If not specified, it'll be set to [FILE], which is the default file endianness. 101 | @param ftype is the file type of the audio file. 102 | 103 | {2 Returns} 104 | 105 | @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. 106 | 107 | {2 Usage} 108 | 109 | Creating a new audio format is as simple as calling the [create] function with the desired parameters. 110 | 111 | For the [RAW] file type, the subtype is required. Not specifying one will result in an error. 112 | 113 | {[ 114 | open Soundml.Io 115 | (* This will create a new WAV audio format with PCM_16 subtype and little endian. *) 116 | let fmt = Afmt.create ~subtype:Io.Afmt.PCM_16 ~endian:Io.Afmt.LITTLE Io.Aformat.WAV in 117 | ]} *) 118 | 119 | val to_int : t -> int 120 | (** 121 | [to_int fmt] converts the audio format to an integer representation compatible with libsndfile. 122 | 123 | {2 Parameters} 124 | @param fmt the format that we need to convert to an integer value. 125 | 126 | {2 Returns} 127 | @return The integer representation of the audio format. *) 128 | 129 | val of_int : int -> (t, string) result 130 | (** 131 | [of_int code] converts the integer representation of the audio format to a {!Aformat.t} type. 132 | 133 | {2 Parameters} 134 | @param code is the integer representation of the audio format we're trying to convert. 135 | 136 | @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. *) 137 | 138 | val of_ext : ?sub:subtype -> ?endian:endianness -> string -> (t, string) result 139 | (** 140 | [of_ext ?sub ?endian ext] tries to convert the given file extension to an audio format type. 141 | 142 | This function assumes that the extension is given with its leading dot (e.g. [".wav"]) and is thus compatible with the [Filename] module. 143 | 144 | {2 Parameters} 145 | @param sub is the subtype of the audio file. If not specified, it'll be set to a default value according to the file type. 146 | @param endian is the endianness of the audio file. If not specified, it'll be set to [FILE], which is the default file endianness. 147 | @param ext is the file extension we're trying to convert. 148 | 149 | {2 Returns} 150 | @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. *) 151 | 152 | val pp : Format.formatter -> t -> unit 153 | (** 154 | [pp fmt] pretty prints the audio format to the given formatter. 155 | 156 | {2 Parameters} 157 | @param fmt is the formatter to use for printing the audio format. *) 158 | -------------------------------------------------------------------------------- /src/audio.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Owl 23 | 24 | (* generic multi-dimensionnal array *) 25 | module G = Dense.Ndarray.Generic 26 | module Aformat = Aformat 27 | 28 | module Metadata = struct 29 | type t = 30 | { name: string 31 | ; frames: int 32 | ; channels: int 33 | ; sample_rate: int 34 | ; format: Aformat.t } 35 | 36 | let create ?(name : string = "Unknown") frames channels sample_rate format = 37 | {name; frames; channels; sample_rate; format} 38 | 39 | let name (m : t) = m.name 40 | 41 | let frames (m : t) = m.frames 42 | 43 | let channels (m : t) = m.channels 44 | 45 | let sample_rate (m : t) = m.sample_rate 46 | 47 | let format (m : t) = m.format 48 | end 49 | 50 | type 'a audio = {meta: Metadata.t; data: (float, 'a) G.t} 51 | 52 | let create (meta : Metadata.t) data = {meta; data} 53 | 54 | let meta (a : 'a audio) = a.meta 55 | 56 | let rawsize (a : 'a audio) = G.numel a.data 57 | 58 | let length (a : 'a audio) : int = 59 | let meta = meta a in 60 | let channels = float_of_int (Metadata.channels meta) in 61 | let sr = float_of_int (Metadata.sample_rate meta) in 62 | let size = float_of_int (rawsize a) /. channels in 63 | Int.of_float (size /. sr *. 1000.) 64 | 65 | let data (a : 'a audio) = a.data 66 | 67 | let sr (a : 'a audio) = Metadata.sample_rate @@ meta a 68 | 69 | let channels (a : 'a audio) = Metadata.channels @@ meta a 70 | 71 | let samples (a : 'a audio) = Metadata.frames @@ meta a 72 | 73 | let format (a : 'a audio) = Metadata.format @@ meta a 74 | 75 | let set_data (a : 'a audio) (d : (float, 'a) G.t) = {a with data= d} 76 | 77 | let sample_pos (a : 'a audio) (x : int) = 78 | Int.of_float 79 | ( float_of_int x /. 1000. 80 | *. float_of_int (Metadata.sample_rate (meta a)) 81 | *. float_of_int (Metadata.channels (meta a)) ) 82 | 83 | let get_slice (slice : int * int) (a : 'a audio) : 'a audio = 84 | let x, y = slice in 85 | let x, y = 86 | match (sample_pos a x, sample_pos a y) with 87 | | x, y when x < 0 -> 88 | (rawsize a + x, y) 89 | | x, y when y < 0 -> 90 | (x, rawsize a + y) 91 | | x, y when x < 0 && y < 0 -> 92 | (rawsize a + x, rawsize a + y) 93 | | x, y -> 94 | (x, y) 95 | in 96 | let x, y = if x < y then (x, y) else (y, x) in 97 | if x < 0 || y < 0 then 98 | raise 99 | (Invalid_argument "Audio.get_slice: slice out of bounds, negative values") 100 | else if x >= rawsize a || y >= rawsize a then 101 | raise 102 | (Invalid_argument 103 | "Audio.get_slice: slice out of bounds, values greater than rawsize" ) 104 | else 105 | let data = G.get_slice [[x; y]] a.data in 106 | {a with data} 107 | 108 | let get (x : int) (a : 'a audio) : float = 109 | let slice = get_slice (x, x) a |> data in 110 | G.get slice [|0|] 111 | 112 | let normalize ?(factor : float = 2147483647.) (a : 'a audio) : unit = 113 | G.scalar_mul_ (1. /. factor) a.data 114 | 115 | let reverse (x : 'a audio) : 'a audio = 116 | let data = G.reverse x.data in 117 | {x with data} 118 | 119 | let ( .${} ) x s = get_slice s x 120 | 121 | let ( .%{} ) i x = get x i 122 | 123 | let ( $/ ) x f = normalize ~factor:f x 124 | 125 | let ( /$ ) f x = normalize ~factor:f x 126 | -------------------------------------------------------------------------------- /src/audio.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | (** 23 | The {!Audio} module defines the needed types around the representation of 24 | an audio file and more precisely an audio data. *) 25 | 26 | open Owl 27 | 28 | (** 29 | Alias of the generic [Ndarray] datastructure from [Owl]. This is used 30 | internally to make the computations around the audio data *) 31 | module G = Dense.Ndarray.Generic 32 | 33 | (** @canonical Audio.Aformat 34 | Abstraction over the different supported audio format from libsndfile. *) 35 | module Aformat = Aformat 36 | 37 | (** 38 | {1 Audio Metadata} 39 | 40 | This module contains the metadata of an audio file, which is used to store 41 | information about the audio file when reading it from the filesystem. 42 | 43 | Note: {!Metadata} in {!Soundml} isn't the same thing as the metadata attached 44 | to audio files. In {!Soundml}, we refer to {!Metadata} all the data describing 45 | the audio file in itself (sample rate, number of channels, etc...). If you are 46 | interested in dealing with author name, label and other metadata, we recommend 47 | using the {{:https://github.com/savonet/ocaml-mm} ocaml-mm} library instead. *) 48 | 49 | module Metadata : sig 50 | type t 51 | 52 | val create : ?name:string -> int -> int -> int -> Aformat.t -> t 53 | (** 54 | [create ?name channels frames sample_rate format] creates a new metadata 55 | with the given name, number of channels, number of frames, sample rate 56 | and format. The name is optional and defaults to [""] *) 57 | 58 | val name : t -> string 59 | (** 60 | [name meta] returns the name of the file represented by the metadata *) 61 | 62 | val frames : t -> int 63 | 64 | val channels : t -> int 65 | (** 66 | [channels meta] returns the number of channels of the audio file *) 67 | 68 | val sample_rate : t -> int 69 | (** 70 | [sample_rate meta] returns the sample rate of the audio file *) 71 | 72 | val format : t -> Aformat.t 73 | (** 74 | [format meta] returns the format of the audio file *) 75 | end 76 | 77 | (** 78 | {1 Audio manipulation} 79 | 80 | Most of these functions are used internally, and you'll probably just use the {!Audio.normalize} 81 | function to normalize the audio data before writing it back to a file. *) 82 | 83 | (** 84 | High level representation of an audio file data, used to store data when reading audio files. *) 85 | type 'a audio 86 | 87 | val create : Metadata.t -> (float, 'a) G.t -> 'a audio 88 | (** 89 | [create metadata data] creates a new audio with the given name and metadata *) 90 | 91 | val meta : 'a audio -> Metadata.t 92 | (** 93 | [meta audio] returns the metadata attached to the given audio *) 94 | 95 | val rawsize : 'a audio -> int 96 | (** 97 | [rawsize audio] returns the raw size of the given audio *) 98 | 99 | val length : 'a audio -> int 100 | (** 101 | [length audio] returns the length (in milliseconds) of the given audio *) 102 | 103 | val data : 'a audio -> (float, 'a) Owl.Dense.Ndarray.Generic.t 104 | (** 105 | [data audio] returns the data of the given audio *) 106 | 107 | val sr : 'a audio -> int 108 | (** 109 | [sr audio] returns the sample rate of the given audio *) 110 | 111 | val channels : 'a audio -> int 112 | (** 113 | [channels audio] returns the number of channels of the given audio *) 114 | 115 | val samples : 'a audio -> int 116 | (** 117 | [samples audio] returns the number of samples per channel in the given audio *) 118 | 119 | val format : 'a audio -> Aformat.t 120 | (** 121 | [format audio] returns the format of the given audio *) 122 | 123 | val set_data : 'a audio -> (float, 'a) Owl.Dense.Ndarray.Generic.t -> 'a audio 124 | (** 125 | [set_data audio data] sets the data of the given audio *) 126 | 127 | val get : int -> 'a audio -> float 128 | (** 129 | [get x audio] returns the sample located at the position [x] in milliseconds. 130 | 131 | The position [x] must be between 0 and the length of the audio. 132 | 133 | Example: 134 | 135 | {[ 136 | let audio = Audio.read "audio.wav" in 137 | let sample = Audio.get 1000 audio in (* get the sample at 1 second *) 138 | ]} *) 139 | 140 | val get_slice : int * int -> 'a audio -> 'a audio 141 | (** 142 | [get_slice (start, stop) audio] returns a slice of the audio from the position [start] to [stop]. 143 | 144 | The position [start] and [stop] must be between 0 and the length of the audio. 145 | 146 | This function works like Owl's slicing. Giving negative values to [start] and [stop] will slice the audio 147 | from the end of the audio . 148 | 149 | Example: 150 | 151 | {[ 152 | let audio = Audio.read "audio.wav" in 153 | let slice = Audio.get_slice audio 1000 2000 in (* get the slice from 1 to 2 seconds *) 154 | ]} *) 155 | 156 | val normalize : ?factor:float -> 'a audio -> unit 157 | (** 158 | [normalize ?factor audio] normalizes the data of the given audio data by 159 | the [?factor] parameter, by default equal to $2^31 - 1$. 160 | 161 | Use this function when you need to normalize the audio data by a certain factor. 162 | 163 | Warning: if you normalize the data and end up getting values that goes 164 | beyond 1.0 or under -1.0, it will surely make the audio sound distorted. 165 | 166 | The operation is performed in place (impure function). 167 | 168 | Example: 169 | 170 | {[ 171 | let audio = Audio.read "audio.wav" in 172 | (* you can perform any operation here *) 173 | (* ... *) 174 | let factor = (* ... *) in 175 | Audio.normalize ?factor audio; (* normalizing before writing *) 176 | Audio.write audio "audio.wav" 177 | ]} *) 178 | 179 | val reverse : 'a audio -> 'a audio 180 | (** 181 | [reverse audio] reverses the audio data. 182 | This function does not operate in place: a new audio is created with the reversed data. 183 | 184 | Example: 185 | 186 | {[ 187 | let audio = Audio.read "audio.wav" in 188 | let audio = Audio.reverse audio in 189 | Audio.write audio "reversed.wav" 190 | ]} *) 191 | 192 | (** 193 | {2 Operators on audio data} 194 | 195 | Following the Owl's conventions, few operators are available to deal with 196 | audio data. You can use them to make the code more concise and more readable. 197 | They are just syntaxic sugar on functions over the {!Audio.audio} type. *) 198 | 199 | val ( .%{} ) : 'a audio -> int -> float 200 | (** Operator of {!Audio.get} *) 201 | 202 | val ( .${} ) : 'a audio -> int * int -> 'a audio 203 | (** Operator of {!Audio.get_slice} *) 204 | 205 | val ( $/ ) : 'a audio -> float -> unit 206 | (** Operator of {!Audio.normalize} *) 207 | 208 | val ( /$ ) : float -> 'a audio -> unit 209 | (** Operator of {!Audio.normalize} *) 210 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name aformat) 3 | (package soundml) 4 | (modules aformat) 5 | (instrumentation 6 | (backend bisect_ppx)) 7 | (wrapped false)) 8 | 9 | (library 10 | (name types) 11 | (package soundml) 12 | (modules types) 13 | (instrumentation 14 | (backend bisect_ppx)) 15 | (wrapped false)) 16 | 17 | (library 18 | (name audio) 19 | (package soundml) 20 | (libraries owl types aformat) 21 | (modules audio) 22 | (instrumentation 23 | (backend bisect_ppx)) 24 | (wrapped false)) 25 | 26 | (library 27 | (name window) 28 | (package soundml) 29 | (libraries owl types audio) 30 | (modules window) 31 | (instrumentation 32 | (backend bisect_ppx)) 33 | (wrapped false)) 34 | 35 | (library 36 | (name utils) 37 | (package soundml) 38 | (libraries audio types owl) 39 | (modules utils) 40 | (instrumentation 41 | (backend bisect_ppx)) 42 | (wrapped false)) 43 | 44 | (library 45 | (name transform) 46 | (package soundml) 47 | (modules transform) 48 | (libraries owl window types utils) 49 | (instrumentation 50 | (backend bisect_ppx)) 51 | (wrapped false)) 52 | 53 | (library 54 | (name filterbank) 55 | (package soundml) 56 | (modules filterbank) 57 | (libraries owl utils) 58 | (instrumentation 59 | (backend bisect_ppx)) 60 | (wrapped false)) 61 | 62 | (library 63 | (name soundml) 64 | (public_name soundml) 65 | (modules soundml) 66 | (libraries owl audio io types feature filterbank transform effects window) 67 | (instrumentation 68 | (backend bisect_ppx))) 69 | -------------------------------------------------------------------------------- /src/effects/dune: -------------------------------------------------------------------------------- 1 | (include_subdirs qualified) 2 | 3 | (library 4 | (name effects) 5 | (package soundml) 6 | (foreign_stubs 7 | (language cxx) 8 | (names rubberband_stubs) 9 | (flags 10 | :standard 11 | (:include c_flags.sexp) 12 | -std=c++23 13 | -O3)) 14 | (c_library_flags 15 | (:include c_library_flags.sexp)) 16 | (libraries audio utils owl) 17 | (instrumentation 18 | (backend bisect_ppx)) 19 | (wrapped true)) 20 | 21 | (rule 22 | (targets c_flags.sexp c_library_flags.sexp) 23 | (action 24 | (run ./time/config/discover.exe))) 25 | -------------------------------------------------------------------------------- /src/effects/filter/filter.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | module type S = sig 23 | type t 24 | 25 | type params 26 | 27 | val reset : t -> t 28 | 29 | val create : params -> t 30 | 31 | val process_sample : t -> float -> float 32 | end 33 | 34 | module Make (S : S) = struct 35 | type t = S.t 36 | 37 | type params = S.params 38 | 39 | let reset = S.reset 40 | 41 | let create = S.create 42 | 43 | let process_sample = S.process_sample 44 | 45 | let process (t : t) (x : (Float.t, 'a) Audio.G.t) = 46 | let kd = Audio.G.kind x in 47 | let n = Audio.G.numel x in 48 | let y = Audio.G.create kd [|n|] 0. in 49 | for i = 0 to n - 1 do 50 | Audio.G.set y [|i|] (process_sample t (Audio.G.get x [|i|])) 51 | done ; 52 | y 53 | end 54 | 55 | module IIR = struct 56 | module Generic = Make (Iir) 57 | module HighPass = Make (Highpass) 58 | module LowPass = Make (Lowpass) 59 | end 60 | 61 | module FIR = struct 62 | module Generic = Make (Fir) 63 | end 64 | -------------------------------------------------------------------------------- /src/effects/filter/filter.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | module type S = sig 23 | type t 24 | 25 | type params 26 | 27 | val reset : t -> t 28 | 29 | val create : params -> t 30 | 31 | val process_sample : t -> float -> float 32 | end 33 | 34 | module Make : functor (S : S) -> sig 35 | type t = S.t 36 | 37 | type params = S.params 38 | 39 | val reset : S.t -> S.t 40 | 41 | val create : S.params -> S.t 42 | 43 | val process_sample : S.t -> float -> float 44 | 45 | val process : 46 | S.t 47 | -> (float, 'a) Owl_dense_ndarray.Generic.t 48 | -> (float, 'a) Owl_dense_ndarray.Generic.t 49 | end 50 | 51 | module IIR : sig 52 | module Generic : sig 53 | type t = Iir.t 54 | 55 | type params = Iir.params 56 | 57 | val reset : t -> t 58 | 59 | val create : params -> t 60 | 61 | val process_sample : t -> float -> float 62 | 63 | val process : 64 | t 65 | -> (float, 'a) Owl_dense_ndarray.Generic.t 66 | -> (float, 'a) Owl_dense_ndarray.Generic.t 67 | end 68 | 69 | module HighPass : sig 70 | type t = Highpass.t 71 | 72 | type params = Highpass.params 73 | 74 | val reset : t -> t 75 | 76 | val create : params -> t 77 | 78 | val process_sample : t -> float -> float 79 | 80 | val process : 81 | t 82 | -> (float, 'a) Owl_dense_ndarray.Generic.t 83 | -> (float, 'a) Owl_dense_ndarray.Generic.t 84 | end 85 | 86 | module LowPass : sig 87 | type t = Lowpass.t 88 | 89 | type params = Lowpass.params 90 | 91 | val reset : t -> t 92 | 93 | val create : params -> t 94 | 95 | val process_sample : t -> float -> float 96 | 97 | val process : 98 | t 99 | -> (float, 'a) Owl_dense_ndarray.Generic.t 100 | -> (float, 'a) Owl_dense_ndarray.Generic.t 101 | end 102 | end 103 | 104 | module FIR : sig 105 | module Generic : sig 106 | type t = Fir.t 107 | 108 | type params = Fir.params 109 | 110 | val reset : t -> t 111 | 112 | val create : params -> t 113 | 114 | val process_sample : t -> float -> float 115 | 116 | val process : 117 | t 118 | -> (float, 'a) Owl_dense_ndarray.Generic.t 119 | -> (float, 'a) Owl_dense_ndarray.Generic.t 120 | end 121 | end 122 | -------------------------------------------------------------------------------- /src/effects/filter/fir.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Iir 23 | 24 | type t = Iir.t 25 | 26 | type params = {b: float array} 27 | 28 | let reset t = Iir.reset t 29 | 30 | let create ({b} : params) = Iir.create {a= [|1.|]; b} 31 | 32 | let process_sample = Iir.process_sample 33 | -------------------------------------------------------------------------------- /src/effects/filter/fir.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type t 23 | 24 | type params = {b: float array} 25 | 26 | val reset : t -> t 27 | 28 | val create : params -> t 29 | 30 | val process_sample : t -> float -> float 31 | -------------------------------------------------------------------------------- /src/effects/filter/highpass.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Iir 23 | 24 | type t = Iir.t 25 | 26 | type params = {cutoff: float; sample_rate: int} 27 | 28 | let create ({cutoff; sample_rate} : params) = 29 | let fs = sample_rate |> float_of_int in 30 | let fc = cutoff in 31 | let r = Float.tan (Float.pi *. fc /. fs) in 32 | let c = (r -. 1.) /. (r +. 1.) in 33 | let a = [|1.0; c|] in 34 | let b = [|(1.0 -. c) /. 2.0; (c -. 1.0) /. 2.0|] in 35 | create {a; b} 36 | 37 | let reset = reset 38 | 39 | let process_sample = process_sample 40 | -------------------------------------------------------------------------------- /src/effects/filter/highpass.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type t 23 | 24 | type params = {cutoff: float; sample_rate: int} 25 | 26 | val reset : t -> t 27 | 28 | val create : params -> t 29 | 30 | val process_sample : t -> float -> float 31 | -------------------------------------------------------------------------------- /src/effects/filter/iir.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type params = {a: float array; b: float array} 23 | 24 | type t = 25 | { b: (float, Bigarray.float32_elt) Audio.G.t 26 | ; a: (float, Bigarray.float32_elt) Audio.G.t 27 | ; state: (float, Bigarray.float32_elt) Audio.G.t } 28 | 29 | let reset t = Audio.G.fill t.state 0. ; t 30 | 31 | let create ({a; b} : params) = 32 | let a = Audio.G.of_array Bigarray.Float32 a [|Array.length a|] in 33 | let b = Audio.G.of_array Bigarray.Float32 b [|Array.length b|] in 34 | let size = max (Audio.G.numel a) (Audio.G.numel b) in 35 | let a = Audio.G.(a /$ get a [|0|]) in 36 | (*let b = Audio.G.(b /$ get b [|0|]) in*) 37 | let state = Audio.G.create Bigarray.Float32 [|size|] 0. in 38 | {b; a; state} 39 | 40 | let process_sample t (x : float) = 41 | let n = Audio.G.numel t.state in 42 | let y = 43 | if n > 0 then (Audio.G.get t.b [|0|] *. x) +. Audio.G.get t.state [|0|] 44 | else 0. 45 | in 46 | let nb = Audio.G.numel t.b in 47 | let na = Audio.G.numel t.a in 48 | for i = 0 to Audio.G.numel t.state - 1 do 49 | let b = if i + 1 < nb then Audio.G.get t.b [|i + 1|] *. x else 0. in 50 | let a = 51 | if i + 1 < na then Float.neg (Audio.G.get t.a [|i + 1|]) *. y else 0. 52 | in 53 | if i < n - 1 then 54 | Audio.G.set t.state [|i|] (Audio.G.get t.state [|i + 1|] +. b +. a) 55 | else Audio.G.set t.state [|i|] (b +. a) 56 | done ; 57 | y 58 | -------------------------------------------------------------------------------- /src/effects/filter/iir.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type t 23 | 24 | type params = {a: float array; b: float array} 25 | 26 | val reset : t -> t 27 | 28 | val create : params -> t 29 | 30 | val process_sample : t -> float -> float 31 | -------------------------------------------------------------------------------- /src/effects/filter/lowpass.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Iir 23 | 24 | type t = Iir.t 25 | 26 | type params = {cutoff: float; sample_rate: int} 27 | 28 | let create ({cutoff; sample_rate} : params) = 29 | let fs = float_of_int sample_rate in 30 | let fc = cutoff in 31 | let r = Float.tan (Float.pi *. fc /. fs) in 32 | let c = (r -. 1.) /. (r +. 1.) in 33 | let a = [|1.0; c|] in 34 | let b = [|(1.0 +. c) /. 2.0; (1.0 +. c) /. 2.0|] in 35 | Iir.create {a; b} 36 | 37 | let reset = reset 38 | 39 | let process_sample = process_sample 40 | -------------------------------------------------------------------------------- /src/effects/filter/lowpass.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type t 23 | 24 | type params = {cutoff: float; sample_rate: int} 25 | 26 | val reset : t -> t 27 | 28 | val create : params -> t 29 | 30 | val process_sample : t -> float -> float 31 | -------------------------------------------------------------------------------- /src/effects/time/config/discover.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | module C = Configurator.V1 23 | 24 | let () = 25 | C.main ~name:"rubberband-pkg-config" (fun c -> 26 | let default : C.Pkg_config.package_conf = 27 | {libs= ["-lrubberband"]; cflags= []} 28 | in 29 | let conf = 30 | match C.Pkg_config.get c with 31 | | None -> 32 | default 33 | | Some pc -> ( 34 | match C.Pkg_config.query pc ~package:"rubberband" with 35 | | None -> 36 | default 37 | | Some deps -> 38 | deps ) 39 | in 40 | C.Flags.write_sexp "c_flags.sexp" conf.cflags ; 41 | C.Flags.write_sexp "c_library_flags.sexp" conf.libs ) 42 | -------------------------------------------------------------------------------- /src/effects/time/config/dune: -------------------------------------------------------------------------------- 1 | (include_subdirs no) 2 | 3 | (executable 4 | (name discover) 5 | (libraries dune.configurator)) 6 | -------------------------------------------------------------------------------- /src/effects/time/rubberband_stubs.cxx: -------------------------------------------------------------------------------- 1 | /*****************************************************************************/ 2 | /* */ 3 | /* */ 4 | /* Copyright (C) 2023-2025 */ 5 | /* Gabriel Santamaria */ 6 | /* */ 7 | /* */ 8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 | /* you may not use this file except in compliance with the License. */ 10 | /* You may obtain a copy of the License at */ 11 | /* */ 12 | /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 | /* */ 14 | /* Unless required by applicable law or agreed to in writing, software */ 15 | /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 | /* See the License for the specific language governing permissions and */ 18 | /* limitations under the License. */ 19 | /* */ 20 | /*****************************************************************************/ 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | extern "C" 27 | { 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | } 34 | 35 | namespace SoundML 36 | { 37 | namespace Effects 38 | { 39 | namespace Time 40 | { 41 | using namespace RubberBand; 42 | 43 | /** 44 | * @brief Stretches the input audio data using RubberBand. 45 | * 46 | * @param intput The input audio data pointer. 47 | * @param output The output audio data OCaml value. 48 | * @param samples The number of samples in the input data. 49 | * @param sample_rate The sample rate of the input data. 50 | * @param channels The number of channels in the input data. 51 | * @param config The RubberBand configuration options. 52 | * @param time_ratio The time ratio for stretching. 53 | * @param pitch_scale The pitch scale for stretching. 54 | * 55 | * @return The stretched audio data. 56 | */ 57 | std::expected offline_stretch( 58 | float *intput, 59 | value output, 60 | size_t samples, 61 | size_t sample_rate, 62 | size_t channels, 63 | RubberBandStretcher::Options config, 64 | double time_ratio, 65 | double pitch_scale) 66 | { 67 | RubberBandStretcher stretcher(sample_rate, channels, config, time_ratio, pitch_scale); 68 | 69 | stretcher.setExpectedInputDuration(samples); 70 | 71 | /* we have access to the whole input, so we can feed it directly into the stretcher */ 72 | stretcher.study(&intput, samples, true); 73 | stretcher.process(&intput, samples, true); /* Rubberband expect deinterleaved samples */ 74 | 75 | size_t per_channel_size = stretcher.available(); 76 | 77 | intnat ndims = (channels > 1) ? 2 : 1; 78 | intnat dims[ndims]; 79 | 80 | if (ndims == 1) 81 | dims[0] = static_cast(per_channel_size); 82 | else 83 | { 84 | dims[0] = static_cast(channels); /* we're going to get the data directly deinterleaved */ 85 | dims[1] = static_cast(per_channel_size); 86 | } 87 | 88 | /* memory is managed by OCaml */ 89 | output = caml_ba_alloc(CAML_BA_FLOAT32 | CAML_BA_C_LAYOUT, ndims, NULL, dims); 90 | 91 | size_t retrieved = stretcher.retrieve((float *const *)&Caml_ba_data_val(output), per_channel_size); 92 | if (retrieved != per_channel_size) 93 | { 94 | std::string error_msg = "Rubberband error: retrieved " + std::to_string(retrieved) + " samples, expected " + std::to_string(per_channel_size); 95 | return std::unexpected(error_msg); 96 | } 97 | 98 | return output; 99 | } 100 | 101 | } /* namespace Time */ 102 | } /* namespace Effects */ 103 | } /* namespace SoundML */ 104 | 105 | extern "C" 106 | { 107 | CAMLprim value caml_rubberband_stretch(value input, value params) 108 | { 109 | using namespace SoundML::Effects::Time; 110 | CAMLparam2(input, params); 111 | CAMLlocal1(output); 112 | 113 | size_t samples_val = Long_val(Field(params, 0)); 114 | size_t sample_rate_val = Long_val(Field(params, 1)); 115 | size_t channels_val = Long_val(Field(params, 2)); 116 | RubberBandStretcher::Options config_val = static_cast(Long_val(Field(params, 3))); 117 | double time_ratio_val = Double_val(Field(params, 4)); 118 | double pitch_scale_val = Double_val(Field(params, 5)); 119 | 120 | float *input_data = (float *)Caml_ba_data_val(input); 121 | 122 | auto result = offline_stretch(input_data, output, samples_val, sample_rate_val, channels_val, config_val, time_ratio_val, pitch_scale_val); 123 | 124 | if (!result.has_value()) 125 | { 126 | caml_failwith(result.error().c_str()); 127 | } 128 | 129 | CAMLreturn(result.value()); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/effects/time/time.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023-2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type engine = Faster | Finer 23 | 24 | let engine_to_int = function Faster -> 0x00000000 | Finer -> 0x20000000 25 | 26 | type transients = Crisp | Mixed | Smooth 27 | 28 | let transients_to_int = function 29 | | Crisp -> 30 | 0x00000000 31 | | Mixed -> 32 | 0x00000100 33 | | Smooth -> 34 | 0x00000200 35 | 36 | type detector = Compound | Percussive | Soft 37 | 38 | let detector_to_int = function 39 | | Compound -> 40 | 0x00000000 41 | | Percussive -> 42 | 0x00000400 43 | | Soft -> 44 | 0x00000800 45 | 46 | type phase = Laminar | Independent 47 | 48 | let phase_to_int = function Laminar -> 0x00000000 | Independent -> 0x00002000 49 | 50 | type threading = Auto | Never | Always 51 | 52 | let threading_to_int = function 53 | | Auto -> 54 | 0x00000000 55 | | Never -> 56 | 0x00010000 57 | | Always -> 58 | 0x00020000 59 | 60 | type window = Standard | Short | Long 61 | 62 | let window_to_int = function 63 | | Standard -> 64 | 0x00000000 65 | | Short -> 66 | 0x00100000 67 | | Long -> 68 | 0x00200000 69 | 70 | type smoothing = Off | On 71 | 72 | let smoothing_to_int = function Off -> 0x00000000 | On -> 0x00800000 73 | 74 | type formant = Shifted | Preserved 75 | 76 | let formant_to_int = function Shifted -> 0x00000000 | Preserved -> 0x01000000 77 | 78 | type pitch = HighSpeed | HighQuality | HighConsistency 79 | 80 | let pitch_to_int = function 81 | | HighSpeed -> 82 | 0x00000000 83 | | HighQuality -> 84 | 0x02000000 85 | | HighConsistency -> 86 | 0x04000000 87 | 88 | type channels = Apart | Together 89 | 90 | let channels_to_int = function Apart -> 0x00000000 | Together -> 0x10000000 91 | 92 | module Config = struct 93 | type t = 94 | { engine: engine 95 | ; transients: transients 96 | ; detector: detector 97 | ; phase: phase 98 | ; threading: threading 99 | ; window: window 100 | ; smoothing: smoothing 101 | ; formant: formant 102 | ; pitch: pitch 103 | ; channels: channels } 104 | 105 | let default : t = 106 | { engine= Faster 107 | ; transients= Crisp 108 | ; detector= Compound 109 | ; phase= Laminar 110 | ; threading= Auto 111 | ; window= Standard 112 | ; smoothing= Off 113 | ; formant= Shifted 114 | ; pitch= HighSpeed 115 | ; channels= Apart } 116 | 117 | let percussive : t = {default with window= Short; phase= Independent} 118 | 119 | let with_engine engine config = {config with engine} 120 | 121 | let with_transients transients config = {config with transients} 122 | 123 | let with_detector detector config = {config with detector} 124 | 125 | let with_phase phase config = {config with phase} 126 | 127 | let with_threading threading config = {config with threading} 128 | 129 | let with_window window config = {config with window} 130 | 131 | let with_smoothing smoothing config = {config with smoothing} 132 | 133 | let with_formant formant config = {config with formant} 134 | 135 | let with_pitch pitch config = {config with pitch} 136 | 137 | let with_channels channels config = {config with channels} 138 | 139 | let to_int (cfg : t) : int = 140 | 0 lor engine_to_int cfg.engine 141 | lor transients_to_int cfg.transients 142 | lor detector_to_int cfg.detector 143 | lor phase_to_int cfg.phase 144 | lor threading_to_int cfg.threading 145 | lor window_to_int cfg.window 146 | lor smoothing_to_int cfg.smoothing 147 | lor formant_to_int cfg.formant lor pitch_to_int cfg.pitch 148 | lor channels_to_int cfg.channels 149 | end 150 | 151 | external rubberband_stretch : 152 | (float, Bigarray.float32_elt) Audio.G.t 153 | -> int * int * int * int * float * float 154 | -> (float, Bigarray.float32_elt) Audio.G.t = "caml_rubberband_stretch" 155 | 156 | let to_float32 : type b. 157 | (float, b) Bigarray.kind 158 | -> (float, b) Audio.G.t 159 | -> (float, Bigarray.float32_elt) Audio.G.t = 160 | fun (kd : (float, b) Bigarray.kind) -> 161 | match kd with 162 | | Float32 -> 163 | Fun.id 164 | | Float64 -> 165 | Audio.G.cast_d2s 166 | | Float16 -> 167 | raise 168 | (Invalid_argument 169 | "Float16 elements kind aren't supported. The array kind must be \ 170 | either Float32 or Float64." ) 171 | 172 | let of_float32 : type b. 173 | (float, b) Bigarray.kind 174 | -> (float, Bigarray.float32_elt) Audio.G.t 175 | -> (float, b) Audio.G.t = 176 | fun (kd : (float, b) Bigarray.kind) -> 177 | match kd with 178 | | Float32 -> 179 | Fun.id 180 | | Float64 -> 181 | Audio.G.cast_s2d 182 | | Float16 -> 183 | raise 184 | (Invalid_argument 185 | "Float16 elements kind aren't supported. The array kind must be \ 186 | either Float32 or Float64." ) 187 | 188 | let time_stretch : type a. 189 | ?config:Config.t 190 | -> (float, a) Audio.G.t 191 | -> int 192 | -> float 193 | -> (float, a) Audio.G.t = 194 | fun ?(config : Config.t = Config.default) (x : (float, a) Audio.G.t) 195 | (sample_rate : int) (ratio : float) : (float, a) Audio.G.t -> 196 | if not (ratio > 0.) then invalid_arg "rate must be > 0." 197 | else 198 | let dshape = Audio.G.shape x in 199 | let channels = if Array.length dshape > 1 then dshape.(0) else 1 in 200 | let samples = if Array.length dshape > 1 then dshape.(1) else dshape.(0) in 201 | let config = Config.to_int config in 202 | let to_float32 = to_float32 (Audio.G.kind x) in 203 | let of_float32 = of_float32 (Audio.G.kind x) in 204 | of_float32 205 | (rubberband_stretch (to_float32 x) 206 | (samples, sample_rate, channels, config, ratio, 1.0) ) 207 | 208 | let pitch_shift : type a. 209 | ?config:Config.t 210 | -> ?bins_per_octave:int 211 | -> (float, a) Audio.G.t 212 | -> int 213 | -> int 214 | -> (float, a) Audio.G.t = 215 | fun ?(config : Config.t = Config.default) ?(bins_per_octave : int = 12) 216 | (x : (float, a) Audio.G.t) (sample_rate : int) (steps : int) : 217 | (float, a) Audio.G.t -> 218 | let bins_per_octave = Float.of_int bins_per_octave in 219 | let steps = Float.of_int steps in 220 | let scale = Float.pow 2.0 (steps /. bins_per_octave) in 221 | let dshape = Audio.G.shape x in 222 | let channels = if Array.length dshape > 1 then dshape.(0) else 1 in 223 | let samples = if Array.length dshape > 1 then dshape.(1) else dshape.(0) in 224 | let config = Config.to_int config in 225 | let to_float32 = to_float32 (Audio.G.kind x) in 226 | let of_float32 = of_float32 (Audio.G.kind x) in 227 | of_float32 228 | (rubberband_stretch (to_float32 x) 229 | (samples, sample_rate, channels, config, 1.0, scale) ) 230 | -------------------------------------------------------------------------------- /src/feature/dune: -------------------------------------------------------------------------------- 1 | (include_subdirs qualified) 2 | 3 | (library 4 | (name feature) 5 | (package soundml) 6 | (libraries audio utils window owl) 7 | (instrumentation 8 | (backend bisect_ppx)) 9 | (wrapped true)) 10 | -------------------------------------------------------------------------------- /src/feature/spectral/spectral.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | -------------------------------------------------------------------------------- /src/feature/spectral/spectral.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | -------------------------------------------------------------------------------- /src/filterbank.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type norm = Slaney | PNorm of float 23 | 24 | let mel ?(fmax : float option = None) ?(htk : bool = false) 25 | ?(norm : norm option = None) (kd : ('a, 'b) Bigarray.kind) 26 | (sample_rate : int) (nfft : int) (nmels : int) (fmin : float) = 27 | if nmels = 0 then Audio.G.empty kd [|0; (nfft / 2) + 1|] 28 | else 29 | let fmax = 30 | match fmax with 31 | | Some fmax -> 32 | fmax 33 | | None -> 34 | float_of_int sample_rate /. 2. 35 | in 36 | let fftfreqs = Utils.rfftfreq kd nfft (1. /. float_of_int sample_rate) in 37 | let mel_freqs = Utils.melfreq kd ~nmels:(nmels + 2) ~fmin ~fmax ~htk in 38 | let fdiff = Audio.G.diff mel_freqs in 39 | let ramps = Utils.outer Audio.G.sub mel_freqs fftfreqs in 40 | let open Audio.G in 41 | let lower = 42 | neg ramps.${[0; Int.sub nmels 1]} 43 | / reshape fdiff.${[0; Int.sub nmels 1]} [|nmels; 1|] 44 | in 45 | let upper = 46 | ramps.${[2; Int.add nmels 1]} / reshape fdiff.${[1; nmels]} [|nmels; 1|] 47 | in 48 | (* Intersect slopes *) 49 | let weights = max2 (zeros kd (shape lower)) (min2 lower upper) in 50 | let weights = 51 | match norm with 52 | | Some Slaney -> 53 | let enorm = 54 | 2.0 55 | $/ sub 56 | mel_freqs.${[2; Int.add nmels 1]} 57 | mel_freqs.${[0; Int.sub nmels 1]} 58 | in 59 | let enorm = reshape enorm [|nmels; 1|] in 60 | weights * enorm 61 | | Some (PNorm p) -> 62 | Audio.G.vecnorm ~p ~axis:(-1) weights 63 | | None -> 64 | weights 65 | in 66 | weights 67 | -------------------------------------------------------------------------------- /src/filterbank.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type norm = Slaney | PNorm of float 23 | 24 | val mel : 25 | ?fmax:float option 26 | -> ?htk:bool 27 | -> ?norm:norm option 28 | -> (float, 'b) Bigarray.kind 29 | -> int 30 | -> int 31 | -> int 32 | -> float 33 | -> (float, 'b) Owl_dense_ndarray.Generic.t 34 | (** 35 | [mel ?fmax ?htk ?norm sample_rate nfft nmels fmin] 36 | 37 | Returns a matrix of shape [nmels, nfft/2+1] containing the mel filterbank. *) 38 | -------------------------------------------------------------------------------- /src/io/cio/README.md: -------------------------------------------------------------------------------- 1 | ## SoundML IO C++ library 2 | 3 | This is the C/C++ code for SoundML IO library. It relies on C++23 (for `std::expected`). 4 | 5 | ### Dependencies 6 | 7 | - `sndfile`: for reading and writing audio files (and *very* soon streams) 8 | - `soxr`: for everything related to resampling 9 | 10 | ### General information 11 | 12 | Since we're working with `Owl` that uses the `Bigarray.c_layout` layout, we choose to maintain the interleaved layout for the audio data. This allows us to directly write the data into the `Bigarray` without having to deinterleave it. Thus, when reading an audio files that contains `n` channels and `m` samples per channel, the final shape of the `Bigarray` will be `(m, n)` instead of `(n, m)` as you may be used to using other well known libraries (like *librosa*). 13 | 14 | In this directory, you'll find the following files: 15 | 16 | - `common.hxx` : contains the common functions and types used by both the reader and the writer. 17 | - `read.hxx` : implements the needed `read`s functions. The file reading implementation is split between two classes: 18 | - `SoundML::IO::SndfileReader` : this is used when no resampling is needed. 19 | - `SoundML::IO::SoXrReader` : this is used when resampling is needed. It performs resampling while reading the file. Each read buffer is fed to the soxr resampler and the output is written directly to the `Bigarray` data pointer. 20 | - `write.hxx` implements a simple writing function that directly uses `sndfile` to write the data. 21 | 22 | Exceptions are used to handle the errors. To do so, in `common.hxx` we retreive (inside `raise_caml_exception`) the correct exception to raise in OCaml based on the `Error` provided. 23 | -------------------------------------------------------------------------------- /src/io/cio/common.hxx: -------------------------------------------------------------------------------- 1 | /*****************************************************************************/ 2 | /* */ 3 | /* */ 4 | /* Copyright (C) 2023-2025 */ 5 | /* Gabriel Santamaria */ 6 | /* */ 7 | /* */ 8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 | /* you may not use this file except in compliance with the License. */ 10 | /* You may obtain a copy of the License at */ 11 | /* */ 12 | /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 | /* */ 14 | /* Unless required by applicable law or agreed to in writing, software */ 15 | /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 | /* See the License for the specific language governing permissions and */ 18 | /* limitations under the License. */ 19 | /* */ 20 | /*****************************************************************************/ 21 | 22 | #define SOUNDML_BUFFER_SIZE 4096 23 | #include 24 | #include 25 | #include 26 | 27 | extern "C" /* OCaml imports */ 28 | { 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | } 37 | 38 | #ifndef SOUNDML_IO_COMMON_H 39 | #define SOUNDML_IO_COMMON_H 40 | namespace SoundML 41 | { 42 | namespace IO 43 | { 44 | typedef enum 45 | { 46 | SNDFILE_ERR, 47 | SOXR_ERR, 48 | SOUNDML_ERR 49 | } ErrorType; 50 | 51 | using Error = std::pair, ErrorType>; 52 | 53 | /** 54 | * @brief Little helper to get a string out of an error code 55 | * @param err The error code 56 | * @return A string with the error message 57 | */ 58 | std::string get_error_string(Error error) 59 | { 60 | std::variant err_code = error.first; 61 | ErrorType typ = error.second; 62 | switch (typ) 63 | { 64 | case SNDFILE_ERR: 65 | return std::string(sf_error_number(std::get(err_code))); 66 | case SOXR_ERR: 67 | return std::get(err_code); 68 | case SOUNDML_ERR: 69 | return std::get(err_code); 70 | default: 71 | break; 72 | } 73 | 74 | return std::string("Unknown error"); 75 | } 76 | 77 | /** 78 | * @brief Raise the correct OCaml exception from the given Error 79 | * @param error The error to raise 80 | * @param filename The name of the file that caused the error 81 | */ 82 | void raise_caml_exception(Error error, std::string filename) 83 | { 84 | ErrorType type = error.second; 85 | std::string error_string = SoundML::IO::get_error_string(error) + " in file " + filename; 86 | 87 | #define GET_EXN_TAG(name) \ 88 | (*caml_named_value(name)) 89 | 90 | if (type == SNDFILE_ERR) 91 | { 92 | int err_code = std::get(error.first); 93 | bool is_format_err = err_code == SF_ERR_UNRECOGNISED_FORMAT || err_code == SF_ERR_MALFORMED_FILE || err_code == SF_ERR_UNSUPPORTED_ENCODING; 94 | if (is_format_err) 95 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.invalid_format"), error_string.c_str()); 96 | else /* it's SF_ERR_SYSTEM */ 97 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.file_not_found"), error_string.c_str()); 98 | } 99 | else if (type == SOXR_ERR) 100 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.resampling_error"), error_string.c_str()); 101 | else if (type == SOUNDML_ERR) 102 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.internal_error"), error_string.c_str()); 103 | else 104 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.internal_error"), "Unknown internal error."); 105 | #undef GET_EXN_TAG 106 | } 107 | 108 | /** 109 | * @brief Structure holding metadate related to an audio file 110 | * @param frames number of frames we read from the file 111 | * @param channels number of channels in the file 112 | * @param sample_rate sample-rate of the file (if a resampling has been asked, sample-rate equals the the asked sr) 113 | * @param padded_frames number of frames we padded with zeros 114 | * @param format format of the file (SF_FORMAT_* from libsndfile) 115 | */ 116 | struct AudioMetadata 117 | { 118 | sf_count_t frames; 119 | int channels; 120 | int sample_rate; 121 | sf_count_t padded_frames; 122 | int format; 123 | }; 124 | 125 | /** 126 | * @brief Enum that represents the resampling types 127 | * @note The SoX resampling types are defined in soxr.h 128 | */ 129 | typedef enum 130 | { 131 | RS_NONE = 0, /* No resampling */ 132 | RS_SOXR_QQ, /* 'Quick' cubic interpolation. */ 133 | RS_SOXR_LQ, /* 'Low' 16-bit with larger rolloff. */ 134 | RS_SOXR_MQ, /* 'Medium' 16-bit with medium rolloff. */ 135 | RS_SOXR_HQ, /* 'High quality'. */ 136 | RS_SOXR_VHQ, /* 'Very high quality'. */ 137 | /* TODO: implement these resamplers */ 138 | RS_SCR_LINEAR, 139 | RS_SINC_BEST_QUALITY, 140 | RS_SINC_MEDIUM_QUALITY, 141 | RS_SINC_FASTEST, 142 | RS_ZERO_ORDER_HOLD, 143 | RS_SRC_LINEAR 144 | } resampling_t; 145 | 146 | /** 147 | * @brief Get the (correct) SoX resampling type from the resampling_t enum 148 | * @param type The resampling type to convert 149 | * @return The SoX resampling type (SOXR_* from soxr.h) 150 | */ 151 | unsigned long get_recipe_type(resampling_t type) 152 | { 153 | 154 | switch (type) 155 | { 156 | case RS_SOXR_VHQ: 157 | return SOXR_VHQ; 158 | case RS_SOXR_HQ: 159 | return SOXR_HQ; 160 | case RS_SOXR_MQ: 161 | return SOXR_MQ; 162 | case RS_SOXR_LQ: 163 | return SOXR_LQ; 164 | default: 165 | return SOXR_VHQ; 166 | } 167 | } 168 | } /* namespace SoundML::IO */ 169 | } /* namespace SoundML */ 170 | 171 | #endif /* SOUNDML_IO_COMMON_H */ 172 | -------------------------------------------------------------------------------- /src/io/cio/write.hxx: -------------------------------------------------------------------------------- 1 | /*****************************************************************************/ 2 | /* */ 3 | /* */ 4 | /* Copyright (C) 2023-2025 */ 5 | /* Gabriel Santamaria */ 6 | /* */ 7 | /* */ 8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 | /* you may not use this file except in compliance with the License. */ 10 | /* You may obtain a copy of the License at */ 11 | /* */ 12 | /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 | /* */ 14 | /* Unless required by applicable law or agreed to in writing, software */ 15 | /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 | /* See the License for the specific language governing permissions and */ 18 | /* limitations under the License. */ 19 | /* */ 20 | /*****************************************************************************/ 21 | 22 | #ifndef SOUNDML_WRITER_H 23 | #define SOUNDML_WRITER_H 24 | 25 | #include "common.hxx" 26 | 27 | namespace SoundML 28 | { 29 | namespace IO 30 | { 31 | 32 | class AudioWriter 33 | { 34 | private: 35 | sf_count_t nframes; 36 | 37 | public: 38 | AudioWriter(sf_count_t nframes) 39 | : nframes(nframes) {} 40 | 41 | /** 42 | * @brief Writes the given audio data to the file 43 | * @param sndfile The SndfileHandle to write to 44 | * @param data Pointer to the data to write 45 | * @return An std::expected containing the error code on failure 46 | */ 47 | template 48 | std::expected write(SndfileHandle &sndfile, const T *data) 49 | { 50 | caml_release_runtime_system(); 51 | 52 | sf_count_t written = sndfile.writef(data, nframes); 53 | 54 | caml_acquire_runtime_system(); 55 | 56 | if (written != nframes) 57 | { 58 | int err = sndfile.error() ? sndfile.error() : SF_ERR_SYSTEM; 59 | return std::unexpected(Error(err, SNDFILE_ERR)); 60 | } 61 | 62 | return std::expected{}; 63 | } 64 | }; 65 | } /* namespace SoundML::IO */ 66 | } /* namespace SoundML */ 67 | 68 | /** 69 | * @brief Writes the given audio data to the file 70 | * @param filename The name of the file to write to. 71 | * @param ba_data The data to write (Bigarray). 72 | * @param metadata The metadata of the audio data (number of frames, sample rate, channels, format). 73 | * @tparam T The type of the audio data (float or double). 74 | * 75 | * @return An std::expected containing the error code on failure 76 | */ 77 | template 78 | value caml_write_audio_file(value filename, value ba_data, value metadata) 79 | { 80 | using namespace SoundML::IO; 81 | std::string filename_str = String_val(filename); 82 | sf_count_t nframes_val = Long_val(Field(metadata, 0)); 83 | int sample_rate_val = Long_val(Field(metadata, 1)); 84 | int channels_val = Long_val(Field(metadata, 2)); 85 | int format_val = Long_val(Field(metadata, 3)); 86 | 87 | SndfileHandle sndfile(filename_str, SFM_WRITE, format_val, channels_val, sample_rate_val); 88 | if (int err = sndfile.error(); err) 89 | raise_caml_exception(Error(err, SNDFILE_ERR), filename_str); 90 | 91 | AudioWriter writer(nframes_val); 92 | T *data = (T *)Caml_ba_data_val(ba_data); 93 | 94 | auto result = writer.write(sndfile, data); 95 | if (result.has_value()) 96 | return Val_unit; 97 | else 98 | { 99 | Error err = result.error(); 100 | raise_caml_exception(err, filename_str); 101 | } 102 | 103 | return Val_unit; 104 | } 105 | 106 | #endif /* SOUNDML_WRITER_H */ 107 | -------------------------------------------------------------------------------- /src/io/config/discover.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | module C = Configurator.V1 23 | 24 | let () = 25 | C.main ~name:"sndfile-pkg-config" (fun c -> 26 | let default : C.Pkg_config.package_conf = 27 | {libs= ["-lsndfile"; "-lsoxr"]; cflags= []} 28 | in 29 | let conf = 30 | match C.Pkg_config.get c with 31 | | None -> 32 | default 33 | | Some pc -> ( 34 | match C.Pkg_config.query pc ~package:"sndfile soxr" with 35 | | None -> 36 | default 37 | | Some deps -> 38 | deps ) 39 | in 40 | C.Flags.write_sexp "c_flags.sexp" conf.cflags ; 41 | C.Flags.write_sexp "c_library_flags.sexp" conf.libs ) 42 | -------------------------------------------------------------------------------- /src/io/config/dune: -------------------------------------------------------------------------------- 1 | (include_subdirs no) 2 | 3 | (executable 4 | (name discover) 5 | (libraries dune.configurator)) 6 | -------------------------------------------------------------------------------- /src/io/dune: -------------------------------------------------------------------------------- 1 | (include_subdirs qualified) 2 | 3 | (library 4 | (name io) 5 | (package soundml) 6 | (foreign_stubs 7 | (language cxx) 8 | (names soundml_io) 9 | (include_dirs cio) 10 | (flags 11 | :standard 12 | (:include c_flags.sexp) 13 | -O3 14 | -std=c++23 15 | -llto)) 16 | (c_library_flags 17 | (:include c_library_flags.sexp)) 18 | (modules io) 19 | (libraries audio utils owl) 20 | (instrumentation 21 | (backend bisect_ppx)) 22 | (wrapped true)) 23 | 24 | (rule 25 | (targets c_flags.sexp c_library_flags.sexp) 26 | (action 27 | (run ./config/discover.exe))) 28 | -------------------------------------------------------------------------------- /src/io/io.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023-2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Audio 23 | open Bigarray 24 | 25 | exception File_not_found of string 26 | 27 | exception Invalid_format of string 28 | 29 | exception Resampling_error of string 30 | 31 | exception Internal_error of string 32 | 33 | let _ = 34 | Callback.register_exception "soundml.exn.file_not_found" 35 | (File_not_found "file.wav") 36 | 37 | let _ = 38 | Callback.register_exception "soundml.exn.invalid_format" 39 | (Invalid_format "invalid format") 40 | 41 | let _ = 42 | Callback.register_exception "soundml.exn.resampling_error" 43 | (Resampling_error "error") 44 | 45 | let _ = 46 | Callback.register_exception "soundml.exn.internal_error" 47 | (Internal_error "internal error") 48 | 49 | type resampling_t = NONE | SOXR_QQ | SOXR_LQ | SOXR_MQ | SOXR_HQ | SOXR_VHQ 50 | 51 | (* nframes * channels * sample_rate * format *) 52 | type metadata = int * int * int * int 53 | 54 | external caml_read_audio_file_f32 : 55 | string 56 | -> resampling_t 57 | -> int 58 | -> (float, Bigarray.float32_elt) Audio.G.t * metadata 59 | = "caml_read_audio_file_f32" 60 | 61 | external caml_read_audio_file_f64 : 62 | string 63 | -> resampling_t 64 | -> int 65 | -> (float, Bigarray.float64_elt) Audio.G.t * metadata 66 | = "caml_read_audio_file_f64" 67 | 68 | let to_mono (x : (float, 'a) G.t) = 69 | if G.num_dims x > 1 then G.mean ~axis:1 ~keep_dims:false x else x 70 | 71 | let read : type a. 72 | ?res_typ:resampling_t 73 | -> ?sample_rate:int 74 | -> ?mono:bool 75 | -> (float, a) kind 76 | -> string 77 | -> a audio = 78 | fun ?(res_typ : resampling_t = SOXR_HQ) ?(sample_rate : int = 22050) 79 | ?(mono : bool = true) typ (filename : string) -> 80 | let read_func : type a. 81 | (float, a) kind 82 | -> string 83 | -> resampling_t 84 | -> int 85 | -> (float, a) G.t * metadata = 86 | fun typ -> 87 | match typ with 88 | | Float32 -> 89 | caml_read_audio_file_f32 90 | | Float64 -> 91 | caml_read_audio_file_f64 92 | | Float16 -> 93 | raise 94 | (Invalid_argument 95 | "Float16 elements kind aren't supported. The array kind must be \ 96 | either Float32 or Float64." ) 97 | in 98 | let data, meta = read_func typ filename res_typ sample_rate in 99 | let dshape = Audio.G.shape data in 100 | let nsamples = dshape.(0) in 101 | let data = if mono then to_mono data else data in 102 | let frames, channels, sample_rate, format = meta in 103 | let data = 104 | match (res_typ, frames, nsamples) with 105 | | NONE, real, pred -> 106 | if real = pred then data else Audio.G.sub_left data 0 real 107 | | _ -> 108 | data 109 | in 110 | let channels = if mono then 1 else channels in 111 | let format = 112 | match Aformat.of_int format with 113 | | Ok fmt -> 114 | fmt 115 | | Error e -> 116 | raise (Invalid_format e) 117 | in 118 | let meta = 119 | Metadata.create ~name:filename frames channels sample_rate format 120 | in 121 | let data = Audio.G.transpose data in 122 | Audio.create meta data 123 | 124 | external caml_write_audio_file_f32 : 125 | string 126 | -> (float, Bigarray.float32_elt) Audio.G.t 127 | -> int * int * int * int 128 | -> unit = "caml_write_audio_file_f32" 129 | 130 | external caml_write_audio_file_f64 : 131 | string 132 | -> (float, Bigarray.float64_elt) Audio.G.t 133 | -> int * int * int * int 134 | -> unit = "caml_write_audio_file_f64" 135 | 136 | let write : type a. 137 | ?format:Aformat.t -> string -> (float, a) Audio.G.t -> int -> unit = 138 | fun ?format (filename : string) (x : (float, a) Audio.G.t) sample_rate -> 139 | let format = 140 | if format = None then 141 | match Aformat.of_ext (Filename.extension filename) with 142 | | Ok fmt -> 143 | fmt 144 | | Error e -> 145 | raise (Invalid_format e) 146 | else Option.get format 147 | in 148 | let format = Aformat.to_int format in 149 | let data = Audio.G.transpose x in 150 | let dshape = Audio.G.shape data in 151 | let nframes = dshape.(0) in 152 | let channels = if Array.length dshape > 1 then dshape.(1) else 1 in 153 | (* we get back our interleaved format *) 154 | match Audio.G.kind data with 155 | | Float32 -> 156 | caml_write_audio_file_f32 filename data 157 | (nframes, sample_rate, channels, format) 158 | | Float64 -> 159 | caml_write_audio_file_f64 filename data 160 | (nframes, sample_rate, channels, format) 161 | | _ -> 162 | raise 163 | (Invalid_argument 164 | "Float16 elements kind aren't supported. The array kind must be \ 165 | either Float32 or Float64." ) 166 | -------------------------------------------------------------------------------- /src/io/io.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023-2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | (** 23 | The {!Io} (in/out) module is the entry point for reading and writing audio 24 | data from and to the filesystem. It supports resampling via the {{:https://github.com/chirlu/soxr}SoXr} library. *) 25 | 26 | open Audio 27 | open Bigarray 28 | 29 | (** Thrown when a requested file cannot be found on the system. *) 30 | exception File_not_found of string 31 | 32 | (** Thrown when the file we're trying to read is encoded in an invalid format, or when the format we're trying to write isn't supported. *) 33 | exception Invalid_format of string 34 | 35 | (** Thrown when an error occurred while resampling. *) 36 | exception Resampling_error of string 37 | 38 | (** Thrown when an internal error occurred. This is should not happen, so please report it. *) 39 | exception Internal_error of string 40 | 41 | (** The resampling method to use. The default is [SOXR_HQ]. *) 42 | type resampling_t = 43 | | NONE (** Indicates that no resampling is requested *) 44 | | SOXR_QQ (** 'Quick' cubic interpolation. *) 45 | | SOXR_LQ (** 'Low' 16-bit with larger rolloff. *) 46 | | SOXR_MQ (** 'Medium' 16-bit with medium rolloff. *) 47 | | SOXR_HQ (** 'High quality'. *) 48 | | SOXR_VHQ (** 'Very high quality'. *) 49 | 50 | val read : 51 | 'a. 52 | ?res_typ:resampling_t 53 | -> ?sample_rate:int 54 | -> ?mono:bool 55 | -> (float, 'a) kind 56 | -> string 57 | -> 'a audio 58 | (** 59 | [read ?res_typ ?sample_rate ?fix kind filename] reads an audio file and returns an [audio]. 60 | 61 | @return an [audio] type that contains the audio data read from the file. The type of the audio's data is determined by the [kind] parameter. 62 | 63 | {2 Parameters} 64 | @param ?res_typ is the resampling method to use. The default is [SOXR_HQ]. If [NONE] is used, [?sample_rate] is ignored and no resampling will be done. 65 | @param ?sample_rate is the target sample rate to use when reading the file. Default is 22050 Hz. 66 | @param ?mono is a boolean that indicates if we want to convert to a mono audio. Default is [true]. 67 | @param kind is the format of audio data to read. It can be either [Bigarray.Float32] or [Bigarray.Float64]. 68 | @param filename is the path to the file to read audio from. 69 | 70 | @raise File_not_found If the file does not exist. 71 | @raise Invalid_format If the file is not a valid audio file. 72 | @raise Resampling_error If the resampling fails. 73 | @raise Internal_error If an internal error occurs. 74 | 75 | {2 Usage} 76 | Reading audio is straightfoward. Simply specify the path to the file you want to read. 77 | 78 | {[ 79 | open Soundml 80 | (* This will read the file.wav audio into a Float32 bigarray, resampled using SOXR_HQ at 22050Hz. *) 81 | let audio = Io.read Bigarray.Float32 "path/to/file.wav" 82 | ]} 83 | 84 | {2 Supported formats} 85 | 86 | SoundML relies on {{:https://libsndfile.github.io/libsndfile/}libsndfile} to read audio files. Full detail on the supported formats are available 87 | on the official sndfile's website: {{:https://libsndfile.github.io/libsndfile/formats.html}Supported formats} and in the {!Audio.Aformat} module. *) 88 | 89 | val write : 'a. ?format:Aformat.t -> string -> (float, 'a) G.t -> int -> unit 90 | (** 91 | [write ?format filename data sample_reat] writes an audio file to the filesystem. 92 | 93 | {2 Parameters} 94 | @param ?format is the format to use when writing the file. If not specified, the format is determined by the file extension by {!Aformat.of_ext}. 95 | @param filename is the path to the file to write audio to. 96 | @param data is the audio data to write. It can be either a [Bigarray.Float32] or [Bigarray.Float64]. 97 | @param sample_rate is the sample rate of the audio data. 98 | 99 | 100 | @raise Invalid_format If the file is not a valid audio file. 101 | @raise Internal_error If an internal error occurs. 102 | 103 | 104 | {2 Usage} 105 | Writing audio is as straightfoward as reading it. Simply specify the path to the file you want to write. 106 | 107 | {[ 108 | open Soundml 109 | open Audio 110 | let audio = Io.read Bigarray.Float32 "path/to/file.mp3" in 111 | Io.write "path/to/file.wav" (data audio) 22050 (* we'll automatically detect that you want to write to the WAV format *) 112 | ]} 113 | 114 | {2 Supported formats} 115 | 116 | SoundML relies on {{:https://libsndfile.github.io/libsndfile/}libsndfile} to read audio files. Full detail on the supported formats are available 117 | on the official sndfile's website: {{:https://libsndfile.github.io/libsndfile/formats.html}Supported formats} and in the {!Audio.Aformat} module. *) 118 | -------------------------------------------------------------------------------- /src/io/soundml_io.cxx: -------------------------------------------------------------------------------- 1 | /*****************************************************************************/ 2 | /* */ 3 | /* */ 4 | /* Copyright (C) 2023-2025 */ 5 | /* Gabriel Santamaria */ 6 | /* */ 7 | /* */ 8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 | /* you may not use this file except in compliance with the License. */ 10 | /* You may obtain a copy of the License at */ 11 | /* */ 12 | /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 | /* */ 14 | /* Unless required by applicable law or agreed to in writing, software */ 15 | /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 | /* See the License for the specific language governing permissions and */ 18 | /* limitations under the License. */ 19 | /* */ 20 | /*****************************************************************************/ 21 | 22 | #include "read.hxx" 23 | #include "write.hxx" 24 | 25 | extern "C" 26 | { 27 | #include 28 | #include 29 | 30 | CAMLprim value caml_read_audio_file_f32(value filename, value res_typ, value trgt_sr) 31 | { 32 | CAMLparam3(filename, res_typ, trgt_sr); 33 | CAMLreturn(caml_read_audio_file(filename, res_typ, trgt_sr)); 34 | } 35 | 36 | CAMLprim value caml_read_audio_file_f64(value filename, value res_typ, value trgt_sr) 37 | { 38 | CAMLparam3(filename, res_typ, trgt_sr); 39 | CAMLreturn(caml_read_audio_file(filename, res_typ, trgt_sr)); 40 | } 41 | 42 | CAMLprim value caml_write_audio_file_f32(value filename, value ba_data, value metadata) 43 | { 44 | CAMLparam3(filename, ba_data, metadata); 45 | CAMLreturn(caml_write_audio_file(filename, ba_data, metadata)); 46 | } 47 | 48 | CAMLprim value caml_write_audio_file_f64(value filename, value ba_data, value metadata) 49 | { 50 | CAMLparam3(filename, ba_data, metadata); 51 | CAMLreturn(caml_write_audio_file(filename, ba_data, metadata)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/soundml.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | module Audio = Audio 23 | module Io = Io 24 | module Types = Types 25 | module Transform = Transform 26 | module Window = Window 27 | module Feature = Feature 28 | module Filterbank = Filterbank 29 | module Effects = Effects 30 | module Utils = Utils 31 | -------------------------------------------------------------------------------- /src/transform.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Bigarray 23 | open Types 24 | 25 | module Config = struct 26 | type t = 27 | { n_fft: int 28 | ; hop_size: int 29 | ; win_length: int 30 | ; window: Window.window 31 | ; center: bool } 32 | 33 | let default = 34 | { n_fft= 2048 35 | ; hop_size= 512 36 | ; win_length= 2048 37 | ; window= `Hanning 38 | ; center= true } 39 | end 40 | 41 | module G = Owl.Dense.Ndarray.Generic 42 | 43 | let to_complex (x : float) : Complex.t = Complex.{re= x; im= 0.} 44 | 45 | let stft : type a b. 46 | ?config:Config.t -> (a, b) precision -> (float, a) G.t -> (Complex.t, b) G.t 47 | = 48 | fun ?(config : Config.t = Config.default) p (x : (float, a) G.t) -> 49 | let kd : (Complex.t, b) kind = 50 | match p with B32 -> Complex32 | B64 -> Complex64 51 | in 52 | let window = (Window.get config.window p ~fftbins:true) config.win_length in 53 | let out_shape = 54 | [| (config.n_fft / 2) + 1 55 | ; ((G.numel x - config.win_length) / config.hop_size) + 1 |] 56 | in 57 | let spectrum = Audio.G.create kd out_shape Complex.zero in 58 | let ym = Audio.G.zeros kd [|config.n_fft; 1|] in 59 | for m = 0 to out_shape.(1) - 1 do 60 | Audio.G.fill ym Complex.zero ; 61 | for p = 0 to config.win_length - 1 do 62 | Audio.G.( 63 | ym.%{p; 0} <- 64 | to_complex 65 | @@ Float.mul 66 | (get x [|Int.(add p (mul m config.hop_size))|]) 67 | window.%{p} ) 68 | done ; 69 | let ym_fft = Owl.Fft.Generic.fft ~axis:0 ym in 70 | let spectrum_slice = 71 | Audio.G.get_slice [[0; out_shape.(0) - 1]; [0]] ym_fft 72 | in 73 | Audio.G.set_slice_ ~out:spectrum [[]; [m]] spectrum spectrum_slice 74 | done ; 75 | spectrum 76 | -------------------------------------------------------------------------------- /src/transform.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Types 23 | 24 | module Config : sig 25 | type t = 26 | { n_fft: int 27 | ; hop_size: int 28 | ; win_length: int 29 | ; window: Window.window 30 | ; center: bool } 31 | 32 | val default : t 33 | end 34 | 35 | val stft : 36 | 'a 'b. 37 | ?config:Config.t 38 | -> ('a, 'b) precision 39 | -> (float, 'a) Audio.G.t 40 | -> (Complex.t, 'b) Audio.G.t 41 | -------------------------------------------------------------------------------- /src/types.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type (_, _) precision = 23 | | B32 : (Bigarray.float32_elt, Bigarray.complex32_elt) precision 24 | | B64 : (Bigarray.float64_elt, Bigarray.complex64_elt) precision 25 | -------------------------------------------------------------------------------- /src/types.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type (_, _) precision = 23 | | B32 : (Bigarray.float32_elt, Bigarray.complex32_elt) precision 24 | | B64 : (Bigarray.float64_elt, Bigarray.complex64_elt) precision 25 | -------------------------------------------------------------------------------- /src/utils.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | (** 23 | Utility conversion module. *) 24 | module Convert : sig 25 | val mel_to_hz : 26 | ?htk:bool 27 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t 28 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t 29 | (** Converts mel-scale values to Hz. *) 30 | 31 | val hz_to_mel : 32 | ?htk:bool 33 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t 34 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t 35 | (** Reverse function of {!mel_to_hz}. *) 36 | 37 | type reference = 38 | | RefFloat of float 39 | | RefFunction of ((float, Bigarray.float32_elt) Audio.G.t -> float) 40 | 41 | val power_to_db : 42 | ?amin:float 43 | -> ?top_db:float option 44 | -> reference 45 | -> (float, Bigarray.float32_elt) Owl_dense_ndarray.Generic.t 46 | -> (float, Bigarray.float32_elt) Owl_dense_ndarray_generic.t 47 | 48 | val db_to_power : 49 | ?amin:float 50 | -> reference 51 | -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t 52 | -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t 53 | end 54 | 55 | val pad_center : 56 | ('a, 'b) Owl.Dense.Ndarray.Generic.t 57 | -> int 58 | -> 'a 59 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t 60 | (** 61 | Pads a ndarray such that *) 62 | 63 | val frame : 64 | ('a, 'b) Owl_dense_ndarray.Generic.t 65 | -> int 66 | -> int 67 | -> int 68 | -> ('a, 'b) Owl_dense_ndarray.Generic.t 69 | 70 | val fftfreq : 71 | int -> float -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t 72 | (** 73 | Implementation of the Numpy's fftfreq function. 74 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.fft.fftfreq.html}numpy.fft.fftfreq} for more information. *) 75 | 76 | val rfftfreq : 77 | (float, 'b) Bigarray.kind 78 | -> int 79 | -> float 80 | -> (float, 'b) Owl.Dense.Ndarray.Generic.t 81 | (** 82 | Implementation of the Numpy's rfftfreq function. 83 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.fft.rfftfreq.html}numpy.fft.rfftfreq} for more information. *) 84 | 85 | val melfreq : 86 | ?nmels:int 87 | -> ?fmin:float 88 | -> ?fmax:float 89 | -> ?htk:bool 90 | -> (float, 'b) Bigarray.kind 91 | -> (float, 'b) Owl.Dense.Ndarray.Generic.t 92 | (** 93 | Implementation of librosa's mel_frequencies. Compute an [Owl.Dense.Ndarray] of acoustic frequencies tuned to the mel scale. 94 | See: {{:https://librosa.org/doc/main/generated/librosa.mel_frequencies.html}librosa.mel_frequencies} for more information. *) 95 | 96 | val roll : 97 | ('a, 'b) Owl.Dense.Ndarray.Generic.t 98 | -> int 99 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t 100 | (** 101 | Implementation of the Numpy's roll function on the 0th axis of the given ndarray. 102 | This function is used to shift elements of an array inside the library and is exposed 103 | as it can be sometimes usefull. 104 | 105 | This function returns a copy of the given ndarray. 106 | 107 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.roll.html}numpy.roll} for more information. *) 108 | 109 | val cov : ?b:('a, 'b) Audio.G.t -> a:('a, 'b) Audio.G.t -> ('a, 'b) Audio.G.t 110 | (** 111 | (re)Implementation of the matrix covariance function from Owl. 112 | 113 | Note: this is temporary and done only because Owl doesn't export any 114 | cov function for the [Ndarray] module on which [Audio.G] is based. This function is 115 | likely to be deleted when Owl library will export such a cov function for n-dimensional arrays. *) 116 | 117 | val unwrap : 118 | ?discont:float option 119 | -> ?axis:int 120 | -> ?period:float 121 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t 122 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t 123 | (** 124 | Implementation of the Numpy's unwrap function. 125 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.unwrap.html}numpy.unwrap} for more information. *) 126 | 127 | val outer : 128 | ( ('a, 'b) Owl.Dense.Ndarray.Generic.t 129 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t 130 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t ) 131 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t 132 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t 133 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t 134 | (** 135 | Generalized outer product of any given operator that supports broadcasting (basically all the common Owl's Ndarray operators.) *) 136 | -------------------------------------------------------------------------------- /src/window.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Types 23 | 24 | type window = [`Hanning | `Hamming | `Blackman | `Boxcar] 25 | 26 | let kind_of_precision : type a b. (a, b) precision -> (float, a) Bigarray.kind = 27 | fun prec -> match prec with B32 -> Bigarray.Float32 | B64 -> Bigarray.Float64 28 | 29 | let cosine_sum ?(fftbins = false) (prec : ('a, 'b) precision) (a : float array) 30 | m = 31 | let kd = kind_of_precision prec in 32 | if m < 0 then invalid_arg "Window length M must be a non-negative integer" 33 | else if m = 0 then Audio.G.empty kd [|0|] 34 | else if m = 1 then Audio.G.ones kd [|1|] 35 | else 36 | let sym = not fftbins in 37 | let m_extended, needs_trunc = 38 | if not sym then (m + 1, true) else (m, false) 39 | in 40 | let fac = Audio.G.linspace kd (-.Owl_const.pi) Owl_const.pi m_extended in 41 | let w = Audio.G.zeros kd [|m_extended|] in 42 | Array.iteri 43 | (fun k coeff_val -> 44 | if coeff_val <> 0.0 then 45 | let term = 46 | if k = 0 then Audio.G.create kd [|m_extended|] coeff_val 47 | else 48 | let k_float = float_of_int k in 49 | let cos_args = Audio.G.mul_scalar fac k_float in 50 | let cos_terms = Audio.G.cos cos_args in 51 | Audio.G.mul_scalar cos_terms coeff_val 52 | in 53 | Audio.G.add_ ~out:w w term ) 54 | a ; 55 | if needs_trunc then Audio.G.get_slice [[0; m - 1]] w else w 56 | 57 | let hanning ?(fftbins = false) (prec : ('a, 'b) precision) m = 58 | cosine_sum ~fftbins prec [|0.5; 1. -. 0.5|] m 59 | 60 | let hamming ?(fftbins = false) (prec : ('a, 'b) precision) m = 61 | cosine_sum ~fftbins prec [|0.54; 1. -. 0.54|] m 62 | 63 | let blackman ?(fftbins = false) (prec : ('a, 'b) precision) m = 64 | cosine_sum ~fftbins prec [|0.42; 0.5; 0.08|] m 65 | 66 | let boxcar ?(fftbins = false) (prec : ('a, 'b) precision) (size : int) : 67 | (float, 'a) Audio.G.t = 68 | let kd = kind_of_precision prec in 69 | if size < 0 then failwith "Window length M must be non-negative" 70 | else if size = 0 then Audio.G.empty kd [|0|] 71 | else Audio.G.ones kd [|size|] 72 | [@@warning "-27"] 73 | 74 | let get (typ : window) (prec : ('a, 'b) precision) : 75 | ?fftbins:bool -> int -> (float, 'a) Audio.G.t = 76 | fun ?fftbins size -> 77 | match typ with 78 | | `Hanning -> 79 | hanning ?fftbins prec size 80 | | `Hamming -> 81 | hamming ?fftbins prec size 82 | | `Blackman -> 83 | blackman ?fftbins prec size 84 | | `Boxcar -> 85 | boxcar ?fftbins prec size 86 | -------------------------------------------------------------------------------- /src/window.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2023 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Types 23 | 24 | (** {1 Window Functions} 25 | 26 | This module provides a few commonly used window functions. *) 27 | 28 | (** The type of window functions. *) 29 | type window = [`Hanning | `Hamming | `Blackman | `Boxcar] 30 | 31 | val get : 32 | window 33 | -> ('a, 'b) precision 34 | -> ?fftbins:bool 35 | -> int 36 | -> (float, 'a) Owl_dense_ndarray.Generic.t 37 | (** 38 | [get window precision n] generates a window of size [n] using the given window function type. 39 | 40 | {2 Parameters} 41 | 42 | @param window The type of window to generate. 43 | @param precision The precision of the Bigarray elements. 44 | @param n The size of the window to generate. The size of the window must be greater than 0. 45 | 46 | @raise Invalid_argument if [n] is less than or equal to 0. *) 47 | 48 | val cosine_sum : 49 | ?fftbins:bool 50 | -> ('a, 'b) precision 51 | -> float array 52 | -> int 53 | -> (float, 'a) Owl_dense_ndarray.Generic.t 54 | (** 55 | [cosine_sum precision coeffs n] generates a cosine-sum window of size [n] using the given coefficients. 56 | 57 | {2 Parameters} 58 | @param precision The precision of the Bigarray elements. 59 | @param coeffs The coefficients of the cosine-sum window. The length of the coefficients array must be greater than 0. 60 | @param n The size of the window to generate. The size of the window must be greater than 0. 61 | 62 | {2 Raises} 63 | @raise Invalid_argument if [n] is less than or equal to 0. 64 | @raise Invalid_argument if the length of [coeffs] is less than 1. 65 | 66 | {2 References} 67 | @see https://en.wikipedia.org/wiki/Window_function#Cosine-sum_windows 68 | @see https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.general_cosine.html *) 69 | 70 | val hanning : 71 | ?fftbins:bool 72 | -> ('a, 'b) precision 73 | -> int 74 | -> (float, 'a) Owl_dense_ndarray.Generic.t 75 | (** 76 | [hanning precision n] generates a Hanning window of size [n]. 77 | 78 | {2 Parameters} 79 | 80 | @param precision The precision of the Bigarray elements. 81 | @param n The size of the window to generate. The size of the window must be greater than 0. 82 | 83 | @raise Invalid_argument if [n] is less than or equal to 0. *) 84 | 85 | val hamming : 86 | ?fftbins:bool 87 | -> ('a, 'b) precision 88 | -> int 89 | -> (float, 'a) Owl_dense_ndarray.Generic.t 90 | (** 91 | [hamming precision n] generates a Hamming window of size [n]. 92 | 93 | {2 Parameters} 94 | 95 | @param precision The precision of the Bigarray elements. 96 | @param n The size of the window to generate. The size of the window must be greater than 0. 97 | 98 | @raise Invalid_argument if [n] is less than or equal to 0. *) 99 | 100 | val blackman : 101 | ?fftbins:bool 102 | -> ('a, 'b) precision 103 | -> int 104 | -> (float, 'a) Owl_dense_ndarray.Generic.t 105 | (** 106 | [blackman precision n] generates a Blackman window of size [n]. 107 | 108 | {2 Parameters} 109 | 110 | @param precision The precision of the Bigarray elements. 111 | @param n The size of the window to generate. The size of the window must be greater than 0. 112 | 113 | @raise Invalid_argument if [n] is less than or equal to 0. *) 114 | 115 | val boxcar : 116 | ?fftbins:bool 117 | -> ('a, 'b) precision 118 | -> int 119 | -> (float, 'a) Owl_dense_ndarray.Generic.t 120 | (** 121 | [boxcar precision n] generates a Rectangular window of size [n]. 122 | 123 | {2 Parameters} 124 | 125 | @param precision The precision of the Bigarray elements. 126 | @param n The size of the window to generate. The size of the window must be greater than 0. 127 | 128 | @raise Invalid_argument if [n] is less than or equal to 0. *) 129 | -------------------------------------------------------------------------------- /test/README.md: -------------------------------------------------------------------------------- 1 | ## About SoundML Testing 2 | 3 | We choose [librosa](https://librosa.org/) as the main reference implementation for the algorithms inside SoundML. Thus, the tests are based on results obtained from librosa runs on a few audio source files either found on [freesound.org](https://freesound.org/), [freemusicarchive.org](https://freemusicarchive.org) or generated by us using FFmpeg. 4 | 5 | The Python script `generate_vectors.py` is used to generate a set of vectors directly from running librosa algorithms onto the audio files present in the `test/audio` directory. Once generated, these vectors are stored in the `test/vectors` directory and are used by `Alcotest` to compare the results of SoundML with the ones from librosa. -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (data_only_dirs vectors audio) 2 | 3 | (tests 4 | (names 5 | test_aformat 6 | test_write 7 | test_utils 8 | test_time 9 | test_window 10 | test_filter 11 | test_filterbank) 12 | (libraries alcotest soundml) 13 | (action 14 | (run %{test}))) 15 | 16 | (tests 17 | (names test_stft test_timeseries) 18 | (libraries alcotest yojson soundml) 19 | (package soundml) 20 | (deps 21 | generate_vectors.py 22 | generate_audio.sh 23 | (source_tree audio)) 24 | (action 25 | (progn 26 | (system "mkdir -p audio") 27 | (system "./generate_audio.sh") 28 | (system "mkdir -p vectors") 29 | (system "python3 %{dep:generate_vectors.py}") 30 | (run %{test})))) 31 | -------------------------------------------------------------------------------- /test/generate_audio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | OUTPUT_DIR="$(pwd)/audio" 4 | 5 | declare -A AUDIO_FORMATS=( 6 | ["wav"]="pcm_s16le" 7 | ["aif"]="pcm_s16be" 8 | ["flac"]="flac" 9 | ["mp3"]="libmp3lame" 10 | ) 11 | 12 | SAMPLE_RATES=(8000 44100) 13 | DURATION=1 14 | 15 | declare -A SOURCES=( 16 | ["clipping"]="sine=f=440:sample_rate={SR}:duration={DUR},volume=volume=5" 17 | ["lsine"]="sine=f=10:sample_rate={SR}:duration={DUR}" 18 | ["stereo"]="sine=f=500:sample_rate={SR}:duration={DUR} [l]; aevalsrc=exprs='random(0)*0.5':duration={DUR}:sample_rate={SR} [r]; [l][r] amerge=inputs=2" 19 | ) 20 | 21 | if ! command -v ffmpeg &> /dev/null; then 22 | echo "Error: ffmpeg is not installed." >&2 23 | exit 1 24 | fi 25 | 26 | total_files=$(( ${#SOURCES[@]} * ${#AUDIO_FORMATS[@]} * ${#SAMPLE_RATES[@]} )) 27 | successful_files=0 28 | current_file_number=0 29 | 30 | for audio_source in "${!SOURCES[@]}"; do 31 | sft="${SOURCES[$audio_source]}" 32 | 33 | for format in "${!AUDIO_FORMATS[@]}"; do 34 | codec="${AUDIO_FORMATS[$format]}" 35 | 36 | for sr in "${SAMPLE_RATES[@]}"; do 37 | ((current_file_number++)) 38 | 39 | source_key=$(echo "$audio_source" | tr -cd '[:alnum:]_-') 40 | filename="${format}_${source_key}_${sr}hz_${DURATION}s.${format}" 41 | output_path="$OUTPUT_DIR/$filename" 42 | 43 | source_filter="${sft//\{SR\}/$sr}" 44 | source_filter="${source_filter//\{DUR\}/$DURATION}" 45 | 46 | codec_opts=() 47 | case "$codec" in 48 | "libmp3lame") codec_opts+=("-b:a" "32k" "-compression_level" "9") ;; 49 | "libvorbis") codec_opts+=("-q:a" "0") ;; 50 | "aac") codec_opts+=("-b:a" "48k") ;; 51 | esac 52 | 53 | ffmpeg -y \ 54 | -f lavfi -i "$source_filter" \ 55 | -t "$DURATION" \ 56 | -ar "$sr" \ 57 | -c:a "$codec" "${codec_opts[@]}" \ 58 | -vn \ 59 | "$output_path" \ 60 | -loglevel error > /dev/null 2>&1 61 | 62 | if [[ $? -eq 0 ]]; then 63 | ((successful_files++)) 64 | fi 65 | done 66 | done 67 | done 68 | 69 | exit 0 70 | -------------------------------------------------------------------------------- /test/generate_vectors.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is part of SoundML. 3 | 4 | Copyright (C) 2025 Gabriel Santamaria 5 | 6 | This script is used to generate test vectors for SoundML. 7 | The reference implementation choosen is librosa. 8 | It's supposed to be ran only once. Then the generated vectors 9 | haved to be used for the actual testing. 10 | """ 11 | 12 | from typing import Any, Tuple, Dict, List 13 | import os 14 | import json 15 | import numpy as np 16 | import librosa 17 | 18 | AUDIO_DIRECTORY = "audio/" 19 | VECTOR_DIRECTORY = "vectors/" 20 | 21 | 22 | class Parameters: 23 | """ 24 | Class representing parameters used to generate a vector 25 | """ 26 | 27 | parameters: Dict[str, Any] 28 | 29 | def __init__(self, parameters: Dict[str, Any]): 30 | self.parameters = parameters 31 | 32 | def write(self, filename: str): 33 | """ 34 | Write the parameters to a JSON file 35 | """ 36 | with open(filename, "w", encoding="utf-8") as f: 37 | json.dump(self.parameters, f, indent=4) 38 | 39 | 40 | class VectorGenerator: 41 | """ 42 | Abstract class representing an audio vector generators 43 | """ 44 | 45 | BASE_IDENTIFIER: str 46 | 47 | audio_paths: list[str] 48 | output_dir: str 49 | 50 | counter: int = 0 51 | 52 | def __init__(self, audio_paths: list[str], output_dir: str): 53 | self.audio_paths = audio_paths 54 | self.output_dir = os.path.join(output_dir, f"{self.BASE_IDENTIFIER}/") 55 | 56 | def normalize_name(self, name: str) -> str: 57 | """ 58 | Normalize the name of the audio file 59 | """ 60 | return name.replace(" ", "_").replace("-", "_").lower() 61 | 62 | def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]: 63 | """ 64 | Generate the vector for the given audio file 65 | """ 66 | raise NotImplementedError("Subclasses should implement this method") 67 | 68 | def generate(self): 69 | """ 70 | Generate the audio vectors 71 | """ 72 | if not os.path.exists(self.output_dir): 73 | os.makedirs(self.output_dir, exist_ok=True) 74 | 75 | for audio_path in self.audio_paths: 76 | identifier = os.path.splitext(os.path.basename(audio_path))[0] 77 | try: 78 | data: Tuple[np.ndarray, Parameters] = self.vector(audio_path) 79 | 80 | self.counter += 1 81 | 82 | y: np.ndarray = data[0] 83 | params = data[1] 84 | filename: str = self.normalize_name( 85 | f"{self.BASE_IDENTIFIER}_{identifier}" 86 | ) 87 | output_filename: str = os.path.join(self.output_dir, f"{filename}.npy") 88 | params_filename: str = os.path.join(self.output_dir, f"{filename}.json") 89 | np.save(output_filename, y) 90 | params.write(params_filename) 91 | except Exception as e: 92 | print(f"ERROR generating for {identifier}: {e}") 93 | 94 | 95 | class TimeSeriesVectorGenerator(VectorGenerator): 96 | """ 97 | Reads an audio file and creates a time-series vector representation of it 98 | """ 99 | 100 | BASE_IDENTIFIER: str = "timeseries" 101 | 102 | resamplers: List[str] = ["soxr_vhq", "soxr_hq", "soxr_mq", "soxr_lq"] 103 | srs = [None, 8000, 16000, 22050] 104 | 105 | def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]: 106 | """ 107 | Generate the time-series vector for the given file 108 | """ 109 | params = {} 110 | mono = False if self.counter % 2 == 0 or self.counter % 3 == 0 else False 111 | sr = self.srs[self.counter % len(self.srs)] 112 | res_type = self.resamplers[self.counter % len(self.resamplers)] 113 | params["mono"] = mono 114 | if sr is not None: 115 | params["res_type"] = res_type 116 | y, sr = librosa.load( 117 | audio_path, mono=mono, sr=sr, res_type=res_type, dtype=np.float64 118 | ) 119 | params["sr"] = sr 120 | y = np.ascontiguousarray(y, dtype=np.float64) 121 | 122 | return (y, Parameters(params)) 123 | 124 | 125 | class STFTVectorGenerator(VectorGenerator): 126 | """ 127 | Reads an audio file and creates a STFT vector representation of it 128 | """ 129 | 130 | BASE_IDENTIFIER: str = "stft" 131 | 132 | nffts = [512]#, #1024, 2048, 4096] 133 | window_lengths = [512]#64, 128, 256, 512] 134 | hop_sizes = [128]#, 256, 512] 135 | centers = [False, False, False] 136 | window_types = ["hann"]#, "hamming", "blackman", "boxcar"] 137 | 138 | def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]: 139 | """ 140 | Generate the STFT vector for the given file 141 | """ 142 | params = {} 143 | n_fft = self.nffts[self.counter % len(self.nffts)] 144 | hop_size = self.hop_sizes[self.counter % len(self.hop_sizes)] 145 | window_type = self.window_types[self.counter % len(self.window_types)] 146 | window_length = self.window_lengths[self.counter % len(self.window_lengths)] 147 | center = self.centers[self.counter % len(self.centers)] 148 | params["window_length"] = window_length 149 | params["n_fft"] = n_fft 150 | params["hop_size"] = hop_size 151 | params["window_type"] = window_type 152 | params["center"] = center 153 | params["res_type"] = "soxr_hq" 154 | 155 | y, sr = librosa.load(audio_path) 156 | y = y.astype(np.float64) 157 | stft = librosa.stft( 158 | y, 159 | n_fft=n_fft, 160 | hop_length=hop_size, 161 | win_length=window_length, 162 | window=window_type, 163 | dtype=np.complex64, 164 | center=center, 165 | ) 166 | stft = np.ascontiguousarray(stft, dtype=np.complex64) 167 | params = Parameters(params) 168 | 169 | return (stft, params) 170 | 171 | 172 | generators: list[VectorGenerator] = [TimeSeriesVectorGenerator, STFTVectorGenerator] 173 | 174 | if __name__ == "__main__": 175 | audio_files = [ 176 | os.path.join(AUDIO_DIRECTORY, f) for f in os.listdir(AUDIO_DIRECTORY) 177 | ] 178 | if not os.path.exists(VECTOR_DIRECTORY): 179 | os.makedirs(VECTOR_DIRECTORY) 180 | 181 | for generator in generators: 182 | generator: VectorGenerator = generator(audio_files, VECTOR_DIRECTORY) 183 | generator.generate() 184 | -------------------------------------------------------------------------------- /test/test_stft.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Soundml 23 | open Vutils 24 | 25 | let string_to_window = function 26 | | "hann" -> 27 | `Hanning 28 | | "hamming" -> 29 | `Hamming 30 | | "blackman" -> 31 | `Blackman 32 | | "boxcar" -> 33 | `Boxcar 34 | | _ -> 35 | failwith "Unknown window type" 36 | 37 | module StftTestable = struct 38 | type t = Complex.t 39 | 40 | type p = Bigarray.complex32_elt 41 | 42 | type pf = Bigarray.float32_elt 43 | 44 | type pc = Bigarray.complex32_elt 45 | 46 | type ('a, 'b) precision = ('a, 'b) Types.precision 47 | 48 | let precision = Types.B32 49 | 50 | let kd = Bigarray.Complex32 51 | 52 | let typ = "stft" 53 | 54 | let generate (precision : (pf, pc) precision) 55 | (case : string * string * Parameters.t) 56 | (audio : (float, 'c) Owl_dense_ndarray.Generic.t) = 57 | let _, _, params = case in 58 | let n_fft = 59 | Option.value ~default:2048 @@ Parameters.get_int "n_fft" params 60 | in 61 | let hop_size = 62 | Option.value ~default:512 @@ Parameters.get_int "hop_size" params 63 | in 64 | let win_length = 65 | Option.value ~default:2048 @@ Parameters.get_int "window_length" params 66 | in 67 | let window = 68 | string_to_window 69 | (Option.value ~default:"hann" @@ Parameters.get_string "window" params) 70 | in 71 | let center = 72 | Option.value ~default:false @@ Parameters.get_bool "center" params 73 | in 74 | let config = 75 | Transform.Config.{n_fft; hop_size; win_length; window; center} 76 | in 77 | let stft = Transform.stft ~config precision audio in 78 | let _kd = kd in 79 | stft 80 | end 81 | 82 | module Tests = Tests_cases (StftTestable) 83 | 84 | let () = 85 | let name = "Vectors: STFT Comparison" in 86 | let data = Testdata.get StftTestable.typ Vutils.data in 87 | let tests = Tests.create_tests data in 88 | Tests.run name tests 89 | -------------------------------------------------------------------------------- /test/test_time.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Soundml.Effects.Time 23 | 24 | let config_testable : Config.t Alcotest.testable = 25 | Alcotest.testable 26 | (Fmt.of_to_string (fun fmt -> 27 | Format.sprintf "Effects.Time.Config int:%d" (Config.to_int fmt) ) ) 28 | (fun a b -> Config.to_int a = Config.to_int b) 29 | 30 | let test_default_int () = 31 | let expected = 0x00000000 in 32 | let actual = Config.to_int Config.default in 33 | Alcotest.check Alcotest.int "Default config integer value" expected actual 34 | 35 | let test_percussive_int () = 36 | let expected = 0x00102000 in 37 | let actual = Config.to_int Config.percussive in 38 | Alcotest.check Alcotest.int "Percussive config integer" expected actual ; 39 | let expected = {Config.default with window= Short; phase= Independent} in 40 | Alcotest.check config_testable "Percussive config record" expected 41 | Config.percussive 42 | 43 | let test_single_options () = 44 | Alcotest.check Alcotest.int "EngineFiner" 0x20000000 45 | (Config.to_int (Config.with_engine Finer Config.default)) ; 46 | Alcotest.check Alcotest.int "TransientsMixed" 0x00000100 47 | (Config.to_int (Config.with_transients Mixed Config.default)) ; 48 | Alcotest.check Alcotest.int "TransientsSmooth" 0x00000200 49 | (Config.to_int (Config.with_transients Smooth Config.default)) ; 50 | Alcotest.check Alcotest.int "DetectorPercussive" 0x00000400 51 | (Config.to_int (Config.with_detector Percussive Config.default)) ; 52 | Alcotest.check Alcotest.int "DetectorSoft" 0x00000800 53 | (Config.to_int (Config.with_detector Soft Config.default)) ; 54 | Alcotest.check Alcotest.int "PhaseIndependent" 0x00002000 55 | (Config.to_int (Config.with_phase Independent Config.default)) ; 56 | Alcotest.check Alcotest.int "ThreadingNever" 0x00010000 57 | (Config.to_int (Config.with_threading Never Config.default)) ; 58 | Alcotest.check Alcotest.int "ThreadingAlways" 0x00020000 59 | (Config.to_int (Config.with_threading Always Config.default)) ; 60 | Alcotest.check Alcotest.int "WindowShort" 0x00100000 61 | (Config.to_int (Config.with_window Short Config.default)) ; 62 | Alcotest.check Alcotest.int "WindowLong" 0x00200000 63 | (Config.to_int (Config.with_window Long Config.default)) ; 64 | Alcotest.check Alcotest.int "SmoothingOn" 0x00800000 65 | (Config.to_int (Config.with_smoothing On Config.default)) ; 66 | Alcotest.check Alcotest.int "FormantPreserved" 0x01000000 67 | (Config.to_int (Config.with_formant Preserved Config.default)) ; 68 | Alcotest.check Alcotest.int "PitchHighQuality" 0x02000000 69 | (Config.to_int (Config.with_pitch HighQuality Config.default)) ; 70 | Alcotest.check Alcotest.int "PitchHighConsistency" 0x04000000 71 | (Config.to_int (Config.with_pitch HighConsistency Config.default)) ; 72 | Alcotest.check Alcotest.int "ChannelsTogether" 0x10000000 73 | (Config.to_int (Config.with_channels Together Config.default)) 74 | 75 | let test_combinations () = 76 | let cfg = Config.default |> Config.with_engine Finer in 77 | let expected = 0x20000000 in 78 | Alcotest.check Alcotest.int "Combo: RealTime | Finer" expected 79 | (Config.to_int cfg) ; 80 | let cfg = 81 | Config.default |> Config.with_window Short 82 | |> Config.with_threading Never 83 | |> Config.with_formant Preserved 84 | in 85 | let expected = 0x01110000 in 86 | Alcotest.check Alcotest.int "Combo: Short | Never | Preserved" expected 87 | (Config.to_int cfg) ; 88 | let cfg = 89 | Config. 90 | { engine= Finer 91 | ; (* 0x20000000 *) 92 | transients= Smooth 93 | ; (* 0x00000200 *) 94 | detector= Soft 95 | ; (* 0x00000800 *) 96 | phase= Independent 97 | ; (* 0x00002000 *) 98 | threading= Always 99 | ; (* 0x00020000 *) 100 | window= Long 101 | ; (* 0x00200000 *) 102 | smoothing= On 103 | ; (* 0x00800000 *) 104 | formant= Preserved 105 | ; (* 0x01000000 *) 106 | pitch= HighConsistency 107 | ; (* 0x04000000 *) 108 | channels= Together (* 0x10000000 *) } 109 | in 110 | let expected = 0x35A22A00 in 111 | Alcotest.check Alcotest.int "Combo: All non-default" expected 112 | (Config.to_int cfg) 113 | 114 | let test_modifiers () = 115 | let base = Config.default in 116 | let modified_engine = Config.with_engine Finer base in 117 | Alcotest.check config_testable "with_engine changes only engine" 118 | {base with engine= Finer} modified_engine ; 119 | let modified_window_phase = 120 | base |> Config.with_window Short |> Config.with_phase Independent 121 | in 122 | Alcotest.check config_testable "with_window then with_phase" 123 | {base with window= Short; phase= Independent} 124 | modified_window_phase ; 125 | Alcotest.check config_testable "Manual percussive matches preset" 126 | Config.percussive modified_window_phase 127 | 128 | let test_time_stretch () = 129 | let config = Config.default in 130 | let sin_freq = 440.0 in 131 | let sample_rate = 44100 in 132 | let ratio = 2.0 in 133 | let audio_input = 134 | Audio.G.init Bigarray.Float32 [|sample_rate|] (fun i -> 135 | let t = float_of_int i /. float_of_int sample_rate in 136 | sin_freq *. (2.0 *. Float.pi *. t) |> Float.sin ) 137 | in 138 | let _ = time_stretch ~config audio_input sample_rate ratio in 139 | Alcotest.(check pass) 140 | "time_stretch completed without raising an exception" () () 141 | 142 | let test_time_stretch_invalid_ratio_raises () = 143 | let config = Config.default in 144 | let sample_rate = 44100 in 145 | let ratio = 0.0 in 146 | let audio_input = Audio.G.create Bigarray.Float32 [|100|] 0.0 in 147 | let expected_exn = Invalid_argument "rate must be > 0." in 148 | Alcotest.check_raises "Stretching with zero ratio raises Invalid_argument" 149 | expected_exn (fun () -> 150 | ignore (time_stretch ~config audio_input sample_rate ratio) ) 151 | 152 | let test_pitch_shift () = 153 | let config = Config.default in 154 | let sin_freq = 440.0 in 155 | let sample_rate = 44100 in 156 | let ratio = 2.0 in 157 | let audio_input = 158 | Audio.G.init Bigarray.Float32 [|sample_rate|] (fun i -> 159 | let t = float_of_int i /. float_of_int sample_rate in 160 | sin_freq *. (2.0 *. Float.pi *. t) |> Float.sin ) 161 | in 162 | let _ = time_stretch ~config audio_input sample_rate ratio in 163 | Alcotest.(check pass) 164 | "pitch_shift completed without raising an exception" () () 165 | 166 | let () = 167 | Alcotest.run "Effects.Time: Config" 168 | [ ( "Presets" 169 | , [ Alcotest.test_case "Default integer value" `Quick test_default_int 170 | ; Alcotest.test_case "Percussive integer value" `Quick 171 | test_percussive_int ] ) 172 | ; ( "Single Options" 173 | , [ Alcotest.test_case "Integer values for single flags" `Quick 174 | test_single_options ] ) 175 | ; ( "Combinations" 176 | , [ Alcotest.test_case "Integer values for combined flags" `Quick 177 | test_combinations ] ) 178 | ; ( "Modifiers" 179 | , [ Alcotest.test_case "Modifiers create correct configs" `Quick 180 | test_modifiers ] ) 181 | ; ( "Time Stretch/Pitch Shift" 182 | , [ Alcotest.test_case "Time stretch" `Quick test_time_stretch 183 | ; Alcotest.test_case "Time stretch raise" `Quick 184 | test_time_stretch_invalid_ratio_raises ] ) 185 | ; ("Pitch Shift", [Alcotest.test_case "Pitch shift" `Quick test_pitch_shift]) 186 | ] 187 | -------------------------------------------------------------------------------- /test/test_timeseries.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Soundml 23 | open Vutils 24 | 25 | module Timeseries = struct 26 | type t = Float.t 27 | 28 | type p = Bigarray.float64_elt 29 | 30 | type pf = Bigarray.float64_elt 31 | 32 | type pc = Bigarray.complex64_elt 33 | 34 | type ('a, 'b) precision = ('a, 'b) Types.precision 35 | 36 | let precision = Types.B64 37 | 38 | let kd = Bigarray.Float64 39 | 40 | let typ = "timeseries" 41 | 42 | let generate (_ : (pf, pc) precision) (_ : string * string * Parameters.t) 43 | (audio : (float, 'c) Owl_dense_ndarray.Generic.t) = 44 | audio 45 | end 46 | 47 | module Tests = Tests_cases (Timeseries) 48 | 49 | let () = 50 | let name = "Vectors: Timeseries Comparison" in 51 | let data = Testdata.get Timeseries.typ Vutils.data in 52 | let tests = Tests.create_tests data in 53 | Tests.run name tests 54 | -------------------------------------------------------------------------------- /test/test_utils.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | type data = (float, Bigarray.float32_elt) Audio.G.t 23 | 24 | let data_testable : data Alcotest.testable = 25 | ( module struct 26 | type t = data 27 | 28 | let pp : t Fmt.t = 29 | fun fmt ndarray -> 30 | let shape_array = Audio.G.shape ndarray in 31 | let pp_shape = Fmt.brackets (Fmt.array ~sep:Fmt.semi Fmt.int) in 32 | Fmt.pf fmt "%a" pp_shape shape_array 33 | 34 | let equal : t -> t -> bool = Tutils.Check.rallclose 35 | end ) 36 | 37 | module Test_pad_center = struct 38 | let create_data (arr : float array) : data = 39 | Audio.G.of_array Bigarray.Float32 arr [|Array.length arr|] 40 | (* Create 1D Ndarray *) 41 | 42 | let test_no_padding () = 43 | let input_data = create_data [|1.; 2.; 3.|] in 44 | let target_size = 3 in 45 | let pad_value = 0. in 46 | let expected_output = create_data [|1.; 2.; 3.|] in 47 | let actual_output = Utils.pad_center input_data target_size pad_value in 48 | Alcotest.check data_testable "no_padding: Correct padding" expected_output 49 | actual_output 50 | 51 | let test_even_padding () = 52 | let input_data = create_data [|1.; 2.|] in 53 | let target_size = 6 in 54 | let pad_value = 0. in 55 | let expected_output = create_data [|0.; 0.; 1.; 2.; 0.; 0.|] in 56 | let actual_output = Utils.pad_center input_data target_size pad_value in 57 | Alcotest.check data_testable "even_padding: Correct padding" expected_output 58 | actual_output 59 | 60 | let test_odd_padding () = 61 | let input_data = create_data [|1.; 2.; 3.|] in 62 | let target_size = 6 in 63 | let pad_value = 0. in 64 | let expected_output = create_data [|0.; 1.; 2.; 3.; 0.; 0.|] in 65 | let actual_output = Utils.pad_center input_data target_size pad_value in 66 | Alcotest.check data_testable "odd_padding: Correct padding" expected_output 67 | actual_output 68 | 69 | let test_empty_input () = 70 | let input_data = create_data [||] in 71 | let target_size = 4 in 72 | let pad_value = 0. in 73 | let expected_output = create_data [|0.; 0.; 0.; 0.|] in 74 | let actual_output = Utils.pad_center input_data target_size pad_value in 75 | Alcotest.check data_testable "empty_input: Correct padding" expected_output 76 | actual_output 77 | 78 | let test_error_target_too_small () = 79 | let input_data = create_data [|1.; 2.; 3.; 4.|] in 80 | let target_size = 2 in 81 | let pad_value = 0. in 82 | let expected_exn = 83 | Invalid_argument 84 | "An error occured while trying to pad: current_size > target_size" 85 | in 86 | Alcotest.check_raises 87 | "error_target_too_small: raises Invalid_argument when target_size < \ 88 | input_size" 89 | expected_exn (fun () -> 90 | ignore (Utils.pad_center input_data target_size pad_value) ) 91 | 92 | let test_non_zero_padding () = 93 | let input_data = create_data [|5.; 6.|] in 94 | let target_size = 5 in 95 | let pad_value = -1.5 in 96 | let expected_output = create_data [|-1.5; 5.; 6.; -1.5; -1.5|] in 97 | let actual_output = Utils.pad_center input_data target_size pad_value in 98 | Alcotest.check data_testable "non_zero_padding: Correct padding" 99 | expected_output actual_output 100 | 101 | let test_zero_target_empty_input () = 102 | let input_data = create_data [||] in 103 | flush_all () ; 104 | let target_size = 0 in 105 | let pad_value = 0. in 106 | let expected_output = create_data [||] in 107 | let actual_output = Utils.pad_center input_data target_size pad_value in 108 | Alcotest.check data_testable "zero_target_empty_input: Correct padding" 109 | expected_output actual_output 110 | 111 | let test_zero_target_non_empty_input () = 112 | let input_data = create_data [|1.; 2.|] in 113 | let target_size = 0 in 114 | let pad_value = 0. in 115 | let expected_exn = 116 | Invalid_argument 117 | "An error occured while trying to pad: current_size > target_size" 118 | in 119 | Alcotest.check_raises 120 | "zero_target_non_empty_input: raises Invalid_argument when target_size < \ 121 | input_size" 122 | expected_exn (fun () -> 123 | ignore (Utils.pad_center input_data target_size pad_value) ) 124 | 125 | let suite = 126 | [ Alcotest.test_case "no_padding" `Quick test_no_padding 127 | ; Alcotest.test_case "even_padding" `Quick test_even_padding 128 | ; Alcotest.test_case "odd_padding" `Quick test_odd_padding 129 | ; Alcotest.test_case "empty_input" `Quick test_empty_input 130 | ; Alcotest.test_case "error_target_too_small" `Quick 131 | test_error_target_too_small 132 | ; Alcotest.test_case "non_zero_padding" `Quick test_non_zero_padding 133 | ; Alcotest.test_case "zero_target_empty_input" `Quick 134 | test_zero_target_empty_input 135 | ; Alcotest.test_case "zero_target_non_empty_input" `Quick 136 | test_zero_target_non_empty_input ] 137 | end 138 | 139 | let () = 140 | Alcotest.run "SoundML Utils Tests" [("Pad Center", Test_pad_center.suite)] 141 | -------------------------------------------------------------------------------- /test/test_write.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Bigarray 23 | open Soundml 24 | open Tutils 25 | 26 | let temp_dir_name = ref "" 27 | 28 | let setup_test_dir () = 29 | let dir = Filename.temp_dir "soundml_test_" "" in 30 | temp_dir_name := dir ; 31 | if not (Sys.file_exists dir && Sys.is_directory dir) then Unix.mkdir dir 0o755 32 | 33 | let delete_test_dir () = 34 | let rec rm_rf path = 35 | if Sys.is_directory path then ( 36 | let files = Sys.readdir path in 37 | Array.iter (fun f -> rm_rf (Filename.concat path f)) files ; 38 | Unix.rmdir path ) 39 | else Sys.remove path 40 | in 41 | if !temp_dir_name <> "" && Sys.file_exists !temp_dir_name then 42 | rm_rf !temp_dir_name ; 43 | temp_dir_name := "" 44 | 45 | let temp_file ?(ext = ".wav") name = Filename.concat !temp_dir_name (name ^ ext) 46 | 47 | let file_exists name = 48 | try 49 | Unix.access name [Unix.F_OK] ; 50 | true 51 | with 52 | | Unix.Unix_error (Unix.ENOENT, _, _) -> 53 | false 54 | | _ -> 55 | true 56 | 57 | let create_test_audio channels samples sample_rate format = 58 | let shape = if channels > 1 then [|channels; samples|] else [|samples|] in 59 | let data = Audio.G.create Bigarray.Float32 shape 0. in 60 | let freq = 23000. in 61 | for channel = 0 to channels - 1 do 62 | for i = 0 to samples - 1 do 63 | let idx = if channels > 1 then [|channel; i|] else [|i|] in 64 | Audio.G.set data idx 65 | (sin (2. *. Float.pi *. (freq *. Float.of_int channel))) 66 | done 67 | done ; 68 | let meta = Audio.Metadata.create channels samples sample_rate format in 69 | let audio_data = Audio.create meta data in 70 | (audio_data, sample_rate) 71 | 72 | let create_empty_audio channels sample_rate format = 73 | let shape = if channels > 1 then [|channels; 0|] else [|0|] in 74 | let data = Audio.G.create Bigarray.Float32 shape 0. in 75 | let meta = Audio.Metadata.create channels 0 sample_rate format in 76 | let audio_data = Audio.create meta data in 77 | (audio_data, sample_rate) 78 | 79 | let audio_testable = 80 | let pp fmt (a : float32_elt Audio.audio) = 81 | Format.fprintf fmt "{ channels=%d; samples/channel=%d; }" (Audio.channels a) 82 | (if Audio.channels a > 0 then Audio.samples a else 0) 83 | in 84 | let equal a b = 85 | Check.rallclose ~rtol:1e-05 ~atol:1e-08 (Audio.data a) (Audio.data b) 86 | in 87 | Alcotest.testable pp equal 88 | 89 | let check_write_read name 90 | ?(format : Aformat.t = Aformat.{ftype= WAV; sub= PCM_16; endian= FILE}) 91 | channels samples target_sr ext = 92 | let test_name = 93 | Printf.sprintf "%s_%dch_%dsamples_%dHz%s" name channels samples target_sr 94 | ext 95 | in 96 | Alcotest.test_case test_name `Quick (fun () -> 97 | let filename = temp_file ~ext test_name in 98 | let audio, sr = create_test_audio channels samples target_sr format in 99 | Io.write ~format filename (Audio.data audio) sr ; 100 | Alcotest.check Alcotest.bool "Output file exists after write" 101 | (file_exists filename) true ; 102 | let read_audio = 103 | try 104 | Io.read ~mono:(channels = 1) ~sample_rate:target_sr Bigarray.Float32 105 | filename 106 | with ex -> 107 | Alcotest.failf "Failed to read back file %s: %s" filename 108 | (Printexc.to_string ex) 109 | in 110 | Alcotest.check Alcotest.int "Channels match after write" channels 111 | (Audio.channels read_audio) ; 112 | Alcotest.check Alcotest.int "Sample rate match after write" target_sr 113 | (Audio.sr read_audio) ; 114 | Alcotest.check Alcotest.int "Frames match after write" samples 115 | (Audio.samples read_audio) ; 116 | Alcotest.check 117 | (Alcotest.testable Aformat.pp Stdlib.( = )) 118 | "Format match after write" format (Audio.format read_audio) ; 119 | Alcotest.check audio_testable "Data unchanged after write" audio 120 | read_audio ) 121 | 122 | let check_write_empty name 123 | ?(format : Aformat.t = Aformat.{ftype= WAV; sub= PCM_16; endian= FILE}) 124 | channels target_sr ext = 125 | let test_name = 126 | Printf.sprintf "%s_%dch_empty_%dHz%s" name channels target_sr ext 127 | in 128 | Alcotest.test_case test_name `Quick (fun () -> 129 | let filename = temp_file ~ext test_name in 130 | let audio, sr = create_empty_audio channels target_sr format in 131 | Alcotest.check 132 | (Alcotest.neg Alcotest.reject) 133 | "Write empty audio don't raise" 134 | (fun () -> ()) 135 | (fun () -> Io.write ~format filename (Audio.data audio) sr) ) 136 | 137 | let tests = 138 | let wav = Result.get_ok (Aformat.create Aformat.WAV) in 139 | let flac = Result.get_ok (Aformat.create Aformat.FLAC) in 140 | let ogg = Result.get_ok (Aformat.create Aformat.OGG) in 141 | [ check_write_read "write_f32_mono_wav_deduced" 1 1024 44100 ".wav" 142 | ; check_write_read "write_f32_stereo_wav_deduced" 2 1024 44100 ".wav" 143 | ; check_write_read "write_f32_stereo_flac_deduced" 2 512 22050 ".flac" 144 | ; check_write_read "write_f32_mono_ogg_deduced" 1 2048 48000 ".ogg" 145 | ; check_write_read "write_f32_stereo_wav_explicit" ~format:wav 2 1024 44100 146 | ".wav" 147 | ; check_write_read "write_f32_stereo_flac_explicit" ~format:flac 2 512 22050 148 | ".flac" 149 | ; check_write_read "write_f32_mono_ogg_explicit" ~format:ogg 1 2048 48000 150 | ".ogg" 151 | ; check_write_empty "write_f32_mono_empty" 1 44100 ".wav" 152 | ; check_write_empty "write_f32_stereo_empty" 2 44100 ".wav" ] 153 | 154 | let suite = [("Write/Read Roundtrip", tests)] 155 | 156 | let () = 157 | setup_test_dir () ; 158 | Alcotest.run "SoundML Io.write" suite ; 159 | delete_test_dir () 160 | -------------------------------------------------------------------------------- /test/tutils.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | module Check = struct 23 | open Soundml 24 | 25 | let shape (x : ('a, 'b) Audio.G.t) (y : ('a, 'b) Audio.G.t) = 26 | let shape_x = Audio.G.shape x in 27 | let shape_y = Audio.G.shape y in 28 | if Array.length shape_x <> Array.length shape_y then false 29 | else Array.for_all2 (fun x y -> x = y) shape_x shape_y 30 | 31 | let rallclose ?(rtol = 1e-05) ?(atol = 1e-10) (x : ('a, 'b) Audio.G.t) 32 | (y : ('a, 'b) Audio.G.t) : bool = 33 | if not (shape x y) then false 34 | else if Audio.G.numel x = 0 && Audio.G.numel y = 0 then true 35 | else 36 | let abs_diff = Audio.G.abs (Audio.G.sub x y) in 37 | let tolerance = Audio.G.(add_scalar (mul_scalar (abs y) rtol) atol) in 38 | let comparison_mask = Audio.G.elt_less_equal abs_diff tolerance in 39 | Audio.G.min' comparison_mask >= 1.0 40 | 41 | let callclose : type a. 42 | ?rtol:float 43 | -> ?atol:float 44 | -> (Complex.t, a) Audio.G.t 45 | -> (Complex.t, a) Audio.G.t 46 | -> bool = 47 | fun ?(rtol = 1e-05) ?(atol = 1e-05) (x : (Complex.t, a) Audio.G.t) 48 | (y : (Complex.t, a) Audio.G.t) -> 49 | if not (shape x y) then false 50 | else if Audio.G.numel x = 0 && Audio.G.numel y = 0 then true 51 | else 52 | let x, y = 53 | match Audio.G.kind x with 54 | | Bigarray.Complex32 -> 55 | (Audio.G.cast_c2z x, Audio.G.cast_c2z y) 56 | | Bigarray.Complex64 -> 57 | (x, y) 58 | | _ -> 59 | . 60 | in 61 | let diff = Audio.G.sub x y in 62 | let abs_diff = Audio.G.abs2_z2d diff in 63 | let abs_y = Audio.G.abs2_z2d y in 64 | let tolerance = Audio.G.(add_scalar (mul_scalar abs_y rtol) atol) in 65 | let comparison_mask = Audio.G.elt_less_equal abs_diff tolerance in 66 | Audio.G.min' comparison_mask >= 1.0 67 | end 68 | 69 | let allclose : type a b. 70 | (a, b) Bigarray.kind 71 | -> ?rtol:float 72 | -> ?atol:float 73 | -> (a, b) Owl_dense_ndarray.Generic.t 74 | -> (a, b) Owl_dense_ndarray.Generic.t 75 | -> bool = 76 | fun kd -> 77 | match kd with 78 | | Bigarray.Complex32 -> 79 | Check.callclose 80 | | Bigarray.Complex64 -> 81 | Check.callclose 82 | | Bigarray.Float32 -> 83 | Check.rallclose 84 | | Bigarray.Float64 -> 85 | Check.rallclose 86 | | _ -> 87 | failwith "Unsupported datatype." 88 | 89 | let dense_testable : type a b. 90 | ?rtol:float 91 | -> ?atol:float 92 | -> (a, b) Bigarray.kind 93 | -> (a, b) Audio.G.t Alcotest.testable = 94 | fun ?rtol ?atol (_ : (a, b) Bigarray.kind) -> 95 | let kd_to_string (type a b) (kd : (a, b) Bigarray.kind) = 96 | match kd with 97 | | Bigarray.Float32 -> 98 | "Float32" 99 | | Bigarray.Float64 -> 100 | "Float64" 101 | | Bigarray.Complex32 -> 102 | "Complex32" 103 | | Bigarray.Complex64 -> 104 | "Complex64" 105 | | _ -> 106 | failwith "Unsupported kind" 107 | in 108 | let pp_kind fmt k = 109 | let str_k = kd_to_string k in 110 | Format.fprintf fmt "%s" str_k 111 | in 112 | let to_string (type a b) (kd : (a, b) Bigarray.kind) (v : a) = 113 | match kd with 114 | | Bigarray.Float32 -> 115 | Printf.sprintf "%f" v 116 | | Bigarray.Float64 -> 117 | Printf.sprintf "%f" v 118 | | Bigarray.Complex32 -> 119 | Printf.sprintf "%f + %fi" v.re v.im 120 | | Bigarray.Complex64 -> 121 | Printf.sprintf "%f + %fi" v.re v.im 122 | | _ -> 123 | failwith "Unsupported kind" 124 | in 125 | let pp fmt arr = 126 | let kd = Audio.G.kind arr in 127 | let dims = Audio.G.shape arr in 128 | let first_few_max = 10 in 129 | let first_few = ref [] in 130 | let total_elements = Array.fold_left ( * ) 1 dims in 131 | let flattened = Audio.G.flatten arr in 132 | if total_elements > 0 && Array.length dims == 1 then 133 | for i = 0 to first_few_max - 1 do 134 | first_few := Audio.G.get flattened [|i|] :: !first_few 135 | done ; 136 | Format.fprintf fmt 137 | "Audio.G.t " pp_kind 138 | (Audio.G.kind arr) 139 | (String.concat "; " (Array.to_list (Array.map string_of_int dims))) 140 | first_few_max 141 | (String.concat "; " (List.map (to_string kd) (List.rev !first_few))) 142 | in 143 | let equal a b = 144 | let kd = Audio.G.kind a in 145 | allclose ?rtol ?atol kd a b 146 | in 147 | Alcotest.testable pp equal 148 | 149 | let float32_g_testable = dense_testable Bigarray.Float32 150 | 151 | let float64_g_testable = dense_testable Bigarray.Float64 152 | 153 | let complex32_g_testable = dense_testable Bigarray.Complex32 154 | 155 | let complex64_g_testable = dense_testable Bigarray.Complex64 156 | 157 | let get_dense_testable (type a b) (kd : (a, b) Bigarray.kind) : 158 | (a, b) Audio.G.t Alcotest.testable = 159 | match kd with 160 | | Bigarray.Float32 -> 161 | float32_g_testable 162 | | Bigarray.Float64 -> 163 | float64_g_testable 164 | | Bigarray.Complex32 -> 165 | complex32_g_testable 166 | | Bigarray.Complex64 -> 167 | complex64_g_testable 168 | | _ -> 169 | failwith "Unsupported kind" 170 | 171 | (* This snippet has been gathered from the exact same code but for Matrix in 172 | Owl. See: 173 | https://github.com/tachukao/owl/blob/046f703a6890a5ed5ecf4a8c5750d4e392e4ec54/src/owl/dense/owl_dense_matrix_generic.ml#L606-L609 174 | Unfortunately, for the moment this is not yet available for Ndarrays. *) 175 | let load_npy (path : string) (kind : ('a, 'b) Bigarray.kind) : 176 | ('a, 'b) Audio.G.t = 177 | let npy : ('a, 'b) Audio.G.t = 178 | match Npy.read_copy path |> Npy.to_bigarray Bigarray.c_layout kind with 179 | | Some x -> 180 | x 181 | | None -> 182 | failwith Printf.(sprintf "%s: incorrect format" path) 183 | in 184 | npy 185 | -------------------------------------------------------------------------------- /test/tutils.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | (** Module providing usefull checking functions for the tests *) 23 | module Check : sig 24 | val rallclose : 25 | ?rtol:float 26 | -> ?atol:float 27 | -> (float, 'b) Owl_dense_ndarray.Generic.t 28 | -> (float, 'b) Owl_dense_ndarray.Generic.t 29 | -> bool 30 | (** Real-valued all-close function *) 31 | 32 | val callclose : 33 | 'a. 34 | ?rtol:float 35 | -> ?atol:float 36 | -> (Complex.t, 'a) Owl_dense_ndarray.Generic.t 37 | -> (Complex.t, 'a) Owl_dense_ndarray.Generic.t 38 | -> bool 39 | (** Complex-valued all-close function *) 40 | 41 | val shape : 42 | ('a, 'b) Owl_dense_ndarray.Generic.t 43 | -> ('a, 'b) Owl_dense_ndarray.Generic.t 44 | -> bool 45 | (** Check the shape of two ndarrays are equal *) 46 | end 47 | 48 | val allclose : 49 | 'a 'b. 50 | ('a, 'b) Bigarray.kind 51 | -> ?rtol:float 52 | -> ?atol:float 53 | -> ('a, 'b) Owl_dense_ndarray.Generic.t 54 | -> ('a, 'b) Owl_dense_ndarray.Generic.t 55 | -> bool 56 | (** Checks if two Ndarrays are allclose. This is equivalent to NumPy's allclose function. *) 57 | 58 | val dense_testable : 59 | ?rtol:float 60 | -> ?atol:float 61 | -> ('a, 'b) Bigarray.kind 62 | -> ('a, 'b) Owl_dense_ndarray.Generic.t Alcotest.testable 63 | 64 | val get_dense_testable : 65 | ('a, 'b) Bigarray.kind 66 | -> ('a, 'b) Owl_dense_ndarray.Generic.t Alcotest.testable 67 | (** Function that returns a correctly-typed testable based on the passed kind for Dense.Ndarray. *) 68 | 69 | val load_npy : 70 | string -> ('a, 'b) Bigarray.kind -> ('a, 'b) Owl_dense_ndarray.Generic.t 71 | (** Load a numpy file and return the ndarray. 72 | @see https://github.com/tachukao/owl/blob/046f703a6890a5ed5ecf4a8c5750d4e392e4ec54/src/owl/dense/owl_dense_matrix_generic.ml#L606-L609 *) 73 | -------------------------------------------------------------------------------- /test/vutils.ml: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | open Tutils 23 | 24 | let test_audio_dir = Sys.getcwd () ^ "/audio/" 25 | 26 | let test_vectors_dir = Sys.getcwd () ^ "/vectors/" 27 | 28 | let typ_to_readable = function 29 | | "timeseries" -> 30 | "Io.Read" 31 | | "stft" -> 32 | "Spectral.stft" 33 | | _ -> 34 | "Unkown" 35 | 36 | let string_to_resample_typ = function 37 | | "soxr_vhq" -> 38 | Io.SOXR_VHQ 39 | | "soxr_hq" -> 40 | Io.SOXR_HQ 41 | | "soxr_mq" -> 42 | Io.SOXR_MQ 43 | | "soxr_lq" -> 44 | Io.SOXR_LQ 45 | | _ -> 46 | Io.NONE 47 | 48 | module StrMap = Map.Make (String) 49 | 50 | module Parameters = struct 51 | open Yojson.Basic.Util 52 | 53 | type t = Yojson.Basic.t 54 | 55 | let create (path : string) = Yojson.Basic.from_file path 56 | 57 | let get_string (name : string) (params : t) = 58 | member name params |> to_string_option 59 | 60 | let get_int (name : string) (params : t) = member name params |> to_int_option 61 | 62 | let get_float (name : string) (params : t) = 63 | member name params |> to_float_option 64 | 65 | let get_bool (name : string) (params : t) = 66 | member name params |> to_bool_option 67 | end 68 | 69 | module Testdata = struct 70 | type t = (string * string * Parameters.t) list StrMap.t 71 | 72 | let get_test_type (basename : string) : string option = 73 | let split = Str.(split (regexp {|_|}) basename) in 74 | if List.length split >= 1 then Some (List.hd split) else None 75 | 76 | let get_test_filename (basename : string) : string option = 77 | let split = Str.(split (regexp {|_|}) basename) in 78 | if List.length split >= 1 then Some (String.concat "_" (List.tl split)) 79 | else None 80 | 81 | let list_filter_filename (dir : string) (name : string) : string option = 82 | try 83 | let files = Sys.readdir dir in 84 | Option.map (fun elt -> Filename.concat dir elt) 85 | @@ Array.find_opt (fun elt -> Filename.remove_extension elt = name) files 86 | with Sys_error msg -> 87 | Printf.eprintf "Error reading directory '%s': %s\n" dir msg ; 88 | None 89 | 90 | let list_tests_files (dir : string) (ext : string) : string list = 91 | let has_extension path = Filename.check_suffix path ext in 92 | let process_entry base_dir entry = 93 | let full_path = Filename.concat base_dir entry in 94 | try 95 | if Sys.is_directory full_path then 96 | List.filter has_extension 97 | @@ List.map (Filename.concat full_path) 98 | @@ Array.to_list @@ Sys.readdir full_path 99 | else if has_extension full_path then [full_path] 100 | else [] 101 | with Sys_error _ -> [] 102 | in 103 | try 104 | Sys.readdir dir |> Array.to_list |> List.concat_map (process_entry dir) 105 | (* Appliquer process_entry et concaténer les résultats *) 106 | with Sys_error msg -> 107 | Printf.eprintf "Error reading directory '%s': %s\n" dir msg ; 108 | [] 109 | 110 | let filter_test_type (typ : string) = 111 | let filter (typ : string) (full_path : string) : string option = 112 | let basename = Filename.basename full_path in 113 | let split = get_test_type basename in 114 | Option.bind split (fun x -> 115 | if x = typ then Some (Filename.remove_extension full_path) else None ) 116 | in 117 | List.filter_map @@ filter typ 118 | 119 | let construct_parameters (audio_dir : string) (files : string list) = 120 | let aux (file : string) = 121 | let base = Filename.basename file |> Filename.remove_extension in 122 | let audio_filename = 123 | Option.value (get_test_filename base) ~default:base 124 | in 125 | let audio_file_opt = list_filter_filename audio_dir audio_filename in 126 | match audio_file_opt with 127 | | None -> 128 | Printf.eprintf "Warning: couldn't find audio file with name %s\n" base ; 129 | None 130 | | Some audio_file_path -> 131 | let npy_file = file ^ ".npy" in 132 | let json_file = file ^ ".json" in 133 | let params = Parameters.create json_file in 134 | Some (npy_file, audio_file_path, params) 135 | in 136 | List.filter_map aux files 137 | 138 | let create (vectors_dir : string) (audio_dir : string) (types : string list) : 139 | t = 140 | let vectors_files = list_tests_files vectors_dir ".json" in 141 | let fold map typ = 142 | let l = filter_test_type typ vectors_files in 143 | StrMap.add typ l map 144 | in 145 | let files_map = List.fold_left fold StrMap.empty types in 146 | StrMap.map (construct_parameters audio_dir) files_map 147 | 148 | let get (typ : string) (data : t) : (string * string * Parameters.t) list = 149 | StrMap.find typ data 150 | end 151 | 152 | module type Testable = sig 153 | type t 154 | 155 | type p 156 | 157 | type pf 158 | 159 | type pc 160 | 161 | type ('a, 'b) precision = ('a, 'b) Types.precision 162 | 163 | val precision : (pf, pc) precision 164 | 165 | val kd : (t, p) Bigarray.kind 166 | 167 | val typ : string 168 | 169 | val generate : 170 | (pf, pc) precision 171 | -> string * string * Parameters.t 172 | -> (float, pf) Owl_dense_ndarray.Generic.t 173 | -> (t, p) Owl_dense_ndarray.Generic.t 174 | end 175 | 176 | module Tests_cases (T : Testable) = struct 177 | include T 178 | 179 | let akind : type a b. (a, b) precision -> (float, a) Bigarray.kind = 180 | fun prec -> 181 | match prec with 182 | | Types.B32 -> 183 | Bigarray.Float32 184 | | Types.B64 -> 185 | Bigarray.Float64 186 | 187 | let read_audio kd (path : string) (res_typ : Io.resampling_t) 188 | (sample_rate : int) (mono : bool) = 189 | let audio = Io.read ~res_typ ~sample_rate ~mono kd path in 190 | Audio.data audio 191 | 192 | let create_tests (data : (string * string * Parameters.t) list) : 193 | unit Alcotest.test_case list = 194 | List.concat_map 195 | (fun (case : string * string * Parameters.t) -> 196 | let vector_path, audio_path, params = case in 197 | let raw_basename = 198 | Filename.basename vector_path |> Filename.remove_extension 199 | in 200 | let basename = 201 | Option.value ~default:raw_basename 202 | (Testdata.get_test_filename raw_basename) 203 | in 204 | let sr = 205 | Option.value ~default:22050 @@ Parameters.get_int "sr" params 206 | in 207 | let mono = 208 | Option.value ~default:true @@ Parameters.get_bool "mono" params 209 | in 210 | let resampler = 211 | string_to_resample_typ 212 | ( Option.value ~default:"None" 213 | @@ Parameters.get_string "res_type" params ) 214 | in 215 | let audio_kind = akind precision in 216 | let audio = read_audio audio_kind audio_path resampler sr mono in 217 | let generated = generate precision case audio in 218 | let vector = load_npy vector_path kd in 219 | let test_dense () = 220 | Alcotest.check 221 | (Tutils.get_dense_testable kd) 222 | (typ ^ "_dense_" ^ basename) 223 | generated vector 224 | in 225 | let test_dense = ("DENSE: " ^ basename, `Slow, test_dense) in 226 | [test_dense] ) 227 | data 228 | 229 | let run (name : string) (tests : unit Alcotest.test_case list) = 230 | Alcotest.run name [(typ_to_readable typ, tests)] 231 | end 232 | 233 | let tests = ["timeseries"; "stft"] 234 | 235 | let data = Testdata.create test_vectors_dir test_audio_dir tests 236 | -------------------------------------------------------------------------------- /test/vutils.mli: -------------------------------------------------------------------------------- 1 | (*****************************************************************************) 2 | (* *) 3 | (* *) 4 | (* Copyright (C) 2025 *) 5 | (* Gabriel Santamaria *) 6 | (* *) 7 | (* *) 8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *) 9 | (* you may not use this file except in compliance with the License. *) 10 | (* You may obtain a copy of the License at *) 11 | (* *) 12 | (* http://www.apache.org/licenses/LICENSE-2.0 *) 13 | (* *) 14 | (* Unless required by applicable law or agreed to in writing, software *) 15 | (* distributed under the License is distributed on an "AS IS" BASIS, *) 16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *) 17 | (* See the License for the specific language governing permissions and *) 18 | (* limitations under the License. *) 19 | (* *) 20 | (*****************************************************************************) 21 | 22 | val test_audio_dir : string 23 | (** The directory where the test audio files are located. *) 24 | 25 | val test_vectors_dir : string 26 | (** The directory where the test vectors are located. *) 27 | 28 | val typ_to_readable : string -> string 29 | (** Converts a test type to a readable format for Alcotest. *) 30 | 31 | (** A map from strings to values. *) 32 | module StrMap : Map.S with type key = string 33 | 34 | (** A module for handling parameters. *) 35 | module Parameters : sig 36 | type t 37 | 38 | val create : string -> t 39 | 40 | val get_string : string -> t -> string option 41 | 42 | val get_int : string -> t -> int option 43 | 44 | val get_float : string -> t -> float option 45 | 46 | val get_bool : string -> t -> bool option 47 | end 48 | 49 | module Testdata : sig 50 | type t = (string * string * Parameters.t) list StrMap.t 51 | 52 | val get_test_type : string -> string option 53 | (** Returns the test type for a given test name *) 54 | 55 | val get_test_filename : string -> string option 56 | (** Returns the test filename for a given test type *) 57 | 58 | val create : string -> string -> string list -> t 59 | (** Creates a test set given a directory of vectors files, a directory of audio files 60 | and a list of test types *) 61 | 62 | val get : string -> t -> (string * string * Parameters.t) list 63 | (** Returns the test set for a given test type *) 64 | end 65 | 66 | module type Testable = sig 67 | (** Type of the data generated by the functionnality we're testing *) 68 | type t 69 | 70 | (** Bigarray precision *) 71 | type p 72 | 73 | (** Float precision *) 74 | type pf 75 | 76 | (** Complex precision *) 77 | type pc 78 | 79 | type ('a, 'b) precision = ('a, 'b) Types.precision 80 | 81 | val precision : (pf, pc) precision 82 | 83 | val kd : (t, p) Bigarray.kind 84 | 85 | val typ : string 86 | 87 | val generate : 88 | (pf, pc) precision 89 | -> string * string * Parameters.t 90 | -> (float, pf) Owl_dense_ndarray.Generic.t 91 | -> (t, p) Owl_dense_ndarray.Generic.t 92 | (** Generate a testing vector given some parameters and an audio array *) 93 | end 94 | 95 | module Tests_cases (T : Testable) : sig 96 | type t = T.t 97 | 98 | type p = T.p 99 | 100 | type pf = T.pf 101 | 102 | type pc = T.pc 103 | 104 | val create_tests : 105 | (string * string * Parameters.t) list -> unit Alcotest.test_case list 106 | 107 | val run : string -> unit Alcotest.test_case list -> unit 108 | end 109 | 110 | val data : Testdata.t 111 | --------------------------------------------------------------------------------