├── .github
    └── workflows
    │   ├── build.yml
    │   ├── lint.yml
    │   └── test.yml
├── .gitignore
├── .ocamlformat
├── LICENSE
├── README.md
├── bench
    ├── read
    │   ├── bench.py
    │   ├── bench_read.sh
    │   ├── dune
    │   ├── mp3.svg
    │   ├── perf.ml
    │   └── wav.svg
    └── stft
    │   ├── dune
    │   ├── perf.ml
    │   ├── perf.py
    │   ├── stft_comparison_float32.svg
    │   ├── stft_comparison_float64.svg
    │   └── wav_stereo_44100hz_1s.wav
├── doc
    ├── dune
    └── index.mld
├── dune-project
├── soundml.opam
├── soundml_logo.svg
├── src
    ├── aformat.ml
    ├── aformat.mli
    ├── audio.ml
    ├── audio.mli
    ├── dune
    ├── effects
    │   ├── dune
    │   ├── filter
    │   │   ├── filter.ml
    │   │   ├── filter.mli
    │   │   ├── fir.ml
    │   │   ├── fir.mli
    │   │   ├── highpass.ml
    │   │   ├── highpass.mli
    │   │   ├── iir.ml
    │   │   ├── iir.mli
    │   │   ├── lowpass.ml
    │   │   └── lowpass.mli
    │   └── time
    │   │   ├── config
    │   │       ├── discover.ml
    │   │       └── dune
    │   │   ├── rubberband_stubs.cxx
    │   │   ├── time.ml
    │   │   └── time.mli
    ├── feature
    │   ├── dune
    │   └── spectral
    │   │   ├── spectral.ml
    │   │   └── spectral.mli
    ├── filterbank.ml
    ├── filterbank.mli
    ├── io
    │   ├── cio
    │   │   ├── README.md
    │   │   ├── common.hxx
    │   │   ├── read.hxx
    │   │   └── write.hxx
    │   ├── config
    │   │   ├── discover.ml
    │   │   └── dune
    │   ├── dune
    │   ├── io.ml
    │   ├── io.mli
    │   └── soundml_io.cxx
    ├── soundml.ml
    ├── transform.ml
    ├── transform.mli
    ├── types.ml
    ├── types.mli
    ├── utils.ml
    ├── utils.mli
    ├── window.ml
    └── window.mli
└── test
    ├── README.md
    ├── dune
    ├── generate_audio.sh
    ├── generate_vectors.py
    ├── test_aformat.ml
    ├── test_filter.ml
    ├── test_filterbank.ml
    ├── test_stft.ml
    ├── test_time.ml
    ├── test_timeseries.ml
    ├── test_utils.ml
    ├── test_window.ml
    ├── test_write.ml
    ├── tutils.ml
    ├── tutils.mli
    ├── vutils.ml
    └── vutils.mli


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | # Inspired by https://github.com/savonet/build-and-test-ocaml-module/blob/main/action.yml
 2 | # Thanks to the savonet team for building such amazing software :p
 3 | name: Build
 4 | 
 5 | on:
 6 |   push:
 7 |     branches:
 8 |       - 'main'
 9 |   pull_request:
10 |     types: [opened, synchronize, reopened]
11 | 
12 | jobs:
13 |   build:
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os: [ubuntu-latest] # , macos-latest, windows-latest]
18 |         ocaml-compiler:
19 |           - "5.3.0" # Latest stable
20 |           - "5.2.0"
21 | 
22 |     runs-on: ${{ matrix.os }}
23 | 
24 |     steps:
25 |       - name: Checkout code
26 |         uses: actions/checkout@v4
27 | 
28 |       - name: Set up OCaml ${{ matrix.ocaml-compiler }} on ${{ matrix.os }}
29 |         uses: ocaml/setup-ocaml@v3
30 |         with:
31 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
32 |           dune-cache: true
33 | 
34 |       # temporary, waiting for the conf-soxr package to be published to opam repo
35 |       - name: Install libsoxr-dev
36 |         run: |
37 |             sudo apt-get update
38 |             sudo apt-get install -y libsoxr-dev
39 |         shell: bash
40 | 
41 |       - name: Install SoundML
42 |         run: opam install . --yes --confirm-level=unsafe-yes
43 |         shell: bash


--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
 1 | # Formatting check for the OCaml sources, run on pushes to main and on pull requests.
 2 | name: Lint & Format
 3 | 
 4 | on:
 5 |   push:
 6 |     branches:
 7 |       - 'main'
 8 |   pull_request:
 9 |     types: [opened, synchronize, reopened]
10 | 
11 | jobs:
12 |   lint-fmt:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |       - name: Checkout code
17 |         uses: actions/checkout@v4
18 | 
19 |       - name: Set up OCaml for linting
20 |         uses: ocaml/setup-ocaml@v3
21 |         with:
22 |           ocaml-compiler: "5.2"
23 |           dune-cache: true
24 | 
25 |       # Keep the lint-fmt sub-action on the same major version as setup-ocaml above.
26 |       - name: Run OCaml Lint & Format Check
27 |         uses: ocaml/setup-ocaml/lint-fmt@v3


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | # Inspired by https://github.com/savonet/build-and-test-ocaml-module/blob/main/action.yml
 2 | # Thanks to the savonet team for building such amazing software :p
 3 | 
 4 | name: Test
 5 | 
 6 | on:
 7 |   push:
 8 |     branches:
 9 |       - 'main'
10 |   pull_request:
11 |     types: [opened, synchronize, reopened]
12 | 
13 | jobs:
14 |   test:
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         os: [ubuntu-latest] # , macos-latest, windows-latest]
19 |         ocaml-compiler:
20 |           - "5.3.0" # Latest stable
21 |           - "5.2.0"
22 |         include:
23 |           - os: ubuntu-latest
24 |             ocaml-compiler: "5.3.0"
25 |             send-coverage: true
26 | 
27 |     runs-on: ${{ matrix.os }}
28 | 
29 |     steps:
30 |       - name: Checkout code
31 |         uses: actions/checkout@v4
32 | 
33 |       - name: Set up OCaml ${{ matrix.ocaml-compiler }} on ${{ matrix.os }}
34 |         uses: ocaml/setup-ocaml@v3
35 |         with:
36 |           ocaml-compiler: ${{ matrix.ocaml-compiler }}
37 |           dune-cache: true
38 | 
39 |       - uses: actions/setup-python@v5
40 |         with:
41 |           python-version: '3.12.3' 
42 |       - run: pip install --upgrade pip
43 |       - run: pip install numpy librosa==0.11.0
44 | 
45 |       # temporary, waiting for the conf-soxr package to be published to opam repo
46 |       - name: Install libsoxr-dev
47 |         run: |
48 |             sudo apt-get update
49 |             sudo apt-get install -y libsoxr-dev
50 |         shell: bash
51 | 
52 |       - name: Install FFmpeg CLI (to generate test data)
53 |         run: sudo apt-get install -y ffmpeg
54 |         shell: bash
55 | 
56 |       - name: Install SoundML dependencies (with test)
57 |         run: opam install . --deps-only --with-test --yes --confirm-level=unsafe-yes --verbose
58 |         shell: bash
59 | 
60 |       - name: Run tests
61 |         if: matrix.send-coverage != true
62 |         run: |
63 |           opam exec -- dune runtest
64 |         shell: bash
65 | 
66 |       - name: Run tests (and send coverage to Coveralls)
67 |         if: matrix.send-coverage == true
68 |         run: |
69 |           opam exec -- dune runtest --instrument-with bisect_ppx --force
70 |           opam exec -- bisect-ppx-report send-to Coveralls
71 |         shell: bash
72 |         env:
73 |           COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
74 |           PULL_REQUEST_NUMBER: ${{ github.event.number }}
75 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | _build
 2 | dune.lock
 3 | dataset/
 4 | cache/
 5 | report/
 6 | *.pth
 7 | .venv
 8 | .vscode
 9 | wav/
10 | mp3/


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
1 | profile = ocamlformat
2 | wrap-comments = true
3 | version = 0.27.0


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <div align='center'>
 2 | 
 3 | <img src="soundml_logo.svg" width="140px" alt="SoundML Logo">
 4 | 
 5 | <h1>SoundML</h1>
 6 | <p>A small, very high-level library for performing basic operations on audio files in OCaml</p>
 7 | 
 8 | <h4> <span> · </span> <a href="https://github.com/gabyfle/SoundML/blob/master/README.md"> Documentation </a> <span> · </span> <a href="https://github.com/gabyfle/SoundML/issues"> Report Bug </a> <span> · </span> <a href="https://github.com/gabyfle/SoundML/issues"> Request Feature </a> · </h4>
 9 | 
10 | [![Build](https://github.com/gabyfle/SoundML/actions/workflows/build.yml/badge.svg)](https://github.com/gabyfle/SoundML/actions/workflows/build.yml)
11 | [![Test](https://github.com/gabyfle/SoundML/actions/workflows/test.yml/badge.svg)](https://github.com/gabyfle/SoundML/actions/workflows/test.yml)
12 | [![Coverage Status](https://coveralls.io/repos/github/gabyfle/SoundML/badge.svg)](https://coveralls.io/github/gabyfle/SoundML)
13 | </div>
14 | 
15 | ## About the Project
16 | 
17 | > [!WARNING]
18 | > The project is still in development and is not yet ready for use.
19 | 
20 | ## Features
21 |  - A fast I/O for interacting with audio files
22 |  - Feature extraction
23 |  - Audio effects
24 |    - Time stretching and pitch shifting
25 |    - Filtering
26 |      - IIR filters (Generic, Lowpass, Highpass)
27 |      - Generic FIR filter implementation
28 | 
29 | ## License
30 | 
31 | Distributed under the Apache License Version 2.0. See LICENSE for more information.
32 | 
33 | ## References
34 | 
35 | - **McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto** (2015). *librosa: Audio and music signal analysis in python.* In Proceedings of the 14th python in science conference, pp. 18-25.
36 | 
37 | - **Bellanger, M.** (2022). *Traitement numérique du signal. 10e édition.* Dunod.
38 | 
39 | - **Wang, L., Zhao, J., & Mortier, R.** (2022). *OCaml Scientific Computing*. Springer International Publishing eBooks. DOI: [10.1007/978-3-030-97645-3](https://doi.org/10.1007/978-3-030-97645-3)
40 | 
41 | - **Zoelzer, U.** (2002). *Dafx: Digital Audio Effects*. DOI: [10.1002/9781119991298](https://doi.org/10.1002/9781119991298)
42 | 
43 | - **Müller, M.** (2015). *Fundamentals of Music Processing*. Springer International Publishing. DOI: [10.1007/978-3-319-21945-5](https://doi.org/10.1007/978-3-319-21945-5)
44 | 
45 | ## Acknowledgements
46 | 
47 | * Logo generated with DALL-E by OpenAI
48 | 


--------------------------------------------------------------------------------
/bench/read/bench.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import os
  4 | import sys
  5 | import time
  6 | import argparse
  7 | import librosa
  8 | import numpy as np
  9 | 
 10 | MIB_DIVISOR = 1024.0 * 1024.0
 11 | 
 12 | 
 13 | def find_audio_files(root_dir, extension, max_files):
 14 |     filepaths = []
 15 |     extension = extension.lower()
 16 | 
 17 |     count = 0
 18 |     try:
 19 |         for dirpath, _, filenames in os.walk(root_dir, topdown=True, onerror=None):
 20 |             relevant_filenames = [f for f in filenames if f.lower().endswith(extension)]
 21 |             for filename in relevant_filenames:
 22 |                 if count < max_files:
 23 |                     filepaths.append(os.path.join(dirpath, filename))
 24 |                     count += 1
 25 |                 else:
 26 |                     return filepaths
 27 |             if count >= max_files:
 28 |                 break
 29 | 
 30 |     except OSError:
 31 |         pass
 32 | 
 33 |     return filepaths
 34 | 
 35 | 
 36 | def get_file_size(filename) -> float:
 37 |     if not os.path.isfile(filename):
 38 |         return -1.0
 39 |     size = os.path.getsize(filename)
 40 |     if size <= 0:
 41 |         return 0.0
 42 |     return size / MIB_DIVISOR
 43 | 
 44 | 
 45 | def benchmark_read(filename, target_sr) -> tuple[float, float]:
 46 |     size = get_file_size(filename)
 47 |     if size is None or size <= 0.0:
 48 |         return 0.0, 0.0
 49 | 
 50 |     sample_rate = target_sr if target_sr is not None and target_sr > 0 else None
 51 | 
 52 |     try:
 53 |         start_time = time.perf_counter()
 54 |         _audio, _sr = librosa.load(
 55 |             filename, sr=sample_rate, mono=False, dtype=np.float32
 56 |         )
 57 |         end_time = time.perf_counter()
 58 |         duration = end_time - start_time
 59 |         if not isinstance(_audio, np.ndarray) or _audio.size == 0:
 60 |             return -1.0, -1.0
 61 | 
 62 |         return duration, size
 63 |     except FileNotFoundError:
 64 |         return -1.0, -1.0
 65 | 
 66 | 
 67 | def run_benchmark(root_dir, sample_rate, extension, max_files):
 68 |     all_files = find_audio_files(root_dir, extension, max_files)
 69 |     nfound = len(all_files)
 70 | 
 71 |     if nfound == 0:
 72 |         sys.exit(0)
 73 | 
 74 |     warmup_count = min(5, nfound)
 75 |     if warmup_count > 0:
 76 |         warmup_files = all_files[:warmup_count]
 77 |         for f in warmup_files:
 78 |             _ = benchmark_read(f, sample_rate)
 79 | 
 80 |     total_time = 0.0
 81 |     total_size = 0.0
 82 | 
 83 |     files_to_process = all_files
 84 | 
 85 |     for filename in files_to_process:
 86 |         duration, size = benchmark_read(filename, sample_rate)
 87 | 
 88 |         if duration > 0 and size > 0:
 89 |             total_time += duration
 90 |             total_size += size
 91 | 
 92 |     if total_time > 0 and total_size > 0:
 93 |         avg_speed = total_size / total_time
 94 |         print(f"{avg_speed:.5f}")
 95 | 
 96 | 
 97 | def main():
 98 |     parser = argparse.ArgumentParser()
 99 |     parser.add_argument("root_directory", help=argparse.SUPPRESS)
100 |     parser.add_argument("sample_rate", type=int, help=argparse.SUPPRESS)
101 |     parser.add_argument("format", help=argparse.SUPPRESS)
102 |     parser.add_argument("max_files", type=int, help=argparse.SUPPRESS)
103 | 
104 |     if len(sys.argv) != 5:
105 |         print(
106 |             f"Usage: {sys.argv[0]} <root_directory> <sample_rate> <format> <max_files>",
107 |             file=sys.stderr,
108 |         )
109 |         sys.exit(1)
110 | 
111 |     args = parser.parse_args()
112 |     if not os.path.isdir(args.root_directory):
113 |         sys.exit(1)
114 | 
115 |     if args.sample_rate < 0:
116 |         sys.exit(1)
117 | 
118 |     if args.max_files <= 0:
119 |         sys.exit(1)
120 | 
121 |     ext = args.format.lstrip(".")
122 | 
123 |     run_benchmark(args.root_directory, args.sample_rate, ext, args.max_files)
124 | 
125 | 
126 | if __name__ == "__main__":
127 |     main()
128 | 


--------------------------------------------------------------------------------
/bench/read/bench_read.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | OCAML_CMD="dune exec ./bench/read/perf.exe" # path is relative to the repository root
  4 | PYTHON_CMD="python3 $(dirname "$0")/bench.py" # bench.py sits next to this script; works from any cwd (path must not contain spaces)
  5 | CACHE_CLEAR_CMD="sync; echo 3 > /proc/sys/vm/drop_caches" # executed through `sudo bash -c` before each iteration
  6 | 
  7 | if [ "$#" -ne 6 ]; then
  8 |     echo "Usage: $0 <mode> <num_iterations> <root_directory> <sample_rate> <format> <max_files>"
  9 |     echo "  <mode>: 'ocaml' or 'python'"
 10 |     exit 1
 11 | fi
 12 | 
 13 | MODE=$1
 14 | shift
 15 | 
 16 | NUM_ITERATIONS=$1
 17 | ROOT_DIR=$2
 18 | SAMPLE_RATE=$3
 19 | FORMAT=$4
 20 | MAX_FILES=$5
 21 | 
 22 | if [[ "$MODE" != "ocaml" && "$MODE" != "python" ]]; then
 23 |     echo "Error: <mode> must be either 'ocaml' or 'python'. You provided '$MODE'."
 24 |     echo "Usage: $0 <mode> <num_iterations> <root_directory> <sample_rate> <format> <max_files>"
 25 |     exit 1
 26 | fi
 27 | 
 28 | if ! [[ "$NUM_ITERATIONS" =~ ^[1-9][0-9]*$ ]]; then
 29 |      echo "Error: <num_iterations> ('$NUM_ITERATIONS') must be a positive integer." >&2
 30 |      exit 1
 31 | fi
 32 | 
 33 | BENCH_CMD=""
 34 | if [[ "$MODE" == "ocaml" ]]; then
 35 |     BENCH_CMD="$OCAML_CMD"
 36 | elif [[ "$MODE" == "python" ]]; then
 37 |     BENCH_CMD="$PYTHON_CMD"
 38 | fi
 39 | 
 40 | results_array=()
 41 | valid_run_count=0
 42 | 
 43 | echo "Starting reading test for: $MODE"
 44 | echo "Command to run: $BENCH_CMD \"$ROOT_DIR\" \"$SAMPLE_RATE\" \"$FORMAT\" \"$MAX_FILES\""
 45 | echo "Number of iterations: $NUM_ITERATIONS"
 46 | echo "--------------------------------------------------"
 47 | 
 48 | for (( i=1; i<=NUM_ITERATIONS; i++ )); do
 49 |     echo "Iteration $i / $NUM_ITERATIONS"
 50 |     if sudo bash -c "$CACHE_CLEAR_CMD"; then # ensure that files aren't cached by the system
 51 |         sleep 1.5
 52 |     else
 53 |         echo "Error: Failed to clear cache. Probably error with sudo." >&2
 54 |         exit 1
 55 |     fi
 56 |     result=$( $BENCH_CMD "$ROOT_DIR" "$SAMPLE_RATE" "$FORMAT" "$MAX_FILES" )
 57 |     exit_status=$?
 58 | 
 59 |     if [ $exit_status -ne 0 ]; then
 60 |         continue
 61 |     fi
 62 | 
 63 |     if [[ "$result" =~ ^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$ ]]; then
 64 |         echo "Result: $result MiB/s"
 65 |         results_array+=("$result")
 66 |         ((valid_run_count++))
 67 |     fi
 68 |     sleep 1
 69 | done
 70 | 
 71 | echo "--------------------------------------------------"
 72 | 
 73 | num_results=${#results_array[@]}
 74 | 
 75 | if [ "$num_results" -eq 0 ]; then
 76 |     exit 1
 77 | fi
 78 | stats=$(printf "%s\n" "${results_array[@]}" | awk '
 79 |     NF == 0 { next }
 80 |     {
 81 |         if ($1 ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/) {
 82 |             sum += $1;
 83 |             sumsq += $1*$1;
 84 |             count++;
 85 |         }
 86 |     }
 87 |     END {
 88 |         if (count > 0) {
 89 |             mean = sum / count;
 90 |             if (count > 1) {
 91 |                variance = (sumsq - (sum*sum)/count) / (count-1);
 92 |                if (variance < 1e-12) variance = 0;
 93 |                stdev = sqrt(variance);
 94 |             } else {
 95 |                stdev = 0; # Standard deviation is undefined/0 for a single point
 96 |             }
 97 |             printf "%.6f %.6f %d", mean, stdev, count;
 98 |         } else {
 99 |             print "NaN NaN 0";
100 |         }
101 |     }
102 | ')
103 | read -r mean stdev count <<< "$stats"
104 | 
105 | if [[ -z "$mean" || -z "$stdev" || -z "$count" || "$count" -eq 0 ]]; then
106 |     echo "Error: Failed to calculate statistics. Awk output: '$stats'" >&2
107 |     exit 1
108 | fi
109 | 
110 | echo "Performance Test Summary ($MODE):"
111 | echo "-------------------------"
112 | echo "Command: $BENCH_CMD \"$ROOT_DIR\" \"$SAMPLE_RATE\" \"$FORMAT\" \"$MAX_FILES\""
113 | printf "Mean Speed (MiB/s): %.6f\n" "$mean"
114 | if [ "$count" -gt 1 ]; then
115 |     printf "Std Dev Speed:        %.6f\n" "$stdev"
116 | else
117 |     printf "Std Dev Speed:        N/A (requires >= 2 data points)\n"
118 | fi
119 | 
120 | exit 0
121 | 


--------------------------------------------------------------------------------
/bench/read/dune:
--------------------------------------------------------------------------------
1 | (executable
2 |  (name perf)
3 |  (libraries soundml))
4 | 


--------------------------------------------------------------------------------
/bench/read/perf.ml:
--------------------------------------------------------------------------------
  1 | open Printf
  2 | open Unix
  3 | 
  4 | let mb_divisor = 1024. *. 1024.
  5 | 
  6 | let is_ext_file filename ext =
  7 |   String.lowercase_ascii (Filename.extension filename) = ext
  8 | 
  9 | let find_ext_files root_dir ext =
 10 |   let rec find acc dir =
 11 |     try
 12 |       let dh = opendir dir in
 13 |       try
 14 |         let rec loop acc =
 15 |           match readdir dh with
 16 |           | exception End_of_file ->
 17 |               closedir dh ; acc
 18 |           | "." | ".." ->
 19 |               loop acc
 20 |           | entry -> (
 21 |               let full_path = Filename.concat dir entry in
 22 |               try
 23 |                 match (stat full_path).st_kind with
 24 |                 | S_REG when is_ext_file full_path ext ->
 25 |                     loop (full_path :: acc)
 26 |                 | S_DIR ->
 27 |                     loop (find acc full_path)
 28 |                 | _ ->
 29 |                     loop acc
 30 |               with Unix_error (_, _, _) -> loop acc )
 31 |         in
 32 |         loop acc
 33 |       with ex ->
 34 |         closedir dh ;
 35 |         eprintf "\nError reading  directory '%s': %s\n%!" dir
 36 |           (Printexc.to_string ex) ;
 37 |         acc
 38 |     with Unix_error (e, _, p) ->
 39 |       eprintf "\nError  opening directory '%s': %s\n%!" p (error_message e) ;
 40 |       acc
 41 |   in
 42 |   find [] root_dir
 43 | 
 44 | let get_file_size filename =
 45 |   try
 46 |     let stats = stat filename in
 47 |     if stats.st_kind = S_REG then Ok (float_of_int stats.st_size /. mb_divisor)
 48 |     else Error (sprintf "Not a regular file: %s" filename)
 49 |   with
 50 |   | Unix_error (e, _, _) ->
 51 |       Error (sprintf "Cannot stat file '%s': %s" filename (error_message e))
 52 |   | Sys_error msg ->
 53 |       Error (sprintf "System error  statting '%s': %s" filename msg)
 54 | 
 55 | let benchmark_read kind filename sample_rate =
 56 |   match get_file_size filename with
 57 |   | Error msg ->
 58 |       Error (filename, msg)
 59 |   | Ok size_mb -> (
 60 |       if size_mb <= 0.0 then Error (filename, "Incorrect file size")
 61 |       else
 62 |         try
 63 |           let res_typ =
 64 |             match sample_rate with 0 -> Io.NONE | _ -> Io.SOXR_HQ
 65 |           in
 66 |           let start_time = Unix.gettimeofday () in
 67 |           let _audio =
 68 |             Soundml.Io.read ~res_typ ~sample_rate ~mono:false kind filename
 69 |           in
 70 |           let end_time = Unix.gettimeofday () in
 71 |           let duration = end_time -. start_time in
 72 |           Ok (duration, size_mb)
 73 |         with ex -> Error (filename, Printexc.to_string ex) )
 74 | 
 75 | let run_benchmark root sample_rate extension max_files =
 76 |   let kind = Bigarray.Float32 in
 77 |   let all_files = find_ext_files root extension in
 78 |   let all_files =
 79 |     List.filteri (fun i _ -> if i >= max_files then false else true) all_files
 80 |   in
 81 |   let total_files = List.length all_files in
 82 |   if total_files = 0 then exit 0 ;
 83 |   let warmup_count = min 5 total_files in
 84 |   ( if warmup_count > 0 then
 85 |       let warmup_files = List.filteri (fun i _ -> i < warmup_count) all_files in
 86 |       List.iter
 87 |         (fun f ->
 88 |           match benchmark_read kind f sample_rate with
 89 |           | Ok _ ->
 90 |               ()
 91 |           | Error _ ->
 92 |               () )
 93 |         warmup_files ) ;
 94 |   let total_time = ref 0.0 in
 95 |   let total_size = ref 0.0 in
 96 |   List.iter
 97 |     (fun filename ->
 98 |       match benchmark_read kind filename sample_rate with
 99 |       | Ok (duration, size_mb) ->
100 |           total_time := !total_time +. duration ;
101 |           total_size := !total_size +. size_mb
102 |       | Error _ ->
103 |           () )
104 |     all_files ;
105 |   if !total_time > 0.0 && !total_size > 0.0 then
106 |     let avg_speed = !total_size /. !total_time in
107 |     printf "%.5f\n" avg_speed
108 | 
109 | (* Entry point: perf.exe <root_directory> <sample_rate> <format> <max_files>.
110 |    Every failure path exits with status 1, matching bench.py, so that callers
111 |    checking the exit status can tell a failed run from a successful one. *)
112 | let () =
113 |   if Array.length Sys.argv <> 5 then (
114 |     eprintf "Usage: %s  <root_directory> <sample_rate> <format> <max_files>\n"
115 |       Sys.argv.(0) ;
116 |     exit 1 ) ;
117 |   let root_dir = Sys.argv.(1) in
118 |   let extension = Sys.argv.(3) in
119 |   (* int_of_string would raise Failure on non-numeric input; report the bad
120 |      argument instead of letting the exception escape. *)
121 |   let int_arg name idx =
122 |     match int_of_string_opt Sys.argv.(idx) with
123 |     | Some v ->
124 |         v
125 |     | None ->
126 |         eprintf "Invalid <%s>: %s\n" name Sys.argv.(idx) ;
127 |         exit 1
128 |   in
129 |   let sample_rate = int_arg "sample_rate" 2 in
130 |   let max_files = int_arg "max_files" 4 in
131 |   if not (Sys.file_exists root_dir && Sys.is_directory root_dir) then (
132 |     eprintf "Can't read directory: %s.\n" root_dir ;
133 |     exit 1 ) ;
134 |   try run_benchmark root_dir sample_rate extension max_files
135 |   with ex ->
136 |     eprintf "An unexpected  error occurred: %s\n" (Printexc.to_string ex) ;
137 |     exit 1
125 | 


--------------------------------------------------------------------------------
/bench/stft/dune:
--------------------------------------------------------------------------------
1 | (executable
2 |  (name perf)
3 |  (libraries core core_bench soundml))
4 | 


--------------------------------------------------------------------------------
/bench/stft/perf.ml:
--------------------------------------------------------------------------------
 1 | open! Core
 2 | open! Core_bench
 3 | open Soundml
 4 | 
 5 | let path = Sys_unix.getcwd () ^ "/bench/stft/wav_stereo_44100hz_1s.wav"
 6 | 
 7 | let f32audio = Audio.data @@ Io.read ~res_typ:Io.NONE Bigarray.Float32 path
 8 | 
 9 | let f64audio = Audio.data @@ Io.read ~res_typ:Io.NONE Bigarray.Float64 path
10 | 
11 | let main () =
12 |   Command_unix.run
13 |     (Bench.make_command
14 |        [ Bench.Test.create ~name:"float32" (fun () ->
15 |              ignore (Transform.stft Types.B32 f32audio) )
16 |        ; Bench.Test.create ~name:"float64" (fun () ->
17 |              ignore (Transform.stft Types.B64 f64audio) ) ] )
18 | 
19 | let () = main ()
20 | 


--------------------------------------------------------------------------------
/bench/stft/perf.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import librosa
 3 | import numpy as np
 4 | import os
 5 | 
 6 | AUDIO_FILE_PATH = os.path.join(os.getcwd(), "bench/stft/wav_stereo_44100hz_1s.wav")
 7 | 
 8 | STFT_CONFIGURATIONS = [
 9 |     {"n_fft": 2048, "win_length": 2048, "hop_length": 512, "window_type": "hann"}
10 | ]
11 | 
12 | y_f32, sr_f32 = librosa.load(AUDIO_FILE_PATH, sr=None, mono=True, dtype=np.float32)
13 | y_f64, sr_f64 = librosa.load(AUDIO_FILE_PATH, sr=None, mono=True, dtype=np.float64)
14 | SIGNAL_LENGTH = len(y_f32)
15 | 
16 | @pytest.mark.parametrize("config", STFT_CONFIGURATIONS)
17 | @pytest.mark.parametrize("precision", ["float32", "float64"])
18 | def test_librosa_stft(benchmark, config, precision):
19 |     if precision == "float32":
20 |         audio_data = y_f32
21 |     elif precision == "float64":
22 |         audio_data = y_f64
23 |     else:
24 |         raise ValueError("Invalid precision")
25 | 
26 |     result = benchmark(
27 |         librosa.stft,
28 |         y=audio_data,
29 |         n_fft=config["n_fft"],
30 |         hop_length=config["hop_length"],
31 |         win_length=config["win_length"],
32 |         window=config["window_type"],
33 |         center=False
34 |     )
35 |     assert result is not None
36 | 


--------------------------------------------------------------------------------
/bench/stft/wav_stereo_44100hz_1s.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabyfle/SoundML/95f3d969cc225781b22d8ffa86c4d5e531fb0db8/bench/stft/wav_stereo_44100hz_1s.wav


--------------------------------------------------------------------------------
/doc/dune:
--------------------------------------------------------------------------------
1 | (documentation
2 |  (package soundml)
3 |  (mld_files index))
4 | 


--------------------------------------------------------------------------------
/doc/index.mld:
--------------------------------------------------------------------------------
 1 | {0 SoundML}
 2 | 
 3 | {%html:
 4 | <center>
 5 |     <img alt="SoundML logo" src="soundml_logo.svg" style="max-width:200px;" />
 6 | </center>
 7 | %}
 8 | 
 9 | {!Soundml} is an OCaml library that enables audio processing. It provides tools to read, write, extract features and manipulate audio as time series data.
10 | The library is built on top of the Owl library and provides a high-level API to work with audio data. It's still in an experimental stage.
11 | 
12 | 
13 | {1 API Documentation}
14 | 
15 | To see the developer API documentation for SoundML, you can visit the following link: {{!Soundml}here}.
16 | 
17 | {1 Author}
18 | 
19 | {{:https://gabyfle.dev}Gabriel "gabyfle" Santamaria}


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
 1 | (lang dune 3.18)
 2 | 
 3 | (name soundml)
 4 | 
 5 | (generate_opam_files true)
 6 | 
 7 | (source
 8 |  (github gabyfle/SoundML))
 9 | 
10 | (authors "Gabriel Santamaria <gaby.santamaria@outlook.fr>")
11 | 
12 | (maintainers "Gabriel Santamaria <gaby.santamaria@outlook.fr>")
13 | 
14 | (license Apache-2.0)
15 | 
16 | (documentation https://soundml.gabyfle.dev)
17 | 
18 | (package
19 |  (name soundml)
20 |  (synopsis "An OCaml library to embed sound processing in your applications")
21 |  (description
22 |   "SoundML is a library built on top of Owl to analyse sounds files. It can read, write audio, extract various features from audio files and much more.")
23 |  (depends
24 |   (ocaml
25 |    (>= 5.2.0))
26 |   dune
27 |   (conf-sndfile :build)
28 |   (conf-rubberband :build)
29 |   ; Samplerate is already a requirement of rubberband
30 |   (conf-samplerate :build)
31 |   (dune-configurator :build)
32 |   (owl ; Version 1.2 is required to have the DC transform
33 |    (>= 1.2))
34 |   (odoc :with-doc)
35 |   (alcotest :with-test)
36 |   (yojson :with-test)
37 |   (bisect_ppx
38 |    (and
39 |     :with-test
40 |     (>= 2.5.0))))
41 |  (tags
42 |   (sound spectrogram fourier digital-filters)))
43 | 


--------------------------------------------------------------------------------
/soundml.opam:
--------------------------------------------------------------------------------
 1 | # This file is generated by dune, edit dune-project instead
 2 | opam-version: "2.0"
 3 | synopsis: "An OCaml library to embed sound processing in your applications"
 4 | description:
 5 |   "SoundML is a library built on top of Owl to analyse sounds files. It can read, write audio, extract various features from audio files and much more."
 6 | maintainer: ["Gabriel Santamaria <gaby.santamaria@outlook.fr>"]
 7 | authors: ["Gabriel Santamaria <gaby.santamaria@outlook.fr>"]
 8 | license: "Apache-2.0"
 9 | tags: ["sound" "spectrogram" "fourier" "digital-filters"]
10 | homepage: "https://github.com/gabyfle/SoundML"
11 | doc: "https://soundml.gabyfle.dev"
12 | bug-reports: "https://github.com/gabyfle/SoundML/issues"
13 | depends: [
14 |   "ocaml" {>= "5.2.0"}
15 |   "dune" {>= "3.18"}
16 |   "conf-sndfile" {build}
17 |   "conf-rubberband" {build}
18 |   "conf-samplerate" {build}
19 |   "dune-configurator" {build}
20 |   "owl" {>= "1.2"}
21 |   "odoc" {with-doc}
22 |   "alcotest" {with-test}
23 |   "yojson" {with-test}
24 |   "bisect_ppx" {with-test & >= "2.5.0"}
25 | ]
26 | build: [
27 |   ["dune" "subst"] {dev}
28 |   [
29 |     "dune"
30 |     "build"
31 |     "-p"
32 |     name
33 |     "-j"
34 |     jobs
35 |     "@install"
36 |     "@runtest" {with-test}
37 |     "@doc" {with-doc}
38 |   ]
39 | ]
40 | dev-repo: "git+https://github.com/gabyfle/SoundML.git"
41 | x-maintenance-intent: ["(latest)"]
42 | 


--------------------------------------------------------------------------------
/src/aformat.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023-2025                                                  *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | (**
 22 |    The {!Aformat} (audio format) module is an abstraction over the different audio formats supported by libsndfile. *)
 23 | 
 24 | type ftype =
 25 |   | WAV
 26 |   | AIFF
 27 |   | AU
 28 |   | RAW
 29 |   | PAF
 30 |   | SVX
 31 |   | NIST
 32 |   | VOC
 33 |   | IRCAM
 34 |   | W64
 35 |   | MAT4
 36 |   | MAT5
 37 |   | PVF
 38 |   | XI
 39 |   | HTK
 40 |   | SDS
 41 |   | AVR
 42 |   | WAVEX
 43 |   | SD2
 44 |   | FLAC
 45 |   | CAF
 46 |   | WVE
 47 |   | OGG
 48 |   | MPC2K
 49 |   | RF64
 50 |   | MP3
 51 | 
 52 | type subtype =
 53 |   | PCM_S8
 54 |   | PCM_16
 55 |   | PCM_24
 56 |   | PCM_32
 57 |   | PCM_U8
 58 |   | FLOAT
 59 |   | DOUBLE
 60 |   | ULAW
 61 |   | ALAW
 62 |   | IMA_ADPCM
 63 |   | MS_ADPCM
 64 |   | GSM610
 65 |   | VOX_ADPCM
 66 |   | NMS_ADPCM_16
 67 |   | NMS_ADPCM_24
 68 |   | NMS_ADPCM_32
 69 |   | G721_32
 70 |   | G723_24
 71 |   | G723_40
 72 |   | DVW_12
 73 |   | DVW_16
 74 |   | DVW_24
 75 |   | DVW_N
 76 |   | DPCM_8
 77 |   | DPCM_16
 78 |   | VORBIS
 79 |   | OPUS
 80 |   | ALAC_16
 81 |   | ALAC_20
 82 |   | ALAC_24
 83 |   | ALAC_32
 84 |   | MPEG_LAYER_I
 85 |   | MPEG_LAYER_II
 86 |   | MPEG_LAYER_III
 87 | 
 88 | type endianness = FILE | LITTLE | BIG | CPU
 89 | 
 90 | (** The type for an audio format specification. *)
 91 | type t = {ftype: ftype; sub: subtype; endian: endianness}
 92 | 
 93 | val create :
 94 |   ?subtype:subtype -> ?endian:endianness -> ftype -> (t, string) result
 95 | (**
 96 |     [create ?subtype ?endian ftype] creates a new audio format representation based on the given format specifications.
 97 | 
 98 |     {2 Parameters}
 99 |     @param subtype is the subtype of the audio file. If not specified, it'll be set to a default value according to the file type.
100 |     @param endian is the endianness of the audio file. If not specified, it'll be set to [FILE], which is the default file endianness.
101 |     @param ftype is the file type of the audio file.
102 | 
103 |     {2 Returns}
104 | 
105 |     @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed.
106 | 
107 |     {2 Usage}
108 | 
109 |     Creating a new audio format is as simple as calling the [create] function with the desired parameters.
110 |     
111 |     For the [RAW] file type, the subtype is required. Not specifying one will result in an error.
112 | 
113 |     {[ 
114 |       open Soundml.Io
115 |       (* This will create a new WAV audio format with PCM_16 subtype and little endian. *)
116 |       let fmt = Afmt.create ~subtype:Io.Afmt.PCM_16 ~endian:Io.Afmt.LITTLE Io.Aformat.WAV in
117 |     ]} *)
118 | 
119 | val to_int : t -> int
120 | (**
121 |     [to_int fmt] converts the audio format to an integer representation compatible with libsndfile.
122 |     
123 |     {2 Parameters}
124 |     @param fmt the format that we need to convert to an integer value.
125 |     
126 |     {2 Returns}
127 |     @return The integer representation of the audio format. *)
128 | 
129 | val of_int : int -> (t, string) result
130 | (**
131 |     [of_int code] converts the integer representation of the audio format to a {!Aformat.t} type.
132 |     
133 |     {2 Parameters}
134 |     @param code is the integer representation of the audio format we're trying to convert.
135 |     
136 |     @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. *)
137 | 
138 | val of_ext : ?sub:subtype -> ?endian:endianness -> string -> (t, string) result
139 | (**
140 |     [of_ext ?sub ?endian ext] tries to convert the given file extension to an audio format type.
141 | 
142 |     This function assumes that the extension is given with its leading dot (e.g. [".wav"]) and is thus compatible with the [Filename] module.
143 |     
144 |     {2 Parameters}
145 |     @param sub is the subtype of the audio file. If not specified, it'll be set to a default value according to the file type.
146 |     @param endian is the endianness of the audio file. If not specified, it'll be set to [FILE], which is the default file endianness.
147 |     @param ext is the file extension we're trying to convert.
148 |     
149 |     {2 Returns}
150 |     @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. *)
151 | 
152 | val pp : Format.formatter -> t -> unit
153 | (**
154 |     [pp fmt t] pretty prints the audio format [t] to the given formatter [fmt].
155 |     
156 |     {2 Parameters}
157 |     @param fmt is the formatter to use for printing the audio format. *)
158 | 


--------------------------------------------------------------------------------
/src/audio.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | open Owl
 23 | 
 24 | (* generic multi-dimensional array *)
 25 | module G = Dense.Ndarray.Generic
 26 | module Aformat = Aformat
 27 | 
 28 | module Metadata = struct
 29 |   (* Record describing an audio stream: its name, size and format. *)
 30 |   type t =
 31 |     { name: string
 32 |     ; frames: int
 33 |     ; channels: int
 34 |     ; sample_rate: int
 35 |     ; format: Aformat.t }
 36 | 
 37 |   (* Builds the record; [name] falls back to ["Unknown"] when omitted. *)
 38 |   let create ?(name : string = "Unknown") frames channels sample_rate format =
 39 |     {format; sample_rate; channels; frames; name}
 40 | 
 41 |   (* Field accessors, one per record field. *)
 42 |   let name ({name; _} : t) = name
 43 |   let frames ({frames; _} : t) = frames
 44 |   let channels ({channels; _} : t) = channels
 45 |   let sample_rate ({sample_rate; _} : t) = sample_rate
 46 |   let format ({format; _} : t) = format
 47 | 
 48 | end
 49 | 
 50 | type 'a audio = {meta: Metadata.t; data: (float, 'a) G.t}
 51 | 
 52 | let create (meta : Metadata.t) data = {data; meta}
 53 | 
 54 | let meta ({meta; _} : 'a audio) = meta
 55 | 
 56 | let rawsize ({data; _} : 'a audio) = G.numel data
 57 | 
 58 | let length (a : 'a audio) : int =
 59 |   (* duration (ms) = elements per channel / sample rate * 1000, truncated *)
 60 |   let m = meta a in
 61 |   let rate = float_of_int (Metadata.sample_rate m) in
 62 |   let per_chan = float_of_int (rawsize a) /. float_of_int (Metadata.channels m) in
 63 |   Int.of_float (per_chan /. rate *. 1000.)
 64 | 
 65 | let data ({data; _} : 'a audio) = data
 66 | 
 67 | let sr (a : 'a audio) = a |> meta |> Metadata.sample_rate
 68 | 
 69 | let channels (a : 'a audio) = a |> meta |> Metadata.channels
 70 | 
 71 | let samples (a : 'a audio) = a |> meta |> Metadata.frames
 72 | 
 73 | let format (a : 'a audio) = a |> meta |> Metadata.format
 74 | 
 75 | let set_data (a : 'a audio) (d : (float, 'a) G.t) = {meta= a.meta; data= d}
 76 | 
 77 | let sample_pos (a : 'a audio) (x : int) =
 78 |   (* [x] is a time in milliseconds; the result is truncated to an integer *)
 79 |   let m = meta a in
 80 |   let rate = float_of_int (Metadata.sample_rate m) in
 81 |   Int.of_float (float_of_int x /. 1000. *. rate *. float_of_int (Metadata.channels m))
 82 | 
 83 | let get_slice (slice : int * int) (a : 'a audio) : 'a audio =
 84 |   (* [slice] is [(start, stop)] in milliseconds; both are converted with
 85 |      [sample_pos]. A negative bound is counted back from the end of the data,
 86 |      and the two bounds may be given in either order. *)
 87 |   let size = rawsize a in
 88 |   (* Wrap each bound on its own. The former single [match] tested [x < 0]
 89 |      before [x < 0 && y < 0], so that last case was unreachable and a slice
 90 |      with two negative bounds always ended up raising. *)
 91 |   let wrap i = if i < 0 then size + i else i in
 92 |   let start, stop = slice in
 93 |   let x = wrap (sample_pos a start) in
 94 |   let y = wrap (sample_pos a stop) in
 95 |   let x, y = if x < y then (x, y) else (y, x) in
 96 |   (* A bound that is still negative after wrapping lies before the start. *)
 97 |   if x < 0 || y < 0 then
 98 |     raise
 99 |       (Invalid_argument "Audio.get_slice: slice out of bounds, negative values")
100 |   else if x >= size || y >= size then
101 |     raise
102 |       (Invalid_argument
103 |          "Audio.get_slice: slice out of bounds, values greater than rawsize" )
104 |   else
105 |     let data = G.get_slice [[x; y]] a.data in
106 |     {a with data}
107 | 
108 | let get (x : int) (a : 'a audio) : float =
109 |   (* take the slice from [x] to [x] (ms) and read its first value *)
110 |   G.get (data (get_slice (x, x) a)) [|0|]
111 | 
112 | let normalize ?(factor : float = 2147483647.) (a : 'a audio) : unit =
113 |   G.scalar_mul_ (1. /. factor) (data a) (* multiplies the stored data by 1 / factor *)
114 | 
115 | let reverse (x : 'a audio) : 'a audio =
116 |   (* same metadata, data replaced by the result of [G.reverse] *)
117 |   {x with data= G.reverse x.data}
118 | 
119 | let ( .${} ) audio bounds = get_slice bounds audio
120 | 
121 | let ( .%{} ) audio pos = get pos audio
122 | 
123 | let ( $/ ) audio factor = normalize ~factor audio
124 | 
125 | let ( /$ ) factor audio = normalize ~factor audio
126 | 


--------------------------------------------------------------------------------
/src/audio.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | (**
 23 |     The {!Audio} module defines the needed types around the representation of 
 24 |     an audio file and more precisely an audio data. *)
 25 | 
 26 | open Owl
 27 | 
 28 | (**
 29 |     Alias of the generic [Ndarray] datastructure from [Owl]. This is used
 30 |     internally to make the computations around the audio data *)
 31 | module G = Dense.Ndarray.Generic
 32 | 
 33 | (** @canonical Audio.Aformat
 34 |     Abstraction over the different supported audio format from libsndfile. *)
 35 | module Aformat = Aformat
 36 | 
 37 | (**
 38 |     {1 Audio Metadata}
 39 | 
 40 |     This module contains the metadata of an audio file, which is used to store
 41 |     information about the audio file when reading it from the filesystem.
 42 |     
 43 |     Note: {!Metadata} in {!Soundml} isn't the same thing as the metadata attached
 44 |     to audio files. In {!Soundml}, {!Metadata} refers to all the data describing
 45 |     the audio file itself (sample rate, number of channels, etc.). If you are
 46 |     interested in dealing with author name, label and other metadata, we recommend
 47 |     using the {{:https://github.com/savonet/ocaml-mm} ocaml-mm} library instead. *)
 48 | 
 49 | module Metadata : sig
 50 |   type t
 51 | 
 52 |   val create : ?name:string -> int -> int -> int -> Aformat.t -> t
 53 |   (**
 54 |       [create ?name frames channels sample_rate format] creates a new metadata
 55 |       with the given name, number of frames, number of channels, sample rate
 56 |       and format. The name is optional and defaults to ["Unknown"] *)
 57 | 
 58 |   val name : t -> string
 59 |   (**
 60 |       [name meta] returns the name of the file represented by the metadata *)
 61 | 
 62 |   val frames : t -> int (** [frames meta] returns the number of frames stored in the metadata *)
 63 | 
 64 |   val channels : t -> int
 65 |   (**
 66 |       [channels meta] returns the number of channels of the audio file *)
 67 | 
 68 |   val sample_rate : t -> int
 69 |   (**
 70 |       [sample_rate meta] returns the sample rate of the audio file *)
 71 | 
 72 |   val format : t -> Aformat.t
 73 |   (**
 74 |         [format meta] returns the format of the audio file *)
 75 | end
 76 | 
 77 | (**
 78 |     {1 Audio manipulation}
 79 | 
 80 |     Most of these functions are used internally, and you'll probably just use the {!Audio.normalize}
 81 |     function to normalize the audio data before writing it back to a file. *)
 82 | 
 83 | (**
 84 |     High level representation of an audio file data, used to store data when reading audio files. *)
 85 | type 'a audio
 86 | 
 87 | val create : Metadata.t -> (float, 'a) G.t -> 'a audio
 88 | (**
 89 |     [create metadata data] creates a new audio with the given metadata and data *)
 90 | 
 91 | val meta : 'a audio -> Metadata.t
 92 | (**
 93 |     [meta audio] returns the metadata attached to the given audio *)
 94 | 
 95 | val rawsize : 'a audio -> int
 96 | (**
 97 |     [rawsize audio] returns the raw size of the given audio *)
 98 | 
 99 | val length : 'a audio -> int
100 | (**
101 |     [length audio] returns the length (in milliseconds) of the given audio *)
102 | 
103 | val data : 'a audio -> (float, 'a) Owl.Dense.Ndarray.Generic.t
104 | (**
105 |     [data audio] returns the data of the given audio *)
106 | 
107 | val sr : 'a audio -> int
108 | (**
109 |     [sr audio] returns the sample rate of the given audio *)
110 | 
111 | val channels : 'a audio -> int
112 | (**
113 |     [channels audio] returns the number of channels of the given audio *)
114 | 
115 | val samples : 'a audio -> int
116 | (**
117 |     [samples audio] returns the number of samples per channel in the given audio *)
118 | 
119 | val format : 'a audio -> Aformat.t
120 | (**
121 |     [format audio] returns the format of the given audio *)
122 | 
123 | val set_data : 'a audio -> (float, 'a) Owl.Dense.Ndarray.Generic.t -> 'a audio
124 | (**
125 |     [set_data audio data] sets the data of the given audio *)
126 | 
127 | val get : int -> 'a audio -> float
128 | (**
129 |     [get x audio] returns the sample located at the position [x] in milliseconds.
130 | 
131 |     The position [x] must be between 0 and the length of the audio.
132 |     
133 |     Example:
134 | 
135 |     {[
136 |         let audio = Audio.read "audio.wav" in
137 |         let sample = Audio.get 1000 audio in (* get the sample at 1 second *)
138 |     ]} *)
139 | 
140 | val get_slice : int * int -> 'a audio -> 'a audio
141 | (**
142 |     [get_slice (start, stop) audio] returns a slice of the audio from the position [start] to [stop], both given in milliseconds.
143 | 
144 |     The positions [start] and [stop] must lie within the length of the audio.
145 | 
146 |     This function works like Owl's slicing. Giving negative values to [start] and [stop] will slice the audio
147 |     from the end of the audio.
148 | 
149 |     Example:
150 | 
151 |     {[
152 |         let audio = Audio.read "audio.wav" in
153 |         let slice = Audio.get_slice (1000, 2000) audio in (* get the slice from 1 to 2 seconds *)
154 |     ]} *)
155 | 
156 | val normalize : ?factor:float -> 'a audio -> unit
157 | (**
158 |     [normalize ?factor audio] normalizes the data of the given audio data by
159 |     the [?factor] parameter, by default equal to $2^31 - 1$.
160 | 
161 |     Use this function when you need to normalize the audio data by a certain factor.
162 |     
163 |     Warning: if you normalize the data and end up getting values that go
164 |     beyond 1.0 or under -1.0, it will surely make the audio sound distorted.
165 | 
166 |     The operation is performed in place (impure function).
167 | 
168 |     Example:
169 | 
170 |     {[
171 |         let audio = Audio.read "audio.wav" in
172 |         (* you can perform any operation here *)
173 |         (* ... *)
174 |         let factor = (* ... *) in
175 |         Audio.normalize ~factor audio; (* normalizing before writing *)
176 |         Audio.write audio "audio.wav"
177 |     ]} *)
178 | 
179 | val reverse : 'a audio -> 'a audio
180 | (**
181 |     [reverse audio] reverses the audio data.
182 |     This function does not operate in place: a new audio is created with the reversed data.
183 | 
184 |     Example:
185 | 
186 |     {[
187 |         let audio = Audio.read "audio.wav" in
188 |         let audio = Audio.reverse audio in
189 |         Audio.write audio "reversed.wav"
190 |     ]} *)
191 | 
192 | (**
193 |     {2 Operators on audio data}
194 | 
195 |     Following Owl's conventions, a few operators are available to deal with
196 |     audio data. You can use them to make the code more concise and more readable.
197 |     They are just syntactic sugar for functions over the {!Audio.audio} type. *)
198 | 
199 | val ( .%{} ) : 'a audio -> int -> float
200 | (** Operator of {!Audio.get} *)
201 | 
202 | val ( .${} ) : 'a audio -> int * int -> 'a audio
203 | (** Operator of {!Audio.get_slice} *)
204 | 
205 | val ( $/ ) : 'a audio -> float -> unit
206 | (** Operator of {!Audio.normalize} *)
207 | 
208 | val ( /$ ) : float -> 'a audio -> unit
209 | (** Operator of {!Audio.normalize} *)
210 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
 1 | (library
 2 |  (name aformat)
 3 |  (package soundml)
 4 |  (modules aformat)
 5 |  (instrumentation
 6 |   (backend bisect_ppx))
 7 |  (wrapped false))
 8 | 
 9 | (library
10 |  (name types)
11 |  (package soundml)
12 |  (modules types)
13 |  (instrumentation
14 |   (backend bisect_ppx))
15 |  (wrapped false))
16 | 
17 | (library
18 |  (name audio)
19 |  (package soundml)
20 |  (libraries owl types aformat)
21 |  (modules audio)
22 |  (instrumentation
23 |   (backend bisect_ppx))
24 |  (wrapped false))
25 | 
26 | (library
27 |  (name window)
28 |  (package soundml)
29 |  (libraries owl types audio)
30 |  (modules window)
31 |  (instrumentation
32 |   (backend bisect_ppx))
33 |  (wrapped false))
34 | 
35 | (library
36 |  (name utils)
37 |  (package soundml)
38 |  (libraries audio types owl)
39 |  (modules utils)
40 |  (instrumentation
41 |   (backend bisect_ppx))
42 |  (wrapped false))
43 | 
44 | (library
45 |  (name transform)
46 |  (package soundml)
47 |  (modules transform)
48 |  (libraries owl window types utils)
49 |  (instrumentation
50 |   (backend bisect_ppx))
51 |  (wrapped false))
52 | 
53 | (library
54 |  (name filterbank)
55 |  (package soundml)
56 |  (modules filterbank)
57 |  (libraries owl utils)
58 |  (instrumentation
59 |   (backend bisect_ppx))
60 |  (wrapped false))
61 | 
62 | (library
63 |  (name soundml)
64 |  (public_name soundml)
65 |  (modules soundml)
66 |  (libraries owl audio io types feature filterbank transform effects window)
67 |  (instrumentation
68 |   (backend bisect_ppx)))
69 | 


--------------------------------------------------------------------------------
/src/effects/dune:
--------------------------------------------------------------------------------
 1 | (include_subdirs qualified)
 2 | 
 3 | (library
 4 |  (name effects)
 5 |  (package soundml)
 6 |  (foreign_stubs
 7 |   (language cxx)
 8 |   (names rubberband_stubs)
 9 |   (flags
10 |    :standard
11 |    (:include c_flags.sexp)
12 |    -std=c++23
13 |    -O3))
14 |  (c_library_flags
15 |   (:include c_library_flags.sexp))
16 |  (libraries audio utils owl)
17 |  (instrumentation
18 |   (backend bisect_ppx))
19 |  (wrapped true))
20 | 
21 | (rule
22 |  (targets c_flags.sexp c_library_flags.sexp)
23 |  (action
24 |   (run ./time/config/discover.exe)))
25 | 


--------------------------------------------------------------------------------
/src/effects/filter/filter.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | module type S = sig (* interface a filter implementation must provide to [Make] *)
23 |   type t
24 | 
25 |   type params (* argument type accepted by [create] *)
26 | 
27 |   val reset : t -> t (* NOTE(review): presumably returns the filter with its history cleared; confirm in the implementations *)
28 | 
29 |   val create : params -> t
30 | 
31 |   val process_sample : t -> float -> float (* called once per element by [Make.process] *)
32 | end
33 | 
34 | module Make (S : S) = struct
35 |   (* Re-export the wrapped filter's types and primitives unchanged. *)
36 |   type t = S.t
37 |   type params = S.params
38 | 
39 |   let reset = S.reset
40 | 
41 |   let create = S.create
42 | 
43 |   let process_sample = S.process_sample
44 | 
45 |   (* Applies [process_sample] to the elements of [x] in increasing index
46 |      order and stores the results in a new one-dimensional array. *)
47 |   let process (t : t) (x : (Float.t, 'a) Audio.G.t) =
48 |     let len = Audio.G.numel x in
49 |     let out = Audio.G.create (Audio.G.kind x) [|len|] 0. in
50 |     for idx = 0 to len - 1 do
51 |       Audio.G.set out [|idx|] (process_sample t (Audio.G.get x [|idx|]))
52 |     done ;
53 |     out
54 | end
54 | 
55 | module IIR = struct (* filters obtained by applying [Make] to the Iir, Highpass and Lowpass modules *)
56 |   module Generic = Make (Iir)
57 |   module HighPass = Make (Highpass)
58 |   module LowPass = Make (Lowpass)
59 | end
60 | 
61 | module FIR = struct (* filter obtained by applying [Make] to the Fir module *)
62 |   module Generic = Make (Fir)
63 | end
64 | 


--------------------------------------------------------------------------------
/src/effects/filter/filter.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | module type S = sig (* interface a filter implementation must provide to [Make] *)
 23 |   type t
 24 | 
 25 |   type params (* argument type accepted by [create] *)
 26 | 
 27 |   val reset : t -> t
 28 | 
 29 |   val create : params -> t
 30 | 
 31 |   val process_sample : t -> float -> float (* applied to each element by [Make.process] *)
 32 | end
 33 | 
 34 | module Make : functor (S : S) -> sig (* re-exports the contents of [S] and adds [process] *)
 35 |   type t = S.t
 36 | 
 37 |   type params = S.params
 38 | 
 39 |   val reset : S.t -> S.t
 40 | 
 41 |   val create : S.params -> S.t
 42 | 
 43 |   val process_sample : S.t -> float -> float
 44 | 
 45 |   val process :
 46 |        S.t
 47 |     -> (float, 'a) Owl_dense_ndarray.Generic.t
 48 |     -> (float, 'a) Owl_dense_ndarray.Generic.t (** applies [process_sample] to each element of the input, in index order, and returns the results in a new one-dimensional array *)
 49 | end
 50 | 
 51 | module IIR : sig (* filters built with [Make]; see filter.ml for the applied modules *)
 52 |   module Generic : sig (* [Make] applied to [Iir] *)
 53 |     type t = Iir.t
 54 | 
 55 |     type params = Iir.params
 56 | 
 57 |     val reset : t -> t
 58 | 
 59 |     val create : params -> t
 60 | 
 61 |     val process_sample : t -> float -> float
 62 | 
 63 |     val process :
 64 |          t
 65 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
 66 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
 67 |   end
 68 | 
 69 |   module HighPass : sig (* [Make] applied to [Highpass] *)
 70 |     type t = Highpass.t
 71 | 
 72 |     type params = Highpass.params
 73 | 
 74 |     val reset : t -> t
 75 | 
 76 |     val create : params -> t
 77 | 
 78 |     val process_sample : t -> float -> float
 79 | 
 80 |     val process :
 81 |          t
 82 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
 83 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
 84 |   end
 85 | 
 86 |   module LowPass : sig (* [Make] applied to [Lowpass] *)
 87 |     type t = Lowpass.t
 88 | 
 89 |     type params = Lowpass.params
 90 | 
 91 |     val reset : t -> t
 92 | 
 93 |     val create : params -> t
 94 | 
 95 |     val process_sample : t -> float -> float
 96 | 
 97 |     val process :
 98 |          t
 99 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
100 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
101 |   end
102 | end
103 | 
104 | module FIR : sig
105 |   module Generic : sig (* [Make] applied to [Fir] *)
106 |     type t = Fir.t
107 | 
108 |     type params = Fir.params
109 | 
110 |     val reset : t -> t
111 | 
112 |     val create : params -> t
113 | 
114 |     val process_sample : t -> float -> float
115 | 
116 |     val process :
117 |          t
118 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
119 |       -> (float, 'a) Owl_dense_ndarray.Generic.t
120 |   end
121 | end
122 | 


--------------------------------------------------------------------------------
/src/effects/filter/fir.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Iir
23 | 
24 | type t = Iir.t
25 | 
26 | type params = {b: float array}
27 | 
28 | let reset = Iir.reset
29 | (* The IIR parameters are built with [a] fixed to a single 1. and [b] taken from the caller. *)
30 | let create ({b= taps} : params) = Iir.create {a= [|1.|]; b= taps}
31 | 
32 | let process_sample state sample = Iir.process_sample state sample
33 | 


--------------------------------------------------------------------------------
/src/effects/filter/fir.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
(** Abstract handle on an FIR filter (the implementation stores it as an
    [Iir.t]). *)
type t

(** Filter description: [b] is the array of feed-forward coefficients. *)
type params = {b: float array}

(** [reset t] forwards to [Iir.reset] and returns the filter it was given. *)
val reset : t -> t

(** [create params] builds a filter from [params.b]; the implementation
    delegates to [Iir.create] with [a = [|1.|]]. *)
val create : params -> t

(** [process_sample t x] feeds the sample [x] to the filter and returns the
    corresponding output sample. Forwards to [Iir.process_sample]. *)
val process_sample : t -> float -> float
31 | 


--------------------------------------------------------------------------------
/src/effects/filter/highpass.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Iir
23 | 
(** A high-pass filter is represented by the underlying {!Iir} filter. *)
type t = Iir.t

(** Design parameters. Only the ratio [cutoff / sample_rate] is used, so both
    must be expressed in the same unit. *)
type params = {cutoff: float; sample_rate: int}

(** [create p] derives the two feedback and two feed-forward coefficients
    from [p] and hands them to {!Iir.create}. No validation of [p] is
    performed. *)
let create (p : params) =
  let warped = Float.tan (Float.pi *. p.cutoff /. float_of_int p.sample_rate) in
  let c = (warped -. 1.) /. (warped +. 1.) in
  let feedback = [|1.0; c|] in
  let feedforward = [|(1.0 -. c) /. 2.0; (c -. 1.0) /. 2.0|] in
  Iir.create {a= feedback; b= feedforward}

(** Same function as {!Iir.reset}. *)
let reset = Iir.reset

(** Same function as {!Iir.process_sample}. *)
let process_sample = Iir.process_sample
40 | 


--------------------------------------------------------------------------------
/src/effects/filter/highpass.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
(** Abstract handle on a high-pass filter (stored as an [Iir.t] by the
    implementation). *)
type t

(** Design parameters. The implementation only uses the ratio
    [cutoff / sample_rate], so both must be expressed in the same unit. *)
type params = {cutoff: float; sample_rate: int}

(** [reset t] is [Iir.reset t]; it returns the filter it was given. *)
val reset : t -> t

(** [create params] computes a two-coefficient numerator and denominator
    from [params] and builds the filter with [Iir.create]. The parameters
    are not validated. *)
val create : params -> t

(** [process_sample t x] feeds the sample [x] to the filter and returns the
    corresponding output sample (see [Iir.process_sample]). *)
val process_sample : t -> float -> float
31 | 


--------------------------------------------------------------------------------
/src/effects/filter/iir.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
(** Coefficients supplied by the caller. In [process_sample], the [b] values
    multiply the input sample (feed-forward) and the [a] values multiply the
    output sample (feedback). *)
type params = {a: float array; b: float array}

(** Runtime representation of a filter: the coefficients converted to
    single-precision ndarrays, plus the [state] vector that
    [process_sample] updates in place between samples. *)
type t =
  { b: (float, Bigarray.float32_elt) Audio.G.t
  ; a: (float, Bigarray.float32_elt) Audio.G.t
  ; state: (float, Bigarray.float32_elt) Audio.G.t }
28 | 
(** [reset t] overwrites every element of the filter state with [0.] (in
    place) and returns [t] itself. *)
let reset t =
  let () = Audio.G.fill t.state 0. in
  t
30 | 
(** [create {a; b}] builds a filter from the feedback coefficients [a] and
    the feed-forward coefficients [b].

    Both coefficient sets are stored as single-precision ndarrays and are
    divided by the leading feedback coefficient [a.(0)], so that the stored
    filter has [a.(0) = 1] while keeping the transfer function of the
    coefficients that were passed in. The state vector has
    [max (length a) (length b)] elements, all initialised to [0.].

    @raise Invalid_argument if [a] is empty or if [a.(0)] is zero. *)
let create ({a; b} : params) =
  if Array.length a = 0 then
    invalid_arg "Iir.create: the feedback coefficients (a) must not be empty." ;
  let a = Audio.G.of_array Bigarray.Float32 a [|Array.length a|] in
  let b = Audio.G.of_array Bigarray.Float32 b [|Array.length b|] in
  let size = max (Audio.G.numel a) (Audio.G.numel b) in
  (* Read the leading coefficient after the conversion so that the check
     below sees the value that is actually used as divisor. *)
  let a0 = Audio.G.get a [|0|] in
  if a0 = 0. then invalid_arg "Iir.create: a.(0) must be non-zero." ;
  (* Numerator and denominator must be scaled by the same factor; scaling
     only [a] would change the gain of the filter whenever [a0 <> 1]. *)
  let a = Audio.G.(a /$ a0) in
  let b = Audio.G.(b /$ a0) in
  let state = Audio.G.create Bigarray.Float32 [|size|] 0. in
  {b; a; state}
39 | 
(** [process_sample t x] pushes the input sample [x] through the filter and
    returns the output sample.

    The output is [b.(0) *. x +. state.(0)] (or [0.] when the state vector is
    empty). Each state slot [i] is then overwritten in place with the next
    slot's value plus [b.(i + 1) *. x] minus [a.(i + 1) *. y]; coefficients
    past the end of [b] or [a] count as zero, and the last slot has no
    successor to carry over. *)
let process_sample t (x : float) =
  let len = Audio.G.numel t.state in
  let num_len = Audio.G.numel t.b in
  let den_len = Audio.G.numel t.a in
  let y =
    if len <= 0 then 0.
    else (Audio.G.get t.b [|0|] *. x) +. Audio.G.get t.state [|0|]
  in
  (* Contribution of the input sample to state slot [k - 1]. *)
  let feed_forward k =
    if k < num_len then Audio.G.get t.b [|k|] *. x else 0.
  in
  (* Contribution of the output sample to state slot [k - 1]. *)
  let feed_back k =
    if k < den_len then Float.neg (Audio.G.get t.a [|k|]) *. y else 0.
  in
  (* Slots are visited in ascending order so that slot [i + 1] is read
     before it gets overwritten. *)
  for i = 0 to len - 1 do
    let ff = feed_forward (i + 1) in
    let fb = feed_back (i + 1) in
    let updated =
      if i < len - 1 then Audio.G.get t.state [|i + 1|] +. ff +. fb
      else ff +. fb
    in
    Audio.G.set t.state [|i|] updated
  done ;
  y
58 | 


--------------------------------------------------------------------------------
/src/effects/filter/iir.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
(** Abstract handle on a filter: its coefficients and the internal state
    that is carried from one sample to the next. *)
type t

(** Coefficients supplied by the caller: [b] values are applied to the input
    samples (feed-forward), [a] values to the output samples (feedback). *)
type params = {a: float array; b: float array}

(** [reset t] sets the internal state of [t] back to zero, in place, and
    returns [t]. *)
val reset : t -> t

(** [create params] builds a filter with a zeroed state. The coefficients
    are stored in single precision and the feedback coefficients are
    divided by [params.a.(0)]. *)
val create : params -> t

(** [process_sample t x] feeds the sample [x] to the filter, updates the
    internal state of [t] in place, and returns the output sample. *)
val process_sample : t -> float -> float
31 | 


--------------------------------------------------------------------------------
/src/effects/filter/lowpass.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Iir
23 | 
(** A low-pass filter is represented by the underlying {!Iir} filter. *)
type t = Iir.t

(** Design parameters. Only the ratio [cutoff / sample_rate] is used, so both
    must be expressed in the same unit. *)
type params = {cutoff: float; sample_rate: int}

(** [create p] derives the coefficients from [p] and hands them to
    {!Iir.create}: both feed-forward taps share the same value. No validation
    of [p] is performed. *)
let create (p : params) =
  let ratio = p.cutoff /. float_of_int p.sample_rate |> ignore ; () in
  let () = ratio in
  let warped = Float.tan (Float.pi *. p.cutoff /. float_of_int p.sample_rate) in
  let c = (warped -. 1.) /. (warped +. 1.) in
  let tap = (1.0 +. c) /. 2.0 in
  Iir.create {a= [|1.0; c|]; b= [|tap; tap|]}

(** Same function as {!Iir.reset}. *)
let reset = Iir.reset

(** Same function as {!Iir.process_sample}. *)
let process_sample = Iir.process_sample
40 | 


--------------------------------------------------------------------------------
/src/effects/filter/lowpass.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
(** Abstract handle on a low-pass filter (stored as an [Iir.t] by the
    implementation). *)
type t

(** Design parameters. The implementation only uses the ratio
    [cutoff / sample_rate], so both must be expressed in the same unit. *)
type params = {cutoff: float; sample_rate: int}

(** [reset t] is [Iir.reset t]; it returns the filter it was given. *)
val reset : t -> t

(** [create params] computes a two-coefficient numerator and denominator
    from [params] and builds the filter with [Iir.create]. The parameters
    are not validated. *)
val create : params -> t

(** [process_sample t x] feeds the sample [x] to the filter and returns the
    corresponding output sample (see [Iir.process_sample]). *)
val process_sample : t -> float -> float
31 | 


--------------------------------------------------------------------------------
/src/effects/time/config/discover.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | module C = Configurator.V1
23 | 
(* Entry point of the configurator: asks pkg-config for the "rubberband"
   package and writes the resulting compiler and linker flags to
   [c_flags.sexp] and [c_library_flags.sexp]. When pkg-config is not
   available, or does not know the package, the flags fall back to linking
   with [-lrubberband] and no extra compiler flags. *)
let () =
  C.main ~name:"rubberband-pkg-config" (fun c ->
      let fallback : C.Pkg_config.package_conf =
        {libs= ["-lrubberband"]; cflags= []}
      in
      let lookup pc = C.Pkg_config.query pc ~package:"rubberband" in
      let conf =
        Option.value ~default:fallback
          (Option.bind (C.Pkg_config.get c) lookup)
      in
      C.Flags.write_sexp "c_flags.sexp" conf.cflags ;
      C.Flags.write_sexp "c_library_flags.sexp" conf.libs )
42 | 


--------------------------------------------------------------------------------
/src/effects/time/config/dune:
--------------------------------------------------------------------------------
; Sub-directories are not merged into this directory.
(include_subdirs no)

; Helper program built from discover.ml; it links against dune's
; configurator library.
(executable
 (name discover)
 (libraries dune.configurator))
6 | 


--------------------------------------------------------------------------------
/src/effects/time/rubberband_stubs.cxx:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************/
  2 | /*                                                                           */
  3 | /*                                                                           */
  4 | /*  Copyright (C) 2023-2025                                                  */
  5 | /*    Gabriel Santamaria                                                     */
  6 | /*                                                                           */
  7 | /*                                                                           */
  8 | /*  Licensed under the Apache License, Version 2.0 (the "License");          */
  9 | /*  you may not use this file except in compliance with the License.         */
 10 | /*  You may obtain a copy of the License at                                  */
 11 | /*                                                                           */
 12 | /*    http://www.apache.org/licenses/LICENSE-2.0                             */
 13 | /*                                                                           */
 14 | /*  Unless required by applicable law or agreed to in writing, software      */
 15 | /*  distributed under the License is distributed on an "AS IS" BASIS,        */
 16 | /*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
 17 | /*  See the License for the specific language governing permissions and      */
 18 | /*  limitations under the License.                                           */
 19 | /*                                                                           */
 20 | /*****************************************************************************/
 21 | 
#include <rubberband/RubberBandStretcher.h>
#include <cmath>
#include <expected>
#include <string>
#include <vector>
 25 | 
 26 | extern "C"
 27 | {
 28 | #include <caml/mlvalues.h>
 29 | #include <caml/memory.h>
 30 | #include <caml/alloc.h>
 31 | #include <caml/bigarray.h>
 32 | #include <caml/fail.h>
 33 | }
 34 | 
 35 | namespace SoundML
 36 | {
 37 |     namespace Effects
 38 |     {
 39 |         namespace Time
 40 |         {
 41 |             using namespace RubberBand;
 42 | 
 43 |             /**
 44 |              * @brief Stretches the input audio data using RubberBand.
 45 |              *
 46 |              * @param intput The input audio data pointer.
 47 |              * @param output The output audio data OCaml value.
 48 |              * @param samples The number of samples in the input data.
 49 |              * @param sample_rate The sample rate of the input data.
 50 |              * @param channels The number of channels in the input data.
 51 |              * @param config The RubberBand configuration options.
 52 |              * @param time_ratio The time ratio for stretching.
 53 |              * @param pitch_scale The pitch scale for stretching.
 54 |              *
 55 |              * @return The stretched audio data.
 56 |              */
 57 |             std::expected<value, std::string> offline_stretch(
 58 |                 float *intput,
 59 |                 value output,
 60 |                 size_t samples,
 61 |                 size_t sample_rate,
 62 |                 size_t channels,
 63 |                 RubberBandStretcher::Options config,
 64 |                 double time_ratio,
 65 |                 double pitch_scale)
 66 |             {
 67 |                 RubberBandStretcher stretcher(sample_rate, channels, config, time_ratio, pitch_scale);
 68 | 
 69 |                 stretcher.setExpectedInputDuration(samples);
 70 | 
 71 |                 /* we have access to the whole input, so we can feed it directly into the stretcher */
 72 |                 stretcher.study(&intput, samples, true);
 73 |                 stretcher.process(&intput, samples, true); /* Rubberband expect deinterleaved samples */
 74 | 
 75 |                 size_t per_channel_size = stretcher.available();
 76 | 
 77 |                 intnat ndims = (channels > 1) ? 2 : 1;
 78 |                 intnat dims[ndims];
 79 | 
 80 |                 if (ndims == 1)
 81 |                     dims[0] = static_cast<intnat>(per_channel_size);
 82 |                 else
 83 |                 {
 84 |                     dims[0] = static_cast<intnat>(channels); /* we're going to get the data directly deinterleaved */
 85 |                     dims[1] = static_cast<intnat>(per_channel_size);
 86 |                 }
 87 | 
 88 |                 /* memory is managed by OCaml */
 89 |                 output = caml_ba_alloc(CAML_BA_FLOAT32 | CAML_BA_C_LAYOUT, ndims, NULL, dims);
 90 | 
 91 |                 size_t retrieved = stretcher.retrieve((float *const *)&Caml_ba_data_val(output), per_channel_size);
 92 |                 if (retrieved != per_channel_size)
 93 |                 {
 94 |                     std::string error_msg = "Rubberband error: retrieved " + std::to_string(retrieved) + " samples, expected " + std::to_string(per_channel_size);
 95 |                     return std::unexpected(error_msg);
 96 |                 }
 97 | 
 98 |                 return output;
 99 |             }
100 | 
101 |         } /* namespace Time */
102 |     } /* namespace Effects */
103 | } /* namespace SoundML */
104 | 
extern "C"
{
    /**
     * @brief OCaml entry point for the offline RubberBand stretch.
     *
     * @param input  Bigarray whose data is read as float samples.
     * @param params Tuple (samples, sample_rate, channels, options,
     *               time_ratio, pitch_scale).
     *
     * @return The bigarray produced by offline_stretch.
     *
     * Raises Invalid_argument when the sizes are inconsistent with the input
     * buffer, and Failure with the message reported by offline_stretch when
     * the stretch itself fails.
     */
    CAMLprim value caml_rubberband_stretch(value input, value params)
    {
        using namespace SoundML::Effects::Time;
        CAMLparam2(input, params);
        CAMLlocal2(output, error_msg);

        intnat samples_raw = Long_val(Field(params, 0));
        intnat sample_rate_raw = Long_val(Field(params, 1));
        intnat channels_raw = Long_val(Field(params, 2));

        /* Reject values that would wrap around once converted to size_t.
           No C++ object with a destructor is alive yet, so raising is safe. */
        if (samples_raw < 0 || sample_rate_raw <= 0 || channels_raw <= 0)
            caml_invalid_argument("rubberband_stretch: samples must be >= 0, sample_rate and channels must be > 0");

        size_t samples_val = static_cast<size_t>(samples_raw);
        size_t sample_rate_val = static_cast<size_t>(sample_rate_raw);
        size_t channels_val = static_cast<size_t>(channels_raw);
        RubberBandStretcher::Options config_val = static_cast<RubberBandStretcher::Options>(Long_val(Field(params, 3)));
        double time_ratio_val = Double_val(Field(params, 4));
        double pitch_scale_val = Double_val(Field(params, 5));

        /* The stretcher reads channels * samples floats: make sure the
           bigarray actually holds that many elements. */
        uintnat total_elements = 1;
        for (intnat d = 0; d < Caml_ba_array_val(input)->num_dims; ++d)
            total_elements *= static_cast<uintnat>(Caml_ba_array_val(input)->dim[d]);
        if (total_elements < static_cast<uintnat>(samples_val) * static_cast<uintnat>(channels_val))
            caml_invalid_argument("rubberband_stretch: the input holds fewer than channels * samples elements");

        float *input_data = (float *)Caml_ba_data_val(input);

        bool failed = false;
        {
            auto result = offline_stretch(input_data, output, samples_val, sample_rate_val, channels_val, config_val, time_ratio_val, pitch_scale_val);

            if (result.has_value())
                output = result.value();
            else
            {
                /* Copy the message into the OCaml heap so that the C++ string
                   can be destroyed before the exception is raised. */
                error_msg = caml_copy_string(result.error().c_str());
                failed = true;
            }
        } /* `result` is destroyed here, before any OCaml exception unwinds the stack */

        if (failed)
            caml_failwith_value(error_msg);

        CAMLreturn(output);
    }
}
132 | 


--------------------------------------------------------------------------------
/src/effects/time/time.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023-2025                                                  *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | type engine = Faster | Finer
 23 | 
 24 | let engine_to_int = function Faster -> 0x00000000 | Finer -> 0x20000000
 25 | 
 26 | type transients = Crisp | Mixed | Smooth
 27 | 
 28 | let transients_to_int = function
 29 |   | Crisp ->
 30 |       0x00000000
 31 |   | Mixed ->
 32 |       0x00000100
 33 |   | Smooth ->
 34 |       0x00000200
 35 | 
 36 | type detector = Compound | Percussive | Soft
 37 | 
 38 | let detector_to_int = function
 39 |   | Compound ->
 40 |       0x00000000
 41 |   | Percussive ->
 42 |       0x00000400
 43 |   | Soft ->
 44 |       0x00000800
 45 | 
 46 | type phase = Laminar | Independent
 47 | 
 48 | let phase_to_int = function Laminar -> 0x00000000 | Independent -> 0x00002000
 49 | 
 50 | type threading = Auto | Never | Always
 51 | 
 52 | let threading_to_int = function
 53 |   | Auto ->
 54 |       0x00000000
 55 |   | Never ->
 56 |       0x00010000
 57 |   | Always ->
 58 |       0x00020000
 59 | 
 60 | type window = Standard | Short | Long
 61 | 
 62 | let window_to_int = function
 63 |   | Standard ->
 64 |       0x00000000
 65 |   | Short ->
 66 |       0x00100000
 67 |   | Long ->
 68 |       0x00200000
 69 | 
 70 | type smoothing = Off | On
 71 | 
 72 | let smoothing_to_int = function Off -> 0x00000000 | On -> 0x00800000
 73 | 
 74 | type formant = Shifted | Preserved
 75 | 
 76 | let formant_to_int = function Shifted -> 0x00000000 | Preserved -> 0x01000000
 77 | 
 78 | type pitch = HighSpeed | HighQuality | HighConsistency
 79 | 
 80 | let pitch_to_int = function
 81 |   | HighSpeed ->
 82 |       0x00000000
 83 |   | HighQuality ->
 84 |       0x02000000
 85 |   | HighConsistency ->
 86 |       0x04000000
 87 | 
 88 | type channels = Apart | Together
 89 | 
 90 | let channels_to_int = function Apart -> 0x00000000 | Together -> 0x10000000
 91 | 
 92 | module Config = struct
 93 |   type t =
 94 |     { engine: engine
 95 |     ; transients: transients
 96 |     ; detector: detector
 97 |     ; phase: phase
 98 |     ; threading: threading
 99 |     ; window: window
100 |     ; smoothing: smoothing
101 |     ; formant: formant
102 |     ; pitch: pitch
103 |     ; channels: channels }
104 | 
105 |   let default : t =
106 |     { engine= Faster
107 |     ; transients= Crisp
108 |     ; detector= Compound
109 |     ; phase= Laminar
110 |     ; threading= Auto
111 |     ; window= Standard
112 |     ; smoothing= Off
113 |     ; formant= Shifted
114 |     ; pitch= HighSpeed
115 |     ; channels= Apart }
116 | 
117 |   let percussive : t = {default with window= Short; phase= Independent}
118 | 
119 |   let with_engine engine config = {config with engine}
120 | 
121 |   let with_transients transients config = {config with transients}
122 | 
123 |   let with_detector detector config = {config with detector}
124 | 
125 |   let with_phase phase config = {config with phase}
126 | 
127 |   let with_threading threading config = {config with threading}
128 | 
129 |   let with_window window config = {config with window}
130 | 
131 |   let with_smoothing smoothing config = {config with smoothing}
132 | 
133 |   let with_formant formant config = {config with formant}
134 | 
135 |   let with_pitch pitch config = {config with pitch}
136 | 
137 |   let with_channels channels config = {config with channels}
138 | 
139 |   let to_int (cfg : t) : int =
140 |     0 lor engine_to_int cfg.engine
141 |     lor transients_to_int cfg.transients
142 |     lor detector_to_int cfg.detector
143 |     lor phase_to_int cfg.phase
144 |     lor threading_to_int cfg.threading
145 |     lor window_to_int cfg.window
146 |     lor smoothing_to_int cfg.smoothing
147 |     lor formant_to_int cfg.formant lor pitch_to_int cfg.pitch
148 |     lor channels_to_int cfg.channels
149 | end
150 | 
(** [rubberband_stretch x (samples, sample_rate, channels, options,
    time_ratio, pitch_scale)] calls the C stub [caml_rubberband_stretch] on
    the single-precision array [x] and returns the array allocated by the
    stub. The tuple components are read by the stub in exactly this order;
    [options] is the integer produced by [Config.to_int]. *)
external rubberband_stretch :
     (float, Bigarray.float32_elt) Audio.G.t
  -> int * int * int * int * float * float
  -> (float, Bigarray.float32_elt) Audio.G.t = "caml_rubberband_stretch"
155 | 
(** [to_float32 kind] is the conversion from arrays of the given [kind] to
    single-precision arrays: the identity for [Float32] and
    [Audio.G.cast_d2s] for [Float64].

    @raise Invalid_argument when [kind] is [Float16]; the exception is raised
    as soon as the kind is supplied, before any array is passed. *)
let to_float32 : type b.
       (float, b) Bigarray.kind
    -> (float, b) Audio.G.t
    -> (float, Bigarray.float32_elt) Audio.G.t = function
  | Bigarray.Float32 ->
      Fun.id
  | Bigarray.Float64 ->
      Audio.G.cast_d2s
  | Bigarray.Float16 ->
      invalid_arg
        "Float16 elements kind aren't supported. The array kind must be \
         either Float32 or Float64."
171 | 
172 | (* Picks the converter that brings a single-precision array back to the given
173 |    kind: the identity for Float32, [Audio.G.cast_s2d] for Float64.
174 |    Raises [Invalid_argument] for Float16. *)
175 | let of_float32 : type b.
176 |        (float, b) Bigarray.kind
177 |     -> (float, Bigarray.float32_elt) Audio.G.t
178 |     -> (float, b) Audio.G.t = function
179 |   | Float16 ->
180 |       invalid_arg
181 |         "Float16 elements kind aren't supported. The array kind must be \
182 |          either Float32 or Float64."
183 |   | Float64 ->
184 |       Audio.G.cast_s2d
185 |   | Float32 ->
186 |       Fun.id
187 | 
188 | (* Time-stretches [x]: the stub receives [ratio] together with a fixed 1.0 as
189 |    its last two parameters. Raises [Invalid_argument] unless [ratio > 0.]. *)
190 | let time_stretch : type a.
191 |        ?config:Config.t
192 |     -> (float, a) Audio.G.t
193 |     -> int
194 |     -> float
195 |     -> (float, a) Audio.G.t =
196 |  fun ?(config = Config.default) x sample_rate ratio ->
197 |   if ratio > 0. then
198 |     let dims = Audio.G.shape x in
199 |     let multi = Array.length dims > 1 in
200 |     let channels = if multi then dims.(0) else 1 in
201 |     let samples = if multi then dims.(1) else dims.(0) in
202 |     let flags = Config.to_int config in
203 |     let kind = Audio.G.kind x in
204 |     let params = (samples, sample_rate, channels, flags, ratio, 1.0) in
205 |     of_float32 kind (rubberband_stretch (to_float32 kind x) params)
206 |   else invalid_arg "rate must be > 0."
207 | 
208 | (* Shifts the pitch of [x] by [steps] steps of 1/[bins_per_octave] octave; the
209 |    stub receives 1.0 and [2 ** (steps / bins_per_octave)] as its last two
210 |    parameters. Raises [Invalid_argument] when [bins_per_octave <= 0]. *)
211 | let pitch_shift : type a.
212 |        ?config:Config.t
213 |     -> ?bins_per_octave:int
214 |     -> (float, a) Audio.G.t
215 |     -> int
216 |     -> int
217 |     -> (float, a) Audio.G.t =
218 |  fun ?(config = Config.default) ?(bins_per_octave = 12) x sample_rate steps ->
219 |   (* A zero [bins_per_octave] would make the scale infinite or NaN. *)
220 |   if bins_per_octave <= 0 then invalid_arg "bins_per_octave must be > 0." ;
221 |   let octaves = Float.of_int steps /. Float.of_int bins_per_octave in
222 |   let scale = Float.pow 2.0 octaves in
223 |   let dshape = Audio.G.shape x in
224 |   let multi = Array.length dshape > 1 in
225 |   let channels = if multi then dshape.(0) else 1 in
226 |   let samples = if multi then dshape.(1) else dshape.(0) in
227 |   let kind = Audio.G.kind x and flags = Config.to_int config in
228 |   let params = (samples, sample_rate, channels, flags, 1.0, scale) in
229 |   of_float32 kind (rubberband_stretch (to_float32 kind x) params)
230 | 


--------------------------------------------------------------------------------
/src/feature/dune:
--------------------------------------------------------------------------------
 1 | (include_subdirs qualified)
 2 | 
 3 | (library
 4 |  (name feature)
 5 |  (package soundml)
 6 |  (libraries audio utils window owl)
 7 |  (instrumentation
 8 |   (backend bisect_ppx))
 9 |  (wrapped true))
10 | 


--------------------------------------------------------------------------------
/src/feature/spectral/spectral.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2023                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 


--------------------------------------------------------------------------------
/src/feature/spectral/spectral.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2023                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 


--------------------------------------------------------------------------------
/src/filterbank.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | type norm = Slaney | PNorm of float
23 | 
24 | (* [mel ?fmax ?htk ?norm kd sample_rate nfft nmels fmin] builds the mel filter
25 |    matrix of shape [|nmels; nfft / 2 + 1|]. [fmax] defaults to
26 |    [sample_rate / 2]. [norm] selects Slaney scaling, division of every filter
27 |    by its own p-norm, or no scaling at all. *)
28 | let mel ?(fmax : float option = None) ?(htk : bool = false)
29 |     ?(norm : norm option = None) (kd : ('a, 'b) Bigarray.kind)
30 |     (sample_rate : int) (nfft : int) (nmels : int) (fmin : float) =
31 |   if nmels = 0 then Audio.G.empty kd [|0; (nfft / 2) + 1|]
32 |   else
33 |     let fmax =
34 |       match fmax with
35 |       | Some fmax ->
36 |           fmax
37 |       | None ->
38 |           float_of_int sample_rate /. 2.
39 |     in
40 |     let fftfreqs = Utils.rfftfreq kd nfft (1. /. float_of_int sample_rate) in
41 |     let mel_freqs = Utils.melfreq kd ~nmels:(nmels + 2) ~fmin ~fmax ~htk in
42 |     let fdiff = Audio.G.diff mel_freqs in
43 |     let ramps = Utils.outer Audio.G.sub mel_freqs fftfreqs in
44 |     let open Audio.G in
45 |     let lower =
46 |       neg ramps.${[0; Int.sub nmels 1]}
47 |       / reshape fdiff.${[0; Int.sub nmels 1]} [|nmels; 1|]
48 |     in
49 |     let upper =
50 |       ramps.${[2; Int.add nmels 1]} / reshape fdiff.${[1; nmels]} [|nmels; 1|]
51 |     in
52 |     (* Intersect slopes *)
53 |     let weights = max2 (zeros kd (shape lower)) (min2 lower upper) in
54 |     match norm with
55 |     | None ->
56 |         weights
57 |     | Some Slaney ->
58 |         let lo = mel_freqs.${[0; Int.sub nmels 1]} in
59 |         let hi = mel_freqs.${[2; Int.add nmels 1]} in
60 |         weights * reshape (2.0 $/ sub hi lo) [|nmels; 1|]
61 |     | Some (PNorm p) ->
62 |         (* Divide every filter by its own p-norm (the bare norms used to be
63 |            returned); all-zero filters keep a divisor of 1 to avoid NaN. *)
64 |         let norms = vecnorm ~p ~axis:(-1) weights in
65 |         let norms = map (fun v -> if Float.equal v 0. then 1. else v) norms in
66 |         weights / reshape norms [|nmels; 1|]
67 | 


--------------------------------------------------------------------------------
/src/filterbank.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | type norm = Slaney | PNorm of float
23 | 
24 | val mel :
25 |      ?fmax:float option
26 |   -> ?htk:bool
27 |   -> ?norm:norm option
28 |   -> (float, 'b) Bigarray.kind
29 |   -> int
30 |   -> int
31 |   -> int
32 |   -> float
33 |   -> (float, 'b) Owl_dense_ndarray.Generic.t
34 | (** [mel ?fmax ?htk ?norm kd sample_rate nfft nmels fmin] returns a matrix of
35 |     kind [kd] and shape [nmels, nfft/2+1] containing the mel filterbank.
36 |     [fmax] defaults to [sample_rate / 2]; when [nmels = 0] the result is an
37 |     empty array of shape [0, nfft/2+1]. *)
38 | 


--------------------------------------------------------------------------------
/src/io/cio/README.md:
--------------------------------------------------------------------------------
 1 | ## SoundML IO C++ library
 2 | 
 3 | This is the C/C++ code for SoundML IO library. It relies on C++23 (for `std::expected`).
 4 | 
 5 | ### Dependencies
 6 | 
 7 | - `sndfile`: for reading and writing audio files (and *very* soon<span>&trade;</span> streams)
 8 | - `soxr`: for everything related to resampling
 9 | 
10 | ### General information
11 | 
12 | Since we're working with `Owl` that uses the `Bigarray.c_layout` layout, we choose to maintain the interleaved layout for the audio data. This allows us to directly write the data into the `Bigarray` without having to deinterleave it. Thus, when reading an audio file that contains `n` channels and `m` samples per channel, the final shape of the `Bigarray` will be `(m, n)` instead of `(n, m)` as you may be used to using other well known libraries (like *librosa*).
13 | 
14 | In this directory, you'll find the following files:
15 | 
16 | - `common.hxx` : contains the common functions and types used by both the reader and the writer.
17 | - `read.hxx` : implements the needed `read` functions. The file reading implementation is split between two classes:
18 |   - `SoundML::IO::SndfileReader` : this is used when no resampling is needed.
19 |   - `SoundML::IO::SoXrReader` : this is used when resampling is needed. It performs resampling while reading the file. Each read buffer is fed to the soxr resampler and the output is written directly to the `Bigarray` data pointer.
20 | - `write.hxx` implements a simple writing function that directly uses `sndfile` to write the data.
21 | 
22 | Exceptions are used to handle the errors. To do so, in `common.hxx` we retrieve (inside `raise_caml_exception`) the correct exception to raise in OCaml based on the `Error` provided.
23 | 


--------------------------------------------------------------------------------
/src/io/cio/common.hxx:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************/
  2 | /*                                                                           */
  3 | /*                                                                           */
  4 | /*  Copyright (C) 2023-2025                                                  */
  5 | /*    Gabriel Santamaria                                                     */
  6 | /*                                                                           */
  7 | /*                                                                           */
  8 | /*  Licensed under the Apache License, Version 2.0 (the "License");          */
  9 | /*  you may not use this file except in compliance with the License.         */
 10 | /*  You may obtain a copy of the License at                                  */
 11 | /*                                                                           */
 12 | /*    http://www.apache.org/licenses/LICENSE-2.0                             */
 13 | /*                                                                           */
 14 | /*  Unless required by applicable law or agreed to in writing, software      */
 15 | /*  distributed under the License is distributed on an "AS IS" BASIS,        */
 16 | /*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
 17 | /*  See the License for the specific language governing permissions and      */
 18 | /*  limitations under the License.                                           */
 19 | /*                                                                           */
 20 | /*****************************************************************************/
 21 | 
 22 | #define SOUNDML_BUFFER_SIZE 4096
 23 | #include <expected>
 24 | #include <variant>
 25 | #include <sndfile.hh>
 26 | 
 27 | extern "C" /* OCaml imports */
 28 | {
 29 | #include <caml/mlvalues.h>
 30 | #include <caml/memory.h>
 31 | #include <caml/alloc.h>
 32 | #include <caml/callback.h>
 33 | #include <caml/threads.h>
 34 | #include <caml/bigarray.h>
 35 | #include <caml/fail.h>
 36 | }
 37 | 
 38 | #ifndef SOUNDML_IO_COMMON_H
 39 | #define SOUNDML_IO_COMMON_H
 40 | namespace SoundML
 41 | {
 42 |     namespace IO
 43 |     {
 44 |         typedef enum
 45 |         {
 46 |             SNDFILE_ERR,
 47 |             SOXR_ERR,
 48 |             SOUNDML_ERR
 49 |         } ErrorType;
 50 | 
 51 |         using Error = std::pair<std::variant<int, std::string>, ErrorType>;
 52 | 
 53 |         /**
 54 |          * @brief Little helper to get a string out of an Error
 55 |          * @param error The error (payload and type) to describe
 56 |          * @return A string with the error message
 57 |          */
 58 |         std::string get_error_string(Error error)
 59 |         {
 60 |             const ErrorType kind = error.second;
 61 |             const std::variant<int, std::string> &payload = error.first;
 62 | 
 63 |             /* libsndfile errors carry a numeric code that sf_error_number translates */
 64 |             if (kind == SNDFILE_ERR)
 65 |                 return std::string(sf_error_number(std::get<int>(payload)));
 66 | 
 67 |             /* soxr and SoundML errors carry their message directly */
 68 |             if (kind == SOXR_ERR)
 69 |                 return std::get<std::string>(payload);
 70 |             if (kind == SOUNDML_ERR)
 71 |                 return std::get<std::string>(payload);
 72 | 
 73 |             /* any other error type has no message attached */
 74 |             return std::string("Unknown error");
 75 |         }
 76 | 
 77 |         /**
 78 |          * @brief Raise the correct OCaml exception from the given Error
 79 |          * @param error The error to raise
 80 |          * @param filename The name of the file that caused the error
 81 |          * @note Raises Failure instead when the OCaml exception was never registered.
 82 |          */
 83 |         void raise_caml_exception(Error error, std::string filename)
 84 |         {
 85 |             std::string error_string = SoundML::IO::get_error_string(error) + " in file " + filename;
 86 |             const char *name = "soundml.exn.internal_error";
 87 | 
 88 |             if (error.second == SNDFILE_ERR)
 89 |             {
 90 |                 int err_code = std::get<int>(error.first);
 91 |                 bool is_format_err = err_code == SF_ERR_UNRECOGNISED_FORMAT || err_code == SF_ERR_MALFORMED_FILE || err_code == SF_ERR_UNSUPPORTED_ENCODING;
 92 |                 /* every non-format libsndfile error is reported as a missing file */
 93 |                 name = is_format_err ? "soundml.exn.invalid_format" : "soundml.exn.file_not_found";
 94 |             }
 95 |             else if (error.second == SOXR_ERR)
 96 |                 name = "soundml.exn.resampling_error";
 97 |             else if (error.second != SOUNDML_ERR)
 98 |                 error_string = "Unknown internal error.";
 99 | 
100 |             /* caml_named_value returns NULL when nothing was registered under
101 |              * that name; dereferencing it blindly would crash the process. */
102 |             const value *exn = caml_named_value(name);
103 |             if (exn == nullptr)
104 |                 caml_failwith(error_string.c_str());
105 |             caml_raise_with_string(*exn, error_string.c_str());
106 |         }
107 | 
108 |         /**
109 |          * @brief Structure holding metadata related to an audio file
110 |          * @param frames number of frames we read from the file
111 |          * @param channels number of channels in the file
112 |          * @param sample_rate sample-rate of the file (if a resampling has been asked, sample-rate equals the asked sr)
113 |          * @param padded_frames number of frames we padded with zeros
114 |          * @param format format of the file (SF_FORMAT_* from libsndfile)
115 |          */
116 |         struct AudioMetadata
117 |         {
118 |             sf_count_t frames;
119 |             int channels;
120 |             int sample_rate;
121 |             sf_count_t padded_frames;
122 |             int format;
123 |         };
124 | 
125 |         /**
126 |          * @brief Enum that represents the resampling types
127 |          * @note The SoX resampling types are defined in soxr.h
128 |          */
129 |         typedef enum
130 |         {
131 |             RS_NONE = 0, /* No resampling */
132 |             RS_SOXR_QQ,  /* 'Quick' cubic interpolation. */
133 |             RS_SOXR_LQ,  /* 'Low' 16-bit with larger rolloff. */
134 |             RS_SOXR_MQ,  /* 'Medium' 16-bit with medium rolloff. */
135 |             RS_SOXR_HQ,  /* 'High quality'. */
136 |             RS_SOXR_VHQ, /* 'Very high quality'. */
137 |             /* TODO: implement these resamplers (get_recipe_type has no dedicated case for them) */
138 |             RS_SCR_LINEAR, /* NOTE(review): looks like a misspelling of RS_SRC_LINEAR below - confirm */
139 |             RS_SINC_BEST_QUALITY,
140 |             RS_SINC_MEDIUM_QUALITY,
141 |             RS_SINC_FASTEST,
142 |             RS_ZERO_ORDER_HOLD,
143 |             RS_SRC_LINEAR
144 |         } resampling_t;
145 | 
146 |         /**
147 |          * @brief Get the (correct) SoX resampling type from the resampling_t enum
148 |          * @param type The resampling type to convert
149 |          * @return The SoX recipe (SOXR_* from soxr.h); SOXR_VHQ for any non-SoX type
150 |          */
151 |         unsigned long get_recipe_type(resampling_t type)
152 |         {
153 |             switch (type)
154 |             {
155 |             case RS_SOXR_QQ: /* used to be missing and silently became SOXR_VHQ */
156 |                 return SOXR_QQ;
157 |             case RS_SOXR_LQ:
158 |                 return SOXR_LQ;
159 |             case RS_SOXR_MQ:
160 |                 return SOXR_MQ;
161 |             case RS_SOXR_HQ:
162 |                 return SOXR_HQ;
163 |             case RS_SOXR_VHQ:
164 |             default:
165 |                 return SOXR_VHQ;
166 |             }
167 |         }
168 |     } /* namespace SoundML::IO */
169 | } /* namespace SoundML */
170 | 
171 | #endif /* SOUNDML_IO_COMMON_H */
172 | 


--------------------------------------------------------------------------------
/src/io/cio/write.hxx:
--------------------------------------------------------------------------------
  1 | /*****************************************************************************/
  2 | /*                                                                           */
  3 | /*                                                                           */
  4 | /*  Copyright (C) 2023-2025                                                  */
  5 | /*    Gabriel Santamaria                                                     */
  6 | /*                                                                           */
  7 | /*                                                                           */
  8 | /*  Licensed under the Apache License, Version 2.0 (the "License");          */
  9 | /*  you may not use this file except in compliance with the License.         */
 10 | /*  You may obtain a copy of the License at                                  */
 11 | /*                                                                           */
 12 | /*    http://www.apache.org/licenses/LICENSE-2.0                             */
 13 | /*                                                                           */
 14 | /*  Unless required by applicable law or agreed to in writing, software      */
 15 | /*  distributed under the License is distributed on an "AS IS" BASIS,        */
 16 | /*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
 17 | /*  See the License for the specific language governing permissions and      */
 18 | /*  limitations under the License.                                           */
 19 | /*                                                                           */
 20 | /*****************************************************************************/
 21 | 
 22 | #ifndef SOUNDML_WRITER_H
 23 | #define SOUNDML_WRITER_H
 24 | 
 25 | #include "common.hxx"
 26 | 
 27 | namespace SoundML
 28 | {
 29 |     namespace IO
 30 |     {
 31 | 
 32 |         class AudioWriter
 33 |         {
 34 |         private:
 35 |             sf_count_t nframes; /* number of frames handed to every write() call */
 36 | 
 37 |         public:
 38 |             AudioWriter(sf_count_t nframes)
 39 |                 : nframes(nframes) {}
 40 | 
 41 |             /**
 42 |              * @brief Writes `nframes` frames of audio data to the file
 43 |              * @param sndfile The SndfileHandle to write to
 44 |              * @param data Pointer to the frames to write
 45 |              * @return Nothing on success; a SNDFILE_ERR Error when the number of
 46 |              *         frames written differs from `nframes`
 47 |              */
 48 |             template <typename T>
 49 |             std::expected<void, Error> write(SndfileHandle &sndfile, const T *data)
 50 |             {
 51 |                 /* the OCaml runtime is released for the duration of the write */
 52 |                 caml_release_runtime_system();
 53 |                 const sf_count_t count = sndfile.writef(data, nframes);
 54 |                 caml_acquire_runtime_system();
 55 | 
 56 |                 if (count == nframes)
 57 |                     return {};
 58 | 
 59 |                 /* fall back to SF_ERR_SYSTEM when the handle reports no error code */
 60 |                 const int code = sndfile.error();
 61 |                 const int reported = code != 0 ? code : SF_ERR_SYSTEM;
 62 |                 return std::unexpected(Error(reported, SNDFILE_ERR));
 63 |             }
 64 |         };
 65 |     } /* namespace SoundML::IO */
 66 | } /* namespace SoundML */
 67 | 
 68 | /**
 69 |  * @brief Writes the given audio data to the file
 70 |  * @param filename The name of the file to write to.
 71 |  * @param ba_data The data to write (Bigarray).
 72 |  * @param metadata The metadata of the audio data (number of frames, sample rate, channels, format).
 73 |  * @tparam T The type of the audio data (float or double).
 74 |  *
 75 |  * @return Unit on success; on failure an OCaml exception is raised instead.
 76 |  */
 77 | template <typename T>
 78 | value caml_write_audio_file(value filename, value ba_data, value metadata)
 79 | {
 80 |     CAMLparam3(filename, ba_data, metadata); /* keep the arguments rooted while the runtime is released */
 81 |     using namespace SoundML::IO;
 82 |     std::string filename_str = String_val(filename);
 83 |     sf_count_t nframes_val = Long_val(Field(metadata, 0));
 84 |     int sample_rate_val = Long_val(Field(metadata, 1));
 85 |     int channels_val = Long_val(Field(metadata, 2));
 86 |     int format_val = Long_val(Field(metadata, 3));
 87 |     const T *data = (const T *)Caml_ba_data_val(ba_data);
 88 | 
 89 |     std::expected<void, Error> outcome{};
 90 |     {
 91 |         /* Raising an OCaml exception does not run C++ destructors, so the handle
 92 |          * lives in its own scope and is closed before anything is raised. */
 93 |         SndfileHandle sndfile(filename_str, SFM_WRITE, format_val, channels_val, sample_rate_val);
 94 |         if (int err = sndfile.error(); err)
 95 |             outcome = std::unexpected(Error(err, SNDFILE_ERR));
 96 |         else
 97 |             outcome = AudioWriter(nframes_val).write(sndfile, data);
 98 |     }
 99 | 
100 |     if (!outcome.has_value())
101 |         raise_caml_exception(outcome.error(), filename_str);
102 | 
103 |     CAMLreturn(Val_unit);
104 | }
105 | 
106 | #endif /* SOUNDML_WRITER_H */
107 | 


--------------------------------------------------------------------------------
/src/io/config/discover.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | module C = Configurator.V1
23 | 
24 | (* Asks pkg-config for the flags of the "sndfile soxr" packages and writes
25 |    them to the two sexp files; falls back to plain [-lsndfile -lsoxr] with no
26 |    compiler flags when pkg-config, or the package lookup, yields nothing. *)
27 | let () =
28 |   C.main ~name:"sndfile-pkg-config" (fun c ->
29 |       let fallback : C.Pkg_config.package_conf =
30 |         {cflags= []; libs= ["-lsndfile"; "-lsoxr"]}
31 |       in
32 |       let lookup pc = C.Pkg_config.query pc ~package:"sndfile soxr" in
33 |       let found = Option.bind (C.Pkg_config.get c) lookup in
34 |       let conf : C.Pkg_config.package_conf =
35 |         Option.value found ~default:fallback
36 |       in
37 |       (* one file for the compiler flags ... *)
38 |       C.Flags.write_sexp "c_flags.sexp" conf.cflags ;
39 |       (* ... and one for the libraries to link against *)
40 |       C.Flags.write_sexp "c_library_flags.sexp" conf.libs
41 |   )
42 | 


--------------------------------------------------------------------------------
/src/io/config/dune:
--------------------------------------------------------------------------------
1 | (include_subdirs no)
2 | 
3 | (executable
4 |  (name discover)
5 |  (libraries dune.configurator))
6 | 


--------------------------------------------------------------------------------
/src/io/dune:
--------------------------------------------------------------------------------
 1 | (include_subdirs qualified)
 2 | 
 3 | (library
 4 |  (name io)
 5 |  (package soundml)
 6 |  (foreign_stubs
 7 |   (language cxx)
 8 |   (names soundml_io)
 9 |   (include_dirs cio)
10 |   (flags
11 |    :standard
12 |    (:include c_flags.sexp)
13 |    -O3
14 |    -std=c++23
15 |    -llto))
16 |  (c_library_flags
17 |   (:include c_library_flags.sexp))
18 |  (modules io)
19 |  (libraries audio utils owl)
20 |  (instrumentation
21 |   (backend bisect_ppx))
22 |  (wrapped true))
23 | 
24 | (rule
25 |  (targets c_flags.sexp c_library_flags.sexp)
26 |  (action
27 |   (run ./config/discover.exe)))
28 | 


--------------------------------------------------------------------------------
/src/io/io.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023-2025                                                  *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | open Audio
 23 | open Bigarray
 24 | 
 25 | (* Raised when a requested file cannot be found on the system. *)
 26 | exception File_not_found of string
 27 | 
 28 | (* Raised on an invalid input encoding or an unsupported output format. *)
 29 | exception Invalid_format of string
 30 | 
 31 | (* Raised when an error occurs while resampling. *)
 32 | exception Resampling_error of string
 33 | 
 34 | (* Raised when an internal error occurs. *)
 35 | exception Internal_error of string
 36 | 
 37 | (* Register each exception under its "soundml.exn.*" name, in declaration
 38 |    order (see [Callback.register_exception]). The string payloads are only
 39 |    placeholders: a concrete exception value is needed to perform the
 40 |    registration. *)
 41 | let () =
 42 |   let register (name, exn) = Callback.register_exception name exn in
 43 |   List.iter register
 44 |     [ ("soundml.exn.file_not_found", File_not_found "file.wav")
 45 |     ; ("soundml.exn.invalid_format", Invalid_format "invalid format")
 46 |     ; ("soundml.exn.resampling_error", Resampling_error "error")
 47 |     ; ("soundml.exn.internal_error", Internal_error "internal error") ]
 48 | 
 49 | type resampling_t = NONE | SOXR_QQ | SOXR_LQ | SOXR_MQ | SOXR_HQ | SOXR_VHQ
 50 | (* NOTE(review): passed as-is to the C++ stubs; constructor order presumably matters there -- confirm. *)
 51 | (* Tuple returned by the read stubs: nframes * channels * sample_rate * format *)
 52 | type metadata = int * int * int * int
 53 | (* Read stub (Float32 data): filename -> resampling method -> target sample rate -> data, metadata. *)
 54 | external caml_read_audio_file_f32 :
 55 |      string
 56 |   -> resampling_t
 57 |   -> int
 58 |   -> (float, Bigarray.float32_elt) Audio.G.t * metadata
 59 |   = "caml_read_audio_file_f32"
 60 | (* Float64 counterpart of the read stub above; same argument order. *)
 61 | external caml_read_audio_file_f64 :
 62 |      string
 63 |   -> resampling_t
 64 |   -> int
 65 |   -> (float, Bigarray.float64_elt) Audio.G.t * metadata
 66 |   = "caml_read_audio_file_f64"
 67 | (* Averages [x] along axis 1 when it has more than one dimension; else [x]. *)
 68 | let to_mono (x : (float, 'a) G.t) =
 69 |   if G.num_dims x <= 1 then x else G.mean ~axis:1 ~keep_dims:false x
 70 | 
 71 | let read : type a.
 72 |        ?res_typ:resampling_t
 73 |     -> ?sample_rate:int
 74 |     -> ?mono:bool
 75 |     -> (float, a) kind
 76 |     -> string
 77 |     -> a audio =
 78 |  fun ?(res_typ = SOXR_HQ) ?(sample_rate = 22050) ?(mono = true) elt_kind path ->
 79 |   (* Reads [path] through the C++ stub matching [elt_kind], optionally reduces
 80 |      the result with [to_mono], and wraps the samples and their metadata in an
 81 |      audio value. Raises [Invalid_argument] for Float16 and [Invalid_format]
 82 |      when the format code returned by the stub cannot be decoded. *)
 83 |   (* Pick the stub for the requested element kind. *)
 84 |   let stub : string -> resampling_t -> int -> (float, a) G.t * metadata =
 85 |     match elt_kind with
 86 |     | Float32 ->
 87 |         caml_read_audio_file_f32
 88 |     | Float64 ->
 89 |         caml_read_audio_file_f64
 90 |     | Float16 ->
 91 |         invalid_arg
 92 |           "Float16 elements kind aren't supported. The array kind must be \
 93 |            either Float32 or Float64."
 94 |   in
 95 |   let raw, (frames, file_channels, rate, format_code) =
 96 |     stub path res_typ sample_rate
 97 |   in
 98 |   (* Size of the first axis as delivered by the stub, taken before any mono
 99 |      reduction. *)
100 |   let delivered = (Audio.G.shape raw).(0) in
101 |   let samples = if mono then to_mono raw else raw in
102 |   (* Without resampling, cut the buffer down to the frame count reported in the
103 |      metadata whenever it differs from what was delivered. *)
104 |   let samples =
105 |     match res_typ with
106 |     | NONE when frames <> delivered ->
107 |         Audio.G.sub_left samples 0 frames
108 |     | _ ->
109 |         samples
110 |   in
111 |   let channels = if mono then 1 else file_channels in
112 |   let format =
113 |     match Aformat.of_int format_code with
114 |     | Error msg ->
115 |         raise (Invalid_format msg)
116 |     | Ok fmt ->
117 |         fmt
118 |   in
119 |   let meta = Metadata.create ~name:path frames channels rate format in
120 |   (* The audio stores the transpose of the array assembled above. *)
121 |   let transposed = Audio.G.transpose samples in
122 |   Audio.create meta transposed
123 | (* Write stub (Float32 data): filename -> data -> 4-int tuple. NOTE(review): [write] passes (nframes, sample_rate, channels, format), unlike the read-side [metadata] order -- confirm against the stub. *)
124 | external caml_write_audio_file_f32 :
125 |      string
126 |   -> (float, Bigarray.float32_elt) Audio.G.t
127 |   -> int * int * int * int
128 |   -> unit = "caml_write_audio_file_f32"
129 | (* Float64 counterpart of the write stub above; same argument order. *)
130 | external caml_write_audio_file_f64 :
131 |      string
132 |   -> (float, Bigarray.float64_elt) Audio.G.t
133 |   -> int * int * int * int
134 |   -> unit = "caml_write_audio_file_f64"
135 | 
136 | let write : type a.
137 |     ?format:Aformat.t -> string -> (float, a) Audio.G.t -> int -> unit =
138 |  fun ?format filename x sample_rate ->
139 |   (* An explicit [?format] wins; otherwise the format is derived from the
140 |      extension of [filename], raising [Invalid_format] when that fails. *)
141 |   let fmt =
142 |     match format with
143 |     | Some fmt ->
144 |         fmt
145 |     | None -> (
146 |       match Aformat.of_ext (Filename.extension filename) with
147 |       | Ok fmt ->
148 |           fmt
149 |       | Error e ->
150 |           raise (Invalid_format e) )
151 |   in
152 |   let fmt_code = Aformat.to_int fmt in
153 |   (* Transpose [x]; the first axis of the result is used as the frame count. *)
154 |   let samples = Audio.G.transpose x in
155 |   let dims = Audio.G.shape samples in
156 |   let nframes = dims.(0) in
157 |   let channels = if Array.length dims > 1 then dims.(1) else 1 in
158 |   let info = (nframes, sample_rate, channels, fmt_code) in
159 |   match Audio.G.kind samples with
160 |   | Float32 -> caml_write_audio_file_f32 filename samples info
161 |   | Float64 -> caml_write_audio_file_f64 filename samples info
162 |   | _ ->
163 |       invalid_arg
164 |         "Float16 elements kind aren't supported. The array kind must be \
165 |          either Float32 or Float64."
166 | 


--------------------------------------------------------------------------------
/src/io/io.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023-2025                                                  *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | (**
 23 |     The {!Io} (in/out) module is the entry point for reading and writing audio
 24 |     data from and to the filesystem. It supports resampling via the {{:https://github.com/chirlu/soxr}SoXr} library. *)
 25 | 
 26 | open Audio
 27 | open Bigarray
 28 | 
 29 | (** Thrown when a requested file cannot be found on the system. *)
 30 | exception File_not_found of string
 31 | 
 32 | (** Thrown when the file we're trying to read is encoded in an invalid format, or when the format we're trying to write isn't supported. *)
 33 | exception Invalid_format of string
 34 | 
 35 | (** Thrown when an error occurred while resampling. *)
 36 | exception Resampling_error of string
 37 | 
 38 | (** Thrown when an internal error occurred. This should not happen, so please report it. *)
 39 | exception Internal_error of string
 40 | 
 41 | (** The resampling method to use. The default is [SOXR_HQ]. *)
 42 | type resampling_t =
 43 |   | NONE  (** Indicates that no resampling is requested *)
 44 |   | SOXR_QQ  (** 'Quick' cubic interpolation. *)
 45 |   | SOXR_LQ  (** 'Low' 16-bit with larger rolloff. *)
 46 |   | SOXR_MQ  (** 'Medium' 16-bit with medium rolloff. *)
 47 |   | SOXR_HQ  (** 'High quality'. *)
 48 |   | SOXR_VHQ  (** 'Very high quality'. *)
 49 | 
 50 | val read :
 51 |   'a.
 52 |      ?res_typ:resampling_t
 53 |   -> ?sample_rate:int
 54 |   -> ?mono:bool
 55 |   -> (float, 'a) kind
 56 |   -> string
 57 |   -> 'a audio
 58 | (**
 59 |     [read ?res_typ ?sample_rate ?mono kind filename] reads an audio file and returns an [audio].
 60 | 
 61 |     @return an [audio] type that contains the audio data read from the file. The type of the audio's data is determined by the [kind] parameter.
 62 | 
 63 |     {2 Parameters}
 64 |     @param ?res_typ is the resampling method to use. The default is [SOXR_HQ]. If [NONE] is used, [?sample_rate] is ignored and no resampling will be done.
 65 |     @param ?sample_rate is the target sample rate to use when reading the file. Default is 22050 Hz.
 66 |     @param ?mono is a boolean that indicates if we want to convert to a mono audio. Default is [true].
 67 |     @param kind is the format of audio data to read. It can be either [Bigarray.Float32] or [Bigarray.Float64].
 68 |     @param filename is the path to the file to read audio from.
 69 | 
 70 |     @raise File_not_found If the file does not exist.
 71 |     @raise Invalid_format If the file is not a valid audio file.
 72 |     @raise Resampling_error If the resampling fails.
 73 |     @raise Internal_error If an internal error occurs.
 74 |     
 75 |     {2 Usage}
 76 |     Reading audio is straightforward. Simply specify the path to the file you want to read.
 77 |     
 78 |     {[
 79 |       open Soundml
 80 |       (* This will read the file.wav audio into a Float32 bigarray, resampled using SOXR_HQ at 22050Hz. *)
 81 |       let audio = Io.read Bigarray.Float32 "path/to/file.wav"
 82 |     ]}
 83 | 
 84 |     {2 Supported formats}
 85 | 
 86 |     SoundML relies on {{:https://libsndfile.github.io/libsndfile/}libsndfile} to read audio files. Full detail on the supported formats are available
 87 |     on the official sndfile's website: {{:https://libsndfile.github.io/libsndfile/formats.html}Supported formats} and in the {!Audio.Aformat} module. *)
 88 | 
 89 | val write : 'a. ?format:Aformat.t -> string -> (float, 'a) G.t -> int -> unit
 90 | (**
 91 |     [write ?format filename data sample_rate] writes an audio file to the filesystem.
 92 | 
 93 |     {2 Parameters}
 94 |     @param ?format is the format to use when writing the file. If not specified, the format is determined by the file extension by {!Aformat.of_ext}.
 95 |     @param filename is the path to the file to write audio to.
 96 |     @param data is the audio data to write. It can be either a [Bigarray.Float32] or [Bigarray.Float64].
 97 |     @param sample_rate is the sample rate of the audio data.
 98 | 
 99 | 
100 |     @raise Invalid_format If no [?format] is given and none can be derived from the file extension, or if the requested format isn't supported.
101 |     @raise Internal_error If an internal error occurs.
102 | 
103 | 
104 |     {2 Usage}
105 |     Writing audio is as straightforward as reading it. Simply specify the path to the file you want to write.
106 |     
107 |     {[
108 |       open Soundml
109 |       open Audio
110 |       let audio = Io.read Bigarray.Float32 "path/to/file.mp3" in
111 |       Io.write "path/to/file.wav" (data audio) 22050 (* we'll automatically detect that you want to write to the WAV format *)
112 |     ]}
113 | 
114 |     {2 Supported formats}
115 | 
116 |     SoundML relies on {{:https://libsndfile.github.io/libsndfile/}libsndfile} to read audio files. Full detail on the supported formats are available
117 |     on the official sndfile's website: {{:https://libsndfile.github.io/libsndfile/formats.html}Supported formats} and in the {!Audio.Aformat} module. *)
118 | 


--------------------------------------------------------------------------------
/src/io/soundml_io.cxx:
--------------------------------------------------------------------------------
 1 | /*****************************************************************************/
 2 | /*                                                                           */
 3 | /*                                                                           */
 4 | /*  Copyright (C) 2023-2025                                                  */
 5 | /*    Gabriel Santamaria                                                     */
 6 | /*                                                                           */
 7 | /*                                                                           */
 8 | /*  Licensed under the Apache License, Version 2.0 (the "License");          */
 9 | /*  you may not use this file except in compliance with the License.         */
10 | /*  You may obtain a copy of the License at                                  */
11 | /*                                                                           */
12 | /*    http://www.apache.org/licenses/LICENSE-2.0                             */
13 | /*                                                                           */
14 | /*  Unless required by applicable law or agreed to in writing, software      */
15 | /*  distributed under the License is distributed on an "AS IS" BASIS,        */
16 | /*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 | /*  See the License for the specific language governing permissions and      */
18 | /*  limitations under the License.                                           */
19 | /*                                                                           */
20 | /*****************************************************************************/
21 | 
22 | #include "read.hxx"
23 | #include "write.hxx"
24 | 
25 | extern "C"
26 | {
27 | #include <caml/mlvalues.h>
28 | #include <caml/memory.h>
29 |     // Entry point bound by the OCaml external of the same name; forwards to the float instantiation.
30 |     CAMLprim value caml_read_audio_file_f32(value filename, value res_typ, value trgt_sr)
31 |     {
32 |         CAMLparam3(filename, res_typ, trgt_sr);
33 |         CAMLreturn(caml_read_audio_file<float>(filename, res_typ, trgt_sr));
34 |     }
35 |     // Same read entry point, forwarding to the double instantiation.
36 |     CAMLprim value caml_read_audio_file_f64(value filename, value res_typ, value trgt_sr)
37 |     {
38 |         CAMLparam3(filename, res_typ, trgt_sr);
39 |         CAMLreturn(caml_read_audio_file<double>(filename, res_typ, trgt_sr));
40 |     }
41 |     // Write entry point for float data; `metadata` is the 4-int tuple built on the OCaml side.
42 |     CAMLprim value caml_write_audio_file_f32(value filename, value ba_data, value metadata)
43 |     {
44 |         CAMLparam3(filename, ba_data, metadata);
45 |         CAMLreturn(caml_write_audio_file<float>(filename, ba_data, metadata));
46 |     }
47 |     // Same write entry point, forwarding to the double instantiation.
48 |     CAMLprim value caml_write_audio_file_f64(value filename, value ba_data, value metadata)
49 |     {
50 |         CAMLparam3(filename, ba_data, metadata);
51 |         CAMLreturn(caml_write_audio_file<double>(filename, ba_data, metadata));
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/src/soundml.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2023                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | (* Library entry point: re-exports the modules listed below under one namespace. *)
22 | module Audio = Audio
23 | module Io = Io
24 | module Types = Types
25 | module Transform = Transform
26 | module Window = Window
27 | module Feature = Feature
28 | module Filterbank = Filterbank
29 | module Effects = Effects
30 | module Utils = Utils
31 | 


--------------------------------------------------------------------------------
/src/transform.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Bigarray
23 | open Types
24 | (* Parameters of [stft]. *)
25 | module Config = struct
26 |   type t =
27 |     { n_fft: int (* length of the FFT buffer used for every frame *)
28 |     ; hop_size: int (* offset, in samples, between the starts of successive frames *)
29 |     ; win_length: int (* number of windowed samples copied into the FFT buffer *)
30 |     ; window: Window.window (* window selector handed to [Window.get] *)
31 |     ; center: bool (* NOTE(review): not read by [stft] in this file *) }
32 |   (* Value used by [stft] when [?config] is omitted. *)
33 |   let default =
34 |     { n_fft= 2048
35 |     ; hop_size= 512
36 |     ; win_length= 2048
37 |     ; window= `Hanning
38 |     ; center= true }
39 | end
40 | 
41 | module G = Owl.Dense.Ndarray.Generic
42 | (* Lifts a real number to a complex one with a zero imaginary part. *)
43 | let to_complex (x : float) : Complex.t = {Complex.re= x; im= 0.}
44 | 
45 | (* [stft ?config p x] computes the short-time Fourier transform of [x].
46 | 
47 |    The [win_length]-sample window selected by [config.window] is slid over [x]
48 |    in steps of [hop_size]. Each windowed frame is zero-padded to [n_fft]
49 |    samples and transformed; its first [n_fft / 2 + 1] bins become one column
50 |    of the result, whose shape is
51 |    [|n_fft / 2 + 1; (numel x - win_length) / hop_size + 1|].
52 | 
53 |    [config.center] is currently not taken into account.
54 | 
55 |    @raise Invalid_argument if a size in [config] is not strictly positive, if
56 |    [win_length] exceeds [n_fft], or if [x] holds fewer than [win_length]
57 |    samples. *)
58 | let stft : type a b.
59 |     ?config:Config.t -> (a, b) precision -> (float, a) G.t -> (Complex.t, b) G.t
60 |     =
61 |  fun ?(config : Config.t = Config.default) p (x : (float, a) G.t) ->
62 |   let n_samples = G.numel x in
63 |   (* Validate up front: with truncating integer division, an input slightly
64 |      shorter than the window would otherwise yield one bogus frame that reads
65 |      past the end of [x], and a window longer than [n_fft] would write past
66 |      the end of the FFT buffer. *)
67 |   if config.n_fft <= 0 || config.hop_size <= 0 || config.win_length <= 0 then
68 |     invalid_arg
69 |       "Transform.stft: n_fft, hop_size and win_length must be strictly positive" ;
70 |   if config.win_length > config.n_fft then
71 |     invalid_arg "Transform.stft: win_length must not exceed n_fft" ;
72 |   if n_samples < config.win_length then
73 |     invalid_arg "Transform.stft: input is shorter than win_length" ;
74 |   let kd : (Complex.t, b) kind =
75 |     match p with B32 -> Complex32 | B64 -> Complex64
76 |   in
77 |   let window = (Window.get config.window p ~fftbins:true) config.win_length in
78 |   let n_bins = (config.n_fft / 2) + 1 in
79 |   let n_frames = ((n_samples - config.win_length) / config.hop_size) + 1 in
80 |   let spectrum = Audio.G.create kd [|n_bins; n_frames|] Complex.zero in
81 |   (* FFT input buffer, reused for every frame: one column of [n_fft] values. *)
82 |   let ym = Audio.G.zeros kd [|config.n_fft; 1|] in
83 |   for m = 0 to n_frames - 1 do
84 |     (* Reset the buffer so that everything past [win_length] stays zero. *)
85 |     Audio.G.fill ym Complex.zero ;
86 |     let start = m * config.hop_size in
87 |     for i = 0 to config.win_length - 1 do
88 |       Audio.G.(
89 |         ym.%{i; 0} <-
90 |           to_complex @@ Float.mul (get x [|Int.add start i|]) window.%{i} )
91 |     done ;
92 |     let ym_fft = Owl.Fft.Generic.fft ~axis:0 ym in
93 |     (* Keep the first [n_bins] bins and store them as column [m]. *)
94 |     let spectrum_slice = Audio.G.get_slice [[0; n_bins - 1]; [0]] ym_fft in
95 |     Audio.G.set_slice_ ~out:spectrum [[]; [m]] spectrum spectrum_slice
96 |   done ;
97 |   spectrum
98 | 


--------------------------------------------------------------------------------
/src/transform.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Types
23 | (* Parameters of [stft]. *)
24 | module Config : sig
25 |   type t =
26 |     { n_fft: int
27 |     ; hop_size: int
28 |     ; win_length: int
29 |     ; window: Window.window
30 |     ; center: bool }
31 |   (* Configuration used by [stft] when [?config] is not given. *)
32 |   val default : t
33 | end
34 | (* [stft ?config p x]: short-time Fourier transform of [x]; the result has [n_fft / 2 + 1] rows and one column per frame. *)
35 | val stft :
36 |   'a 'b.
37 |      ?config:Config.t
38 |   -> ('a, 'b) precision
39 |   -> (float, 'a) Audio.G.t
40 |   -> (Complex.t, 'b) Audio.G.t
41 | 


--------------------------------------------------------------------------------
/src/types.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | (* Pairs a real element type with its complex counterpart (float32/complex32, float64/complex64). *)
22 | type (_, _) precision =
23 |   | B32 : (Bigarray.float32_elt, Bigarray.complex32_elt) precision
24 |   | B64 : (Bigarray.float64_elt, Bigarray.complex64_elt) precision
25 | 


--------------------------------------------------------------------------------
/src/types.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | (* Pairs a real element type with its complex counterpart (float32/complex32, float64/complex64). *)
22 | type (_, _) precision =
23 |   | B32 : (Bigarray.float32_elt, Bigarray.complex32_elt) precision
24 |   | B64 : (Bigarray.float64_elt, Bigarray.complex64_elt) precision
25 | 


--------------------------------------------------------------------------------
/src/utils.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | (**
 23 |    Utility conversion module. *)
 24 | module Convert : sig
 25 |   val mel_to_hz :
 26 |        ?htk:bool
 27 |     -> (float, 'a) Owl.Dense.Ndarray.Generic.t
 28 |     -> (float, 'a) Owl.Dense.Ndarray.Generic.t
 29 |   (** Converts mel-scale values to Hz. *)
 30 | 
 31 |   val hz_to_mel :
 32 |        ?htk:bool
 33 |     -> (float, 'a) Owl.Dense.Ndarray.Generic.t
 34 |     -> (float, 'a) Owl.Dense.Ndarray.Generic.t
 35 |   (** Reverse function of {!mel_to_hz}. *)
 36 | 
 37 |   type reference =
 38 |     | RefFloat of float
 39 |     | RefFunction of ((float, Bigarray.float32_elt) Audio.G.t -> float)
 40 | 
 41 |   val power_to_db :
 42 |        ?amin:float
 43 |     -> ?top_db:float option
 44 |     -> reference
 45 |     -> (float, Bigarray.float32_elt) Owl_dense_ndarray.Generic.t
 46 |     -> (float, Bigarray.float32_elt) Owl_dense_ndarray_generic.t
 47 | 
 48 |   val db_to_power :
 49 |        ?amin:float
 50 |     -> reference
 51 |     -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t
 52 |     -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t
 53 | end
 54 | 
 55 | val pad_center :
 56 |      ('a, 'b) Owl.Dense.Ndarray.Generic.t
 57 |   -> int
 58 |   -> 'a
 59 |   -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
 60 | (**
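 61 |     Pads an ndarray around its center. NOTE(review): this sentence was truncated in the original; confirm the exact meaning of the [int] and fill-value arguments against the implementation. *)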
 62 | 
 63 | val frame :
 64 |      ('a, 'b) Owl_dense_ndarray.Generic.t
 65 |   -> int
 66 |   -> int
 67 |   -> int
 68 |   -> ('a, 'b) Owl_dense_ndarray.Generic.t
 69 | 
 70 | val fftfreq :
 71 |   int -> float -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t
 72 | (**
 73 |     Implementation of the Numpy's fftfreq function.
 74 |     See {{:https://numpy.org/doc/stable/reference/generated/numpy.fft.fftfreq.html}numpy.fft.fftfreq} for more information. *)
 75 | 
val rfftfreq :
     (float, 'b) Bigarray.kind
  -> int
  -> float
  -> (float, 'b) Owl.Dense.Ndarray.Generic.t
(**
    Implementation of the Numpy's rfftfreq function.
    The element kind of the returned ndarray is the Bigarray kind given as first argument.

    NOTE(review): the [int] and [float] arguments presumably correspond to numpy's [n] and [d]; confirm.

    See {{:https://numpy.org/doc/stable/reference/generated/numpy.fft.rfftfreq.html}numpy.fft.rfftfreq} for more information. *)
 84 | 
val melfreq :
     ?nmels:int
  -> ?fmin:float
  -> ?fmax:float
  -> ?htk:bool
  -> (float, 'b) Bigarray.kind
  -> (float, 'b) Owl.Dense.Ndarray.Generic.t
(**
  Implementation of librosa's mel_frequencies. Computes an [Owl.Dense.Ndarray] of acoustic frequencies tuned to the mel scale.
  The element kind of the result is the Bigarray kind given as the only positional argument.

  NOTE(review): the defaults of [nmels], [fmin], [fmax] and [htk] are not visible from this interface; check the implementation.

  See: {{:https://librosa.org/doc/main/generated/librosa.mel_frequencies.html}librosa.mel_frequencies} for more information. *)
 95 | 
val roll :
     ('a, 'b) Owl.Dense.Ndarray.Generic.t
  -> int
  -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
(**
    Implementation of the Numpy's roll function on the 0th axis of the given ndarray.
    This function is used inside the library to shift the elements of an array, and is
    exposed because it can sometimes be useful.

    This function returns a copy of the given ndarray.

    See {{:https://numpy.org/doc/stable/reference/generated/numpy.roll.html}numpy.roll} for more information. *)
108 | 
val cov : ?b:('a, 'b) Audio.G.t -> a:('a, 'b) Audio.G.t -> ('a, 'b) Audio.G.t
(**
    (Re)implementation of the matrix covariance function from Owl.
    [a] is a required labelled argument; [b] is optional.

    Note: this is temporary and done only because Owl doesn't export any
    cov function for the [Ndarray] module on which [Audio.G] is based. This function is
    likely to be removed once Owl exports such a cov function for n-dimensional arrays. *)
116 | 
val unwrap :
     ?discont:float option
  -> ?axis:int
  -> ?period:float
  -> (float, 'a) Owl.Dense.Ndarray.Generic.t
  -> (float, 'a) Owl.Dense.Ndarray.Generic.t
(**
    Implementation of the Numpy's unwrap function.
    Note that [discont] is an optional argument whose value is itself a [float option].

    NOTE(review): the defaults of [discont], [axis] and [period] are not visible from this interface; check the implementation.

    See {{:https://numpy.org/doc/stable/reference/generated/numpy.unwrap.html}numpy.unwrap} for more information. *)
126 | 
val outer :
     (   ('a, 'b) Owl.Dense.Ndarray.Generic.t
      -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
      -> ('a, 'b) Owl.Dense.Ndarray.Generic.t )
  -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
  -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
  -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
(**
  Generalized outer product for any given binary operator that supports broadcasting
  (basically all the common Owl Ndarray operators).
  The first argument is the operator; the next two are its ndarray operands. *)
136 | 


--------------------------------------------------------------------------------
/src/window.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2023                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Types
23 | 
(* Supported window shapes. [get] dispatches each tag to the constructor of
   the same name defined in this file. *)
type window = [`Hanning | `Hamming | `Blackman | `Boxcar]
25 | 
(* Maps a precision witness to the Bigarray float kind of matching width. *)
let kind_of_precision : type a b. (a, b) precision -> (float, a) Bigarray.kind =
  function
  | B32 ->
      Bigarray.Float32
  | B64 ->
      Bigarray.Float64
28 | 
(* [cosine_sum ?fftbins prec a m] builds the window
   [sum_k a.(k) * cos (k * x)] with [x] sampled linearly on [-pi, pi].

   - [m < 0] raises [Invalid_argument]; [m = 0] gives an empty array and
     [m = 1] a single 1.
   - With [fftbins = true] the window is evaluated on [m + 1] points and then
     sliced back to indices [0 .. m - 1]. *)
let cosine_sum ?(fftbins = false) (prec : ('a, 'b) precision) (a : float array)
    m =
  let kd = kind_of_precision prec in
  match m with
  | _ when m < 0 ->
      invalid_arg "Window length M must be a non-negative integer"
  | 0 ->
      Audio.G.empty kd [|0|]
  | 1 ->
      Audio.G.ones kd [|1|]
  | _ ->
      (* One extra sample is computed for the periodic variant. *)
      let len = if fftbins then m + 1 else m in
      let phase = Audio.G.linspace kd (-.Owl_const.pi) Owl_const.pi len in
      let acc = Audio.G.zeros kd [|len|] in
      let accumulate k coeff =
        (* Zero coefficients contribute nothing and are skipped. *)
        if coeff <> 0.0 then
          let contribution =
            match k with
            | 0 ->
                Audio.G.create kd [|len|] coeff
            | _ ->
                let harmonic =
                  Audio.G.cos (Audio.G.mul_scalar phase (float_of_int k))
                in
                Audio.G.mul_scalar harmonic coeff
          in
          Audio.G.add_ ~out:acc acc contribution
      in
      Array.iteri accumulate a ;
      if fftbins then Audio.G.get_slice [[0; m - 1]] acc else acc
56 | 
(* Hanning window: cosine-sum window with coefficients 0.5 and 1 - 0.5. *)
let hanning ?(fftbins = false) (prec : ('a, 'b) precision) m =
  let coeffs = [|0.5; 1. -. 0.5|] in
  cosine_sum ~fftbins prec coeffs m
59 | 
(* Hamming window: cosine-sum window with coefficients 0.54 and 1 - 0.54. *)
let hamming ?(fftbins = false) (prec : ('a, 'b) precision) m =
  let coeffs = [|0.54; 1. -. 0.54|] in
  cosine_sum ~fftbins prec coeffs m
62 | 
(* Blackman window: cosine-sum window with coefficients 0.42, 0.5 and 0.08. *)
let blackman ?(fftbins = false) (prec : ('a, 'b) precision) m =
  let coeffs = [|0.42; 0.5; 0.08|] in
  cosine_sum ~fftbins prec coeffs m
65 | 
(* [boxcar ?fftbins prec size] builds a rectangular window: [size] ones.

   - [size = 0] returns an empty array.
   - [size < 0] raises [Invalid_argument], the same exception type as
     [cosine_sum] and the one documented in the interface (this used to be a
     [Failure]).
   - [fftbins] is accepted only so that every window constructor has the same
     signature; it is ignored, hence the silenced unused-variable warning. *)
let boxcar ?(fftbins = false) (prec : ('a, 'b) precision) (size : int) :
    (float, 'a) Audio.G.t =
  let kd = kind_of_precision prec in
  if size < 0 then invalid_arg "Window length M must be non-negative"
  else if size = 0 then Audio.G.empty kd [|0|]
  else Audio.G.ones kd [|size|]
[@@warning "-27"]
73 | 
(* [get typ prec ?fftbins size] picks the constructor matching [typ] and
   forwards [fftbins], [prec] and [size] to it unchanged. *)
let get (typ : window) (prec : ('a, 'b) precision) :
    ?fftbins:bool -> int -> (float, 'a) Audio.G.t =
 fun ?fftbins size ->
  let build =
    match typ with
    | `Hanning ->
        hanning
    | `Hamming ->
        hamming
    | `Blackman ->
        blackman
    | `Boxcar ->
        boxcar
  in
  build ?fftbins prec size
86 | 


--------------------------------------------------------------------------------
/src/window.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2023                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | open Types
 23 | 
 24 | (** {1 Window Functions}
 25 | 
 26 |     This module provides a few commonly used window functions.  *)
 27 | 
 28 | (** The type of window functions. *)
 29 | type window = [`Hanning | `Hamming | `Blackman | `Boxcar]
 30 | 
val get :
     window
  -> ('a, 'b) precision
  -> ?fftbins:bool
  -> int
  -> (float, 'a) Owl_dense_ndarray.Generic.t
(** 
    [get window precision ?fftbins n] generates a window of size [n] using the given window function type.
    It dispatches to {!hanning}, {!hamming}, {!blackman} or {!boxcar} and forwards [fftbins] unchanged.

    {2 Parameters}

    @param window The type of window to generate. 
    @param precision The precision of the Bigarray elements. 
    @param fftbins Forwarded to the selected window function; see {!cosine_sum}.
    @param n The size of the window to generate. It must be non-negative; [n = 0] yields an empty array.
    
    @raise Invalid_argument if [n] is negative.
    NOTE(review): confirm that every window constructor raises this exception type. *)
 47 | 
val cosine_sum :
     ?fftbins:bool
  -> ('a, 'b) precision
  -> float array
  -> int
  -> (float, 'a) Owl_dense_ndarray.Generic.t
(**
    [cosine_sum ?fftbins precision coeffs n] generates a cosine-sum window of size [n] using the given coefficients:
    the sum over [k] of [coeffs.(k) * cos (k * x)], with [x] sampled linearly between [-pi] and [pi].

    {2 Parameters}
    @param fftbins When [true], the window is evaluated on [n + 1] points and then sliced back to [n] samples. Defaults to [false].
    @param precision The precision of the Bigarray elements.
    @param coeffs The coefficients of the cosine-sum window. Zero coefficients are skipped. The length is not checked: an empty array yields an all-zero window when [n >= 2].
    @param n The size of the window to generate. It must be non-negative; [n = 0] yields an empty array and [n = 1] a single 1.

    {2 Raises}
    @raise Invalid_argument if [n] is negative.

    {2 References}
    @see <https://en.wikipedia.org/wiki/Window_function#Cosine-sum_windows> Cosine-sum windows (Wikipedia)
    @see <https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.general_cosine.html> scipy.signal.windows.general_cosine *)
 69 | 
val hanning :
     ?fftbins:bool
  -> ('a, 'b) precision
  -> int
  -> (float, 'a) Owl_dense_ndarray.Generic.t
(** 
    [hanning ?fftbins precision n] generates a Hanning window of size [n].
    It is a {!cosine_sum} window with coefficients [0.5] and [0.5].

    {2 Parameters}

    @param fftbins Forwarded to {!cosine_sum}. Defaults to [false].
    @param precision The precision of the Bigarray elements. 
    @param n The size of the window to generate. It must be non-negative; [n = 0] yields an empty array.
    
    @raise Invalid_argument if [n] is negative. *)
 84 | 
val hamming :
     ?fftbins:bool
  -> ('a, 'b) precision
  -> int
  -> (float, 'a) Owl_dense_ndarray.Generic.t
(** 
    [hamming ?fftbins precision n] generates a Hamming window of size [n].
    It is a {!cosine_sum} window with coefficients [0.54] and [1 - 0.54].

    {2 Parameters}

    @param fftbins Forwarded to {!cosine_sum}. Defaults to [false].
    @param precision The precision of the Bigarray elements. 
    @param n The size of the window to generate. It must be non-negative; [n = 0] yields an empty array.
    
    @raise Invalid_argument if [n] is negative. *)
 99 | 
val blackman :
     ?fftbins:bool
  -> ('a, 'b) precision
  -> int
  -> (float, 'a) Owl_dense_ndarray.Generic.t
(** 
    [blackman ?fftbins precision n] generates a Blackman window of size [n].
    It is a {!cosine_sum} window with coefficients [0.42], [0.5] and [0.08].

    {2 Parameters}

    @param fftbins Forwarded to {!cosine_sum}. Defaults to [false].
    @param precision The precision of the Bigarray elements. 
    @param n The size of the window to generate. It must be non-negative; [n = 0] yields an empty array.
    
    @raise Invalid_argument if [n] is negative. *)
114 | 
val boxcar :
     ?fftbins:bool
  -> ('a, 'b) precision
  -> int
  -> (float, 'a) Owl_dense_ndarray.Generic.t
(** 
    [boxcar ?fftbins precision n] generates a Rectangular window of size [n], i.e. [n] ones.

    {2 Parameters}

    @param fftbins Accepted for signature compatibility with the other windows; it has no effect on the result.
    @param precision The precision of the Bigarray elements.
    @param n The size of the window to generate. It must be non-negative; [n = 0] yields an empty array.
    
    @raise Invalid_argument if [n] is negative.
    NOTE(review): confirm that the implementation raises [Invalid_argument] rather than [Failure]. *)
129 | 


--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
1 | ## About SoundML Testing
2 | 
We chose [librosa](https://librosa.org/) as the main reference implementation for the algorithms inside SoundML. Thus, the tests are based on results obtained by running librosa on a few audio source files, either found on [freesound.org](https://freesound.org/) or [freemusicarchive.org](https://freemusicarchive.org), or generated by us using FFmpeg.
4 | 
The Python script `generate_vectors.py` generates a set of vectors by running librosa algorithms on the audio files present in the `test/audio` directory. Once generated, these vectors are stored in the `test/vectors` directory and are used by `Alcotest` to compare the results of SoundML with the ones from librosa.


--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
 1 | (data_only_dirs vectors audio)
 2 | 
; Tests that declare no extra dependencies: each one is simply executed.
(tests
 (names
  test_aformat
  test_write
  test_utils
  test_time
  test_window
  test_filter
  test_filterbank)
 (libraries alcotest soundml)
 (action
  (run %{test})))
15 | 
; Vector-comparison tests. Before each test runs, the action below:
;   1. creates the audio directory and runs generate_audio.sh,
;   2. creates the vectors directory and runs generate_vectors.py with python3,
;   3. runs the test executable.
(tests
 (names test_stft test_timeseries)
 (libraries alcotest yojson soundml)
 (package soundml)
 (deps
  generate_vectors.py
  generate_audio.sh
  (source_tree audio))
 (action
  (progn
   (system "mkdir -p audio")
   (system "./generate_audio.sh")
   (system "mkdir -p vectors")
   (system "python3 %{dep:generate_vectors.py}")
   (run %{test}))))
31 | 


--------------------------------------------------------------------------------
/test/generate_audio.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | OUTPUT_DIR="$(pwd)/audio"
 4 | 
 5 | declare -A AUDIO_FORMATS=(
 6 |   ["wav"]="pcm_s16le"
 7 |   ["aif"]="pcm_s16be"
 8 |   ["flac"]="flac"
 9 |   ["mp3"]="libmp3lame"
10 | )
11 | 
12 | SAMPLE_RATES=(8000 44100)
13 | DURATION=1
14 | 
15 | declare -A SOURCES=(
16 |     ["clipping"]="sine=f=440:sample_rate={SR}:duration={DUR},volume=volume=5"
17 |     ["lsine"]="sine=f=10:sample_rate={SR}:duration={DUR}"
18 |     ["stereo"]="sine=f=500:sample_rate={SR}:duration={DUR} [l]; aevalsrc=exprs='random(0)*0.5':duration={DUR}:sample_rate={SR} [r]; [l][r] amerge=inputs=2"
19 | )
20 | 
21 | if ! command -v ffmpeg &> /dev/null; then
22 |     echo "Error: ffmpeg is not installed." >&2
23 |     exit 1
24 | fi
25 | 
26 | total_files=$(( ${#SOURCES[@]} * ${#AUDIO_FORMATS[@]} * ${#SAMPLE_RATES[@]} ))
27 | successful_files=0
28 | current_file_number=0
29 | 
30 | for audio_source in "${!SOURCES[@]}"; do
31 |     sft="${SOURCES[$audio_source]}"
32 | 
33 |     for format in "${!AUDIO_FORMATS[@]}"; do
34 |         codec="${AUDIO_FORMATS[$format]}"
35 | 
36 |         for sr in "${SAMPLE_RATES[@]}"; do
37 |             ((current_file_number++))
38 | 
39 |             source_key=$(echo "$audio_source" | tr -cd '[:alnum:]_-')
40 |             filename="${format}_${source_key}_${sr}hz_${DURATION}s.${format}"
41 |             output_path="$OUTPUT_DIR/$filename"
42 | 
43 |             source_filter="${sft//\{SR\}/$sr}"
44 |             source_filter="${source_filter//\{DUR\}/$DURATION}"
45 | 
46 |             codec_opts=()
47 |             case "$codec" in
48 |                 "libmp3lame") codec_opts+=("-b:a" "32k" "-compression_level" "9") ;;
49 |                 "libvorbis")  codec_opts+=("-q:a" "0") ;;
50 |                 "aac")        codec_opts+=("-b:a" "48k") ;;
51 |             esac
52 | 
53 |             ffmpeg -y \
54 |                 -f lavfi -i "$source_filter" \
55 |                 -t "$DURATION" \
56 |                 -ar "$sr" \
57 |                 -c:a "$codec" "${codec_opts[@]}" \
58 |                 -vn \
59 |                 "$output_path" \
60 |                 -loglevel error > /dev/null 2>&1
61 | 
62 |             if [[ $? -eq 0 ]]; then
63 |                 ((successful_files++))
64 |             fi
65 |         done
66 |     done
67 | done
68 | 
69 | exit 0
70 | 


--------------------------------------------------------------------------------
/test/generate_vectors.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This file is part of SoundML.
  3 | 
  4 | Copyright (C) 2025 Gabriel Santamaria
  5 | 
  6 | This script is used to generate test vectors for SoundML.
The reference implementation chosen is librosa.
It is supposed to be run only once; the generated vectors
then have to be used for the actual testing.
 10 | """
 11 | 
 12 | from typing import Any, Tuple, Dict, List
 13 | import os
 14 | import json
 15 | import numpy as np
 16 | import librosa
 17 | 
 18 | AUDIO_DIRECTORY = "audio/"
 19 | VECTOR_DIRECTORY = "vectors/"
 20 | 
 21 | 
 22 | class Parameters:
 23 |     """
 24 |     Class representing parameters used to generate a vector
 25 |     """
 26 | 
 27 |     parameters: Dict[str, Any]
 28 | 
 29 |     def __init__(self, parameters: Dict[str, Any]):
 30 |         self.parameters = parameters
 31 | 
 32 |     def write(self, filename: str):
 33 |         """
 34 |         Write the parameters to a JSON file
 35 |         """
 36 |         with open(filename, "w", encoding="utf-8") as f:
 37 |             json.dump(self.parameters, f, indent=4)
 38 | 
 39 | 
 40 | class VectorGenerator:
 41 |     """
 42 |     Abstract class representing an audio vector generators
 43 |     """
 44 | 
 45 |     BASE_IDENTIFIER: str
 46 | 
 47 |     audio_paths: list[str]
 48 |     output_dir: str
 49 | 
 50 |     counter: int = 0
 51 | 
 52 |     def __init__(self, audio_paths: list[str], output_dir: str):
 53 |         self.audio_paths = audio_paths
 54 |         self.output_dir = os.path.join(output_dir, f"{self.BASE_IDENTIFIER}/")
 55 | 
 56 |     def normalize_name(self, name: str) -> str:
 57 |         """
 58 |         Normalize the name of the audio file
 59 |         """
 60 |         return name.replace(" ", "_").replace("-", "_").lower()
 61 | 
 62 |     def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]:
 63 |         """
 64 |         Generate the vector for the given audio file
 65 |         """
 66 |         raise NotImplementedError("Subclasses should implement this method")
 67 | 
 68 |     def generate(self):
 69 |         """
 70 |         Generate the audio vectors
 71 |         """
 72 |         if not os.path.exists(self.output_dir):
 73 |             os.makedirs(self.output_dir, exist_ok=True)
 74 | 
 75 |         for audio_path in self.audio_paths:
 76 |             identifier = os.path.splitext(os.path.basename(audio_path))[0]
 77 |             try:
 78 |                 data: Tuple[np.ndarray, Parameters] = self.vector(audio_path)
 79 | 
 80 |                 self.counter += 1
 81 | 
 82 |                 y: np.ndarray = data[0]
 83 |                 params = data[1]
 84 |                 filename: str = self.normalize_name(
 85 |                     f"{self.BASE_IDENTIFIER}_{identifier}"
 86 |                 )
 87 |                 output_filename: str = os.path.join(self.output_dir, f"{filename}.npy")
 88 |                 params_filename: str = os.path.join(self.output_dir, f"{filename}.json")
 89 |                 np.save(output_filename, y)
 90 |                 params.write(params_filename)
 91 |             except Exception as e:
 92 |                 print(f"ERROR generating for {identifier}: {e}")
 93 | 
 94 | 
 95 | class TimeSeriesVectorGenerator(VectorGenerator):
 96 |     """
 97 |     Reads an audio file and creates a time-series vector representation of it
 98 |     """
 99 | 
100 |     BASE_IDENTIFIER: str = "timeseries"
101 | 
102 |     resamplers: List[str] = ["soxr_vhq", "soxr_hq", "soxr_mq", "soxr_lq"]
103 |     srs = [None, 8000, 16000, 22050]
104 | 
105 |     def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]:
106 |         """
107 |         Generate the time-series vector for the given file
108 |         """
109 |         params = {}
110 |         mono = False if self.counter % 2 == 0 or self.counter % 3 == 0 else False
111 |         sr = self.srs[self.counter % len(self.srs)]
112 |         res_type = self.resamplers[self.counter % len(self.resamplers)]
113 |         params["mono"] = mono
114 |         if sr is not None:
115 |             params["res_type"] = res_type
116 |         y, sr = librosa.load(
117 |             audio_path, mono=mono, sr=sr, res_type=res_type, dtype=np.float64
118 |         )
119 |         params["sr"] = sr
120 |         y = np.ascontiguousarray(y, dtype=np.float64)
121 | 
122 |         return (y, Parameters(params))
123 | 
124 | 
class STFTVectorGenerator(VectorGenerator):
    """
    Loads an audio file with librosa and returns its STFT as a complex64 array
    """

    BASE_IDENTIFIER: str = "stft"

    # Candidate values, cycled through with the counter. The values in the
    # trailing comments are currently disabled.
    nffts = [512]  # 1024, 2048, 4096
    window_lengths = [512]  # 64, 128, 256, 512
    hop_sizes = [128]  # 256, 512
    centers = [False, False, False]
    window_types = ["hann"]  # "hamming", "blackman", "boxcar"

    def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]:
        """
        Compute the STFT of `audio_path` and return it with the parameters used
        """

        def pick(choices):
            # Cycle through the candidates using the generator counter.
            return choices[self.counter % len(choices)]

        n_fft = pick(self.nffts)
        hop_size = pick(self.hop_sizes)
        window_type = pick(self.window_types)
        window_length = pick(self.window_lengths)
        center = pick(self.centers)

        settings = {
            "window_length": window_length,
            "n_fft": n_fft,
            "hop_size": hop_size,
            "window_type": window_type,
            "center": center,
            "res_type": "soxr_hq",
        }

        samples, _sr = librosa.load(audio_path)
        samples = samples.astype(np.float64)
        spectrum = librosa.stft(
            samples,
            n_fft=n_fft,
            hop_length=hop_size,
            win_length=window_length,
            window=window_type,
            dtype=np.complex64,
            center=center,
        )
        spectrum = np.ascontiguousarray(spectrum, dtype=np.complex64)

        return (spectrum, Parameters(settings))
170 | 
171 | 
# Generator classes (not instances) to run; each is instantiated in the main
# section below.
generators: list[type[VectorGenerator]] = [
    TimeSeriesVectorGenerator,
    STFTVectorGenerator,
]

if __name__ == "__main__":
    # Every entry of the audio directory is handed to the generators; entries
    # that cannot be processed are reported by `VectorGenerator.generate`.
    audio_files = [
        os.path.join(AUDIO_DIRECTORY, entry) for entry in os.listdir(AUDIO_DIRECTORY)
    ]
    if not os.path.exists(VECTOR_DIRECTORY):
        os.makedirs(VECTOR_DIRECTORY)

    for generator_cls in generators:
        # A separate name is used for the instance so that the loop variable
        # keeps a single type (the class).
        generator = generator_cls(audio_files, VECTOR_DIRECTORY)
        generator.generate()
184 | 


--------------------------------------------------------------------------------
/test/test_stft.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Soundml
23 | open Vutils
24 | 
(* Maps the window name stored in the test vectors to the matching window
   tag. Raises [Failure] on any other name. *)
let string_to_window name =
  let known =
    [ ("hann", `Hanning)
    ; ("hamming", `Hamming)
    ; ("blackman", `Blackman)
    ; ("boxcar", `Boxcar) ]
  in
  match List.assoc_opt name known with
  | Some window ->
      window
  | None ->
      failwith "Unknown window type"
36 | 
(* Test description for the STFT vectors: element types, the precision to
   run with, and [generate], which computes the STFT to compare against the
   reference vector. *)
module StftTestable = struct
  type t = Complex.t

  type p = Bigarray.complex32_elt

  type pf = Bigarray.float32_elt

  type pc = Bigarray.complex32_elt

  type ('a, 'b) precision = ('a, 'b) Types.precision

  let precision = Types.B32

  let kd = Bigarray.Complex32

  (* Name of the vector family handled by this module. *)
  let typ = "stft"

  (* [generate precision case audio] runs [Transform.stft] on [audio] with the
     configuration read from the parameters of [case] (its third component).
     A parameter that is absent falls back to the default given below. *)
  let generate (precision : (pf, pc) precision)
      (case : string * string * Parameters.t)
      (audio : (float, 'c) Owl_dense_ndarray.Generic.t) =
    let _, _, params = case in
    let int_param key default =
      Option.value ~default (Parameters.get_int key params)
    in
    let n_fft = int_param "n_fft" 2048 in
    let hop_size = int_param "hop_size" 512 in
    let win_length = int_param "window_length" 2048 in
    (* The vector generator stores the window name under "window_type". The
       former "window" key is still consulted as a fallback, so previously
       generated parameter files keep working. *)
    let window_name =
      match Parameters.get_string "window_type" params with
      | Some name ->
          name
      | None ->
          Option.value ~default:"hann" (Parameters.get_string "window" params)
    in
    let window = string_to_window window_name in
    let center =
      Option.value ~default:false (Parameters.get_bool "center" params)
    in
    let config =
      Transform.Config.{n_fft; hop_size; win_length; window; center}
    in
    Transform.stft ~config precision audio
end
81 | 
module Tests = Tests_cases (StftTestable)

(* Entry point: load the STFT vectors, build one test per vector, run them. *)
let () =
  let suite_name = "Vectors: STFT Comparison" in
  Testdata.get StftTestable.typ Vutils.data
  |> Tests.create_tests |> Tests.run suite_name
89 | 


--------------------------------------------------------------------------------
/test/test_time.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | open Soundml.Effects.Time
 23 | 
 24 | let config_testable : Config.t Alcotest.testable =
 25 |   Alcotest.testable
 26 |     (Fmt.of_to_string (fun fmt ->
 27 |          Format.sprintf "Effects.Time.Config int:%d" (Config.to_int fmt) ) )
 28 |     (fun a b -> Config.to_int a = Config.to_int b)
 29 | 
 30 | let test_default_int () =
 31 |   let expected = 0x00000000 in
 32 |   let actual = Config.to_int Config.default in
 33 |   Alcotest.check Alcotest.int "Default config integer value" expected actual
 34 | 
 35 | let test_percussive_int () =
 36 |   let expected = 0x00102000 in
 37 |   let actual = Config.to_int Config.percussive in
 38 |   Alcotest.check Alcotest.int "Percussive config integer" expected actual ;
 39 |   let expected = {Config.default with window= Short; phase= Independent} in
 40 |   Alcotest.check config_testable "Percussive config record" expected
 41 |     Config.percussive
 42 | 
 43 | (* Each option applied alone to [Config.default] must yield exactly its bit. *)
 44 | let test_single_options () =
 45 |   let cases =
 46 |     [ (* engine *)
 47 |       ("EngineFiner", 0x20000000, Config.with_engine Finer)
 48 |     ; (* transients *)
 49 |       ("TransientsMixed", 0x00000100, Config.with_transients Mixed)
 50 |     ; ("TransientsSmooth", 0x00000200, Config.with_transients Smooth)
 51 |     ; (* detector *)
 52 |       ("DetectorPercussive", 0x00000400, Config.with_detector Percussive)
 53 |     ; ("DetectorSoft", 0x00000800, Config.with_detector Soft)
 54 |     ; (* phase *)
 55 |       ("PhaseIndependent", 0x00002000, Config.with_phase Independent)
 56 |     ; (* threading *)
 57 |       ("ThreadingNever", 0x00010000, Config.with_threading Never)
 58 |     ; ("ThreadingAlways", 0x00020000, Config.with_threading Always)
 59 |     ; (* window *)
 60 |       ("WindowShort", 0x00100000, Config.with_window Short)
 61 |     ; ("WindowLong", 0x00200000, Config.with_window Long)
 62 |     ; (* smoothing *)
 63 |       ("SmoothingOn", 0x00800000, Config.with_smoothing On)
 64 |     ; (* formant *)
 65 |       ("FormantPreserved", 0x01000000, Config.with_formant Preserved)
 66 |     ; (* pitch *)
 67 |       ("PitchHighQuality", 0x02000000, Config.with_pitch HighQuality)
 68 |     ; ("PitchHighConsistency", 0x04000000, Config.with_pitch HighConsistency)
 69 |     ; (* channels *)
 70 |       ("ChannelsTogether", 0x10000000, Config.with_channels Together) ]
 71 |   in
 72 |   cases |> List.iter (fun (label, bits, apply) ->
 73 |       Alcotest.check Alcotest.int label bits (Config.to_int (apply Config.default)) )
 74 | 
 75 | (* Combined options must encode to the bitwise OR of their individual flags.
 76 |    Three cases: one modifier, three chained modifiers, and a record literal
 77 |    with every field set to a non-default value. *)
 78 | let test_combinations () =
 79 |   let check_bits label bits cfg =
 80 |     Alcotest.check Alcotest.int label bits (Config.to_int cfg)
 81 |   in
 82 |   (* Finer engine on top of the defaults. *)
 83 |   let finer_only = Config.with_engine Finer Config.default in
 84 |   check_bits "Combo: RealTime | Finer" 0x20000000
 85 |     finer_only ;
 86 |   (* 0x00100000 (Short) | 0x00010000 (Never) | 0x01000000 (Preserved). *)
 87 |   let short_never_preserved =
 88 |     Config.with_formant Preserved
 89 |       (Config.with_threading Never
 90 |          (Config.with_window Short Config.default) )
 91 |   in
 92 |   check_bits "Combo: Short | Never | Preserved" 0x01110000
 93 |     short_never_preserved ;
 94 |   (* Every field non-default; the expected word is the OR of the bits noted
 95 |      next to each field:
 96 |        0x20000000 | 0x10000000 | 0x04000000 | 0x01000000 | 0x00800000
 97 |      | 0x00200000 | 0x00020000 | 0x00002000 | 0x00000800 | 0x00000200 *)
 98 |   let everything =
 99 |     Config.
100 |       { engine= Finer (* 0x20000000 *)
101 |       ; transients= Smooth (* 0x00000200 *)
102 |       ; detector= Soft (* 0x00000800 *)
103 |       ; phase= Independent (* 0x00002000 *)
104 |       ; threading= Always (* 0x00020000 *)
105 |       ; window= Long (* 0x00200000 *)
106 |       ; smoothing= On (* 0x00800000 *)
107 |       ; formant= Preserved (* 0x01000000 *)
108 |       ; pitch= HighConsistency (* 0x04000000 *)
109 |       ; channels= Together (* 0x10000000 *) }
110 |   in
111 |   check_bits "Combo: All non-default" 0x35A22A00
112 |     everything
113 | 
114 | (* A [with_*] modifier must change only its own field, and chaining
115 |    [with_window Short] with [with_phase Independent] must rebuild the preset. *)
116 | let test_modifiers () =
117 |   let same_config = Alcotest.check config_testable in
118 |   let base = Config.default in
119 |   same_config "with_engine changes only engine"
120 |     {base with engine= Finer}
121 |     (Config.with_engine Finer base) ;
122 |   let short_independent = Config.with_phase Independent (Config.with_window Short base) in
123 |   same_config "with_window then with_phase"
124 |     {base with window= Short; phase= Independent}
125 |     short_independent ;
126 |   same_config "Manual percussive matches preset" Config.percussive short_independent
127 | 
128 | (* Smoke test: time-stretch [sample_rate] samples of a 440 Hz sine (Float32)
129 |    by a ratio of 2.0 with the default configuration. Passes as long as the
130 |    call does not raise; the output is not inspected. *)
131 | let test_time_stretch () =
132 |   let sample_rate = 44100 in
133 |   let sine_at i =
134 |     let t = float_of_int i /. float_of_int sample_rate in
135 |     Float.sin (440.0 *. (2.0 *. Float.pi *. t))
136 |   in
137 |   let audio_input = Audio.G.init Bigarray.Float32 [|sample_rate|] sine_at in
138 |   ignore (time_stretch ~config:Config.default audio_input sample_rate 2.0) ;
139 |   Alcotest.(check pass)
140 |     "time_stretch completed without raising an exception" () ()
141 | 
142 | (* A stretch ratio of 0.0 must raise [Invalid_argument "rate must be > 0."]. *)
143 | let test_time_stretch_invalid_ratio_raises () =
144 |   let silence = Audio.G.create Bigarray.Float32 [|100|] 0.0 in
145 |   let stretch_by_zero () =
146 |     ignore (time_stretch ~config:Config.default silence 44100 0.0)
147 |   in
148 |   Alcotest.check_raises "Stretching with zero ratio raises Invalid_argument"
149 |     (Invalid_argument "rate must be > 0.")
150 |     stretch_by_zero
151 | 
152 | (* Smoke test registered as "Pitch shift". NOTE(review): the body calls
153 |    [time_stretch], exactly like [test_time_stretch]; no pitch-shifting
154 |    function is exercised here. Confirm which call was intended. *)
155 | let test_pitch_shift () =
156 |   let sample_rate = 44100 in
157 |   let sine_at i =
158 |     let t = float_of_int i /. float_of_int sample_rate in
159 |     Float.sin (440.0 *. (2.0 *. Float.pi *. t))
160 |   in
161 |   let audio_input = Audio.G.init Bigarray.Float32 [|sample_rate|] sine_at in
162 |   ignore (time_stretch ~config:Config.default audio_input sample_rate 2.0) ;
163 |   Alcotest.(check pass)
164 |     "pitch_shift completed without raising an exception" () ()
165 | 
166 | (* Entry point: register every suite of this file with Alcotest and run. *)
167 | let () =
168 |   let quick label body = Alcotest.test_case label `Quick body in
169 |   let presets =
170 |     [ quick "Default integer value" test_default_int
171 |     ; quick "Percussive integer value" test_percussive_int ]
172 |   in
173 |   let stretch =
174 |     [ quick "Time stretch" test_time_stretch
175 |     ; quick "Time stretch raise" test_time_stretch_invalid_ratio_raises ]
176 |   in
177 |   Alcotest.run "Effects.Time: Config"
178 |     [ ("Presets", presets)
179 |     ; ( "Single Options"
180 |       , [quick "Integer values for single flags" test_single_options] )
181 |     ; ( "Combinations"
182 |       , [quick "Integer values for combined flags" test_combinations] )
183 |     ; ( "Modifiers"
184 |       , [quick "Modifiers create correct configs" test_modifiers] )
185 |     ; ("Time Stretch/Pitch Shift", stretch)
186 |     ; ("Pitch Shift", [quick "Pitch shift" test_pitch_shift]) ]
187 | 


--------------------------------------------------------------------------------
/test/test_timeseries.ml:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | open Soundml
23 | open Vutils
24 | 
25 | module Timeseries = struct
26 |   (* Float64 / Complex64 instantiation handed to [Tests_cases]. *)
27 |   type t = Float.t
28 |   type p = Bigarray.float64_elt
29 |   type pf = Bigarray.float64_elt
30 |   type pc = Bigarray.complex64_elt
31 |   type ('a, 'b) precision = ('a, 'b) Types.precision
32 | 
33 |   let precision = Types.B64
34 | 
35 |   let kd = Bigarray.Float64
36 | 
37 |   (* Key passed to [Testdata.get] to select this module's vectors. *)
38 |   let typ = "timeseries"
39 | 
40 |   (* Identity: returns the audio unchanged; the precision and the parameter
41 |      triple are ignored. *)
42 |   let generate (_ : (pf, pc) precision) (_ : string * string * Parameters.t)
43 |       (samples : (float, 'c) Owl_dense_ndarray.Generic.t) =
44 |     samples
45 | end
46 | 
47 | (* Instantiate the shared test-case functor for [Timeseries]. *)
48 | module Tests = Tests_cases (Timeseries)
49 | 
50 | (* Look up the vectors keyed by [Timeseries.typ], build the tests, run them. *)
51 | let () =
52 |   Testdata.get Timeseries.typ Vutils.data
53 |   |> Tests.create_tests |> Tests.run "Vectors: Timeseries Comparison"
54 | 


--------------------------------------------------------------------------------
/test/test_utils.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | (* Float32 ndarray under test, and an Alcotest testable for it that prints
 23 |    the shape and compares with [Tutils.Check.rallclose]. *)
 24 | type data = (float, Bigarray.float32_elt) Audio.G.t
 25 | 
 26 | let data_testable : data Alcotest.testable =
 27 |   let pp_dims = Fmt.brackets (Fmt.array ~sep:Fmt.semi Fmt.int) in
 28 |   let module Data = struct
 29 |     type t = data
 30 | 
 31 |     let pp : t Fmt.t = fun ppf arr -> Fmt.pf ppf "%a" pp_dims (Audio.G.shape arr)
 32 | 
 33 |     let equal : t -> t -> bool = Tutils.Check.rallclose
 34 |   end in
 35 |   (module Data)
 36 | 
 37 | module Test_pad_center = struct
 38 |   (* Wrap a float array as a 1-D Float32 ndarray. *)
 39 |   let create_data (arr : float array) : data =
 40 |     Audio.G.of_array Bigarray.Float32 arr [|Array.length arr|]
 41 | 
 42 |   (* Target size equal to the input length: the expected output is the
 43 |      input itself. *)
 44 |   let test_no_padding () =
 45 |     let input = create_data [|1.; 2.; 3.|] in
 46 |     let want = create_data [|1.; 2.; 3.|] in
 47 |     Alcotest.check data_testable "no_padding: Correct padding"
 48 |       want
 49 |       (Utils.pad_center input 3 0.)
 50 | 
 51 |   (* Two elements padded to six: the expected output has two pad values on
 52 |      each side. *)
 53 |   let test_even_padding () =
 54 |     let input = create_data [|1.; 2.|] in
 55 |     let want = create_data [|0.; 0.; 1.; 2.; 0.; 0.|] in
 56 |     Alcotest.check data_testable "even_padding: Correct padding"
 57 |       want
 58 |       (Utils.pad_center input 6 0.)
 59 | 
 60 |   (* Three elements padded to six: the expected output has one pad value on
 61 |      the left and two on the right. *)
 62 |   let test_odd_padding () =
 63 |     let input = create_data [|1.; 2.; 3.|] in
 64 |     let want = create_data [|0.; 1.; 2.; 3.; 0.; 0.|] in
 65 |     Alcotest.check data_testable "odd_padding: Correct padding"
 66 |       want
 67 |       (Utils.pad_center input 6 0.)
 68 | 
 69 |   (* Empty input padded to four: the expected output is four pad values
 70 |      and nothing else. *)
 71 |   let test_empty_input () =
 72 |     let input = create_data [||] in
 73 |     let want = create_data [|0.; 0.; 0.; 0.|] in
 74 |     Alcotest.check data_testable "empty_input: Correct padding"
 75 |       want
 76 |       (Utils.pad_center input 4 0.)
 77 | 
 78 |   (* Target smaller than the input (four elements, target 2): the call is
 79 |      expected to raise [Invalid_argument] with the message below. *)
 80 |   let test_error_target_too_small () =
 81 |     let input = create_data [|1.; 2.; 3.; 4.|] in
 82 |     let too_small =
 83 |       Invalid_argument
 84 |         "An error occured while trying to pad: current_size > target_size"
 85 |     in
 86 |     let pad () = ignore (Utils.pad_center input 2 0.) in
 87 |     Alcotest.check_raises
 88 |       "error_target_too_small: raises Invalid_argument when target_size < \
 89 |        input_size"
 90 |       too_small pad
 91 | 
 92 |   (* Pad value -1.5, two elements padded to five: the expected output has
 93 |      one pad value on the left and two on the right. *)
 94 |   let test_non_zero_padding () =
 95 |     let input = create_data [|5.; 6.|] in
 96 |     let want = create_data [|-1.5; 5.; 6.; -1.5; -1.5|] in
 97 |     Alcotest.check data_testable "non_zero_padding: Correct padding"
 98 |       want
 99 |       (Utils.pad_center input 5 (-1.5))
100 | 
101 |   (* Empty input with a target of 0: the expected output is empty. The
102 |      standard buffers are flushed right after the input is built. *)
103 |   let test_zero_target_empty_input () =
104 |     let input = create_data [||] in
105 |     flush_all () ;
106 |     let want = create_data [||] in
107 |     Alcotest.check data_testable "zero_target_empty_input: Correct padding"
108 |       want
109 |       (Utils.pad_center input 0 0.)
110 | 
111 |   (* Target 0 with a two-element input: expected to raise the same
112 |      [Invalid_argument] as any other too-small target. *)
113 |   let test_zero_target_non_empty_input () =
114 |     let input = create_data [|1.; 2.|] in
115 |     let too_small =
116 |       Invalid_argument
117 |         "An error occured while trying to pad: current_size > target_size"
118 |     in
119 |     let pad () = ignore (Utils.pad_center input 0 0.) in
120 |     Alcotest.check_raises
121 |       "zero_target_non_empty_input: raises Invalid_argument when target_size < \
122 |        input_size"
123 |       too_small pad
124 | 
125 |   (* Every pad_center case, all registered as `Quick, in the order they
126 |      are defined in this module. *)
127 |   let suite =
128 |     let quick label body = Alcotest.test_case label `Quick body in
129 |     [ quick "no_padding" test_no_padding
130 |     ; quick "even_padding" test_even_padding
131 |     ; quick "odd_padding" test_odd_padding
132 |     ; quick "empty_input" test_empty_input
133 |     ; quick "error_target_too_small" test_error_target_too_small
134 |     ; quick "non_zero_padding" test_non_zero_padding
135 |     ; quick "zero_target_empty_input" test_zero_target_empty_input
136 |     ; quick "zero_target_non_empty_input" test_zero_target_non_empty_input ]
137 | end
138 | 
139 | (* Entry point: run the pad_center suite. *)
140 | let () = Alcotest.run "SoundML Utils Tests" [("Pad Center", Test_pad_center.suite)]
141 | 


--------------------------------------------------------------------------------
/test/test_write.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | open Bigarray
 23 | open Soundml
 24 | open Tutils
 25 | 
 26 | let temp_dir_name = ref "" (* scratch directory of this run; empty string when none is set *)
 27 | 
 28 | let setup_test_dir () = (* obtain a scratch directory and remember its path *)
 29 |   let scratch = Filename.temp_dir "soundml_test_" "" in
 30 |   temp_dir_name := scratch ;
 31 |   if Sys.file_exists scratch && Sys.is_directory scratch then () else Unix.mkdir scratch 0o755
 32 | 
 33 | (* Recursively delete the scratch directory, if one is set, and forget it. *)
 34 | let delete_test_dir () =
 35 |   let rec wipe entry =
 36 |     match Sys.is_directory entry with
 37 |     | false -> Sys.remove entry
 38 |     | true ->
 39 |         Sys.readdir entry |> Array.iter (fun child -> wipe (Filename.concat entry child)) ;
 40 |         Unix.rmdir entry
 41 |   in
 42 |   let root = !temp_dir_name in
 43 |   (if root <> "" && Sys.file_exists root then wipe root) ; temp_dir_name := ""
 44 | 
 45 | let temp_file ?(ext = ".wav") name = Filename.concat !temp_dir_name (name ^ ext) (* path of [name ^ ext] inside the scratch directory *)
 46 | 
 47 | (* [true] unless [Unix.access name [F_OK]] fails with ENOENT; any other
 48 |    exception is still reported as [true]. *)
 49 | let file_exists name =
 50 |   match Unix.access name [Unix.F_OK] with
 51 |   | () -> true
 52 |   | exception Unix.Unix_error (Unix.ENOENT, _, _) ->
 53 |       false
 54 |   | exception _ ->
 55 |       true
 56 | 
 57 | (* Build a Float32 test signal of shape [|channels; samples|] ([|samples|]
 58 |    when there is a single channel) together with its metadata. Each channel
 59 |    holds one constant, sin (2 pi * 23000 * channel): the sample index is unused. *)
 60 | let create_test_audio channels samples sample_rate format =
 61 |   let mono = channels <= 1 and freq = 23000. in
 62 |   let data = Audio.G.create Bigarray.Float32 (if mono then [|samples|] else [|channels; samples|]) 0. in
 63 |   for channel = 0 to channels - 1 do
 64 |     let level = sin (2. *. Float.pi *. (freq *. Float.of_int channel)) in
 65 |     for i = 0 to samples - 1 do
 66 |       Audio.G.set data (if mono then [|i|] else [|channel; i|]) level
 67 |     done
 68 |   done ;
 69 |   let meta = Audio.Metadata.create channels samples sample_rate format in
 70 |   (Audio.create meta data, sample_rate)
 71 | 
 72 | (* Zero-sample Float32 audio ([|channels; 0|], or [|0|] when mono) and its rate. *)
 73 | let create_empty_audio channels sample_rate format =
 74 |   let dims = match channels with n when n > 1 -> [|n; 0|] | _ -> [|0|] in
 75 |   let silent = Audio.G.create Bigarray.Float32 dims 0. in
 76 |   let meta = Audio.Metadata.create channels 0 sample_rate format in
 77 |   (Audio.create meta silent, sample_rate)
 78 | 
 79 | (* Testable for Float32 audio: prints the channel and per-channel sample
 80 |    counts, compares sample data with [Check.rallclose] (rtol 1e-05, atol 1e-08). *)
 81 | let audio_testable =
 82 |   let pp ppf (a : float32_elt Audio.audio) =
 83 |     let per_channel = if Audio.channels a > 0 then Audio.samples a else 0 in
 84 |     Format.fprintf ppf "{ channels=%d; samples/channel=%d; }" (Audio.channels a) per_channel
 85 |   in
 86 |   let equal lhs rhs = Check.rallclose ~rtol:1e-05 ~atol:1e-08 (Audio.data lhs) (Audio.data rhs) in
 87 |   Alcotest.testable pp equal
 88 | 
 89 | (* Build a round-trip test case: synthesize audio, write it with [Io.write],
 90 |    read it back with [Io.read], then compare channels, sample rate, length,
 91 |    format and samples. [format] defaults to WAV / PCM_16 / FILE endianness. *)
 92 | let check_write_read name
 93 |     ?(format : Aformat.t = Aformat.{ftype= WAV; sub= PCM_16; endian= FILE})
 94 |     channels samples target_sr ext =
 95 |   let test_name =
 96 |     Printf.sprintf "%s_%dch_%dsamples_%dHz%s" name channels samples target_sr
 97 |       ext
 98 |   in
 99 |   Alcotest.test_case test_name `Quick (fun () ->
100 |       let filename = temp_file ~ext test_name in
101 |       let audio, sr = create_test_audio channels samples target_sr format in
102 |       Io.write ~format filename (Audio.data audio) sr ;
103 |       (* Alcotest.check takes the expected value first, then the actual one. *)
104 |       Alcotest.check Alcotest.bool "Output file exists after write" true
105 |         (file_exists filename) ;
106 |       let read_audio =
107 |         try Io.read ~mono:(channels = 1) ~sample_rate:target_sr Bigarray.Float32 filename
108 |         with ex ->
109 |           Alcotest.failf "Failed to read back file %s: %s" filename
110 |             (Printexc.to_string ex)
111 |       in
112 |       let same_int label want got = Alcotest.check Alcotest.int label want got in
113 |       same_int "Channels match after write" channels (Audio.channels read_audio) ;
114 |       same_int "Sample rate match after write" target_sr (Audio.sr read_audio) ;
115 |       same_int "Frames match after write" samples (Audio.samples read_audio) ;
116 |       Alcotest.check
117 |         (Alcotest.testable Aformat.pp Stdlib.( = ))
118 |         "Format match after write" format (Audio.format read_audio) ;
119 |       Alcotest.check audio_testable "Data unchanged after write" audio
120 |         read_audio )
121 | 
122 | (* Build a test case asserting that [Io.write] accepts zero-sample audio
123 |    without raising. The write is really executed here: comparing two
124 |    closures with [Alcotest.(neg reject)] passes for any pair of values and
125 |    never calls them, so it cannot detect a failing write. *)
126 | let check_write_empty name
127 |     ?(format : Aformat.t = Aformat.{ftype= WAV; sub= PCM_16; endian= FILE})
128 |     channels target_sr ext =
129 |   let test_name =
130 |     Printf.sprintf "%s_%dch_empty_%dHz%s" name channels target_sr ext
131 |   in
132 |   Alcotest.test_case test_name `Quick (fun () ->
133 |       let audio, sr = create_empty_audio channels target_sr format in
134 |       try Io.write ~format (temp_file ~ext test_name) (Audio.data audio) sr
135 |       with ex -> Alcotest.failf "Writing empty audio raised %s" (Printexc.to_string ex) )
136 | 
137 | (* Round-trip cases with the default format and with formats built by
138 |    [Aformat.create], followed by the empty-audio write cases. *)
139 | let tests =
140 |   let explicit ftype = Result.get_ok (Aformat.create ftype) in
141 |   [ check_write_read "write_f32_mono_wav_deduced" 1 1024 44100 ".wav"
142 |   ; check_write_read "write_f32_stereo_wav_deduced" 2 1024 44100 ".wav"
143 |   ; check_write_read "write_f32_stereo_flac_deduced" 2 512 22050 ".flac"
144 |   ; check_write_read "write_f32_mono_ogg_deduced" 1 2048 48000 ".ogg"
145 |   ; check_write_read "write_f32_stereo_wav_explicit"
146 |       ~format:(explicit Aformat.WAV) 2 1024 44100 ".wav"
147 |   ; check_write_read "write_f32_stereo_flac_explicit"
148 |       ~format:(explicit Aformat.FLAC) 2 512 22050 ".flac"
149 |   ; check_write_read "write_f32_mono_ogg_explicit"
150 |       ~format:(explicit Aformat.OGG) 1 2048 48000 ".ogg"
151 |   ; check_write_empty "write_f32_mono_empty" 1 44100 ".wav"
152 |   ; check_write_empty "write_f32_stereo_empty" 2 44100 ".wav" ]
153 | 
154 | let suite = [("Write/Read Roundtrip", tests)] (* wraps [tests] in a single named Alcotest suite *)
155 | 
156 | (* Entry point. Cleanup is registered with [at_exit] because [Alcotest.run]
157 |    exits the process itself by default, so a call placed after it may never run. *)
158 | let () =
159 |   setup_test_dir () ; at_exit delete_test_dir ; Alcotest.run "SoundML Io.write" suite
160 | 


--------------------------------------------------------------------------------
/test/tutils.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | (* Numeric comparison helpers shared by the test suites. *)
 23 | module Check = struct
 24 |   open Soundml
 25 | 
 26 |   (* [true] when both arrays have the same rank and the same extents. *)
 27 |   let shape (x : ('a, 'b) Audio.G.t) (y : ('a, 'b) Audio.G.t) =
 28 |     let dims_x = Audio.G.shape x and dims_y = Audio.G.shape y in
 29 |     Array.length dims_x = Array.length dims_y
 30 |     && Array.for_all2 ( = ) dims_x dims_y
 31 | 
 32 |   (* Element-wise |x - y| <= atol + rtol * |y| on real arrays. Arrays of
 33 |      different shapes never match; two empty arrays always do. *)
 34 |   let rallclose ?(rtol = 1e-05) ?(atol = 1e-10) (x : ('a, 'b) Audio.G.t)
 35 |       (y : ('a, 'b) Audio.G.t) : bool =
 36 |     let both_empty () = Audio.G.numel x = 0 && Audio.G.numel y = 0 in
 37 |     let within_tolerance () =
 38 |       let err = Audio.G.abs (Audio.G.sub x y) in
 39 |       let bound = Audio.G.add_scalar (Audio.G.mul_scalar (Audio.G.abs y) rtol) atol in
 40 |       Audio.G.min' (Audio.G.elt_less_equal err bound) >= 1.0
 41 |     in
 42 |     shape x y && (both_empty () || within_tolerance ())
 43 | 
 44 |   (* Complex counterpart of [rallclose]. Complex32 inputs are first cast to
 45 |      Complex64. NOTE(review): both sides go through [abs2_z2d], which looks
 46 |      like a squared modulus, so the tolerance would apply to squared
 47 |      magnitudes; confirm this is intended. *)
 48 |   let callclose : type a.
 49 |          ?rtol:float
 50 |       -> ?atol:float
 51 |       -> (Complex.t, a) Audio.G.t
 52 |       -> (Complex.t, a) Audio.G.t
 53 |       -> bool =
 54 |    fun ?(rtol = 1e-05) ?(atol = 1e-05) x y ->
 55 |     if not (shape x y) then false
 56 |     else if Audio.G.numel x = 0 && Audio.G.numel y = 0 then true
 57 |     else
 58 |       let wide_x, wide_y =
 59 |         match Audio.G.kind x with
 60 |         | Bigarray.Complex32 -> (Audio.G.cast_c2z x, Audio.G.cast_c2z y)
 61 |         | Bigarray.Complex64 -> (x, y)
 62 |         | _ -> .
 63 |       in
 64 |       let err = Audio.G.abs2_z2d (Audio.G.sub wide_x wide_y) in
 65 |       let bound = Audio.G.(add_scalar (mul_scalar (abs2_z2d wide_y) rtol) atol) in
 66 |       Audio.G.min' (Audio.G.elt_less_equal err bound) >= 1.0
 67 | end
 68 | 
 69 | (* Dispatch on the element kind: real kinds use [Check.rallclose], complex
 70 |    kinds use [Check.callclose]; any other kind fails. *)
 71 | let allclose : type a b.
 72 |        (a, b) Bigarray.kind
 73 |     -> ?rtol:float
 74 |     -> ?atol:float
 75 |     -> (a, b) Owl_dense_ndarray.Generic.t
 76 |     -> (a, b) Owl_dense_ndarray.Generic.t
 77 |     -> bool = function
 78 |   | Bigarray.Float32 ->
 79 |       Check.rallclose
 80 |   | Bigarray.Float64 ->
 81 |       Check.rallclose
 82 |   | Bigarray.Complex32 ->
 83 |       Check.callclose
 84 |   | Bigarray.Complex64 ->
 85 |       Check.callclose
 86 |   | _ ->
 87 |       failwith "Unsupported datatype."
 88 | 
 89 | (* Alcotest testable for ndarrays of the four supported kinds. Equality is
 90 |    [allclose] with the optional [rtol]/[atol]; the printer shows kind, shape
 91 |    and, for 1-D arrays, at most the first 10 elements. *)
 92 | let dense_testable : type a b.
 93 |        ?rtol:float
 94 |     -> ?atol:float
 95 |     -> (a, b) Bigarray.kind
 96 |     -> (a, b) Audio.G.t Alcotest.testable =
 97 |  fun ?rtol ?atol (_ : (a, b) Bigarray.kind) ->
 98 |   let kd_to_string (type a b) (kd : (a, b) Bigarray.kind) =
 99 |     match kd with
100 |     | Bigarray.Float32 ->
101 |         "Float32"
102 |     | Bigarray.Float64 ->
103 |         "Float64"
104 |     | Bigarray.Complex32 ->
105 |         "Complex32"
106 |     | Bigarray.Complex64 ->
107 |         "Complex64"
108 |     | _ ->
109 |         failwith "Unsupported kind"
110 |   in
111 |   let pp_kind fmt k = Format.fprintf fmt "%s" (kd_to_string k) in
112 |   let to_string (type a b) (kd : (a, b) Bigarray.kind) (v : a) =
113 |     match kd with
114 |     | Bigarray.Float32 ->
115 |         Printf.sprintf "%f" v
116 |     | Bigarray.Float64 ->
117 |         Printf.sprintf "%f" v
118 |     | Bigarray.Complex32 ->
119 |         Printf.sprintf "%f + %fi" v.re v.im
120 |     | Bigarray.Complex64 ->
121 |         Printf.sprintf "%f + %fi" v.re v.im
122 |     | _ ->
123 |         failwith "Unsupported kind"
124 |   in
125 |   let pp fmt arr =
126 |     let kd = Audio.G.kind arr in
127 |     let dims = Audio.G.shape arr in
128 |     let first_few_max = 10 in
129 |     let total_elements = Array.fold_left ( * ) 1 dims in
130 |     (* Only 1-D arrays get a data preview, and never more elements than the
131 |        array holds: always reading 10 goes out of bounds on shorter arrays. *)
132 |     let shown =
133 |       if Array.length dims = 1 then Stdlib.min first_few_max total_elements else 0
134 |     in
135 |     let first_few = List.init shown (fun i -> Audio.G.get arr [|i|]) in
136 |     Format.fprintf fmt
137 |       "Audio.G.t <kind: %a, shape: [%s], data (first %d): [%s]>" pp_kind
138 |       (Audio.G.kind arr)
139 |       (String.concat "; " (Array.to_list (Array.map string_of_int dims)))
140 |       first_few_max
141 |       (String.concat "; " (List.map (to_string kd) first_few))
142 |   in
143 |   let equal a b =
144 |     let kd = Audio.G.kind a in
145 |     allclose ?rtol ?atol kd a b
146 |   in
147 |   Alcotest.testable pp equal
148 | 
149 | (* Ready-made testables, one per supported element kind; [rtol] and [atol]
150 |    are left unset so the comparison functions use their own defaults. *)
151 | let float32_g_testable = dense_testable Bigarray.Float32
152 | let float64_g_testable = dense_testable Bigarray.Float64
153 | 
154 | let complex32_g_testable = dense_testable Bigarray.Complex32
155 | let complex64_g_testable = dense_testable Bigarray.Complex64
156 | 
157 | (* Pick the ready-made testable matching [kd]; other kinds fail. *)
158 | let get_dense_testable : type a b.
159 |     (a, b) Bigarray.kind -> (a, b) Audio.G.t Alcotest.testable = function
160 |   | Bigarray.Complex64 ->
161 |       complex64_g_testable
162 |   | Bigarray.Complex32 ->
163 |       complex32_g_testable
164 |   | Bigarray.Float64 ->
165 |       float64_g_testable
166 |   | Bigarray.Float32 ->
167 |       float32_g_testable
168 |   | _ ->
169 |       failwith "Unsupported kind"
170 | 
171 | (* Load the .npy file at [path] as an ndarray of the given [kind]. Fails
172 |    with the path followed by ": incorrect format" when the file cannot be
173 |    converted to a C-layout array of that kind. Adapted from the equivalent
174 |    Matrix code in Owl, which is not yet available for Ndarrays; see:
175 |    https://github.com/tachukao/owl/blob/046f703a6890a5ed5ecf4a8c5750d4e392e4ec54/src/owl/dense/owl_dense_matrix_generic.ml#L606-L609
176 |  *)
177 | let load_npy (path : string) (kind : ('a, 'b) Bigarray.kind) :
178 |     ('a, 'b) Audio.G.t =
179 |   let packed = Npy.read_copy path in
180 |   match Npy.to_bigarray Bigarray.c_layout kind packed with
181 |   | None ->
182 |       failwith (Printf.sprintf "%s: incorrect format" path)
183 |   | Some ndarray ->
184 |       ndarray
185 | 


--------------------------------------------------------------------------------
/test/tutils.mli:
--------------------------------------------------------------------------------
 1 | (*****************************************************************************)
 2 | (*                                                                           *)
 3 | (*                                                                           *)
 4 | (*  Copyright (C) 2025                                                       *)
 5 | (*    Gabriel Santamaria                                                     *)
 6 | (*                                                                           *)
 7 | (*                                                                           *)
 8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
 9 | (*  you may not use this file except in compliance with the License.         *)
10 | (*  You may obtain a copy of the License at                                  *)
11 | (*                                                                           *)
12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
13 | (*                                                                           *)
14 | (*  Unless required by applicable law or agreed to in writing, software      *)
15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (*  See the License for the specific language governing permissions and      *)
18 | (*  limitations under the License.                                           *)
19 | (*                                                                           *)
20 | (*****************************************************************************)
21 | 
22 | (** Module providing useful checking functions for the tests. *)
23 | module Check : sig
24 |   val rallclose :
25 |        ?rtol:float
26 |     -> ?atol:float
27 |     -> (float, 'b) Owl_dense_ndarray.Generic.t
28 |     -> (float, 'b) Owl_dense_ndarray.Generic.t
29 |     -> bool
30 |   (** Real-valued all-close function: compares two real ndarrays, with optional [rtol]/[atol] tolerances. *)
31 | 
32 |   val callclose :
33 |     'a.
34 |        ?rtol:float
35 |     -> ?atol:float
36 |     -> (Complex.t, 'a) Owl_dense_ndarray.Generic.t
37 |     -> (Complex.t, 'a) Owl_dense_ndarray.Generic.t
38 |     -> bool
39 |   (** Complex-valued all-close function: compares two complex ndarrays, with optional [rtol]/[atol] tolerances. *)
40 | 
41 |   val shape :
42 |        ('a, 'b) Owl_dense_ndarray.Generic.t
43 |     -> ('a, 'b) Owl_dense_ndarray.Generic.t
44 |     -> bool
45 |   (** Checks whether two ndarrays have the same shape. *)
46 | end
47 | 
48 | val allclose :
49 |   'a 'b.
50 |      ('a, 'b) Bigarray.kind
51 |   -> ?rtol:float
52 |   -> ?atol:float
53 |   -> ('a, 'b) Owl_dense_ndarray.Generic.t
54 |   -> ('a, 'b) Owl_dense_ndarray.Generic.t
55 |   -> bool
56 | (** [allclose kind ?rtol ?atol a b] checks if the two Ndarrays [a] and [b] of element kind [kind] are allclose. This is equivalent to NumPy's allclose function. *)
57 | 
58 | val dense_testable :
59 |      ?rtol:float
60 |   -> ?atol:float
61 |   -> ('a, 'b) Bigarray.kind
62 |   -> ('a, 'b) Owl_dense_ndarray.Generic.t Alcotest.testable (** Alcotest testable for Dense.Ndarray values; two arrays are equal when {!allclose} holds with the given [rtol]/[atol]. *)
63 | 
64 | val get_dense_testable :
65 |      ('a, 'b) Bigarray.kind
66 |   -> ('a, 'b) Owl_dense_ndarray.Generic.t Alcotest.testable
67 | (** Function that returns a correctly-typed testable based on the passed kind for Dense.Ndarray. Handles Float32, Float64, Complex32 and Complex64; raises [Failure] for any other kind. *)
68 | 
69 | val load_npy :
70 |   string -> ('a, 'b) Bigarray.kind -> ('a, 'b) Owl_dense_ndarray.Generic.t
71 | (** Load a numpy file and return the ndarray of the given kind. Raises [Failure] when the content cannot be converted to that kind.
72 |     @see <https://github.com/tachukao/owl/blob/046f703a6890a5ed5ecf4a8c5750d4e392e4ec54/src/owl/dense/owl_dense_matrix_generic.ml#L606-L609> the Owl Matrix code this is adapted from. *)
73 | 


--------------------------------------------------------------------------------
/test/vutils.ml:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | open Tutils
 23 | 
 24 | let test_audio_dir = String.concat "" [Sys.getcwd (); "/audio/"]
 25 | (* Like the audio folder, the vectors folder is looked up under the working directory. *)
 26 | let test_vectors_dir = String.concat "" [Sys.getcwd (); "/vectors/"]
 27 | 
 28 | (* Maps a test-type tag to the label shown for its Alcotest suite; tags other
 29 |    than "timeseries" and "stft" get the generic "Unknown" label. *)
 30 | let typ_to_readable (typ : string) : string =
 31 |   match typ with
 32 |   | "timeseries" -> "Io.Read"
 33 |   | "stft" -> "Spectral.stft"
 34 |   | _ -> "Unknown"
 35 | 
 36 | (* Translates the resampler name found in a vector's parameters into the
 37 |    matching [Io] resampling constructor. Names that are not listed give
 38 |    [Io.NONE]. *)
 39 | let string_to_resample_typ (name : string) =
 40 |   let known =
 41 |     [ ("soxr_vhq", Io.SOXR_VHQ)
 42 |     ; ("soxr_hq", Io.SOXR_HQ)
 43 |     ; ("soxr_mq", Io.SOXR_MQ)
 44 |     ; ("soxr_lq", Io.SOXR_LQ) ]
 45 |   in
 46 |   Option.value ~default:Io.NONE (List.assoc_opt name known)
 47 | 
 48 | module StrMap = Map.Make (String) (* Maps keyed by strings; [Testdata.t] uses it to index the cases by test type. *)
 49 | 
 50 | (* Thin accessors over the JSON document holding a test vector's parameters. *)
 51 | module Parameters = struct
 52 |   type t = Yojson.Basic.t
 53 | 
 54 |   (* Parses the JSON file located at [path]. *)
 55 |   let create (path : string) = Yojson.Basic.from_file path
 56 | 
 57 |   (* Every getter fetches the member [name] of [params] and converts it with
 58 |      the corresponding Yojson [to_*_option] function. *)
 59 |   let get_string (name : string) (params : t) =
 60 |     Yojson.Basic.Util.(to_string_option (member name params))
 61 |   let get_int (name : string) (params : t) =
 62 |     Yojson.Basic.Util.(to_int_option (member name params))
 63 |   let get_float (name : string) (params : t) =
 64 |     Yojson.Basic.Util.(to_float_option (member name params))
 65 |   let get_bool (name : string) (params : t) =
 66 |     Yojson.Basic.Util.(to_bool_option (member name params))
 67 | end
 68 | 
 69 | (* Discovery of the [<type>_<name>.json]/[.npy] test vectors stored on disk. *)
 70 | module Testdata = struct
 71 |   type t = (string * string * Parameters.t) list StrMap.t
 72 | 
 73 |   (* First '_'-separated field of [basename], i.e. the test type. *)
 74 |   let get_test_type (basename : string) : string option =
 75 |     match Str.(split (regexp {|_|}) basename) with
 76 |     | [] -> None
 77 |     | typ :: _ -> Some typ
 78 | 
 79 |   (* What follows the test-type field. [None] when nothing follows it, so that
 80 |      the [~default] fallbacks of the callers apply instead of an empty name. *)
 81 |   let get_test_filename (basename : string) : string option =
 82 |     match Str.(split (regexp {|_|}) basename) with
 83 |     | [] | [_] -> None
 84 |     | _ :: rest -> Some (String.concat "_" rest)
 85 | 
 86 |   (* Path of an entry of [dir] equal to [name] once its extension is removed.
 87 |      An unreadable [dir] is reported on stderr and yields [None]. *)
 88 |   let list_filter_filename (dir : string) (name : string) : string option =
 89 |     try
 90 |       Sys.readdir dir
 91 |       |> Array.find_opt (fun entry -> Filename.remove_extension entry = name)
 92 |       |> Option.map (Filename.concat dir)
 93 |     with Sys_error msg ->
 94 |       Printf.eprintf "Error reading directory '%s': %s\n" dir msg ;
 95 |       None
 96 | 
 97 |   (* Files ending with [ext] found in [dir] or in its direct sub-directories. *)
 98 |   let list_tests_files (dir : string) (ext : string) : string list =
 99 |     let has_extension path = Filename.check_suffix path ext in
100 |     let process_entry entry =
101 |       let full_path = Filename.concat dir entry in
102 |       try
103 |         if Sys.is_directory full_path then
104 |           Sys.readdir full_path |> Array.to_list
105 |           |> List.map (Filename.concat full_path)
106 |           |> List.filter has_extension
107 |         else if has_extension full_path then [full_path]
108 |         else []
109 |       with Sys_error _ -> [] (* entries that cannot be inspected are skipped *)
110 |     in
111 |     try Sys.readdir dir |> Array.to_list |> List.concat_map process_entry
112 |     with Sys_error msg ->
113 |       Printf.eprintf "Error reading directory '%s': %s\n" dir msg ;
114 |       []
115 | 
116 |   (* Keeps the paths whose basename has [typ] as type, without extension. *)
117 |   let filter_test_type (typ : string) (paths : string list) : string list =
118 |     let keep path =
119 |       match get_test_type (Filename.basename path) with
120 |       | Some found when found = typ -> Some (Filename.remove_extension path)
121 |       | _ -> None
122 |     in
123 |     List.filter_map keep paths
124 | 
125 |   (* Turns extension-less vector paths into (npy, audio, parameters) triples;
126 |      a vector whose audio file is missing is dropped with a warning. *)
127 |   let construct_parameters (audio_dir : string) (files : string list) =
128 |     let aux (file : string) =
129 |       (* [file] has no extension left: stripping again would cut dotted names. *)
130 |       let base = Filename.basename file in
131 |       let audio_filename = Option.value (get_test_filename base) ~default:base in
132 |       match list_filter_filename audio_dir audio_filename with
133 |       | None ->
134 |           Printf.eprintf "Warning: couldn't find audio file with name %s\n"
135 |             audio_filename ;
136 |           None
137 |       | Some audio_file_path ->
138 |           Some (file ^ ".npy", audio_file_path, Parameters.create (file ^ ".json"))
139 |     in
140 |     List.filter_map aux files
141 | 
142 |   (* Lists the [.json] vectors once, then builds the cases of every type. *)
143 |   let create (vectors_dir : string) (audio_dir : string) (types : string list) :
144 |       t =
145 |     let vectors_files = list_tests_files vectors_dir ".json" in
146 |     let by_type acc typ = StrMap.add typ (filter_test_type typ vectors_files) acc in
147 |     StrMap.map (construct_parameters audio_dir) (List.fold_left by_type StrMap.empty types)
148 | 
149 |   let get (typ : string) (data : t) = StrMap.find typ data (* may raise Not_found *)
150 | end
151 | 
152 | module type Testable = sig (* Description of one functionality under test, consumed by [Tests_cases]. *)
153 |   type t (* element type of the ndarray produced by [generate] *)
154 | 
155 |   type p (* Bigarray element representation paired with [t] in [kd] *)
156 | 
157 |   type pf (* element representation of the real-valued audio input *)
158 | 
159 |   type pc (* second parameter of the precision witness *)
160 | 
161 |   type ('a, 'b) precision = ('a, 'b) Types.precision
162 | 
163 |   val precision : (pf, pc) precision (* precision the audio is read with *)
164 | 
165 |   val kd : (t, p) Bigarray.kind (* kind used to load and compare the reference vector *)
166 | 
167 |   val typ : string (* test-type tag, used for the suite and check names *)
168 | 
169 |   val generate : (* builds the array to compare from the case triple and the audio data *)
170 |        (pf, pc) precision
171 |     -> string * string * Parameters.t
172 |     -> (float, pf) Owl_dense_ndarray.Generic.t
173 |     -> (t, p) Owl_dense_ndarray.Generic.t
174 | end
175 | 
176 | (* Alcotest cases of one [Testable]: each case reads an audio file, feeds it
177 |    to [T.generate] and compares the result with the stored [.npy] vector. *)
178 | module Tests_cases (T : Testable) = struct
179 |   include T
180 | 
181 |   (* Bigarray kind of the real samples matching a precision witness. *)
182 |   let akind : type a b. (a, b) precision -> (float, a) Bigarray.kind = function
183 |     | Types.B32 -> Bigarray.Float32
184 |     | Types.B64 -> Bigarray.Float64
185 | 
186 |   (* Reads [path] through [Io.read] and returns the data of the audio. *)
187 |   let read_audio kd (path : string) (res_typ : Io.resampling_t)
188 |       (sample_rate : int) (mono : bool) =
189 |     Audio.data (Io.read ~res_typ ~sample_rate ~mono kd path)
190 | 
191 |   (* One slow case per (vector, audio, parameters) triple. Absent parameters
192 |      default to sr = 22050, mono = true and no resampling. Reading, generating
193 |      and loading all happen inside the test body: a failure is then reported
194 |      by Alcotest for that case alone rather than aborting the construction of
195 |      the suite, and cases that are not run cost nothing. *)
196 |   let create_tests (data : (string * string * Parameters.t) list) :
197 |       unit Alcotest.test_case list =
198 |     let make_case ((vector_path, audio_path, params) as case) =
199 |       let raw_basename =
200 |         Filename.basename vector_path |> Filename.remove_extension
201 |       in
202 |       let basename =
203 |         Option.value ~default:raw_basename
204 |           (Testdata.get_test_filename raw_basename)
205 |       in
206 |       let test_dense () =
207 |         let sr = Option.value ~default:22050 (Parameters.get_int "sr" params) in
208 |         let mono =
209 |           Option.value ~default:true (Parameters.get_bool "mono" params)
210 |         in
211 |         let resampler =
212 |           Parameters.get_string "res_type" params
213 |           |> Option.value ~default:"None"
214 |           |> string_to_resample_typ
215 |         in
216 |         let audio = read_audio (akind precision) audio_path resampler sr mono in
217 |         let generated = generate precision case audio in
218 |         let vector = load_npy vector_path kd in
219 |         Alcotest.check
220 |           (Tutils.get_dense_testable kd)
221 |           (typ ^ "_dense_" ^ basename)
222 |           generated vector
223 |       in
224 |       ("DENSE:    " ^ basename, `Slow, test_dense)
225 |     in
226 |     List.map make_case data
227 | 
228 |   (* Runs [tests] as one Alcotest suite labelled after [typ]. *)
229 |   let run (name : string) (tests : unit Alcotest.test_case list) =
230 |     Alcotest.run name [(typ_to_readable typ, tests)]
231 | end
232 | 
233 | let tests = ["timeseries"; "stft"]
234 | (* Test vectors grouped by test type, discovered once when this module is loaded. *)
235 | let data = tests |> Testdata.create test_vectors_dir test_audio_dir
236 | 


--------------------------------------------------------------------------------
/test/vutils.mli:
--------------------------------------------------------------------------------
  1 | (*****************************************************************************)
  2 | (*                                                                           *)
  3 | (*                                                                           *)
  4 | (*  Copyright (C) 2025                                                       *)
  5 | (*    Gabriel Santamaria                                                     *)
  6 | (*                                                                           *)
  7 | (*                                                                           *)
  8 | (*  Licensed under the Apache License, Version 2.0 (the "License");          *)
  9 | (*  you may not use this file except in compliance with the License.         *)
 10 | (*  You may obtain a copy of the License at                                  *)
 11 | (*                                                                           *)
 12 | (*    http://www.apache.org/licenses/LICENSE-2.0                             *)
 13 | (*                                                                           *)
 14 | (*  Unless required by applicable law or agreed to in writing, software      *)
 15 | (*  distributed under the License is distributed on an "AS IS" BASIS,        *)
 16 | (*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
 17 | (*  See the License for the specific language governing permissions and      *)
 18 | (*  limitations under the License.                                           *)
 19 | (*                                                                           *)
 20 | (*****************************************************************************)
 21 | 
 22 | val test_audio_dir : string
 23 | (** The directory where the test audio files are located: the [audio/] folder of the current working directory. *)
 24 | 
 25 | val test_vectors_dir : string
 26 | (** The directory where the test vectors are located: the [vectors/] folder of the current working directory. *)
 27 | 
 28 | val typ_to_readable : string -> string
 29 | (** Converts a test type (e.g. ["stft"]) to a readable suite name for Alcotest. *)
 30 | 
 31 | (** Maps keyed by strings. *)
 32 | module StrMap : Map.S with type key = string
 33 | 
 34 | (** A module for handling the parameters of a test vector, read from a JSON file. *)
 35 | module Parameters : sig
 36 |   type t
 37 | 
 38 |   val create : string -> t (** [create path] parses the JSON file at [path]. *)
 39 | 
 40 |   val get_string : string -> t -> string option (** [get_string name params] is the member [name] of [params] read as a string, if any. *)
 41 | 
 42 |   val get_int : string -> t -> int option (** Same as {!get_string} for an integer member. *)
 43 | 
 44 |   val get_float : string -> t -> float option (** Same as {!get_string} for a float member. *)
 45 | 
 46 |   val get_bool : string -> t -> bool option (** Same as {!get_string} for a boolean member. *)
 47 | end
 48 | 
 49 | module Testdata : sig (* Test cases found on disk, grouped by test type. *)
 50 |   type t = (string * string * Parameters.t) list StrMap.t (* test type -> (npy path, audio path, parameters) *)
 51 | 
 52 |   val get_test_type : string -> string option
 53 |   (** Returns the test type, i.e. the first ['_']-separated field, of a given test name *)
 54 | 
 55 |   val get_test_filename : string -> string option
 56 |   (** Returns the file name part of a given test name, i.e. what follows its leading test-type field *)
 57 | 
 58 |   val create : string -> string -> string list -> t
 59 |   (** Creates a test set given a directory of vectors files, a directory of audio files
 60 |       and a list of test types *)
 61 | 
 62 |   val get : string -> t -> (string * string * Parameters.t) list
 63 |   (** Returns the test set for a given test type. Raises [Not_found] when the type is not part of the set. *)
 64 | end
 65 | 
 66 | module type Testable = sig
 67 |   (** Type of the data generated by the functionality we're testing *)
 68 |   type t
 69 | 
 70 |   (** Bigarray element representation paired with {!t} in {!kd} *)
 71 |   type p
 72 | 
 73 |   (** Float precision *)
 74 |   type pf
 75 | 
 76 |   (** Complex precision *)
 77 |   type pc
 78 | 
 79 |   type ('a, 'b) precision = ('a, 'b) Types.precision
 80 | 
 81 |   val precision : (pf, pc) precision (** Precision the audio is read with. *)
 82 | 
 83 |   val kd : (t, p) Bigarray.kind (** Kind of the generated data and of the reference vector. *)
 84 | 
 85 |   val typ : string (** Test type tag, e.g. ["stft"]. *)
 86 | 
 87 |   val generate :
 88 |        (pf, pc) precision
 89 |     -> string * string * Parameters.t
 90 |     -> (float, pf) Owl_dense_ndarray.Generic.t
 91 |     -> (t, p) Owl_dense_ndarray.Generic.t
 92 |   (** Generate a testing vector given a precision, a (vector path, audio path, parameters) case and an audio array *)
 93 | end
 94 | 
 95 | module Tests_cases (T : Testable) : sig (* Alcotest cases built from a [Testable] description. *)
 96 |   type t = T.t
 97 | 
 98 |   type p = T.p
 99 | 
100 |   type pf = T.pf
101 | 
102 |   type pc = T.pc
103 | 
104 |   val create_tests :
105 |     (string * string * Parameters.t) list -> unit Alcotest.test_case list (** One slow test case per (vector path, audio path, parameters) triple, comparing the output of [T.generate] with the stored vector. *)
106 | 
107 |   val run : string -> unit Alcotest.test_case list -> unit (** [run name tests] runs [tests] with Alcotest as a single suite named after [T.typ]. *)
108 | end
109 | 
110 | val data : Testdata.t (** Test set for the ["timeseries"] and ["stft"] types, built from {!test_vectors_dir} and {!test_audio_dir}. *)
111 | 


--------------------------------------------------------------------------------