├── .github
└── workflows
│ ├── build.yml
│ ├── lint.yml
│ └── test.yml
├── .gitignore
├── .ocamlformat
├── LICENSE
├── README.md
├── bench
├── read
│ ├── bench.py
│ ├── bench_read.sh
│ ├── dune
│ ├── mp3.svg
│ ├── perf.ml
│ └── wav.svg
└── stft
│ ├── dune
│ ├── perf.ml
│ ├── perf.py
│ ├── stft_comparison_float32.svg
│ ├── stft_comparison_float64.svg
│ └── wav_stereo_44100hz_1s.wav
├── doc
├── dune
└── index.mld
├── dune-project
├── soundml.opam
├── soundml_logo.svg
├── src
├── aformat.ml
├── aformat.mli
├── audio.ml
├── audio.mli
├── dune
├── effects
│ ├── dune
│ ├── filter
│ │ ├── filter.ml
│ │ ├── filter.mli
│ │ ├── fir.ml
│ │ ├── fir.mli
│ │ ├── highpass.ml
│ │ ├── highpass.mli
│ │ ├── iir.ml
│ │ ├── iir.mli
│ │ ├── lowpass.ml
│ │ └── lowpass.mli
│ └── time
│ │ ├── config
│ │ ├── discover.ml
│ │ └── dune
│ │ ├── rubberband_stubs.cxx
│ │ ├── time.ml
│ │ └── time.mli
├── feature
│ ├── dune
│ └── spectral
│ │ ├── spectral.ml
│ │ └── spectral.mli
├── filterbank.ml
├── filterbank.mli
├── io
│ ├── cio
│ │ ├── README.md
│ │ ├── common.hxx
│ │ ├── read.hxx
│ │ └── write.hxx
│ ├── config
│ │ ├── discover.ml
│ │ └── dune
│ ├── dune
│ ├── io.ml
│ ├── io.mli
│ └── soundml_io.cxx
├── soundml.ml
├── transform.ml
├── transform.mli
├── types.ml
├── types.mli
├── utils.ml
├── utils.mli
├── window.ml
└── window.mli
└── test
├── README.md
├── dune
├── generate_audio.sh
├── generate_vectors.py
├── test_aformat.ml
├── test_filter.ml
├── test_filterbank.ml
├── test_stft.ml
├── test_time.ml
├── test_timeseries.ml
├── test_utils.ml
├── test_window.ml
├── test_write.ml
├── tutils.ml
├── tutils.mli
├── vutils.ml
└── vutils.mli
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | # Inspired by https://github.com/savonet/build-and-test-ocaml-module/blob/main/action.yml
2 | # Thanks for the savonet team for building such an amazing software :p
3 | name: Build
4 |
5 | on:
6 | push:
7 | branches:
8 | - 'main'
9 | pull_request:
10 | types: [opened, synchronize, reopened]
11 |
12 | jobs:
13 | build:
14 | strategy:
15 | fail-fast: false
16 | matrix:
17 | os: [ubuntu-latest] # , macos-latest, windows-latest]
18 | ocaml-compiler:
19 | - "5.3.0" # Latest stable
20 | - "5.2.0"
21 |
22 | runs-on: ${{ matrix.os }}
23 |
24 | steps:
25 | - name: Checkout code
26 | uses: actions/checkout@v4
27 |
28 | - name: Set up OCaml ${{ matrix.ocaml-compiler }} on ${{ matrix.os }}
29 | uses: ocaml/setup-ocaml@v3
30 | with:
31 | ocaml-compiler: ${{ matrix.ocaml-compiler }}
32 | dune-cache: true
33 |
34 | # temporary, waiting for the conf-soxr package to be published to opam repo
35 | - name: Install libsoxr-dev
36 | run: |
37 | sudo apt-get update
38 | sudo apt-get install -y libsoxr-dev
39 | shell: bash
40 |
41 | - name: Install SoundML
42 | run: opam install . --yes --confirm-level=unsafe-yes
43 | shell: bash
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Lint & Format
2 |
3 | on:
4 | push:
5 | branches:
6 | - 'main'
7 | pull_request:
8 | types: [opened, synchronize, reopened]
9 |
10 | jobs:
11 | lint-fmt:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - name: Checkout code
16 | uses: actions/checkout@v4
17 |
18 | - name: Set up OCaml for linting
19 | uses: ocaml/setup-ocaml@v3
20 | with:
21 | ocaml-compiler: "5.2"
22 | dune-cache: true
23 | - name: Run OCaml Lint & Format Check
24 | uses: ocaml/setup-ocaml/lint-fmt@v2
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # Inspired by https://github.com/savonet/build-and-test-ocaml-module/blob/main/action.yml
2 | # Thanks for the savonet team for building such an amazing software :p
3 |
4 | name: Test
5 |
6 | on:
7 | push:
8 | branches:
9 | - 'main'
10 | pull_request:
11 | types: [opened, synchronize, reopened]
12 |
13 | jobs:
14 | test:
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | os: [ubuntu-latest] # , macos-latest, windows-latest]
19 | ocaml-compiler:
20 | - "5.3.0" # Latest stable
21 | - "5.2.0"
22 | include:
23 | - os: ubuntu-latest
24 | ocaml-compiler: "5.3.0"
25 | send-coverage: true
26 |
27 | runs-on: ${{ matrix.os }}
28 |
29 | steps:
30 | - name: Checkout code
31 | uses: actions/checkout@v4
32 |
33 | - name: Set up OCaml ${{ matrix.ocaml-compiler }} on ${{ matrix.os }}
34 | uses: ocaml/setup-ocaml@v3
35 | with:
36 | ocaml-compiler: ${{ matrix.ocaml-compiler }}
37 | dune-cache: true
38 |
39 | - uses: actions/setup-python@v5
40 | with:
41 | python-version: '3.12.3'
42 | - run: pip install --upgrade pip
43 | - run: pip install numpy librosa==0.11.0
44 |
45 | # temporary, waiting for the conf-soxr package to be published to opam repo
46 | - name: Install libsoxr-dev
47 | run: |
48 | sudo apt-get update
49 | sudo apt-get install -y libsoxr-dev
50 | shell: bash
51 |
52 | - name: Install FFmpeg CLI (to generate test data)
53 | run: sudo apt-get install -y ffmpeg
54 | shell: bash
55 |
56 | - name: Install SoundML dependencies (with test)
57 | run: opam install . --deps-only --with-test --yes --confirm-level=unsafe-yes --verbose
58 | shell: bash
59 |
60 | - name: Run tests
61 | if: matrix.send-coverage != true
62 | run: |
63 | opam exec -- dune runtest
64 | shell: bash
65 |
66 | - name: Run tests (and send converage to coveralls)
67 | if: matrix.send-coverage == true
68 | run: |
69 | opam exec -- dune runtest --instrument-with bisect_ppx --force
70 | opam exec -- bisect-ppx-report send-to Coveralls
71 | shell: bash
72 | env:
73 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
74 | PULL_REQUEST_NUMBER: ${{ github.event.number }}
75 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | dune.lock
3 | dataset/
4 | cache/
5 | report/
6 | *.pth
7 | .venv
8 | .vscode
9 | wav/
10 | mp3/
--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
1 | profile = ocamlformat
2 | wrap-comments = true
3 | version = 0.27.0
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |

4 |
5 |
SoundML
6 |
A little and very high level library to perform basic operations on audio files in the OCaml language
7 |
8 |
9 |
10 | [](https://github.com/gabyfle/SoundML/actions/workflows/build.yml)
11 | [](https://github.com/gabyfle/SoundML/actions/workflows/test.yml)
12 | [](https://coveralls.io/github/gabyfle/SoundML)
13 |
14 |
15 | ## About the Project
16 |
17 | > [!WARNING]
18 | > The project is still in development and is not yet ready for use.
19 |
20 | ## Features
21 | - A fast I/O for interacting with audio files
22 | - Feature extraction
23 | - Audio effects
24 | - Time stretching and pitch shifting
25 | - Filtering
26 | - IIR filters (Generic, Lowpass, Highpass)
27 | - Generic FIR filter implementation
28 |
29 | ## License
30 |
31 | Distributed under the Apache License Version 2.0. See LICENSE for more information.
32 |
33 | ## References
34 |
35 | - **McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto** (2015). *librosa: Audio and music signal analysis in python.* In Proceedings of the 14th python in science conference, pp. 18-25.
36 |
37 | - **Bellanger, M.** (2022). *Traitement numérique du signal. 10e édition.* Dunod.
38 |
39 | - **Wang, L., Zhao, J., & Mortier, R.** (2022). *OCaml Scientific Computing*. Springer International Publishing eBooks. DOI: [10.1007/978-3-030-97645-3](https://doi.org/10.1007/978-3-030-97645-3)
40 |
41 | - **Zoelzer, U.** (2002). *Dafx: Digital Audio Effects*. DOI: [10.1002/9781119991298](https://doi.org/10.1002/9781119991298)
42 |
43 | - **Müller, M.** (2015). *Fundamentals of Music Processing*. Cambridge International Law Journal. DOI: [10.1007/978-3-319-21945-5](https://doi.org/10.1007/978-3-319-21945-5)
44 |
45 | ## Acknowledgements
46 |
47 | * Logo generated with DALL-E by OpenAI
48 |
--------------------------------------------------------------------------------
/bench/read/bench.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import sys
5 | import time
6 | import argparse
7 | import librosa
8 | import numpy as np
9 |
10 | MIB_DIVISOR = 1024.0 * 1024.0
11 |
12 |
13 | def find_audio_files(root_dir, extension, max_files):
14 | filepaths = []
15 | extension = extension.lower()
16 |
17 | count = 0
18 | try:
19 | for dirpath, _, filenames in os.walk(root_dir, topdown=True, onerror=None):
20 | relevant_filenames = [f for f in filenames if f.lower().endswith(extension)]
21 | for filename in relevant_filenames:
22 | if count < max_files:
23 | filepaths.append(os.path.join(dirpath, filename))
24 | count += 1
25 | else:
26 | return filepaths
27 | if count >= max_files:
28 | break
29 |
30 | except OSError:
31 | pass
32 |
33 | return filepaths
34 |
35 |
36 | def get_file_size(filename) -> float:
37 | if not os.path.isfile(filename):
38 | return -1.0
39 | size = os.path.getsize(filename)
40 | if size <= 0:
41 | return 0.0
42 | return size / MIB_DIVISOR
43 |
44 |
45 | def benchmark_read(filename, target_sr) -> tuple[float, float]:
46 | size = get_file_size(filename)
47 | if size is None or size <= 0.0:
48 | return 0.0, 0.0
49 |
50 | sample_rate = target_sr if target_sr is not None and target_sr > 0 else None
51 |
52 | try:
53 | start_time = time.perf_counter()
54 | _audio, _sr = librosa.load(
55 | filename, sr=sample_rate, mono=False, dtype=np.float32
56 | )
57 | end_time = time.perf_counter()
58 | duration = end_time - start_time
59 | if not isinstance(_audio, np.ndarray) or _audio.size == 0:
60 | return -1.0, -1.0
61 |
62 | return duration, size
63 | except FileNotFoundError:
64 | return -1.0, -1.0
65 |
66 |
67 | def run_benchmark(root_dir, sample_rate, extension, max_files):
68 | all_files = find_audio_files(root_dir, extension, max_files)
69 | nfound = len(all_files)
70 |
71 | if nfound == 0:
72 | sys.exit(0)
73 |
74 | warmup_count = min(5, nfound)
75 | if warmup_count > 0:
76 | warmup_files = all_files[:warmup_count]
77 | for f in warmup_files:
78 | _ = benchmark_read(f, sample_rate)
79 |
80 | total_time = 0.0
81 | total_size = 0.0
82 |
83 | files_to_process = all_files
84 |
85 | for filename in files_to_process:
86 | duration, size = benchmark_read(filename, sample_rate)
87 |
88 | if duration > 0 and size > 0:
89 | total_time += duration
90 | total_size += size
91 |
92 | if total_time > 0 and total_size > 0:
93 | avg_speed = total_size / total_time
94 | print(f"{avg_speed:.5f}")
95 |
96 |
97 | def main():
98 | parser = argparse.ArgumentParser()
99 | parser.add_argument("root_directory", help=argparse.SUPPRESS)
100 | parser.add_argument("sample_rate", type=int, help=argparse.SUPPRESS)
101 | parser.add_argument("format", help=argparse.SUPPRESS)
102 | parser.add_argument("max_files", type=int, help=argparse.SUPPRESS)
103 |
104 | if len(sys.argv) != 5:
105 | print(
106 | f"Usage: {sys.argv[0]} ",
107 | file=sys.stderr,
108 | )
109 | sys.exit(1)
110 |
111 | args = parser.parse_args()
112 | if not os.path.isdir(args.root_directory):
113 | sys.exit(1)
114 |
115 | if args.sample_rate < 0:
116 | sys.exit(1)
117 |
118 | if args.max_files <= 0:
119 | sys.exit(1)
120 |
121 | ext = args.format.lstrip(".")
122 |
123 | run_benchmark(args.root_directory, args.sample_rate, ext, args.max_files)
124 |
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
--------------------------------------------------------------------------------
/bench/read/bench_read.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | OCAML_CMD="dune exec ./bench/read/perf.exe"
4 | PYTHON_CMD="python3 bench.py"
5 | CACHE_CLEAR_CMD="sync; echo 3 > /proc/sys/vm/drop_caches"
6 |
7 | if [ "$#" -ne 6 ]; then
8 | echo "Usage: $0 "
9 | echo " : 'ocaml' or 'python'"
10 | exit 1
11 | fi
12 |
13 | MODE=$1
14 | shift
15 |
16 | NUM_ITERATIONS=$1
17 | ROOT_DIR=$2
18 | SAMPLE_RATE=$3
19 | FORMAT=$4
20 | MAX_FILES=$5
21 |
22 | if [[ "$MODE" != "ocaml" && "$MODE" != "python" ]]; then
23 | echo "Error: must be either 'ocaml' or 'python'. You provided '$MODE'."
24 | echo "Usage: $0 "
25 | exit 1
26 | fi
27 |
28 | if ! [[ "$NUM_ITERATIONS" =~ ^[1-9][0-9]*$ ]]; then
29 | echo "Error: ('$NUM_ITERATIONS') must be a positive integer." >&2
30 | exit 1
31 | fi
32 |
33 | BENCH_CMD=""
34 | if [[ "$MODE" == "ocaml" ]]; then
35 | BENCH_CMD="$OCAML_CMD"
36 | elif [[ "$MODE" == "python" ]]; then
37 | BENCH_CMD="$PYTHON_CMD"
38 | fi
39 |
40 | results_array=()
41 | valid_run_count=0
42 |
43 | echo "Starting reading test for: $MODE"
44 | echo "Command to run: $BENCH_CMD \"$ROOT_DIR\" \"$SAMPLE_RATE\" \"$FORMAT\" \"$MAX_FILES\""
45 | echo "Number of iterations: $NUM_ITERATIONS"
46 | echo "--------------------------------------------------"
47 |
48 | for (( i=1; i<=NUM_ITERATIONS; i++ )); do
49 | echo "Iteration $i / $NUM_ITERATIONS"
50 | if sudo bash -c "$CACHE_CLEAR_CMD"; then # ensure that files aren't cached by the system
51 | sleep 1.5
52 | else
53 | echo "Error: Failed to clear cache. Probably error with sudo." >&2
54 | exit 1
55 | fi
56 | result=$( $BENCH_CMD "$ROOT_DIR" "$SAMPLE_RATE" "$FORMAT" "$MAX_FILES" )
57 | exit_status=$?
58 |
59 | if [ $exit_status -ne 0 ]; then
60 | continue
61 | fi
62 |
63 | if [[ "$result" =~ ^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$ ]]; then
64 | echo "Result: $result MiB/s"
65 | results_array+=("$result")
66 | ((valid_run_count++))
67 | fi
68 | sleep 1
69 | done
70 |
71 | echo "--------------------------------------------------"
72 |
73 | num_results=${#results_array[@]}
74 |
75 | if [ "$num_results" -eq 0 ]; then
76 | exit 1
77 | fi
78 | stats=$(printf "%s\n" "${results_array[@]}" | awk '
79 | NF == 0 { next }
80 | {
81 | if ($1 ~ /^[+-]?[0-9]*\.?[0-9]+([eE][+-]?[0-9]+)?$/) {
82 | sum += $1;
83 | sumsq += $1*$1;
84 | count++;
85 | }
86 | }
87 | END {
88 | if (count > 0) {
89 | mean = sum / count;
90 | if (count > 1) {
91 | variance = (sumsq - (sum*sum)/count) / (count-1);
92 | if (variance < 1e-12) variance = 0;
93 | stdev = sqrt(variance);
94 | } else {
95 | stdev = 0; # Standard deviation is undefined/0 for a single point
96 | }
97 | printf "%.6f %.6f %d", mean, stdev, count;
98 | } else {
99 | print "NaN NaN 0";
100 | }
101 | }
102 | ')
103 | read -r mean stdev count <<< "$stats"
104 |
105 | if [[ -z "$mean" || -z "$stdev" || -z "$count" || "$count" -eq 0 ]]; then
106 | echo "Error: Failed to calculate statistics. Awk output: '$stats'" >&2
107 | exit 1
108 | fi
109 |
110 | echo "Performance Test Summary ($MODE):"
111 | echo "-------------------------"
112 | echo "Command: $BENCH_CMD \"$ROOT_DIR\" \"$SAMPLE_RATE\" \"$FORMAT\" \"$MAX_FILES\""
113 | printf "Mean Speed (MiB/s): %.6f\n" "$mean"
114 | if [ "$count" -gt 1 ]; then
115 | printf "Std Dev Speed: %.6f\n" "$stdev"
116 | else
117 | printf "Std Dev Speed: N/A (requires >= 2 data points)\n"
118 | fi
119 |
120 | exit 0
121 |
--------------------------------------------------------------------------------
/bench/read/dune:
--------------------------------------------------------------------------------
1 | (executable
2 | (name perf)
3 | (libraries soundml))
4 |
--------------------------------------------------------------------------------
/bench/read/perf.ml:
--------------------------------------------------------------------------------
1 | open Printf
2 | open Unix
3 |
4 | let mb_divisor = 1024. *. 1024.
5 |
6 | let is_ext_file filename ext =
7 | String.lowercase_ascii (Filename.extension filename) = ext
8 |
9 | let find_ext_files root_dir ext =
10 | let rec find acc dir =
11 | try
12 | let dh = opendir dir in
13 | try
14 | let rec loop acc =
15 | match readdir dh with
16 | | exception End_of_file ->
17 | closedir dh ; acc
18 | | "." | ".." ->
19 | loop acc
20 | | entry -> (
21 | let full_path = Filename.concat dir entry in
22 | try
23 | match (stat full_path).st_kind with
24 | | S_REG when is_ext_file full_path ext ->
25 | loop (full_path :: acc)
26 | | S_DIR ->
27 | loop (find acc full_path)
28 | | _ ->
29 | loop acc
30 | with Unix_error (_, _, _) -> loop acc )
31 | in
32 | loop acc
33 | with ex ->
34 | closedir dh ;
35 | eprintf "\nError reading directory '%s': %s\n%!" dir
36 | (Printexc.to_string ex) ;
37 | acc
38 | with Unix_error (e, _, p) ->
39 | eprintf "\nError opening directory '%s': %s\n%!" p (error_message e) ;
40 | acc
41 | in
42 | find [] root_dir
43 |
44 | let get_file_size filename =
45 | try
46 | let stats = stat filename in
47 | if stats.st_kind = S_REG then Ok (float_of_int stats.st_size /. mb_divisor)
48 | else Error (sprintf "Not a regular file: %s" filename)
49 | with
50 | | Unix_error (e, _, _) ->
51 | Error (sprintf "Cannot stat file '%s': %s" filename (error_message e))
52 | | Sys_error msg ->
53 | Error (sprintf "System error statting '%s': %s" filename msg)
54 |
55 | let benchmark_read kind filename sample_rate =
56 | match get_file_size filename with
57 | | Error msg ->
58 | Error (filename, msg)
59 | | Ok size_mb -> (
60 | if size_mb <= 0.0 then Error (filename, "Incorrect file size")
61 | else
62 | try
63 | let res_typ =
64 | match sample_rate with 0 -> Io.NONE | _ -> Io.SOXR_HQ
65 | in
66 | let start_time = Unix.gettimeofday () in
67 | let _audio =
68 | Soundml.Io.read ~res_typ ~sample_rate ~mono:false kind filename
69 | in
70 | let end_time = Unix.gettimeofday () in
71 | let duration = end_time -. start_time in
72 | Ok (duration, size_mb)
73 | with ex -> Error (filename, Printexc.to_string ex) )
74 |
75 | let run_benchmark root sample_rate extension max_files =
76 | let kind = Bigarray.Float32 in
77 | let all_files = find_ext_files root extension in
78 | let all_files =
79 | List.filteri (fun i _ -> if i >= max_files then false else true) all_files
80 | in
81 | let total_files = List.length all_files in
82 | if total_files = 0 then exit 0 ;
83 | let warmup_count = min 5 total_files in
84 | ( if warmup_count > 0 then
85 | let warmup_files = List.filteri (fun i _ -> i < warmup_count) all_files in
86 | List.iter
87 | (fun f ->
88 | match benchmark_read kind f sample_rate with
89 | | Ok _ ->
90 | ()
91 | | Error _ ->
92 | () )
93 | warmup_files ) ;
94 | let total_time = ref 0.0 in
95 | let total_size = ref 0.0 in
96 | List.iter
97 | (fun filename ->
98 | match benchmark_read kind filename sample_rate with
99 | | Ok (duration, size_mb) ->
100 | total_time := !total_time +. duration ;
101 | total_size := !total_size +. size_mb
102 | | Error _ ->
103 | () )
104 | all_files ;
105 | if !total_time > 0.0 && !total_size > 0.0 then
106 | let avg_speed = !total_size /. !total_time in
107 | printf "%.5f\n" avg_speed
108 |
109 | let () =
110 | if Array.length Sys.argv <> 5 then
111 | eprintf "Usage: %s \n"
112 | Sys.argv.(0)
113 | else
114 | let root_dir = Sys.argv.(1) in
115 | let sample_rate = int_of_string Sys.argv.(2) in
116 | let extension = Sys.argv.(3) in
117 | let max_files = int_of_string Sys.argv.(4) in
118 | if not (Sys.file_exists root_dir && Sys.is_directory root_dir) then
119 | eprintf "Can't read directory: %s.\n" root_dir
120 | else
121 | try run_benchmark root_dir sample_rate extension max_files
122 | with ex ->
123 | eprintf "An unexpected error occurred: %s\n" (Printexc.to_string ex) ;
124 | exit 1
125 |
--------------------------------------------------------------------------------
/bench/stft/dune:
--------------------------------------------------------------------------------
1 | (executable
2 | (name perf)
3 | (libraries core core_bench soundml))
4 |
--------------------------------------------------------------------------------
/bench/stft/perf.ml:
--------------------------------------------------------------------------------
1 | open! Core
2 | open! Core_bench
3 | open Soundml
4 |
5 | let path = Sys_unix.getcwd () ^ "/bench/stft/wav_stereo_44100hz_1s.wav"
6 |
7 | let f32audio = Audio.data @@ Io.read ~res_typ:Io.NONE Bigarray.Float32 path
8 |
9 | let f64audio = Audio.data @@ Io.read ~res_typ:Io.NONE Bigarray.Float64 path
10 |
11 | let main () =
12 | Command_unix.run
13 | (Bench.make_command
14 | [ Bench.Test.create ~name:"float32" (fun () ->
15 | ignore (Transform.stft Types.B32 f32audio) )
16 | ; Bench.Test.create ~name:"float64" (fun () ->
17 | ignore (Transform.stft Types.B64 f64audio) ) ] )
18 |
19 | let () = main ()
20 |
--------------------------------------------------------------------------------
/bench/stft/perf.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import librosa
3 | import numpy as np
4 | import os
5 |
6 | AUDIO_FILE_PATH = os.path.join(os.getcwd(), "bench/stft/wav_stereo_44100hz_1s.wav")
7 |
8 | STFT_CONFIGURATIONS = [
9 | {"n_fft": 2048, "win_length": 2048, "hop_length": 512, "window_type": "hann"}
10 | ]
11 |
12 | y_f32, sr_f32 = librosa.load(AUDIO_FILE_PATH, sr=None, mono=True, dtype=np.float32)
13 | y_f64, sr_f64 = librosa.load(AUDIO_FILE_PATH, sr=None, mono=True, dtype=np.float64)
14 | SIGNAL_LENGTH = len(y_f32)
15 |
16 | @pytest.mark.parametrize("config", STFT_CONFIGURATIONS)
17 | @pytest.mark.parametrize("precision", ["float32", "float64"])
18 | def test_librosa_stft(benchmark, config, precision):
19 | if precision == "float32":
20 | audio_data = y_f32
21 | elif precision == "float64":
22 | audio_data = y_f64
23 | else:
24 | raise ValueError("Invalid precision")
25 |
26 | result = benchmark(
27 | librosa.stft,
28 | y=audio_data,
29 | n_fft=config["n_fft"],
30 | hop_length=config["hop_length"],
31 | win_length=config["win_length"],
32 | window=config["window_type"],
33 | center=False
34 | )
35 | assert result is not None
36 |
--------------------------------------------------------------------------------
/bench/stft/wav_stereo_44100hz_1s.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gabyfle/SoundML/95f3d969cc225781b22d8ffa86c4d5e531fb0db8/bench/stft/wav_stereo_44100hz_1s.wav
--------------------------------------------------------------------------------
/doc/dune:
--------------------------------------------------------------------------------
1 | (documentation
2 | (package soundml)
3 | (mld_files index))
4 |
--------------------------------------------------------------------------------
/doc/index.mld:
--------------------------------------------------------------------------------
1 | {0 SoundML}
2 |
3 | {%html:
4 |
5 |
6 |
7 | %}
8 |
9 | {!Soundml} is an OCaml library that enables audio processing. It provides tools to read, write, extract features and manipulate audio as time series data.
10 | The library is built on top of the Owl library and provides a high-level API to work with audio data. It's still in an experimental stage.
11 |
12 |
13 | {1 API Documentation}
14 |
15 | To see the developer API documentation for SoundML, you can visit the following link: {{!Soundml}here}.
16 |
17 | {1 Author}
18 |
19 | {{:https://gabyfle.dev}Gabriel "gabyfle" Santamaria}
--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
1 | (lang dune 3.18)
2 |
3 | (name soundml)
4 |
5 | (generate_opam_files true)
6 |
7 | (source
8 | (github gabyfle/SoundML))
9 |
10 | (authors "Gabriel Santamaria ")
11 |
12 | (maintainers "Gabriel Santamaria ")
13 |
14 | (license Apache-2.0)
15 |
16 | (documentation https://soundml.gabyfle.dev)
17 |
18 | (package
19 | (name soundml)
20 | (synopsis "An OCaml library to embed sound processing in your applications")
21 | (description
22 | "SoundML is a library built on top of Owl to analyse sounds files. It can read, write audio, extract various features from audio files and much more.")
23 | (depends
24 | (ocaml
25 | (>= 5.2.0))
26 | dune
27 | (conf-sndfile :build)
28 | (conf-rubberband :build)
29 | ; Samplerate is already a requirement of rubberband
30 | (conf-samplerate :build)
31 | (dune-configurator :build)
32 | (owl ; Version 1.2 is required to have the DC transform
33 | (>= 1.2))
34 | (odoc :with-doc)
35 | (alcotest :with-test)
36 | (yojson :with-test)
37 | (bisect_ppx
38 | (and
39 | :with-test
40 | (>= 2.5.0))))
41 | (tags
42 | (sound spectrogram fourier digital-filters)))
43 |
--------------------------------------------------------------------------------
/soundml.opam:
--------------------------------------------------------------------------------
1 | # This file is generated by dune, edit dune-project instead
2 | opam-version: "2.0"
3 | synopsis: "An OCaml library to embed sound processing in your applications"
4 | description:
5 | "SoundML is a library built on top of Owl to analyse sounds files. It can read, write audio, extract various features from audio files and much more."
6 | maintainer: ["Gabriel Santamaria "]
7 | authors: ["Gabriel Santamaria "]
8 | license: "Apache-2.0"
9 | tags: ["sound" "spectrogram" "fourier" "digital-filters"]
10 | homepage: "https://github.com/gabyfle/SoundML"
11 | doc: "https://soundml.gabyfle.dev"
12 | bug-reports: "https://github.com/gabyfle/SoundML/issues"
13 | depends: [
14 | "ocaml" {>= "5.2.0"}
15 | "dune" {>= "3.18"}
16 | "conf-sndfile" {build}
17 | "conf-rubberband" {build}
18 | "conf-samplerate" {build}
19 | "dune-configurator" {build}
20 | "owl" {>= "1.2"}
21 | "odoc" {with-doc}
22 | "alcotest" {with-test}
23 | "yojson" {with-test}
24 | "bisect_ppx" {with-test & >= "2.5.0"}
25 | ]
26 | build: [
27 | ["dune" "subst"] {dev}
28 | [
29 | "dune"
30 | "build"
31 | "-p"
32 | name
33 | "-j"
34 | jobs
35 | "@install"
36 | "@runtest" {with-test}
37 | "@doc" {with-doc}
38 | ]
39 | ]
40 | dev-repo: "git+https://github.com/gabyfle/SoundML.git"
41 | x-maintenance-intent: ["(latest)"]
42 |
--------------------------------------------------------------------------------
/src/aformat.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023-2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 | (**
22 | The {!Aformat} (audio format) module is an abstraction over the different supported audio format from libsndfile. *)
23 |
24 | type ftype =
25 | | WAV
26 | | AIFF
27 | | AU
28 | | RAW
29 | | PAF
30 | | SVX
31 | | NIST
32 | | VOC
33 | | IRCAM
34 | | W64
35 | | MAT4
36 | | MAT5
37 | | PVF
38 | | XI
39 | | HTK
40 | | SDS
41 | | AVR
42 | | WAVEX
43 | | SD2
44 | | FLAC
45 | | CAF
46 | | WVE
47 | | OGG
48 | | MPC2K
49 | | RF64
50 | | MP3
51 |
52 | type subtype =
53 | | PCM_S8
54 | | PCM_16
55 | | PCM_24
56 | | PCM_32
57 | | PCM_U8
58 | | FLOAT
59 | | DOUBLE
60 | | ULAW
61 | | ALAW
62 | | IMA_ADPCM
63 | | MS_ADPCM
64 | | GSM610
65 | | VOX_ADPCM
66 | | NMS_ADPCM_16
67 | | NMS_ADPCM_24
68 | | NMS_ADPCM_32
69 | | G721_32
70 | | G723_24
71 | | G723_40
72 | | DVW_12
73 | | DVW_16
74 | | DVW_24
75 | | DVW_N
76 | | DPCM_8
77 | | DPCM_16
78 | | VORBIS
79 | | OPUS
80 | | ALAC_16
81 | | ALAC_20
82 | | ALAC_24
83 | | ALAC_32
84 | | MPEG_LAYER_I
85 | | MPEG_LAYER_II
86 | | MPEG_LAYER_III
87 |
88 | type endianness = FILE | LITTLE | BIG | CPU
89 |
90 | (** The type for an audio format specification. *)
91 | type t = {ftype: ftype; sub: subtype; endian: endianness}
92 |
93 | val create :
94 | ?subtype:subtype -> ?endian:endianness -> ftype -> (t, string) result
95 | (**
96 | [create ?subtype ?endian ftype] creates a new audio format representation based on the given format specifications.s
97 |
98 | {2 Parameters}
99 | @param subtype is the subtype of the audio file. If not specified, it'll be set to a default value according to the file type.
100 | @param endian is the endianness of the audio file. If not specified, it'll be set to [FILE], which is the default file endianness.
101 | @param ftype is the file type of the audio file.
102 |
103 | {2 Returns}
104 |
105 | @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed.
106 |
107 | {2 Usage}
108 |
109 | Creating a new audio format is as simple as calling the [create] function with the desired parameters.
110 |
111 | For the [RAW] file type, the subtype is required. Not specifying one will result in an error.
112 |
113 | {[
114 | open Soundml.Io
115 | (* This will create a new WAV audio format with PCM_16 subtype and little endian. *)
116 | let fmt = Afmt.create ~subtype:Io.Afmt.PCM_16 ~endian:Io.Afmt.LITTLE Io.Aformat.WAV in
117 | ]} *)
118 |
119 | val to_int : t -> int
120 | (**
121 | [to_int fmt] converts the audio format to an integer representation compatible with libsndfile.
122 |
123 | {2 Parameters}
124 | @param fmt the format that we need to convert to an integer value.
125 |
126 | {2 Returns}
127 | @return The integer representation of the audio format. *)
128 |
129 | val of_int : int -> (t, string) result
130 | (**
131 | [of_int code] converts the integer representation of the audio format to a {!Aformat.t} type.
132 |
133 | {2 Parameters}
134 | @param code is the integer representation of the audio format we're trying to convert.
135 |
136 | @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. *)
137 |
138 | val of_ext : ?sub:subtype -> ?endian:endianness -> string -> (t, string) result
139 | (**
140 | [of_ext ?sub ?endian ext] tries to convert the given file extension to an audio format type.
141 |
142 | This function assumes that the extension is given with its leading dot (e.g. [".wav"]) and is thus compatible with the [Filename] module.
143 |
144 | {2 Parameters}
145 | @param sub is the subtype of the audio file. If not specified, it'll be set to a default value according to the file type.
146 | @param endian is the endianness of the audio file. If not specified, it'll be set to [FILE], which is the default file endianness.
147 | @param ext is the file extension we're trying to convert.
148 |
149 | {2 Returns}
150 | @return A result type, where [Ok t] is the created format and [Error msg] is an error message indicating why it failed. *)
151 |
152 | val pp : Format.formatter -> t -> unit
153 | (**
154 | [pp fmt] pretty prints the audio format to the given formatter.
155 |
156 | {2 Parameters}
157 | @param fmt is the formatter to use for printing the audio format. *)
158 |
--------------------------------------------------------------------------------
/src/audio.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Owl
23 |
24 | (* generic multi-dimensionnal array *)
25 | module G = Dense.Ndarray.Generic
26 | module Aformat = Aformat
27 |
28 | module Metadata = struct
29 | type t =
30 | { name: string
31 | ; frames: int
32 | ; channels: int
33 | ; sample_rate: int
34 | ; format: Aformat.t }
35 |
36 | let create ?(name : string = "Unknown") frames channels sample_rate format =
37 | {name; frames; channels; sample_rate; format}
38 |
39 | let name (m : t) = m.name
40 |
41 | let frames (m : t) = m.frames
42 |
43 | let channels (m : t) = m.channels
44 |
45 | let sample_rate (m : t) = m.sample_rate
46 |
47 | let format (m : t) = m.format
48 | end
49 |
50 | type 'a audio = {meta: Metadata.t; data: (float, 'a) G.t}
51 |
52 | let create (meta : Metadata.t) data = {meta; data}
53 |
54 | let meta (a : 'a audio) = a.meta
55 |
56 | let rawsize (a : 'a audio) = G.numel a.data
57 |
58 | let length (a : 'a audio) : int =
59 | let meta = meta a in
60 | let channels = float_of_int (Metadata.channels meta) in
61 | let sr = float_of_int (Metadata.sample_rate meta) in
62 | let size = float_of_int (rawsize a) /. channels in
63 | Int.of_float (size /. sr *. 1000.)
64 |
65 | let data (a : 'a audio) = a.data
66 |
67 | let sr (a : 'a audio) = Metadata.sample_rate @@ meta a
68 |
69 | let channels (a : 'a audio) = Metadata.channels @@ meta a
70 |
71 | let samples (a : 'a audio) = Metadata.frames @@ meta a
72 |
73 | let format (a : 'a audio) = Metadata.format @@ meta a
74 |
75 | let set_data (a : 'a audio) (d : (float, 'a) G.t) = {a with data= d}
76 |
77 | let sample_pos (a : 'a audio) (x : int) =
78 | Int.of_float
79 | ( float_of_int x /. 1000.
80 | *. float_of_int (Metadata.sample_rate (meta a))
81 | *. float_of_int (Metadata.channels (meta a)) )
82 |
83 | let get_slice (slice : int * int) (a : 'a audio) : 'a audio =
84 | let x, y = slice in
85 | let x, y =
86 | match (sample_pos a x, sample_pos a y) with
87 | | x, y when x < 0 ->
88 | (rawsize a + x, y)
89 | | x, y when y < 0 ->
90 | (x, rawsize a + y)
91 | | x, y when x < 0 && y < 0 ->
92 | (rawsize a + x, rawsize a + y)
93 | | x, y ->
94 | (x, y)
95 | in
96 | let x, y = if x < y then (x, y) else (y, x) in
97 | if x < 0 || y < 0 then
98 | raise
99 | (Invalid_argument "Audio.get_slice: slice out of bounds, negative values")
100 | else if x >= rawsize a || y >= rawsize a then
101 | raise
102 | (Invalid_argument
103 | "Audio.get_slice: slice out of bounds, values greater than rawsize" )
104 | else
105 | let data = G.get_slice [[x; y]] a.data in
106 | {a with data}
107 |
108 | let get (x : int) (a : 'a audio) : float =
109 | let slice = get_slice (x, x) a |> data in
110 | G.get slice [|0|]
111 |
112 | let normalize ?(factor : float = 2147483647.) (a : 'a audio) : unit =
113 | G.scalar_mul_ (1. /. factor) a.data
114 |
115 | let reverse (x : 'a audio) : 'a audio =
116 | let data = G.reverse x.data in
117 | {x with data}
118 |
119 | let ( .${} ) x s = get_slice s x
120 |
121 | let ( .%{} ) i x = get x i
122 |
123 | let ( $/ ) x f = normalize ~factor:f x
124 |
125 | let ( /$ ) f x = normalize ~factor:f x
126 |
--------------------------------------------------------------------------------
/src/audio.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | (**
23 | The {!Audio} module defines the needed types around the representation of
24 | an audio file and more precisely an audio data. *)
25 |
26 | open Owl
27 |
28 | (**
29 | Alias of the generic [Ndarray] datastructure from [Owl]. This is used
30 | internally to make the computations around the audio data *)
31 | module G = Dense.Ndarray.Generic
32 |
33 | (** @canonical Audio.Aformat
34 | Abstraction over the different supported audio format from libsndfile. *)
35 | module Aformat = Aformat
36 |
37 | (**
38 | {1 Audio Metadata}
39 |
40 | This module contains the metadata of an audio file, which is used to store
41 | information about the audio file when reading it from the filesystem.
42 |
43 | Note: {!Metadata} in {!Soundml} isn't the same thing as the metadata attached
44 | to audio files. In {!Soundml}, we refer to {!Metadata} all the data describing
45 | the audio file in itself (sample rate, number of channels, etc...). If you are
46 | interested in dealing with author name, label and other metadata, we recommend
47 | using the {{:https://github.com/savonet/ocaml-mm} ocaml-mm} library instead. *)
48 |
49 | module Metadata : sig
50 | type t
51 |
52 | val create : ?name:string -> int -> int -> int -> Aformat.t -> t
53 | (**
54 | [create ?name channels frames sample_rate format] creates a new metadata
55 | with the given name, number of channels, number of frames, sample rate
56 | and format. The name is optional and defaults to [""] *)
57 |
58 | val name : t -> string
59 | (**
60 | [name meta] returns the name of the file represented by the metadata *)
61 |
62 | val frames : t -> int
63 |
64 | val channels : t -> int
65 | (**
66 | [channels meta] returns the number of channels of the audio file *)
67 |
68 | val sample_rate : t -> int
69 | (**
70 | [sample_rate meta] returns the sample rate of the audio file *)
71 |
72 | val format : t -> Aformat.t
73 | (**
74 | [format meta] returns the format of the audio file *)
75 | end
76 |
77 | (**
78 | {1 Audio manipulation}
79 |
80 | Most of these functions are used internally, and you'll probably just use the {!Audio.normalize}
81 | function to normalize the audio data before writing it back to a file. *)
82 |
83 | (**
84 | High level representation of an audio file data, used to store data when reading audio files. *)
85 | type 'a audio
86 |
87 | val create : Metadata.t -> (float, 'a) G.t -> 'a audio
88 | (**
89 | [create metadata data] creates a new audio with the given name and metadata *)
90 |
91 | val meta : 'a audio -> Metadata.t
92 | (**
93 | [meta audio] returns the metadata attached to the given audio *)
94 |
95 | val rawsize : 'a audio -> int
96 | (**
97 | [rawsize audio] returns the raw size of the given audio *)
98 |
99 | val length : 'a audio -> int
100 | (**
101 | [length audio] returns the length (in milliseconds) of the given audio *)
102 |
103 | val data : 'a audio -> (float, 'a) Owl.Dense.Ndarray.Generic.t
104 | (**
105 | [data audio] returns the data of the given audio *)
106 |
107 | val sr : 'a audio -> int
108 | (**
109 | [sr audio] returns the sample rate of the given audio *)
110 |
111 | val channels : 'a audio -> int
112 | (**
113 | [channels audio] returns the number of channels of the given audio *)
114 |
115 | val samples : 'a audio -> int
116 | (**
117 | [samples audio] returns the number of samples per channel in the given audio *)
118 |
119 | val format : 'a audio -> Aformat.t
120 | (**
121 | [format audio] returns the format of the given audio *)
122 |
123 | val set_data : 'a audio -> (float, 'a) Owl.Dense.Ndarray.Generic.t -> 'a audio
124 | (**
125 | [set_data audio data] sets the data of the given audio *)
126 |
127 | val get : int -> 'a audio -> float
128 | (**
129 | [get x audio] returns the sample located at the position [x] in milliseconds.
130 |
131 | The position [x] must be between 0 and the length of the audio.
132 |
133 | Example:
134 |
135 | {[
136 | let audio = Audio.read "audio.wav" in
137 | let sample = Audio.get 1000 audio in (* get the sample at 1 second *)
138 | ]} *)
139 |
140 | val get_slice : int * int -> 'a audio -> 'a audio
141 | (**
142 | [get_slice (start, stop) audio] returns a slice of the audio from the position [start] to [stop].
143 |
144 | The position [start] and [stop] must be between 0 and the length of the audio.
145 |
146 | This function works like Owl's slicing. Giving negative values to [start] and [stop] will slice the audio
147 | from the end of the audio .
148 |
149 | Example:
150 |
151 | {[
152 | let audio = Audio.read "audio.wav" in
153 | let slice = Audio.get_slice audio 1000 2000 in (* get the slice from 1 to 2 seconds *)
154 | ]} *)
155 |
156 | val normalize : ?factor:float -> 'a audio -> unit
157 | (**
158 | [normalize ?factor audio] normalizes the data of the given audio data by
159 | the [?factor] parameter, by default equal to $2^31 - 1$.
160 |
161 | Use this function when you need to normalize the audio data by a certain factor.
162 |
163 | Warning: if you normalize the data and end up getting values that goes
164 | beyond 1.0 or under -1.0, it will surely make the audio sound distorted.
165 |
166 | The operation is performed in place (impure function).
167 |
168 | Example:
169 |
170 | {[
171 | let audio = Audio.read "audio.wav" in
172 | (* you can perform any operation here *)
173 | (* ... *)
174 | let factor = (* ... *) in
175 | Audio.normalize ?factor audio; (* normalizing before writing *)
176 | Audio.write audio "audio.wav"
177 | ]} *)
178 |
179 | val reverse : 'a audio -> 'a audio
180 | (**
181 | [reverse audio] reverses the audio data.
182 | This function does not operate in place: a new audio is created with the reversed data.
183 |
184 | Example:
185 |
186 | {[
187 | let audio = Audio.read "audio.wav" in
188 | let audio = Audio.reverse audio in
189 | Audio.write audio "reversed.wav"
190 | ]} *)
191 |
192 | (**
193 | {2 Operators on audio data}
194 |
195 | Following the Owl's conventions, few operators are available to deal with
196 | audio data. You can use them to make the code more concise and more readable.
197 | They are just syntaxic sugar on functions over the {!Audio.audio} type. *)
198 |
199 | val ( .%{} ) : 'a audio -> int -> float
200 | (** Operator of {!Audio.get} *)
201 |
202 | val ( .${} ) : 'a audio -> int * int -> 'a audio
203 | (** Operator of {!Audio.get_slice} *)
204 |
205 | val ( $/ ) : 'a audio -> float -> unit
206 | (** Operator of {!Audio.normalize} *)
207 |
208 | val ( /$ ) : float -> 'a audio -> unit
209 | (** Operator of {!Audio.normalize} *)
210 |
--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
1 | (library
2 | (name aformat)
3 | (package soundml)
4 | (modules aformat)
5 | (instrumentation
6 | (backend bisect_ppx))
7 | (wrapped false))
8 |
9 | (library
10 | (name types)
11 | (package soundml)
12 | (modules types)
13 | (instrumentation
14 | (backend bisect_ppx))
15 | (wrapped false))
16 |
17 | (library
18 | (name audio)
19 | (package soundml)
20 | (libraries owl types aformat)
21 | (modules audio)
22 | (instrumentation
23 | (backend bisect_ppx))
24 | (wrapped false))
25 |
26 | (library
27 | (name window)
28 | (package soundml)
29 | (libraries owl types audio)
30 | (modules window)
31 | (instrumentation
32 | (backend bisect_ppx))
33 | (wrapped false))
34 |
35 | (library
36 | (name utils)
37 | (package soundml)
38 | (libraries audio types owl)
39 | (modules utils)
40 | (instrumentation
41 | (backend bisect_ppx))
42 | (wrapped false))
43 |
44 | (library
45 | (name transform)
46 | (package soundml)
47 | (modules transform)
48 | (libraries owl window types utils)
49 | (instrumentation
50 | (backend bisect_ppx))
51 | (wrapped false))
52 |
53 | (library
54 | (name filterbank)
55 | (package soundml)
56 | (modules filterbank)
57 | (libraries owl utils)
58 | (instrumentation
59 | (backend bisect_ppx))
60 | (wrapped false))
61 |
62 | (library
63 | (name soundml)
64 | (public_name soundml)
65 | (modules soundml)
66 | (libraries owl audio io types feature filterbank transform effects window)
67 | (instrumentation
68 | (backend bisect_ppx)))
69 |
--------------------------------------------------------------------------------
/src/effects/dune:
--------------------------------------------------------------------------------
1 | (include_subdirs qualified)
2 |
3 | (library
4 | (name effects)
5 | (package soundml)
6 | (foreign_stubs
7 | (language cxx)
8 | (names rubberband_stubs)
9 | (flags
10 | :standard
11 | (:include c_flags.sexp)
12 | -std=c++23
13 | -O3))
14 | (c_library_flags
15 | (:include c_library_flags.sexp))
16 | (libraries audio utils owl)
17 | (instrumentation
18 | (backend bisect_ppx))
19 | (wrapped true))
20 |
21 | (rule
22 | (targets c_flags.sexp c_library_flags.sexp)
23 | (action
24 | (run ./time/config/discover.exe)))
25 |
--------------------------------------------------------------------------------
/src/effects/filter/filter.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | module type S = sig
23 | type t
24 |
25 | type params
26 |
27 | val reset : t -> t
28 |
29 | val create : params -> t
30 |
31 | val process_sample : t -> float -> float
32 | end
33 |
34 | module Make (S : S) = struct
35 | type t = S.t
36 |
37 | type params = S.params
38 |
39 | let reset = S.reset
40 |
41 | let create = S.create
42 |
43 | let process_sample = S.process_sample
44 |
45 | let process (t : t) (x : (Float.t, 'a) Audio.G.t) =
46 | let kd = Audio.G.kind x in
47 | let n = Audio.G.numel x in
48 | let y = Audio.G.create kd [|n|] 0. in
49 | for i = 0 to n - 1 do
50 | Audio.G.set y [|i|] (process_sample t (Audio.G.get x [|i|]))
51 | done ;
52 | y
53 | end
54 |
55 | module IIR = struct
56 | module Generic = Make (Iir)
57 | module HighPass = Make (Highpass)
58 | module LowPass = Make (Lowpass)
59 | end
60 |
61 | module FIR = struct
62 | module Generic = Make (Fir)
63 | end
64 |
--------------------------------------------------------------------------------
/src/effects/filter/filter.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | module type S = sig
23 | type t
24 |
25 | type params
26 |
27 | val reset : t -> t
28 |
29 | val create : params -> t
30 |
31 | val process_sample : t -> float -> float
32 | end
33 |
34 | module Make : functor (S : S) -> sig
35 | type t = S.t
36 |
37 | type params = S.params
38 |
39 | val reset : S.t -> S.t
40 |
41 | val create : S.params -> S.t
42 |
43 | val process_sample : S.t -> float -> float
44 |
45 | val process :
46 | S.t
47 | -> (float, 'a) Owl_dense_ndarray.Generic.t
48 | -> (float, 'a) Owl_dense_ndarray.Generic.t
49 | end
50 |
51 | module IIR : sig
52 | module Generic : sig
53 | type t = Iir.t
54 |
55 | type params = Iir.params
56 |
57 | val reset : t -> t
58 |
59 | val create : params -> t
60 |
61 | val process_sample : t -> float -> float
62 |
63 | val process :
64 | t
65 | -> (float, 'a) Owl_dense_ndarray.Generic.t
66 | -> (float, 'a) Owl_dense_ndarray.Generic.t
67 | end
68 |
69 | module HighPass : sig
70 | type t = Highpass.t
71 |
72 | type params = Highpass.params
73 |
74 | val reset : t -> t
75 |
76 | val create : params -> t
77 |
78 | val process_sample : t -> float -> float
79 |
80 | val process :
81 | t
82 | -> (float, 'a) Owl_dense_ndarray.Generic.t
83 | -> (float, 'a) Owl_dense_ndarray.Generic.t
84 | end
85 |
86 | module LowPass : sig
87 | type t = Lowpass.t
88 |
89 | type params = Lowpass.params
90 |
91 | val reset : t -> t
92 |
93 | val create : params -> t
94 |
95 | val process_sample : t -> float -> float
96 |
97 | val process :
98 | t
99 | -> (float, 'a) Owl_dense_ndarray.Generic.t
100 | -> (float, 'a) Owl_dense_ndarray.Generic.t
101 | end
102 | end
103 |
104 | module FIR : sig
105 | module Generic : sig
106 | type t = Fir.t
107 |
108 | type params = Fir.params
109 |
110 | val reset : t -> t
111 |
112 | val create : params -> t
113 |
114 | val process_sample : t -> float -> float
115 |
116 | val process :
117 | t
118 | -> (float, 'a) Owl_dense_ndarray.Generic.t
119 | -> (float, 'a) Owl_dense_ndarray.Generic.t
120 | end
121 | end
122 |
--------------------------------------------------------------------------------
/src/effects/filter/fir.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Iir
23 |
24 | type t = Iir.t
25 |
26 | type params = {b: float array}
27 |
28 | let reset t = Iir.reset t
29 |
30 | let create ({b} : params) = Iir.create {a= [|1.|]; b}
31 |
32 | let process_sample = Iir.process_sample
33 |
--------------------------------------------------------------------------------
/src/effects/filter/fir.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type t
23 |
24 | type params = {b: float array}
25 |
26 | val reset : t -> t
27 |
28 | val create : params -> t
29 |
30 | val process_sample : t -> float -> float
31 |
--------------------------------------------------------------------------------
/src/effects/filter/highpass.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Iir
23 |
24 | type t = Iir.t
25 |
26 | type params = {cutoff: float; sample_rate: int}
27 |
28 | let create ({cutoff; sample_rate} : params) =
29 | let fs = sample_rate |> float_of_int in
30 | let fc = cutoff in
31 | let r = Float.tan (Float.pi *. fc /. fs) in
32 | let c = (r -. 1.) /. (r +. 1.) in
33 | let a = [|1.0; c|] in
34 | let b = [|(1.0 -. c) /. 2.0; (c -. 1.0) /. 2.0|] in
35 | create {a; b}
36 |
37 | let reset = reset
38 |
39 | let process_sample = process_sample
40 |
--------------------------------------------------------------------------------
/src/effects/filter/highpass.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type t
23 |
24 | type params = {cutoff: float; sample_rate: int}
25 |
26 | val reset : t -> t
27 |
28 | val create : params -> t
29 |
30 | val process_sample : t -> float -> float
31 |
--------------------------------------------------------------------------------
/src/effects/filter/iir.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type params = {a: float array; b: float array}
23 |
24 | type t =
25 | { b: (float, Bigarray.float32_elt) Audio.G.t
26 | ; a: (float, Bigarray.float32_elt) Audio.G.t
27 | ; state: (float, Bigarray.float32_elt) Audio.G.t }
28 |
29 | let reset t = Audio.G.fill t.state 0. ; t
30 |
31 | let create ({a; b} : params) =
32 | let a = Audio.G.of_array Bigarray.Float32 a [|Array.length a|] in
33 | let b = Audio.G.of_array Bigarray.Float32 b [|Array.length b|] in
34 | let size = max (Audio.G.numel a) (Audio.G.numel b) in
35 | let a = Audio.G.(a /$ get a [|0|]) in
36 | (*let b = Audio.G.(b /$ get b [|0|]) in*)
37 | let state = Audio.G.create Bigarray.Float32 [|size|] 0. in
38 | {b; a; state}
39 |
40 | let process_sample t (x : float) =
41 | let n = Audio.G.numel t.state in
42 | let y =
43 | if n > 0 then (Audio.G.get t.b [|0|] *. x) +. Audio.G.get t.state [|0|]
44 | else 0.
45 | in
46 | let nb = Audio.G.numel t.b in
47 | let na = Audio.G.numel t.a in
48 | for i = 0 to Audio.G.numel t.state - 1 do
49 | let b = if i + 1 < nb then Audio.G.get t.b [|i + 1|] *. x else 0. in
50 | let a =
51 | if i + 1 < na then Float.neg (Audio.G.get t.a [|i + 1|]) *. y else 0.
52 | in
53 | if i < n - 1 then
54 | Audio.G.set t.state [|i|] (Audio.G.get t.state [|i + 1|] +. b +. a)
55 | else Audio.G.set t.state [|i|] (b +. a)
56 | done ;
57 | y
58 |
--------------------------------------------------------------------------------
/src/effects/filter/iir.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type t
23 |
24 | type params = {a: float array; b: float array}
25 |
26 | val reset : t -> t
27 |
28 | val create : params -> t
29 |
30 | val process_sample : t -> float -> float
31 |
--------------------------------------------------------------------------------
/src/effects/filter/lowpass.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Iir
23 |
24 | type t = Iir.t
25 |
26 | type params = {cutoff: float; sample_rate: int}
27 |
28 | let create ({cutoff; sample_rate} : params) =
29 | let fs = float_of_int sample_rate in
30 | let fc = cutoff in
31 | let r = Float.tan (Float.pi *. fc /. fs) in
32 | let c = (r -. 1.) /. (r +. 1.) in
33 | let a = [|1.0; c|] in
34 | let b = [|(1.0 +. c) /. 2.0; (1.0 +. c) /. 2.0|] in
35 | Iir.create {a; b}
36 |
37 | let reset = reset
38 |
39 | let process_sample = process_sample
40 |
--------------------------------------------------------------------------------
/src/effects/filter/lowpass.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type t
23 |
24 | type params = {cutoff: float; sample_rate: int}
25 |
26 | val reset : t -> t
27 |
28 | val create : params -> t
29 |
30 | val process_sample : t -> float -> float
31 |
--------------------------------------------------------------------------------
/src/effects/time/config/discover.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | module C = Configurator.V1
23 |
24 | let () =
25 | C.main ~name:"rubberband-pkg-config" (fun c ->
26 | let default : C.Pkg_config.package_conf =
27 | {libs= ["-lrubberband"]; cflags= []}
28 | in
29 | let conf =
30 | match C.Pkg_config.get c with
31 | | None ->
32 | default
33 | | Some pc -> (
34 | match C.Pkg_config.query pc ~package:"rubberband" with
35 | | None ->
36 | default
37 | | Some deps ->
38 | deps )
39 | in
40 | C.Flags.write_sexp "c_flags.sexp" conf.cflags ;
41 | C.Flags.write_sexp "c_library_flags.sexp" conf.libs )
42 |
--------------------------------------------------------------------------------
/src/effects/time/config/dune:
--------------------------------------------------------------------------------
1 | (include_subdirs no)
2 |
3 | (executable
4 | (name discover)
5 | (libraries dune.configurator))
6 |
--------------------------------------------------------------------------------
/src/effects/time/rubberband_stubs.cxx:
--------------------------------------------------------------------------------
1 | /*****************************************************************************/
2 | /* */
3 | /* */
4 | /* Copyright (C) 2023-2025 */
5 | /* Gabriel Santamaria */
6 | /* */
7 | /* */
8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 | /* you may not use this file except in compliance with the License. */
10 | /* You may obtain a copy of the License at */
11 | /* */
12 | /* http://www.apache.org/licenses/LICENSE-2.0 */
13 | /* */
14 | /* Unless required by applicable law or agreed to in writing, software */
15 | /* distributed under the License is distributed on an "AS IS" BASIS, */
16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 | /* See the License for the specific language governing permissions and */
18 | /* limitations under the License. */
19 | /* */
20 | /*****************************************************************************/
21 |
22 | #include
23 | #include
24 | #include
25 |
26 | extern "C"
27 | {
28 | #include
29 | #include
30 | #include
31 | #include
32 | #include
33 | }
34 |
35 | namespace SoundML
36 | {
37 | namespace Effects
38 | {
39 | namespace Time
40 | {
41 | using namespace RubberBand;
42 |
43 | /**
44 | * @brief Stretches the input audio data using RubberBand.
45 | *
46 | * @param intput The input audio data pointer.
47 | * @param output The output audio data OCaml value.
48 | * @param samples The number of samples in the input data.
49 | * @param sample_rate The sample rate of the input data.
50 | * @param channels The number of channels in the input data.
51 | * @param config The RubberBand configuration options.
52 | * @param time_ratio The time ratio for stretching.
53 | * @param pitch_scale The pitch scale for stretching.
54 | *
55 | * @return The stretched audio data.
56 | */
57 | std::expected offline_stretch(
58 | float *intput,
59 | value output,
60 | size_t samples,
61 | size_t sample_rate,
62 | size_t channels,
63 | RubberBandStretcher::Options config,
64 | double time_ratio,
65 | double pitch_scale)
66 | {
67 | RubberBandStretcher stretcher(sample_rate, channels, config, time_ratio, pitch_scale);
68 |
69 | stretcher.setExpectedInputDuration(samples);
70 |
71 | /* we have access to the whole input, so we can feed it directly into the stretcher */
72 | stretcher.study(&intput, samples, true);
73 | stretcher.process(&intput, samples, true); /* Rubberband expect deinterleaved samples */
74 |
75 | size_t per_channel_size = stretcher.available();
76 |
77 | intnat ndims = (channels > 1) ? 2 : 1;
78 | intnat dims[ndims];
79 |
80 | if (ndims == 1)
81 | dims[0] = static_cast(per_channel_size);
82 | else
83 | {
84 | dims[0] = static_cast(channels); /* we're going to get the data directly deinterleaved */
85 | dims[1] = static_cast(per_channel_size);
86 | }
87 |
88 | /* memory is managed by OCaml */
89 | output = caml_ba_alloc(CAML_BA_FLOAT32 | CAML_BA_C_LAYOUT, ndims, NULL, dims);
90 |
91 | size_t retrieved = stretcher.retrieve((float *const *)&Caml_ba_data_val(output), per_channel_size);
92 | if (retrieved != per_channel_size)
93 | {
94 | std::string error_msg = "Rubberband error: retrieved " + std::to_string(retrieved) + " samples, expected " + std::to_string(per_channel_size);
95 | return std::unexpected(error_msg);
96 | }
97 |
98 | return output;
99 | }
100 |
101 | } /* namespace Time */
102 | } /* namespace Effects */
103 | } /* namespace SoundML */
104 |
105 | extern "C"
106 | {
107 | CAMLprim value caml_rubberband_stretch(value input, value params)
108 | {
109 | using namespace SoundML::Effects::Time;
110 | CAMLparam2(input, params);
111 | CAMLlocal1(output);
112 |
113 | size_t samples_val = Long_val(Field(params, 0));
114 | size_t sample_rate_val = Long_val(Field(params, 1));
115 | size_t channels_val = Long_val(Field(params, 2));
116 | RubberBandStretcher::Options config_val = static_cast(Long_val(Field(params, 3)));
117 | double time_ratio_val = Double_val(Field(params, 4));
118 | double pitch_scale_val = Double_val(Field(params, 5));
119 |
120 | float *input_data = (float *)Caml_ba_data_val(input);
121 |
122 | auto result = offline_stretch(input_data, output, samples_val, sample_rate_val, channels_val, config_val, time_ratio_val, pitch_scale_val);
123 |
124 | if (!result.has_value())
125 | {
126 | caml_failwith(result.error().c_str());
127 | }
128 |
129 | CAMLreturn(result.value());
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/src/effects/time/time.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023-2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type engine = Faster | Finer
23 |
24 | let engine_to_int = function Faster -> 0x00000000 | Finer -> 0x20000000
25 |
26 | type transients = Crisp | Mixed | Smooth
27 |
28 | let transients_to_int = function
29 | | Crisp ->
30 | 0x00000000
31 | | Mixed ->
32 | 0x00000100
33 | | Smooth ->
34 | 0x00000200
35 |
36 | type detector = Compound | Percussive | Soft
37 |
38 | let detector_to_int = function
39 | | Compound ->
40 | 0x00000000
41 | | Percussive ->
42 | 0x00000400
43 | | Soft ->
44 | 0x00000800
45 |
46 | type phase = Laminar | Independent
47 |
48 | let phase_to_int = function Laminar -> 0x00000000 | Independent -> 0x00002000
49 |
50 | type threading = Auto | Never | Always
51 |
52 | let threading_to_int = function
53 | | Auto ->
54 | 0x00000000
55 | | Never ->
56 | 0x00010000
57 | | Always ->
58 | 0x00020000
59 |
60 | type window = Standard | Short | Long
61 |
62 | let window_to_int = function
63 | | Standard ->
64 | 0x00000000
65 | | Short ->
66 | 0x00100000
67 | | Long ->
68 | 0x00200000
69 |
70 | type smoothing = Off | On
71 |
72 | let smoothing_to_int = function Off -> 0x00000000 | On -> 0x00800000
73 |
74 | type formant = Shifted | Preserved
75 |
76 | let formant_to_int = function Shifted -> 0x00000000 | Preserved -> 0x01000000
77 |
78 | type pitch = HighSpeed | HighQuality | HighConsistency
79 |
80 | let pitch_to_int = function
81 | | HighSpeed ->
82 | 0x00000000
83 | | HighQuality ->
84 | 0x02000000
85 | | HighConsistency ->
86 | 0x04000000
87 |
88 | type channels = Apart | Together
89 |
90 | let channels_to_int = function Apart -> 0x00000000 | Together -> 0x10000000
91 |
92 | module Config = struct
93 | type t =
94 | { engine: engine
95 | ; transients: transients
96 | ; detector: detector
97 | ; phase: phase
98 | ; threading: threading
99 | ; window: window
100 | ; smoothing: smoothing
101 | ; formant: formant
102 | ; pitch: pitch
103 | ; channels: channels }
104 |
105 | let default : t =
106 | { engine= Faster
107 | ; transients= Crisp
108 | ; detector= Compound
109 | ; phase= Laminar
110 | ; threading= Auto
111 | ; window= Standard
112 | ; smoothing= Off
113 | ; formant= Shifted
114 | ; pitch= HighSpeed
115 | ; channels= Apart }
116 |
117 | let percussive : t = {default with window= Short; phase= Independent}
118 |
119 | let with_engine engine config = {config with engine}
120 |
121 | let with_transients transients config = {config with transients}
122 |
123 | let with_detector detector config = {config with detector}
124 |
125 | let with_phase phase config = {config with phase}
126 |
127 | let with_threading threading config = {config with threading}
128 |
129 | let with_window window config = {config with window}
130 |
131 | let with_smoothing smoothing config = {config with smoothing}
132 |
133 | let with_formant formant config = {config with formant}
134 |
135 | let with_pitch pitch config = {config with pitch}
136 |
137 | let with_channels channels config = {config with channels}
138 |
139 | let to_int (cfg : t) : int =
140 | 0 lor engine_to_int cfg.engine
141 | lor transients_to_int cfg.transients
142 | lor detector_to_int cfg.detector
143 | lor phase_to_int cfg.phase
144 | lor threading_to_int cfg.threading
145 | lor window_to_int cfg.window
146 | lor smoothing_to_int cfg.smoothing
147 | lor formant_to_int cfg.formant lor pitch_to_int cfg.pitch
148 | lor channels_to_int cfg.channels
149 | end
150 |
151 | external rubberband_stretch :
152 | (float, Bigarray.float32_elt) Audio.G.t
153 | -> int * int * int * int * float * float
154 | -> (float, Bigarray.float32_elt) Audio.G.t = "caml_rubberband_stretch"
155 |
156 | let to_float32 : type b.
157 | (float, b) Bigarray.kind
158 | -> (float, b) Audio.G.t
159 | -> (float, Bigarray.float32_elt) Audio.G.t =
160 | fun (kd : (float, b) Bigarray.kind) ->
161 | match kd with
162 | | Float32 ->
163 | Fun.id
164 | | Float64 ->
165 | Audio.G.cast_d2s
166 | | Float16 ->
167 | raise
168 | (Invalid_argument
169 | "Float16 elements kind aren't supported. The array kind must be \
170 | either Float32 or Float64." )
171 |
172 | let of_float32 : type b.
173 | (float, b) Bigarray.kind
174 | -> (float, Bigarray.float32_elt) Audio.G.t
175 | -> (float, b) Audio.G.t =
176 | fun (kd : (float, b) Bigarray.kind) ->
177 | match kd with
178 | | Float32 ->
179 | Fun.id
180 | | Float64 ->
181 | Audio.G.cast_s2d
182 | | Float16 ->
183 | raise
184 | (Invalid_argument
185 | "Float16 elements kind aren't supported. The array kind must be \
186 | either Float32 or Float64." )
187 |
188 | let time_stretch : type a.
189 | ?config:Config.t
190 | -> (float, a) Audio.G.t
191 | -> int
192 | -> float
193 | -> (float, a) Audio.G.t =
194 | fun ?(config : Config.t = Config.default) (x : (float, a) Audio.G.t)
195 | (sample_rate : int) (ratio : float) : (float, a) Audio.G.t ->
196 | if not (ratio > 0.) then invalid_arg "rate must be > 0."
197 | else
198 | let dshape = Audio.G.shape x in
199 | let channels = if Array.length dshape > 1 then dshape.(0) else 1 in
200 | let samples = if Array.length dshape > 1 then dshape.(1) else dshape.(0) in
201 | let config = Config.to_int config in
202 | let to_float32 = to_float32 (Audio.G.kind x) in
203 | let of_float32 = of_float32 (Audio.G.kind x) in
204 | of_float32
205 | (rubberband_stretch (to_float32 x)
206 | (samples, sample_rate, channels, config, ratio, 1.0) )
207 |
208 | let pitch_shift : type a.
209 | ?config:Config.t
210 | -> ?bins_per_octave:int
211 | -> (float, a) Audio.G.t
212 | -> int
213 | -> int
214 | -> (float, a) Audio.G.t =
215 | fun ?(config : Config.t = Config.default) ?(bins_per_octave : int = 12)
216 | (x : (float, a) Audio.G.t) (sample_rate : int) (steps : int) :
217 | (float, a) Audio.G.t ->
218 | let bins_per_octave = Float.of_int bins_per_octave in
219 | let steps = Float.of_int steps in
220 | let scale = Float.pow 2.0 (steps /. bins_per_octave) in
221 | let dshape = Audio.G.shape x in
222 | let channels = if Array.length dshape > 1 then dshape.(0) else 1 in
223 | let samples = if Array.length dshape > 1 then dshape.(1) else dshape.(0) in
224 | let config = Config.to_int config in
225 | let to_float32 = to_float32 (Audio.G.kind x) in
226 | let of_float32 = of_float32 (Audio.G.kind x) in
227 | of_float32
228 | (rubberband_stretch (to_float32 x)
229 | (samples, sample_rate, channels, config, 1.0, scale) )
230 |
--------------------------------------------------------------------------------
/src/feature/dune:
--------------------------------------------------------------------------------
1 | (include_subdirs qualified)
2 |
3 | (library
4 | (name feature)
5 | (package soundml)
6 | (libraries audio utils window owl)
7 | (instrumentation
8 | (backend bisect_ppx))
9 | (wrapped true))
10 |
--------------------------------------------------------------------------------
/src/feature/spectral/spectral.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
--------------------------------------------------------------------------------
/src/feature/spectral/spectral.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
--------------------------------------------------------------------------------
/src/filterbank.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type norm = Slaney | PNorm of float
23 |
24 | let mel ?(fmax : float option = None) ?(htk : bool = false)
25 | ?(norm : norm option = None) (kd : ('a, 'b) Bigarray.kind)
26 | (sample_rate : int) (nfft : int) (nmels : int) (fmin : float) =
27 | if nmels = 0 then Audio.G.empty kd [|0; (nfft / 2) + 1|]
28 | else
29 | let fmax =
30 | match fmax with
31 | | Some fmax ->
32 | fmax
33 | | None ->
34 | float_of_int sample_rate /. 2.
35 | in
36 | let fftfreqs = Utils.rfftfreq kd nfft (1. /. float_of_int sample_rate) in
37 | let mel_freqs = Utils.melfreq kd ~nmels:(nmels + 2) ~fmin ~fmax ~htk in
38 | let fdiff = Audio.G.diff mel_freqs in
39 | let ramps = Utils.outer Audio.G.sub mel_freqs fftfreqs in
40 | let open Audio.G in
41 | let lower =
42 | neg ramps.${[0; Int.sub nmels 1]}
43 | / reshape fdiff.${[0; Int.sub nmels 1]} [|nmels; 1|]
44 | in
45 | let upper =
46 | ramps.${[2; Int.add nmels 1]} / reshape fdiff.${[1; nmels]} [|nmels; 1|]
47 | in
48 | (* Intersect slopes *)
49 | let weights = max2 (zeros kd (shape lower)) (min2 lower upper) in
50 | let weights =
51 | match norm with
52 | | Some Slaney ->
53 | let enorm =
54 | 2.0
55 | $/ sub
56 | mel_freqs.${[2; Int.add nmels 1]}
57 | mel_freqs.${[0; Int.sub nmels 1]}
58 | in
59 | let enorm = reshape enorm [|nmels; 1|] in
60 | weights * enorm
61 | | Some (PNorm p) ->
62 | Audio.G.vecnorm ~p ~axis:(-1) weights
63 | | None ->
64 | weights
65 | in
66 | weights
67 |
--------------------------------------------------------------------------------
/src/filterbank.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type norm = Slaney | PNorm of float
23 |
24 | val mel :
25 | ?fmax:float option
26 | -> ?htk:bool
27 | -> ?norm:norm option
28 | -> (float, 'b) Bigarray.kind
29 | -> int
30 | -> int
31 | -> int
32 | -> float
33 | -> (float, 'b) Owl_dense_ndarray.Generic.t
34 | (**
35 | [mel ?fmax ?htk ?norm sample_rate nfft nmels fmin]
36 |
37 | Returns a matrix of shape [nmels, nfft/2+1] containing the mel filterbank. *)
38 |
--------------------------------------------------------------------------------
/src/io/cio/README.md:
--------------------------------------------------------------------------------
1 | ## SoundML IO C++ library
2 |
3 | This is the C/C++ code for SoundML IO library. It relies on C++23 (for `std::expected`).
4 |
5 | ### Dependencies
6 |
7 | - `sndfile`: for reading and writing audio files (and *very* soon™ streams)
8 | - `soxr`: for everything related to resampling
9 |
10 | ### General information
11 |
12 | Since we're working with `Owl` that uses the `Bigarray.c_layout` layout, we choose to maintain the interleaved layout for the audio data. This allows us to directly write the data into the `Bigarray` without having to deinterleave it. Thus, when reading an audio files that contains `n` channels and `m` samples per channel, the final shape of the `Bigarray` will be `(m, n)` instead of `(n, m)` as you may be used to using other well known libraries (like *librosa*).
13 |
14 | In this directory, you'll find the following files:
15 |
16 | - `common.hxx` : contains the common functions and types used by both the reader and the writer.
17 | - `read.hxx` : implements the needed `read`s functions. The file reading implementation is split between two classes:
18 | - `SoundML::IO::SndfileReader` : this is used when no resampling is needed.
19 | - `SoundML::IO::SoXrReader` : this is used when resampling is needed. It performs resampling while reading the file. Each read buffer is fed to the soxr resampler and the output is written directly to the `Bigarray` data pointer.
20 | - `write.hxx` implements a simple writing function that directly uses `sndfile` to write the data.
21 |
22 | Exceptions are used to handle the errors. To do so, in `common.hxx` we retreive (inside `raise_caml_exception`) the correct exception to raise in OCaml based on the `Error` provided.
23 |
--------------------------------------------------------------------------------
/src/io/cio/common.hxx:
--------------------------------------------------------------------------------
1 | /*****************************************************************************/
2 | /* */
3 | /* */
4 | /* Copyright (C) 2023-2025 */
5 | /* Gabriel Santamaria */
6 | /* */
7 | /* */
8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 | /* you may not use this file except in compliance with the License. */
10 | /* You may obtain a copy of the License at */
11 | /* */
12 | /* http://www.apache.org/licenses/LICENSE-2.0 */
13 | /* */
14 | /* Unless required by applicable law or agreed to in writing, software */
15 | /* distributed under the License is distributed on an "AS IS" BASIS, */
16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 | /* See the License for the specific language governing permissions and */
18 | /* limitations under the License. */
19 | /* */
20 | /*****************************************************************************/
21 |
22 | #define SOUNDML_BUFFER_SIZE 4096
23 | #include
24 | #include
25 | #include
26 |
27 | extern "C" /* OCaml imports */
28 | {
29 | #include
30 | #include
31 | #include
32 | #include
33 | #include
34 | #include
35 | #include
36 | }
37 |
38 | #ifndef SOUNDML_IO_COMMON_H
39 | #define SOUNDML_IO_COMMON_H
40 | namespace SoundML
41 | {
42 | namespace IO
43 | {
44 | typedef enum
45 | {
46 | SNDFILE_ERR,
47 | SOXR_ERR,
48 | SOUNDML_ERR
49 | } ErrorType;
50 |
51 | using Error = std::pair, ErrorType>;
52 |
53 | /**
54 | * @brief Little helper to get a string out of an error code
55 | * @param err The error code
56 | * @return A string with the error message
57 | */
58 | std::string get_error_string(Error error)
59 | {
60 | std::variant err_code = error.first;
61 | ErrorType typ = error.second;
62 | switch (typ)
63 | {
64 | case SNDFILE_ERR:
65 | return std::string(sf_error_number(std::get(err_code)));
66 | case SOXR_ERR:
67 | return std::get(err_code);
68 | case SOUNDML_ERR:
69 | return std::get(err_code);
70 | default:
71 | break;
72 | }
73 |
74 | return std::string("Unknown error");
75 | }
76 |
77 | /**
78 | * @brief Raise the correct OCaml exception from the given Error
79 | * @param error The error to raise
80 | * @param filename The name of the file that caused the error
81 | */
82 | void raise_caml_exception(Error error, std::string filename)
83 | {
84 | ErrorType type = error.second;
85 | std::string error_string = SoundML::IO::get_error_string(error) + " in file " + filename;
86 |
87 | #define GET_EXN_TAG(name) \
88 | (*caml_named_value(name))
89 |
90 | if (type == SNDFILE_ERR)
91 | {
92 | int err_code = std::get(error.first);
93 | bool is_format_err = err_code == SF_ERR_UNRECOGNISED_FORMAT || err_code == SF_ERR_MALFORMED_FILE || err_code == SF_ERR_UNSUPPORTED_ENCODING;
94 | if (is_format_err)
95 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.invalid_format"), error_string.c_str());
96 | else /* it's SF_ERR_SYSTEM */
97 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.file_not_found"), error_string.c_str());
98 | }
99 | else if (type == SOXR_ERR)
100 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.resampling_error"), error_string.c_str());
101 | else if (type == SOUNDML_ERR)
102 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.internal_error"), error_string.c_str());
103 | else
104 | caml_raise_with_string(GET_EXN_TAG("soundml.exn.internal_error"), "Unknown internal error.");
105 | #undef GET_EXN_TAG
106 | }
107 |
108 | /**
109 | * @brief Structure holding metadate related to an audio file
110 | * @param frames number of frames we read from the file
111 | * @param channels number of channels in the file
112 | * @param sample_rate sample-rate of the file (if a resampling has been asked, sample-rate equals the the asked sr)
113 | * @param padded_frames number of frames we padded with zeros
114 | * @param format format of the file (SF_FORMAT_* from libsndfile)
115 | */
116 | struct AudioMetadata
117 | {
118 | sf_count_t frames;
119 | int channels;
120 | int sample_rate;
121 | sf_count_t padded_frames;
122 | int format;
123 | };
124 |
125 | /**
126 | * @brief Enum that represents the resampling types
127 | * @note The SoX resampling types are defined in soxr.h
128 | */
129 | typedef enum
130 | {
131 | RS_NONE = 0, /* No resampling */
132 | RS_SOXR_QQ, /* 'Quick' cubic interpolation. */
133 | RS_SOXR_LQ, /* 'Low' 16-bit with larger rolloff. */
134 | RS_SOXR_MQ, /* 'Medium' 16-bit with medium rolloff. */
135 | RS_SOXR_HQ, /* 'High quality'. */
136 | RS_SOXR_VHQ, /* 'Very high quality'. */
137 | /* TODO: implement these resamplers */
138 | RS_SCR_LINEAR,
139 | RS_SINC_BEST_QUALITY,
140 | RS_SINC_MEDIUM_QUALITY,
141 | RS_SINC_FASTEST,
142 | RS_ZERO_ORDER_HOLD,
143 | RS_SRC_LINEAR
144 | } resampling_t;
145 |
146 | /**
147 | * @brief Get the (correct) SoX resampling type from the resampling_t enum
148 | * @param type The resampling type to convert
149 | * @return The SoX resampling type (SOXR_* from soxr.h)
150 | */
151 | unsigned long get_recipe_type(resampling_t type)
152 | {
153 |
154 | switch (type)
155 | {
156 | case RS_SOXR_VHQ:
157 | return SOXR_VHQ;
158 | case RS_SOXR_HQ:
159 | return SOXR_HQ;
160 | case RS_SOXR_MQ:
161 | return SOXR_MQ;
162 | case RS_SOXR_LQ:
163 | return SOXR_LQ;
164 | default:
165 | return SOXR_VHQ;
166 | }
167 | }
168 | } /* namespace SoundML::IO */
169 | } /* namespace SoundML */
170 |
171 | #endif /* SOUNDML_IO_COMMON_H */
172 |
--------------------------------------------------------------------------------
/src/io/cio/write.hxx:
--------------------------------------------------------------------------------
1 | /*****************************************************************************/
2 | /* */
3 | /* */
4 | /* Copyright (C) 2023-2025 */
5 | /* Gabriel Santamaria */
6 | /* */
7 | /* */
8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 | /* you may not use this file except in compliance with the License. */
10 | /* You may obtain a copy of the License at */
11 | /* */
12 | /* http://www.apache.org/licenses/LICENSE-2.0 */
13 | /* */
14 | /* Unless required by applicable law or agreed to in writing, software */
15 | /* distributed under the License is distributed on an "AS IS" BASIS, */
16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 | /* See the License for the specific language governing permissions and */
18 | /* limitations under the License. */
19 | /* */
20 | /*****************************************************************************/
21 |
22 | #ifndef SOUNDML_WRITER_H
23 | #define SOUNDML_WRITER_H
24 |
25 | #include "common.hxx"
26 |
27 | namespace SoundML
28 | {
29 | namespace IO
30 | {
31 |
32 | class AudioWriter
33 | {
34 | private:
35 | sf_count_t nframes;
36 |
37 | public:
38 | AudioWriter(sf_count_t nframes)
39 | : nframes(nframes) {}
40 |
41 | /**
42 | * @brief Writes the given audio data to the file
43 | * @param sndfile The SndfileHandle to write to
44 | * @param data Pointer to the data to write
45 | * @return An std::expected containing the error code on failure
46 | */
47 | template
48 | std::expected write(SndfileHandle &sndfile, const T *data)
49 | {
50 | caml_release_runtime_system();
51 |
52 | sf_count_t written = sndfile.writef(data, nframes);
53 |
54 | caml_acquire_runtime_system();
55 |
56 | if (written != nframes)
57 | {
58 | int err = sndfile.error() ? sndfile.error() : SF_ERR_SYSTEM;
59 | return std::unexpected(Error(err, SNDFILE_ERR));
60 | }
61 |
62 | return std::expected{};
63 | }
64 | };
65 | } /* namespace SoundML::IO */
66 | } /* namespace SoundML */
67 |
68 | /**
69 | * @brief Writes the given audio data to the file
70 | * @param filename The name of the file to write to.
71 | * @param ba_data The data to write (Bigarray).
72 | * @param metadata The metadata of the audio data (number of frames, sample rate, channels, format).
73 | * @tparam T The type of the audio data (float or double).
74 | *
75 | * @return An std::expected containing the error code on failure
76 | */
77 | template
78 | value caml_write_audio_file(value filename, value ba_data, value metadata)
79 | {
80 | using namespace SoundML::IO;
81 | std::string filename_str = String_val(filename);
82 | sf_count_t nframes_val = Long_val(Field(metadata, 0));
83 | int sample_rate_val = Long_val(Field(metadata, 1));
84 | int channels_val = Long_val(Field(metadata, 2));
85 | int format_val = Long_val(Field(metadata, 3));
86 |
87 | SndfileHandle sndfile(filename_str, SFM_WRITE, format_val, channels_val, sample_rate_val);
88 | if (int err = sndfile.error(); err)
89 | raise_caml_exception(Error(err, SNDFILE_ERR), filename_str);
90 |
91 | AudioWriter writer(nframes_val);
92 | T *data = (T *)Caml_ba_data_val(ba_data);
93 |
94 | auto result = writer.write(sndfile, data);
95 | if (result.has_value())
96 | return Val_unit;
97 | else
98 | {
99 | Error err = result.error();
100 | raise_caml_exception(err, filename_str);
101 | }
102 |
103 | return Val_unit;
104 | }
105 |
106 | #endif /* SOUNDML_WRITER_H */
107 |
--------------------------------------------------------------------------------
/src/io/config/discover.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | module C = Configurator.V1
23 |
24 | let () =
25 | C.main ~name:"sndfile-pkg-config" (fun c ->
26 | let default : C.Pkg_config.package_conf =
27 | {libs= ["-lsndfile"; "-lsoxr"]; cflags= []}
28 | in
29 | let conf =
30 | match C.Pkg_config.get c with
31 | | None ->
32 | default
33 | | Some pc -> (
34 | match C.Pkg_config.query pc ~package:"sndfile soxr" with
35 | | None ->
36 | default
37 | | Some deps ->
38 | deps )
39 | in
40 | C.Flags.write_sexp "c_flags.sexp" conf.cflags ;
41 | C.Flags.write_sexp "c_library_flags.sexp" conf.libs )
42 |
--------------------------------------------------------------------------------
/src/io/config/dune:
--------------------------------------------------------------------------------
1 | (include_subdirs no)
2 |
3 | (executable
4 | (name discover)
5 | (libraries dune.configurator))
6 |
--------------------------------------------------------------------------------
/src/io/dune:
--------------------------------------------------------------------------------
1 | (include_subdirs qualified)
2 |
3 | (library
4 | (name io)
5 | (package soundml)
6 | (foreign_stubs
7 | (language cxx)
8 | (names soundml_io)
9 | (include_dirs cio)
10 | (flags
11 | :standard
12 | (:include c_flags.sexp)
13 | -O3
14 | -std=c++23
15 | -llto))
16 | (c_library_flags
17 | (:include c_library_flags.sexp))
18 | (modules io)
19 | (libraries audio utils owl)
20 | (instrumentation
21 | (backend bisect_ppx))
22 | (wrapped true))
23 |
24 | (rule
25 | (targets c_flags.sexp c_library_flags.sexp)
26 | (action
27 | (run ./config/discover.exe)))
28 |
--------------------------------------------------------------------------------
/src/io/io.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023-2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Audio
23 | open Bigarray
24 |
25 | exception File_not_found of string
26 |
27 | exception Invalid_format of string
28 |
29 | exception Resampling_error of string
30 |
31 | exception Internal_error of string
32 |
33 | let _ =
34 | Callback.register_exception "soundml.exn.file_not_found"
35 | (File_not_found "file.wav")
36 |
37 | let _ =
38 | Callback.register_exception "soundml.exn.invalid_format"
39 | (Invalid_format "invalid format")
40 |
41 | let _ =
42 | Callback.register_exception "soundml.exn.resampling_error"
43 | (Resampling_error "error")
44 |
45 | let _ =
46 | Callback.register_exception "soundml.exn.internal_error"
47 | (Internal_error "internal error")
48 |
49 | type resampling_t = NONE | SOXR_QQ | SOXR_LQ | SOXR_MQ | SOXR_HQ | SOXR_VHQ
50 |
51 | (* nframes * channels * sample_rate * format *)
52 | type metadata = int * int * int * int
53 |
54 | external caml_read_audio_file_f32 :
55 | string
56 | -> resampling_t
57 | -> int
58 | -> (float, Bigarray.float32_elt) Audio.G.t * metadata
59 | = "caml_read_audio_file_f32"
60 |
61 | external caml_read_audio_file_f64 :
62 | string
63 | -> resampling_t
64 | -> int
65 | -> (float, Bigarray.float64_elt) Audio.G.t * metadata
66 | = "caml_read_audio_file_f64"
67 |
68 | let to_mono (x : (float, 'a) G.t) =
69 | if G.num_dims x > 1 then G.mean ~axis:1 ~keep_dims:false x else x
70 |
71 | let read : type a.
72 | ?res_typ:resampling_t
73 | -> ?sample_rate:int
74 | -> ?mono:bool
75 | -> (float, a) kind
76 | -> string
77 | -> a audio =
78 | fun ?(res_typ : resampling_t = SOXR_HQ) ?(sample_rate : int = 22050)
79 | ?(mono : bool = true) typ (filename : string) ->
80 | let read_func : type a.
81 | (float, a) kind
82 | -> string
83 | -> resampling_t
84 | -> int
85 | -> (float, a) G.t * metadata =
86 | fun typ ->
87 | match typ with
88 | | Float32 ->
89 | caml_read_audio_file_f32
90 | | Float64 ->
91 | caml_read_audio_file_f64
92 | | Float16 ->
93 | raise
94 | (Invalid_argument
95 | "Float16 elements kind aren't supported. The array kind must be \
96 | either Float32 or Float64." )
97 | in
98 | let data, meta = read_func typ filename res_typ sample_rate in
99 | let dshape = Audio.G.shape data in
100 | let nsamples = dshape.(0) in
101 | let data = if mono then to_mono data else data in
102 | let frames, channels, sample_rate, format = meta in
103 | let data =
104 | match (res_typ, frames, nsamples) with
105 | | NONE, real, pred ->
106 | if real = pred then data else Audio.G.sub_left data 0 real
107 | | _ ->
108 | data
109 | in
110 | let channels = if mono then 1 else channels in
111 | let format =
112 | match Aformat.of_int format with
113 | | Ok fmt ->
114 | fmt
115 | | Error e ->
116 | raise (Invalid_format e)
117 | in
118 | let meta =
119 | Metadata.create ~name:filename frames channels sample_rate format
120 | in
121 | let data = Audio.G.transpose data in
122 | Audio.create meta data
123 |
124 | external caml_write_audio_file_f32 :
125 | string
126 | -> (float, Bigarray.float32_elt) Audio.G.t
127 | -> int * int * int * int
128 | -> unit = "caml_write_audio_file_f32"
129 |
130 | external caml_write_audio_file_f64 :
131 | string
132 | -> (float, Bigarray.float64_elt) Audio.G.t
133 | -> int * int * int * int
134 | -> unit = "caml_write_audio_file_f64"
135 |
136 | let write : type a.
137 | ?format:Aformat.t -> string -> (float, a) Audio.G.t -> int -> unit =
138 | fun ?format (filename : string) (x : (float, a) Audio.G.t) sample_rate ->
139 | let format =
140 | if format = None then
141 | match Aformat.of_ext (Filename.extension filename) with
142 | | Ok fmt ->
143 | fmt
144 | | Error e ->
145 | raise (Invalid_format e)
146 | else Option.get format
147 | in
148 | let format = Aformat.to_int format in
149 | let data = Audio.G.transpose x in
150 | let dshape = Audio.G.shape data in
151 | let nframes = dshape.(0) in
152 | let channels = if Array.length dshape > 1 then dshape.(1) else 1 in
153 | (* we get back our interleaved format *)
154 | match Audio.G.kind data with
155 | | Float32 ->
156 | caml_write_audio_file_f32 filename data
157 | (nframes, sample_rate, channels, format)
158 | | Float64 ->
159 | caml_write_audio_file_f64 filename data
160 | (nframes, sample_rate, channels, format)
161 | | _ ->
162 | raise
163 | (Invalid_argument
164 | "Float16 elements kind aren't supported. The array kind must be \
165 | either Float32 or Float64." )
166 |
--------------------------------------------------------------------------------
/src/io/io.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023-2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | (**
23 | The {!Io} (in/out) module is the entry point for reading and writing audio
24 | data from and to the filesystem. It supports resampling via the {{:https://github.com/chirlu/soxr}SoXr} library. *)
25 |
26 | open Audio
27 | open Bigarray
28 |
29 | (** Thrown when a requested file cannot be found on the system. *)
30 | exception File_not_found of string
31 |
32 | (** Thrown when the file we're trying to read is encoded in an invalid format, or when the format we're trying to write isn't supported. *)
33 | exception Invalid_format of string
34 |
35 | (** Thrown when an error occurred while resampling. *)
36 | exception Resampling_error of string
37 |
38 | (** Thrown when an internal error occurred. This is should not happen, so please report it. *)
39 | exception Internal_error of string
40 |
41 | (** The resampling method to use. The default is [SOXR_HQ]. *)
42 | type resampling_t =
43 | | NONE (** Indicates that no resampling is requested *)
44 | | SOXR_QQ (** 'Quick' cubic interpolation. *)
45 | | SOXR_LQ (** 'Low' 16-bit with larger rolloff. *)
46 | | SOXR_MQ (** 'Medium' 16-bit with medium rolloff. *)
47 | | SOXR_HQ (** 'High quality'. *)
48 | | SOXR_VHQ (** 'Very high quality'. *)
49 |
50 | val read :
51 | 'a.
52 | ?res_typ:resampling_t
53 | -> ?sample_rate:int
54 | -> ?mono:bool
55 | -> (float, 'a) kind
56 | -> string
57 | -> 'a audio
58 | (**
59 | [read ?res_typ ?sample_rate ?fix kind filename] reads an audio file and returns an [audio].
60 |
61 | @return an [audio] type that contains the audio data read from the file. The type of the audio's data is determined by the [kind] parameter.
62 |
63 | {2 Parameters}
64 | @param ?res_typ is the resampling method to use. The default is [SOXR_HQ]. If [NONE] is used, [?sample_rate] is ignored and no resampling will be done.
65 | @param ?sample_rate is the target sample rate to use when reading the file. Default is 22050 Hz.
66 | @param ?mono is a boolean that indicates if we want to convert to a mono audio. Default is [true].
67 | @param kind is the format of audio data to read. It can be either [Bigarray.Float32] or [Bigarray.Float64].
68 | @param filename is the path to the file to read audio from.
69 |
70 | @raise File_not_found If the file does not exist.
71 | @raise Invalid_format If the file is not a valid audio file.
72 | @raise Resampling_error If the resampling fails.
73 | @raise Internal_error If an internal error occurs.
74 |
75 | {2 Usage}
76 | Reading audio is straightfoward. Simply specify the path to the file you want to read.
77 |
78 | {[
79 | open Soundml
80 | (* This will read the file.wav audio into a Float32 bigarray, resampled using SOXR_HQ at 22050Hz. *)
81 | let audio = Io.read Bigarray.Float32 "path/to/file.wav"
82 | ]}
83 |
84 | {2 Supported formats}
85 |
86 | SoundML relies on {{:https://libsndfile.github.io/libsndfile/}libsndfile} to read audio files. Full detail on the supported formats are available
87 | on the official sndfile's website: {{:https://libsndfile.github.io/libsndfile/formats.html}Supported formats} and in the {!Audio.Aformat} module. *)
88 |
89 | val write : 'a. ?format:Aformat.t -> string -> (float, 'a) G.t -> int -> unit
90 | (**
91 | [write ?format filename data sample_reat] writes an audio file to the filesystem.
92 |
93 | {2 Parameters}
94 | @param ?format is the format to use when writing the file. If not specified, the format is determined by the file extension by {!Aformat.of_ext}.
95 | @param filename is the path to the file to write audio to.
96 | @param data is the audio data to write. It can be either a [Bigarray.Float32] or [Bigarray.Float64].
97 | @param sample_rate is the sample rate of the audio data.
98 |
99 |
100 | @raise Invalid_format If the file is not a valid audio file.
101 | @raise Internal_error If an internal error occurs.
102 |
103 |
104 | {2 Usage}
105 | Writing audio is as straightfoward as reading it. Simply specify the path to the file you want to write.
106 |
107 | {[
108 | open Soundml
109 | open Audio
110 | let audio = Io.read Bigarray.Float32 "path/to/file.mp3" in
111 | Io.write "path/to/file.wav" (data audio) 22050 (* we'll automatically detect that you want to write to the WAV format *)
112 | ]}
113 |
114 | {2 Supported formats}
115 |
116 | SoundML relies on {{:https://libsndfile.github.io/libsndfile/}libsndfile} to read audio files. Full detail on the supported formats are available
117 | on the official sndfile's website: {{:https://libsndfile.github.io/libsndfile/formats.html}Supported formats} and in the {!Audio.Aformat} module. *)
118 |
--------------------------------------------------------------------------------
/src/io/soundml_io.cxx:
--------------------------------------------------------------------------------
1 | /*****************************************************************************/
2 | /* */
3 | /* */
4 | /* Copyright (C) 2023-2025 */
5 | /* Gabriel Santamaria */
6 | /* */
7 | /* */
8 | /* Licensed under the Apache License, Version 2.0 (the "License"); */
9 | /* you may not use this file except in compliance with the License. */
10 | /* You may obtain a copy of the License at */
11 | /* */
12 | /* http://www.apache.org/licenses/LICENSE-2.0 */
13 | /* */
14 | /* Unless required by applicable law or agreed to in writing, software */
15 | /* distributed under the License is distributed on an "AS IS" BASIS, */
16 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17 | /* See the License for the specific language governing permissions and */
18 | /* limitations under the License. */
19 | /* */
20 | /*****************************************************************************/
21 |
22 | #include "read.hxx"
23 | #include "write.hxx"
24 |
25 | extern "C"
26 | {
27 | #include
28 | #include
29 |
30 | CAMLprim value caml_read_audio_file_f32(value filename, value res_typ, value trgt_sr)
31 | {
32 | CAMLparam3(filename, res_typ, trgt_sr);
33 | CAMLreturn(caml_read_audio_file(filename, res_typ, trgt_sr));
34 | }
35 |
36 | CAMLprim value caml_read_audio_file_f64(value filename, value res_typ, value trgt_sr)
37 | {
38 | CAMLparam3(filename, res_typ, trgt_sr);
39 | CAMLreturn(caml_read_audio_file(filename, res_typ, trgt_sr));
40 | }
41 |
42 | CAMLprim value caml_write_audio_file_f32(value filename, value ba_data, value metadata)
43 | {
44 | CAMLparam3(filename, ba_data, metadata);
45 | CAMLreturn(caml_write_audio_file(filename, ba_data, metadata));
46 | }
47 |
48 | CAMLprim value caml_write_audio_file_f64(value filename, value ba_data, value metadata)
49 | {
50 | CAMLparam3(filename, ba_data, metadata);
51 | CAMLreturn(caml_write_audio_file(filename, ba_data, metadata));
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/soundml.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | module Audio = Audio
23 | module Io = Io
24 | module Types = Types
25 | module Transform = Transform
26 | module Window = Window
27 | module Feature = Feature
28 | module Filterbank = Filterbank
29 | module Effects = Effects
30 | module Utils = Utils
31 |
--------------------------------------------------------------------------------
/src/transform.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Bigarray
23 | open Types
24 |
25 | module Config = struct
26 | type t =
27 | { n_fft: int
28 | ; hop_size: int
29 | ; win_length: int
30 | ; window: Window.window
31 | ; center: bool }
32 |
33 | let default =
34 | { n_fft= 2048
35 | ; hop_size= 512
36 | ; win_length= 2048
37 | ; window= `Hanning
38 | ; center= true }
39 | end
40 |
41 | module G = Owl.Dense.Ndarray.Generic
42 |
43 | let to_complex (x : float) : Complex.t = Complex.{re= x; im= 0.}
44 |
45 | let stft : type a b.
46 | ?config:Config.t -> (a, b) precision -> (float, a) G.t -> (Complex.t, b) G.t
47 | =
48 | fun ?(config : Config.t = Config.default) p (x : (float, a) G.t) ->
49 | let kd : (Complex.t, b) kind =
50 | match p with B32 -> Complex32 | B64 -> Complex64
51 | in
52 | let window = (Window.get config.window p ~fftbins:true) config.win_length in
53 | let out_shape =
54 | [| (config.n_fft / 2) + 1
55 | ; ((G.numel x - config.win_length) / config.hop_size) + 1 |]
56 | in
57 | let spectrum = Audio.G.create kd out_shape Complex.zero in
58 | let ym = Audio.G.zeros kd [|config.n_fft; 1|] in
59 | for m = 0 to out_shape.(1) - 1 do
60 | Audio.G.fill ym Complex.zero ;
61 | for p = 0 to config.win_length - 1 do
62 | Audio.G.(
63 | ym.%{p; 0} <-
64 | to_complex
65 | @@ Float.mul
66 | (get x [|Int.(add p (mul m config.hop_size))|])
67 | window.%{p} )
68 | done ;
69 | let ym_fft = Owl.Fft.Generic.fft ~axis:0 ym in
70 | let spectrum_slice =
71 | Audio.G.get_slice [[0; out_shape.(0) - 1]; [0]] ym_fft
72 | in
73 | Audio.G.set_slice_ ~out:spectrum [[]; [m]] spectrum spectrum_slice
74 | done ;
75 | spectrum
76 |
--------------------------------------------------------------------------------
/src/transform.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Types
23 |
24 | module Config : sig
25 | type t =
26 | { n_fft: int
27 | ; hop_size: int
28 | ; win_length: int
29 | ; window: Window.window
30 | ; center: bool }
31 |
32 | val default : t
33 | end
34 |
35 | val stft :
36 | 'a 'b.
37 | ?config:Config.t
38 | -> ('a, 'b) precision
39 | -> (float, 'a) Audio.G.t
40 | -> (Complex.t, 'b) Audio.G.t
41 |
--------------------------------------------------------------------------------
/src/types.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type (_, _) precision =
23 | | B32 : (Bigarray.float32_elt, Bigarray.complex32_elt) precision
24 | | B64 : (Bigarray.float64_elt, Bigarray.complex64_elt) precision
25 |
--------------------------------------------------------------------------------
/src/types.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type (_, _) precision =
23 | | B32 : (Bigarray.float32_elt, Bigarray.complex32_elt) precision
24 | | B64 : (Bigarray.float64_elt, Bigarray.complex64_elt) precision
25 |
--------------------------------------------------------------------------------
/src/utils.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | (**
23 | Utility conversion module. *)
24 | module Convert : sig
25 | val mel_to_hz :
26 | ?htk:bool
27 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t
28 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t
29 | (** Converts mel-scale values to Hz. *)
30 |
31 | val hz_to_mel :
32 | ?htk:bool
33 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t
34 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t
35 | (** Reverse function of {!mel_to_hz}. *)
36 |
37 | type reference =
38 | | RefFloat of float
39 | | RefFunction of ((float, Bigarray.float32_elt) Audio.G.t -> float)
40 |
41 | val power_to_db :
42 | ?amin:float
43 | -> ?top_db:float option
44 | -> reference
45 | -> (float, Bigarray.float32_elt) Owl_dense_ndarray.Generic.t
46 | -> (float, Bigarray.float32_elt) Owl_dense_ndarray_generic.t
47 |
48 | val db_to_power :
49 | ?amin:float
50 | -> reference
51 | -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t
52 | -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t
53 | end
54 |
55 | val pad_center :
56 | ('a, 'b) Owl.Dense.Ndarray.Generic.t
57 | -> int
58 | -> 'a
59 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
60 | (**
61 | Pads a ndarray such that *)
62 |
63 | val frame :
64 | ('a, 'b) Owl_dense_ndarray.Generic.t
65 | -> int
66 | -> int
67 | -> int
68 | -> ('a, 'b) Owl_dense_ndarray.Generic.t
69 |
70 | val fftfreq :
71 | int -> float -> (float, Bigarray.float32_elt) Owl.Dense.Ndarray.Generic.t
72 | (**
73 | Implementation of the Numpy's fftfreq function.
74 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.fft.fftfreq.html}numpy.fft.fftfreq} for more information. *)
75 |
76 | val rfftfreq :
77 | (float, 'b) Bigarray.kind
78 | -> int
79 | -> float
80 | -> (float, 'b) Owl.Dense.Ndarray.Generic.t
81 | (**
82 | Implementation of the Numpy's rfftfreq function.
83 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.fft.rfftfreq.html}numpy.fft.rfftfreq} for more information. *)
84 |
85 | val melfreq :
86 | ?nmels:int
87 | -> ?fmin:float
88 | -> ?fmax:float
89 | -> ?htk:bool
90 | -> (float, 'b) Bigarray.kind
91 | -> (float, 'b) Owl.Dense.Ndarray.Generic.t
92 | (**
93 | Implementation of librosa's mel_frequencies. Compute an [Owl.Dense.Ndarray] of acoustic frequencies tuned to the mel scale.
94 | See: {{:https://librosa.org/doc/main/generated/librosa.mel_frequencies.html}librosa.mel_frequencies} for more information. *)
95 |
96 | val roll :
97 | ('a, 'b) Owl.Dense.Ndarray.Generic.t
98 | -> int
99 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
100 | (**
101 | Implementation of the Numpy's roll function on the 0th axis of the given ndarray.
102 | This function is used to shift elements of an array inside the library and is exposed
103 | as it can be sometimes usefull.
104 |
105 | This function returns a copy of the given ndarray.
106 |
107 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.roll.html}numpy.roll} for more information. *)
108 |
109 | val cov : ?b:('a, 'b) Audio.G.t -> a:('a, 'b) Audio.G.t -> ('a, 'b) Audio.G.t
110 | (**
111 | (re)Implementation of the matrix covariance function from Owl.
112 |
113 | Note: this is temporary and done only because Owl doesn't export any
114 | cov function for the [Ndarray] module on which [Audio.G] is based. This function is
115 | likely to be deleted when Owl library will export such a cov function for n-dimensional arrays. *)
116 |
117 | val unwrap :
118 | ?discont:float option
119 | -> ?axis:int
120 | -> ?period:float
121 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t
122 | -> (float, 'a) Owl.Dense.Ndarray.Generic.t
123 | (**
124 | Implementation of the Numpy's unwrap function.
125 | See {{:https://numpy.org/doc/stable/reference/generated/numpy.unwrap.html}numpy.unwrap} for more information. *)
126 |
127 | val outer :
128 | ( ('a, 'b) Owl.Dense.Ndarray.Generic.t
129 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
130 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t )
131 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
132 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
133 | -> ('a, 'b) Owl.Dense.Ndarray.Generic.t
134 | (**
135 | Generalized outer product of any given operator that supports broadcasting (basically all the common Owl's Ndarray operators.) *)
136 |
--------------------------------------------------------------------------------
/src/window.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Types
23 |
24 | type window = [`Hanning | `Hamming | `Blackman | `Boxcar]
25 |
26 | let kind_of_precision : type a b. (a, b) precision -> (float, a) Bigarray.kind =
27 | fun prec -> match prec with B32 -> Bigarray.Float32 | B64 -> Bigarray.Float64
28 |
29 | let cosine_sum ?(fftbins = false) (prec : ('a, 'b) precision) (a : float array)
30 | m =
31 | let kd = kind_of_precision prec in
32 | if m < 0 then invalid_arg "Window length M must be a non-negative integer"
33 | else if m = 0 then Audio.G.empty kd [|0|]
34 | else if m = 1 then Audio.G.ones kd [|1|]
35 | else
36 | let sym = not fftbins in
37 | let m_extended, needs_trunc =
38 | if not sym then (m + 1, true) else (m, false)
39 | in
40 | let fac = Audio.G.linspace kd (-.Owl_const.pi) Owl_const.pi m_extended in
41 | let w = Audio.G.zeros kd [|m_extended|] in
42 | Array.iteri
43 | (fun k coeff_val ->
44 | if coeff_val <> 0.0 then
45 | let term =
46 | if k = 0 then Audio.G.create kd [|m_extended|] coeff_val
47 | else
48 | let k_float = float_of_int k in
49 | let cos_args = Audio.G.mul_scalar fac k_float in
50 | let cos_terms = Audio.G.cos cos_args in
51 | Audio.G.mul_scalar cos_terms coeff_val
52 | in
53 | Audio.G.add_ ~out:w w term )
54 | a ;
55 | if needs_trunc then Audio.G.get_slice [[0; m - 1]] w else w
56 |
57 | let hanning ?(fftbins = false) (prec : ('a, 'b) precision) m =
58 | cosine_sum ~fftbins prec [|0.5; 1. -. 0.5|] m
59 |
60 | let hamming ?(fftbins = false) (prec : ('a, 'b) precision) m =
61 | cosine_sum ~fftbins prec [|0.54; 1. -. 0.54|] m
62 |
63 | let blackman ?(fftbins = false) (prec : ('a, 'b) precision) m =
64 | cosine_sum ~fftbins prec [|0.42; 0.5; 0.08|] m
65 |
66 | let boxcar ?(fftbins = false) (prec : ('a, 'b) precision) (size : int) :
67 | (float, 'a) Audio.G.t =
68 | let kd = kind_of_precision prec in
69 | if size < 0 then failwith "Window length M must be non-negative"
70 | else if size = 0 then Audio.G.empty kd [|0|]
71 | else Audio.G.ones kd [|size|]
72 | [@@warning "-27"]
73 |
74 | let get (typ : window) (prec : ('a, 'b) precision) :
75 | ?fftbins:bool -> int -> (float, 'a) Audio.G.t =
76 | fun ?fftbins size ->
77 | match typ with
78 | | `Hanning ->
79 | hanning ?fftbins prec size
80 | | `Hamming ->
81 | hamming ?fftbins prec size
82 | | `Blackman ->
83 | blackman ?fftbins prec size
84 | | `Boxcar ->
85 | boxcar ?fftbins prec size
86 |
--------------------------------------------------------------------------------
/src/window.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2023 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Types
23 |
24 | (** {1 Window Functions}
25 |
26 | This module provides a few commonly used window functions. *)
27 |
28 | (** The type of window functions. *)
29 | type window = [`Hanning | `Hamming | `Blackman | `Boxcar]
30 |
31 | val get :
32 | window
33 | -> ('a, 'b) precision
34 | -> ?fftbins:bool
35 | -> int
36 | -> (float, 'a) Owl_dense_ndarray.Generic.t
37 | (**
38 | [get window precision n] generates a window of size [n] using the given window function type.
39 |
40 | {2 Parameters}
41 |
42 | @param window The type of window to generate.
43 | @param precision The precision of the Bigarray elements.
44 | @param n The size of the window to generate. The size of the window must be greater than 0.
45 |
46 | @raise Invalid_argument if [n] is less than or equal to 0. *)
47 |
48 | val cosine_sum :
49 | ?fftbins:bool
50 | -> ('a, 'b) precision
51 | -> float array
52 | -> int
53 | -> (float, 'a) Owl_dense_ndarray.Generic.t
54 | (**
55 | [cosine_sum precision coeffs n] generates a cosine-sum window of size [n] using the given coefficients.
56 |
57 | {2 Parameters}
58 | @param precision The precision of the Bigarray elements.
59 | @param coeffs The coefficients of the cosine-sum window. The length of the coefficients array must be greater than 0.
60 | @param n The size of the window to generate. The size of the window must be greater than 0.
61 |
62 | {2 Raises}
63 | @raise Invalid_argument if [n] is less than or equal to 0.
64 | @raise Invalid_argument if the length of [coeffs] is less than 1.
65 |
66 | {2 References}
67 | @see https://en.wikipedia.org/wiki/Window_function#Cosine-sum_windows
68 | @see https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.general_cosine.html *)
69 |
70 | val hanning :
71 | ?fftbins:bool
72 | -> ('a, 'b) precision
73 | -> int
74 | -> (float, 'a) Owl_dense_ndarray.Generic.t
75 | (**
76 | [hanning precision n] generates a Hanning window of size [n].
77 |
78 | {2 Parameters}
79 |
80 | @param precision The precision of the Bigarray elements.
81 | @param n The size of the window to generate. The size of the window must be greater than 0.
82 |
83 | @raise Invalid_argument if [n] is less than or equal to 0. *)
84 |
85 | val hamming :
86 | ?fftbins:bool
87 | -> ('a, 'b) precision
88 | -> int
89 | -> (float, 'a) Owl_dense_ndarray.Generic.t
90 | (**
91 | [hamming precision n] generates a Hamming window of size [n].
92 |
93 | {2 Parameters}
94 |
95 | @param precision The precision of the Bigarray elements.
96 | @param n The size of the window to generate. The size of the window must be greater than 0.
97 |
98 | @raise Invalid_argument if [n] is less than or equal to 0. *)
99 |
100 | val blackman :
101 | ?fftbins:bool
102 | -> ('a, 'b) precision
103 | -> int
104 | -> (float, 'a) Owl_dense_ndarray.Generic.t
105 | (**
106 | [blackman precision n] generates a Blackman window of size [n].
107 |
108 | {2 Parameters}
109 |
110 | @param precision The precision of the Bigarray elements.
111 | @param n The size of the window to generate. The size of the window must be greater than 0.
112 |
113 | @raise Invalid_argument if [n] is less than or equal to 0. *)
114 |
115 | val boxcar :
116 | ?fftbins:bool
117 | -> ('a, 'b) precision
118 | -> int
119 | -> (float, 'a) Owl_dense_ndarray.Generic.t
120 | (**
121 | [boxcar precision n] generates a Rectangular window of size [n].
122 |
123 | {2 Parameters}
124 |
125 | @param precision The precision of the Bigarray elements.
126 | @param n The size of the window to generate. The size of the window must be greater than 0.
127 |
128 | @raise Invalid_argument if [n] is less than or equal to 0. *)
129 |
--------------------------------------------------------------------------------
/test/README.md:
--------------------------------------------------------------------------------
1 | ## About SoundML Testing
2 |
3 | We choose [librosa](https://librosa.org/) as the main reference implementation for the algorithms inside SoundML. Thus, the tests are based on results obtained from librosa runs on a few audio source files either found on [freesound.org](https://freesound.org/), [freemusicarchive.org](https://freemusicarchive.org) or generated by us using FFmpeg.
4 |
5 | The Python script `generate_vectors.py` is used to generate a set of vectors directly from running librosa algorithms onto the audio files present in the `test/audio` directory. Once generated, these vectors are stored in the `test/vectors` directory and are used by `Alcotest` to compare the results of SoundML with the ones from librosa.
--------------------------------------------------------------------------------
/test/dune:
--------------------------------------------------------------------------------
1 | (data_only_dirs vectors audio)
2 |
3 | (tests
4 | (names
5 | test_aformat
6 | test_write
7 | test_utils
8 | test_time
9 | test_window
10 | test_filter
11 | test_filterbank)
12 | (libraries alcotest soundml)
13 | (action
14 | (run %{test})))
15 |
16 | (tests
17 | (names test_stft test_timeseries)
18 | (libraries alcotest yojson soundml)
19 | (package soundml)
20 | (deps
21 | generate_vectors.py
22 | generate_audio.sh
23 | (source_tree audio))
24 | (action
25 | (progn
26 | (system "mkdir -p audio")
27 | (system "./generate_audio.sh")
28 | (system "mkdir -p vectors")
29 | (system "python3 %{dep:generate_vectors.py}")
30 | (run %{test}))))
31 |
--------------------------------------------------------------------------------
/test/generate_audio.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | OUTPUT_DIR="$(pwd)/audio"
4 |
5 | declare -A AUDIO_FORMATS=(
6 | ["wav"]="pcm_s16le"
7 | ["aif"]="pcm_s16be"
8 | ["flac"]="flac"
9 | ["mp3"]="libmp3lame"
10 | )
11 |
12 | SAMPLE_RATES=(8000 44100)
13 | DURATION=1
14 |
15 | declare -A SOURCES=(
16 | ["clipping"]="sine=f=440:sample_rate={SR}:duration={DUR},volume=volume=5"
17 | ["lsine"]="sine=f=10:sample_rate={SR}:duration={DUR}"
18 | ["stereo"]="sine=f=500:sample_rate={SR}:duration={DUR} [l]; aevalsrc=exprs='random(0)*0.5':duration={DUR}:sample_rate={SR} [r]; [l][r] amerge=inputs=2"
19 | )
20 |
21 | if ! command -v ffmpeg &> /dev/null; then
22 | echo "Error: ffmpeg is not installed." >&2
23 | exit 1
24 | fi
25 |
26 | total_files=$(( ${#SOURCES[@]} * ${#AUDIO_FORMATS[@]} * ${#SAMPLE_RATES[@]} ))
27 | successful_files=0
28 | current_file_number=0
29 |
30 | for audio_source in "${!SOURCES[@]}"; do
31 | sft="${SOURCES[$audio_source]}"
32 |
33 | for format in "${!AUDIO_FORMATS[@]}"; do
34 | codec="${AUDIO_FORMATS[$format]}"
35 |
36 | for sr in "${SAMPLE_RATES[@]}"; do
37 | ((current_file_number++))
38 |
39 | source_key=$(echo "$audio_source" | tr -cd '[:alnum:]_-')
40 | filename="${format}_${source_key}_${sr}hz_${DURATION}s.${format}"
41 | output_path="$OUTPUT_DIR/$filename"
42 |
43 | source_filter="${sft//\{SR\}/$sr}"
44 | source_filter="${source_filter//\{DUR\}/$DURATION}"
45 |
46 | codec_opts=()
47 | case "$codec" in
48 | "libmp3lame") codec_opts+=("-b:a" "32k" "-compression_level" "9") ;;
49 | "libvorbis") codec_opts+=("-q:a" "0") ;;
50 | "aac") codec_opts+=("-b:a" "48k") ;;
51 | esac
52 |
53 | ffmpeg -y \
54 | -f lavfi -i "$source_filter" \
55 | -t "$DURATION" \
56 | -ar "$sr" \
57 | -c:a "$codec" "${codec_opts[@]}" \
58 | -vn \
59 | "$output_path" \
60 | -loglevel error > /dev/null 2>&1
61 |
62 | if [[ $? -eq 0 ]]; then
63 | ((successful_files++))
64 | fi
65 | done
66 | done
67 | done
68 |
69 | exit 0
70 |
--------------------------------------------------------------------------------
/test/generate_vectors.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is part of SoundML.
3 |
4 | Copyright (C) 2025 Gabriel Santamaria
5 |
6 | This script is used to generate test vectors for SoundML.
7 | The reference implementation choosen is librosa.
8 | It's supposed to be ran only once. Then the generated vectors
9 | haved to be used for the actual testing.
10 | """
11 |
12 | from typing import Any, Tuple, Dict, List
13 | import os
14 | import json
15 | import numpy as np
16 | import librosa
17 |
18 | AUDIO_DIRECTORY = "audio/"
19 | VECTOR_DIRECTORY = "vectors/"
20 |
21 |
22 | class Parameters:
23 | """
24 | Class representing parameters used to generate a vector
25 | """
26 |
27 | parameters: Dict[str, Any]
28 |
29 | def __init__(self, parameters: Dict[str, Any]):
30 | self.parameters = parameters
31 |
32 | def write(self, filename: str):
33 | """
34 | Write the parameters to a JSON file
35 | """
36 | with open(filename, "w", encoding="utf-8") as f:
37 | json.dump(self.parameters, f, indent=4)
38 |
39 |
40 | class VectorGenerator:
41 | """
42 | Abstract class representing an audio vector generators
43 | """
44 |
45 | BASE_IDENTIFIER: str
46 |
47 | audio_paths: list[str]
48 | output_dir: str
49 |
50 | counter: int = 0
51 |
52 | def __init__(self, audio_paths: list[str], output_dir: str):
53 | self.audio_paths = audio_paths
54 | self.output_dir = os.path.join(output_dir, f"{self.BASE_IDENTIFIER}/")
55 |
56 | def normalize_name(self, name: str) -> str:
57 | """
58 | Normalize the name of the audio file
59 | """
60 | return name.replace(" ", "_").replace("-", "_").lower()
61 |
62 | def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]:
63 | """
64 | Generate the vector for the given audio file
65 | """
66 | raise NotImplementedError("Subclasses should implement this method")
67 |
68 | def generate(self):
69 | """
70 | Generate the audio vectors
71 | """
72 | if not os.path.exists(self.output_dir):
73 | os.makedirs(self.output_dir, exist_ok=True)
74 |
75 | for audio_path in self.audio_paths:
76 | identifier = os.path.splitext(os.path.basename(audio_path))[0]
77 | try:
78 | data: Tuple[np.ndarray, Parameters] = self.vector(audio_path)
79 |
80 | self.counter += 1
81 |
82 | y: np.ndarray = data[0]
83 | params = data[1]
84 | filename: str = self.normalize_name(
85 | f"{self.BASE_IDENTIFIER}_{identifier}"
86 | )
87 | output_filename: str = os.path.join(self.output_dir, f"{filename}.npy")
88 | params_filename: str = os.path.join(self.output_dir, f"{filename}.json")
89 | np.save(output_filename, y)
90 | params.write(params_filename)
91 | except Exception as e:
92 | print(f"ERROR generating for {identifier}: {e}")
93 |
94 |
95 | class TimeSeriesVectorGenerator(VectorGenerator):
96 | """
97 | Reads an audio file and creates a time-series vector representation of it
98 | """
99 |
100 | BASE_IDENTIFIER: str = "timeseries"
101 |
102 | resamplers: List[str] = ["soxr_vhq", "soxr_hq", "soxr_mq", "soxr_lq"]
103 | srs = [None, 8000, 16000, 22050]
104 |
105 | def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]:
106 | """
107 | Generate the time-series vector for the given file
108 | """
109 | params = {}
110 | mono = False if self.counter % 2 == 0 or self.counter % 3 == 0 else False
111 | sr = self.srs[self.counter % len(self.srs)]
112 | res_type = self.resamplers[self.counter % len(self.resamplers)]
113 | params["mono"] = mono
114 | if sr is not None:
115 | params["res_type"] = res_type
116 | y, sr = librosa.load(
117 | audio_path, mono=mono, sr=sr, res_type=res_type, dtype=np.float64
118 | )
119 | params["sr"] = sr
120 | y = np.ascontiguousarray(y, dtype=np.float64)
121 |
122 | return (y, Parameters(params))
123 |
124 |
125 | class STFTVectorGenerator(VectorGenerator):
126 | """
127 | Reads an audio file and creates a STFT vector representation of it
128 | """
129 |
130 | BASE_IDENTIFIER: str = "stft"
131 |
132 | nffts = [512]#, #1024, 2048, 4096]
133 | window_lengths = [512]#64, 128, 256, 512]
134 | hop_sizes = [128]#, 256, 512]
135 | centers = [False, False, False]
136 | window_types = ["hann"]#, "hamming", "blackman", "boxcar"]
137 |
138 | def vector(self, audio_path: str) -> Tuple[np.ndarray, Parameters]:
139 | """
140 | Generate the STFT vector for the given file
141 | """
142 | params = {}
143 | n_fft = self.nffts[self.counter % len(self.nffts)]
144 | hop_size = self.hop_sizes[self.counter % len(self.hop_sizes)]
145 | window_type = self.window_types[self.counter % len(self.window_types)]
146 | window_length = self.window_lengths[self.counter % len(self.window_lengths)]
147 | center = self.centers[self.counter % len(self.centers)]
148 | params["window_length"] = window_length
149 | params["n_fft"] = n_fft
150 | params["hop_size"] = hop_size
151 | params["window_type"] = window_type
152 | params["center"] = center
153 | params["res_type"] = "soxr_hq"
154 |
155 | y, sr = librosa.load(audio_path)
156 | y = y.astype(np.float64)
157 | stft = librosa.stft(
158 | y,
159 | n_fft=n_fft,
160 | hop_length=hop_size,
161 | win_length=window_length,
162 | window=window_type,
163 | dtype=np.complex64,
164 | center=center,
165 | )
166 | stft = np.ascontiguousarray(stft, dtype=np.complex64)
167 | params = Parameters(params)
168 |
169 | return (stft, params)
170 |
171 |
172 | generators: list[VectorGenerator] = [TimeSeriesVectorGenerator, STFTVectorGenerator]
173 |
174 | if __name__ == "__main__":
175 | audio_files = [
176 | os.path.join(AUDIO_DIRECTORY, f) for f in os.listdir(AUDIO_DIRECTORY)
177 | ]
178 | if not os.path.exists(VECTOR_DIRECTORY):
179 | os.makedirs(VECTOR_DIRECTORY)
180 |
181 | for generator in generators:
182 | generator: VectorGenerator = generator(audio_files, VECTOR_DIRECTORY)
183 | generator.generate()
184 |
--------------------------------------------------------------------------------
/test/test_stft.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Soundml
23 | open Vutils
24 |
25 | let string_to_window = function
26 | | "hann" ->
27 | `Hanning
28 | | "hamming" ->
29 | `Hamming
30 | | "blackman" ->
31 | `Blackman
32 | | "boxcar" ->
33 | `Boxcar
34 | | _ ->
35 | failwith "Unknown window type"
36 |
37 | module StftTestable = struct
38 | type t = Complex.t
39 |
40 | type p = Bigarray.complex32_elt
41 |
42 | type pf = Bigarray.float32_elt
43 |
44 | type pc = Bigarray.complex32_elt
45 |
46 | type ('a, 'b) precision = ('a, 'b) Types.precision
47 |
48 | let precision = Types.B32
49 |
50 | let kd = Bigarray.Complex32
51 |
52 | let typ = "stft"
53 |
54 | let generate (precision : (pf, pc) precision)
55 | (case : string * string * Parameters.t)
56 | (audio : (float, 'c) Owl_dense_ndarray.Generic.t) =
57 | let _, _, params = case in
58 | let n_fft =
59 | Option.value ~default:2048 @@ Parameters.get_int "n_fft" params
60 | in
61 | let hop_size =
62 | Option.value ~default:512 @@ Parameters.get_int "hop_size" params
63 | in
64 | let win_length =
65 | Option.value ~default:2048 @@ Parameters.get_int "window_length" params
66 | in
67 | let window =
68 | string_to_window
69 | (Option.value ~default:"hann" @@ Parameters.get_string "window" params)
70 | in
71 | let center =
72 | Option.value ~default:false @@ Parameters.get_bool "center" params
73 | in
74 | let config =
75 | Transform.Config.{n_fft; hop_size; win_length; window; center}
76 | in
77 | let stft = Transform.stft ~config precision audio in
78 | let _kd = kd in
79 | stft
80 | end
81 |
82 | module Tests = Tests_cases (StftTestable)
83 |
84 | let () =
85 | let name = "Vectors: STFT Comparison" in
86 | let data = Testdata.get StftTestable.typ Vutils.data in
87 | let tests = Tests.create_tests data in
88 | Tests.run name tests
89 |
--------------------------------------------------------------------------------
/test/test_time.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Soundml.Effects.Time
23 |
24 | let config_testable : Config.t Alcotest.testable =
25 | Alcotest.testable
26 | (Fmt.of_to_string (fun fmt ->
27 | Format.sprintf "Effects.Time.Config int:%d" (Config.to_int fmt) ) )
28 | (fun a b -> Config.to_int a = Config.to_int b)
29 |
30 | let test_default_int () =
31 | let expected = 0x00000000 in
32 | let actual = Config.to_int Config.default in
33 | Alcotest.check Alcotest.int "Default config integer value" expected actual
34 |
35 | let test_percussive_int () =
36 | let expected = 0x00102000 in
37 | let actual = Config.to_int Config.percussive in
38 | Alcotest.check Alcotest.int "Percussive config integer" expected actual ;
39 | let expected = {Config.default with window= Short; phase= Independent} in
40 | Alcotest.check config_testable "Percussive config record" expected
41 | Config.percussive
42 |
43 | let test_single_options () =
44 | Alcotest.check Alcotest.int "EngineFiner" 0x20000000
45 | (Config.to_int (Config.with_engine Finer Config.default)) ;
46 | Alcotest.check Alcotest.int "TransientsMixed" 0x00000100
47 | (Config.to_int (Config.with_transients Mixed Config.default)) ;
48 | Alcotest.check Alcotest.int "TransientsSmooth" 0x00000200
49 | (Config.to_int (Config.with_transients Smooth Config.default)) ;
50 | Alcotest.check Alcotest.int "DetectorPercussive" 0x00000400
51 | (Config.to_int (Config.with_detector Percussive Config.default)) ;
52 | Alcotest.check Alcotest.int "DetectorSoft" 0x00000800
53 | (Config.to_int (Config.with_detector Soft Config.default)) ;
54 | Alcotest.check Alcotest.int "PhaseIndependent" 0x00002000
55 | (Config.to_int (Config.with_phase Independent Config.default)) ;
56 | Alcotest.check Alcotest.int "ThreadingNever" 0x00010000
57 | (Config.to_int (Config.with_threading Never Config.default)) ;
58 | Alcotest.check Alcotest.int "ThreadingAlways" 0x00020000
59 | (Config.to_int (Config.with_threading Always Config.default)) ;
60 | Alcotest.check Alcotest.int "WindowShort" 0x00100000
61 | (Config.to_int (Config.with_window Short Config.default)) ;
62 | Alcotest.check Alcotest.int "WindowLong" 0x00200000
63 | (Config.to_int (Config.with_window Long Config.default)) ;
64 | Alcotest.check Alcotest.int "SmoothingOn" 0x00800000
65 | (Config.to_int (Config.with_smoothing On Config.default)) ;
66 | Alcotest.check Alcotest.int "FormantPreserved" 0x01000000
67 | (Config.to_int (Config.with_formant Preserved Config.default)) ;
68 | Alcotest.check Alcotest.int "PitchHighQuality" 0x02000000
69 | (Config.to_int (Config.with_pitch HighQuality Config.default)) ;
70 | Alcotest.check Alcotest.int "PitchHighConsistency" 0x04000000
71 | (Config.to_int (Config.with_pitch HighConsistency Config.default)) ;
72 | Alcotest.check Alcotest.int "ChannelsTogether" 0x10000000
73 | (Config.to_int (Config.with_channels Together Config.default))
74 |
75 | let test_combinations () =
76 | let cfg = Config.default |> Config.with_engine Finer in
77 | let expected = 0x20000000 in
78 | Alcotest.check Alcotest.int "Combo: RealTime | Finer" expected
79 | (Config.to_int cfg) ;
80 | let cfg =
81 | Config.default |> Config.with_window Short
82 | |> Config.with_threading Never
83 | |> Config.with_formant Preserved
84 | in
85 | let expected = 0x01110000 in
86 | Alcotest.check Alcotest.int "Combo: Short | Never | Preserved" expected
87 | (Config.to_int cfg) ;
88 | let cfg =
89 | Config.
90 | { engine= Finer
91 | ; (* 0x20000000 *)
92 | transients= Smooth
93 | ; (* 0x00000200 *)
94 | detector= Soft
95 | ; (* 0x00000800 *)
96 | phase= Independent
97 | ; (* 0x00002000 *)
98 | threading= Always
99 | ; (* 0x00020000 *)
100 | window= Long
101 | ; (* 0x00200000 *)
102 | smoothing= On
103 | ; (* 0x00800000 *)
104 | formant= Preserved
105 | ; (* 0x01000000 *)
106 | pitch= HighConsistency
107 | ; (* 0x04000000 *)
108 | channels= Together (* 0x10000000 *) }
109 | in
110 | let expected = 0x35A22A00 in
111 | Alcotest.check Alcotest.int "Combo: All non-default" expected
112 | (Config.to_int cfg)
113 |
114 | let test_modifiers () =
115 | let base = Config.default in
116 | let modified_engine = Config.with_engine Finer base in
117 | Alcotest.check config_testable "with_engine changes only engine"
118 | {base with engine= Finer} modified_engine ;
119 | let modified_window_phase =
120 | base |> Config.with_window Short |> Config.with_phase Independent
121 | in
122 | Alcotest.check config_testable "with_window then with_phase"
123 | {base with window= Short; phase= Independent}
124 | modified_window_phase ;
125 | Alcotest.check config_testable "Manual percussive matches preset"
126 | Config.percussive modified_window_phase
127 |
128 | let test_time_stretch () =
129 | let config = Config.default in
130 | let sin_freq = 440.0 in
131 | let sample_rate = 44100 in
132 | let ratio = 2.0 in
133 | let audio_input =
134 | Audio.G.init Bigarray.Float32 [|sample_rate|] (fun i ->
135 | let t = float_of_int i /. float_of_int sample_rate in
136 | sin_freq *. (2.0 *. Float.pi *. t) |> Float.sin )
137 | in
138 | let _ = time_stretch ~config audio_input sample_rate ratio in
139 | Alcotest.(check pass)
140 | "time_stretch completed without raising an exception" () ()
141 |
142 | let test_time_stretch_invalid_ratio_raises () =
143 | let config = Config.default in
144 | let sample_rate = 44100 in
145 | let ratio = 0.0 in
146 | let audio_input = Audio.G.create Bigarray.Float32 [|100|] 0.0 in
147 | let expected_exn = Invalid_argument "rate must be > 0." in
148 | Alcotest.check_raises "Stretching with zero ratio raises Invalid_argument"
149 | expected_exn (fun () ->
150 | ignore (time_stretch ~config audio_input sample_rate ratio) )
151 |
152 | let test_pitch_shift () =
153 | let config = Config.default in
154 | let sin_freq = 440.0 in
155 | let sample_rate = 44100 in
156 | let ratio = 2.0 in
157 | let audio_input =
158 | Audio.G.init Bigarray.Float32 [|sample_rate|] (fun i ->
159 | let t = float_of_int i /. float_of_int sample_rate in
160 | sin_freq *. (2.0 *. Float.pi *. t) |> Float.sin )
161 | in
162 | let _ = time_stretch ~config audio_input sample_rate ratio in
163 | Alcotest.(check pass)
164 | "pitch_shift completed without raising an exception" () ()
165 |
166 | let () =
167 | Alcotest.run "Effects.Time: Config"
168 | [ ( "Presets"
169 | , [ Alcotest.test_case "Default integer value" `Quick test_default_int
170 | ; Alcotest.test_case "Percussive integer value" `Quick
171 | test_percussive_int ] )
172 | ; ( "Single Options"
173 | , [ Alcotest.test_case "Integer values for single flags" `Quick
174 | test_single_options ] )
175 | ; ( "Combinations"
176 | , [ Alcotest.test_case "Integer values for combined flags" `Quick
177 | test_combinations ] )
178 | ; ( "Modifiers"
179 | , [ Alcotest.test_case "Modifiers create correct configs" `Quick
180 | test_modifiers ] )
181 | ; ( "Time Stretch/Pitch Shift"
182 | , [ Alcotest.test_case "Time stretch" `Quick test_time_stretch
183 | ; Alcotest.test_case "Time stretch raise" `Quick
184 | test_time_stretch_invalid_ratio_raises ] )
185 | ; ("Pitch Shift", [Alcotest.test_case "Pitch shift" `Quick test_pitch_shift])
186 | ]
187 |
--------------------------------------------------------------------------------
/test/test_timeseries.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Soundml
23 | open Vutils
24 |
25 | module Timeseries = struct
26 | type t = Float.t
27 |
28 | type p = Bigarray.float64_elt
29 |
30 | type pf = Bigarray.float64_elt
31 |
32 | type pc = Bigarray.complex64_elt
33 |
34 | type ('a, 'b) precision = ('a, 'b) Types.precision
35 |
36 | let precision = Types.B64
37 |
38 | let kd = Bigarray.Float64
39 |
40 | let typ = "timeseries"
41 |
42 | let generate (_ : (pf, pc) precision) (_ : string * string * Parameters.t)
43 | (audio : (float, 'c) Owl_dense_ndarray.Generic.t) =
44 | audio
45 | end
46 |
47 | module Tests = Tests_cases (Timeseries)
48 |
49 | let () =
50 | let name = "Vectors: Timeseries Comparison" in
51 | let data = Testdata.get Timeseries.typ Vutils.data in
52 | let tests = Tests.create_tests data in
53 | Tests.run name tests
54 |
--------------------------------------------------------------------------------
/test/test_utils.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | type data = (float, Bigarray.float32_elt) Audio.G.t
23 |
24 | let data_testable : data Alcotest.testable =
25 | ( module struct
26 | type t = data
27 |
28 | let pp : t Fmt.t =
29 | fun fmt ndarray ->
30 | let shape_array = Audio.G.shape ndarray in
31 | let pp_shape = Fmt.brackets (Fmt.array ~sep:Fmt.semi Fmt.int) in
32 | Fmt.pf fmt "%a" pp_shape shape_array
33 |
34 | let equal : t -> t -> bool = Tutils.Check.rallclose
35 | end )
36 |
37 | module Test_pad_center = struct
38 | let create_data (arr : float array) : data =
39 | Audio.G.of_array Bigarray.Float32 arr [|Array.length arr|]
40 | (* Create 1D Ndarray *)
41 |
42 | let test_no_padding () =
43 | let input_data = create_data [|1.; 2.; 3.|] in
44 | let target_size = 3 in
45 | let pad_value = 0. in
46 | let expected_output = create_data [|1.; 2.; 3.|] in
47 | let actual_output = Utils.pad_center input_data target_size pad_value in
48 | Alcotest.check data_testable "no_padding: Correct padding" expected_output
49 | actual_output
50 |
51 | let test_even_padding () =
52 | let input_data = create_data [|1.; 2.|] in
53 | let target_size = 6 in
54 | let pad_value = 0. in
55 | let expected_output = create_data [|0.; 0.; 1.; 2.; 0.; 0.|] in
56 | let actual_output = Utils.pad_center input_data target_size pad_value in
57 | Alcotest.check data_testable "even_padding: Correct padding" expected_output
58 | actual_output
59 |
60 | let test_odd_padding () =
61 | let input_data = create_data [|1.; 2.; 3.|] in
62 | let target_size = 6 in
63 | let pad_value = 0. in
64 | let expected_output = create_data [|0.; 1.; 2.; 3.; 0.; 0.|] in
65 | let actual_output = Utils.pad_center input_data target_size pad_value in
66 | Alcotest.check data_testable "odd_padding: Correct padding" expected_output
67 | actual_output
68 |
69 | let test_empty_input () =
70 | let input_data = create_data [||] in
71 | let target_size = 4 in
72 | let pad_value = 0. in
73 | let expected_output = create_data [|0.; 0.; 0.; 0.|] in
74 | let actual_output = Utils.pad_center input_data target_size pad_value in
75 | Alcotest.check data_testable "empty_input: Correct padding" expected_output
76 | actual_output
77 |
78 | let test_error_target_too_small () =
79 | let input_data = create_data [|1.; 2.; 3.; 4.|] in
80 | let target_size = 2 in
81 | let pad_value = 0. in
82 | let expected_exn =
83 | Invalid_argument
84 | "An error occured while trying to pad: current_size > target_size"
85 | in
86 | Alcotest.check_raises
87 | "error_target_too_small: raises Invalid_argument when target_size < \
88 | input_size"
89 | expected_exn (fun () ->
90 | ignore (Utils.pad_center input_data target_size pad_value) )
91 |
92 | let test_non_zero_padding () =
93 | let input_data = create_data [|5.; 6.|] in
94 | let target_size = 5 in
95 | let pad_value = -1.5 in
96 | let expected_output = create_data [|-1.5; 5.; 6.; -1.5; -1.5|] in
97 | let actual_output = Utils.pad_center input_data target_size pad_value in
98 | Alcotest.check data_testable "non_zero_padding: Correct padding"
99 | expected_output actual_output
100 |
101 | let test_zero_target_empty_input () =
102 | let input_data = create_data [||] in
103 | flush_all () ;
104 | let target_size = 0 in
105 | let pad_value = 0. in
106 | let expected_output = create_data [||] in
107 | let actual_output = Utils.pad_center input_data target_size pad_value in
108 | Alcotest.check data_testable "zero_target_empty_input: Correct padding"
109 | expected_output actual_output
110 |
111 | let test_zero_target_non_empty_input () =
112 | let input_data = create_data [|1.; 2.|] in
113 | let target_size = 0 in
114 | let pad_value = 0. in
115 | let expected_exn =
116 | Invalid_argument
117 | "An error occured while trying to pad: current_size > target_size"
118 | in
119 | Alcotest.check_raises
120 | "zero_target_non_empty_input: raises Invalid_argument when target_size < \
121 | input_size"
122 | expected_exn (fun () ->
123 | ignore (Utils.pad_center input_data target_size pad_value) )
124 |
125 | let suite =
126 | [ Alcotest.test_case "no_padding" `Quick test_no_padding
127 | ; Alcotest.test_case "even_padding" `Quick test_even_padding
128 | ; Alcotest.test_case "odd_padding" `Quick test_odd_padding
129 | ; Alcotest.test_case "empty_input" `Quick test_empty_input
130 | ; Alcotest.test_case "error_target_too_small" `Quick
131 | test_error_target_too_small
132 | ; Alcotest.test_case "non_zero_padding" `Quick test_non_zero_padding
133 | ; Alcotest.test_case "zero_target_empty_input" `Quick
134 | test_zero_target_empty_input
135 | ; Alcotest.test_case "zero_target_non_empty_input" `Quick
136 | test_zero_target_non_empty_input ]
137 | end
138 |
139 | let () =
140 | Alcotest.run "SoundML Utils Tests" [("Pad Center", Test_pad_center.suite)]
141 |
--------------------------------------------------------------------------------
/test/test_write.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Bigarray
23 | open Soundml
24 | open Tutils
25 |
26 | let temp_dir_name = ref ""
27 |
28 | let setup_test_dir () =
29 | let dir = Filename.temp_dir "soundml_test_" "" in
30 | temp_dir_name := dir ;
31 | if not (Sys.file_exists dir && Sys.is_directory dir) then Unix.mkdir dir 0o755
32 |
33 | let delete_test_dir () =
34 | let rec rm_rf path =
35 | if Sys.is_directory path then (
36 | let files = Sys.readdir path in
37 | Array.iter (fun f -> rm_rf (Filename.concat path f)) files ;
38 | Unix.rmdir path )
39 | else Sys.remove path
40 | in
41 | if !temp_dir_name <> "" && Sys.file_exists !temp_dir_name then
42 | rm_rf !temp_dir_name ;
43 | temp_dir_name := ""
44 |
45 | let temp_file ?(ext = ".wav") name = Filename.concat !temp_dir_name (name ^ ext)
46 |
47 | let file_exists name =
48 | try
49 | Unix.access name [Unix.F_OK] ;
50 | true
51 | with
52 | | Unix.Unix_error (Unix.ENOENT, _, _) ->
53 | false
54 | | _ ->
55 | true
56 |
57 | let create_test_audio channels samples sample_rate format =
58 | let shape = if channels > 1 then [|channels; samples|] else [|samples|] in
59 | let data = Audio.G.create Bigarray.Float32 shape 0. in
60 | let freq = 23000. in
61 | for channel = 0 to channels - 1 do
62 | for i = 0 to samples - 1 do
63 | let idx = if channels > 1 then [|channel; i|] else [|i|] in
64 | Audio.G.set data idx
65 | (sin (2. *. Float.pi *. (freq *. Float.of_int channel)))
66 | done
67 | done ;
68 | let meta = Audio.Metadata.create channels samples sample_rate format in
69 | let audio_data = Audio.create meta data in
70 | (audio_data, sample_rate)
71 |
72 | let create_empty_audio channels sample_rate format =
73 | let shape = if channels > 1 then [|channels; 0|] else [|0|] in
74 | let data = Audio.G.create Bigarray.Float32 shape 0. in
75 | let meta = Audio.Metadata.create channels 0 sample_rate format in
76 | let audio_data = Audio.create meta data in
77 | (audio_data, sample_rate)
78 |
79 | let audio_testable =
80 | let pp fmt (a : float32_elt Audio.audio) =
81 | Format.fprintf fmt "{ channels=%d; samples/channel=%d; }" (Audio.channels a)
82 | (if Audio.channels a > 0 then Audio.samples a else 0)
83 | in
84 | let equal a b =
85 | Check.rallclose ~rtol:1e-05 ~atol:1e-08 (Audio.data a) (Audio.data b)
86 | in
87 | Alcotest.testable pp equal
88 |
89 | let check_write_read name
90 | ?(format : Aformat.t = Aformat.{ftype= WAV; sub= PCM_16; endian= FILE})
91 | channels samples target_sr ext =
92 | let test_name =
93 | Printf.sprintf "%s_%dch_%dsamples_%dHz%s" name channels samples target_sr
94 | ext
95 | in
96 | Alcotest.test_case test_name `Quick (fun () ->
97 | let filename = temp_file ~ext test_name in
98 | let audio, sr = create_test_audio channels samples target_sr format in
99 | Io.write ~format filename (Audio.data audio) sr ;
100 | Alcotest.check Alcotest.bool "Output file exists after write"
101 | (file_exists filename) true ;
102 | let read_audio =
103 | try
104 | Io.read ~mono:(channels = 1) ~sample_rate:target_sr Bigarray.Float32
105 | filename
106 | with ex ->
107 | Alcotest.failf "Failed to read back file %s: %s" filename
108 | (Printexc.to_string ex)
109 | in
110 | Alcotest.check Alcotest.int "Channels match after write" channels
111 | (Audio.channels read_audio) ;
112 | Alcotest.check Alcotest.int "Sample rate match after write" target_sr
113 | (Audio.sr read_audio) ;
114 | Alcotest.check Alcotest.int "Frames match after write" samples
115 | (Audio.samples read_audio) ;
116 | Alcotest.check
117 | (Alcotest.testable Aformat.pp Stdlib.( = ))
118 | "Format match after write" format (Audio.format read_audio) ;
119 | Alcotest.check audio_testable "Data unchanged after write" audio
120 | read_audio )
121 |
122 | let check_write_empty name
123 | ?(format : Aformat.t = Aformat.{ftype= WAV; sub= PCM_16; endian= FILE})
124 | channels target_sr ext =
125 | let test_name =
126 | Printf.sprintf "%s_%dch_empty_%dHz%s" name channels target_sr ext
127 | in
128 | Alcotest.test_case test_name `Quick (fun () ->
129 | let filename = temp_file ~ext test_name in
130 | let audio, sr = create_empty_audio channels target_sr format in
131 | Alcotest.check
132 | (Alcotest.neg Alcotest.reject)
133 | "Write empty audio don't raise"
134 | (fun () -> ())
135 | (fun () -> Io.write ~format filename (Audio.data audio) sr) )
136 |
137 | let tests =
138 | let wav = Result.get_ok (Aformat.create Aformat.WAV) in
139 | let flac = Result.get_ok (Aformat.create Aformat.FLAC) in
140 | let ogg = Result.get_ok (Aformat.create Aformat.OGG) in
141 | [ check_write_read "write_f32_mono_wav_deduced" 1 1024 44100 ".wav"
142 | ; check_write_read "write_f32_stereo_wav_deduced" 2 1024 44100 ".wav"
143 | ; check_write_read "write_f32_stereo_flac_deduced" 2 512 22050 ".flac"
144 | ; check_write_read "write_f32_mono_ogg_deduced" 1 2048 48000 ".ogg"
145 | ; check_write_read "write_f32_stereo_wav_explicit" ~format:wav 2 1024 44100
146 | ".wav"
147 | ; check_write_read "write_f32_stereo_flac_explicit" ~format:flac 2 512 22050
148 | ".flac"
149 | ; check_write_read "write_f32_mono_ogg_explicit" ~format:ogg 1 2048 48000
150 | ".ogg"
151 | ; check_write_empty "write_f32_mono_empty" 1 44100 ".wav"
152 | ; check_write_empty "write_f32_stereo_empty" 2 44100 ".wav" ]
153 |
154 | let suite = [("Write/Read Roundtrip", tests)]
155 |
156 | let () =
157 | setup_test_dir () ;
158 | Alcotest.run "SoundML Io.write" suite ;
159 | delete_test_dir ()
160 |
--------------------------------------------------------------------------------
/test/tutils.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | module Check = struct
23 | open Soundml
24 |
25 | let shape (x : ('a, 'b) Audio.G.t) (y : ('a, 'b) Audio.G.t) =
26 | let shape_x = Audio.G.shape x in
27 | let shape_y = Audio.G.shape y in
28 | if Array.length shape_x <> Array.length shape_y then false
29 | else Array.for_all2 (fun x y -> x = y) shape_x shape_y
30 |
31 | let rallclose ?(rtol = 1e-05) ?(atol = 1e-10) (x : ('a, 'b) Audio.G.t)
32 | (y : ('a, 'b) Audio.G.t) : bool =
33 | if not (shape x y) then false
34 | else if Audio.G.numel x = 0 && Audio.G.numel y = 0 then true
35 | else
36 | let abs_diff = Audio.G.abs (Audio.G.sub x y) in
37 | let tolerance = Audio.G.(add_scalar (mul_scalar (abs y) rtol) atol) in
38 | let comparison_mask = Audio.G.elt_less_equal abs_diff tolerance in
39 | Audio.G.min' comparison_mask >= 1.0
40 |
41 | let callclose : type a.
42 | ?rtol:float
43 | -> ?atol:float
44 | -> (Complex.t, a) Audio.G.t
45 | -> (Complex.t, a) Audio.G.t
46 | -> bool =
47 | fun ?(rtol = 1e-05) ?(atol = 1e-05) (x : (Complex.t, a) Audio.G.t)
48 | (y : (Complex.t, a) Audio.G.t) ->
49 | if not (shape x y) then false
50 | else if Audio.G.numel x = 0 && Audio.G.numel y = 0 then true
51 | else
52 | let x, y =
53 | match Audio.G.kind x with
54 | | Bigarray.Complex32 ->
55 | (Audio.G.cast_c2z x, Audio.G.cast_c2z y)
56 | | Bigarray.Complex64 ->
57 | (x, y)
58 | | _ ->
59 | .
60 | in
61 | let diff = Audio.G.sub x y in
62 | let abs_diff = Audio.G.abs2_z2d diff in
63 | let abs_y = Audio.G.abs2_z2d y in
64 | let tolerance = Audio.G.(add_scalar (mul_scalar abs_y rtol) atol) in
65 | let comparison_mask = Audio.G.elt_less_equal abs_diff tolerance in
66 | Audio.G.min' comparison_mask >= 1.0
67 | end
68 |
69 | let allclose : type a b.
70 | (a, b) Bigarray.kind
71 | -> ?rtol:float
72 | -> ?atol:float
73 | -> (a, b) Owl_dense_ndarray.Generic.t
74 | -> (a, b) Owl_dense_ndarray.Generic.t
75 | -> bool =
76 | fun kd ->
77 | match kd with
78 | | Bigarray.Complex32 ->
79 | Check.callclose
80 | | Bigarray.Complex64 ->
81 | Check.callclose
82 | | Bigarray.Float32 ->
83 | Check.rallclose
84 | | Bigarray.Float64 ->
85 | Check.rallclose
86 | | _ ->
87 | failwith "Unsupported datatype."
88 |
89 | let dense_testable : type a b.
90 | ?rtol:float
91 | -> ?atol:float
92 | -> (a, b) Bigarray.kind
93 | -> (a, b) Audio.G.t Alcotest.testable =
94 | fun ?rtol ?atol (_ : (a, b) Bigarray.kind) ->
95 | let kd_to_string (type a b) (kd : (a, b) Bigarray.kind) =
96 | match kd with
97 | | Bigarray.Float32 ->
98 | "Float32"
99 | | Bigarray.Float64 ->
100 | "Float64"
101 | | Bigarray.Complex32 ->
102 | "Complex32"
103 | | Bigarray.Complex64 ->
104 | "Complex64"
105 | | _ ->
106 | failwith "Unsupported kind"
107 | in
108 | let pp_kind fmt k =
109 | let str_k = kd_to_string k in
110 | Format.fprintf fmt "%s" str_k
111 | in
112 | let to_string (type a b) (kd : (a, b) Bigarray.kind) (v : a) =
113 | match kd with
114 | | Bigarray.Float32 ->
115 | Printf.sprintf "%f" v
116 | | Bigarray.Float64 ->
117 | Printf.sprintf "%f" v
118 | | Bigarray.Complex32 ->
119 | Printf.sprintf "%f + %fi" v.re v.im
120 | | Bigarray.Complex64 ->
121 | Printf.sprintf "%f + %fi" v.re v.im
122 | | _ ->
123 | failwith "Unsupported kind"
124 | in
125 | let pp fmt arr =
126 | let kd = Audio.G.kind arr in
127 | let dims = Audio.G.shape arr in
128 | let first_few_max = 10 in
129 | let first_few = ref [] in
130 | let total_elements = Array.fold_left ( * ) 1 dims in
131 | let flattened = Audio.G.flatten arr in
132 | if total_elements > 0 && Array.length dims == 1 then
133 | for i = 0 to first_few_max - 1 do
134 | first_few := Audio.G.get flattened [|i|] :: !first_few
135 | done ;
136 | Format.fprintf fmt
137 | "Audio.G.t " pp_kind
138 | (Audio.G.kind arr)
139 | (String.concat "; " (Array.to_list (Array.map string_of_int dims)))
140 | first_few_max
141 | (String.concat "; " (List.map (to_string kd) (List.rev !first_few)))
142 | in
143 | let equal a b =
144 | let kd = Audio.G.kind a in
145 | allclose ?rtol ?atol kd a b
146 | in
147 | Alcotest.testable pp equal
148 |
149 | let float32_g_testable = dense_testable Bigarray.Float32
150 |
151 | let float64_g_testable = dense_testable Bigarray.Float64
152 |
153 | let complex32_g_testable = dense_testable Bigarray.Complex32
154 |
155 | let complex64_g_testable = dense_testable Bigarray.Complex64
156 |
157 | let get_dense_testable (type a b) (kd : (a, b) Bigarray.kind) :
158 | (a, b) Audio.G.t Alcotest.testable =
159 | match kd with
160 | | Bigarray.Float32 ->
161 | float32_g_testable
162 | | Bigarray.Float64 ->
163 | float64_g_testable
164 | | Bigarray.Complex32 ->
165 | complex32_g_testable
166 | | Bigarray.Complex64 ->
167 | complex64_g_testable
168 | | _ ->
169 | failwith "Unsupported kind"
170 |
171 | (* This snippet has been gathered from the exact same code but for Matrix in
172 | Owl. See:
173 | https://github.com/tachukao/owl/blob/046f703a6890a5ed5ecf4a8c5750d4e392e4ec54/src/owl/dense/owl_dense_matrix_generic.ml#L606-L609
174 | Unfortunately, for the moment this is not yet available for Ndarrays. *)
175 | let load_npy (path : string) (kind : ('a, 'b) Bigarray.kind) :
176 | ('a, 'b) Audio.G.t =
177 | let npy : ('a, 'b) Audio.G.t =
178 | match Npy.read_copy path |> Npy.to_bigarray Bigarray.c_layout kind with
179 | | Some x ->
180 | x
181 | | None ->
182 | failwith Printf.(sprintf "%s: incorrect format" path)
183 | in
184 | npy
185 |
--------------------------------------------------------------------------------
/test/tutils.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | (** Module providing usefull checking functions for the tests *)
23 | module Check : sig
24 | val rallclose :
25 | ?rtol:float
26 | -> ?atol:float
27 | -> (float, 'b) Owl_dense_ndarray.Generic.t
28 | -> (float, 'b) Owl_dense_ndarray.Generic.t
29 | -> bool
30 | (** Real-valued all-close function *)
31 |
32 | val callclose :
33 | 'a.
34 | ?rtol:float
35 | -> ?atol:float
36 | -> (Complex.t, 'a) Owl_dense_ndarray.Generic.t
37 | -> (Complex.t, 'a) Owl_dense_ndarray.Generic.t
38 | -> bool
39 | (** Complex-valued all-close function *)
40 |
41 | val shape :
42 | ('a, 'b) Owl_dense_ndarray.Generic.t
43 | -> ('a, 'b) Owl_dense_ndarray.Generic.t
44 | -> bool
45 | (** Check the shape of two ndarrays are equal *)
46 | end
47 |
48 | val allclose :
49 | 'a 'b.
50 | ('a, 'b) Bigarray.kind
51 | -> ?rtol:float
52 | -> ?atol:float
53 | -> ('a, 'b) Owl_dense_ndarray.Generic.t
54 | -> ('a, 'b) Owl_dense_ndarray.Generic.t
55 | -> bool
56 | (** Checks if two Ndarrays are allclose. This is equivalent to NumPy's allclose function. *)
57 |
58 | val dense_testable :
59 | ?rtol:float
60 | -> ?atol:float
61 | -> ('a, 'b) Bigarray.kind
62 | -> ('a, 'b) Owl_dense_ndarray.Generic.t Alcotest.testable
63 |
64 | val get_dense_testable :
65 | ('a, 'b) Bigarray.kind
66 | -> ('a, 'b) Owl_dense_ndarray.Generic.t Alcotest.testable
67 | (** Function that returns a correctly-typed testable based on the passed kind for Dense.Ndarray. *)
68 |
69 | val load_npy :
70 | string -> ('a, 'b) Bigarray.kind -> ('a, 'b) Owl_dense_ndarray.Generic.t
71 | (** Load a numpy file and return the ndarray.
72 | @see https://github.com/tachukao/owl/blob/046f703a6890a5ed5ecf4a8c5750d4e392e4ec54/src/owl/dense/owl_dense_matrix_generic.ml#L606-L609 *)
73 |
--------------------------------------------------------------------------------
/test/vutils.ml:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | open Tutils
23 |
24 | let test_audio_dir = Sys.getcwd () ^ "/audio/"
25 |
26 | let test_vectors_dir = Sys.getcwd () ^ "/vectors/"
27 |
28 | let typ_to_readable = function
29 | | "timeseries" ->
30 | "Io.Read"
31 | | "stft" ->
32 | "Spectral.stft"
33 | | _ ->
34 | "Unkown"
35 |
36 | let string_to_resample_typ = function
37 | | "soxr_vhq" ->
38 | Io.SOXR_VHQ
39 | | "soxr_hq" ->
40 | Io.SOXR_HQ
41 | | "soxr_mq" ->
42 | Io.SOXR_MQ
43 | | "soxr_lq" ->
44 | Io.SOXR_LQ
45 | | _ ->
46 | Io.NONE
47 |
48 | module StrMap = Map.Make (String)
49 |
50 | module Parameters = struct
51 | open Yojson.Basic.Util
52 |
53 | type t = Yojson.Basic.t
54 |
55 | let create (path : string) = Yojson.Basic.from_file path
56 |
57 | let get_string (name : string) (params : t) =
58 | member name params |> to_string_option
59 |
60 | let get_int (name : string) (params : t) = member name params |> to_int_option
61 |
62 | let get_float (name : string) (params : t) =
63 | member name params |> to_float_option
64 |
65 | let get_bool (name : string) (params : t) =
66 | member name params |> to_bool_option
67 | end
68 |
69 | module Testdata = struct
70 | type t = (string * string * Parameters.t) list StrMap.t
71 |
72 | let get_test_type (basename : string) : string option =
73 | let split = Str.(split (regexp {|_|}) basename) in
74 | if List.length split >= 1 then Some (List.hd split) else None
75 |
76 | let get_test_filename (basename : string) : string option =
77 | let split = Str.(split (regexp {|_|}) basename) in
78 | if List.length split >= 1 then Some (String.concat "_" (List.tl split))
79 | else None
80 |
81 | let list_filter_filename (dir : string) (name : string) : string option =
82 | try
83 | let files = Sys.readdir dir in
84 | Option.map (fun elt -> Filename.concat dir elt)
85 | @@ Array.find_opt (fun elt -> Filename.remove_extension elt = name) files
86 | with Sys_error msg ->
87 | Printf.eprintf "Error reading directory '%s': %s\n" dir msg ;
88 | None
89 |
90 | let list_tests_files (dir : string) (ext : string) : string list =
91 | let has_extension path = Filename.check_suffix path ext in
92 | let process_entry base_dir entry =
93 | let full_path = Filename.concat base_dir entry in
94 | try
95 | if Sys.is_directory full_path then
96 | List.filter has_extension
97 | @@ List.map (Filename.concat full_path)
98 | @@ Array.to_list @@ Sys.readdir full_path
99 | else if has_extension full_path then [full_path]
100 | else []
101 | with Sys_error _ -> []
102 | in
103 | try
104 | Sys.readdir dir |> Array.to_list |> List.concat_map (process_entry dir)
105 | (* Appliquer process_entry et concaténer les résultats *)
106 | with Sys_error msg ->
107 | Printf.eprintf "Error reading directory '%s': %s\n" dir msg ;
108 | []
109 |
110 | let filter_test_type (typ : string) =
111 | let filter (typ : string) (full_path : string) : string option =
112 | let basename = Filename.basename full_path in
113 | let split = get_test_type basename in
114 | Option.bind split (fun x ->
115 | if x = typ then Some (Filename.remove_extension full_path) else None )
116 | in
117 | List.filter_map @@ filter typ
118 |
119 | let construct_parameters (audio_dir : string) (files : string list) =
120 | let aux (file : string) =
121 | let base = Filename.basename file |> Filename.remove_extension in
122 | let audio_filename =
123 | Option.value (get_test_filename base) ~default:base
124 | in
125 | let audio_file_opt = list_filter_filename audio_dir audio_filename in
126 | match audio_file_opt with
127 | | None ->
128 | Printf.eprintf "Warning: couldn't find audio file with name %s\n" base ;
129 | None
130 | | Some audio_file_path ->
131 | let npy_file = file ^ ".npy" in
132 | let json_file = file ^ ".json" in
133 | let params = Parameters.create json_file in
134 | Some (npy_file, audio_file_path, params)
135 | in
136 | List.filter_map aux files
137 |
138 | let create (vectors_dir : string) (audio_dir : string) (types : string list) :
139 | t =
140 | let vectors_files = list_tests_files vectors_dir ".json" in
141 | let fold map typ =
142 | let l = filter_test_type typ vectors_files in
143 | StrMap.add typ l map
144 | in
145 | let files_map = List.fold_left fold StrMap.empty types in
146 | StrMap.map (construct_parameters audio_dir) files_map
147 |
148 | let get (typ : string) (data : t) : (string * string * Parameters.t) list =
149 | StrMap.find typ data
150 | end
151 |
152 | module type Testable = sig
153 | type t
154 |
155 | type p
156 |
157 | type pf
158 |
159 | type pc
160 |
161 | type ('a, 'b) precision = ('a, 'b) Types.precision
162 |
163 | val precision : (pf, pc) precision
164 |
165 | val kd : (t, p) Bigarray.kind
166 |
167 | val typ : string
168 |
169 | val generate :
170 | (pf, pc) precision
171 | -> string * string * Parameters.t
172 | -> (float, pf) Owl_dense_ndarray.Generic.t
173 | -> (t, p) Owl_dense_ndarray.Generic.t
174 | end
175 |
176 | module Tests_cases (T : Testable) = struct
177 | include T
178 |
179 | let akind : type a b. (a, b) precision -> (float, a) Bigarray.kind =
180 | fun prec ->
181 | match prec with
182 | | Types.B32 ->
183 | Bigarray.Float32
184 | | Types.B64 ->
185 | Bigarray.Float64
186 |
187 | let read_audio kd (path : string) (res_typ : Io.resampling_t)
188 | (sample_rate : int) (mono : bool) =
189 | let audio = Io.read ~res_typ ~sample_rate ~mono kd path in
190 | Audio.data audio
191 |
192 | let create_tests (data : (string * string * Parameters.t) list) :
193 | unit Alcotest.test_case list =
194 | List.concat_map
195 | (fun (case : string * string * Parameters.t) ->
196 | let vector_path, audio_path, params = case in
197 | let raw_basename =
198 | Filename.basename vector_path |> Filename.remove_extension
199 | in
200 | let basename =
201 | Option.value ~default:raw_basename
202 | (Testdata.get_test_filename raw_basename)
203 | in
204 | let sr =
205 | Option.value ~default:22050 @@ Parameters.get_int "sr" params
206 | in
207 | let mono =
208 | Option.value ~default:true @@ Parameters.get_bool "mono" params
209 | in
210 | let resampler =
211 | string_to_resample_typ
212 | ( Option.value ~default:"None"
213 | @@ Parameters.get_string "res_type" params )
214 | in
215 | let audio_kind = akind precision in
216 | let audio = read_audio audio_kind audio_path resampler sr mono in
217 | let generated = generate precision case audio in
218 | let vector = load_npy vector_path kd in
219 | let test_dense () =
220 | Alcotest.check
221 | (Tutils.get_dense_testable kd)
222 | (typ ^ "_dense_" ^ basename)
223 | generated vector
224 | in
225 | let test_dense = ("DENSE: " ^ basename, `Slow, test_dense) in
226 | [test_dense] )
227 | data
228 |
229 | let run (name : string) (tests : unit Alcotest.test_case list) =
230 | Alcotest.run name [(typ_to_readable typ, tests)]
231 | end
232 |
233 | let tests = ["timeseries"; "stft"]
234 |
235 | let data = Testdata.create test_vectors_dir test_audio_dir tests
236 |
--------------------------------------------------------------------------------
/test/vutils.mli:
--------------------------------------------------------------------------------
1 | (*****************************************************************************)
2 | (* *)
3 | (* *)
4 | (* Copyright (C) 2025 *)
5 | (* Gabriel Santamaria *)
6 | (* *)
7 | (* *)
8 | (* Licensed under the Apache License, Version 2.0 (the "License"); *)
9 | (* you may not use this file except in compliance with the License. *)
10 | (* You may obtain a copy of the License at *)
11 | (* *)
12 | (* http://www.apache.org/licenses/LICENSE-2.0 *)
13 | (* *)
14 | (* Unless required by applicable law or agreed to in writing, software *)
15 | (* distributed under the License is distributed on an "AS IS" BASIS, *)
16 | (* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *)
17 | (* See the License for the specific language governing permissions and *)
18 | (* limitations under the License. *)
19 | (* *)
20 | (*****************************************************************************)
21 |
22 | val test_audio_dir : string
23 | (** The directory where the test audio files are located. *)
24 |
25 | val test_vectors_dir : string
26 | (** The directory where the test vectors are located. *)
27 |
28 | val typ_to_readable : string -> string
29 | (** Converts a test type to a readable format for Alcotest. *)
30 |
31 | (** A map from strings to values. *)
32 | module StrMap : Map.S with type key = string
33 |
34 | (** A module for handling parameters. *)
35 | module Parameters : sig
36 | type t
37 |
38 | val create : string -> t
39 |
40 | val get_string : string -> t -> string option
41 |
42 | val get_int : string -> t -> int option
43 |
44 | val get_float : string -> t -> float option
45 |
46 | val get_bool : string -> t -> bool option
47 | end
48 |
49 | module Testdata : sig
50 | type t = (string * string * Parameters.t) list StrMap.t
51 |
52 | val get_test_type : string -> string option
53 | (** Returns the test type for a given test name *)
54 |
55 | val get_test_filename : string -> string option
56 | (** Returns the test filename for a given test type *)
57 |
58 | val create : string -> string -> string list -> t
59 | (** Creates a test set given a directory of vectors files, a directory of audio files
60 | and a list of test types *)
61 |
62 | val get : string -> t -> (string * string * Parameters.t) list
63 | (** Returns the test set for a given test type *)
64 | end
65 |
66 | module type Testable = sig
67 | (** Type of the data generated by the functionnality we're testing *)
68 | type t
69 |
70 | (** Bigarray precision *)
71 | type p
72 |
73 | (** Float precision *)
74 | type pf
75 |
76 | (** Complex precision *)
77 | type pc
78 |
79 | type ('a, 'b) precision = ('a, 'b) Types.precision
80 |
81 | val precision : (pf, pc) precision
82 |
83 | val kd : (t, p) Bigarray.kind
84 |
85 | val typ : string
86 |
87 | val generate :
88 | (pf, pc) precision
89 | -> string * string * Parameters.t
90 | -> (float, pf) Owl_dense_ndarray.Generic.t
91 | -> (t, p) Owl_dense_ndarray.Generic.t
92 | (** Generate a testing vector given some parameters and an audio array *)
93 | end
94 |
95 | module Tests_cases (T : Testable) : sig
96 | type t = T.t
97 |
98 | type p = T.p
99 |
100 | type pf = T.pf
101 |
102 | type pc = T.pc
103 |
104 | val create_tests :
105 | (string * string * Parameters.t) list -> unit Alcotest.test_case list
106 |
107 | val run : string -> unit Alcotest.test_case list -> unit
108 | end
109 |
110 | val data : Testdata.t
111 |
--------------------------------------------------------------------------------