├── requirements.txt
├── download_data.sh
├── Dockerfile
├── check_audios.py
└── resample.py


/requirements.txt:
--------------------------------------------------------------------------------
1 | soundfile
2 | librosa
3 | pandas
4 | tqdm


--------------------------------------------------------------------------------
/download_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Download the VCTK corpus archive from Kaggle into the current directory.

# Stop at the first failing command, unset variable or failed pipeline stage.
set -euo pipefail

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as vctk-corpus.zip.
curl --fail -L -o vctk-corpus.zip https://www.kaggle.com/api/v1/datasets/download/pratt3000/vctk-corpus


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use Python base image
 2 | FROM python:3.11-slim
 3 | 
 4 | # Install required system dependencies
 5 | RUN apt-get update && apt-get install -y \
 6 |     curl \
 7 |     unzip \
 8 |     && rm -rf /var/lib/apt/lists/*
 9 | 
10 | # Set working directory
11 | WORKDIR /app
12 | 
13 | # Download and unzip the VCTK corpus dataset
14 | RUN curl -L -o vctk-corpus.zip https://www.kaggle.com/api/v1/datasets/download/pratt3000/vctk-corpus && \
15 |     unzip vctk-corpus.zip -d /app/data && \
16 |     rm vctk-corpus.zip
17 | 
18 | # Install Python dependencies
19 | COPY requirements.txt .
20 | RUN pip install --no-cache-dir -r requirements.txt
21 | 
22 | # Copy scripts
23 | COPY resample.py .
24 | 
25 | COPY check_audios.py .
26 | 
27 | # Create output directory
28 | RUN mkdir -p /app/output
29 | 
30 | # Run the Python script with arguments
31 | CMD ["python", "check_audios.py", \
32 |      "--input_dir", "/app/data", \
33 |      "--output_dir", "/app/output", \
34 |      "--n_jobs", "4"]


--------------------------------------------------------------------------------
/check_audios.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os 
 3 | import soundfile as sf
 4 | from multiprocessing import Pool
 5 | import numpy as np
 6 | import struct
 7 | import glob
 8 | import pandas as pd
 9 | 
10 | from argparse import RawTextHelpFormatter
11 | from tqdm import tqdm
12 | 
13 | 
def get_durations(input_file):
    """Return ``(input_file, duration)`` for one audio file.

    The file is first read completely with ``sf.read``; the samples are
    discarded, so that call only serves to raise on files that cannot be
    decoded. The duration itself is taken from ``sf.info``.

    Args:
        input_file: Path of the audio file to inspect.

    Returns:
        A ``(input_file, duration)`` tuple. ``duration`` is NaN when reading
        the file or its metadata raised an exception.
    """
    try:
        # Full decode acts as the integrity check; the data is not needed.
        sf.read(input_file)
        duration = sf.info(input_file).duration
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole batch,
        # but report why it failed instead of only naming it.
        print(f"{input_file}: {e}")
        return (input_file, float("NaN"))
    return (input_file, duration)
24 |         
25 | 
def process_files(input_dir, output_dir, file_ext="wav", n_jobs=max(1, (os.cpu_count() or 2) - 1)):
    """Collect the duration of every audio file under a folder into a CSV.

    Files matching ``*.{file_ext}`` are searched recursively below
    ``input_dir``, processed by ``get_durations`` in a process pool, and the
    results are written to ``durations.csv`` inside ``output_dir``.

    Args:
        input_dir: Root folder that is searched recursively.
        output_dir: Folder that receives ``durations.csv``; it is created if
            it does not exist.
        file_ext: File extension (without the dot) of the files to process.
        n_jobs: Number of worker processes. Defaults to one less than the
            CPU count, but never less than 1. ``None`` is passed through to
            ``Pool`` unchanged.
    """
    # Create the destination up front so a bad output path fails before the
    # (potentially long) processing step rather than after it.
    os.makedirs(output_dir, exist_ok=True)

    print("Getting audio file durations...")
    audio_files = glob.glob(os.path.join(input_dir, f"**/*.{file_ext}"), recursive=True)
    print(f"Found {len(audio_files)} files...")

    with Pool(processes=n_jobs) as p:
        # imap keeps the results in the same order as audio_files.
        results = list(tqdm(p.imap(get_durations, audio_files), total=len(audio_files)))

    # save to csv: each result is a (file, duration) pair
    df = pd.DataFrame(results, columns=["files", "durations"])
    df.to_csv(os.path.join(output_dir, "durations.csv"), index=False)
    print("Done !")
45 | 
if __name__ == "__main__":
    # Command-line entry point: parse the folders and the worker count, then
    # write durations.csv for every .wav file found below --input_dir.
    parser = argparse.ArgumentParser(
        description="""Get durations of a collection of audios \n\n
                       Example run:
                            python check_audios.py
                                --input_dir /root/LJSpeech-1.1/
                                --output_dir /root/LJSpeech-1.1/
                                --n_jobs 24
                    """,
        formatter_class=RawTextHelpFormatter,
    )

    parser.add_argument(
        "--input_dir",
        type=str,
        required=True,
        help="Path of the folder containing the audio files to get durations",
    )

    parser.add_argument(
        "--output_dir",
        type=str,
        required=True,
        help="Path of the destination folder to store the csv with durations.",
    )

    # None is handed to the process pool unchanged, which then sizes itself
    # from the number of available cores.
    parser.add_argument(
        "--n_jobs", type=int, default=None, help="Number of worker processes to use, by default it uses all cores"
    )

    args = parser.parse_args()

    process_files(input_dir=args.input_dir, output_dir=args.output_dir, file_ext="wav", n_jobs=args.n_jobs)


--------------------------------------------------------------------------------
/resample.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import glob
 3 | import os
 4 | from argparse import RawTextHelpFormatter
 5 | from multiprocessing import Pool
 6 | from shutil import copytree
 7 | 
 8 | import librosa
 9 | import soundfile as sf
10 | from tqdm import tqdm
11 | 
12 | # from coqui https://github.com/coqui-ai/TTS/blob/dev/TTS/bin/resample.py
13 | 
def resample_file(func_args):
    """Resample a single audio file and overwrite it in place.

    Args:
        func_args: A ``(filename, output_sr)`` pair, packed into one
            argument so the function can be mapped over a pool.
    """
    path, target_sr = func_args
    # librosa resamples while loading when a target rate is given.
    samples, rate = librosa.load(path, sr=target_sr)
    # Write the resampled audio back to the same path.
    sf.write(path, samples, rate)
18 | 
19 | 
def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav", n_jobs=10):
    """Resample every matching audio file below a folder.

    When ``output_dir`` is given, ``input_dir`` is first copied there and the
    copy is resampled; otherwise the files under ``input_dir`` are
    overwritten in place.

    Args:
        input_dir: Root folder searched recursively for audio files.
        output_sr: Sample rate handed to ``resample_file`` for each file.
        output_dir: Optional destination folder for a resampled copy.
        file_ext: File extension (without the dot) of the files to resample.
        n_jobs: Number of worker processes in the pool.
    """
    if output_dir:
        print("Recursively copying the input folder...")
        copytree(input_dir, output_dir)
        # From here on operate on the copy, leaving the source untouched.
        input_dir = output_dir

    print("Resampling the audio files...")
    pattern = os.path.join(input_dir, f"**/*.{file_ext}")
    found = glob.glob(pattern, recursive=True)
    print(f"Found {len(found)} files...")

    # Each job carries its own target rate because the pool maps one argument.
    jobs = [(path, output_sr) for path in found]
    with Pool(processes=n_jobs) as workers, tqdm(total=len(jobs)) as progress:
        for _ in workers.imap_unordered(resample_file, jobs):
            progress.update()

    print("Done !")
36 | 
37 | 
if __name__ == "__main__":
    # Command-line entry point: parse the options and resample the folder,
    # either in place or into a copy when --output_dir is given.
    parser = argparse.ArgumentParser(
        description="""Resample a folder recursively with librosa
                       Can be used in place or create a copy of the folder as an output.\n\n
                       Example run:
                            python resample.py
                                --input_dir /root/LJSpeech-1.1/
                                --output_sr 22050
                                --output_dir /root/resampled_LJSpeech-1.1/
                                --file_ext wav
                                --n_jobs 24
                    """,
        formatter_class=RawTextHelpFormatter,
    )

    parser.add_argument(
        "--input_dir",
        type=str,
        default=None,
        required=True,
        help="Path of the folder containing the audio files to resample",
    )

    parser.add_argument(
        "--output_sr",
        type=int,
        default=22050,
        required=False,
        help="Sample rate to which the audio files should be resampled",
    )

    parser.add_argument(
        "--output_dir",
        type=str,
        default=None,
        required=False,
        help="Path of the destination folder. If not defined, the operation is done in place",
    )

    parser.add_argument(
        "--file_ext",
        type=str,
        default="wav",
        required=False,
        help="Extension of the audio files to resample",
    )

    # None is always passed on explicitly, so the pool sizes itself from the
    # available cores rather than using resample_files' own default.
    parser.add_argument(
        "--n_jobs", type=int, default=None, help="Number of worker processes to use, by default it uses all cores"
    )

    args = parser.parse_args()

    resample_files(args.input_dir, args.output_sr, args.output_dir, args.file_ext, args.n_jobs)


--------------------------------------------------------------------------------