├── requirements.txt ├── download_data.sh ├── Dockerfile ├── check_audios.py └── resample.py /requirements.txt: -------------------------------------------------------------------------------- 1 | soundfile 2 | librosa 3 | pandas 4 | tqdm -------------------------------------------------------------------------------- /download_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | curl -L -o vctk-corpus.zip https://www.kaggle.com/api/v1/datasets/download/pratt3000/vctk-corpus -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Python base image 2 | FROM python:3.11-slim 3 | 4 | # Install required system dependencies 5 | RUN apt-get update && apt-get install -y \ 6 | curl \ 7 | unzip \ 8 | && rm -rf /var/lib/apt/lists/* 9 | 10 | # Set working directory 11 | WORKDIR /app 12 | 13 | # Download and unzip the VCTK corpus dataset 14 | RUN curl -L -o vctk-corpus.zip https://www.kaggle.com/api/v1/datasets/download/pratt3000/vctk-corpus && \ 15 | unzip vctk-corpus.zip -d /app/data && \ 16 | rm vctk-corpus.zip 17 | 18 | # Install Python dependencies 19 | COPY requirements.txt . 20 | RUN pip install --no-cache-dir -r requirements.txt 21 | 22 | # Copy scripts 23 | COPY resample.py . 24 | 25 | COPY check_audios.py . 26 | 27 | # Create output directory 28 | RUN mkdir -p /app/output 29 | 30 | # Run the Python script with arguments 31 | CMD ["python", "check_audios.py", \ 32 | "--input_dir", "/app/data", \ 33 | "--output_dir", "/app/output", \ 34 | "--n_jobs", "4"] -------------------------------------------------------------------------------- /check_audios.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import soundfile as sf 4 | from multiprocessing import Pool 5 | import numpy as np 6 | import struct 7 | import glob 8 | import pandas as pd 9 | 10 | from argparse import RawTextHelpFormatter 11 | from tqdm import tqdm 12 | 13 | 14 | def get_durations(input_file): 15 | """ Get audio durations using soundfile""" 16 | try: 17 | x, fs = sf.read(input_file) 18 | metadata = sf.info(input_file) 19 | duration = metadata.duration 20 | return (input_file, duration) 21 | except Exception as e: 22 | print(input_file) 23 | return (input_file, float("NaN")) 24 | 25 | 26 | def process_files(input_dir, output_dir, file_ext="wav", n_jobs=os.cpu_count() -1): 27 | 28 | print("Getting audio file durations...") 29 | audio_files = glob.glob(os.path.join(input_dir, f"**/*.{file_ext}"), recursive=True) 30 | print(f"Found {len(audio_files)} files...") 31 | 32 | with Pool(processes=n_jobs) as p: 33 | results = list(tqdm(p.imap(get_durations, audio_files), total=len(audio_files))) 34 | 35 | data_dict = {"files":[], "durations":[]} 36 | 37 | for file, duration in results: 38 | data_dict["files"].append(file) 39 | data_dict["durations"].append(duration) 40 | 41 | # save to csv 42 | df = pd.DataFrame.from_dict(data_dict) 43 | df.to_csv(os.path.join( output_dir, "durations.csv" ),index=False) 44 | print("Done !") 45 | 46 | if __name__ == "__main__": 47 | parser = argparse.ArgumentParser( 48 | description="""Get durations of a collection of audios \n\n 49 | Example run: 50 | python get_durations.py 51 | --input_dir /root/LJSpeech-1.1/ 52 | --output_dir /root/LJSpeech-1.1/ 53 | --n_jobs 24 54 | """, 55 | formatter_class=RawTextHelpFormatter, 56 | ) 57 | 58 | parser.add_argument( 59 | "--input_dir", 60 | type=str, 61 | default=None, 62 | required=True, 63 | help="Path of the folder containing the audio files to get durations", 64 | ) 65 | 66 | parser.add_argument( 67 | "--output_dir", 68 | type=str, 69 | default=None, 70 | required=True, 71 | help="Path of the destination folder to store the csv with durations.", 72 | ) 73 | 74 | parser.add_argument( 75 | "--n_jobs", type=int, default=None, help="Number of threads to use, by default it uses all cores" 76 | ) 77 | 78 | args = parser.parse_args() 79 | 80 | process_files(input_dir=args.input_dir, output_dir=args.output_dir, file_ext="wav", n_jobs=args.n_jobs) -------------------------------------------------------------------------------- /resample.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | from argparse import RawTextHelpFormatter 5 | from multiprocessing import Pool 6 | from shutil import copytree 7 | 8 | import librosa 9 | import soundfile as sf 10 | from tqdm import tqdm 11 | 12 | # from coqui https://github.com/coqui-ai/TTS/blob/dev/TTS/bin/resample.py 13 | 14 | def resample_file(func_args): 15 | filename, output_sr = func_args 16 | y, sr = librosa.load(filename, sr=output_sr) 17 | sf.write(filename, y, sr) 18 | 19 | 20 | def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav", n_jobs=10): 21 | if output_dir: 22 | print("Recursively copying the input folder...") 23 | copytree(input_dir, output_dir) 24 | input_dir = output_dir 25 | 26 | print("Resampling the audio files...") 27 | audio_files = glob.glob(os.path.join(input_dir, f"**/*.{file_ext}"), recursive=True) 28 | print(f"Found {len(audio_files)} files...") 29 | audio_files = list(zip(audio_files, len(audio_files) * [output_sr])) 30 | with Pool(processes=n_jobs) as p: 31 | with tqdm(total=len(audio_files)) as pbar: 32 | for _, _ in enumerate(p.imap_unordered(resample_file, audio_files)): 33 | pbar.update() 34 | 35 | print("Done !") 36 | 37 | 38 | if __name__ == "__main__": 39 | parser = argparse.ArgumentParser( 40 | description="""Resample a folder recusively with librosa 41 | Can be used in place or create a copy of the folder as an output.\n\n 42 | Example run: 43 | python resample.py 44 | --input_dir /root/LJSpeech-1.1/ 45 | --output_sr 22050 46 | --output_dir /root/resampled_LJSpeech-1.1/ 47 | --file_ext wav 48 | --n_jobs 24 49 | """, 50 | formatter_class=RawTextHelpFormatter, 51 | ) 52 | 53 | parser.add_argument( 54 | "--input_dir", 55 | type=str, 56 | default=None, 57 | required=True, 58 | help="Path of the folder containing the audio files to resample", 59 | ) 60 | 61 | parser.add_argument( 62 | "--output_sr", 63 | type=int, 64 | default=22050, 65 | required=False, 66 | help="Samlple rate to which the audio files should be resampled", 67 | ) 68 | 69 | parser.add_argument( 70 | "--output_dir", 71 | type=str, 72 | default=None, 73 | required=False, 74 | help="Path of the destination folder. If not defined, the operation is done in place", 75 | ) 76 | 77 | parser.add_argument( 78 | "--file_ext", 79 | type=str, 80 | default="wav", 81 | required=False, 82 | help="Extension of the audio files to resample", 83 | ) 84 | 85 | parser.add_argument( 86 | "--n_jobs", type=int, default=None, help="Number of threads to use, by default it uses all cores" 87 | ) 88 | 89 | args = parser.parse_args() 90 | 91 | resample_files(args.input_dir, args.output_sr, args.output_dir, args.file_ext, args.n_jobs) --------------------------------------------------------------------------------