├── .gitignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── COMBINE ├── asr1 │ ├── cmd.sh │ ├── combine_cmvn_stats.sh │ ├── combine_train_data.sh │ ├── conf │ │ ├── fbank.yaml │ │ ├── fbank_pitch.yaml │ │ ├── gpu.conf │ │ ├── pbs.conf │ │ ├── queue.conf │ │ └── slurm.conf │ ├── db.sh │ ├── local │ │ └── combine_datasets.py │ ├── multi_tokenize.sh │ ├── path.sh │ ├── run.sh │ └── utils └── tts1 │ ├── cmd.sh │ ├── combine_cmvn_stats.sh │ ├── combine_train_data.sh │ ├── conf │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── gpu.conf │ ├── pbs.conf │ ├── queue.conf │ └── slurm.conf │ ├── db.sh │ ├── local │ └── combine_datasets.py │ ├── multi_tokenize.sh │ ├── path.sh │ ├── run.sh │ └── utils ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── SECURITY.md ├── TEMPLATE ├── asr1 │ ├── asr.sh │ ├── cmd.sh │ ├── conf │ │ ├── fbank.yaml │ │ ├── fbank_pitch.yaml │ │ ├── pbs.conf │ │ ├── queue.conf │ │ └── slurm.conf │ ├── db.sh │ ├── path.sh │ ├── setup.sh │ └── utils └── tts1 │ ├── cmd.sh │ ├── conf │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── pbs.conf │ ├── queue.conf │ └── slurm.conf │ ├── db.sh │ ├── path.sh │ ├── setup.sh │ ├── tts.sh │ └── utils ├── commonvoice └── asr1 │ ├── asr.sh │ ├── cmd.sh │ ├── conf │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── pbs.conf │ ├── queue.conf │ └── slurm.conf │ ├── db.sh │ ├── local │ ├── data.sh │ ├── data_prep.pl │ ├── download_and_untar.sh │ ├── filter_text.py │ ├── reduce_data_dir.sh │ └── split_tr_dt_et.sh │ ├── path.sh │ ├── run.sh │ └── utils ├── docker └── Dockerfile ├── example ├── README.md ├── main.py ├── model.py ├── requirements.txt ├── resources │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── global_cmvn_fbank.ark │ ├── global_cmvn_fbank_pitch.ark │ ├── librispeech_bpe2000.model │ └── tokens.txt └── utils.py ├── fisher └── asr1 │ ├── asr.sh │ ├── cmd.sh │ ├── conf │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── pbs.conf │ ├── queue.conf │ └── slurm.conf │ ├── db.sh │ ├── local │ ├── data.sh │ └── fisher_data_prep.sh │ ├── path.sh │ ├── run.sh │ └── utils ├── librispeech └── asr1 │ ├── asr.sh │ ├── cmd.sh │ ├── conf │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── gpu.conf │ ├── pbs.conf │ ├── queue.conf │ └── slurm.conf │ ├── db.sh │ ├── local │ ├── data.sh │ └── download_and_untar.sh │ ├── path.sh │ ├── run.sh │ └── utils ├── setup.py ├── speech_datasets ├── __init__.py ├── bin │ ├── __init__.py │ ├── apply_cmvn.py │ ├── combine_cmvn_stats.py │ ├── compute_cmvn_stats.py │ ├── dump.py │ ├── feat_to_shape.py │ └── spm_train.py ├── dataloader.py ├── text │ ├── __init__.py │ └── tokenizers.py ├── transform │ ├── README.md │ ├── __init__.py │ ├── add_deltas.py │ ├── cmvn.py │ ├── interface.py │ ├── perturb.py │ ├── sparse_time_warp.py │ ├── spec_augment.py │ ├── spectrogram.py │ └── transformation.py └── utils │ ├── __init__.py │ ├── io_utils.py │ ├── misc.py │ ├── readers.py │ ├── types.py │ └── writers.py ├── swbd └── asr1 │ ├── asr.sh │ ├── cmd.sh │ ├── conf │ ├── fbank.yaml │ ├── fbank_pitch.yaml │ ├── gpu.conf │ ├── pbs.conf │ ├── queue.conf │ └── slurm.conf │ ├── db.sh │ ├── local │ ├── MSU_single_letter.txt │ ├── data.sh │ ├── dict.patch │ ├── eval2000_data_prep.sh │ ├── extend_segments.pl │ ├── format_acronyms_dict.py │ ├── map_acronyms_transcripts.py │ ├── rt03_data_prep.sh │ ├── swbd1_data_prep.sh │ ├── swbd1_fix_speakerid.pl │ ├── swbd1_map_words.pl │ └── swbd1_prepare_dict.sh │ ├── path.sh │ ├── run.sh │ └── utils ├── tools ├── check_install.py ├── install_anaconda.sh ├── install_pkgs.sh └── install_sph2pipe.sh ├── utils ├── apply_cmvn.sh 
├── apply_map.pl ├── combine_data.sh ├── compute_cmvn_stats.sh ├── copy_data_dir.sh ├── dump.sh ├── feat_to_shape.sh ├── filter_scp.pl ├── filter_scps.pl ├── fix_data_dir.sh ├── make_absolute.sh ├── parse_options.sh ├── pbs.pl ├── perturb_data_dir_speed.sh ├── queue.pl ├── remove_dup_utts.sh ├── run.pl ├── shuffle_list.pl ├── slurm.pl ├── spk2utt_to_utt2spk.pl ├── split_scp.pl ├── ssh.pl ├── stdout.pl ├── subset_data_dir.sh ├── subset_data_dir_tr_cv.sh ├── subset_scp.pl ├── sym2int.pl ├── utt2spk_to_spk2utt.pl ├── validate_data_dir.sh └── validate_text.pl └── wsj └── asr1 ├── asr.sh ├── cmd.sh ├── conf ├── fbank.yaml ├── fbank_pitch.yaml ├── gpu.conf ├── pbs.conf ├── queue.conf └── slurm.conf ├── db.sh ├── local ├── data.sh ├── find_transcripts.pl ├── flist2scp.pl ├── ndx2flist.pl ├── normalize_transcript.pl ├── wsj_data_prep.sh └── wsj_format_data.sh ├── path.sh ├── run.sh └── utils
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
*.pyc
.DS_Store
.idea/
cmake-build-debug/

--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
# Comment line immediately above ownership line is reserved for related gus information. Please be careful while editing.
#ECCN:Open Source

--------------------------------------------------------------------------------
/COMBINE/asr1/cmd.sh:
--------------------------------------------------------------------------------
../../TEMPLATE/asr1/cmd.sh
--------------------------------------------------------------------------------
/COMBINE/asr1/combine_cmvn_stats.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e
set -u
set -o pipefail

log() {
    local fname=${BASH_SOURCE[1]##*/}
    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

feats_type=fbank # fbank or fbank_pitch are valid
cmvn_type=global # global or speaker or utterance are valid

help_message=$(cat << EOF
Usage: $0 <dataset1>/<split1> <dataset2>/<split2> <dataset3>/<split3> ...

Combines CMVN stats for the specified dataset splits (pre-computed by Stage 5 of run.sh for each dataset split specified)
into a single file.

Options:
    --feats_type # Feature type (fbank or fbank_pitch) (default=${feats_type}).
    --cmvn_type  # Type of CMVN stats to compute (global or speaker or utterance) (default=${cmvn_type}).
EOF
)


. ./path.sh || exit 1
. ./cmd.sh || exit 1

log "$0 $*"
. utils/parse_options.sh || exit 1
if [ $# -eq 0 ]; then
    log "${help_message}"
    log "Error: Please specify dataset splits as positional arguments."
    exit 2
fi

workspace=$PWD
task=$(basename "$(utils/make_absolute.sh "$workspace")")

# Get CMVN's from all the relevant dataset splits
cmvns=
for dset in "$@"; do
    base=$(echo ${dset} | sed -E "s/\/.*//g")
    split=$(echo ${dset} | sed -E "s/.*\///g")
    base_dir="${MAIN_ROOT}/${base}/${task}"
    dset_dir="${base_dir}/dump/${feats_type}"/${split}
    cmvn="${dset_dir}/${cmvn_type}_cmvn.ark"

    if [ ! -d ${base_dir} ]; then
        log "${base} is not a valid dataset for task ${task//1/}"
        exit 1
    elif [ "${base}" = "${dset}" ]; then
        log "Expected dataset to be specified as <dataset>/<split>, but got ${dset}"
        exit 1
    elif [ ! -d ${dset_dir} ]; then
        log "Either ${split} is not a valid split for dataset ${base}, or"
        log "${base_dir}/run.sh has not yet been run with feats_type=${feats_type}"
        exit 1
    elif [ ! -f ${cmvn} ]; then
        log "${cmvn_type} CMVN statistics have not been computed for feats_type=${feats_type} for data split ${dset}."
        log "Please run stage 5 of ${base_dir}/${task}/run.sh."
        exit 1
    fi
    cmvns+="${cmvn} "
done

# Combine CMVN's
combo_idx=$(python3 local/combine_datasets.py --task "${task//1/}" --write_dir false "$@")
dumpdir="dump/${feats_type}/no_short/${combo_idx}"
mkdir -p "${dumpdir}"
python3 -m speech_datasets.bin.combine_cmvn_stats --cmvn_type ${cmvn_type} \
    --output_file "${dumpdir}/${cmvn_type}_cmvn.ark" ${cmvns}

--------------------------------------------------------------------------------
/COMBINE/asr1/combine_train_data.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -euo pipefail
log() {
    local fname=${BASH_SOURCE[1]##*/}
    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

help_message="Usage: $0 [asr.sh options] <dataset1>/<split1> <dataset2>/<split2> <dataset3>/<split3> ..."

log "$0 $*"
if [ $# -eq 0 ]; then
    log "$help_message"
    log "Error: at least 1 argument required"
    exit 2
fi

kwargs=()
stage=2
stop_stage=5
while true; do
    case "$1" in
        --stage)
            if [ "$2" -lt 2 ]; then
                log "Specify --stage 2 or higher (got --stage $2)."
                log "We expect stage 1 to be complete for all datasets given."
                exit 2
            else
                stage=$2
            fi
            shift 2
            ;;
        --stop-stage|--stop_stage)
            if [ "$2" -gt 5 ]; then
                log "Specify --stop-stage 5 or lower (got --stop-stage $2)."
                log "Use combine_cmvn_stats.sh to combine CMVN statistics from multiple datasets (stage 5)."
                log "Use multi_tokenize.sh to obtain token inventories from multiple datasets (stages 6-7)."
                exit 2
            else
                stop_stage=$2
            fi
            shift 2
            ;;
        --*) kwargs+=( "$1" "$2" ); shift 2; ;;
        *) break;
    esac
done
kwargs+=( --stage "$stage" --stop_stage "$stop_stage" )

if [ $# -eq 0 ]; then
    log "${help_message}"
    log "Error: Please specify dataset splits as positional arguments."
    exit 2
fi

task=$(basename "$(utils/make_absolute.sh "$PWD")")
idx=$(python local/combine_datasets.py --task "${task//1/}" --write_dir true "$@")
datadir="data/${idx}"
for f in wav.scp segments utt2spk text; do
    sort "${datadir}/${f}" > "${datadir}/${f}.tmp"
    mv "${datadir}/${f}.tmp" "${datadir}/${f}"
done
./run.sh "${kwargs[@]}" --train_sets "${idx}"
--------------------------------------------------------------------------------
/COMBINE/asr1/conf/fbank.yaml:
--------------------------------------------------------------------------------
- type: fbank
  num_mel_bins: 80
  sample_frequency: 16000

--------------------------------------------------------------------------------
/COMBINE/asr1/conf/fbank_pitch.yaml:
--------------------------------------------------------------------------------
- type: fbank_pitch
  num_mel_bins: 80
  sample_frequency: 16000

--------------------------------------------------------------------------------
/COMBINE/asr1/conf/gpu.conf:
--------------------------------------------------------------------------------
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l 'hostname=b1[12345678]*|c*,gpu=$0' -q g.q
--------------------------------------------------------------------------------
/COMBINE/asr1/conf/pbs.conf:
--------------------------------------------------------------------------------
# Default configuration
command qsub -V -v PATH -S /bin/bash
option name=* -N $0
option mem=* -l mem=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -l ncpus=$0
option num_threads=1 # Do not add anything to qsub_opts
option num_nodes=* -l nodes=$0:ppn=1
default gpu=0
option gpu=0
option gpu=* -l ngpus=$0

--------------------------------------------------------------------------------
/COMBINE/asr1/conf/queue.conf:
--------------------------------------------------------------------------------
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option name=* -N $0
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q

--------------------------------------------------------------------------------
/COMBINE/asr1/conf/slurm.conf:
--------------------------------------------------------------------------------
# Default configuration
command sbatch --export=PATH
option name=* --job-name $0
option time=* --time $0
option mem=* --mem-per-cpu $0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* --cpus-per-task $0
option num_threads=1 --cpus-per-task 1
option num_nodes=* --nodes $0
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0
# note: the --max-jobs-run option is supported as a special case
# by slurm.pl and you don't have to handle it in the config file.

--------------------------------------------------------------------------------
/COMBINE/asr1/db.sh:
--------------------------------------------------------------------------------
../../TEMPLATE/asr1/db.sh
--------------------------------------------------------------------------------
/COMBINE/asr1/local/combine_datasets.py:
--------------------------------------------------------------------------------
import argparse
import os
import shutil

from speech_datasets.utils import get_root
from speech_datasets.utils.io_utils import get_combo_idx
from speech_datasets.utils.types import str2bool


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, choices=["asr", "tts"])
    parser.add_argument("--write_dir", type=str2bool, default=True)
    parser.add_argument("datasets", nargs="+", type=str)
    args = parser.parse_args()

    # Ensure that all datasets are specified as <dataset>/<split>
    datasets = sorted(set(args.datasets))
    dataset_splits = [d.split("/", maxsplit=1) for d in datasets]
    assert all(len(d) == 2 for d in dataset_splits), \
        f"All datasets must be specified as <dataset>/<split>, but got " \
        f"{datasets} instead"

    # Verify that all datasets have been prepared
    dataset_dirs = [os.path.join(get_root(), ds[0], f"{args.task}1", "data", ds[1])
                    for ds in dataset_splits]
    assert all(os.path.isdir(d) for d in dataset_dirs), \
        f"Please make sure that all dataset splits are valid, and that all " \
        f"datasets you wish to combine have already been prepared by stage 1 " \
        f"of {args.task}.sh"

    # Get the index of this dataset combination (add to the registry if needed)
    idx = get_combo_idx(datasets, args.task)
    data_dir = os.path.join(get_root(), "COMBINE", f"{args.task}1", "data")
    if idx < 0:
        os.makedirs(data_dir, exist_ok=True)
        with open(os.path.join(data_dir, "registry.txt"), "a") as f:
            f.write(" ".join(datasets) + "\n")
        idx = get_combo_idx(datasets, args.task)

    if not args.write_dir:
        return idx

    # Create a directory for this dataset combo & prepare it
    dirname = os.path.join(data_dir, str(idx))
    os.makedirs(dirname, exist_ok=True)
    write_segments = any(os.path.isfile(os.path.join(d, "segments"))
                         for d in dataset_dirs)
    with open(os.path.join(dirname, "wav.scp"), "wb") as wav, \
            open(os.path.join(dirname, "text"), "wb") as text, \
            open(os.path.join(dirname, "utt2spk"), "wb") as utt2spk, \
            open(os.path.join(dirname, "segments"), "w") as segments:
        for d in dataset_dirs:

            # wav.scp, text, and utt2spk can just be concatenated on
            with open(os.path.join(d, "wav.scp"), "rb") as src_wav:
                shutil.copyfileobj(src_wav, wav)
            with open(os.path.join(d, "text"), "rb") as src_text:
                shutil.copyfileobj(src_text, text)
            with open(os.path.join(d, "utt2spk"), "rb") as src_utt2spk:
                shutil.copyfileobj(src_utt2spk, utt2spk)

            if write_segments:
                # If a segments file exists, we can just concatenate it on
                if os.path.isfile(os.path.join(d, "segments")):
                    with open(os.path.join(d, "segments"), "r") as src_segments:
                        shutil.copyfileobj(src_segments, segments)

                # Otherwise, we need to use wav.scp to create a dummy segments
                # line format is <utt_id> <recording_id> <start_time> <end_time>
                # <start_time> = 0, <end_time> = -1 means use the whole recording
                else:
                    with open(os.path.join(d, "wav.scp"), "r") as src_wav:
                        for line in src_wav:
                            utt_id, _ = line.rstrip().split(None, maxsplit=1)
                            segments.write(f"{utt_id} {utt_id} 0.0 -1.0\n")

    return idx


if __name__ == "__main__":
    combo_idx = main()
    print(combo_idx)

--------------------------------------------------------------------------------
/COMBINE/asr1/multi_tokenize.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e
set -u
set -o pipefail

log() {
    local fname=${BASH_SOURCE[1]##*/}
    echo -e "$(date '+%Y-%m-%dT%H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# Tokenization related options from asr.sh
token_type=bpe # Tokenization type (char or bpe).
n_tokens=2000  # The number of BPE vocabulary.
nlsyms=""      # non-linguistic symbols list, separated by a comma

help_message=$(cat << EOF
Usage: $0 <dataset1> <dataset2> <dataset3> ...

Produces a token inventory of the given type for all the datasets provided.

Options:
    --token_type # Tokenization type (char or bpe, default="${token_type}").
    --n_tokens   # The maximum number of tokens allowed (default="${n_tokens}").
    --nlsyms     # Non-linguistic symbol list for BPE/char, separated by a comma. (default="${nlsyms}").
EOF
)

. ./path.sh || exit 1
. ./cmd.sh || exit 1

log "$0 $*"
. utils/parse_options.sh || exit 1
if [ $# -eq 0 ]; then
    log "${help_message}"
    log "Error: Please specify datasets as positional arguments."
    exit 2
fi

workspace=$PWD
task=$(basename "$(utils/make_absolute.sh "$workspace")")
run_args="--token-type ${token_type} --n_tokens ${n_tokens} --nlsyms ${nlsyms} "

# Compile srctexts from all the relevant datasets
srctexts=
for dset in "$@"; do
    log "Concatenating all source texts from dataset $dset..."
    dset_dir="${MAIN_ROOT}/${dset}/${task}"
    cd ${dset_dir}
    ./run.sh --stage 6 --stop-stage 6 ${run_args}
    cd ${workspace}
    srctexts+="${dset_dir}/dump/srctexts "
    echo ""
done

# Concatenate all the relevant text data & prepare a token inventory
log "Concatenating all source texts from all datasets..."
mkdir -p dump data
cat $srctexts > dump/srctexts
./run.sh --stage 7 --stop-stage 7 ${run_args}


--------------------------------------------------------------------------------
/COMBINE/asr1/path.sh:
--------------------------------------------------------------------------------
../../TEMPLATE/asr1/path.sh
--------------------------------------------------------------------------------
/COMBINE/asr1/run.sh:
--------------------------------------------------------------------------------
../../TEMPLATE/asr1/asr.sh
--------------------------------------------------------------------------------
/COMBINE/asr1/utils:
--------------------------------------------------------------------------------
../../TEMPLATE/asr1/utils
--------------------------------------------------------------------------------
/COMBINE/tts1/cmd.sh:
--------------------------------------------------------------------------------
../asr1/cmd.sh
--------------------------------------------------------------------------------
/COMBINE/tts1/combine_cmvn_stats.sh:
--------------------------------------------------------------------------------
../asr1/combine_cmvn_stats.sh
--------------------------------------------------------------------------------
/COMBINE/tts1/combine_train_data.sh:
--------------------------------------------------------------------------------
../asr1/combine_train_data.sh
--------------------------------------------------------------------------------
/COMBINE/tts1/conf/fbank.yaml:
--------------------------------------------------------------------------------
- type: fbank
  num_mel_bins: 80
  sample_frequency: 16000

--------------------------------------------------------------------------------
/COMBINE/tts1/conf/fbank_pitch.yaml:
--------------------------------------------------------------------------------
- type: fbank_pitch
  num_mel_bins: 80
  sample_frequency: 16000

--------------------------------------------------------------------------------
/COMBINE/tts1/conf/gpu.conf:
--------------------------------------------------------------------------------
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l 'hostname=b1[12345678]*|c*,gpu=$0' -q g.q
--------------------------------------------------------------------------------
/COMBINE/tts1/conf/pbs.conf:
--------------------------------------------------------------------------------
# Default configuration
command qsub -V -v PATH -S /bin/bash
option name=* -N $0
option mem=* -l mem=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -l ncpus=$0
option num_threads=1 # Do not add anything to qsub_opts
option num_nodes=* -l nodes=$0:ppn=1
default gpu=0
option gpu=0
option gpu=* -l ngpus=$0

--------------------------------------------------------------------------------
/COMBINE/tts1/conf/queue.conf:
--------------------------------------------------------------------------------
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option name=* -N $0
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
option num_nodes=* -pe mpi $0 # You must set this PE as allocation_rule=1
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q

--------------------------------------------------------------------------------
/COMBINE/tts1/conf/slurm.conf:
--------------------------------------------------------------------------------
# Default configuration
command sbatch --export=PATH
option name=* --job-name $0
option time=* --time $0
option mem=* --mem-per-cpu $0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* --cpus-per-task $0
option num_threads=1 --cpus-per-task 1
option num_nodes=* --nodes $0
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0
# note: the --max-jobs-run option is supported as a special case
# by slurm.pl and you don't have to handle it in the config file.

--------------------------------------------------------------------------------
/COMBINE/tts1/db.sh:
--------------------------------------------------------------------------------
../asr1/db.sh
--------------------------------------------------------------------------------
/COMBINE/tts1/local/combine_datasets.py:
--------------------------------------------------------------------------------
import argparse
import os
import shutil

from speech_datasets.utils import get_root
from speech_datasets.utils.io_utils import get_combo_idx
from speech_datasets.utils.types import str2bool


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, choices=["asr", "tts"])
    parser.add_argument("--write_dir", type=str2bool, default=True)
    parser.add_argument("datasets", nargs="+", type=str)
    args = parser.parse_args()

    # Ensure that all datasets are specified as <dataset>/<split>
    datasets = sorted(set(args.datasets))
    dataset_splits = [d.split("/", maxsplit=1) for d in datasets]
    assert all(len(d) == 2 for d in dataset_splits), \
        f"All datasets must be specified as <dataset>/<split>, but got " \
        f"{datasets} instead"

    # Verify that all datasets have been prepared
    dataset_dirs = [os.path.join(get_root(), ds[0], f"{args.task}1", "data", ds[1])
                    for ds in dataset_splits]
    assert all(os.path.isdir(d) for d in dataset_dirs), \
        f"Please make sure that all dataset splits are valid, and that all " \
        f"datasets you wish to combine have already been prepared by stage 1 " \
        f"of {args.task}.sh"

    # Get the index of this dataset combination (add to the registry if needed)
    idx = get_combo_idx(datasets, args.task)
    data_dir = os.path.join(get_root(), "COMBINE", f"{args.task}1", "data")
    if idx < 0:
        os.makedirs(data_dir, exist_ok=True)
        with open(os.path.join(data_dir, "registry.txt"), "a") as f:
            f.write(" ".join(datasets) + "\n")
        idx = get_combo_idx(datasets, args.task)

    if not args.write_dir:
        return idx

    # Create a directory for this dataset combo & prepare it
    dirname = os.path.join(data_dir, str(idx))
    os.makedirs(dirname, exist_ok=True)
    write_segments = any(os.path.isfile(os.path.join(d, "segments"))
                         for d in dataset_dirs)
    with open(os.path.join(dirname, "wav.scp"), "wb") as wav, \
            open(os.path.join(dirname, "text"), "wb") as text, \
"wb") as utt2spk, \ 52 | open(os.path.join(dirname, "segments"), "w") as segments: 53 | for d in dataset_dirs: 54 | 55 | # wav.scp, text, and utt2spk can just be concatenated on 56 | with open(os.path.join(d, "wav.scp"), "rb") as src_wav: 57 | shutil.copyfileobj(src_wav, wav) 58 | with open(os.path.join(d, "text"), "rb") as src_text: 59 | shutil.copyfileobj(src_text, text) 60 | with open(os.path.join(d, "utt2spk"), "rb") as src_utt2spk: 61 | shutil.copyfileobj(src_utt2spk, utt2spk) 62 | 63 | if write_segments: 64 | # If a segments file exists, we can just concatenate it on 65 | if os.path.isfile(os.path.join(d, "segments")): 66 | with open(os.path.join(d, "segments"), "r") as src_segments: 67 | shutil.copyfileobj(src_segments, segments) 68 | 69 | # Otherwise, we need to use wav.scp to create a dummy segments 70 | # line format is 71 | # = 0, = -1 means use the whole recording 72 | else: 73 | with open(os.path.join(d, "wav.scp"), "r") as src_wav: 74 | for line in src_wav: 75 | utt_id, _ = line.rstrip().split(None, maxsplit=1) 76 | segments.write(f"{utt_id} {utt_id} 0.0 -1.0\n") 77 | 78 | return idx 79 | 80 | 81 | if __name__ == "__main__": 82 | combo_idx = main() 83 | print(combo_idx) 84 | -------------------------------------------------------------------------------- /COMBINE/tts1/multi_tokenize.sh: -------------------------------------------------------------------------------- 1 | ../asr1/multi_tokenize.sh -------------------------------------------------------------------------------- /COMBINE/tts1/path.sh: -------------------------------------------------------------------------------- 1 | ../asr1/path.sh -------------------------------------------------------------------------------- /COMBINE/tts1/run.sh: -------------------------------------------------------------------------------- 1 | ../asr1/run.sh -------------------------------------------------------------------------------- /COMBINE/tts1/utils: -------------------------------------------------------------------------------- 1 | ../asr1/utils -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Use shell /bin/bash instead of /bin/sh so the source command can be used 2 | SHELL := /bin/bash 3 | # Use the default conda unless a specific install is specified. If there is 4 | # no conda, we will download a fresh one and use it to set up the virtual env. 5 | CONDA := 6 | VENV_NAME := datasets 7 | # The python version installed in the conda setup 8 | PYTHON_VERSION := 3.7.9 9 | # PyTorch version: 1.2.0, 1.3.0, 1.3.1, 1.4.0, 1.5.0, 1.5.1 (>= 1.2.0 required) 10 | # 1.5.0 and later do not work with PyKaldi... 
TORCH_VERSION := 1.4.0

ifeq ($(CONDA),)
CONDA := $(shell which conda)
endif
ifeq ($(TORCH_VERSION),)
pytorch := pytorch
else
pytorch := pytorch=$(TORCH_VERSION)
endif

ifneq ($(shell which nvidia-smi),) # 'nvidia-smi' found
CUDA_VERSION := $(shell nvcc --version | grep "release" | sed -E "s/.*release ([0-9.]*).*/\1/")
CONDA_PYTORCH := $(pytorch) cudatoolkit=$(CUDA_VERSION) -c pytorch
else
CUDA_VERSION :=
CONDA_PYTORCH := $(pytorch) cpuonly -c pytorch
endif
# Install CPU version of PyKaldi, so we can run feature extraction on CPU while training on GPU
CONDA_PYKALDI := -c pykaldi pykaldi-cpu

.PHONY: all clean

all: conda sph2pipe check_install example

tools/conda.done:
# Only install PyTorch if the PyTorch version is non-empty
	tools/install_anaconda.sh $(PYTHON_VERSION) "$(CONDA)" tools/venv $(VENV_NAME) . "$(CONDA_PYTORCH)" "$(CONDA_PYKALDI)"
	@echo $(VENV_NAME) > tools/conda.done

conda: tools/conda.done

tools/sph2pipe.done:
	tools/install_sph2pipe.sh tools
	touch tools/sph2pipe.done

sph2pipe: tools/sph2pipe.done

check_install: conda
ifneq ($(strip $(CUDA_VERSION)),)
	source tools/venv/etc/profile.d/conda.sh && conda deactivate && conda activate $(shell cat tools/conda.done) && python tools/check_install.py
else
	source tools/venv/etc/profile.d/conda.sh && conda deactivate && conda activate $(shell cat tools/conda.done) && python tools/check_install.py --no-cuda
endif

example: conda
	source tools/venv/etc/profile.d/conda.sh && conda deactivate && conda activate $(shell cat tools/conda.done) && pip install -r example/requirements.txt

clean: clean_conda
	rm -rf tools/*.done

clean_conda:
	rm -rf *.egg-info
	rm -rf tools/venv
	rm -f tools/miniconda.sh
	find . -iname "*.pyc" -delete

--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
## Security

Please report any security issue to [security@salesforce.com](mailto:security@salesforce.com)
as soon as it is discovered. This library limits its runtime dependencies in
order to reduce the total cost of ownership as much as possible, but all consumers
should remain vigilant and have their security stakeholders review all third-party
products (3PP) like this one and their dependencies.

--------------------------------------------------------------------------------
/TEMPLATE/asr1/cmd.sh:
--------------------------------------------------------------------------------
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
# --time