├── README.md ├── move_files.py ├── reorganize_and_archive.py ├── download_cv_split.py ├── generate_datasets.py ├── dataset_script.py └── cv-corpus-13.0-2023-03-09.json /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | duplicated_from: anton-l/common_voice_generator 3 | --- 4 | ## Common voice release generator 5 | 6 | 1. Copy the latest release id from the `RELEASES` dict in https://github.com/common-voice/common-voice/blob/main/web/src/components/pages/datasets/releases.ts 7 | to the `VERSIONS` variable in `generate_datasets.py`. 8 | 2. Copy the languages from https://github.com/common-voice/common-voice/blob/release-v1.78.0/web/locales/en/messages.ftl 9 | (replacing `release-v1.78.0` with the latest version tag) to the `languages.ftl` file. 10 | 3. Run `python generate_datasets.py` to generate the dataset repos. 11 | 4. `cd ..` 12 | 5. `huggingface-cli repo create --type dataset --organization mozilla-foundation common_voice_11_0` 13 | 6. `git clone https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0` 14 | 7. `cd common_voice_11_0` 15 | 8. `cp ../common_voice_generator/common_voice_11_0/* ./` 16 | 9. `git add . 
#!/usr/bin/env python3
"""Shard the clips of one Common Voice language and tar every shard.

For each split, the clips listed in ``<split>.tsv`` are moved from
``clip_path`` into ``./audio/<lang>/<split>/<lang>_<split>_<start_idx>/`` in
groups of ``files_per_archive``; every shard directory is then archived with
``tar``.
"""
import os
import shutil
import subprocess

# To change according to version and language
# ---------
lang = "ab"
clip_path = f"/home/vaibhav_huggingface_co/common_voice_dataset_generator/data/{lang}/cv-corpus-12.0-2022-12-07/{lang}/clips"
# ---------

splits = ("test", "dev", "train", "other", "invalidated", "validated")
files_per_archive = 4_000
dir_path = "{lang}_{split}_{idx}"


def plan_moves(filenames, source_dir, shard_root, lang, split, files_per_archive=files_per_archive):
    """Compute which clip goes into which shard directory.

    Args:
        filenames: clip file names as listed in the ``path`` column of the TSV.
        source_dir: directory that currently holds the clips.
        shard_root: directory under which the shard directories are created.
        lang: language code used in the shard directory name.
        split: split name used in the shard directory name.
        files_per_archive: maximum number of clips per shard.

    Returns:
        A list of ``(target_dir, [(source_path, destination_path), ...])``
        tuples, one per shard, in input order. Empty input gives ``[]``.
    """
    plan = []
    for start_idx in range(0, len(filenames), files_per_archive):
        target_dir = os.path.join(shard_root, dir_path.format(lang=lang, split=split, idx=start_idx))
        moves = [
            # The destination is built from the bare file name. Joining the
            # target directory with an already-absolute source path would
            # discard the target directory and turn the move into a no-op.
            (os.path.join(source_dir, name), os.path.join(target_dir, os.path.basename(name)))
            for name in filenames[start_idx:start_idx + files_per_archive]
        ]
        plan.append((target_dir, moves))
    return plan


def main():
    """Move and archive the clips of every split of ``lang``."""
    # Third-party imports are deferred so that ``plan_moves`` stays importable
    # without them.
    import pandas
    from tqdm import tqdm

    corpus_dir = os.path.dirname(clip_path)
    for split in splits:
        data = pandas.read_csv(os.path.join(corpus_dir, f"{split}.tsv"), sep="\t")
        filenames = list(data["path"])

        print(f"Moving {len(filenames)} files...")

        new_clip_path = f"./audio/{lang}/{split}"
        plan = plan_moves(filenames, clip_path, new_clip_path, lang, split)

        for target_dir, moves in tqdm(plan, desc="moving files"):
            print(f"mkdir -p {target_dir}")
            os.makedirs(target_dir, exist_ok=True)
            for source, destination in moves:
                try:
                    shutil.move(source, destination)
                except FileNotFoundError:
                    # Stay best-effort like the original `mv`: a clip already
                    # moved for an overlapping split must not abort the run.
                    print(f"Skipping missing clip: {source}")

        if not os.path.isdir(new_clip_path):
            # Split without clips: no shard directory was created.
            continue

        all_dirs = [d for d in os.listdir(new_clip_path) if os.path.isdir(os.path.join(new_clip_path, d))]
        for directory in tqdm(all_dirs, desc="taring files"):
            command = ["tar", "-cvf", f"{new_clip_path}/{directory}.tar", f"{new_clip_path}/{directory}"]
            print(" ".join(command))
            # Argument list instead of a shell string: no quoting problems.
            subprocess.run(command, check=False)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Repackage the extracted Common Voice 13.0 corpus into per-split tar shards.

For every locale in the release JSON and every split, the split's TSV is
copied to ``repos/common_voice_13_0/transcript/<lang>/`` and the clips it
lists are packed into ``repos/common_voice_13_0/audio/<lang>/<split>/`` as
tar archives of at most ``files_per_archive`` clips each.
"""
import csv
import json
import logging
import os
import shutil
import sys
import tarfile
from functools import partial
from multiprocessing import Pool
from pathlib import Path

logging.basicConfig(
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler("cv13_org.log"),
        logging.StreamHandler(sys.stdout)
    ]
)


files_per_archive = 40_000


def make_archive(archive_index_with_files, output_dir, lang, split):
    """Write one uncompressed tar shard.

    Args:
        archive_index_with_files: ``(archive_index, files)`` tuple; ``files``
            is the list of clip paths to pack. A single tuple argument keeps
            the function usable with ``Pool.map``.
        output_dir: directory in which ``<lang>_<split>_<index>.tar`` is created.
        lang: language code used in the archive name.
        split: split name used in the archive name.
    """
    archive_index, files = archive_index_with_files
    archive_dir = f"{lang}_{split}_{archive_index}"
    archive_path = os.path.join(output_dir, f"{archive_dir}.tar")
    with tarfile.open(archive_path, "w") as tar:
        for file in files:
            # Members are stored flat under a folder named after the archive.
            tar.add(file, arcname=os.path.join(archive_dir, os.path.basename(file)))


def extract_archive(archive_path, target_dir):
    """Extract the gzip-compressed tar at ``archive_path`` into ``target_dir``."""
    # NOTE(review): extractall() trusts member paths; only use this on
    # archives from a trusted source.
    with tarfile.open(archive_path, 'r:gz') as f:
        f.extractall(path=target_dir)


def _group_files(all_files, group_size):
    """Split ``all_files`` into ``(archive_index, chunk)`` pairs of ``group_size``."""
    return [
        (arch_index, all_files[start_index:start_index + group_size])
        for arch_index, start_index in enumerate(range(0, len(all_files), group_size))
    ]


def main():
    """Archive every split of every locale listed in the release JSON."""
    # Third-party imports are deferred so that the archive helpers stay
    # importable without them.
    import pandas
    from tqdm import tqdm

    with open("cv-corpus-13.0-2023-03-09.json", "r") as f:
        langs = list(json.load(f)["locales"].keys())

    for lang in tqdm(langs, desc="languages"):

        logging.info(f"Starting language: {lang}")

        clip_path = f"/home/vaibhav_huggingface_co/common_voice_dataset_generator/data/{lang}/cv-corpus-13.0-2023-03-09/{lang}/clips"

        splits = ("test", "dev", "train", "other", "invalidated")

        for split in splits:
            meta_path = f"/home/vaibhav_huggingface_co/common_voice_dataset_generator/data/{lang}/cv-corpus-13.0-2023-03-09/{lang}/{split}.tsv"
            new_meta_dir = f"repos/common_voice_13_0/transcript/{lang}/"
            Path(new_meta_dir).mkdir(parents=True, exist_ok=True)

            data = pandas.read_csv(meta_path, sep='\t', quoting=csv.QUOTE_NONE, low_memory=False)
            # Copy in-process instead of shelling out to `cp`.
            shutil.copy(meta_path, new_meta_dir)

            all_files = [os.path.join(clip_path, filename) for filename in list(data["path"])]

            num_files = len(all_files)
            if num_files == 0:
                continue

            logging.info(f"split: {split.upper()}, num_files: {num_files}")

            new_clip_path = f"repos/common_voice_13_0/audio/{lang}/{split}"
            Path(new_clip_path).mkdir(parents=True, exist_ok=True)

            file_groups = _group_files(all_files, files_per_archive)

            n_file_groups = len(file_groups)
            num_procs = max(1, min(n_file_groups, 26))
            logging.info(f"N groups: {n_file_groups}, num procs: {num_procs}")

            if n_file_groups > 1:
                # The context manager shuts the workers down after map()
                # returns; otherwise one pool per split would pile up.
                with Pool(num_procs) as pool:
                    pool.map(
                        partial(
                            make_archive,
                            output_dir=new_clip_path,
                            lang=lang,
                            split=split,
                        ),
                        tqdm(file_groups, desc=f"Taring {split} subset...", position=0),
                    )
            else:
                make_archive(
                    file_groups[0],
                    output_dir=new_clip_path,
                    lang=lang,
                    split=split,
                )

        logging.info(f"Done with language: {lang}")


if __name__ == "__main__":
    main()
"""Download and extract every locale bundle of one Common Voice release.

Locales come from the release JSON. Successfully downloaded locales are
appended to ``langs_ok.txt`` (and skipped on later runs); locales whose
download URL could not be fetched are appended to ``langs_failed.txt``.
"""
import json
import logging
import os
import shutil
import sys
import time
import urllib.parse  # `import urllib` alone does not guarantee the submodule is loaded
from pathlib import Path


logging.basicConfig(
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler("cv13_download.log"),
        logging.StreamHandler(sys.stdout)
    ]
)

#Step 1: Update the BUNDLE URL -> You can get this by trying to manually download a split and looking for the download URL.
_BUNDLE_URL_TEMPLATE_DELTA = 'cv-corpus-13.0-2023-03-09/cv-corpus-13.0-2023-03-09-{locale}.tar.gz'
_BUNDLE_VERSION = _BUNDLE_URL_TEMPLATE_DELTA.split("/")[0]
_API_URL = "https://commonvoice.mozilla.org/api/v1"

#Step 2: Place the path to the CV release JSON from https://github.com/common-voice/cv-dataset/tree/main/datasets
_CV_DATASET_RELEASE_JSON = "cv-corpus-13.0-2023-03-09.json"

# URL the API hands back when no usable bundle link was produced.
_EMPTY_BUCKET_URL = "https://s3.dualstack.us-west-2.amazonaws.com/"
_REQUEST_TIMEOUT = 10.0


def _get_bundle_url(locale, url_template):
    """Ask the Common Voice API for the download URL of ``locale``'s bundle."""
    import requests  # deferred: keeps the module importable without it

    path = url_template.replace("{locale}", locale)
    # "/" is deliberately not in `safe`: the whole bucket path is sent as one
    # percent-encoded URL segment.
    path = urllib.parse.quote(path.encode("utf-8"), safe="~()*!.'")
    response = requests.get(f"{_API_URL}/bucket/dataset/{path}", timeout=_REQUEST_TIMEOUT).json()
    return response["url"]


def _log_download(locale, bundle_version):
    """Register the download of ``bundle_version`` for ``locale`` with the API."""
    import requests  # deferred: keeps the module importable without it

    email = "vaibhav@huggingface.co"
    payload = {"email": email, "locale": locale, "dataset": bundle_version}
    # Same timeout as _get_bundle_url so a stalled request cannot hang the run.
    requests.post(f"{_API_URL}/{locale}/downloaders", json=payload, timeout=_REQUEST_TIMEOUT).json()


def download_language(dl_manager, lang, root_dir):
    """Download and extract the bundle of ``lang`` into ``root_dir/data/<lang>``.

    Args:
        dl_manager: object exposing ``download_and_extract(url)``.
        lang: locale code.
        root_dir: ``pathlib.Path`` under which ``data/<lang>`` is created.

    Raises:
        ConnectionError: if the API keeps returning the placeholder URL after
            the retries are exhausted.
    """
    _log_download(lang, _BUNDLE_VERSION)
    url = _get_bundle_url(lang, _BUNDLE_URL_TEMPLATE_DELTA)
    i = 1
    while url == _EMPTY_BUCKET_URL:
        if i == 6:
            raise ConnectionError(f"Cannot download '{lang.upper()}' data, fetched url: {url}. ")
        i += 1
        logging.warning(f"Unsuccessful attempt to fetch data url. Trying {i} time. ")
        time.sleep(15)
        _log_download(lang, _BUNDLE_VERSION)
        url = _get_bundle_url(lang, _BUNDLE_URL_TEMPLATE_DELTA)

    logging.info(f"Trying to download data for '{lang.upper()}'... ")
    path = dl_manager.download_and_extract(url)
    if os.path.isdir(path):
        logging.info(f"'{lang.upper()}' data downloaded to {path}. ")
        target = root_dir / f"data/{lang}"
        # Create data/ first so the move can be a plain rename.
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(path, target)
    else:  # if it's not a dir, there was no data update in the release
        logging.info(f"No data for '{lang.upper()}' found. ")


def _read_completed_langs(path):
    """Return the locale codes recorded in a ``<index>_<locale>`` progress file.

    A missing file gives an empty set; blank or malformed lines are ignored.
    """
    path = Path(path)
    if not path.exists():
        return set()
    completed = set()
    with open(path) as f:
        for line in f:
            _, separator, locale = line.strip().partition("_")
            if separator and locale:
                completed.add(locale)
    return completed


def main():
    """Download every locale of the release that is not yet in ``langs_ok.txt``."""
    # Third-party imports are deferred so that the helpers above stay
    # importable without them.
    from datasets.download import DownloadConfig, DownloadManager
    from tqdm import tqdm

    root_dir = Path("")
    with open(_CV_DATASET_RELEASE_JSON, "r") as f:
        languages = json.load(f)["locales"].keys()

    ok_file = root_dir / "langs_ok.txt"
    langs_to_skip = _read_completed_langs(ok_file)
    if ok_file.exists():
        logging.info(f"Already downloaded languages: {langs_to_skip}")

    dl_config = DownloadConfig(
        cache_dir=root_dir / "cache",
        resume_download=True,
        max_retries=5,
    )
    dl_manager = DownloadManager(
        download_config=dl_config,
        record_checksums=False,
    )

    for lang_id, lang in enumerate(tqdm(languages, desc="Processing languages...")):
        if lang in langs_to_skip:
            logging.info(f"Data for '{lang.upper()}' language already downloaded, skipping it. ")
            continue
        try:
            download_language(dl_manager, lang, root_dir=root_dir)
            with open(ok_file, "a") as f:
                f.write(f"{lang_id}_{lang}\n")
        except ConnectionError as e:
            # Log the exception itself: `strerror` is None for an error built
            # from a single message, as download_language does.
            logging.error(e)
            with open(root_dir / "langs_failed.txt", "a") as f:
                f.write(f"{lang_id}_{lang}\n")
        time.sleep(10)


if __name__ == "__main__":
    main()
51 | "release": "cv-corpus-9.0-2022-04-27", 52 | }, 53 | { 54 | "semver": "10.0.0", 55 | "name": "common_voice_10_0", 56 | "release": "cv-corpus-10.0-2022-07-04", 57 | }, 58 | { 59 | "semver": "11.0.0", 60 | "name": "common_voice_11_0", 61 | "release": "cv-corpus-11.0-2022-09-21", 62 | }, 63 | { 64 | "semver": "12.0.0", 65 | "name": "common_voice_12_0", 66 | "release": "cv-corpus-12.0-2022-12-07", 67 | "release_name": "Common Voice Corpus 12", 68 | "date": "2022-12-07", 69 | }, 70 | { 71 | "semver": "13.0.0", 72 | "name": "common_voice_13_0", 73 | "release": "cv-corpus-13.0-2023-03-09", 74 | "release_name": "Common Voice Corpus 13", 75 | "date": "2022-03-15", 76 | }, 77 | ] 78 | 79 | 80 | def num_to_size(num: int): 81 | if num < 1000: 82 | return "n<1K" 83 | elif num < 10_000: 84 | return "1K accents in CV 8.0 240 | if "accents" in row: 241 | row["accent"] = row["accents"] 242 | del row["accents"] 243 | # if data is incomplete, fill with empty values 244 | for field in data_fields: 245 | if field not in row: 246 | row[field] = "" 247 | metadata[row["path"]] = row 248 | elif path.startswith(path_to_clips): 249 | assert metadata_found, "Found audio clips before the metadata TSV file." 250 | if not metadata: 251 | break 252 | if path in metadata: 253 | result = dict(metadata[path]) 254 | # set the audio feature and the path to the extracted file 255 | path = os.path.join(local_extracted_archive, path) if local_extracted_archive else path 256 | result["audio"] = {"path": path, "bytes": f.read()} 257 | # set path to None if the audio file doesn't exist locally (i.e. 
in streaming mode) 258 | result["path"] = path if local_extracted_archive else None 259 | 260 | yield path, result 261 | -------------------------------------------------------------------------------- /cv-corpus-13.0-2023-03-09.json: -------------------------------------------------------------------------------- 1 | { 2 | "bundleURLTemplate": "https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-13.0-2023-03-09/{locale}.tar.gz", 3 | "locales": { 4 | "de": { 5 | "duration": 4821107393, 6 | "buckets": { 7 | "dev": 16143, 8 | "invalidated": 50705, 9 | "other": 6381, 10 | "reported": 9131, 11 | "test": 16143, 12 | "train": 540437, 13 | "validated": 868264 14 | }, 15 | "reportedSentences": 9100, 16 | "clips": 925350, 17 | "splits": { 18 | "accent": { "": 1 }, 19 | "age": { 20 | "twenties": 0.18, 21 | "fourties": 0.17, 22 | "": 0.32, 23 | "thirties": 0.16, 24 | "teens": 0.03, 25 | "sixties": 0.02, 26 | "fifties": 0.11, 27 | "seventies": 0, 28 | "eighties": 0, 29 | "nineties": 0 30 | }, 31 | "gender": { "male": 0.59, "": 0.32, "female": 0.08, "other": 0.01 } 32 | }, 33 | "users": 17867, 34 | "size": 33828262029, 35 | "checksum": "71664fadd4189922f3c814889f640111e925fb511b290242e10e7a768bd7b1bb", 36 | "avgDurationSecs": 5.21, 37 | "validDurationSecs": 4523687.242, 38 | "totalHrs": 1339.19, 39 | "validHrs": 1256.57 40 | }, 41 | "en": { 42 | "buckets": { 43 | "dev": 16372, 44 | "invalidated": 264713, 45 | "other": 278333, 46 | "reported": 4732, 47 | "test": 16372, 48 | "train": 1013968, 49 | "validated": 1689599 50 | }, 51 | "reportedSentences": 4657, 52 | "duration": 11550150103, 53 | "clips": 2232645, 54 | "splits": { 55 | "accent": { "": 1 }, 56 | "age": { 57 | "": 0.37, 58 | "twenties": 0.24, 59 | "sixties": 0.04, 60 | "thirties": 0.13, 61 | "teens": 0.06, 62 | "seventies": 0.01, 63 | "fourties": 0.1, 64 | "fifties": 0.05, 65 | "eighties": 0, 66 | "nineties": 0 67 | }, 68 | "gender": { "": 0.37, "male": 0.46, "female": 0.16, 
"other": 0.02 } 69 | }, 70 | "users": 86942, 71 | "size": 82019442116, 72 | "checksum": "e9af8ba157ea45d94b98490004b8e7e1b8432414e27be396b27b94e99e284421", 73 | "avgDurationSecs": 5.173, 74 | "validDurationSecs": 8740808.352, 75 | "totalHrs": 3208.37, 76 | "validHrs": 2428 77 | }, 78 | "fa": { 79 | "buckets": { 80 | "dev": 10440, 81 | "invalidated": 14071, 82 | "other": 20673, 83 | "reported": 2268, 84 | "test": 10440, 85 | "train": 28024, 86 | "validated": 320143 87 | }, 88 | "reportedSentences": 2259, 89 | "duration": 1415098376, 90 | "clips": 354887, 91 | "splits": { 92 | "accent": { "": 1 }, 93 | "age": { 94 | "": 0.25, 95 | "twenties": 0.31, 96 | "thirties": 0.37, 97 | "fifties": 0.02, 98 | "fourties": 0.02, 99 | "teens": 0.03, 100 | "sixties": 0 101 | }, 102 | "gender": { "": 0.22, "male": 0.71, "female": 0.07, "other": 0 } 103 | }, 104 | "users": 4188, 105 | "size": 10368977174, 106 | "checksum": "921ff70850b58468bcc232f1d6f8e7c5bf58aff2ee1efdd4f26de19e75f7ed2a", 107 | "avgDurationSecs": 3.987, 108 | "validDurationSecs": 1276558.001, 109 | "totalHrs": 393.08, 110 | "validHrs": 354.59 111 | }, 112 | "fr": { 113 | "buckets": { 114 | "dev": 16114, 115 | "invalidated": 58926, 116 | "other": 31371, 117 | "reported": 6922, 118 | "test": 16114, 119 | "train": 509300, 120 | "validated": 676602 121 | }, 122 | "reportedSentences": 6846, 123 | "duration": 3839056354, 124 | "clips": 766899, 125 | "splits": { 126 | "accent": { "": 1 }, 127 | "age": { 128 | "twenties": 0.17, 129 | "thirties": 0.16, 130 | "": 0.37, 131 | "teens": 0.03, 132 | "fourties": 0.14, 133 | "fifties": 0.1, 134 | "sixties": 0.03, 135 | "seventies": 0.01, 136 | "eighties": 0, 137 | "nineties": 0 138 | }, 139 | "gender": { "male": 0.59, "": 0.3, "female": 0.1, "other": 0.01 } 140 | }, 141 | "users": 17428, 142 | "size": 27191480970, 143 | "checksum": "a44e73aa19a6805838a1302c1d311b704e90de7463da187b4b4ac3bcfba053fd", 144 | "avgDurationSecs": 5.006, 145 | "validDurationSecs": 3387034.286, 146 | 
"totalHrs": 1066.4, 147 | "validHrs": 940.84 148 | }, 149 | "es": { 150 | "buckets": { 151 | "dev": 15708, 152 | "invalidated": 68143, 153 | "other": 1167766, 154 | "reported": 2095, 155 | "test": 15708, 156 | "train": 280329, 157 | "validated": 356713 158 | }, 159 | "reportedSentences": 2078, 160 | "duration": 7746327810, 161 | "clips": 1592622, 162 | "splits": { 163 | "accent": { "": 1 }, 164 | "age": { 165 | "thirties": 0.1, 166 | "": 0.13, 167 | "fifties": 0.04, 168 | "twenties": 0.55, 169 | "teens": 0.08, 170 | "fourties": 0.03, 171 | "sixties": 0.08, 172 | "eighties": 0, 173 | "seventies": 0, 174 | "nineties": 0 175 | }, 176 | "gender": { "male": 0.55, "": 0.13, "other": 0, "female": 0.33 } 177 | }, 178 | "users": 25096, 179 | "size": 48860618846, 180 | "checksum": "4d2b6be24bc13b4dce65bb8f63531225ed68b9590ef768c4245decfe319bc7cf", 181 | "avgDurationSecs": 4.864, 182 | "validDurationSecs": 1735010.462, 183 | "totalHrs": 2151.75, 184 | "validHrs": 481.94 185 | }, 186 | "sl": { 187 | "buckets": { 188 | "dev": 1162, 189 | "invalidated": 261, 190 | "other": 2302, 191 | "reported": 37, 192 | "test": 1240, 193 | "train": 1436, 194 | "validated": 9909 195 | }, 196 | "reportedSentences": 38, 197 | "duration": 47128294, 198 | "clips": 12472, 199 | "splits": { 200 | "accent": { "": 1 }, 201 | "age": { 202 | "twenties": 0.47, 203 | "teens": 0.07, 204 | "": 0.18, 205 | "sixties": 0.07, 206 | "fifties": 0.06, 207 | "fourties": 0.09, 208 | "thirties": 0.05 209 | }, 210 | "gender": { "female": 0.15, "male": 0.67, "": 0.18, "other": 0 } 211 | }, 212 | "users": 141, 213 | "size": 330113132, 214 | "checksum": "cbd0b177478b176d3b3162b9a48e54ad3240f6be46b35308b45d6f7d5788c289", 215 | "avgDurationSecs": 3.779, 216 | "validDurationSecs": 37443.414, 217 | "totalHrs": 13.09, 218 | "validHrs": 10.4 219 | }, 220 | "kab": { 221 | "buckets": { 222 | "dev": 14996, 223 | "invalidated": 19513, 224 | "other": 111915, 225 | "reported": 9012, 226 | "test": 14996, 227 | "train": 151730, 228 | 
"validated": 609062 229 | }, 230 | "reportedSentences": 9007, 231 | "duration": 2470790068, 232 | "clips": 740490, 233 | "splits": { 234 | "accent": { "": 1 }, 235 | "age": { 236 | "fourties": 0.09, 237 | "thirties": 0.29, 238 | "": 0.28, 239 | "fifties": 0.19, 240 | "twenties": 0.12, 241 | "eighties": 0, 242 | "teens": 0, 243 | "sixties": 0.03, 244 | "seventies": 0 245 | }, 246 | "gender": { "male": 0.53, "": 0.26, "female": 0.2, "other": 0 } 247 | }, 248 | "users": 1514, 249 | "size": 18439964913, 250 | "checksum": "9461233ae95d9850048bacf0c2d5d3102384e6bf8e542978cdd11732053dcf42", 251 | "avgDurationSecs": 3.337, 252 | "validDurationSecs": 2032254.778, 253 | "totalHrs": 686.33, 254 | "validHrs": 564.51 255 | }, 256 | "cy": { 257 | "buckets": { 258 | "dev": 5293, 259 | "invalidated": 4390, 260 | "other": 18680, 261 | "reported": 162, 262 | "test": 5307, 263 | "train": 7810, 264 | "validated": 89159 265 | }, 266 | "reportedSentences": 162, 267 | "duration": 546587361, 268 | "clips": 112229, 269 | "splits": { 270 | "accent": { "": 1 }, 271 | "age": { 272 | "fourties": 0.16, 273 | "twenties": 0.13, 274 | "sixties": 0.06, 275 | "fifties": 0.09, 276 | "": 0.43, 277 | "thirties": 0.09, 278 | "seventies": 0.01, 279 | "eighties": 0, 280 | "teens": 0.02 281 | }, 282 | "gender": { "male": 0.33, "female": 0.24, "": 0.41, "other": 0.01 } 283 | }, 284 | "users": 1750, 285 | "size": 4009004534, 286 | "checksum": "c54adb86f51b50eb5813d793e3a7b21ddd69e9a69df8f73eb7a147e47957ca68", 287 | "avgDurationSecs": 4.87, 288 | "validDurationSecs": 434229.856, 289 | "totalHrs": 151.82, 290 | "validHrs": 120.61 291 | }, 292 | "ca": { 293 | "duration": 10415935674, 294 | "buckets": { 295 | "dev": 16380, 296 | "invalidated": 86112, 297 | "other": 493015, 298 | "reported": 5738, 299 | "test": 16380, 300 | "train": 1046965, 301 | "validated": 1325430 302 | }, 303 | "reportedSentences": 5690, 304 | "clips": 1904557, 305 | "splits": { 306 | "accent": { "": 1 }, 307 | "age": { 308 | "thirties": 
0.06, 309 | "fifties": 0.17, 310 | "fourties": 0.11, 311 | "twenties": 0.05, 312 | "": 0.34, 313 | "sixties": 0.23, 314 | "teens": 0.01, 315 | "seventies": 0.04, 316 | "eighties": 0, 317 | "nineties": 0 318 | }, 319 | "gender": { "male": 0.44, "": 0.34, "female": 0.21, "other": 0 } 320 | }, 321 | "users": 31941, 322 | "size": 63732227938, 323 | "checksum": "7a7f050ea8e98ad8aa8082758ec2065d16024ebcc5300c6e8baa47a5dfdeecf2", 324 | "avgDurationSecs": 5.469, 325 | "validDurationSecs": 7248716.431, 326 | "totalHrs": 2893.31, 327 | "validHrs": 2013.53 328 | }, 329 | "tt": { 330 | "duration": 110016998, 331 | "buckets": { 332 | "dev": 3296, 333 | "invalidated": 404, 334 | "other": 34, 335 | "reported": 4, 336 | "test": 5117, 337 | "train": 9553, 338 | "validated": 28851 339 | }, 340 | "reportedSentences": 5, 341 | "clips": 29289, 342 | "splits": { 343 | "accent": { "": 1 }, 344 | "age": { 345 | "": 0.2, 346 | "thirties": 0.72, 347 | "twenties": 0.05, 348 | "sixties": 0, 349 | "fifties": 0.01, 350 | "teens": 0, 351 | "fourties": 0, 352 | "seventies": 0.01 353 | }, 354 | "gender": { "": 0.2, "male": 0.78, "female": 0.02 } 355 | }, 356 | "users": 234, 357 | "size": 811793536, 358 | "checksum": "88f304cc84abff09d809698f58de875663516f9a2b7e85ed351f525dcec520ce", 359 | "avgDurationSecs": 3.756, 360 | "validDurationSecs": 108371.758, 361 | "totalHrs": 30.56, 362 | "validHrs": 30.1 363 | }, 364 | "ta": { 365 | "duration": 1404253896, 366 | "buckets": { 367 | "dev": 11937, 368 | "invalidated": 5621, 369 | "other": 87707, 370 | "reported": 3343, 371 | "test": 11973, 372 | "train": 43350, 373 | "validated": 132638 374 | }, 375 | "reportedSentences": 3343, 376 | "clips": 225966, 377 | "splits": { 378 | "accent": { "": 1 }, 379 | "age": { 380 | "twenties": 0.08, 381 | "thirties": 0.09, 382 | "": 0.72, 383 | "fourties": 0.03, 384 | "seventies": 0.02, 385 | "fifties": 0.03, 386 | "teens": 0.03, 387 | "sixties": 0, 388 | "eighties": 0 389 | }, 390 | "gender": { "male": 0.16, "": 0.71, 
"other": 0, "female": 0.13 } 391 | }, 392 | "users": 850, 393 | "size": 8408601882, 394 | "checksum": "51c38e4ce57c21b1352f9d981dbe19702d01a093f230fc6271fb8bf97f8665d1", 395 | "avgDurationSecs": 6.214, 396 | "validDurationSecs": 824271.918, 397 | "totalHrs": 390.07, 398 | "validHrs": 228.96 399 | }, 400 | "ru": { 401 | "duration": 883818540, 402 | "buckets": { 403 | "dev": 10185, 404 | "invalidated": 9523, 405 | "other": 13491, 406 | "reported": 391, 407 | "test": 10186, 408 | "train": 26269, 409 | "validated": 148642 410 | }, 411 | "reportedSentences": 385, 412 | "clips": 171656, 413 | "splits": { 414 | "accent": { "": 1 }, 415 | "age": { 416 | "twenties": 0.35, 417 | "teens": 0.09, 418 | "": 0.24, 419 | "fourties": 0.14, 420 | "thirties": 0.15, 421 | "fifties": 0.03, 422 | "sixties": 0, 423 | "seventies": 0 424 | }, 425 | "gender": { "male": 0.6, "": 0.24, "other": 0, "female": 0.15 } 426 | }, 427 | "users": 2901, 428 | "size": 6058265022, 429 | "checksum": "0992166fab13d7a8a8df539b0667c4cd947de88f2b61f107c3805c04ceae0a43", 430 | "avgDurationSecs": 5.149, 431 | "validDurationSecs": 765324.576, 432 | "totalHrs": 245.5, 433 | "validHrs": 212.59 434 | }, 435 | "nl": { 436 | "duration": 411681817, 437 | "buckets": { 438 | "dev": 10930, 439 | "invalidated": 5331, 440 | "other": 2723, 441 | "reported": 334, 442 | "test": 10936, 443 | "train": 31906, 444 | "validated": 86798 445 | }, 446 | "reportedSentences": 334, 447 | "clips": 94852, 448 | "splits": { 449 | "accent": { "": 1 }, 450 | "age": { 451 | "": 0.41, 452 | "twenties": 0.21, 453 | "fourties": 0.15, 454 | "thirties": 0.11, 455 | "teens": 0.02, 456 | "fifties": 0.08, 457 | "sixties": 0.02, 458 | "nineties": 0, 459 | "eighties": 0, 460 | "seventies": 0 461 | }, 462 | "gender": { "": 0.42, "male": 0.47, "female": 0.11, "other": 0 } 463 | }, 464 | "users": 1610, 465 | "size": 2808697434, 466 | "checksum": "2a8edc9005bbc8a3623ce25bfe95979bc9144e49a09468e8fd574ea76de30d94", 467 | "avgDurationSecs": 4.34, 468 | 
"validDurationSecs": 376725.407, 469 | "totalHrs": 114.35, 470 | "validHrs": 104.64 471 | }, 472 | "it": { 473 | "duration": 1327694592, 474 | "buckets": { 475 | "dev": 15086, 476 | "invalidated": 17764, 477 | "other": 209, 478 | "reported": 5752, 479 | "test": 15096, 480 | "train": 162637, 481 | "validated": 229467 482 | }, 483 | "reportedSentences": 5748, 484 | "clips": 247440, 485 | "splits": { 486 | "accent": { "": 1 }, 487 | "age": { 488 | "thirties": 0.17, 489 | "twenties": 0.22, 490 | "": 0.29, 491 | "fifties": 0.15, 492 | "fourties": 0.14, 493 | "seventies": 0, 494 | "sixties": 0.03, 495 | "teens": 0.01, 496 | "eighties": 0, 497 | "nineties": 0 498 | }, 499 | "gender": { "female": 0.11, "male": 0.6, "": 0.28, "other": 0 } 500 | }, 501 | "users": 6881, 502 | "size": 9117088260, 503 | "checksum": "38d09ee666b8b813415e37d2c149d804c25f1d4dc149e602325554c7ed5f2580", 504 | "avgDurationSecs": 5.366, 505 | "validDurationSecs": 1231256.446, 506 | "totalHrs": 368.8, 507 | "validHrs": 342.01 508 | }, 509 | "eu": { 510 | "duration": 538424295, 511 | "buckets": { 512 | "dev": 6591, 513 | "invalidated": 5819, 514 | "other": 28376, 515 | "reported": 97, 516 | "test": 6591, 517 | "train": 10918, 518 | "validated": 69567 519 | }, 520 | "reportedSentences": 97, 521 | "clips": 103762, 522 | "splits": { 523 | "accent": { "": 1 }, 524 | "age": { 525 | "fourties": 0.13, 526 | "thirties": 0.07, 527 | "fifties": 0.13, 528 | "twenties": 0.35, 529 | "": 0.26, 530 | "teens": 0.03, 531 | "sixties": 0.02, 532 | "seventies": 0 533 | }, 534 | "gender": { "male": 0.46, "female": 0.25, "": 0.27, "other": 0.02 } 535 | }, 536 | "users": 1252, 537 | "size": 4040542343, 538 | "checksum": "07e2c8846ad8307b9790f402fa45a66ce71a2f1c95df8ec8c82d6fb3e9214a33", 539 | "avgDurationSecs": 5.189, 540 | "validDurationSecs": 360985.36, 541 | "totalHrs": 149.56, 542 | "validHrs": 100.27 543 | }, 544 | "tr": { 545 | "duration": 353861415, 546 | "buckets": { 547 | "dev": 10837, 548 | "invalidated": 4090, 549 
| "other": 156, 550 | "reported": 356, 551 | "test": 10839, 552 | "train": 31091, 553 | "validated": 93160 554 | }, 555 | "reportedSentences": 357, 556 | "clips": 97406, 557 | "splits": { 558 | "accent": { "": 1 }, 559 | "age": { 560 | "": 0.3, 561 | "thirties": 0.08, 562 | "twenties": 0.28, 563 | "teens": 0.02, 564 | "fourties": 0.03, 565 | "fifties": 0.09, 566 | "sixties": 0.16, 567 | "eighties": 0.02, 568 | "seventies": 0.03 569 | }, 570 | "gender": { "": 0.3, "male": 0.47, "female": 0.22, "other": 0 } 571 | }, 572 | "users": 1435, 573 | "size": 2154735517, 574 | "checksum": "c1a9adf7e17d82022f0ef16d9143560f8fdb426eba15dea385d1dcf64432a1b1", 575 | "avgDurationSecs": 3.633, 576 | "validDurationSecs": 338436.333, 577 | "totalHrs": 98.29, 578 | "validHrs": 94.01 579 | }, 580 | "ar": { 581 | "duration": 533060847, 582 | "buckets": { 583 | "dev": 10409, 584 | "invalidated": 14994, 585 | "other": 36192, 586 | "reported": 2093, 587 | "test": 10445, 588 | "train": 28167, 589 | "validated": 76677 590 | }, 591 | "reportedSentences": 2085, 592 | "clips": 127863, 593 | "splits": { 594 | "accent": { "": 1 }, 595 | "age": { 596 | "thirties": 0.11, 597 | "": 0.56, 598 | "twenties": 0.28, 599 | "fourties": 0.01, 600 | "teens": 0.03, 601 | "fifties": 0, 602 | "sixties": 0, 603 | "nineties": 0 604 | }, 605 | "gender": { "female": 0.18, "": 0.56, "male": 0.27, "other": 0 } 606 | }, 607 | "users": 1407, 608 | "size": 3160282600, 609 | "checksum": "b51f50ecee1a7323ecf6d5f0dae5c1140df0c583d978cb8b3cf05fe6cc1e2d38", 610 | "avgDurationSecs": 4.169, 611 | "validDurationSecs": 319666.413, 612 | "totalHrs": 148.07, 613 | "validHrs": 88.79 614 | }, 615 | "zh-TW": { 616 | "duration": 429109942, 617 | "buckets": { 618 | "dev": 4825, 619 | "invalidated": 4657, 620 | "other": 44705, 621 | "reported": 142, 622 | "test": 4825, 623 | "train": 6799, 624 | "validated": 79531 625 | }, 626 | "reportedSentences": 143, 627 | "clips": 128893, 628 | "splits": { 629 | "accent": { "": 1 }, 630 | "age": { 
631 | "thirties": 0.2, 632 | "twenties": 0.31, 633 | "teens": 0.06, 634 | "": 0.27, 635 | "fifties": 0.07, 636 | "seventies": 0, 637 | "fourties": 0.1, 638 | "sixties": 0 639 | }, 640 | "gender": { "male": 0.49, "": 0.26, "female": 0.23, "other": 0.02 } 641 | }, 642 | "users": 2121, 643 | "size": 2930301331, 644 | "checksum": "2bfa61c1c915713204aa8875dfa8dd2dc1aa6fc56245cf90c2ed13fded6a7253", 645 | "avgDurationSecs": 3.329, 646 | "validDurationSecs": 264774.214, 647 | "totalHrs": 119.19, 648 | "validHrs": 73.54 649 | }, 650 | "br": { 651 | "duration": 86037091, 652 | "buckets": { 653 | "dev": 2142, 654 | "invalidated": 846, 655 | "other": 15235, 656 | "reported": 274, 657 | "test": 2129, 658 | "train": 2618, 659 | "validated": 11369 660 | }, 661 | "reportedSentences": 274, 662 | "clips": 27450, 663 | "splits": { 664 | "accent": { "": 1 }, 665 | "age": { 666 | "twenties": 0.33, 667 | "": 0.31, 668 | "fifties": 0.05, 669 | "fourties": 0.06, 670 | "thirties": 0.07, 671 | "sixties": 0.15, 672 | "seventies": 0.02, 673 | "teens": 0.01 674 | }, 675 | "gender": { "male": 0.67, "": 0.31, "female": 0.02, "other": 0 } 676 | }, 677 | "users": 181, 678 | "size": 611265443, 679 | "checksum": "ca761f982405be3e675b6d11e42938cf72f2ec1d1622f3da3d1c4df2e1758e45", 680 | "avgDurationSecs": 3.134, 681 | "validDurationSecs": 35634.087, 682 | "totalHrs": 23.89, 683 | "validHrs": 9.89 684 | }, 685 | "pt": { 686 | "duration": 707442559, 687 | "buckets": { 688 | "dev": 9072, 689 | "invalidated": 6465, 690 | "other": 33792, 691 | "reported": 2544, 692 | "test": 9072, 693 | "train": 19948, 694 | "validated": 133154 695 | }, 696 | "reportedSentences": 2537, 697 | "clips": 173411, 698 | "splits": { 699 | "accent": { "": 1 }, 700 | "age": { 701 | "": 0.23, 702 | "twenties": 0.38, 703 | "teens": 0.02, 704 | "thirties": 0.19, 705 | "fourties": 0.14, 706 | "sixties": 0.02, 707 | "fifties": 0.02, 708 | "seventies": 0 709 | }, 710 | "gender": { "": 0.23, "male": 0.69, "female": 0.06, "other": 0.02 } 
711 | }, 712 | "users": 3099, 713 | "size": 4475741482, 714 | "checksum": "7dc2dbf1ad261825ebca28ec594a3adc9b9d14d33a64e5f05e0fccad9ed44bd1", 715 | "avgDurationSecs": 4.08, 716 | "validDurationSecs": 543211.252, 717 | "totalHrs": 196.51, 718 | "validHrs": 150.89 719 | }, 720 | "eo": { 721 | "duration": 6787549508, 722 | "buckets": { 723 | "dev": 14909, 724 | "invalidated": 127338, 725 | "other": 142397, 726 | "reported": 2210, 727 | "test": 14913, 728 | "train": 143984, 729 | "validated": 848846 730 | }, 731 | "reportedSentences": 2209, 732 | "clips": 1118581, 733 | "splits": { 734 | "accent": { "": 1 }, 735 | "age": { 736 | "twenties": 0.56, 737 | "thirties": 0.12, 738 | "": 0.2, 739 | "fourties": 0.04, 740 | "fifties": 0.02, 741 | "seventies": 0, 742 | "teens": 0.05, 743 | "sixties": 0, 744 | "eighties": 0 745 | }, 746 | "gender": { "male": 0.69, "": 0.2, "female": 0.11, "other": 0 } 747 | }, 748 | "users": 1616, 749 | "size": 40520544051, 750 | "checksum": "df26401578f024d2d1748438bc2bb8da1f4fcd066d26b5b1f1fb7dbc88ca4537", 751 | "avgDurationSecs": 6.068, 752 | "validDurationSecs": 5150797.528, 753 | "totalHrs": 1885.43, 754 | "validHrs": 1430.77 755 | }, 756 | "zh-CN": { 757 | "duration": 3779771748, 758 | "buckets": { 759 | "dev": 10624, 760 | "invalidated": 56661, 761 | "other": 594422, 762 | "reported": 681, 763 | "test": 10624, 764 | "train": 29383, 765 | "validated": 179473 766 | }, 767 | "reportedSentences": 675, 768 | "clips": 830556, 769 | "splits": { 770 | "accent": { "": 1 }, 771 | "age": { 772 | "": 0.94, 773 | "teens": 0.01, 774 | "twenties": 0.04, 775 | "thirties": 0.01, 776 | "fourties": 0, 777 | "nineties": 0, 778 | "fifties": 0, 779 | "sixties": 0 780 | }, 781 | "gender": { "": 0.94, "male": 0.05, "female": 0.01, "other": 0 } 782 | }, 783 | "users": 6667, 784 | "size": 22348267124, 785 | "checksum": "c160d6c83e20424692ffacbbf2fc6480c6613c0913ae5cb1d2a2ddf7d1c239cc", 786 | "avgDurationSecs": 4.551, 787 | "validDurationSecs": 816762.476, 788 | 
"totalHrs": 1049.93, 789 | "validHrs": 226.87 790 | }, 791 | "id": { 792 | "duration": 226821756, 793 | "buckets": { 794 | "dev": 3292, 795 | "invalidated": 2553, 796 | "other": 29260, 797 | "reported": 345, 798 | "test": 3649, 799 | "train": 5041, 800 | "validated": 25027 801 | }, 802 | "reportedSentences": 346, 803 | "clips": 56840, 804 | "splits": { 805 | "accent": { "": 1 }, 806 | "age": { 807 | "": 0.25, 808 | "twenties": 0.41, 809 | "thirties": 0.1, 810 | "teens": 0.23, 811 | "fifties": 0, 812 | "fourties": 0.02 813 | }, 814 | "gender": { "": 0.25, "male": 0.44, "female": 0.27, "other": 0.04 } 815 | }, 816 | "users": 492, 817 | "size": 1427745911, 818 | "checksum": "35e1c00612c9a306be8597d9544c032bafac35d892456f6a6186e53117a0933c", 819 | "avgDurationSecs": 3.991, 820 | "validDurationSecs": 99871.008, 821 | "totalHrs": 63, 822 | "validHrs": 27.74 823 | }, 824 | "ia": { 825 | "duration": 60756600, 826 | "buckets": { 827 | "dev": 1796, 828 | "invalidated": 331, 829 | "other": 2615, 830 | "reported": 275, 831 | "test": 1777, 832 | "train": 5018, 833 | "validated": 11583 834 | }, 835 | "reportedSentences": 271, 836 | "clips": 14529, 837 | "splits": { 838 | "accent": { "": 1 }, 839 | "age": { 840 | "seventies": 0.22, 841 | "fourties": 0.3, 842 | "": 0.38, 843 | "twenties": 0.05, 844 | "thirties": 0.02, 845 | "teens": 0, 846 | "fifties": 0.03, 847 | "sixties": 0 848 | }, 849 | "gender": { "male": 0.61, "": 0.38, "female": 0.01 } 850 | }, 851 | "users": 62, 852 | "size": 411366463, 853 | "checksum": "cae091b1e805fb1bc4621b5547ca755dc8e655b872bf19c2d06c2218ef12ab13", 854 | "avgDurationSecs": 4.182, 855 | "validDurationSecs": 48437.174, 856 | "totalHrs": 16.87, 857 | "validHrs": 13.45 858 | }, 859 | "lv": { 860 | "duration": 61503893, 861 | "buckets": { 862 | "dev": 2246, 863 | "invalidated": 448, 864 | "other": 3247, 865 | "reported": 37, 866 | "test": 2295, 867 | "train": 2756, 868 | "validated": 13277 869 | }, 870 | "reportedSentences": 38, 871 | "clips": 16972, 872 
| "splits": { 873 | "accent": { "": 1 }, 874 | "age": { 875 | "thirties": 0.43, 876 | "fourties": 0.04, 877 | "": 0.3, 878 | "twenties": 0.18, 879 | "teens": 0.02, 880 | "fifties": 0.03 881 | }, 882 | "gender": { "male": 0.58, "female": 0.13, "": 0.3 } 883 | }, 884 | "users": 321, 885 | "size": 398577312, 886 | "checksum": "717c7e08b7ea15f95a4629b7bdacfa416c74b66e1d0ea491ee8f1cf16715b359", 887 | "avgDurationSecs": 3.624, 888 | "validDurationSecs": 48113.787, 889 | "totalHrs": 17.08, 890 | "validHrs": 13.36 891 | }, 892 | "ja": { 893 | "duration": 621734109, 894 | "buckets": { 895 | "dev": 4961, 896 | "invalidated": 5993, 897 | "other": 73033, 898 | "reported": 193, 899 | "test": 4961, 900 | "train": 7071, 901 | "validated": 57527 902 | }, 903 | "reportedSentences": 193, 904 | "clips": 136553, 905 | "splits": { 906 | "accent": { "": 1 }, 907 | "age": { 908 | "twenties": 0.5, 909 | "": 0.21, 910 | "teens": 0.05, 911 | "fifties": 0.01, 912 | "thirties": 0.09, 913 | "fourties": 0.14, 914 | "sixties": 0, 915 | "seventies": 0, 916 | "nineties": 0 917 | }, 918 | "gender": { "male": 0.47, "": 0.21, "female": 0.32, "other": 0.01 } 919 | }, 920 | "users": 1630, 921 | "size": 3675036058, 922 | "checksum": "14188e016cc264e7d4381e4835d1efd54623f35f0ffb85562004d1ae906fe5c4", 923 | "avgDurationSecs": 4.553, 924 | "validDurationSecs": 261923.928, 925 | "totalHrs": 172.7, 926 | "validHrs": 72.75 927 | }, 928 | "rw": { 929 | "duration": 8581498601, 930 | "buckets": { 931 | "dev": 15987, 932 | "invalidated": 227795, 933 | "other": 47246, 934 | "reported": 629, 935 | "test": 16213, 936 | "train": 1003023, 937 | "validated": 1438618 938 | }, 939 | "reportedSentences": 630, 940 | "clips": 1713659, 941 | "splits": { 942 | "accent": { "": 1 }, 943 | "age": { 944 | "": 0.05, 945 | "twenties": 0.61, 946 | "thirties": 0.12, 947 | "teens": 0.2, 948 | "fourties": 0.02, 949 | "fifties": 0 950 | }, 951 | "gender": { "": 0.1, "male": 0.57, "female": 0.33, "other": 0 } 952 | }, 953 | "users": 
1103, 954 | "size": 60972955216, 955 | "checksum": "9570b365918a227121b1111de3f2404a6624b51e0374683da7c2701a4327fb99", 956 | "avgDurationSecs": 5.008, 957 | "validDurationSecs": 7204174.433, 958 | "totalHrs": 2383.74, 959 | "validHrs": 2001.15 960 | }, 961 | "sv-SE": { 962 | "duration": 187757835, 963 | "buckets": { 964 | "dev": 5114, 965 | "invalidated": 1381, 966 | "other": 6313, 967 | "reported": 580, 968 | "test": 5120, 969 | "train": 7407, 970 | "validated": 39506 971 | }, 972 | "reportedSentences": 581, 973 | "clips": 47200, 974 | "splits": { 975 | "accent": { "": 1 }, 976 | "age": { 977 | "thirties": 0.25, 978 | "": 0.18, 979 | "teens": 0.03, 980 | "fifties": 0.03, 981 | "twenties": 0.12, 982 | "fourties": 0.38, 983 | "sixties": 0, 984 | "seventies": 0 985 | }, 986 | "gender": { "male": 0.48, "": 0.19, "female": 0.32, "other": 0.01 } 987 | }, 988 | "users": 791, 989 | "size": 1188005367, 990 | "checksum": "760816a9eb17ede1738f08fc44ff87d6e9f9e26a6e6f4fe4f979084aba8de708", 991 | "avgDurationSecs": 3.978, 992 | "validDurationSecs": 157151.717, 993 | "totalHrs": 52.15, 994 | "validHrs": 43.65 995 | }, 996 | "cnh": { 997 | "duration": 20675832, 998 | "buckets": { 999 | "dev": 761, 1000 | "invalidated": 436, 1001 | "other": 2908, 1002 | "reported": 8, 1003 | "test": 763, 1004 | "train": 817, 1005 | "validated": 2458 1006 | }, 1007 | "reportedSentences": 9, 1008 | "clips": 5802, 1009 | "splits": { 1010 | "accent": { "": 1 }, 1011 | "age": { 1012 | "": 0.51, 1013 | "twenties": 0.36, 1014 | "fourties": 0.01, 1015 | "teens": 0.02, 1016 | "thirties": 0.08, 1017 | "fifties": 0.02 1018 | }, 1019 | "gender": { "": 0.51, "male": 0.33, "female": 0.16 } 1020 | }, 1021 | "users": 299, 1022 | "size": 161331878, 1023 | "checksum": "38066864bff36a1ee125d901be9602f6d25061ef9a5f30fcb8f0e953e3a37208", 1024 | "avgDurationSecs": 3.564, 1025 | "validDurationSecs": 8759.255, 1026 | "totalHrs": 5.74, 1027 | "validHrs": 2.43 1028 | }, 1029 | "et": { 1030 | "duration": 195989330, 1031 | 
"buckets": { 1032 | "dev": 2638, 1033 | "invalidated": 6697, 1034 | "other": 506, 1035 | "reported": 490, 1036 | "test": 2638, 1037 | "train": 3138, 1038 | "validated": 21796 1039 | }, 1040 | "reportedSentences": 487, 1041 | "clips": 28999, 1042 | "splits": { 1043 | "accent": { "": 1 }, 1044 | "age": { 1045 | "": 0.2, 1046 | "thirties": 0.08, 1047 | "twenties": 0.68, 1048 | "fourties": 0.04, 1049 | "fifties": 0, 1050 | "seventies": 0, 1051 | "teens": 0 1052 | }, 1053 | "gender": { "": 0.2, "male": 0.54, "female": 0.26, "other": 0 } 1054 | }, 1055 | "users": 809, 1056 | "size": 1336581339, 1057 | "checksum": "dfce8875a0c27ab49fa5167ab20b264d1e5773c052bb45c3524d1e2c3e400454", 1058 | "avgDurationSecs": 6.758, 1059 | "validDurationSecs": 147307.957, 1060 | "totalHrs": 54.44, 1061 | "validHrs": 40.91 1062 | }, 1063 | "ky": { 1064 | "duration": 162502536, 1065 | "buckets": { 1066 | "dev": 1612, 1067 | "invalidated": 5618, 1068 | "other": 76, 1069 | "reported": 37, 1070 | "test": 1613, 1071 | "train": 1788, 1072 | "validated": 30117 1073 | }, 1074 | "reportedSentences": 38, 1075 | "clips": 35811, 1076 | "splits": { 1077 | "accent": { "": 1 }, 1078 | "age": { 1079 | "thirties": 0.08, 1080 | "": 0.07, 1081 | "fourties": 0.01, 1082 | "twenties": 0.66, 1083 | "teens": 0.18, 1084 | "fifties": 0 1085 | }, 1086 | "gender": { "male": 0.54, "": 0.11, "female": 0.35, "other": 0 } 1087 | }, 1088 | "users": 257, 1089 | "size": 1049223157, 1090 | "checksum": "d964f43dacf1b4dd69ec8e2ac32ee6ab745d46c760c4c23632752b00abaebc51", 1091 | "avgDurationSecs": 4.538, 1092 | "validDurationSecs": 136664.401, 1093 | "totalHrs": 45.13, 1094 | "validHrs": 37.96 1095 | }, 1096 | "ro": { 1097 | "duration": 151497031, 1098 | "buckets": { 1099 | "dev": 3743, 1100 | "invalidated": 922, 1101 | "other": 21057, 1102 | "reported": 380, 1103 | "test": 3861, 1104 | "train": 5206, 1105 | "validated": 15960 1106 | }, 1107 | "reportedSentences": 381, 1108 | "clips": 37939, 1109 | "splits": { 1110 | "accent": { 
"": 1 }, 1111 | "age": { 1112 | "thirties": 0.13, 1113 | "teens": 0.02, 1114 | "": 0.11, 1115 | "fourties": 0.06, 1116 | "sixties": 0, 1117 | "twenties": 0.67, 1118 | "fifties": 0.01, 1119 | "eighties": 0 1120 | }, 1121 | "gender": { "male": 0.74, "": 0.1, "female": 0.15, "other": 0.01 } 1122 | }, 1123 | "users": 391, 1124 | "size": 944768429, 1125 | "checksum": "d4e66e8267946831ffb6af6e6e53957cbfa6dbf6621af23be0197482f25328f5", 1126 | "avgDurationSecs": 3.993, 1127 | "validDurationSecs": 63731.058, 1128 | "totalHrs": 42.08, 1129 | "validHrs": 17.7 1130 | }, 1131 | "hsb": { 1132 | "duration": 10318356, 1133 | "buckets": { 1134 | "dev": 172, 1135 | "invalidated": 247, 1136 | "other": 15, 1137 | "reported": 86, 1138 | "test": 444, 1139 | "train": 808, 1140 | "validated": 1424 1141 | }, 1142 | "reportedSentences": 87, 1143 | "clips": 1686, 1144 | "splits": { 1145 | "accent": { "": 1 }, 1146 | "age": { 1147 | "fourties": 0.54, 1148 | "": 0.18, 1149 | "thirties": 0.1, 1150 | "sixties": 0, 1151 | "seventies": 0.03, 1152 | "twenties": 0.11, 1153 | "fifties": 0.03 1154 | }, 1155 | "gender": { "male": 0.81, "": 0.18, "other": 0 } 1156 | }, 1157 | "users": 21, 1158 | "size": 80367147, 1159 | "checksum": "3e9666e60a5da9bfd80879d8b1f015cce61d59729a15eee0912162b0bb5d93ce", 1160 | "avgDurationSecs": 6.12, 1161 | "validDurationSecs": 8714.91, 1162 | "totalHrs": 2.86, 1163 | "validHrs": 2.42 1164 | }, 1165 | "el": { 1166 | "duration": 110402278, 1167 | "buckets": { 1168 | "dev": 1663, 1169 | "invalidated": 818, 1170 | "other": 10261, 1171 | "reported": 69, 1172 | "test": 1704, 1173 | "train": 1944, 1174 | "validated": 15637 1175 | }, 1176 | "reportedSentences": 70, 1177 | "clips": 26716, 1178 | "splits": { 1179 | "accent": { "": 1 }, 1180 | "age": { 1181 | "thirties": 0.37, 1182 | "fourties": 0.16, 1183 | "": 0.31, 1184 | "twenties": 0.12, 1185 | "fifties": 0.03, 1186 | "teens": 0.01, 1187 | "sixties": 0 1188 | }, 1189 | "gender": { "male": 0.64, "": 0.31, "other": 0.02, "female": 
0.03 } 1190 | }, 1191 | "users": 360, 1192 | "size": 736695676, 1193 | "checksum": "77499bae45f5a2308de48f71e20fec3ed92a54a7a7583b255f1c0a80c1d027df", 1194 | "avgDurationSecs": 4.132, 1195 | "validDurationSecs": 64618.971, 1196 | "totalHrs": 30.66, 1197 | "validHrs": 17.94 1198 | }, 1199 | "cs": { 1200 | "duration": 920669619, 1201 | "buckets": { 1202 | "dev": 8769, 1203 | "invalidated": 1970, 1204 | "other": 145750, 1205 | "reported": 883, 1206 | "test": 8829, 1207 | "train": 18877, 1208 | "validated": 58629 1209 | }, 1210 | "reportedSentences": 877, 1211 | "clips": 206349, 1212 | "splits": { 1213 | "accent": { "": 1 }, 1214 | "age": { 1215 | "fourties": 0.07, 1216 | "": 0.23, 1217 | "thirties": 0.48, 1218 | "teens": 0.02, 1219 | "twenties": 0.18, 1220 | "fifties": 0.01, 1221 | "sixties": 0, 1222 | "seventies": 0 1223 | }, 1224 | "gender": { "male": 0.54, "": 0.23, "female": 0.23, "other": 0 } 1225 | }, 1226 | "users": 876, 1227 | "size": 5640424340, 1228 | "checksum": "68e53a82eb28d99cc10a90d7ec32a7a98930d7a7f764d2e321ae413eb0924558", 1229 | "avgDurationSecs": 4.462, 1230 | "validDurationSecs": 261585.659, 1231 | "totalHrs": 255.74, 1232 | "validHrs": 72.66 1233 | }, 1234 | "pl": { 1235 | "duration": 617355218, 1236 | "buckets": { 1237 | "dev": 8534, 1238 | "invalidated": 6266, 1239 | "other": 3824, 1240 | "reported": 553, 1241 | "test": 8534, 1242 | "train": 17518, 1243 | "validated": 126997 1244 | }, 1245 | "reportedSentences": 553, 1246 | "clips": 137087, 1247 | "splits": { 1248 | "accent": { "": 1 }, 1249 | "age": { 1250 | "twenties": 0.28, 1251 | "": 0.24, 1252 | "teens": 0.02, 1253 | "thirties": 0.33, 1254 | "fourties": 0.12, 1255 | "fifties": 0.01, 1256 | "nineties": 0.01, 1257 | "sixties": 0 1258 | }, 1259 | "gender": { "male": 0.6, "": 0.25, "female": 0.14, "other": 0.01 } 1260 | }, 1261 | "users": 3183, 1262 | "size": 4411276400, 1263 | "checksum": "017a81af2eb81a533410e03f7d0b67cce4e5bcfa40683852d261a3421ec35264", 1264 | "avgDurationSecs": 4.503, 1265 
| "validDurationSecs": 571916.087, 1266 | "totalHrs": 171.48, 1267 | "validHrs": 158.86 1268 | }, 1269 | "rm-sursilv": { 1270 | "duration": 38788025, 1271 | "buckets": { 1272 | "dev": 1352, 1273 | "invalidated": 677, 1274 | "other": 2140, 1275 | "reported": 16, 1276 | "test": 1347, 1277 | "train": 1554, 1278 | "validated": 4260 1279 | }, 1280 | "reportedSentences": 17, 1281 | "clips": 7077, 1282 | "splits": { 1283 | "accent": { "": 1 }, 1284 | "age": { 1285 | "thirties": 0.03, 1286 | "twenties": 0.1, 1287 | "": 0.64, 1288 | "teens": 0.06, 1289 | "fourties": 0.17 1290 | }, 1291 | "gender": { "male": 0.17, "female": 0.19, "": 0.64, "other": 0 } 1292 | }, 1293 | "users": 87, 1294 | "size": 292892713, 1295 | "checksum": "adef75ee647c88a0ec20a1b8977b4f89ae3bafee1d491862e92e77f869ae3140", 1296 | "avgDurationSecs": 5.481, 1297 | "validDurationSecs": 23348.451, 1298 | "totalHrs": 10.77, 1299 | "validHrs": 6.48 1300 | }, 1301 | "rm-vallader": { 1302 | "duration": 15115550, 1303 | "buckets": { 1304 | "dev": 377, 1305 | "invalidated": 394, 1306 | "other": 709, 1307 | "reported": 36, 1308 | "test": 442, 1309 | "train": 672, 1310 | "validated": 1498 1311 | }, 1312 | "reportedSentences": 35, 1313 | "clips": 2601, 1314 | "splits": { 1315 | "accent": { "": 1 }, 1316 | "age": { 1317 | "": 0.36, 1318 | "fourties": 0.41, 1319 | "twenties": 0.14, 1320 | "thirties": 0.06, 1321 | "fifties": 0, 1322 | "sixties": 0.03 1323 | }, 1324 | "gender": { "": 0.36, "male": 0.44, "female": 0.19, "other": 0.01 } 1325 | }, 1326 | "users": 52, 1327 | "size": 115424323, 1328 | "checksum": "f303f525cf8ee884a1538c895b433f62b99fcfdfaeeaea5e81312f06ddc0c969", 1329 | "avgDurationSecs": 5.811, 1330 | "validDurationSecs": 8705.534, 1331 | "totalHrs": 4.19, 1332 | "validHrs": 2.41 1333 | }, 1334 | "mn": { 1335 | "duration": 69547436, 1336 | "buckets": { 1337 | "dev": 1767, 1338 | "invalidated": 767, 1339 | "other": 3522, 1340 | "reported": 31, 1341 | "test": 1877, 1342 | "train": 2275, 1343 | "validated": 8396 
1344 | }, 1345 | "reportedSentences": 32, 1346 | "clips": 12685, 1347 | "splits": { 1348 | "accent": { "": 1 }, 1349 | "age": { 1350 | "thirties": 0.23, 1351 | "": 0.28, 1352 | "twenties": 0.41, 1353 | "fourties": 0.01, 1354 | "teens": 0.02, 1355 | "nineties": 0.06, 1356 | "fifties": 0 1357 | }, 1358 | "gender": { "male": 0.36, "": 0.28, "female": 0.31, "other": 0.06 } 1359 | }, 1360 | "users": 488, 1361 | "size": 526725245, 1362 | "checksum": "919edfde38ca1b875cfda0da52157b3074f323e010acf6c8b0ae28ba534f3a87", 1363 | "avgDurationSecs": 5.483, 1364 | "validDurationSecs": 46032.343, 1365 | "totalHrs": 19.31, 1366 | "validHrs": 12.78 1367 | }, 1368 | "zh-HK": { 1369 | "duration": 491515152, 1370 | "buckets": { 1371 | "dev": 5593, 1372 | "invalidated": 4345, 1373 | "other": 21594, 1374 | "reported": 671, 1375 | "test": 5593, 1376 | "train": 8425, 1377 | "validated": 91509 1378 | }, 1379 | "reportedSentences": 660, 1380 | "clips": 117448, 1381 | "splits": { 1382 | "accent": { "": 1 }, 1383 | "age": { 1384 | "fourties": 0.13, 1385 | "thirties": 0.11, 1386 | "": 0.39, 1387 | "teens": 0.02, 1388 | "fifties": 0.02, 1389 | "seventies": 0, 1390 | "sixties": 0.01, 1391 | "twenties": 0.33 1392 | }, 1393 | "gender": { "male": 0.42, "": 0.35, "female": 0.22, "other": 0.01 } 1394 | }, 1395 | "users": 2971, 1396 | "size": 3518822638, 1397 | "checksum": "d1e4e28bc38aa02d32919a4bd4bee55dc1a195963a7e2487da1057095a3b94d0", 1398 | "avgDurationSecs": 4.185, 1399 | "validDurationSecs": 382961.481, 1400 | "totalHrs": 136.53, 1401 | "validHrs": 106.37 1402 | }, 1403 | "ab": { 1404 | "duration": 303841224, 1405 | "buckets": { 1406 | "dev": 9160, 1407 | "invalidated": 5275, 1408 | "other": 11948, 1409 | "reported": 220, 1410 | "test": 9116, 1411 | "train": 21027, 1412 | "validated": 41972 1413 | }, 1414 | "reportedSentences": 219, 1415 | "clips": 59195, 1416 | "splits": { 1417 | "accent": { "": 1 }, 1418 | "age": { 1419 | "seventies": 0.01, 1420 | "thirties": 0.13, 1421 | "": 0.18, 1422 | 
"teens": 0.28, 1423 | "twenties": 0.18, 1424 | "fifties": 0.06, 1425 | "sixties": 0.05, 1426 | "fourties": 0.09, 1427 | "eighties": 0.01 1428 | }, 1429 | "gender": { "male": 0.18, "female": 0.64, "": 0.18 } 1430 | }, 1431 | "users": 400, 1432 | "size": 1734407911, 1433 | "checksum": "f50ed6c61818d641ecc81d9fb6eeb2b713adf2ac0ad5eca60c7e9869b83247fd", 1434 | "avgDurationSecs": 5.133, 1435 | "validDurationSecs": 215437.518, 1436 | "totalHrs": 84.4, 1437 | "validHrs": 59.84 1438 | }, 1439 | "cv": { 1440 | "duration": 98308740, 1441 | "buckets": { 1442 | "dev": 1158, 1443 | "invalidated": 2120, 1444 | "other": 457, 1445 | "reported": 143, 1446 | "test": 1288, 1447 | "train": 1540, 1448 | "validated": 16840 1449 | }, 1450 | "reportedSentences": 139, 1451 | "clips": 19417, 1452 | "splits": { 1453 | "accent": { "": 1 }, 1454 | "age": { 1455 | "twenties": 0.51, 1456 | "": 0.22, 1457 | "fourties": 0.06, 1458 | "thirties": 0.01, 1459 | "teens": 0.19, 1460 | "fifties": 0.01 1461 | }, 1462 | "gender": { "male": 0.54, "": 0.19, "female": 0.27 } 1463 | }, 1464 | "users": 108, 1465 | "size": 668315995, 1466 | "checksum": "75c84a1c2b8cef32a2eed9a692c156c55e4cc16f511f482832a1ea53768debb5", 1467 | "avgDurationSecs": 5.063, 1468 | "validDurationSecs": 85261.327, 1469 | "totalHrs": 27.3, 1470 | "validHrs": 23.68 1471 | }, 1472 | "uk": { 1473 | "duration": 316242528, 1474 | "buckets": { 1475 | "dev": 8377, 1476 | "invalidated": 2531, 1477 | "other": 0, 1478 | "reported": 606, 1479 | "test": 8383, 1480 | "train": 16911, 1481 | "validated": 63928 1482 | }, 1483 | "reportedSentences": 607, 1484 | "clips": 66459, 1485 | "splits": { 1486 | "accent": { "": 1 }, 1487 | "age": { 1488 | "twenties": 0.25, 1489 | "teens": 0.1, 1490 | "": 0.26, 1491 | "fourties": 0.12, 1492 | "thirties": 0.27, 1493 | "fifties": 0, 1494 | "sixties": 0 1495 | }, 1496 | "gender": { "male": 0.58, "female": 0.15, "": 0.26 } 1497 | }, 1498 | "users": 818, 1499 | "size": 2137250020, 1500 | "checksum": 
"a00db74b6941d8f735ec7be3e84154eac0972707c523063f52126174d23fc341", 1501 | "avgDurationSecs": 4.758, 1502 | "validDurationSecs": 304198.864, 1503 | "totalHrs": 87.84, 1504 | "validHrs": 84.49 1505 | }, 1506 | "mt": { 1507 | "duration": 61454772, 1508 | "buckets": { 1509 | "dev": 1593, 1510 | "invalidated": 321, 1511 | "other": 6259, 1512 | "reported": 9, 1513 | "test": 1644, 1514 | "train": 1943, 1515 | "validated": 6383 1516 | }, 1517 | "reportedSentences": 10, 1518 | "clips": 12963, 1519 | "splits": { 1520 | "accent": { "": 1 }, 1521 | "age": { 1522 | "twenties": 0.19, 1523 | "": 0.26, 1524 | "fourties": 0.17, 1525 | "thirties": 0.09, 1526 | "teens": 0.03, 1527 | "fifties": 0.25, 1528 | "sixties": 0.01 1529 | }, 1530 | "gender": { "male": 0.25, "": 0.26, "female": 0.48, "other": 0.01 } 1531 | }, 1532 | "users": 211, 1533 | "size": 456683345, 1534 | "checksum": "7f8c9084a18a7789b9d1b227df6cbf27bcbd8576f999dfbf238c9be9b18165ef", 1535 | "avgDurationSecs": 4.741, 1536 | "validDurationSecs": 30260.419, 1537 | "totalHrs": 17.07, 1538 | "validHrs": 8.4 1539 | }, 1540 | "as": { 1541 | "duration": 11755685, 1542 | "buckets": { 1543 | "dev": 508, 1544 | "invalidated": 203, 1545 | "other": 195, 1546 | "reported": 9, 1547 | "test": 548, 1548 | "train": 612, 1549 | "validated": 1668 1550 | }, 1551 | "reportedSentences": 10, 1552 | "clips": 2066, 1553 | "splits": { 1554 | "accent": { "": 1 }, 1555 | "age": { "twenties": 0.36, "": 0.59, "thirties": 0.04, "teens": 0 }, 1556 | "gender": { "male": 0.41, "": 0.59, "female": 0 } 1557 | }, 1558 | "users": 44, 1559 | "size": 73430340, 1560 | "checksum": "a536305b88375d09560d5a30b2d99c6ac90dcc38f480e3da7c4b499d7419225e", 1561 | "avgDurationSecs": 5.69, 1562 | "validDurationSecs": 9491.037, 1563 | "totalHrs": 3.26, 1564 | "validHrs": 2.63 1565 | }, 1566 | "ka": { 1567 | "duration": 160786692, 1568 | "buckets": { 1569 | "dev": 4424, 1570 | "invalidated": 1495, 1571 | "other": 9739, 1572 | "reported": 243, 1573 | "test": 4532, 1574 | 
"train": 6379, 1575 | "validated": 18962 1576 | }, 1577 | "reportedSentences": 243, 1578 | "clips": 30196, 1579 | "splits": { 1580 | "accent": { "": 1 }, 1581 | "age": { 1582 | "twenties": 0.28, 1583 | "thirties": 0.17, 1584 | "": 0.27, 1585 | "fourties": 0.05, 1586 | "fifties": 0.17, 1587 | "teens": 0.01, 1588 | "sixties": 0.06 1589 | }, 1590 | "gender": { "male": 0.45, "female": 0.28, "": 0.27 } 1591 | }, 1592 | "users": 523, 1593 | "size": 959259578, 1594 | "checksum": "bf45a832f2765d39f0d83c1d9c09649723f82d00def1cab320cccc719bd89a64", 1595 | "avgDurationSecs": 5.325, 1596 | "validDurationSecs": 100968.249, 1597 | "totalHrs": 44.66, 1598 | "validHrs": 28.04 1599 | }, 1600 | "fy-NL": { 1601 | "duration": 751674303, 1602 | "buckets": { 1603 | "dev": 3170, 1604 | "invalidated": 3773, 1605 | "other": 102007, 1606 | "reported": 535, 1607 | "test": 3169, 1608 | "train": 3917, 1609 | "validated": 49112 1610 | }, 1611 | "reportedSentences": 533, 1612 | "clips": 154892, 1613 | "splits": { 1614 | "accent": { "": 1 }, 1615 | "age": { 1616 | "": 0.57, 1617 | "fifties": 0.1, 1618 | "thirties": 0.11, 1619 | "twenties": 0.04, 1620 | "fourties": 0.06, 1621 | "sixties": 0.13, 1622 | "seventies": 0, 1623 | "teens": 0, 1624 | "eighties": 0 1625 | }, 1626 | "gender": { "": 0.57, "male": 0.12, "female": 0.31 } 1627 | }, 1628 | "users": 1969, 1629 | "size": 4506898320, 1630 | "checksum": "9324a2ee92e9651ddb0630d0ea534960288f2fa4534903b8644cff661ce2baac", 1631 | "avgDurationSecs": 4.853, 1632 | "validDurationSecs": 238335.281, 1633 | "totalHrs": 208.79, 1634 | "validHrs": 66.2 1635 | }, 1636 | "dv": { 1637 | "duration": 227748735, 1638 | "buckets": { 1639 | "dev": 2227, 1640 | "invalidated": 1653, 1641 | "other": 16395, 1642 | "reported": 55, 1643 | "test": 2212, 1644 | "train": 2677, 1645 | "validated": 26964 1646 | }, 1647 | "reportedSentences": 56, 1648 | "clips": 45012, 1649 | "splits": { 1650 | "accent": { "": 1 }, 1651 | "age": { 1652 | "": 0.22, 1653 | "twenties": 0.18, 1654 | 
"thirties": 0.36, 1655 | "fourties": 0.22, 1656 | "teens": 0.01, 1657 | "nineties": 0, 1658 | "fifties": 0.02 1659 | }, 1660 | "gender": { "": 0.21, "male": 0.29, "female": 0.5 } 1661 | }, 1662 | "users": 331, 1663 | "size": 1442355118, 1664 | "checksum": "942e0269c339009b7d7d6ac17337ae5f593249926926fee6c37d64aa5c25a2b5", 1665 | "avgDurationSecs": 5.06, 1666 | "validDurationSecs": 136430.661, 1667 | "totalHrs": 63.26, 1668 | "validHrs": 37.89 1669 | }, 1670 | "pa-IN": { 1671 | "duration": 14215922, 1672 | "buckets": { 1673 | "dev": 285, 1674 | "invalidated": 77, 1675 | "other": 1414, 1676 | "reported": 249, 1677 | "test": 459, 1678 | "train": 712, 1679 | "validated": 1456 1680 | }, 1681 | "reportedSentences": 244, 1682 | "clips": 2947, 1683 | "splits": { 1684 | "accent": { "": 1 }, 1685 | "age": { 1686 | "": 0.3, 1687 | "fourties": 0.04, 1688 | "fifties": 0.05, 1689 | "thirties": 0.38, 1690 | "twenties": 0.23, 1691 | "sixties": 0, 1692 | "teens": 0 1693 | }, 1694 | "gender": { "": 0.3, "male": 0.69, "female": 0.01 } 1695 | }, 1696 | "users": 62, 1697 | "size": 100051978, 1698 | "checksum": "b6f1ba247777341e72d91a83f34e9114b739110b3ad4f60fb05e63eec8b1b825", 1699 | "avgDurationSecs": 4.824, 1700 | "validDurationSecs": 7023.543, 1701 | "totalHrs": 3.94, 1702 | "validHrs": 1.95 1703 | }, 1704 | "vi": { 1705 | "duration": 65730176, 1706 | "buckets": { 1707 | "dev": 392, 1708 | "invalidated": 350, 1709 | "other": 11486, 1710 | "reported": 186, 1711 | "test": 1225, 1712 | "train": 2462, 1713 | "validated": 4732 1714 | }, 1715 | "reportedSentences": 185, 1716 | "clips": 16568, 1717 | "splits": { 1718 | "accent": { "": 1 }, 1719 | "age": { 1720 | "thirties": 0.02, 1721 | "twenties": 0.19, 1722 | "": 0.25, 1723 | "teens": 0.21, 1724 | "seventies": 0, 1725 | "fourties": 0.02, 1726 | "sixties": 0.3 1727 | }, 1728 | "gender": { "male": 0.53, "": 0.25, "female": 0.2, "other": 0.02 } 1729 | }, 1730 | "users": 252, 1731 | "size": 381782157, 1732 | "checksum": 
"0c012f9c8aa77bf5de9c5263fe30a64218afb2c48534b868d55f100583239746", 1733 | "avgDurationSecs": 3.967, 1734 | "validDurationSecs": 18773.249, 1735 | "totalHrs": 18.25, 1736 | "validHrs": 5.21 1737 | }, 1738 | "or": { 1739 | "duration": 38655420, 1740 | "buckets": { 1741 | "dev": 342, 1742 | "invalidated": 176, 1743 | "other": 6284, 1744 | "reported": 23, 1745 | "test": 227, 1746 | "train": 482, 1747 | "validated": 1227 1748 | }, 1749 | "reportedSentences": 24, 1750 | "clips": 7687, 1751 | "splits": { 1752 | "accent": { "": 1 }, 1753 | "age": { 1754 | "twenties": 0.19, 1755 | "": 0.08, 1756 | "thirties": 0.73, 1757 | "fourties": 0, 1758 | "teens": 0 1759 | }, 1760 | "gender": { "male": 0.88, "": 0.08, "female": 0.05 } 1761 | }, 1762 | "users": 94, 1763 | "size": 274647208, 1764 | "checksum": "0df8ef262d2b0e69b4ed0601620e6c581fa283632fb3f39efbd8fb8902217baa", 1765 | "avgDurationSecs": 5.029, 1766 | "validDurationSecs": 6170.183, 1767 | "totalHrs": 10.73, 1768 | "validHrs": 1.71 1769 | }, 1770 | "ga-IE": { 1771 | "duration": 36168975, 1772 | "buckets": { 1773 | "dev": 506, 1774 | "invalidated": 882, 1775 | "other": 4226, 1776 | "reported": 27, 1777 | "test": 511, 1778 | "train": 549, 1779 | "validated": 5022 1780 | }, 1781 | "reportedSentences": 28, 1782 | "clips": 10130, 1783 | "splits": { 1784 | "accent": { "": 1 }, 1785 | "age": { 1786 | "twenties": 0.26, 1787 | "": 0.37, 1788 | "thirties": 0.25, 1789 | "fourties": 0.06, 1790 | "sixties": 0.01, 1791 | "teens": 0.01, 1792 | "fifties": 0.05 1793 | }, 1794 | "gender": { "male": 0.5, "": 0.37, "female": 0.13, "other": 0 } 1795 | }, 1796 | "users": 175, 1797 | "size": 247174669, 1798 | "checksum": "3cf9de0620073155dab06d73c89b6200f33b08f9bf410089ec4abe0fdb10d5e6", 1799 | "avgDurationSecs": 3.57, 1800 | "validDurationSecs": 17930.957, 1801 | "totalHrs": 10.04, 1802 | "validHrs": 4.98 1803 | }, 1804 | "fi": { 1805 | "duration": 66664227, 1806 | "buckets": { 1807 | "dev": 1694, 1808 | "invalidated": 235, 1809 | "other": 
5900, 1810 | "reported": 54, 1811 | "test": 1748, 1812 | "train": 2105, 1813 | "validated": 8356 1814 | }, 1815 | "reportedSentences": 55, 1816 | "clips": 14491, 1817 | "splits": { 1818 | "accent": { "": 1 }, 1819 | "age": { 1820 | "thirties": 0.18, 1821 | "": 0.34, 1822 | "twenties": 0.11, 1823 | "fourties": 0.33, 1824 | "teens": 0.01, 1825 | "fifties": 0.04, 1826 | "seventies": 0 1827 | }, 1828 | "gender": { "male": 0.32, "": 0.34, "female": 0.34, "other": 0 } 1829 | }, 1830 | "users": 208, 1831 | "size": 397079516, 1832 | "checksum": "b78d40a194095854d5682d6714ea25a3f311bb824f4e351cef9a1f32562e6ec2", 1833 | "avgDurationSecs": 4.6, 1834 | "validDurationSecs": 38440.845, 1835 | "totalHrs": 18.51, 1836 | "validHrs": 10.67 1837 | }, 1838 | "hu": { 1839 | "duration": 297940429, 1840 | "buckets": { 1841 | "dev": 7688, 1842 | "invalidated": 1838, 1843 | "other": 23308, 1844 | "reported": 338, 1845 | "test": 7875, 1846 | "train": 15516, 1847 | "validated": 31212 1848 | }, 1849 | "reportedSentences": 339, 1850 | "clips": 56358, 1851 | "splits": { 1852 | "accent": { "": 1 }, 1853 | "age": { 1854 | "teens": 0.03, 1855 | "": 0.29, 1856 | "thirties": 0.16, 1857 | "twenties": 0.17, 1858 | "fifties": 0.17, 1859 | "fourties": 0.14, 1860 | "sixties": 0.02, 1861 | "seventies": 0.01 1862 | }, 1863 | "gender": { "male": 0.35, "": 0.3, "female": 0.34, "other": 0.01 } 1864 | }, 1865 | "users": 920, 1866 | "size": 1769841289, 1867 | "checksum": "01ab3d2fa87729761f250cbd4c1304f1e3e2c38a9fc050a88a584114ad126d6a", 1868 | "avgDurationSecs": 5.287, 1869 | "validDurationSecs": 165004.377, 1870 | "totalHrs": 82.76, 1871 | "validHrs": 45.83 1872 | }, 1873 | "th": { 1874 | "duration": 1497189504, 1875 | "buckets": { 1876 | "dev": 11002, 1877 | "invalidated": 9117, 1878 | "other": 204730, 1879 | "reported": 4168, 1880 | "test": 11002, 1881 | "train": 32477, 1882 | "validated": 143263 1883 | }, 1884 | "reportedSentences": 4168, 1885 | "clips": 357110, 1886 | "splits": { 1887 | "accent": { "": 1 
}, 1888 | "age": { 1889 | "twenties": 0.23, 1890 | "": 0.41, 1891 | "thirties": 0.07, 1892 | "fourties": 0.04, 1893 | "teens": 0.04, 1894 | "fifties": 0.2, 1895 | "eighties": 0, 1896 | "sixties": 0 1897 | }, 1898 | "gender": { "male": 0.42, "": 0.41, "female": 0.17, "other": 0.01 } 1899 | }, 1900 | "users": 7784, 1901 | "size": 8707281050, 1902 | "checksum": "3b3bc4c1bad6ff2590a7e764941592fa89435a827cadd4e2c5371980adefc31b", 1903 | "avgDurationSecs": 4.193, 1904 | "validDurationSecs": 600632.466, 1905 | "totalHrs": 415.88, 1906 | "validHrs": 166.84 1907 | }, 1908 | "lt": { 1909 | "duration": 88824932, 1910 | "buckets": { 1911 | "dev": 4215, 1912 | "invalidated": 651, 1913 | "other": 1704, 1914 | "reported": 185, 1915 | "test": 4319, 1916 | "train": 6205, 1917 | "validated": 14917 1918 | }, 1919 | "reportedSentences": 185, 1920 | "clips": 17272, 1921 | "splits": { 1922 | "accent": { "": 1 }, 1923 | "age": { 1924 | "twenties": 0.36, 1925 | "": 0.21, 1926 | "thirties": 0.31, 1927 | "fifties": 0.04, 1928 | "sixties": 0.01, 1929 | "teens": 0.03, 1930 | "fourties": 0.04 1931 | }, 1932 | "gender": { "male": 0.56, "": 0.21, "female": 0.22 } 1933 | }, 1934 | "users": 273, 1935 | "size": 541352467, 1936 | "checksum": "5b9308a384488b8964896bfdf654400825a1ec4bd973c7357269f4a11d16bab5", 1937 | "avgDurationSecs": 5.143, 1938 | "validDurationSecs": 76713.844, 1939 | "totalHrs": 24.67, 1940 | "validHrs": 21.3 1941 | }, 1942 | "lg": { 1943 | "duration": 2011525119, 1944 | "buckets": { 1945 | "dev": 13389, 1946 | "invalidated": 39159, 1947 | "other": 36922, 1948 | "reported": 6450, 1949 | "test": 13420, 1950 | "train": 70813, 1951 | "validated": 271640 1952 | }, 1953 | "reportedSentences": 6445, 1954 | "clips": 347721, 1955 | "splits": { 1956 | "accent": { "": 1 }, 1957 | "age": { 1958 | "": 0.26, 1959 | "thirties": 0.22, 1960 | "twenties": 0.38, 1961 | "fourties": 0.07, 1962 | "fifties": 0.05, 1963 | "teens": 0.01, 1964 | "nineties": 0, 1965 | "sixties": 0.02, 1966 | "seventies": 
0, 1967 | "eighties": 0 1968 | }, 1969 | "gender": { "": 0.25, "female": 0.39, "male": 0.36 } 1970 | }, 1971 | "users": 646, 1972 | "size": 11779660819, 1973 | "checksum": "ce391d09f447bdd33da0b3249362580179ddf3d7cc522f3902a706ce7b35669d", 1974 | "avgDurationSecs": 5.785, 1975 | "validDurationSecs": 1571405.475, 1976 | "totalHrs": 558.75, 1977 | "validHrs": 436.5 1978 | }, 1979 | "hi": { 1980 | "duration": 67634340, 1981 | "buckets": { 1982 | "dev": 2281, 1983 | "invalidated": 706, 1984 | "other": 3487, 1985 | "reported": 132, 1986 | "test": 2947, 1987 | "train": 4479, 1988 | "validated": 9751 1989 | }, 1990 | "reportedSentences": 133, 1991 | "clips": 13944, 1992 | "splits": { 1993 | "accent": { "": 1 }, 1994 | "age": { 1995 | "twenties": 0.33, 1996 | "fourties": 0.03, 1997 | "": 0.34, 1998 | "thirties": 0.26, 1999 | "teens": 0.01, 2000 | "fifties": 0.01, 2001 | "sixties": 0.01 2002 | }, 2003 | "gender": { "male": 0.62, "female": 0.04, "": 0.34, "other": 0 } 2004 | }, 2005 | "users": 365, 2006 | "size": 398945100, 2007 | "checksum": "d8df02f35f6fb84ec14bfa3ca491e8f8090cfc52475b84f4e8b79ce0d66d3765", 2008 | "avgDurationSecs": 4.85, 2009 | "validDurationSecs": 47296.504, 2010 | "totalHrs": 18.78, 2011 | "validHrs": 13.13 2012 | }, 2013 | "bas": { 2014 | "duration": 10040580, 2015 | "buckets": { 2016 | "dev": 457, 2017 | "invalidated": 508, 2018 | "other": 10, 2019 | "reported": 7, 2020 | "test": 528, 2021 | "train": 763, 2022 | "validated": 1748 2023 | }, 2024 | "reportedSentences": 8, 2025 | "clips": 2266, 2026 | "splits": { 2027 | "accent": { "": 1 }, 2028 | "age": { "": 0.98, "fourties": 0.01, "teens": 0.01 }, 2029 | "gender": { "": 0.98, "female": 0.02 } 2030 | }, 2031 | "users": 33, 2032 | "size": 55827418, 2033 | "checksum": "83f512a6347ebac5df13162cee85ad2986fb99d3fecc6488c1d734051d505002", 2034 | "avgDurationSecs": 4.431, 2035 | "validDurationSecs": 7745.337, 2036 | "totalHrs": 2.78, 2037 | "validHrs": 2.15 2038 | }, 2039 | "sk": { 2040 | "duration": 
87440076, 2041 | "buckets": { 2042 | "dev": 2492, 2043 | "invalidated": 793, 2044 | "other": 2329, 2045 | "reported": 47, 2046 | "test": 2552, 2047 | "train": 3227, 2048 | "validated": 18222 2049 | }, 2050 | "reportedSentences": 48, 2051 | "clips": 21344, 2052 | "splits": { 2053 | "accent": { "": 1 }, 2054 | "age": { 2055 | "": 0.46, 2056 | "thirties": 0.22, 2057 | "twenties": 0.08, 2058 | "fourties": 0.16, 2059 | "teens": 0.09 2060 | }, 2061 | "gender": { "": 0.45, "male": 0.46, "female": 0.07, "other": 0.01 } 2062 | }, 2063 | "users": 185, 2064 | "size": 492582779, 2065 | "checksum": "5d4904324f11e6a17b4d6fe1aec4278ac58ed71ae84fcb86b67dd7339b583bb6", 2066 | "avgDurationSecs": 4.097, 2067 | "validDurationSecs": 74650.162, 2068 | "totalHrs": 24.28, 2069 | "validHrs": 20.73 2070 | }, 2071 | "kmr": { 2072 | "duration": 347753052, 2073 | "buckets": { 2074 | "dev": 3491, 2075 | "invalidated": 2368, 2076 | "other": 31131, 2077 | "reported": 1030, 2078 | "test": 3485, 2079 | "train": 4451, 2080 | "validated": 52411 2081 | }, 2082 | "reportedSentences": 1025, 2083 | "clips": 85910, 2084 | "splits": { 2085 | "accent": { "": 1 }, 2086 | "age": { 2087 | "": 0.49, 2088 | "twenties": 0.35, 2089 | "thirties": 0.07, 2090 | "fourties": 0.03, 2091 | "fifties": 0.04, 2092 | "teens": 0.01, 2093 | "sixties": 0 2094 | }, 2095 | "gender": { "": 0.49, "male": 0.41, "female": 0.1, "other": 0 } 2096 | }, 2097 | "users": 545, 2098 | "size": 1831680486, 2099 | "checksum": "af6a3a2865cca9d90c2e377c6bd1fe2d80f3865845c3e5a780d588b1c242571d", 2100 | "avgDurationSecs": 4.048, 2101 | "validDurationSecs": 212153.244, 2102 | "totalHrs": 96.59, 2103 | "validHrs": 58.93 2104 | }, 2105 | "bg": { 2106 | "duration": 66152520, 2107 | "buckets": { 2108 | "dev": 2358, 2109 | "invalidated": 586, 2110 | "other": 3272, 2111 | "reported": 181, 2112 | "test": 2463, 2113 | "train": 3385, 2114 | "validated": 8222 2115 | }, 2116 | "reportedSentences": 182, 2117 | "clips": 12080, 2118 | "splits": { 2119 | "accent": 
{ "": 1 }, 2120 | "age": { 2121 | "fourties": 0.25, 2122 | "thirties": 0.22, 2123 | "": 0.39, 2124 | "twenties": 0.12, 2125 | "teens": 0, 2126 | "sixties": 0, 2127 | "fifties": 0 2128 | }, 2129 | "gender": { "male": 0.56, "female": 0.05, "": 0.39 } 2130 | }, 2131 | "users": 96, 2132 | "size": 386104092, 2133 | "checksum": "69f56f3125411a9d9c1c7d907d0cf7b0116316113b3d273805415fc9185ba823", 2134 | "avgDurationSecs": 5.476, 2135 | "validDurationSecs": 45025.333, 2136 | "totalHrs": 18.37, 2137 | "validHrs": 12.5 2138 | }, 2139 | "kk": { 2140 | "duration": 7002288, 2141 | "buckets": { 2142 | "dev": 369, 2143 | "invalidated": 197, 2144 | "other": 1, 2145 | "reported": 41, 2146 | "test": 396, 2147 | "train": 453, 2148 | "validated": 1223 2149 | }, 2150 | "reportedSentences": 42, 2151 | "clips": 1421, 2152 | "splits": { 2153 | "accent": { "": 1 }, 2154 | "age": { 2155 | "": 0.51, 2156 | "thirties": 0.03, 2157 | "twenties": 0.31, 2158 | "teens": 0.05, 2159 | "fifties": 0.09 2160 | }, 2161 | "gender": { "": 0.52, "male": 0.46, "female": 0.02 } 2162 | }, 2163 | "users": 90, 2164 | "size": 40191047, 2165 | "checksum": "088026aa813183a4d5a61ba8788c079b2f9253df77cef68546d426ee5868e777", 2166 | "avgDurationSecs": 4.928, 2167 | "validDurationSecs": 6026.6, 2168 | "totalHrs": 1.94, 2169 | "validHrs": 1.67 2170 | }, 2171 | "ba": { 2172 | "duration": 960572412, 2173 | "buckets": { 2174 | "dev": 14507, 2175 | "invalidated": 7913, 2176 | "other": 131, 2177 | "reported": 866, 2178 | "test": 14580, 2179 | "train": 119038, 2180 | "validated": 208963 2181 | }, 2182 | "reportedSentences": 863, 2183 | "clips": 217007, 2184 | "splits": { 2185 | "accent": { "": 1 }, 2186 | "age": { 2187 | "thirties": 0.17, 2188 | "": 0.3, 2189 | "fourties": 0.06, 2190 | "fifties": 0.05, 2191 | "twenties": 0.17, 2192 | "sixties": 0.2, 2193 | "seventies": 0, 2194 | "teens": 0.04 2195 | }, 2196 | "gender": { "male": 0.3, "": 0.3, "female": 0.4 } 2197 | }, 2198 | "users": 904, 2199 | "size": 5383162213, 2200 | 
"checksum": "95a8b68a7b66484e7726fea9abbb0b5fc7773a01da36fcc58a75242b197d9cd8", 2201 | "avgDurationSecs": 4.426, 2202 | "validDurationSecs": 924965.982, 2203 | "totalHrs": 266.82, 2204 | "validHrs": 256.93 2205 | }, 2206 | "gl": { 2207 | "duration": 213131016, 2208 | "buckets": { 2209 | "dev": 6397, 2210 | "invalidated": 1871, 2211 | "other": 17121, 2212 | "reported": 355, 2213 | "test": 6546, 2214 | "train": 10951, 2215 | "validated": 24179 2216 | }, 2217 | "reportedSentences": 355, 2218 | "clips": 43171, 2219 | "splits": { 2220 | "accent": { "": 1 }, 2221 | "age": { 2222 | "": 0.31, 2223 | "thirties": 0.18, 2224 | "fifties": 0.07, 2225 | "twenties": 0.21, 2226 | "fourties": 0.19, 2227 | "teens": 0.02, 2228 | "sixties": 0.01, 2229 | "seventies": 0 2230 | }, 2231 | "gender": { "": 0.36, "male": 0.35, "female": 0.3, "other": 0 } 2232 | }, 2233 | "users": 997, 2234 | "size": 1222354918, 2235 | "checksum": "433a9dd928cb2f0d9f92c60e0d10e8a7d1aca1005e9cdd092f6a5bdfe9df7e37", 2236 | "avgDurationSecs": 4.937, 2237 | "validDurationSecs": 119369.365, 2238 | "totalHrs": 59.2, 2239 | "validHrs": 33.15 2240 | }, 2241 | "ug": { 2242 | "duration": 435382452, 2243 | "buckets": { 2244 | "dev": 3555, 2245 | "invalidated": 3927, 2246 | "other": 0, 2247 | "reported": 284, 2248 | "test": 3555, 2249 | "train": 4521, 2250 | "validated": 68366 2251 | }, 2252 | "reportedSentences": 285, 2253 | "clips": 72293, 2254 | "splits": { 2255 | "accent": { "": 1 }, 2256 | "age": { 2257 | "": 0.6, 2258 | "fifties": 0.01, 2259 | "twenties": 0.11, 2260 | "thirties": 0.15, 2261 | "fourties": 0.12, 2262 | "teens": 0.01, 2263 | "eighties": 0 2264 | }, 2265 | "gender": { "": 0.6, "male": 0.32, "female": 0.08, "other": 0 } 2266 | }, 2267 | "users": 887, 2268 | "size": 2530721785, 2269 | "checksum": "b09aff4bec1cda909cfdf10f021bba6421206d13da6d9037d281b657960b2bdf", 2270 | "avgDurationSecs": 6.022, 2271 | "validDurationSecs": 411732.211, 2272 | "totalHrs": 120.93, 2273 | "validHrs": 114.37 2274 | }, 2275 | 
"hy-AM": { 2276 | "duration": 16940484, 2277 | "buckets": { 2278 | "dev": 364, 2279 | "invalidated": 105, 2280 | "other": 1230, 2281 | "reported": 44, 2282 | "test": 442, 2283 | "train": 631, 2284 | "validated": 1438 2285 | }, 2286 | "reportedSentences": 45, 2287 | "clips": 2773, 2288 | "splits": { 2289 | "accent": { "": 1 }, 2290 | "age": { 2291 | "": 0.37, 2292 | "thirties": 0.13, 2293 | "twenties": 0.37, 2294 | "fifties": 0.04, 2295 | "teens": 0.09 2296 | }, 2297 | "gender": { "": 0.37, "male": 0.22, "female": 0.41 } 2298 | }, 2299 | "users": 66, 2300 | "size": 99409773, 2301 | "checksum": "109b4522d04e14184142ac022527346abe18c7bb6200d5ea98aedaea240e7f08", 2302 | "avgDurationSecs": 6.109, 2303 | "validDurationSecs": 8784.86, 2304 | "totalHrs": 4.7, 2305 | "validHrs": 2.44 2306 | }, 2307 | "be": { 2308 | "duration": 5334186312, 2309 | "buckets": { 2310 | "dev": 15880, 2311 | "invalidated": 30692, 2312 | "other": 75973, 2313 | "reported": 3134, 2314 | "test": 15879, 2315 | "train": 347355, 2316 | "validated": 1013522 2317 | }, 2318 | "reportedSentences": 3133, 2319 | "clips": 1120187, 2320 | "splits": { 2321 | "accent": { "": 1 }, 2322 | "age": { 2323 | "": 0.84, 2324 | "fourties": 0.05, 2325 | "thirties": 0.05, 2326 | "twenties": 0.04, 2327 | "teens": 0.01, 2328 | "fifties": 0, 2329 | "sixties": 0, 2330 | "seventies": 0 2331 | }, 2332 | "gender": { "": 0.84, "male": 0.07, "female": 0.09, "other": 0 } 2333 | }, 2334 | "users": 8052, 2335 | "size": 30184769413, 2336 | "checksum": "00a53691d285774e98f5d62a7f9f513b3e237ee580f5665083e009b9430f208f", 2337 | "avgDurationSecs": 4.762, 2338 | "validDurationSecs": 4826261.311, 2339 | "totalHrs": 1481.71, 2340 | "validHrs": 1340.62 2341 | }, 2342 | "ur": { 2343 | "duration": 507179196, 2344 | "buckets": { 2345 | "dev": 3302, 2346 | "invalidated": 3312, 2347 | "other": 85366, 2348 | "reported": 50, 2349 | "test": 3304, 2350 | "train": 4129, 2351 | "validated": 41882 2352 | }, 2353 | "reportedSentences": 50, 2354 | "clips": 
130560, 2355 | "splits": { 2356 | "accent": { "": 1 }, 2357 | "age": { 2358 | "twenties": 0.88, 2359 | "": 0.1, 2360 | "fourties": 0.01, 2361 | "thirties": 0, 2362 | "teens": 0.01, 2363 | "fifties": 0, 2364 | "sixties": 0 2365 | }, 2366 | "gender": { "male": 0.7, "": 0.1, "female": 0.2 } 2367 | }, 2368 | "users": 202, 2369 | "size": 2947911505, 2370 | "checksum": "22e848a2ad547205dbb78c929ef0e21b02ce82e5c926ddfe805b7e03fcc0cee9", 2371 | "avgDurationSecs": 3.885, 2372 | "validDurationSecs": 162696.684, 2373 | "totalHrs": 140.88, 2374 | "validHrs": 45.19 2375 | }, 2376 | "gn": { 2377 | "duration": 34785252, 2378 | "buckets": { 2379 | "dev": 352, 2380 | "invalidated": 142, 2381 | "other": 5426, 2382 | "reported": 35, 2383 | "test": 811, 2384 | "train": 1414, 2385 | "validated": 2655 2386 | }, 2387 | "reportedSentences": 36, 2388 | "clips": 8223, 2389 | "splits": { 2390 | "accent": { "": 1 }, 2391 | "age": { 2392 | "": 0.2, 2393 | "twenties": 0.21, 2394 | "thirties": 0.41, 2395 | "sixties": 0, 2396 | "fourties": 0.15, 2397 | "teens": 0.02 2398 | }, 2399 | "gender": { "": 0.2, "male": 0.6, "female": 0.19 } 2400 | }, 2401 | "users": 90, 2402 | "size": 199897821, 2403 | "checksum": "4a2932b1ea10d2b47fbf26a46d3ddc77e7b1a68cd4fdbbb24319218779fa5fa6", 2404 | "avgDurationSecs": 4.23, 2405 | "validDurationSecs": 11231.283, 2406 | "totalHrs": 9.66, 2407 | "validHrs": 3.11 2408 | }, 2409 | "sr": { 2410 | "duration": 16845480, 2411 | "buckets": { 2412 | "dev": 1276, 2413 | "invalidated": 119, 2414 | "other": 1529, 2415 | "reported": 28, 2416 | "test": 1263, 2417 | "train": 1499, 2418 | "validated": 4330 2419 | }, 2420 | "reportedSentences": 29, 2421 | "clips": 5978, 2422 | "splits": { 2423 | "accent": { "": 1 }, 2424 | "age": { 2425 | "twenties": 0.36, 2426 | "": 0.24, 2427 | "fifties": 0.2, 2428 | "fourties": 0.07, 2429 | "thirties": 0.13, 2430 | "teens": 0 2431 | }, 2432 | "gender": { "male": 0.51, "": 0.24, "female": 0.25 } 2433 | }, 2434 | "users": 134, 2435 | "size": 
92911009, 2436 | "checksum": "768c88391cc827b95c05bba2377ec5564cf19935e9b56e5016d765ab56fab0c3", 2437 | "avgDurationSecs": 2.818, 2438 | "validDurationSecs": 12201.56, 2439 | "totalHrs": 4.67, 2440 | "validHrs": 3.38 2441 | }, 2442 | "uz": { 2443 | "duration": 943613712, 2444 | "buckets": { 2445 | "dev": 12061, 2446 | "invalidated": 13811, 2447 | "other": 127766, 2448 | "reported": 1823, 2449 | "test": 12321, 2450 | "train": 48286, 2451 | "validated": 85995 2452 | }, 2453 | "reportedSentences": 1806, 2454 | "clips": 227572, 2455 | "splits": { 2456 | "accent": { "": 1 }, 2457 | "age": { 2458 | "twenties": 0.4, 2459 | "": 0.41, 2460 | "thirties": 0.01, 2461 | "teens": 0.18, 2462 | "fifties": 0, 2463 | "fourties": 0.01, 2464 | "nineties": 0 2465 | }, 2466 | "gender": { "male": 0.44, "": 0.41, "female": 0.15, "other": 0 } 2467 | }, 2468 | "users": 2109, 2469 | "size": 5261185727, 2470 | "checksum": "64d120e4adf234f10b244d6f3dcf7a0ac889a6e669458fd16c7b57f4c049e93b", 2471 | "avgDurationSecs": 4.146, 2472 | "validDurationSecs": 356573.134, 2473 | "totalHrs": 262.11, 2474 | "validHrs": 99.04 2475 | }, 2476 | "mr": { 2477 | "duration": 98491032, 2478 | "buckets": { 2479 | "dev": 1783, 2480 | "invalidated": 2251, 2481 | "other": 2799, 2482 | "reported": 58, 2483 | "test": 1735, 2484 | "train": 2226, 2485 | "validated": 10854 2486 | }, 2487 | "reportedSentences": 59, 2488 | "clips": 15904, 2489 | "splits": { 2490 | "accent": { "": 1 }, 2491 | "age": { 2492 | "thirties": 0.13, 2493 | "sixties": 0, 2494 | "twenties": 0.29, 2495 | "": 0.05, 2496 | "teens": 0.53 2497 | }, 2498 | "gender": { "male": 0.19, "female": 0.76, "": 0.05 } 2499 | }, 2500 | "users": 83, 2501 | "size": 577008222, 2502 | "checksum": "a71e722038eb3690641f30d8a8046b65c4c8d33210ad716e0286d2751d1e8498", 2503 | "avgDurationSecs": 6.193, 2504 | "validDurationSecs": 67217.157, 2505 | "totalHrs": 27.35, 2506 | "validHrs": 18.67 2507 | }, 2508 | "da": { 2509 | "duration": 42516504, 2510 | "buckets": { 2511 | "dev": 
2222, 2512 | "invalidated": 354, 2513 | "other": 841, 2514 | "reported": 379, 2515 | "test": 2160, 2516 | "train": 2746, 2517 | "validated": 9015 2518 | }, 2519 | "reportedSentences": 379, 2520 | "clips": 10210, 2521 | "splits": { 2522 | "accent": { "": 1 }, 2523 | "age": { 2524 | "": 0.32, 2525 | "thirties": 0.26, 2526 | "twenties": 0.18, 2527 | "sixties": 0, 2528 | "fourties": 0.21, 2529 | "fifties": 0.03, 2530 | "teens": 0 2531 | }, 2532 | "gender": { "": 0.32, "female": 0.08, "male": 0.61 } 2533 | }, 2534 | "users": 228, 2535 | "size": 243857551, 2536 | "checksum": "6c85261bcf8dffe5c06ad29c82760cda5cd1fdc7d9c1c99b6285a425f11d105e", 2537 | "avgDurationSecs": 4.164, 2538 | "validDurationSecs": 37540.282, 2539 | "totalHrs": 11.81, 2540 | "validHrs": 10.42 2541 | }, 2542 | "myv": { 2543 | "duration": 11274696, 2544 | "buckets": { 2545 | "dev": 239, 2546 | "invalidated": 21, 2547 | "other": 0, 2548 | "reported": 30, 2549 | "test": 449, 2550 | "train": 1241, 2551 | "validated": 1939 2552 | }, 2553 | "reportedSentences": 31, 2554 | "clips": 1960, 2555 | "splits": { 2556 | "accent": { "": 1 }, 2557 | "age": { 2558 | "sixties": 0.26, 2559 | "": 0.39, 2560 | "thirties": 0.25, 2561 | "twenties": 0.09, 2562 | "teens": 0.01 2563 | }, 2564 | "gender": { "male": 0.54, "": 0.39, "female": 0.07 } 2565 | }, 2566 | "users": 12, 2567 | "size": 65894328, 2568 | "checksum": "741e516b505e2552aa54adf74c10811a3230d2084edb02fd4d6783b2b20f4e72", 2569 | "avgDurationSecs": 5.752, 2570 | "validDurationSecs": 11153.896, 2571 | "totalHrs": 3.13, 2572 | "validHrs": 3.09 2573 | }, 2574 | "nn-NO": { 2575 | "duration": 3677832, 2576 | "buckets": { 2577 | "dev": 197, 2578 | "invalidated": 42, 2579 | "other": 16, 2580 | "reported": 22, 2581 | "test": 230, 2582 | "train": 314, 2583 | "validated": 746 2584 | }, 2585 | "reportedSentences": 23, 2586 | "clips": 804, 2587 | "splits": { 2588 | "accent": { "": 1 }, 2589 | "age": { 2590 | "": 0.37, 2591 | "thirties": 0.32, 2592 | "twenties": 0.23, 2593 | 
"fourties": 0.04, 2594 | "fifties": 0.01, 2595 | "teens": 0.03 2596 | }, 2597 | "gender": { "": 0.37, "female": 0.23, "male": 0.37, "other": 0.03 } 2598 | }, 2599 | "users": 29, 2600 | "size": 20884241, 2601 | "checksum": "f39f03129e67eb69534db439c127e62bdc2d39a02ef92e5e2daddc3358208512", 2602 | "avgDurationSecs": 4.574, 2603 | "validDurationSecs": 3412.516, 2604 | "totalHrs": 1.02, 2605 | "validHrs": 0.94 2606 | }, 2607 | "ha": { 2608 | "duration": 43793136, 2609 | "buckets": { 2610 | "dev": 580, 2611 | "invalidated": 169, 2612 | "other": 6660, 2613 | "reported": 32, 2614 | "test": 659, 2615 | "train": 1926, 2616 | "validated": 3277 2617 | }, 2618 | "reportedSentences": 32, 2619 | "clips": 10106, 2620 | "splits": { 2621 | "accent": { "": 1 }, 2622 | "age": { 2623 | "": 0.18, 2624 | "thirties": 0.68, 2625 | "twenties": 0.12, 2626 | "fourties": 0, 2627 | "fifties": 0.02 2628 | }, 2629 | "gender": { "": 0.16, "male": 0.59, "female": 0.25 } 2630 | }, 2631 | "users": 39, 2632 | "size": 256132468, 2633 | "checksum": "dde8d350a4f0a109108adfc8e55a53b2d7d26a08ec537ecc4060836213ded990", 2634 | "avgDurationSecs": 4.333, 2635 | "validDurationSecs": 14200.486, 2636 | "totalHrs": 12.16, 2637 | "validHrs": 3.94 2638 | }, 2639 | "ckb": { 2640 | "duration": 456040944, 2641 | "buckets": { 2642 | "dev": 4940, 2643 | "invalidated": 7379, 2644 | "other": 7350, 2645 | "reported": 2380, 2646 | "test": 4940, 2647 | "train": 7035, 2648 | "validated": 102012 2649 | }, 2650 | "reportedSentences": 2380, 2651 | "clips": 116741, 2652 | "splits": { 2653 | "accent": { "": 1 }, 2654 | "age": { 2655 | "": 0.35, 2656 | "thirties": 0.12, 2657 | "twenties": 0.48, 2658 | "fourties": 0.03, 2659 | "teens": 0.02, 2660 | "fifties": 0.02 2661 | }, 2662 | "gender": { "": 0.33, "male": 0.61, "female": 0.07, "other": 0 } 2663 | }, 2664 | "users": 1269, 2665 | "size": 2470127013, 2666 | "checksum": "6592784e044be63d085d9b288db2dfa61d82a227e70803022c48b9758d68c840", 2667 | "avgDurationSecs": 3.906, 2668 | 
"validDurationSecs": 398503.086, 2669 | "totalHrs": 126.67, 2670 | "validHrs": 110.69 2671 | }, 2672 | "ml": { 2673 | "duration": 14479308, 2674 | "buckets": { 2675 | "dev": 0, 2676 | "invalidated": 16, 2677 | "other": 2817, 2678 | "reported": 115, 2679 | "test": 215, 2680 | "train": 509, 2681 | "validated": 724 2682 | }, 2683 | "reportedSentences": 116, 2684 | "clips": 3557, 2685 | "splits": { 2686 | "accent": { "": 1 }, 2687 | "age": { 2688 | "": 0.33, 2689 | "twenties": 0.35, 2690 | "thirties": 0.04, 2691 | "fourties": 0.29 2692 | }, 2693 | "gender": { "": 0.33, "male": 0.67 } 2694 | }, 2695 | "users": 33, 2696 | "size": 84013164, 2697 | "checksum": "0c53537225d238bcbf6ae11d8e3ecbffbaf8f74922bbd2a4989ea3f5931e3321", 2698 | "avgDurationSecs": 4.071, 2699 | "validDurationSecs": 2947.152, 2700 | "totalHrs": 4.02, 2701 | "validHrs": 0.81 2702 | }, 2703 | "mdf": { 2704 | "duration": 1811340, 2705 | "buckets": { 2706 | "dev": 54, 2707 | "invalidated": 8, 2708 | "other": 1, 2709 | "reported": 14, 2710 | "test": 107, 2711 | "train": 175, 2712 | "validated": 336 2713 | }, 2714 | "reportedSentences": 15, 2715 | "clips": 345, 2716 | "splits": { 2717 | "accent": { "": 1 }, 2718 | "age": { 2719 | "sixties": 0.06, 2720 | "": 0.58, 2721 | "fourties": 0.34, 2722 | "twenties": 0.02 2723 | }, 2724 | "gender": { "male": 0.08, "": 0.58, "female": 0.34 } 2725 | }, 2726 | "users": 11, 2727 | "size": 10631887, 2728 | "checksum": "33f38dce47f710bef75e57e5b40e98e61f1877cd136775e6e3e7f1dc45895413", 2729 | "avgDurationSecs": 5.25, 2730 | "validDurationSecs": 1764.088, 2731 | "totalHrs": 0.5, 2732 | "validHrs": 0.49 2733 | }, 2734 | "sw": { 2735 | "duration": 3264314328, 2736 | "buckets": { 2737 | "dev": 11274, 2738 | "invalidated": 64047, 2739 | "other": 324868, 2740 | "reported": 1866, 2741 | "test": 11271, 2742 | "train": 34980, 2743 | "validated": 231468 2744 | }, 2745 | "reportedSentences": 1861, 2746 | "clips": 620383, 2747 | "splits": { 2748 | "accent": { "": 1 }, 2749 | "age": { 
2750 | "": 0.34, 2751 | "twenties": 0.44, 2752 | "thirties": 0.13, 2753 | "teens": 0, 2754 | "fifties": 0.05, 2755 | "fourties": 0.04, 2756 | "sixties": 0.01 2757 | }, 2758 | "gender": { "": 0.32, "male": 0.36, "female": 0.32, "other": 0 } 2759 | }, 2760 | "users": 1103, 2761 | "size": 19044510370, 2762 | "checksum": "a04acaa50b6fe75d5f8ece78660b2827716a91d99e93600d78c09ebecce42e72", 2763 | "avgDurationSecs": 5.262, 2764 | "validDurationSecs": 1217932.001, 2765 | "totalHrs": 906.75, 2766 | "validHrs": 338.31 2767 | }, 2768 | "sat": { 2769 | "duration": 3624516, 2770 | "buckets": { 2771 | "dev": 0, 2772 | "invalidated": 9, 2773 | "other": 364, 2774 | "reported": 6, 2775 | "test": 144, 2776 | "train": 292, 2777 | "validated": 436 2778 | }, 2779 | "reportedSentences": 7, 2780 | "clips": 809, 2781 | "splits": { 2782 | "accent": { "": 1 }, 2783 | "age": { 2784 | "": 0.43, 2785 | "twenties": 0.43, 2786 | "fourties": 0.01, 2787 | "fifties": 0.01, 2788 | "teens": 0.01, 2789 | "thirties": 0.1 2790 | }, 2791 | "gender": { "": 0.41, "male": 0.58, "female": 0.01 } 2792 | }, 2793 | "users": 14, 2794 | "size": 19983401, 2795 | "checksum": "c58f2434a2974518e727897fceae8030be7ea5403b2dc18652e4921c046ba2cc", 2796 | "avgDurationSecs": 4.48, 2797 | "validDurationSecs": 1953.386, 2798 | "totalHrs": 1, 2799 | "validHrs": 0.54 2800 | }, 2801 | "tig": { 2802 | "duration": 103284, 2803 | "buckets": { 2804 | "dev": 0, 2805 | "invalidated": 12, 2806 | "other": 0, 2807 | "reported": 0, 2808 | "test": 1, 2809 | "train": 10, 2810 | "validated": 11 2811 | }, 2812 | "reportedSentences": 1, 2813 | "clips": 23, 2814 | "splits": { 2815 | "accent": { "": 1 }, 2816 | "age": { "": 0.78, "twenties": 0.22 }, 2817 | "gender": { "": 0.78, "male": 0.22 } 2818 | }, 2819 | "users": 5, 2820 | "size": 602992, 2821 | "checksum": "b0a1697ef65f1f5f30476c789aac7e6f54acc7dc99c82112642e2c87cd5665f6", 2822 | "avgDurationSecs": 4.491, 2823 | "validDurationSecs": 49.397, 2824 | "totalHrs": 0.02, 2825 | "validHrs": 0.01 
2826 | }, 2827 | "ig": { 2828 | "duration": 31531356, 2829 | "buckets": { 2830 | "dev": 2, 2831 | "invalidated": 3, 2832 | "other": 5754, 2833 | "reported": 15, 2834 | "test": 4, 2835 | "train": 8, 2836 | "validated": 14 2837 | }, 2838 | "reportedSentences": 15, 2839 | "clips": 5771, 2840 | "splits": { 2841 | "accent": { "": 1 }, 2842 | "age": { 2843 | "": 0.56, 2844 | "twenties": 0.32, 2845 | "teens": 0.05, 2846 | "eighties": 0, 2847 | "thirties": 0.04, 2848 | "sixties": 0.02, 2849 | "fourties": 0 2850 | }, 2851 | "gender": { "": 0.56, "male": 0.14, "female": 0.3 } 2852 | }, 2853 | "users": 112, 2854 | "size": 184140356, 2855 | "checksum": "7a580953242f8f1072d1b816859e9b9a00fa2f34311a9af0ebfda6120ae0806a", 2856 | "avgDurationSecs": 5.464, 2857 | "validDurationSecs": 76.493, 2858 | "totalHrs": 8.75, 2859 | "validHrs": 0.02 2860 | }, 2861 | "nan-tw": { 2862 | "duration": 38958552, 2863 | "buckets": { 2864 | "dev": 755, 2865 | "invalidated": 294, 2866 | "other": 10569, 2867 | "reported": 140, 2868 | "test": 1139, 2869 | "train": 1833, 2870 | "validated": 3737 2871 | }, 2872 | "reportedSentences": 141, 2873 | "clips": 14600, 2874 | "splits": { 2875 | "accent": { "": 1 }, 2876 | "age": { 2877 | "thirties": 0.25, 2878 | "": 0.13, 2879 | "twenties": 0.36, 2880 | "fourties": 0.22, 2881 | "teens": 0.02, 2882 | "fifties": 0.01, 2883 | "sixties": 0 2884 | }, 2885 | "gender": { "male": 0.53, "": 0.13, "other": 0.12, "female": 0.22 } 2886 | }, 2887 | "users": 120, 2888 | "size": 214575545, 2889 | "checksum": "c253dbed21c6e4100dc9e3f3ebbde78e32ca38b7d47128d596732f60fdb5ab37", 2890 | "avgDurationSecs": 2.668, 2891 | "validDurationSecs": 9971.788, 2892 | "totalHrs": 10.82, 2893 | "validHrs": 2.76 2894 | }, 2895 | "mhr": { 2896 | "duration": 745755624, 2897 | "buckets": { 2898 | "dev": 13132, 2899 | "invalidated": 4762, 2900 | "other": 41051, 2901 | "reported": 77, 2902 | "test": 13752, 2903 | "train": 86150, 2904 | "validated": 114848 2905 | }, 2906 | "reportedSentences": 76, 
2907 | "clips": 160661, 2908 | "splits": { 2909 | "accent": { "": 1 }, 2910 | "age": { 2911 | "fifties": 0.05, 2912 | "": 0.16, 2913 | "sixties": 0.05, 2914 | "thirties": 0.29, 2915 | "fourties": 0.15, 2916 | "twenties": 0.22, 2917 | "teens": 0.07, 2918 | "seventies": 0.01 2919 | }, 2920 | "gender": { "male": 0.23, "": 0.16, "female": 0.61 } 2921 | }, 2922 | "users": 388, 2923 | "size": 4162732159, 2924 | "checksum": "fdc30f70750bb13213f0d5c699f31be429445533a4acc29e7aacbd4ba438f332", 2925 | "avgDurationSecs": 4.642, 2926 | "validDurationSecs": 533101.013, 2927 | "totalHrs": 207.15, 2928 | "validHrs": 148.08 2929 | }, 2930 | "bn": { 2931 | "duration": 4559272416, 2932 | "buckets": { 2933 | "dev": 9230, 2934 | "invalidated": 7683, 2935 | "other": 994175, 2936 | "reported": 1862, 2937 | "test": 9230, 2938 | "train": 20729, 2939 | "validated": 43362 2940 | }, 2941 | "reportedSentences": 1856, 2942 | "clips": 1045220, 2943 | "splits": { 2944 | "accent": { "": 1 }, 2945 | "age": { 2946 | "thirties": 0.03, 2947 | "twenties": 0.67, 2948 | "": 0.22, 2949 | "teens": 0.07, 2950 | "fourties": 0.01, 2951 | "fifties": 0 2952 | }, 2953 | "gender": { "male": 0.54, "": 0.22, "female": 0.23, "other": 0 } 2954 | }, 2955 | "users": 22817, 2956 | "size": 26255160682, 2957 | "checksum": "fa3f3cfc75b68741dac50f0bb271a7e83ca28a3191e920042d881a6f8badf112", 2958 | "avgDurationSecs": 4.362, 2959 | "validDurationSecs": 189145.989, 2960 | "totalHrs": 1266.46, 2961 | "validHrs": 52.54 2962 | }, 2963 | "tok": { 2964 | "duration": 43560792, 2965 | "buckets": { 2966 | "dev": 1883, 2967 | "invalidated": 234, 2968 | "other": 2261, 2969 | "reported": 129, 2970 | "test": 1907, 2971 | "train": 2732, 2972 | "validated": 9033 2973 | }, 2974 | "reportedSentences": 130, 2975 | "clips": 11528, 2976 | "splits": { 2977 | "accent": { "": 1 }, 2978 | "age": { 2979 | "": 0.41, 2980 | "twenties": 0.18, 2981 | "teens": 0.3, 2982 | "thirties": 0.1, 2983 | "fourties": 0 2984 | }, 2985 | "gender": { "": 0.41, "male": 
0.4, "other": 0.12, "female": 0.07 } 2986 | }, 2987 | "users": 119, 2988 | "size": 253192772, 2989 | "checksum": "1de25832d6417466aa16561594ced6d4a2b72da694910e1198f69f1c22e3ff5e", 2990 | "avgDurationSecs": 3.779, 2991 | "validDurationSecs": 34132.949, 2992 | "totalHrs": 12.1, 2993 | "validHrs": 9.48 2994 | }, 2995 | "yue": { 2996 | "duration": 251348868, 2997 | "buckets": { 2998 | "dev": 2485, 2999 | "invalidated": 1629, 3000 | "other": 39882, 3001 | "reported": 868, 3002 | "test": 2526, 3003 | "train": 3030, 3004 | "validated": 20081 3005 | }, 3006 | "reportedSentences": 864, 3007 | "clips": 61592, 3008 | "splits": { 3009 | "accent": { "": 1 }, 3010 | "age": { 3011 | "thirties": 0.21, 3012 | "": 0.32, 3013 | "twenties": 0.4, 3014 | "fourties": 0.03, 3015 | "sixties": 0.01, 3016 | "fifties": 0, 3017 | "teens": 0.03 3018 | }, 3019 | "gender": { "male": 0.26, "": 0.36, "female": 0.37, "other": 0.01 } 3020 | }, 3021 | "users": 827, 3022 | "size": 1406690648, 3023 | "checksum": "1d51745b0e7b4dd79f9eb25f8129d9f90dcc7346ec2ff3aaa4ab9a69f5aaff66", 3024 | "avgDurationSecs": 4.081, 3025 | "validDurationSecs": 81947.925, 3026 | "totalHrs": 69.81, 3027 | "validHrs": 22.76 3028 | }, 3029 | "sah": { 3030 | "duration": 24741156, 3031 | "buckets": { 3032 | "dev": 1083, 3033 | "invalidated": 102, 3034 | "other": 1, 3035 | "reported": 2, 3036 | "test": 1270, 3037 | "train": 1594, 3038 | "validated": 4015 3039 | }, 3040 | "reportedSentences": 3, 3041 | "clips": 4118, 3042 | "splits": { 3043 | "accent": { "": 1 }, 3044 | "age": { 3045 | "": 0.36, 3046 | "twenties": 0.03, 3047 | "fourties": 0.07, 3048 | "thirties": 0.43, 3049 | "teens": 0.1, 3050 | "fifties": 0 3051 | }, 3052 | "gender": { "": 0.36, "male": 0.53, "female": 0.11 } 3053 | }, 3054 | "users": 55, 3055 | "size": 187502644, 3056 | "checksum": "e6970ac845dd0f3377404eaba252c0407f3c64b1d931aabd75ec7c6f7aed132d", 3057 | "avgDurationSecs": 6.008, 3058 | "validDurationSecs": 24122.327, 3059 | "totalHrs": 6.87, 3060 | "validHrs": 
6.7 3061 | }, 3062 | "mk": { 3063 | "duration": 1110348, 3064 | "buckets": { 3065 | "dev": 0, 3066 | "invalidated": 11, 3067 | "other": 57, 3068 | "reported": 8, 3069 | "test": 26, 3070 | "train": 115, 3071 | "validated": 141 3072 | }, 3073 | "reportedSentences": 9, 3074 | "clips": 209, 3075 | "splits": { 3076 | "accent": { "": 1 }, 3077 | "age": { "thirties": 0.43, "": 0.33, "twenties": 0.17, "teens": 0.07 }, 3078 | "gender": { "male": 0.67, "": 0.33 } 3079 | }, 3080 | "users": 7, 3081 | "size": 6527029, 3082 | "checksum": "c2275e2901850f3a54cef485a8099714fc38c48fab148838f7d89c37296a4c8f", 3083 | "avgDurationSecs": 5.313, 3084 | "validDurationSecs": 749.086, 3085 | "totalHrs": 0.3, 3086 | "validHrs": 0.2 3087 | }, 3088 | "sc": { 3089 | "duration": 5936256, 3090 | "buckets": { 3091 | "dev": 234, 3092 | "invalidated": 25, 3093 | "other": 284, 3094 | "reported": 2, 3095 | "test": 177, 3096 | "train": 476, 3097 | "validated": 1036 3098 | }, 3099 | "reportedSentences": 3, 3100 | "clips": 1345, 3101 | "splits": { 3102 | "accent": { "": 1 }, 3103 | "age": { "": 0.62, "thirties": 0.31, "twenties": 0.08 }, 3104 | "gender": { "": 0.62, "female": 0.31, "male": 0.08 } 3105 | }, 3106 | "users": 12, 3107 | "size": 34282030, 3108 | "checksum": "5848ec3b17ae8c47e3e15ee762d930e7757701a1f91c8f195b0384c5a5db2193", 3109 | "avgDurationSecs": 4.414, 3110 | "validDurationSecs": 4572.462, 3111 | "totalHrs": 1.64, 3112 | "validHrs": 1.27 3113 | }, 3114 | "skr": { 3115 | "duration": 23051304, 3116 | "buckets": { 3117 | "dev": 1026, 3118 | "invalidated": 428, 3119 | "other": 1967, 3120 | "reported": 16, 3121 | "test": 834, 3122 | "train": 1327, 3123 | "validated": 3192 3124 | }, 3125 | "reportedSentences": 2, 3126 | "clips": 5587, 3127 | "splits": { 3128 | "accent": { "": 1 }, 3129 | "age": { 3130 | "": 0.35, 3131 | "twenties": 0.25, 3132 | "fifties": 0.39, 3133 | "teens": 0, 3134 | "fourties": 0, 3135 | "thirties": 0.01 3136 | }, 3137 | "gender": { "": 0.35, "male": 0.65 } 3138 | }, 3139 | 
"users": 41, 3140 | "size": 131854098, 3141 | "checksum": "b6c3c9ee1e9661de973298e331efe26f523589e300eddaa1e23b74969f2e4f42", 3142 | "avgDurationSecs": 4.126, 3143 | "validDurationSecs": 13169.816, 3144 | "totalHrs": 6.4, 3145 | "validHrs": 3.65 3146 | }, 3147 | "ti": { 3148 | "duration": 132912, 3149 | "buckets": { 3150 | "dev": 0, 3151 | "invalidated": 0, 3152 | "other": 10, 3153 | "reported": 1, 3154 | "test": 3, 3155 | "train": 10, 3156 | "validated": 13 3157 | }, 3158 | "reportedSentences": 2, 3159 | "clips": 23, 3160 | "splits": { "accent": { "": 1 }, "age": { "": 1 }, "gender": { "": 1 } }, 3161 | "users": 4, 3162 | "size": 784757, 3163 | "checksum": "317936e0faac748f4a73ab5f3b17d4fb6909d553e6bc67c456666b3d11d415ab", 3164 | "avgDurationSecs": 5.779, 3165 | "validDurationSecs": 75.124, 3166 | "totalHrs": 0.03, 3167 | "validHrs": 0.02 3168 | }, 3169 | "mrj": { 3170 | "duration": 104076468, 3171 | "buckets": { 3172 | "dev": 3297, 3173 | "invalidated": 272, 3174 | "other": 10180, 3175 | "reported": 49, 3176 | "test": 4428, 3177 | "train": 7272, 3178 | "validated": 15125 3179 | }, 3180 | "reportedSentences": 50, 3181 | "clips": 25577, 3182 | "splits": { 3183 | "accent": { "": 1 }, 3184 | "age": { 3185 | "twenties": 0.29, 3186 | "": 0.12, 3187 | "thirties": 0.24, 3188 | "sixties": 0.04, 3189 | "fourties": 0.15, 3190 | "fifties": 0.14, 3191 | "teens": 0.01 3192 | }, 3193 | "gender": { "male": 0.26, "female": 0.63, "": 0.12 } 3194 | }, 3195 | "users": 48, 3196 | "size": 575489473, 3197 | "checksum": "42877608f77aa7a1a548bce9fea0d5aee628a75d2688340923b008648a976273", 3198 | "avgDurationSecs": 4.069, 3199 | "validDurationSecs": 61545.786, 3200 | "totalHrs": 28.91, 3201 | "validHrs": 17.09 3202 | }, 3203 | "tw": { 3204 | "duration": 943884, 3205 | "buckets": { 3206 | "dev": 0, 3207 | "invalidated": 0, 3208 | "other": 202, 3209 | "reported": 2, 3210 | "test": 3, 3211 | "train": 32, 3212 | "validated": 35 3213 | }, 3214 | "reportedSentences": 3, 3215 | "clips": 237, 3216 
| "splits": { 3217 | "accent": { "": 1 }, 3218 | "age": { "": 0.07, "twenties": 0.02, "thirties": 0.91 }, 3219 | "gender": { "": 0.07, "male": 0.93 } 3220 | }, 3221 | "users": 5, 3222 | "size": 5529362, 3223 | "checksum": "2c8e68f83c5ef0926855a6ede4a24bcca6f806293fffd952c6adf9a00782ffdc", 3224 | "avgDurationSecs": 3.983, 3225 | "validDurationSecs": 139.392, 3226 | "totalHrs": 0.26, 3227 | "validHrs": 0.03 3228 | }, 3229 | "ko": { 3230 | "duration": 14055624, 3231 | "buckets": { 3232 | "dev": 105, 3233 | "invalidated": 97, 3234 | "other": 1746, 3235 | "reported": 13, 3236 | "test": 131, 3237 | "train": 192, 3238 | "validated": 429 3239 | }, 3240 | "reportedSentences": 14, 3241 | "clips": 2272, 3242 | "splits": { 3243 | "accent": { "": 1 }, 3244 | "age": { 3245 | "": 0.14, 3246 | "thirties": 0.43, 3247 | "twenties": 0.35, 3248 | "teens": 0.04, 3249 | "fourties": 0.04 3250 | }, 3251 | "gender": { "": 0.14, "male": 0.72, "female": 0.14, "other": 0 } 3252 | }, 3253 | "users": 42, 3254 | "size": 81829460, 3255 | "checksum": "a308f2a8b7991716417c12aedba39dd4a93d2410a5a148531ddd5ecc89434af4", 3256 | "avgDurationSecs": 6.186, 3257 | "validDurationSecs": 2653.989, 3258 | "totalHrs": 3.9, 3259 | "validHrs": 0.73 3260 | }, 3261 | "yo": { 3262 | "duration": 24984468, 3263 | "buckets": { 3264 | "dev": 554, 3265 | "invalidated": 151, 3266 | "other": 2082, 3267 | "reported": 17, 3268 | "test": 638, 3269 | "train": 797, 3270 | "validated": 1990 3271 | }, 3272 | "reportedSentences": 18, 3273 | "clips": 4223, 3274 | "splits": { 3275 | "accent": { "": 1 }, 3276 | "age": { 3277 | "thirties": 0.02, 3278 | "twenties": 0.65, 3279 | "": 0.3, 3280 | "teens": 0.03, 3281 | "sixties": 0 3282 | }, 3283 | "gender": { "male": 0.33, "": 0.3, "female": 0.37 } 3284 | }, 3285 | "users": 93, 3286 | "size": 145667521, 3287 | "checksum": "9d10c538a7d0237e5f21d2c516e1c07957e0ae318c25203d33b5d5fedb2ac2c6", 3288 | "avgDurationSecs": 5.916, 3289 | "validDurationSecs": 11773.405, 3290 | "totalHrs": 6.94, 
3291 | "validHrs": 3.27 3292 | }, 3293 | "oc": { 3294 | "duration": 37154340, 3295 | "buckets": { 3296 | "dev": 173, 3297 | "invalidated": 129, 3298 | "other": 6647, 3299 | "reported": 8, 3300 | "test": 184, 3301 | "train": 208, 3302 | "validated": 840 3303 | }, 3304 | "reportedSentences": 9, 3305 | "clips": 7616, 3306 | "splits": { 3307 | "accent": { "": 1 }, 3308 | "age": { 3309 | "": 0.37, 3310 | "fifties": 0.2, 3311 | "twenties": 0.01, 3312 | "thirties": 0.04, 3313 | "seventies": 0, 3314 | "fourties": 0.22, 3315 | "teens": 0.01, 3316 | "sixties": 0.14 3317 | }, 3318 | "gender": { "": 0.39, "male": 0.09, "female": 0.52 } 3319 | }, 3320 | "users": 138, 3321 | "size": 208779711, 3322 | "checksum": "e241c12159ac7b3d880f41d5e91d804775da188a3ac413c775341eef3406001b", 3323 | "avgDurationSecs": 4.878, 3324 | "validDurationSecs": 4097.905, 3325 | "totalHrs": 10.32, 3326 | "validHrs": 1.13 3327 | }, 3328 | "tk": { 3329 | "duration": 6197616, 3330 | "buckets": { 3331 | "dev": 232, 3332 | "invalidated": 53, 3333 | "other": 270, 3334 | "reported": 5, 3335 | "test": 281, 3336 | "train": 383, 3337 | "validated": 896 3338 | }, 3339 | "reportedSentences": 6, 3340 | "clips": 1219, 3341 | "splits": { 3342 | "accent": { "": 1 }, 3343 | "age": { "": 0.08, "twenties": 0.75, "thirties": 0.18 }, 3344 | "gender": { "": 0.08, "male": 0.29, "female": 0.63 } 3345 | }, 3346 | "users": 14, 3347 | "size": 36062710, 3348 | "checksum": "5dcb82753e5192d731761b364836bb1fbc70b82e234a55f8e4a2168cca8746a8", 3349 | "avgDurationSecs": 5.084, 3350 | "validDurationSecs": 4555.426, 3351 | "totalHrs": 1.72, 3352 | "validHrs": 1.26 3353 | }, 3354 | "vot": { 3355 | "duration": 1025976, 3356 | "buckets": { 3357 | "dev": 0, 3358 | "invalidated": 324, 3359 | "other": 0, 3360 | "test": 6, 3361 | "train": 96, 3362 | "validated": 102 3363 | }, 3364 | "clips": 426, 3365 | "splits": { 3366 | "accent": { "": 1 }, 3367 | "age": { "": 0.25, "twenties": 0.73, "teens": 0.01 }, 3368 | "gender": { "": 0.25, "male": 0.75 
} 3369 | }, 3370 | "users": 5, 3371 | "size": 7891176, 3372 | "checksum": "5ec41e90ea98ece390981fd44ca5b51728791988a2c116b52b2b3ad3b216c35f", 3373 | "avgDurationSecs": 2.408, 3374 | "validDurationSecs": 245.656, 3375 | "totalHrs": 0.28, 3376 | "validHrs": 0.06 3377 | }, 3378 | "az": { 3379 | "duration": 1441620, 3380 | "buckets": { 3381 | "dev": 21, 3382 | "invalidated": 34, 3383 | "other": 149, 3384 | "reported": 0, 3385 | "test": 27, 3386 | "train": 39, 3387 | "validated": 87 3388 | }, 3389 | "clips": 270, 3390 | "splits": { 3391 | "accent": { "": 1 }, 3392 | "age": { 3393 | "": 0.38, 3394 | "twenties": 0.37, 3395 | "fourties": 0.01, 3396 | "thirties": 0.24 3397 | }, 3398 | "gender": { "": 0.38, "male": 0.62 } 3399 | }, 3400 | "users": 21, 3401 | "size": 8439871, 3402 | "checksum": "dab12e4b9250a93cfdb89b97a038a616f5fc95c28b91bc0cfa966e313503bd9a", 3403 | "avgDurationSecs": 5.339, 3404 | "validDurationSecs": 464.522, 3405 | "totalHrs": 0.4, 3406 | "validHrs": 0.12 3407 | }, 3408 | "ast": { 3409 | "duration": 4813164, 3410 | "buckets": { 3411 | "dev": 81, 3412 | "invalidated": 12, 3413 | "other": 684, 3414 | "test": 79, 3415 | "train": 205, 3416 | "validated": 365 3417 | }, 3418 | "clips": 1061, 3419 | "splits": { 3420 | "accent": { "": 1 }, 3421 | "age": { "": 0.25, "fourties": 0.42, "fifties": 0.33 }, 3422 | "gender": { "": 0.25, "male": 0.75 } 3423 | }, 3424 | "users": 12, 3425 | "size": 27001785, 3426 | "checksum": "c40efb1033e7ced0168637485bda22b2e60c53cab4cfe880b09b1ec0d80634bc", 3427 | "avgDurationSecs": 4.536, 3428 | "validDurationSecs": 1655.801, 3429 | "totalHrs": 1.33, 3430 | "validHrs": 0.45 3431 | }, 3432 | "ne-NP": { 3433 | "duration": 2276280, 3434 | "buckets": { 3435 | "dev": 49, 3436 | "invalidated": 35, 3437 | "other": 245, 3438 | "test": 74, 3439 | "train": 173, 3440 | "validated": 296 3441 | }, 3442 | "clips": 576, 3443 | "splits": { 3444 | "accent": { "": 1 }, 3445 | "age": { "thirties": 0.29, "": 0.46, "twenties": 0.24 }, 3446 | "gender": { 
"male": 0.54, "": 0.46 } 3447 | }, 3448 | "users": 15, 3449 | "size": 13220758, 3450 | "checksum": "637db7008c47c43e3d0d10c097d7dd25a3e3a00a1849c9b8f1f06a56e06cde8b", 3451 | "avgDurationSecs": 3.952, 3452 | "validDurationSecs": 1169.755, 3453 | "totalHrs": 0.63, 3454 | "validHrs": 0.32 3455 | }, 3456 | "quy": { 3457 | "duration": 4320, 3458 | "buckets": { 3459 | "dev": 0, 3460 | "invalidated": 0, 3461 | "other": 0, 3462 | "test": 0, 3463 | "train": 1, 3464 | "validated": 1 3465 | }, 3466 | "clips": 1, 3467 | "splits": { "accent": { "": 1 }, "age": { "": 1 }, "gender": { "": 1 } }, 3468 | "users": 1, 3469 | "size": 25833, 3470 | "checksum": "f7b24f25235e87f58a0b89362e541255dc3bfb55f19f94d6a0caa352ef86b6eb", 3471 | "avgDurationSecs": 4.32, 3472 | "validDurationSecs": 4.32, 3473 | "totalHrs": 0, 3474 | "validHrs": 0 3475 | }, 3476 | "lo": { 3477 | "duration": 294552, 3478 | "buckets": { 3479 | "dev": 0, 3480 | "invalidated": 5, 3481 | "other": 1, 3482 | "test": 10, 3483 | "train": 28, 3484 | "validated": 38 3485 | }, 3486 | "clips": 44, 3487 | "splits": { 3488 | "accent": { "": 1 }, 3489 | "age": { 3490 | "": 0.25, 3491 | "twenties": 0.05, 3492 | "fourties": 0.02, 3493 | "thirties": 0.68 3494 | }, 3495 | "gender": { "": 0.25, "male": 0.75 } 3496 | }, 3497 | "users": 6, 3498 | "size": 1736458, 3499 | "checksum": "3e1cd64e6bcca689dce85e9b6c1f04a467c1e255041921d9563c99d5265b2c6b", 3500 | "avgDurationSecs": 6.694, 3501 | "validDurationSecs": 254.386, 3502 | "totalHrs": 0.08, 3503 | "validHrs": 0.07 3504 | }, 3505 | "dyu": { 3506 | "duration": 6228, 3507 | "buckets": { 3508 | "dev": 0, 3509 | "invalidated": 0, 3510 | "other": 0, 3511 | "test": 0, 3512 | "train": 1, 3513 | "validated": 1 3514 | }, 3515 | "clips": 1, 3516 | "splits": { "accent": { "": 1 }, "age": { "": 1 }, "gender": { "": 1 } }, 3517 | "users": 1, 3518 | "size": 37380, 3519 | "checksum": "767317573e21e6d5279d5499f3ec3402a24d7547218283a36c2148767a120422", 3520 | "avgDurationSecs": 6.228, 3521 | 
"validDurationSecs": 6.228, 3522 | "totalHrs": 0, 3523 | "validHrs": 0 3524 | }, 3525 | "is": { 3526 | "duration": 70416, 3527 | "buckets": { 3528 | "dev": 0, 3529 | "invalidated": 0, 3530 | "other": 10, 3531 | "test": 0, 3532 | "train": 0, 3533 | "validated": 0 3534 | }, 3535 | "clips": 10, 3536 | "splits": { "accent": { "": 1 }, "age": { "": 1 }, "gender": { "": 1 } }, 3537 | "users": 1, 3538 | "size": 410396, 3539 | "checksum": "48db6e809f5b6eb0c00b077e6b736aeeee5d544ee3f2fdd059244da88926c040", 3540 | "avgDurationSecs": 7.042, 3541 | "validDurationSecs": 0, 3542 | "totalHrs": 0.01, 3543 | "validHrs": 0 3544 | } 3545 | }, 3546 | "totalDuration": 97709611853, 3547 | "totalValidDurationSecs": 63681475, 3548 | "totalHrs": 27141, 3549 | "totalValidHrs": 17689 3550 | } 3551 | --------------------------------------------------------------------------------