├── .gitignore ├── LICENSE ├── README.md ├── sample └── abk │ ├── audio │ ├── abk-002-000.wav │ ├── abk-002-001.wav │ ├── abk-002-006.wav │ ├── abk-002-009.wav │ ├── abk-002-010.wav │ ├── abk-002-011.wav │ ├── abk-002-023.wav │ ├── abk-002-024.wav │ ├── abk-002-026.wav │ ├── abk-002-027.wav │ ├── abk-002-028.wav │ ├── abk-002-030.wav │ ├── abk-002-032.wav │ ├── abk-002-033.wav │ ├── abk-002-034.wav │ ├── abk-002-035.wav │ ├── abk-002-036.wav │ ├── abk-002-037.wav │ ├── abk-002-038.wav │ ├── abk-002-039.wav │ ├── abk-002-040.wav │ ├── abk-002-041.wav │ ├── abk-002-042.wav │ ├── abk-002-043.wav │ ├── abk-002-044.wav │ ├── abk-002-045.wav │ ├── abk-002-046.wav │ ├── abk-002-047.wav │ ├── abk-002-049.wav │ ├── abk-002-050.wav │ ├── abk-002-051.wav │ ├── abk-002-052.wav │ ├── abk-002-053.wav │ ├── abk-002-067.wav │ ├── abk-002-070.wav │ ├── abk-002-071.wav │ ├── abk-002-072.wav │ ├── abk-002-073.wav │ ├── abk-002-074.wav │ ├── abk-002-077.wav │ ├── abk-002-078.wav │ ├── abk-002-079.wav │ ├── abk-002-080.wav │ ├── abk-002-083.wav │ ├── abk-002-084.wav │ ├── abk-002-085.wav │ ├── abk-002-090.wav │ ├── abk-002-097.wav │ ├── abk-002-098.wav │ ├── abk-002-101.wav │ ├── abk-002-102.wav │ ├── abk-002-103.wav │ ├── abk-002-105.wav │ └── abk-002-106.wav │ ├── inventory │ ├── allophone.txt │ ├── phone.txt │ └── phoneme.txt │ ├── raw │ └── text.txt └── ucla_phonetic_corpus └── bin └── preprocess_text.py /.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | .idea/* 3 | notes/* 4 | exp/* 5 | scripts/* 6 | data.tar.gz -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal 
services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. 
If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. 
Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. 
License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. 
Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. 
For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. 
Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. 
For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. 
You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 
354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. 
To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 
436 | 437 | Creative Commons may be contacted at creativecommons.org. 438 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UCLA Phonetic Corpus 2 | 3 | This repository contains instructions for the dataset described in our ICASSP 2021 paper `MULTILINGUAL PHONETIC DATASET FOR LOW RESOURCE SPEECH RECOGNITION`. 4 | 5 | 6 | We will also distribute scripts and baselines here in the future. 7 | 8 | 9 | If you have any suggestions or find any mistakes in the dataset, please feel free to send us an email (xinjianl [at] cs.cmu.edu) or submit an issue in this repo. Thanks! 10 | 11 | 12 | ## Instructions 13 | 14 | Since the entire dataset is too large to be uploaded to GitHub, we only provide a sample of the first language (`abk`) in this repository. The full dataset can be downloaded from the [release page](https://github.com/xinjli/ucla-phonetic-corpus/releases/tag/v1.0). 15 | 16 | It is a cleaned version of the dataset in the paper. Each top-level directory corresponds to a language, identified by its 3-character ISO id. There are currently 97 languages in this dataset. 17 | 18 | Inside each directory, there will be 3 files and 1 directory: 19 | 20 | - `raw`: it contains the narrow phone annotations of each utterance. The first field is the utterance id. 21 | - `text.txt`: it contains the segmented and normalized transcription from the raw utterance. 22 | - `inventory`: it is a directory containing the unique phoneme/phone inventory for this language. It is derived from `text.txt`. 23 | - `audio`: it contains the WAV-format audio of each utterance. Each file is named after its corresponding utterance id. 24 | 25 | ## Acknowledgements 26 | 27 | This dataset is derived from the [UCLA Phonetics Lab Archive](http://archive.phonetics.ucla.edu/). The website contains much more data and resources than we could clean in this dataset. 
Thank you UCLA Phonetics Lab Archive! 28 | 29 | ## License 30 | 31 | Contents of this dataset and the original website are licensed under a [Creative Commons license](https://creativecommons.org/licenses/by-nc/2.0/). You are free to copy, distribute, or adapt these materials for noncommercial purposes, under the following conditions: 32 | 33 | - For any reuse or distribution, you must make clear to others the license terms of this work. 34 | - Any derivative work may be distributed only under a license identical to this one. That is, you cannot claim exclusive right to any creation based on these materials, nor can anyone who further adapts your creation. 35 | - Please attribute the material to the UCLA Phonetics Lab Archive (and this paper). See below for suggested citation format. 36 | 37 | 38 | ## Reference 39 | 40 | If you find this work helpful, please cite the following papers 41 | 42 | ``` 43 | @inproceedings{li2021multilingual, 44 | title={Multilingual phonetic dataset for low resource speech recognition}, 45 | author={Li, Xinjian and Mortensen, David R and Metze, Florian and Black, Alan W}, 46 | booktitle={ICASSP 2021-2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, 47 | pages={6958--6962}, 48 | year={2021}, 49 | organization={IEEE} 50 | } 51 | 52 | 2007. The UCLA Phonetics Lab Archive. Los Angeles, CA: UCLA Department of Linguistics. http://archive.phonetics.ucla.edu/. 
53 | ``` -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-000.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-000.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-001.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-001.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-006.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-006.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-009.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-009.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-010.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-010.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-011.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-011.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-023.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-023.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-024.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-024.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-026.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-026.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-027.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-027.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-028.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-028.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-030.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-030.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-032.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-032.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-033.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-033.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-034.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-034.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-035.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-035.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-036.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-036.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-037.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-037.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-038.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-038.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-039.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-039.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-040.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-040.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-041.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-041.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-042.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-042.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-043.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-043.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-044.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-044.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-045.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-045.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-046.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-046.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-047.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-047.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-049.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-049.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-050.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-050.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-051.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-051.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-052.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-052.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-053.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-053.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-067.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-067.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-070.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-070.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-071.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-071.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-072.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-072.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-073.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-073.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-074.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-074.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-077.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-077.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-078.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-078.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-079.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-079.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-080.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-080.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-083.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-083.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-084.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-084.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-085.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-085.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-090.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-090.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-097.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-097.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-098.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-098.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-101.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-101.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-102.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-102.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-103.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-103.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-105.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-105.wav -------------------------------------------------------------------------------- /sample/abk/audio/abk-002-106.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xinjli/ucla-phonetic-corpus/4cca56e50d56885bca349900979c5a87f854f047/sample/abk/audio/abk-002-106.wav -------------------------------------------------------------------------------- /sample/abk/inventory/allophone.txt: -------------------------------------------------------------------------------- 1 | a a 2 | b b 3 | d d 4 | d͡ʒ d͡ʒ 5 | i i 6 | j j 7 | kʼ kʼ 8 | m m 9 | n n 10 | p p 11 | pʰ pʰ 12 | r r 13 | s s 14 | t t 15 | tʰ tʰ 16 | t͡ʃ t͡ʃ 17 | t͡ʃʰ t͡ʃʰ 18 | t͡ʃʼ t͡ʃʼ 19 | z z 20 | ä ä 21 | æ̈ æ̈ 22 | ă ă 23 | ħ ħ 24 | ħʷ ħʷ 25 | œ̈ œ̈ 26 | ɘ ɘ 27 | ə ə 28 | ə̆ ə̆ 29 | ɛ̈ ɛ̈ 30 | ɜ ɜ 31 | ɜ̆ ɜ̆ 32 | ɡ ɡ 33 | ɤ̈ ɤ̈ 34 | ɥ ɥ 35 | ɨ ɨ 36 | ɹ ɹ 37 | ɾ ɾ 38 | ʁ ʁ 39 | ʁʷ ʁʷ 40 | ʃ ʃ 41 | ʃʰ ʃʰ 42 | ʃʲ ʃʲ 43 | ʌ̈ ʌ̈ 44 | ʒ ʒ 45 | ʒʲ ʒʲ 46 | ˀa ˀa 47 | χ χ 48 | χʲ χʲ 49 | -------------------------------------------------------------------------------- /sample/abk/inventory/phone.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | d 4 | d͡ʒ 5 | i 6 | j 7 | kʼ 8 | m 9 | n 10 | p 11 | pʰ 12 | r 13 | s 14 | t 15 | tʰ 16 | t͡ʃ 17 | t͡ʃʰ 18 | t͡ʃʼ 19 | z 20 | ä 21 | æ̈ 22 | ă 23 | ħ 24 | ħʷ 25 | œ̈ 26 | ɘ 27 | ə 28 | ə̆ 29 | ɛ̈ 30 | ɜ 31 | ɜ̆ 32 | ɡ 33 | ɤ̈ 34 | ɥ 35 | ɨ 36 | ɹ 37 | ɾ 38 | ʁ 39 | ʁʷ 40 | ʃ 41 | ʃʰ 42 | ʃʲ 43 | ʌ̈ 44 | ʒ 45 | ʒʲ 46 | ˀa 47 | χ 48 | χʲ 49 | -------------------------------------------------------------------------------- /sample/abk/inventory/phoneme.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | d 4 | d͡ʒ 5 | i 6 | j 7 | kʼ 8 | m 9 | n 10 | p 11 | pʰ 12 | r 13 | s 14 | t 15 | tʰ 16 | t͡ʃ 17 | t͡ʃʰ 18 | t͡ʃʼ 19 | z 20 | ä 21 | æ̈ 22 | ă 23 | ħ 24 | ħʷ 25 | œ̈ 26 | ɘ 27 | ə 28 | ə̆ 29 | ɛ̈ 30 | ɜ 31 | ɜ̆ 32 | ɡ 33 | ɤ̈ 34 | ɥ 35 | ɨ 36 | ɹ 37 | ɾ 38 | ʁ 39 | ʁʷ 40 | ʃ 41 | ʃʰ 42 | ʃʲ 43 | ʌ̈ 44 | ʒ 45 | ʒʲ 46 | ˀa 47 | χ 48 | χʲ 49 | 
-------------------------------------------------------------------------------- /sample/abk/raw: -------------------------------------------------------------------------------- 1 | abk-002-000 aˑdʒʃʲ 2 | abk-002-001 ˈaˑdʒmɜ 3 | abk-002-006 adʒɘmʃɘ́ 4 | abk-002-009 atʃʰɜrä́ˆˑ 5 | abk-002-010 átʃə̆pʰɜ̆rʌ̈ 6 | abk-002-011 áttʃʃʰɜrɜ 7 | abk-002-023 akʼáʒʲərɜ 8 | abk-002-024 ăbᵊʒʲɨ́ 9 | abk-002-026 aˈʃæ̈́ 10 | abk-002-027 ájəʃʲɛ̈ˇ 11 | abk-002-028 ˆaʃæ̈ 12 | abk-002-030 aˆʃɘpɘ́ 13 | abk-002-032 adʒɘ́r 14 | abk-002-033 adʒɘ́ʃ 15 | abk-002-034 adʒ 16 | abk-002-035 atʃədæ̈́ˇ 17 | abk-002-036 atʃʰnɘ́ 18 | abk-002-037 atʃʰɘ́ɥrɜ 19 | abk-002-038 dɜtʃä́ 20 | abk-002-039 atʃʰbɘ́ɡə 21 | abk-002-040 aptʃráˑ 22 | abk-002-041 atʃʼɘ́ 23 | abk-002-042 atʃʼɘ́χrɜ 24 | abk-002-043 amᵊtʃʼɘ́ 25 | abk-002-044 atʃʼá 26 | abk-002-045 ˈˀäʒəħʷərə 27 | abk-002-046 äʒᵊɹə 28 | abk-002-047 äʒəħœ̈ɾə 29 | abk-002-049 ˈˀáʒə 30 | abk-002-050 ˈabᵊʒə 31 | abk-002-051 aʃəɾɜ 32 | abk-002-052 áˑʃə 33 | abk-002-053 adʒɘ́ʃ 34 | abk-002-067 ˈäʁdərɜ 35 | abk-002-070 ˀaχɤ̈́ 36 | abk-002-071 χpʰæ̈ 37 | abk-002-072 ˈäχᵊrɛ̈ 38 | abk-002-073 aχᵊrdzɛ̈ 39 | abk-002-074 aiˇχæ̈́ 40 | abk-002-077 ˈäχᵊrɛ̈ 41 | abk-002-078 aχᵊrdzɛ̈ 42 | abk-002-079 aiˇχæ̈́ 43 | abk-002-080 amʒɤ̈́ 44 | abk-002-083 ˈaχʲtʰɛ̈ 45 | abk-002-084 ˈaχʲɾɛ̈ 46 | abk-002-085 aχʲɘ́ts 47 | abk-002-090 atsᵊʁʷərə 48 | abk-002-097 aχɘ́ 49 | abk-002-098 aχáɡə 50 | abk-002-101 ˀaχáˑ 51 | abk-002-102 aχəra 52 | abk-002-103 aχʷɘ́ 53 | abk-002-105 adχʷa 54 | abk-002-106 anχʷa 55 | -------------------------------------------------------------------------------- /sample/abk/text.txt: -------------------------------------------------------------------------------- 1 | abk-002-000 a d͡ʒ ʃʲ 2 | abk-002-001 a d͡ʒ m ɜ 3 | abk-002-006 a d͡ʒ ɘ m ʃ ɘ 4 | abk-002-009 a t͡ʃʰ ɜ r ä 5 | abk-002-010 a t͡ʃ ə̆ pʰ ɜ̆ r ʌ̈ 6 | abk-002-011 a t t͡ʃ ʃʰ ɜ r ɜ 7 | abk-002-023 a kʼ a ʒʲ ə r ɜ 8 | abk-002-024 ă b ʒʲ ɨ 9 | abk-002-026 a ʃ æ̈ 10 | abk-002-027 a j ə ʃʲ ɛ̈ 11 | 
"""Tokenize raw IPA transcriptions and build a phone inventory per language.

For every language directory under ``./data/`` this script reads the ``raw``
transcription file, writes a tokenized ``text.txt`` next to it, and writes an
inventory built from all tokens seen into the ``inventory`` sub-path.
"""
import os
import shutil
from pathlib import Path

import editdistance
from phonepiece.inventory import read_inventory, create_inventory, write_inventory
from phonepiece.ipa import read_ipa
from tqdm import tqdm


def preprocess_language(lang_dir, ipa):
    """Tokenize one language's raw transcriptions and write its inventory.

    Reads ``<lang_dir>/raw`` line by line. The first whitespace-separated
    field of each line is taken as the utterance id; the remaining fields are
    re-joined with single spaces and passed to ``ipa.tokenize``. Each
    utterance is written to ``<lang_dir>/text.txt`` as the id followed by the
    space-separated tokens. Every token seen is collected and handed to
    ``create_inventory`` / ``write_inventory``.

    Args:
        lang_dir: ``pathlib.Path`` of the language directory; its ``name`` is
            used as the language id.
        ipa: Object providing ``tokenize(str)``; the script passes the value
            returned by ``read_ipa()``.

    Raises:
        FileNotFoundError: If ``<lang_dir>/raw`` does not exist.
    """
    lang_id = lang_dir.name
    phoneme_set = set()

    # IPA transcriptions are non-ASCII, so the encoding is pinned to UTF-8
    # instead of relying on the platform default. The context manager closes
    # both files even if tokenization raises part-way through.
    with open(lang_dir / 'raw', 'r', encoding='utf-8') as raw_file, \
            open(lang_dir / 'text.txt', 'w', encoding='utf-8') as text_file:
        for line in raw_file:
            fields = line.strip().split()
            if not fields:
                # Blank (or whitespace-only) line: there is no utterance id.
                continue

            utt_id = fields[0]
            phonemes = ipa.tokenize(" ".join(fields[1:]))
            phoneme_set.update(phonemes)

            text_file.write(utt_id + ' ' + ' '.join(phonemes) + '\n')

    inv = create_inventory(lang_id, phoneme_set)
    write_inventory(inv, lang_dir / 'inventory')


def main():
    """Run the preprocessing for every language directory under ``./data/``."""
    data_dir = Path('./data/')

    ipa = read_ipa()

    # Sorted so that runs are reproducible; glob order is filesystem-dependent.
    for lang_dir in sorted(data_dir.glob('*')):
        if not lang_dir.is_dir():
            # Stray regular files in data/ are not language directories.
            continue

        print("Processing ", lang_dir.name)
        preprocess_language(lang_dir, ipa)


if __name__ == '__main__':
    main()