├── .gitignore
├── LICENSE.txt
├── README.md
├── environment.yml
├── setup.py
└── zerospeech2021
    ├── __init__.py
    ├── cli
    │   ├── __init__.py
    │   ├── evaluate.py
    │   ├── leaderboard.py
    │   ├── upload.py
    │   └── validate.py
    ├── exception.py
    ├── leaderboard.py
    ├── lexical.py
    ├── meta.py
    ├── phonetic.py
    ├── phonetic_eval
    │   ├── ABX_src
    │   │   ├── __init__.py
    │   │   ├── abx_group_computation.py
    │   │   ├── abx_iterators.py
    │   │   ├── dtw.c
    │   │   └── dtw.pyx
    │   ├── CPC_loader.py
    │   ├── LICENCE.txt
    │   ├── README.md
    │   ├── __init__.py
    │   └── eval_ABX.py
    ├── semantic.py
    ├── syntactic.py
    └── zr_upload_lib
        ├── __init__.py
        ├── api_fn.py
        ├── auth.py
        ├── model.py
        ├── split.py
        └── upload.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__/
3 | /zerospeech2021.egg-info/
4 | build/
5 | dist/
6 | .idea/
7 | .DS_Store
8 | *.so
9 | zerospeech2021/phonetic_eval/ABX_src/dtw.c
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |                     GNU GENERAL PUBLIC LICENSE
2 |                        Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |                             Preamble
9 |
10 |   The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 |   The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works.  By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users.  We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors.  You can apply it to
20 | your programs, too.
21 |
22 |   When we speak of free software, we are referring to freedom, not
23 | price.  Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 |   To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights.  Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 |   For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received.  You must make sure that they, too, receive
37 | or can get the source code.  And you must show them these terms so they
38 | know their rights.
39 |
40 |   Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 |   For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software.
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. 
If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. 
Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 |
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 |   15. Disclaimer of Warranty.
590 |
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 |   16. Limitation of Liability.
601 |
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 |   17. Interpretation of Sections 15 and 16.
613 |
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 |                      END OF TERMS AND CONDITIONS
622 |
623 |             How to Apply These Terms to Your New Programs
624 |
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 |
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 |
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 |
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ZeroSpeech Challenge 2021 Python package
2 |
3 |
4 | This repository bundles all the scripts required to evaluate and validate a
5 | submission to the [ZeroSpeech Challenge 2021](https://zerospeech.com/2021).
6 |
7 | ## Installation
8 |
9 | * First clone this repository:
10 |
11 |       git clone https://github.com/bootphon/zerospeech2021.git
12 |       cd zerospeech2021
13 |
14 | * Setup a conda environment:
15 |
16 |       conda env create -f environment.yml
17 |
18 | * Activate the created environment:
19 |
20 |       conda activate zerospeech2021
21 |
22 | * Install the package:
23 |
24 |       python setup.py install
25 |
26 | ## Usage
27 |
28 | The `zerospeech2021` package provides four command-line tools:
29 |
30 | * `zerospeech2021-validate`, which validates a submission, ensuring all the
31 |   required files are present and correctly formatted.
32 |
33 | * `zerospeech2021-evaluate`, which evaluates a submission (assumed to be valid). Only
34 |   the development datasets are evaluated. The test datasets can only be
35 |   evaluated by doing an official submission to the challenge.
36 |
37 | * `zerospeech2021-leaderboard`, which generates leaderboard entries from evaluation scores.
38 |
39 | * ![VERSION](https://img.shields.io/badge/-WIP-red) `zerospeech2021-upload`, a utility for uploading a submission to zerospeech.com.
40 |
41 | Each tool comes with a `--help` option describing the possible arguments (e.g.
42 | `zerospeech2021-validate --help`).
43 |
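A typical session first validates a submission and then evaluates it; the
dataset and submission paths below are placeholders:

    zerospeech2021-validate /path/to/dataset /path/to/submission.zip --njobs 4
    zerospeech2021-evaluate /path/to/dataset /path/to/submission.zip -o scores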
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: zerospeech2021
2 | channels:
3 |   - pytorch
4 |   - defaults
5 | dependencies:
6 |   - python=3
7 |   - click
8 |   - cudatoolkit=9.2
9 |   - cython
10 |   - joblib
11 |   - numpy
12 |   - pandas
13 |   - pip
14 |   - pytorch
15 |   - pyyaml
16 |   - scipy
17 |   - torchaudio
18 |   - tqdm
19 |   - pip:
20 |     - progressbar2
21 |     - sox
22 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Setup script for the zerospeech2021 Python package"""
3 |
4 | import codecs
5 | import numpy
6 | import setuptools
7 |
8 | import zerospeech2021
9 |
10 |
11 | setuptools.setup(
12 |     # general description
13 |     name='zerospeech2021',
14 |     description="Evaluation and validation tools for ZeroSpeech2021",
15 |     version=zerospeech2021.__version__,
16 |
17 |     # python package dependencies
18 |     setup_requires=['cython', 'numpy'],
19 |
20 |     # include Python code
21 |     packages=setuptools.find_packages(),
22 |
23 |     # build cython extension
24 |     ext_modules=[setuptools.Extension(
25 |         'libri_light_dtw',
26 |         sources=['zerospeech2021/phonetic_eval/ABX_src/dtw.pyx'],
27 |         extra_compile_args=['-O3'],
28 |         include_dirs=[numpy.get_include()])],
29 |
30 |     # needed for cython/setuptools, see
31 |     # http://docs.cython.org/en/latest/src/quickstart/build.html
32 |     zip_safe=False,
33 |
34 |     # the command-line scripts to export
35 |     entry_points={
36 |         'console_scripts': [
37 |             'zerospeech2021-validate = zerospeech2021.cli.validate:validate',
38 |             'zerospeech2021-evaluate = zerospeech2021.cli.evaluate:evaluate',
39 |             'zerospeech2021-leaderboard = zerospeech2021.cli.leaderboard:leaderboard',
40 |             'zerospeech2021-upload = zerospeech2021.cli.upload:upload_cmd'
41 |         ]},
42 |
43 |     # metadata
44 |     author='CoML team',
45 |     author_email='zerospeech2021@gmail.com',
46 |     license='GPL3',
47 |     url='https://zerospeech.com/2021',
48 |     long_description=codecs.open('README.md', encoding='utf-8').read(),
49 |     long_description_content_type="text/markdown",
50 |     python_requires='>=3.7',
51 | )
52 |
--------------------------------------------------------------------------------
/zerospeech2021/__init__.py:
--------------------------------------------------------------------------------
1 | """Evaluation and validation tools for the ZeroSpeech Challenge 2021"""
2 |
3 |
4 | __version__ = '0.5'
5 |
--------------------------------------------------------------------------------
/zerospeech2021/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zerospeech/zerospeech2021/199624adfba52901bab564b076fe7d4a63f47ddb/zerospeech2021/cli/__init__.py
--------------------------------------------------------------------------------
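Note on the build: `setup.py` compiles the `libri_light_dtw` Cython extension
declared above. Assuming a standard setuptools workflow, the extension can be
compiled in place and smoke-tested before a full install:

    python setup.py build_ext --inplace
    python -c "import libri_light_dtw"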
/zerospeech2021/cli/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluation program for ZR2021 submissions"""
2 |
3 | import atexit
4 | import os
5 | import pathlib
6 | import shutil
7 | import sys
8 | import tempfile
9 | import zipfile
10 |
11 | import click
12 | import pandas
13 | import yaml
14 |
15 | from zerospeech2021 import phonetic, lexical, syntactic, semantic
16 |
17 |
18 | def write_csv(frame, filename):
19 |     frame.to_csv(filename, index=False, float_format='%.4f')
20 |     print(f'  > Wrote {filename}')
21 |
22 |
23 | def eval_lexical(dataset, submission, output, kinds):
24 |     for kind in kinds:  # 'dev' or 'test'
25 |         print(f'Evaluating lexical {kind}...')
26 |
27 |         gold_file = dataset / 'lexical' / kind / 'gold.csv'
28 |         submission_file = submission / 'lexical' / f'{kind}.txt'
29 |
30 |         by_pair, by_frequency, by_length = lexical.evaluate(
31 |             gold_file, submission_file)
32 |
33 |         write_csv(
34 |             by_pair, output / f'score_lexical_{kind}_by_pair.csv')
35 |         write_csv(
36 |             by_frequency, output / f'score_lexical_{kind}_by_frequency.csv')
37 |         write_csv(
38 |             by_length, output / f'score_lexical_{kind}_by_length.csv')
39 |
40 |
41 | def eval_semantic(dataset, submission, output, kinds, njobs):
42 |     # load metric and pooling parameters from meta.yaml
43 |     meta = yaml.safe_load((submission / 'meta.yaml').open('r').read())
44 |     metric = meta['parameters']['semantic']['metric']
45 |     pooling = meta['parameters']['semantic']['pooling']
46 |
47 |     for kind in kinds:  # 'dev' or 'test'
48 |         print(f'Evaluating semantic {kind} '
49 |               f'(metric={metric}, pooling={pooling})...')
50 |
51 |         gold_file = dataset / 'semantic' / kind / 'gold.csv'
52 |         pairs_file = dataset / 'semantic' / kind / 'pairs.csv'
53 |         pairs, correlation = semantic.evaluate(
54 |             gold_file, pairs_file, submission / 'semantic' / kind,
55 |             metric, pooling, njobs=njobs)
56 |
57 |         write_csv(
58 |             pairs, output / f'score_semantic_{kind}_pairs.csv')
59 |         write_csv(
60 |             correlation, output / f'score_semantic_{kind}_correlation.csv')
61 |
62 |
63 | def eval_syntactic(dataset, submission, output, kinds):
64 |     for kind in kinds:  # 'dev' or 'test'
65 |         print(f'Evaluating syntactic {kind}...')
66 |
67 |         gold_file = dataset / 'syntactic' / kind / 'gold.csv'
68 |         submission_file = submission / 'syntactic' / f'{kind}.txt'
69 |
70 |         by_pair, by_type = syntactic.evaluate(gold_file, submission_file)
71 |
72 |         write_csv(
73 |             by_pair, output / f'score_syntactic_{kind}_by_pair.csv')
74 |         write_csv(
75 |             by_type, output / f'score_syntactic_{kind}_by_type.csv')
76 |
77 |
78 | def eval_phonetic(dataset, submission, output, kinds, force_cpu):
79 |     meta = yaml.safe_load((submission / 'meta.yaml').open('r').read())
80 |     metric = meta['parameters']['phonetic']['metric']
81 |     frame_shift = meta['parameters']['phonetic']['frame_shift']
82 |
83 |     results = []
84 |     for kind in kinds:  # 'dev' or 'test'
85 |         results.append(phonetic.evaluate(
86 |             submission / 'phonetic', dataset / 'phonetic',
87 |             kind, metric, frame_shift, force_cpu=force_cpu))
88 |
89 |     write_csv(pandas.concat(results), output / 'score_phonetic.csv')
90 |
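# The functions above read their parameters from the submission's meta.yaml.
# A minimal sketch of the expected structure (the key names match the lookups
# in eval_semantic() and eval_phonetic(); the values are illustrative only):
#
#   parameters:
#     semantic:
#       metric: cosine
#       pooling: max
#     phonetic:
#       metric: cosine
#       frame_shift: 0.01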
91 |
92 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
93 | @click.argument('dataset', type=pathlib.Path)
94 | @click.argument('submission', type=pathlib.Path)
95 | @click.option(
96 |     '-j', '--njobs', default=1, type=int,
97 |     help='Parallel jobs to use for semantic part (defaults to 1)')
98 | @click.option(
99 |     '--force-cpu', help='Do not use GPU for phonetic part', is_flag=True)
100 | @click.option(
101 |     '-o', '--output-directory', type=pathlib.Path,
102 |     default='.', show_default=True,
103 |     help="Directory to store output results")
104 | @click.option('--no-phonetic', help="Skip phonetic part", is_flag=True)
105 | @click.option('--no-lexical', help="Skip lexical part", is_flag=True)
106 | @click.option('--no-syntactic', help="Skip syntactic part", is_flag=True)
107 | @click.option('--no-semantic', help="Skip semantic part", is_flag=True)
108 | def evaluate(
109 |         dataset, submission, njobs, force_cpu, output_directory,
110 |         no_phonetic, no_lexical, no_syntactic, no_semantic):
111 |     """Evaluate a submission to the Zero Resource Speech Challenge 2021
112 |
113 |     DATASET is the root directory of the ZR2021 dataset, as downloaded from
114 |     https://zerospeech.com/2021.
115 |
116 |     SUBMISSION is the submission to evaluate; it can be a .zip file or a
117 |     directory.
118 |
119 |     """
120 |     try:
121 |         # regular participants can only evaluate the dev datasets; test can
122 |         # only be evaluated by doing an official submission to the challenge.
123 |         # The ZEROSPEECH2021_TEST_GOLD environment variable is used by
124 |         # organizers to provide test gold files to the evaluation program
125 |         # while keeping the program as simple as possible for participants.
126 |         kinds = ['dev']
127 |         if 'ZEROSPEECH2021_TEST_GOLD' in os.environ:
128 |             kinds.append('test')
129 |             dataset = pathlib.Path(os.environ['ZEROSPEECH2021_TEST_GOLD'])
130 |
131 |         # ensure the dataset exists
132 |         dataset = dataset.resolve(strict=True)
133 |         if not dataset.is_dir():
134 |             raise ValueError(f'dataset not found: {dataset}')
135 |
136 |         # ensure the submission exists; if it is a zip, uncompress it
137 |         submission = submission.resolve(strict=True)
138 |         if submission.is_file() and zipfile.is_zipfile(submission):
139 |             # create a temp directory we remove at exit
140 |             submission_unzip = tempfile.mkdtemp()
141 |             atexit.register(shutil.rmtree, submission_unzip)
142 |
143 |             # uncompress to the temp directory
144 |             print(f'Unzip submission to {submission_unzip}...')
145 |             zipfile.ZipFile(submission, 'r').extractall(submission_unzip)
146 |             submission = pathlib.Path(submission_unzip)
147 |         elif not submission.is_dir():
148 |             raise ValueError(
149 |                 f'submission is not a zip file or a directory: {submission}')
150 |
151 |         if not output_directory.is_dir():
152 |             output_directory.mkdir(exist_ok=True, parents=True)
153 |
154 |         if not no_lexical:
155 |             eval_lexical(dataset, submission, output_directory, kinds)
156 |
157 |         if not no_semantic:
158 |             eval_semantic(dataset, submission, output_directory, kinds, njobs)
159 |
160 |         if not no_syntactic:
161 |             eval_syntactic(dataset, submission, output_directory, kinds)
162 |
163 |         if not no_phonetic:
164 |             eval_phonetic(
165 |                 dataset, submission, output_directory, kinds, force_cpu)
166 |
167 |     except ValueError as error:
168 |         print(f'ERROR: {error}')
169 |         sys.exit(-1)
--------------------------------------------------------------------------------
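As the comment in evaluate() explains, organizers can additionally score the
test sets by pointing the evaluator at the gold files through an environment
variable (the gold path shown is a placeholder):

    ZEROSPEECH2021_TEST_GOLD=/path/to/test-gold zerospeech2021-evaluate ./dataset ./submission.zip -o scores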
/zerospeech2021/cli/leaderboard.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from zerospeech2021.leaderboard import get_semantic_size, ZeroSpeechSubmission
8 |
9 |
10 | def create(submission_location: Path, dataset_location, score_location: Path,
11 |            user_meta, leaderboard_file: Path):
12 |     """Build a leaderboard entry from the scores computed by the evaluation.
13 |
14 |     ARGS:
15 |         submission_location: location of the submission entry files (as described in ...)
16 |         dataset_location: location of the test set
17 |         score_location: location of the scores computed by the evaluation
18 |         user_meta: file containing platform metadata (user, submission date, etc.)
19 |         leaderboard_file: location & name of the result file to write
20 |     """
21 |     print("Building leaderboard entry from scores...")
22 |     semantic_size = get_semantic_size(dataset_location)
23 |
24 |     if not submission_location.is_dir():
25 |         print("SUBMISSION folder not found", file=sys.stderr)
26 |         sys.exit(-1)
27 |
28 |     if not dataset_location.is_dir():
29 |         print("DATASET folder not found", file=sys.stderr)
30 |         sys.exit(-1)
31 |
32 |     if not score_location.is_dir():
33 |         print("SCORE folder not found", file=sys.stderr)
34 |         sys.exit(-1)
35 |
36 |     if leaderboard_file.is_file():
37 |         print(f"WARNING: leaderboard specified already exists: [OVERWRITING] {leaderboard_file}", file=sys.stderr)
38 |
39 |     subs = ZeroSpeechSubmission(
40 |         submission_location=submission_location, external_meta_file=user_meta,
41 |         _semantic_size=semantic_size, score_location=score_location,
42 |     )
43 |
44 |     leaderboard_file = leaderboard_file.with_suffix(".json")
45 |     with leaderboard_file.open('w') as fp:
46 |         json.dump(subs.leaderboard(), fp, indent=4)
47 |     print(f"\t> Wrote {leaderboard_file}")
48 |
49 |
50 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
51 | @click.argument('submission', type=Path)
52 | @click.argument('dataset', type=Path)
53 | @click.argument('scores', type=Path)
54 | @click.option('-u', '--user-meta', type=Path, help="Location of platform metadata")
55 | @click.option('-o', '--output-file', type=Path, help="Location & name of the leaderboard file")
56 | def leaderboard(submission: Path, dataset: Path, scores: Path, user_meta, output_file):
57 |     """CLI wrapper to build a leaderboard entry"""
58 |     try:
59 |         create(submission, dataset, scores, user_meta, output_file)
60 |     except ValueError as error:
61 |         print(f'ERROR: {error}')
62 |         sys.exit(-1)
--------------------------------------------------------------------------------
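Assuming `zerospeech2021-evaluate` wrote its CSV scores to a `scores/`
directory, a leaderboard entry can then be generated along these lines (the
file names are illustrative):

    zerospeech2021-leaderboard ./submission ./dataset ./scores -u user_meta.json -o entry.json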
/zerospeech2021/cli/upload.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from getpass import getpass
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from rich.console import Console
8 | from rich.progress import Progress, BarColumn
9 |
10 | from zerospeech2021 import zr_upload_lib as zr_up
11 |
12 | # Fancy console
13 | console = Console()
14 |
15 | # The challenge to use for uploads
16 | # ID 6 => zerospeech2021
17 | CHALLENGE_ID: int = 6
18 |
19 |
20 | @click.group(epilog='See https://zerospeech.com/2021 for more details')
21 | @click.option('--debug', help="Print debug info", is_flag=True)
22 | @click.pass_context
23 | def upload_cmd(ctx, debug):
24 |     ctx.debug = debug
25 |
26 |
27 | @upload_cmd.command()
28 | @click.option('-u', '--username', type=str)
29 | @click.option('-p', '--password', type=str)
30 | @click.option('--clear', is_flag=True)
31 | @click.pass_obj
32 | def login(debug, username, password, clear):
33 |     # clear session, then exit successfully: nothing more to do
34 |     if clear:
35 |         zr_up.auth.clear_session()
36 |         sys.exit(0)
37 |
38 |     if not username:
39 |         username = input('Username: ')
40 |
41 |     if not password:
42 |         password = getpass("Password: ")
43 |
44 |     # login
45 |     token = zr_up.auth.login(username, password)
46 |     # save session
47 |     zr_up.auth.create_session(token)
48 |     console.print(f'Successfully logged in as {username}', style='green bold')
49 |
50 |
51 | @upload_cmd.command()
52 | @click.argument('archive_file', type=Path)
53 | @click.pass_obj
54 | def multipart(debug, archive_file):
55 |     """Upload an archive using multipart upload"""
56 |     # reject missing files and non-zip archives, as the message states
57 |     if not archive_file.is_file() or archive_file.suffix != ".zip":
58 |         console.print(f"ERROR: given file: {archive_file} was not found or is not a .zip file !!",
59 |                       style="red bold")
60 |         sys.exit(1)
61 |
62 |     # check if file is large enough for splitting
63 |     will_split = archive_file.stat().st_size > zr_up.model.MULTIPART_THRESHOLD * 2
64 |
65 |     checkpoint_file = archive_file.parent / f"{archive_file.stem}.checkpoint.json"
66 |     zr_up.upload.ask_resume(checkpoint_file)
67 |     token = zr_up.auth.get_session()
68 |
69 |     with Progress(
70 |             "[progress.description]{task.description}", BarColumn(),
71 |     ) as progress:
72 |         task = progress.add_task("[red]Uploading...", start=False, total=100)
73 |
74 |         if will_split:
75 |             zr_up.upload.multipart_upload(CHALLENGE_ID, archive_file, token, checkpoint_file)
76 |         else:
77 |             zr_up.upload.single_part_upload(CHALLENGE_ID, archive_file, token)
78 |
79 |         progress.advance(task, advance=100)
80 |
81 |     console.print(f"Successfully uploaded archive {archive_file} to zerospeech.com", style="green")
82 |
83 |
84 | @upload_cmd.command()
85 | @click.argument('archive_file', type=Path)
86 | @click.pass_obj
87 | def simple(debug, archive_file):
88 |     """Upload an archive using simple upload"""
89 |     # reject missing files and non-zip archives, as the message states
90 |     if not archive_file.is_file() or archive_file.suffix != ".zip":
91 |         console.print(f"ERROR: given file: {archive_file} was not found or is not a .zip file !!",
92 |                       style="red bold")
93 |         sys.exit(1)
94 |
95 |     token = zr_up.auth.get_session()
96 |     with Progress(
97 |             "[progress.description]{task.description}", BarColumn(),
98 |     ) as progress:
99 |         task = progress.add_task("[red]Uploading...", start=False, total=100)
100 |
101 |         # upload
102 |         zr_up.upload.single_part_upload(CHALLENGE_ID, archive_file, token)
103 |
104 |         progress.advance(task, advance=100)
105 |
106 |     console.print(f"Successfully uploaded archive {archive_file} to zerospeech.com", style="green")
--------------------------------------------------------------------------------
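A typical upload session with this (work-in-progress) tool logs in once, then
sends the archive; `multipart` is the safer choice for large files, as it
writes a checkpoint file that lets an interrupted upload resume:

    zerospeech2021-upload login -u myusername
    zerospeech2021-upload multipart ./submission.zip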
print(' > lexical/dev') 56 | lexical.validate( 57 | submission / 'lexical' / 'dev.txt', 58 | dataset, 'dev') 59 | 60 | if not only_dev: 61 | print(' > lexical/test') 62 | lexical.validate( 63 | submission / 'lexical' / 'test.txt', 64 | dataset, 'test') 65 | 66 | 67 | def _validate_syntactic(submission, dataset, only_dev): 68 | print('Validating syntactic...') 69 | _validate_directory( 70 | submission / 'syntactic', 71 | ['dev.txt'] if only_dev else ['dev.txt', 'test.txt']) 72 | 73 | print(' > syntactic/dev') 74 | syntactic.validate( 75 | submission / 'syntactic' / 'dev.txt', 76 | dataset, 'dev') 77 | 78 | if not only_dev: 79 | print(' > syntactic/test') 80 | syntactic.validate( 81 | submission / 'syntactic' / 'test.txt', 82 | dataset, 'test') 83 | 84 | 85 | def _validate_semantic(submission, dataset, only_dev, njobs): 86 | print('Validating semantic...') 87 | semantic_content = ['dev'] if only_dev else ['dev', 'test'] 88 | _validate_directory(submission / 'semantic', semantic_content) 89 | 90 | for subdir in semantic_content: 91 | _validate_directory( 92 | submission / 'semantic' / subdir, 93 | ['librispeech', 'synthetic']) 94 | 95 | print(' > semantic/dev/synthetic') 96 | semantic.validate( 97 | submission / 'semantic', dataset, 'dev', 'synthetic', njobs=njobs) 98 | 99 | print(' > semantic/dev/librispeech') 100 | semantic.validate( 101 | submission / 'semantic', dataset, 'dev', 'librispeech', njobs=njobs) 102 | 103 | if not only_dev: 104 | print(' > semantic/test/synthetic') 105 | semantic.validate( 106 | submission / 'semantic', dataset, 'test', 'synthetic', njobs=njobs) 107 | 108 | print(' > semantic/test/librispeech') 109 | semantic.validate( 110 | submission / 'semantic', dataset, 'test', 'librispeech', njobs=njobs) 111 | 112 | 113 | @click.command(epilog='See https://zerospeech.com/2021 for more details') 114 | @click.argument('dataset', type=pathlib.Path) 115 | @click.argument('submission', type=pathlib.Path) 116 | @click.option( 117 | '-j', '--njobs', default=1, type=int, 118 | help='Number of parallel jobs (default to 1)') 119 | @click.option('--only-dev', help='Skip test part', is_flag=True) 120 | @click.option('--no-phonetic', help="Skip phonetic part", is_flag=True) 121 | @click.option('--no-lexical', help="Skip lexical part", is_flag=True) 122 | @click.option('--no-syntactic', help="Skip syntactic part", is_flag=True) 123 | @click.option('--no-semantic', help="Skip semantic part", is_flag=True) 124 | def validate( 125 | dataset, submission, njobs, only_dev, 126 | no_phonetic, no_lexical, no_syntactic, no_semantic): 127 | """Validate a submission to the Zero Resource Speech Challenge 2021 128 | 129 | DATASET is the root directory of the ZR2021 dataset, as downloaded with the 130 | zerospeech2021-download tool. 131 | 132 | SUBMISSION is the submission to validate, it can be a .zip file or a 133 | directory. 
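    A typical call looks like this (illustrative only; the actual console
    entry point name is defined by setup.py and may differ):

        zerospeech2021-validate ./dataset ./submission.zip --only-dev -j4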
134 |
135 |     """
136 |     try:
137 |         # ensures the dataset exists
138 |         dataset = dataset.resolve(strict=True)
139 |         if not dataset.is_dir():
140 |             raise ValueError(f'dataset not found: {dataset}')
141 |
142 |         # ensures the submission exists, if it is a zip, uncompress it
143 |         submission = submission.resolve(strict=True)
144 |
145 |         print('Prepare input...')
146 |         print(f' > dataset: {dataset}')
147 |         print(f' > submission: {submission}')
148 |
149 |         if submission.is_file() and zipfile.is_zipfile(submission):
150 |             # create a temp directory we remove at exit
151 |             submission_unzip = tempfile.mkdtemp()
152 |             atexit.register(shutil.rmtree, submission_unzip)
153 |
154 |             # uncompress to the temp directory
155 |             print(f' > unzip submission to {submission_unzip}...')
156 |             zipfile.ZipFile(submission, 'r').extractall(submission_unzip)
157 |             submission = pathlib.Path(submission_unzip)
158 |         elif not submission.is_dir():
159 |             raise ValueError(
160 |                 f'submission is not a zip file or a directory: {submission}')
161 |
162 |         print('Validating root folder...')
163 |         print(' > meta.yaml')
164 |         is_open_source = meta.validate(submission)
165 |
166 |         print(' > root folder')
167 |         root_content = [
168 |             'meta.yaml', 'phonetic', 'lexical', 'syntactic', 'semantic']
169 |         if is_open_source:
170 |             root_content.append('code')
171 |         _validate_directory(submission, root_content)
172 |
173 |         if is_open_source:
174 |             if not (submission / 'code').is_dir():
175 |                 raise exception.ValidationError(
176 |                     'submission specified as open source but '
177 |                     'code folder is missing')
178 |             if not list((submission / 'code').iterdir()):
179 |                 raise exception.ValidationError(
180 |                     'submission specified as open source but '
181 |                     'code folder is empty')
182 |             print(' > code folder detected: submission will be manually '
183 |                   'inspected to ensure it is open source')
184 |
185 |         if not no_phonetic:
186 |             _validate_phonetic(submission, dataset, only_dev, njobs)
187 |
188 |         if not no_lexical:
189 |             _validate_lexical(submission, dataset, only_dev)
190 |
191 |         if not no_syntactic:
192 |             _validate_syntactic(submission, dataset, only_dev)
193 |
194 |         if not no_semantic:
195 |             _validate_semantic(submission, dataset, only_dev, njobs)
196 |
197 |     except (exception.ValidationError, ValueError, FileNotFoundError) as error:
198 |         print(f'ERROR: {error}')
199 |         print('Validation failed, please fix it and try again!')
200 |         sys.exit(-1)
201 |
202 |     print('Success!')
203 |     sys.exit(0)
204 |
--------------------------------------------------------------------------------
/zerospeech2021/exception.py:
--------------------------------------------------------------------------------
1 | """Custom exceptions for ZR2021 validation steps"""
2 |
3 |
4 | def _print_sublist(entries, num=3):
5 |     """Returns a string containing the `num` first elements of `entries`"""
6 |     if len(entries) <= num:
7 |         return '[' + ', '.join(str(e) for e in entries) + ']'
8 |
9 |     return (
10 |         '[' + ', '.join(list(str(e) for e in entries)[:num]) +
11 |         f', ...] and {len(entries) - num} more')
12 |
13 |
14 | class ValidationError(Exception):
15 |     """Raised when detecting a validation error"""
16 |
17 |
18 | class FormatError(ValidationError):
19 |     """Raised when detecting a bad format in a submission file"""
20 |     def __init__(self, line, message):
21 |         super().__init__(message)
22 |         self._line = line
23 |
24 |     def __str__(self):
25 |         return f'bad format (line {self._line}): ' + super().__str__()
26 |
27 |
28 | class FileFormatError(ValidationError):
29 |     """Raised when detecting a bad format in a submission file"""
30 |     def __init__(self, file, message):
31 |         super().__init__(message)
32 |         self._file = file
33 |
34 |     def __str__(self):
35 |         return f'bad format (file {self._file}): ' + super().__str__()
36 |
37 |
38 | class MismatchError(ValidationError):
39 |     """Raised when detecting a mismatch between two sets"""
40 |     def __init__(self, message, expected, observed):
41 |         super().__init__()
42 |         self._message = message
43 |
44 |         expected = set(expected)
45 |         observed = set(observed)
46 |
47 |         missing = expected - observed
48 |         extra = observed - expected
49 |
50 |         if missing or extra:
51 |             self._message += ': '
52 |             if missing:
53 |                 self._message += f'missing {_print_sublist(missing)}'
54 |             if missing and extra:
55 |                 self._message += ', '
56 |             if extra:
57 |                 self._message += f'extra {_print_sublist(extra)}'
58 |
59 |     def __str__(self):
60 |         return self._message
61 |
62 |
63 | class EntryMissingError(ValidationError):
64 |     """Raised when an entry is missing from the result set"""
65 |
66 |     def __init__(self, expected, source):
67 |         super().__init__()
68 |         self._message = f"Input file ({source}) does not have a matching feature ({expected})!"
69 |
70 |     def __str__(self):
71 |         return self._message
72 |
--------------------------------------------------------------------------------
/zerospeech2021/leaderboard.py:
--------------------------------------------------------------------------------
1 | import json
2 | from dataclasses import dataclass
3 | from datetime import datetime
4 | from pathlib import Path
5 | from typing import Dict, Optional
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import yaml
10 |
11 |
12 | class LexicalScores:
13 |     """ Class that extracts a summary of lexical scores from a scores directory """
14 |     # score files
15 |     __dev_pairs = 'score_lexical_dev_by_pair.csv'
16 |     __test_pairs = 'score_lexical_test_by_pair.csv'
17 |     __dev_frequency = 'score_lexical_dev_by_frequency.csv'
18 |     __test_frequency = 'score_lexical_test_by_frequency.csv'
19 |     __dev_length = 'score_lexical_dev_by_length.csv'
20 |     __test_length = 'score_lexical_test_by_length.csv'
21 |
22 |     def is_valid(self, location: Path):
23 |         """ Verify that all files are present """
24 |
25 |         if not (location / self.__dev_length).is_file():
26 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_length score file!")
27 |         if not (location / self.__test_length).is_file():
28 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_length score file!")
29 |         if not (location / self.__dev_frequency).is_file():
30 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_frequency score file!")
31 |         if not (location / self.__test_frequency).is_file():
32 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_frequency score file!")
33 |         if not (location / self.__dev_pairs).is_file():
34 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_pairs score file!")
35 |         if not (location / self.__test_pairs).is_file():
36 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_pairs score file!")
37 |
38 |     def __init__(self, location: Path):
39 |         """ Initialise lexical score object """
40 |         self.is_valid(location)
41 |         self.location = location
42 |
43 |     @staticmethod
44 |     def _score_invocab(frame):
45 |         """Weighted mean of scores by frequency, excluding OOVs"""
46 |         # filter out OOVs
47 |         frame = frame[frame['frequency'] != 'oov']
48 |
49 |         # weighted mean
50 |         return np.average(
51 |             frame['score'].to_numpy(),
52 |             weights=frame['n'].to_numpy())
53 |
54 |     def general(self):
55 |         """ Extract general lexical scores """
56 |         dev_score = pd.read_csv(self.location / self.__dev_pairs)['score'].mean()
57 |         test_score = pd.read_csv(self.location / self.__test_pairs)['score'].mean()
58 |         # weighted scores
59 |         dev_score_invocab = self._score_invocab(
60 |             pd.read_csv(self.location / self.__dev_frequency)
61 |         )
62 |
63 |         test_score_invocab = self._score_invocab(
64 |             pd.read_csv(self.location / self.__test_frequency)
65 |         )
66 |
67 |         return {
68 |             'lexical_all': [dev_score, test_score],
69 |             'lexical_invocab': [dev_score_invocab, test_score_invocab]
70 |         }
71 |
72 |     def detailed(self):
73 |         """ Extract detailed lexical scores """
74 |         frequency_dev = pd.read_csv(self.location / self.__dev_frequency)
75 |         frequency_test = pd.read_csv(self.location / self.__test_frequency)
76 |
77 |         by_frequency = pd.merge(frequency_dev, frequency_test,
78 |                                 how="outer", on=['frequency'], suffixes=("_dev", "_test"))
79 |
80 |         length_dev = pd.read_csv(self.location / self.__dev_length)
81 |         length_test = pd.read_csv(self.location / self.__test_length)
82 |
83 |         by_length = pd.merge(length_dev, length_test, how="outer", on=['length'], suffixes=("_dev", "_test"))
84 |
85 |         return {
86 |             "by_length": by_length.to_dict(orient='records'),
87 |             "by_frequency": by_frequency.to_dict(orient='records')
88 |         }
89 |
90 |
91 | class SemanticScore:
92 |     """ Class that extracts a summary of semantic scores from a scores directory """
93 |     # score files
94 |     __dev_correlation = 'score_semantic_dev_correlation.csv'
95 |     __test_correlation = 'score_semantic_test_correlation.csv'
96 |
97 |     def is_valid(self, location: Path):
98 |         """ Verify that all files are present """
99 |
100 |         if not (location / self.__dev_correlation).is_file():
101 |             raise FileNotFoundError(f"Score folder {location} is missing semantic_dev_correlation score file!")
102 |         if not (location / self.__test_correlation).is_file():
103 |             raise FileNotFoundError(f"Score folder {location} is missing semantic_test_correlation score file!")
104 |
105 |     def __init__(self, location: Path, size: Dict):
106 |         """ Initialise semantic score object """
107 |         self.is_valid(location)
108 |         self.location = location
109 |         self.size = size
110 |
111 |     def general(self):
112 |         """ Extract general semantic scores """
113 |         dev_correlations = pd.read_csv(self.location / self.__dev_correlation)
114 |         dev_librispeech_mean = dev_correlations[dev_correlations['type'] == 'librispeech']['correlation'].mean()
115 |         dev_synthetic_mean = dev_correlations[dev_correlations['type'] == 'synthetic']['correlation'].mean()
116 |
117 |         dev_correlations['size'] = self.size['dev']['size']
118 |         dev_librispeech_wmean = np.average(
119 |             dev_correlations[dev_correlations['type'] == 'librispeech']['correlation'].to_numpy(),
120 |             weights=dev_correlations[dev_correlations['type'] == 'librispeech']['size'].to_numpy())
121 |         dev_synthetic_wmean = np.average(
122 |             dev_correlations[dev_correlations['type'] == 'synthetic']['correlation'].to_numpy(),
123 |             weights=dev_correlations[dev_correlations['type'] == 'synthetic']['size'].to_numpy())
124 |
125 |         test_correlations = pd.read_csv(self.location / self.__test_correlation)
126 |         test_librispeech_mean = test_correlations[test_correlations['type'] == 'librispeech']['correlation'].mean()
127 |         test_synthetic_mean = test_correlations[test_correlations['type'] == 'synthetic']['correlation'].mean()
128 |
129 |         test_correlations['size'] = self.size['test']['size']
130 |         test_librispeech_wmean = np.average(
131 |             test_correlations[test_correlations['type'] == 'librispeech']['correlation'].to_numpy(),
132 |             weights=test_correlations[test_correlations['type'] == 'librispeech']['size'].to_numpy())
133 |         test_synthetic_wmean = np.average(
134 |             test_correlations[test_correlations['type'] == 'synthetic']['correlation'].to_numpy(),
135 |             weights=test_correlations[test_correlations['type'] == 'synthetic']['size'].to_numpy())
136 |
137 |         return {
138 |             "semantic_synthetic": [
139 |                 dev_synthetic_mean, test_synthetic_mean],
140 |             "semantic_librispeech": [
141 |                 dev_librispeech_mean, test_librispeech_mean],
142 |             "weighted_semantic_synthetic": [
143 |                 dev_synthetic_wmean, test_synthetic_wmean],
144 |             "weighted_semantic_librispeech": [
145 |                 dev_librispeech_wmean, test_librispeech_wmean]
146 |         }
147 |
148 |     def detailed(self):
149 |         """ Extract detailed semantic scores """
150 |         dev_correlations = pd.read_csv(self.location / self.__dev_correlation)
151 |         test_correlations = pd.read_csv(self.location / self.__test_correlation)
152 |
153 |         ndev_correlations = dev_correlations \
154 |             .set_index(['dataset', dev_correlations.groupby('dataset').cumcount()])['correlation'] \
155 |             .unstack() \
156 |             .reset_index()
157 |         ndev_correlations.columns = ['dataset', 'librispeech', 'synthetic']
158 |         ndev_correlations["set"] = "dev"
159 |
160 |         ntest_correlations = test_correlations \
161 |             .set_index(['dataset', test_correlations.groupby('dataset').cumcount()])['correlation'] \
162 |             .unstack() \
163 |             .reset_index()
164 |         ntest_correlations.columns = ['dataset', 'librispeech', 'synthetic']
165 |         ntest_correlations["set"] = "test"
166 |
167 |         # DeprecationWarning from pandas: append is to be replaced by concat
168 |         correlations = pd.concat([ndev_correlations, ntest_correlations], axis=0)
169 |         # correlations = ndev_correlations.append(ntest_correlations)
170 |
171 |         return correlations.to_dict(orient='records')
172 |
173 |
174 | class SyntacticScores:
175 |     """ Class that extracts a summary of syntactic scores from a scores directory """
176 |     # score files
177 |     __dev_pairs = 'score_syntactic_dev_by_pair.csv'
178 |     __test_pairs = 'score_syntactic_test_by_pair.csv'
179 |     __dev_types = 'score_syntactic_dev_by_type.csv'
180 |     __test_types = 'score_syntactic_test_by_type.csv'
181 |
182 |     def is_valid(self, location: Path):
183 |         """ Verify that all files are present """
184 |
185 |         if not (location / self.__dev_pairs).is_file():
186 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_dev_by_pair score file!")
187 |         if not (location / self.__test_pairs).is_file():
188 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_test_by_pair score file!")
189 |         if not (location / self.__dev_types).is_file():
190 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_dev_by_type score file!")
191 |         if not (location / self.__test_types).is_file():
192 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_test_by_type score file!")
193 |
194 |     def __init__(self, location: Path):
195 |         """ Initialise syntactic score object """
196 |         self.is_valid(location)
197 |         self.location = location
198 |
199 |     def general(self):
200 |         """ Extract general syntactic scores """
201 |         dev_mean = pd.read_csv(self.location / self.__dev_pairs)['score'].mean()
202 |         test_mean = pd.read_csv(self.location / self.__test_pairs)['score'].mean()
203 |         return [dev_mean, test_mean]
204 |
205 |     def detailed(self):
206 |         """ Extract detailed syntactic scores """
207 |         dev_types = pd.read_csv(self.location / self.__dev_types)
208 |         test_types = pd.read_csv(self.location / self.__test_types)
209 |
210 |         merged = pd.merge(dev_types, test_types, how="outer", on=["type"], suffixes=("_dev", "_test"))
211 |
212 |         return merged.to_dict(orient='records')
213 |
214 |
215 | class PhoneticScores:
216 |     """ Class that extracts a summary of phonetic scores from a scores directory """
217 |     # score files
218 |     __scores = 'score_phonetic.csv'
219 |
220 |     def is_valid(self, location: Path):
221 |         """ Verify that all files are present """
222 |
223 |         if not (location / self.__scores).is_file():
224 |             raise FileNotFoundError(f"Score folder {location} is missing phonetic score file!")
225 |
226 |     def __init__(self, location: Path):
227 |         """ Initialise phonetic score object """
228 |         self.is_valid(location)
229 |         self.location = location
230 |
231 |     def general(self):
232 |         """ Extract general phonetic scores """
233 |
234 |         def e(d):
235 |             return {s['type']: s['score'] for s in d}
236 |
237 |         frame = pd.read_csv(self.location / self.__scores)
238 |         dev_clean = frame[(frame["dataset"] == 'dev') & (frame["sub-dataset"] == 'clean')][['type', 'score']] \
239 |             .to_dict(orient='records')
240 |         dev_other = frame[(frame["dataset"] == 'dev') & (frame["sub-dataset"] == 'other')][['type', 'score']] \
241 |             .to_dict(orient='records')
242 |         test_clean = frame[(frame["dataset"] == 'test') & (frame["sub-dataset"] == 'clean')][['type', 'score']] \
243 |             .to_dict(orient='records')
244 |         test_other = frame[(frame["dataset"] == 'test') & (frame["sub-dataset"] == 'other')][['type', 'score']] \
245 |             .to_dict(orient='records')
246 |
247 |         return {
248 |             "phonetic_clean_within": [e(dev_clean)['within'], e(test_clean)['within']],
249 |             "phonetic_clean_across": [e(dev_clean)['across'], e(test_clean)['across']],
250 |             "phonetic_other_within": [e(dev_other)['within'], e(test_other)['within']],
251 |             "phonetic_other_across": [e(dev_other)['across'], e(test_other)['across']]
252 |         }
253 |
254 |     @staticmethod
255 |     def detailed():
256 |         """ Extract detailed phonetic scores """
257 |         # the phonetic task has no detailed view of scores
258 |         return {}
259 |
260 |
261 | @dataclass
262 | class Metadata:
263 |     author: str
264 |     affiliation: str
265 |     description: str
266 |     open_source: bool
267 |     train_set: str
268 |     gpu_budget: float
269 |     parameters: Dict
270 |     visually_grounded: bool = False
271 |     submission_id: Optional[str] = None
272 |     submission_date: Optional[datetime] = None
273 |     submitted_by: Optional[str] = None
274 |
275 |     @staticmethod
276 |     def parse_external_meta(filepath: Path) -> Dict:
277 |         if filepath is None or not filepath.is_file():
278 |             return {}
279 |         elif filepath.suffix == '.json':
280 |             with filepath.open() as fp:
281 |                 return json.load(fp)
282 |         else:
283 |             # old txt based file
284 |             submitted_at = None
285 |             with filepath.open() as fp:
286 |                 for line in fp.readlines():
287 |                     line = line.rstrip()
288 |                     if line.startswith('submitted-at:'):
289 |                         submitted_at
= line.replace('submitted-at:', '').replace(' ', '') 290 | return {"submitted-at": submitted_at} 291 | 292 | @staticmethod 293 | def filter_external_meta(data: Dict): 294 | try: 295 | sub_data = datetime.fromisoformat(data.get("submitted-at", None)) 296 | except (ValueError, TypeError): 297 | sub_data = None 298 | 299 | return { 300 | "submission_date": sub_data, 301 | "submitted_by": data.get("user", None), 302 | "submission_id": data.get("submission_id", None) 303 | } 304 | 305 | @classmethod 306 | def create_from(cls, filepath: Path, external_meta_file: Path): 307 | with (filepath / 'meta.yaml').open() as fp: 308 | meta = yaml.load(fp, Loader=yaml.SafeLoader) 309 | 310 | # parse & filter items of platform metadata 311 | external_meta = cls.filter_external_meta(cls.parse_external_meta(external_meta_file)) 312 | 313 | return cls(**meta, **external_meta) 314 | 315 | def to_dict(self): 316 | if self.submission_date: 317 | sub_date = self.submission_date.isoformat() 318 | else: 319 | sub_date = datetime.now().isoformat() 320 | 321 | return { 322 | "submitted_at": sub_date, 323 | "author": self.author, 324 | "affiliation": self.affiliation, 325 | "submitted_by": self.submitted_by, 326 | "submission_id": self.submission_id, 327 | "description": self.description, 328 | "visually_grounded": self.visually_grounded, 329 | "open_source": self.open_source, 330 | "train_set": self.train_set, 331 | "gpu_budget": self.gpu_budget, 332 | "parameters": self.parameters 333 | } 334 | 335 | 336 | class ZeroSpeechSubmission: 337 | 338 | def __init__(self, submission_location: Path, _semantic_size: Dict, 339 | score_location: Path, external_meta_file: Path): 340 | 341 | # fetch metadata 342 | self.description = Metadata.create_from(submission_location, external_meta_file) 343 | 344 | # create scores 345 | self.lexical = LexicalScores(score_location) 346 | self.semantic = SemanticScore(score_location, _semantic_size) 347 | self.syntactic = SyntacticScores(score_location) 348 | self.phonetic = PhoneticScores(score_location) 349 | 350 | def leaderboard(self): 351 | """ Build leaderboard object """ 352 | ph = self.phonetic.general() 353 | le = self.lexical.general() 354 | se = self.semantic.general() 355 | sy = self.syntactic.general() 356 | more = { 357 | "description": self.description.to_dict(), 358 | "lexical": self.lexical.detailed(), 359 | "syntactic": self.syntactic.detailed(), 360 | "semantic": self.semantic.detailed(), 361 | } 362 | return { 363 | "author_label": self.description.author, 364 | "set": ['dev', 'test'], 365 | **le, 366 | "syntactic": sy, 367 | **ph, 368 | **se, 369 | "more": more 370 | } 371 | 372 | 373 | def get_semantic_size(dataset: Path): 374 | test_size = pd.read_csv(dataset / 'semantic/test/pairs.csv', header=0) \ 375 | .groupby(['type', 'dataset'], as_index=False).size() 376 | dev_size = pd.read_csv(dataset / 'semantic/dev/pairs.csv', header=0) \ 377 | .groupby(['type', 'dataset'], as_index=False).size() 378 | return {'dev': dev_size, 'test': test_size} 379 | -------------------------------------------------------------------------------- /zerospeech2021/lexical.py: -------------------------------------------------------------------------------- 1 | """Lexical part of the ZR2021 (validation and evaluation)""" 2 | 3 | import collections 4 | import pathlib 5 | import sys 6 | 7 | import pandas 8 | from zerospeech2021.exception import FormatError, MismatchError 9 | 10 | 11 | def _validate_line(index, line): 12 | """Auxiliary function to validate() 13 | 14 | Returns the filename in `line`, 
checks the score and raises FormatError if
15 |     the line is not valid.
16 |
17 |     """
18 |     # ensure the line has two fields separated by a space
19 |     line = line.strip()
20 |     fields = line.split(' ')
21 |     if len(fields) != 2:
22 |         raise FormatError(
23 |             index, f'must be "<filename> <score>" but is "{line}"')
24 |
25 |     filename, score = tuple(fields)
26 |
27 |     # ensure the second field is a positive float
28 |     try:
29 |         float(score)
30 |     except ValueError:
31 |         raise FormatError(
32 |             index, f'<score> must be a float but is "{score}"')
33 |
34 |     return filename
35 |
36 |
37 | def validate(submission, dataset, kind):
38 |     """Raises a ValidationError if the `submission` file is not valid
39 |
40 |     * The submission file must be in text format, each line as:
41 |
42 |           <filename> <score>
43 |     * The <filename> is the name of a wav file in the lexical dataset, without
44 |       path nor extension ("xKtnLJYiWGt", not "lexical/dev/xKtnLJYiWGt.wav")
45 |
46 |     * The <score> is a positive float
47 |
48 |     Parameters
49 |     ----------
50 |     submission: path
51 |         The submission file to validate, each line must be formatted as
52 |         "<filename> <score>".
53 |     dataset: path
54 |         The root path of the ZR2021 dataset
55 |     kind: str
56 |         Must be 'dev' or 'test'
57 |
58 |     Raises
59 |     ------
60 |     ValueError
61 |         If `kind` is not 'dev' or 'test', if `submission` is not a file or if
62 |         the dataset is not an existing directory.
63 |     ValidationError
64 |         If one line of the submission file is not valid or if the submitted
65 |         filenames do not match the required ones.
66 |
67 |     """
68 |     if kind not in ('dev', 'test'):
69 |         raise ValueError(
70 |             f'kind must be "dev" or "test", it is {kind}')
71 |
72 |     if not pathlib.Path(submission).is_file():
73 |         raise ValueError(
74 |             f'{kind} submission file not found: {submission}')
75 |
76 |     # retrieve the required filenames that must be present in the submission
77 |     dataset = pathlib.Path(dataset) / f'lexical/{kind}'
78 |     if not dataset.is_dir():
79 |         raise ValueError(f'dataset not found: {dataset}')
80 |     required_files = set(w.stem for w in dataset.glob('*.wav'))
81 |
82 |     # ensure each line in the submission is valid and retrieve the filenames
83 |     submitted_files = list(
84 |         _validate_line(index + 1, line)
85 |         for index, line in enumerate(open(submission, 'r')))
86 |
87 |     # ensure there is no duplicate in the filenames
88 |     duplicates = [
89 |         f for f, n in collections.Counter(submitted_files).items() if n > 1]
90 |     if duplicates:
91 |         raise MismatchError('duplicates found', [], duplicates)
92 |
93 |     # ensure all the required files are here and there is no extra filename
94 |     if required_files != set(submitted_files):
95 |         raise MismatchError(
96 |             'mismatch in filenames', required_files, submitted_files)
97 |
98 |
99 | def load_data(gold_file, submission_file):
100 |     """Returns the data required for evaluation as a pandas data frame
101 |
102 |     Each line of the returned data frame contains a pair (word, non word) and
103 |     has the following columns: 'id', 'voice', 'frequency', 'word', 'score
104 |     word', 'non word', 'score non word'.
105 |
106 |     Parameters
107 |     ----------
108 |     gold_file : path
109 |         The gold file for the lexical dataset (test or dev).
110 |     submission_file : path
111 |         The submission corresponding to the provided gold file.
112 |
113 |     Returns
114 |     -------
115 |     data : pandas.DataFrame
116 |         The data ready for evaluation
117 |
118 |     Raises
119 |     ------
120 |     ValueError
121 |         If the input files cannot be opened or in case of data mismatch between
122 |         the two files.
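    Example
    -------
    A sketch with hypothetical paths (the gold file ships with the dataset,
    the submission file comes from your submission):

        data = load_data(
            'dataset/lexical/dev/gold.csv', 'submission/lexical/dev.txt')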
123 |
124 |     """
125 |     # ensures the two input files are here
126 |     for input_file in (gold_file, submission_file):
127 |         if not pathlib.Path(input_file).is_file():
128 |             raise ValueError(f'file not found: {input_file}')
129 |
130 |     # load them as data frames indexed by filenames
131 |     gold = pandas.read_csv(
132 |         gold_file, header=0, index_col='filename').astype(
133 |             {'frequency': pandas.Int64Dtype()})
134 |     score = pandas.read_csv(
135 |         submission_file, sep=' ', header=None,
136 |         names=['filename', 'score'], index_col='filename')
137 |
138 |     # ensures the filenames in gold and submission are the same
139 |     if set(gold.index) != set(score.index):
140 |         has_less_files = set(gold.index) - set(score.index)
141 |         has_more_files = set(score.index) - set(gold.index)
142 |         print("MismatchError:", file=sys.stderr)
143 |         if len(has_more_files) > 0:
144 |             print('submission has extra files', file=sys.stderr)
145 |             print(f'extra files: {has_more_files}', file=sys.stderr)
146 |
147 |         if len(has_less_files) > 0:
148 |             print('submission is missing files', file=sys.stderr)
149 |             print(f'missing files: {has_less_files}', file=sys.stderr)
150 |         sys.exit(1)
151 |
152 |     # merge the gold and score using filenames, then remove the columns
153 |     # 'phones' and 'filename' as we don't use them for evaluation
154 |     data = pandas.merge(gold, score, on='filename', how='inner')
155 |     data.reset_index(inplace=True)
156 |     # if all non words have their textual version set to NaN, we take their phonemic version instead
157 |     if data[data.correct == 0]['word'].isnull().sum() == len(data[data.correct == 0]):
158 |         data['word'] = data['phones']
159 |     data.drop(columns=['phones', 'filename'], inplace=True)
160 |
161 |     # going from a word per line to a pair (word, non word) per line
162 |     words = data.loc[data['correct'] == 1].reset_index().rename(lambda x: 'w_' + x, axis=1)
163 |     non_words = data.loc[data['correct'] == 0].reset_index().rename(lambda x: 'nw_' + x, axis=1)
164 |     data = pandas.merge(words, non_words, left_on=['w_voice', 'w_id'], right_on=['nw_voice', 'nw_id'])
165 |
166 |     data.drop(
167 |         ['w_index', 'nw_index', 'nw_voice', 'nw_frequency',
168 |          'w_correct', 'nw_correct', 'nw_id', 'nw_length'],
169 |         axis=1, inplace=True)
170 |     data.rename(
171 |         {'w_id': 'id', 'w_voice': 'voice', 'w_frequency': 'frequency',
172 |          'w_word': 'word', 'nw_word': 'non word', 'w_length': 'length',
173 |          'w_score': 'score word', 'nw_score': 'score non word'},
174 |         axis=1, inplace=True)
175 |
176 |     return data
177 |
178 |
179 | def evaluate_by_pair(data):
180 |     """Returns a data frame with the computed scores by (word, non word) pair
181 |
182 |     Parameters
183 |     ----------
184 |     data : pandas.DataFrame
185 |         The result of `load_data`
186 |
187 |     Returns
188 |     -------
189 |     by_pair : pandas.DataFrame
190 |         The evaluated (word, non word) pairs, the data frame has the columns:
191 |         'word', 'non word', 'frequency', 'length' and 'score'.
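    A single pair presentation scores 1 when the word gets a strictly higher
    score than its matched non word, 0.5 on ties and 0 otherwise; the reported
    score is the mean of these values across voices.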
192 |
193 |     """
194 |     # compute the score for each pair in an additional 'score' column, then
195 |     # delete the 'score word' and 'score non word' columns that become useless
196 |     score = data.loc[:, ['score word', 'score non word']].to_numpy()
197 |     data['score'] = (
198 |         0.5 * (score[:, 0] == score[:, 1])
199 |         + (score[:, 0] > score[:, 1]))
200 |     data.drop(columns=['score word', 'score non word'], inplace=True)
201 |
202 |     # finally get the mean score across voices for all pairs
203 |     score = data.groupby('id').apply(lambda x: (
204 |         x.iat[0, 3],  # word
205 |         x.iat[0, 5],  # non word
206 |         x.iat[0, 2],  # frequency
207 |         x.iat[0, 4],  # length
208 |         x['score'].mean()))
209 |     return pandas.DataFrame(
210 |         score.to_list(),
211 |         columns=['word', 'non word', 'frequency', 'length', 'score'])
212 |
213 |
214 | def evaluate_by_frequency(by_pair):
215 |     """Returns a data frame with mean scores by frequency bands
216 |
217 |     The frequency is defined as the number of occurrences of the word in the
218 |     LibriSpeech dataset. The following frequency bands are considered: oov,
219 |     1-5, 6-20, 21-100 and >100.
220 |
221 |     Parameters
222 |     ----------
223 |     by_pair: pandas.DataFrame
224 |         The output of `evaluate_by_pair`
225 |
226 |     Returns
227 |     -------
228 |     by_frequency : pandas.DataFrame
229 |         The score collapsed on frequency bands, the data frame has the
230 |         following columns: 'frequency', 'score'.
231 |
232 |     """
233 |     bands = pandas.cut(
234 |         by_pair.frequency,
235 |         [0, 1, 5, 20, 100, float('inf')],
236 |         labels=['oov', '1-5', '6-20', '21-100', '>100'],
237 |         right=False)
238 |
239 |     return by_pair.score.groupby(bands).agg(
240 |         n='count', score='mean', std='std').reset_index()
241 |
242 |
243 | def evaluate_by_length(by_pair):
244 |     """Returns a data frame with mean scores by word length
245 |
246 |     Parameters
247 |     ----------
248 |     by_pair: pandas.DataFrame
249 |         The output of `evaluate_by_pair`
250 |
251 |     Returns
252 |     -------
253 |     by_length : pandas.DataFrame
254 |         The score collapsed on word length, the data frame has the
255 |         following columns: 'length', 'score'.
256 |
257 |     """
258 |     return by_pair.score.groupby(by_pair.length).agg(
259 |         n='count', score='mean', std='std').reset_index()
260 |
261 |
262 | def evaluate(gold_file, submission_file):
263 |     """Returns the score by (word, non word) pair, by frequency and by length
264 |
265 |     Parameters
266 |     ----------
267 |     gold_file : path
268 |         The gold file (csv format) for the lexical dataset (test or dev).
269 |     submission_file : path
270 |         The submission corresponding to the provided gold file.
271 |
272 |     Returns
273 |     -------
274 |     by_pair : pandas.DataFrame
275 |         The evaluated (word, non word) pairs, the data frame has the columns:
276 |         'word', 'non word' and 'score'.
277 |     by_frequency : pandas.DataFrame
278 |         The score collapsed on frequency bands, the data frame has the
279 |         following columns: 'frequency', 'score'.
280 |     by_length : pandas.DataFrame
281 |         The score collapsed on word length (in number of phones), the data
282 |         frame has the following columns: 'length', 'score'.
283 |
284 |     Raises
285 |     ------
286 |     ValueError
287 |         If the input files cannot be opened or in case of data mismatch between
288 |         the two files.
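    Example
    -------
    A sketch with hypothetical paths:

        by_pair, by_frequency, by_length = evaluate(
            'dataset/lexical/dev/gold.csv', 'submission/lexical/dev.txt')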
289 |
290 |     """
291 |     data = load_data(gold_file, submission_file)
292 |
293 |     by_pair = evaluate_by_pair(data)
294 |     by_frequency = evaluate_by_frequency(by_pair)
295 |     by_length = evaluate_by_length(by_pair)
296 |     by_pair.drop(['frequency', 'length'], axis=1, inplace=True)
297 |
298 |     return by_pair, by_frequency, by_length
299 |
--------------------------------------------------------------------------------
/zerospeech2021/meta.py:
--------------------------------------------------------------------------------
1 | """Validation of meta.yaml"""
2 |
3 | import numbers
4 | import numpy as np
5 | import scipy.spatial
6 | import yaml
7 |
8 | from zerospeech2021.exception import ValidationError, MismatchError
9 |
10 |
11 | def _validate_entries(meta, entries, prefix=None):
12 |     if sorted(meta.keys()) != sorted(entries.keys()):
13 |         message = 'invalid entries'
14 |         if prefix:
15 |             message += f' in {prefix}'
16 |         raise MismatchError(message, entries.keys(), meta.keys())
17 |
18 |     for key, value in entries.items():
19 |         _validate_entry(meta, key, value[0], values=value[1], prefix=prefix)
20 |
21 |
22 | def _validate_entry(meta, name, expected_type, values=None, prefix=None):
23 |     prefix = prefix + '/' if prefix else ''
24 |
25 |     if name not in meta:
26 |         raise ValidationError(f'{prefix}{name} section missing')
27 |
28 |     value = meta[name]
29 |     if not isinstance(value, expected_type):
30 |         raise ValidationError(
31 |             f'{prefix}{name} must be a {expected_type}, it is {type(value)}')
32 |
33 |     if values and value not in values:
34 |         raise ValidationError(
35 |             f'{prefix}{name} must be in ({", ".join(values)}) but is {value}')
36 |
37 |     if expected_type == str and not value:
38 |         raise ValidationError(f'{prefix}{name} must not be an empty string')
39 |
40 |
41 | def _validate_scipy_metric(metric):
42 |     """Raises a ValidationError if `metric` is not a valid metric in scipy"""
43 |     try:
44 |         scipy.spatial.distance.cdist(
45 |             np.ones((5, 2)), np.ones((5, 2)), metric)
46 |     except Exception:
47 |         raise ValidationError(f'invalid metric for semantic: {metric}')
48 |
49 |
50 | def validate(submission):
51 |     """Validation of the meta.yaml in submission
52 |
53 |     Testing that submission/meta.yaml is a valid yaml file and corresponds to
54 |     the following format:
55 |
56 |         author: <str>
57 |         affiliation: <str>
58 |         description: <str>
59 |         open_source: <bool>
60 |         train_set: <str>
61 |         visually_grounded: <bool>
62 |         gpu_budget: <float>
63 |         parameters:
64 |           phonetic:
65 |             metric: <str>, "cosine", "euclidean", "kl" or "kl_symmetric"
66 |             frame_shift: <float>
67 |           semantic:
68 |             metric: <str>
69 |             pooling: <str>, "min", "max", "mean", "sum", "last", "lastlast" or
70 |                      "off"
71 |
72 |     Raises
73 |     ------
74 |     exception.ValidationError
75 |         For any item not corresponding to the prototype.
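    As an illustration, a meta.yaml passing this validation could look like
    this (all values are made up):

        author: Jane Doe
        affiliation: Some University
        description: CPC baseline features
        open_source: true
        train_set: LibriSpeech 960h
        visually_grounded: false
        gpu_budget: 60
        parameters:
          phonetic:
            metric: cosine
            frame_shift: 0.01
          semantic:
            metric: cosine
            pooling: max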
76 | 77 | """ 78 | meta_file = submission / 'meta.yaml' 79 | 80 | if not meta_file.is_file(): 81 | raise ValidationError("missing meta.yaml file") 82 | 83 | try: 84 | meta = yaml.safe_load(meta_file.open('r').read().replace('\t', ' ')) 85 | except yaml.YAMLError as err: 86 | raise ValidationError(f'failed to parse {meta_file}: {err}') 87 | 88 | if not meta or not isinstance(meta, dict): 89 | raise ValidationError("meta.yaml file is not valid") 90 | 91 | # top level entries 92 | _validate_entries( 93 | meta, 94 | {'author': (str, None), 95 | 'affiliation': (str, None), 96 | 'description': (str, None), 97 | 'open_source': (bool, None), 98 | 'train_set': (str, None), 99 | 'visually_grounded': (bool, None), 100 | 'gpu_budget': (numbers.Number, None), 101 | 'parameters': (dict, None)}) 102 | 103 | # parameters entries 104 | _validate_entries( 105 | meta['parameters'], 106 | {'phonetic': (dict, None), 'semantic': (dict, None)}, 107 | prefix='parameters') 108 | 109 | # parameters/phonetic level 110 | _validate_entries( 111 | meta['parameters']['phonetic'], 112 | {'metric': (str, ['cosine', 'euclidean', 'kl', 'kl_symmetric']), 113 | 'frame_shift': (numbers.Number, None)}, 114 | prefix='parameters/phonetic') 115 | 116 | # parameters/semantic level 117 | _validate_entries( 118 | meta['parameters']['semantic'], 119 | {'metric': (str, None), 120 | 'pooling': (str, [ 121 | 'min', 'max', 'mean', 'sum', 'last', 'lastlast', 'off'])}, 122 | prefix='parameters/semantic') 123 | 124 | _validate_scipy_metric(meta['parameters']['semantic']['metric']) 125 | 126 | return meta['open_source'] 127 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic.py: -------------------------------------------------------------------------------- 1 | """ Phonetic task zerospeech 2021 """ 2 | import collections 3 | from dataclasses import dataclass 4 | from itertools import chain 5 | from typing import Optional 6 | from enum import Enum 7 | 8 | import numpy as np 9 | import pandas 10 | import joblib 11 | 12 | from zerospeech2021 import exception 13 | from zerospeech2021.phonetic_eval import eval_ABX 14 | 15 | LIBRISPEECH_SETS = { 16 | 'dev': ['dev-clean', 'dev-other'], 17 | 'test': ['test-clean', 'test-other']} 18 | 19 | 20 | ABXFileTypes = Enum('ABXFileTypes', 21 | '.pt .npy .txt .wav .flac .mp3') 22 | ABXMode = Enum('ABXMode', 'all within across') 23 | 24 | ABXDistanceMode = Enum('ABXDistanceMode', 25 | 'euclidian cosine kl kl_symmetric') 26 | 27 | 28 | @dataclass 29 | class AbxArguments: 30 | """ List of arguments to provide to abx in phonetic_eval.abx""" 31 | # path to input data 32 | path_data: str 33 | # path to item file 34 | path_item_file: str 35 | # Path to a CPC checkpoint 36 | path_checkpoint: Optional[str] = None 37 | # size of a single feature 38 | feature_size: Optional[float] = float(0.1) 39 | # Use the GPU to compute distances 40 | cuda: bool = True 41 | # extension (of input files ?) 42 | file_extension: ABXFileTypes = '.txt' 43 | # Choose the mode of the ABX score to compute 44 | mode: ABXMode = 'all' 45 | # Choose the kind of distance to use to compute 46 | distance_mode: ABXDistanceMode = 'cosine' 47 | # Max size of a group while computing the ABX score 48 | max_size_group: int = 10 49 | # When computing the ABX across score, maximum 50 | # number of speaker X to sample per couple A,B. 
51 | max_x_across: int = 5 52 | # location to output the results 53 | out: Optional[str] = None 54 | 55 | 56 | def get_input_files(dataset_directory, _set, file_type): 57 | """ Returns a list of all the files in a set """ 58 | res = [] 59 | for s in LIBRISPEECH_SETS[_set]: 60 | res.append((dataset_directory / s).rglob(f"*.{file_type}")) 61 | return list(chain(*res)) 62 | 63 | 64 | def get_submitted_files(submission_directory, _set): 65 | """ Returns a list of all the files in a set """ 66 | res = [] 67 | for s in LIBRISPEECH_SETS[_set]: 68 | res.append((submission_directory / s).rglob("*")) 69 | return list(chain(*res)) 70 | 71 | 72 | def _validate_file(source_file, submission, dataset): 73 | """Ensure a file has the correct format 74 | 75 | Verifies that a feature file is a 2D numpy array of floats and it matches a 76 | file in the dataset. 77 | 78 | :param source_file: input file from dataset 79 | :param submission: location of submitted files 80 | :param dataset: location of dataset 81 | 82 | :return: a pair (target_file, ncols), where target_file is the file in the 83 | submission directory and ncols is the number of columns in the array. 84 | 85 | :raises exception.EntryMissingError if an entry is not present 86 | 87 | """ 88 | try: 89 | target_file = submission / source_file.relative_to(dataset) 90 | target_file = target_file.with_suffix('.txt') 91 | if not target_file.is_file(): 92 | raise exception.EntryMissingError( 93 | source=source_file, expected=target_file) 94 | 95 | try: 96 | array = np.loadtxt(str(target_file)) 97 | except Exception: 98 | raise exception.FileFormatError( 99 | target_file, 'not a valid numpy array') 100 | 101 | if array.dtype != np.dtype('float'): 102 | raise exception.FileFormatError( 103 | target_file, "array loaded is not dtype = float") 104 | 105 | if array.ndim != 2: 106 | raise exception.FileFormatError( 107 | target_file, 'not a 2D array') 108 | except exception.ValidationError as error: 109 | return str(error), None, None 110 | 111 | return None, target_file, array.shape[1] 112 | 113 | 114 | def validate(submission, dataset, kind, njobs=1): 115 | """Validate a subset of the submissions for the phonetic task 116 | 117 | :param submission_directory: location of submissions 118 | :param dataset_directory: location of data 119 | :param kind: subset type (dev | test) 120 | :param njobs: number of paralle processes to use for validation 121 | 122 | :raise ValidationError: if the submission is not valid 123 | 124 | """ 125 | if kind not in LIBRISPEECH_SETS.keys(): 126 | raise ValueError(f'kind must be "dev" or "test", it is {kind}') 127 | 128 | input_files = get_input_files(dataset, kind, "wav") 129 | if not input_files: 130 | raise exception.ValidationError( 131 | f'found no wav files in {dataset}') 132 | 133 | submitted_files = get_submitted_files(submission, kind) 134 | if not submitted_files: 135 | raise exception.ValidationError( 136 | f'found no files in {submission}') 137 | 138 | # ensure we have only .txt files in submission 139 | no_txt_files = [str(f) for f in submitted_files if f.suffix != '.txt'] 140 | if no_txt_files: 141 | raise exception.MismatchError('extra files found', [], no_txt_files) 142 | 143 | # ensure that there are no duplicates 144 | duplicates = [ 145 | f for f, n in collections.Counter(submitted_files).items() if n > 1 146 | ] 147 | if duplicates: 148 | raise exception.MismatchError('duplicates found', [], duplicates) 149 | 150 | # check that necessary files are present and valid 151 | valid_entries = 
joblib.Parallel(n_jobs=njobs)(
152 |         joblib.delayed(_validate_file)(f, submission, dataset)
153 |         for f in input_files)
154 |     errors, valid_entries, ncols = zip(*valid_entries)
155 |
156 |     # ensure there are no detected errors
157 |     errors = [e for e in errors if e]
158 |     if errors:
159 |         for e in errors[:10]:
160 |             print(f'ERROR: {e}')
161 |         if len(errors) > 10:
162 |             print(f'ERROR: ... and {len(errors) - 10} more!')
163 |         raise exception.ValidationError(f'error detected in phonetic {kind}')
164 |
165 |     # ensure all submitted files have the same number of columns
166 |     if len(set(ncols)) != 1:
167 |         raise exception.ValidationError(
168 |             f'all files must have the same number of columns '
169 |             f'but have: {set(ncols)}')
170 |
171 |     if collections.Counter(submitted_files) != collections.Counter(valid_entries):
172 |         raise exception.MismatchError(
173 |             'mismatch in filenames', valid_entries, submitted_files)
174 |
175 |
176 | def evaluate(submission, dataset, kind, metric, frame_shift, force_cpu=False):
177 |     """Returns the phonetic evaluation (ABX scores) as a data frame
178 |
179 |     Parameters
180 |     ----------
181 |     submission : path
182 |         The directory where the phonetic submission is stored (expects
183 |         subdirectories dev-clean, dev-other, etc)
184 |     dataset : path
185 |         The directory where the phonetic dataset is stored
186 |     kind : str
187 |         Must be 'dev' or 'test'
188 |     metric : str
189 |         Must be 'cosine', 'euclidean', 'kl' or 'kl_symmetric'
190 |     frame_shift : float
191 |         The shift between two features frames in s.
192 |     force_cpu : bool, optional
193 |         When True use the CPU, otherwise use the GPU (default to False)
194 |
195 |     Returns
196 |     -------
197 |     score : pandas.DataFrame
198 |         A data frame with the ABX score obtained for each combination of
199 |         {dev, test}, {clean, other} and {across, within}.
200 |
201 |     """
202 |     results = {}
203 |     for subkind in LIBRISPEECH_SETS[kind]:
204 |         print(
205 |             f'Evaluating phonetic {subkind} '
206 |             f'(metric={metric}, frame_shift={frame_shift})')
207 |
208 |         arg_obj = AbxArguments(
209 |             path_data=str(submission / subkind),
210 |             path_item_file=str(dataset / subkind / f'{subkind}.item'),
211 |             distance_mode=metric,
212 |             feature_size=frame_shift,
213 |             cuda=not force_cpu)
214 |
215 |         results[subkind] = eval_ABX.main(arg_obj=arg_obj)
216 |
217 |     results2 = [
218 |         (dset.split('-')[0], dset.split('-')[1], kind, score)
219 |         for dset, v in results.items() for kind, score in v.items()]
220 |     return pandas.DataFrame(
221 |         results2, columns=['dataset', 'sub-dataset', 'type', 'score'])
222 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/ABX_src/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/ABX_src/abx_group_computation.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import torch 3 | import math 4 | import libri_light_dtw as dtw 5 | import progressbar 6 | 7 | 8 | def get_distance_function_from_name(name_str): 9 | if name_str == 'euclidean': 10 | return get_euclidian_distance_batch 11 | if name_str == 'cosine': 12 | return get_cosine_distance_batch 13 | if name_str == 'kl': 14 | return get_kl_distance_batch 15 | if name_str == 'kl_symmetric': 16 | return get_kl_distance_symmetric_batch 17 | raise ValueError(f"Invalid distance mode") 18 | 19 | 20 | def check_dtw_group_validity(a, b, x): 21 | assert (len(a.size()) == len(b.size())) 22 | assert (len(a.size()) == len(x.size())) 23 | assert (a.size(2) == x.size(2)) 24 | assert (a.size(2) == b.size(2)) 25 | 26 | 27 | def get_kl_distance_batch(a1, a2, epsilon=1e-6): 28 | N1, S1, D = a1.size() # Batch x Seq x Channel 29 | N2, S2, D = a2.size() # Batch x Seq x Channel 30 | 31 | # (P * (P / Q).log()).sum() 32 | div = (a1.view(N1, 1, S1, 1, D) + epsilon) / (a2.view(1, N2, 1, S2, D) + epsilon) 33 | prod = (a1.view(N1, 1, S1, 1, D)) * div.log() 34 | 35 | return prod.sum(dim=4) 36 | 37 | 38 | def get_kl_distance_symmetric_batch(a1, a2, epsilon=1e-6): 39 | N1, S1, D = a1.size() 40 | N2, S2, D = a2.size() 41 | 42 | div1 = (a1.view(N1, 1, S1, 1, D) + epsilon) / (a2.view(1, N2, 1, S2, D) + epsilon) 43 | div2 = (a2.view(1, N2, 1, S2, D) + epsilon) / (a1.view(N1, 1, S1, 1, D) + epsilon) 44 | 45 | prod1 = (a1.view(N1, 1, S1, 1, D)) * div1.log() 46 | prod2 = (a2.view(1, N2, 1, S2, D)) * div2.log() 47 | 48 | return (0.5 * prod1 + 0.5 * prod2).sum(dim=4) 49 | 50 | 51 | def get_cosine_distance_batch(a1, a2, epsilon=1e-8): 52 | r""" a1 and a2 must be normalized""" 53 | N1, S1, D = a1.size() # Batch x Seq x Channel 54 | N2, S2, D = a2.size() # Batch x Seq x Channel 55 | 56 | prod = (a1.view(N1, 1, S1, 1, D)) * (a2.view(1, N2, 1, S2, D)) 57 | # Sum accross the channel dimension 58 | prod = torch.clamp(prod.sum(dim=4), -1, 1).acos() / math.pi 59 | 60 | return prod 61 | 62 | 63 | def get_euclidian_distance_batch(a1, a2): 64 | N1, S1, D = a1.size() 65 | N2, S2, D = a2.size() 66 | diff = a1.view(N1, 1, S1, 1, D) - a2.view(1, N2, 1, S2, D) 67 | return torch.sqrt((diff ** 2).sum(dim=4)) 68 | 69 | 70 | def get_distance_group_dtw(a1, a2, size1, size2, 71 | ignore_diag=False, symmetric=False, 72 | distance_function=get_cosine_distance_batch): 73 | N1, S1, D = a1.size() 74 | N2, S2, D = a2.size() 75 | if size1.size(0) != N1: 76 | print(a1.size(), size1.size()) 77 | print(a2.size(), size2.size()) 78 | assert (size1.size(0) == N1) 79 | assert (size2.size(0) == N2) 80 | 81 | distance_mat = distance_function(a1, a2).detach().cpu().numpy() 82 | return dtw.dtw_batch(a1, a2, size1, size2, 83 | distance_mat, 84 | ignore_diag, symmetric) 85 | 86 | 87 | def get_theta_group_dtw(a, b, x, sa, sb, sx, distance_function, symmetric): 88 | check_dtw_group_validity(a, b, x) 89 | 90 | dxb = get_distance_group_dtw( 91 | x, b, sx, sb, distance_function=distance_function) 92 | dxa = get_distance_group_dtw(x, a, sx, sa, ignore_diag=symmetric, 93 | symmetric=symmetric, 94 | distance_function=distance_function) 95 | 96 | Nx, Na = dxa.size() 97 | Nx, Nb = dxb.size() 98 | 99 | if symmetric: 100 | n_pos = Na * (Na - 1) 101 | max_val = dxb.max().item() 102 | for i in range(Na): 103 | dxa[i, i] = max_val + 1 104 | else: 105 | n_pos = Na * Nx 106 | 107 | dxb = dxb.view(Nx, 1, Nb).expand(Nx, Na, Nb) 108 | dxa = dxa.view(Nx, Na, 1).expand(Nx, Na, Nb) 109 | 110 | sc = (dxa < dxb).sum() + 0.5 * (dxa == dxb).sum() 111 | sc /= (n_pos * Nb) 112 | 113 | 
return sc.item() 114 | 115 | 116 | def loc_dtw(data, distance_function, symmetric): 117 | coords, group_a, group_b, group_x = data 118 | group_a_data, group_a_size = group_a 119 | group_b_data, group_b_size = group_b 120 | group_x_data, group_x_size = group_x 121 | theta = get_theta_group_dtw(group_a_data, 122 | group_b_data, 123 | group_x_data, 124 | group_a_size, 125 | group_b_size, 126 | group_x_size, 127 | distance_function, 128 | symmetric) 129 | 130 | return (coords, 1 - theta) 131 | 132 | 133 | def get_abx_scores_dtw_on_group(group_iterator, 134 | distance_function, 135 | symmetric): 136 | data_list = [] 137 | coords_list = [] 138 | bar = progressbar.ProgressBar(prefix=' > ', maxval=len(group_iterator)) 139 | bar.start() 140 | 141 | with torch.no_grad(): 142 | for index, group in enumerate(group_iterator): 143 | bar.update(index) 144 | coords, abx = loc_dtw(group, distance_function, symmetric) 145 | data_list.append(abx) 146 | coords_list.append(coords) 147 | bar.finish() 148 | 149 | return torch.sparse.FloatTensor(torch.LongTensor(coords_list).t(), 150 | torch.FloatTensor(data_list), 151 | group_iterator.get_board_size()) 152 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/ABX_src/abx_iterators.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | import progressbar 4 | import math 5 | import random 6 | 7 | 8 | def normalize_with_singularity(x): 9 | r""" 10 | Normalize the given vector across the third dimension. 11 | Extend all vectors by eps=1e-12 to put the null vector at the maximal 12 | cosine distance from any non-null vector. 13 | """ 14 | S, H = x.size() 15 | norm_x = (x**2).sum(dim=1, keepdim=True) 16 | 17 | x /= torch.sqrt(norm_x) 18 | zero_vals = (norm_x == 0).view(S) 19 | x[zero_vals] = 1 / math.sqrt(H) 20 | border_vect = torch.zeros((S, 1), 21 | dtype=x.dtype, 22 | device=x.device) + 1e-12 23 | border_vect[zero_vals] = -2*1e12 24 | return torch.cat([x, border_vect], dim=1) 25 | 26 | 27 | def load_item_file(path_item_file): 28 | r""" Load a .item file indicating the triplets for the ABX score. 
The
29 |     input file must have the following format:
30 |     line 0 : whatever (not read)
31 |     line > 0: #file_ID onset offset #phone prev-phone next-phone speaker
32 |     onset : beginning of the triplet (in s)
33 |     offset : end of the triplet (in s)
34 |     """
35 |     with open(path_item_file, 'r') as file:
36 |         data = file.readlines()[1:]
37 |
38 |     data = [x.replace('\n', '') for x in data]
39 |
40 |     out = {}
41 |
42 |     phone_match = {}
43 |     speaker_match = {}
44 |     context_match = {}
45 |
46 |     for line in data:
47 |         items = line.split()
48 |         assert (len(items) == 7)
49 |         fileID = items[0]
50 |         if fileID not in out:
51 |             out[fileID] = []
52 |
53 |         onset, offset = float(items[1]), float(items[2])
54 |         context = '+'.join([items[4], items[5]])
55 |         phone = items[3]
56 |         speaker = items[6]
57 |
58 |         if phone not in phone_match:
59 |             s = len(phone_match)
60 |             phone_match[phone] = s
61 |         phone_id = phone_match[phone]
62 |
63 |         if context not in context_match:
64 |             s = len(context_match)
65 |             context_match[context] = s
66 |         context_id = context_match[context]
67 |
68 |         if speaker not in speaker_match:
69 |             s = len(speaker_match)
70 |             speaker_match[speaker] = s
71 |         speaker_id = speaker_match[speaker]
72 |
73 |         out[fileID].append([onset, offset, context_id, phone_id, speaker_id])
74 |
75 |     return out, context_match, phone_match, speaker_match
76 |
77 |
78 | def get_features_group(in_data, index_order):
79 |
80 |     in_index = list(range(len(in_data)))
81 |     in_index.sort(key=lambda x: [in_data[x][i] for i in index_order])
82 |     out_groups = []
83 |     last_values = [in_data[in_index[0]][i] for i in index_order]
84 |     i_s = 0
85 |     curr_group = [[] for i in index_order]
86 |     n_orders = len(index_order) - 1
87 |     tmp = [in_data[i] for i in in_index]
88 |
89 |     for index, item in enumerate(tmp):
90 |         for order_index, order in enumerate(index_order):
91 |             if item[order] != last_values[order_index]:
92 |                 curr_group[-1].append((i_s, index))
93 |                 for i in range(n_orders, order_index, -1):
94 |                     curr_group[i-1].append(curr_group[i])
95 |                     curr_group[i] = []
96 |                 if order_index == 0:
97 |                     out_groups += curr_group[0]
98 |                     curr_group[0] = []
99 |                 last_values = [item[i] for i in index_order]
100 |                 i_s = index
101 |                 break
102 |
103 |     if i_s < len(in_data):
104 |         curr_group[-1].append((i_s, len(in_data)))
105 |         for i in range(n_orders, 0, -1):
106 |             curr_group[i-1].append(curr_group[i])
107 |         out_groups += curr_group[0]
108 |
109 |     return in_index, out_groups
110 |
111 |
112 | class ABXFeatureLoader:
113 |
114 |     def __init__(self,
115 |                  path_item_file,
116 |                  seqList,
117 |                  featureMaker,
118 |                  stepFeature,
119 |                  normalize):
120 |         r"""
121 |         Args:
122 |             path_item_file (str): path to the .item files containing the ABX
123 |                                   triplets
124 |             seqList (list): list of items (fileID, path) where fileID refers to
125 |                             the file's ID as used in path_item_file, and path
126 |                             is the actual path to the input audio sequence
127 |             featureMaker (function): either a function or a callable object.
128 |                                      Takes a path as input and outputs the
129 |                                      feature sequence corresponding to the
130 |                                      given file.
131 |             normalize (bool): if True all input features will be normalized
132 |                               across the channels dimension.
133 |
134 |         Note:
135 |             You can use this dataset with pre-computed features. For example, if
136 |             you have a collection of features files in the torch .pt format then
137 |             you can just set featureMaker = torch.load.
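        Example:
            A sketch with pre-computed .pt features (file IDs and paths are
            hypothetical; stepFeature is the number of feature frames per
            second, so 100 corresponds to one frame every 10 ms):

                loader = ABXFeatureLoader(
                    'dev-clean.item',
                    [('84-121123-0000', 'features/84-121123-0000.pt')],
                    featureMaker=torch.load,
                    stepFeature=100,
                    normalize=True)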
138 | """ 139 | 140 | files_data, self.context_match, self.phone_match, self.speaker_match = \ 141 | load_item_file(path_item_file) 142 | self.seqNorm = True 143 | self.stepFeature = stepFeature 144 | self.loadFromFileData(files_data, seqList, featureMaker, normalize) 145 | 146 | def loadFromFileData(self, files_data, seqList, feature_maker, normalize): 147 | 148 | # self.features[i]: index_start, size, context_id, phone_id, speaker_id 149 | self.features = [] 150 | self.INDEX_CONTEXT = 2 151 | self.INDEX_PHONE = 3 152 | self.INDEX_SPEAKER = 4 153 | data = [] 154 | 155 | totSize = 0 156 | 157 | print(" > Building the input features...") 158 | bar = progressbar.ProgressBar(prefix=' > ', maxval=len(seqList)) 159 | bar.start() 160 | 161 | for index, vals in enumerate(seqList): 162 | 163 | fileID, file_path = vals 164 | bar.update(index) 165 | if fileID not in files_data: 166 | continue 167 | 168 | features = feature_maker(file_path) 169 | if normalize: 170 | features = normalize_with_singularity(features) 171 | 172 | features = features.detach().cpu() 173 | 174 | phone_data = files_data[fileID] 175 | 176 | for phone_start, phone_end, context_id, phone_id, speaker_id in phone_data: 177 | 178 | index_start = max( 179 | 0, int(math.ceil(self.stepFeature * phone_start - 0.5))) 180 | index_end = min(features.size(0), 181 | int(math.floor(self.stepFeature * phone_end - 0.5))) 182 | 183 | if index_start >= features.size(0) or index_end <= index_start: 184 | continue 185 | 186 | loc_size = index_end - index_start 187 | self.features.append([totSize, loc_size, context_id, 188 | phone_id, speaker_id]) 189 | data.append(features[index_start:index_end]) 190 | totSize += loc_size 191 | 192 | bar.finish() 193 | 194 | self.data = torch.cat(data, dim=0) 195 | self.feature_dim = self.data.size(1) 196 | 197 | def get_data_device(self): 198 | return self.data.device 199 | 200 | def cuda(self): 201 | self.data = self.data.cuda() 202 | 203 | def cpu(self): 204 | self.data = self.data.cpu() 205 | 206 | def get_max_group_size(self, i_group, i_sub_group): 207 | id_start, id_end = self.group_index[i_group][i_sub_group] 208 | return max([self.features[i][1] for i in range(id_start, id_end)]) 209 | 210 | def get_ids(self, index): 211 | context_id, phone_id, speaker_id = self.features[index][2:] 212 | return context_id, phone_id, speaker_id 213 | 214 | def __getitem__(self, index): 215 | i_data, out_size, context_id, phone_id, speaker_id = self.features[index] 216 | return self.data[i_data:(i_data + out_size)], out_size, (context_id, phone_id, speaker_id) 217 | 218 | def __len__(self): 219 | return len(self.features) 220 | 221 | def get_n_speakers(self): 222 | return len(self.speaker_match) 223 | 224 | def get_n_context(self): 225 | return len(self.context_match) 226 | 227 | def get_n_phone(self): 228 | return len(self.phone_match) 229 | 230 | def get_n_groups(self): 231 | return len(self.group_index) 232 | 233 | def get_n_sub_group(self, index_sub_group): 234 | return len(self.group_index[index_sub_group]) 235 | 236 | def get_iterator(self, mode, max_size_group): 237 | if mode == 'within': 238 | return ABXWithinGroupIterator(self, max_size_group) 239 | if mode == 'across': 240 | return ABXAcrossGroupIterator(self, max_size_group) 241 | raise ValueError(f"Invalid mode: {mode}") 242 | 243 | 244 | class ABXIterator: 245 | r""" 246 | Base class building ABX's triplets. 
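    Concrete subclasses (ABXWithinGroupIterator and ABXAcrossGroupIterator
    below) implement `__iter__` and `get_board_size`.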
247 | """ 248 | 249 | def __init__(self, abxDataset, max_size_group): 250 | self.max_size_group = max_size_group 251 | self.dataset = abxDataset 252 | self.len = 0 253 | 254 | self.index_csp, self.groups_csp = \ 255 | get_features_group(abxDataset.features, 256 | [abxDataset.INDEX_CONTEXT, 257 | abxDataset.INDEX_SPEAKER, 258 | abxDataset.INDEX_PHONE]) 259 | 260 | def get_group(self, i_start, i_end): 261 | data = [] 262 | max_size = 0 263 | to_take = list(range(i_start, i_end)) 264 | if i_end - i_start > self.max_size_group: 265 | to_take = random.sample(to_take, k=self.max_size_group) 266 | for i in to_take: 267 | loc_data, loc_size, loc_id = self.dataset[self.index_csp[i]] 268 | max_size = max(loc_size, max_size) 269 | data.append(loc_data) 270 | 271 | N = len(to_take) 272 | out_data = torch.zeros(N, max_size, 273 | self.dataset.feature_dim, 274 | device=self.dataset.get_data_device()) 275 | out_size = torch.zeros(N, dtype=torch.long, 276 | device=self.dataset.get_data_device()) 277 | 278 | for i in range(N): 279 | size = data[i].size(0) 280 | out_data[i, :size] = data[i] 281 | out_size[i] = size 282 | 283 | return out_data, out_size, loc_id 284 | 285 | def __len__(self): 286 | return self.len 287 | 288 | def get_board_size(self): 289 | r""" 290 | Get the output dimension of the triplet's space. 291 | """ 292 | pass 293 | 294 | 295 | class ABXWithinGroupIterator(ABXIterator): 296 | r""" 297 | Iterator giving the triplets for the ABX within score. 298 | """ 299 | 300 | def __init__(self, abxDataset, max_size_group): 301 | 302 | super(ABXWithinGroupIterator, self).__init__(abxDataset, 303 | max_size_group) 304 | self.symmetric = True 305 | 306 | for context_group in self.groups_csp: 307 | for speaker_group in context_group: 308 | if len(speaker_group) > 1: 309 | for i_start, i_end in speaker_group: 310 | if i_end - i_start > 1: 311 | self.len += (len(speaker_group) - 1) 312 | 313 | def __iter__(self): 314 | for i_c, context_group in enumerate(self.groups_csp): 315 | for i_s, speaker_group in enumerate(context_group): 316 | n_phones = len(speaker_group) 317 | if n_phones == 1: 318 | continue 319 | 320 | for i_a in range(n_phones): 321 | i_start_a, i_end_a = self.groups_csp[i_c][i_s][i_a] 322 | if i_end_a - i_start_a == 1: 323 | continue 324 | 325 | for i_b in range(n_phones): 326 | if i_b == i_a: 327 | continue 328 | 329 | i_start_b, i_end_b = self.groups_csp[i_c][i_s][i_b] 330 | data_b, size_b, id_b = self.get_group(i_start_b, 331 | i_end_b) 332 | data_a, size_a, id_a = self.get_group(i_start_a, 333 | i_end_a) 334 | 335 | out_coords = id_a[2], id_a[1], id_b[1], id_a[0] 336 | yield out_coords, (data_a, size_a), (data_b, size_b), \ 337 | (data_a, size_a) 338 | 339 | def get_board_size(self): 340 | 341 | return (self.dataset.get_n_speakers(), 342 | self.dataset.get_n_phone(), 343 | self.dataset.get_n_phone(), 344 | self.dataset.get_n_context()) 345 | 346 | 347 | class ABXAcrossGroupIterator(ABXIterator): 348 | r""" 349 | Iterator giving the triplets for the ABX across score. 
350 | """ 351 | 352 | def __init__(self, abxDataset, max_size_group): 353 | 354 | super(ABXAcrossGroupIterator, self).__init__(abxDataset, 355 | max_size_group) 356 | self.symmetric = False 357 | self.get_speakers_from_cp = {} 358 | self.max_x = 5 359 | 360 | for context_group in self.groups_csp: 361 | for speaker_group in context_group: 362 | for i_start, i_end in speaker_group: 363 | c_id, p_id, s_id = self.dataset.get_ids( 364 | self.index_csp[i_start]) 365 | if c_id not in self.get_speakers_from_cp: 366 | self.get_speakers_from_cp[c_id] = {} 367 | if p_id not in self.get_speakers_from_cp[c_id]: 368 | self.get_speakers_from_cp[c_id][p_id] = {} 369 | self.get_speakers_from_cp[c_id][p_id][s_id] = ( 370 | i_start, i_end) 371 | 372 | for context_group in self.groups_csp: 373 | for speaker_group in context_group: 374 | if len(speaker_group) > 1: 375 | for i_start, i_end in speaker_group: 376 | c_id, p_id, s_id = self.dataset.get_ids( 377 | self.index_csp[i_start]) 378 | self.len += (len(speaker_group) - 1) * (min(self.max_x, 379 | len(self.get_speakers_from_cp[c_id][p_id]) - 1)) 380 | 381 | def get_other_speakers_in_group(self, i_start_group): 382 | c_id, p_id, s_id = self.dataset.get_ids(self.index_csp[i_start_group]) 383 | return [v for k, v in self.get_speakers_from_cp[c_id][p_id].items() if k != s_id] 384 | 385 | def get_abx_triplet(self, i_a, i_b, i_x): 386 | i_start_a, i_end_a = i_a 387 | data_a, size_a, id_a = self.get_group(i_start_a, i_end_a) 388 | 389 | i_start_b, i_end_b = i_b 390 | data_b, size_b, id_b = self.get_group(i_start_b, i_end_b) 391 | 392 | i_start_x, i_end_x = i_x 393 | data_x, size_x, id_x = self.get_group(i_start_x, i_end_x) 394 | 395 | out_coords = id_a[2], id_a[1], id_b[1], id_a[0], id_x[2] 396 | return out_coords, (data_a, size_a), (data_b, size_b), \ 397 | (data_x, size_x) 398 | 399 | def __iter__(self): 400 | for i_c, context_group in enumerate(self.groups_csp): 401 | for i_s, speaker_group in enumerate(context_group): 402 | n_phones = len(speaker_group) 403 | if n_phones == 1: 404 | continue 405 | 406 | for i_a in range(n_phones): 407 | i_start_a, i_end_a = self.groups_csp[i_c][i_s][i_a] 408 | ref = self.get_other_speakers_in_group(i_start_a) 409 | if len(ref) > self.max_x: 410 | speakers_a = random.sample(ref, k=self.max_x) 411 | else: 412 | speakers_a = ref 413 | 414 | for i_start_x, i_end_x in speakers_a: 415 | 416 | for i_b in range(n_phones): 417 | if i_b == i_a: 418 | continue 419 | 420 | i_start_b, i_end_b = self.groups_csp[i_c][i_s][i_b] 421 | yield self.get_abx_triplet((i_start_a, i_end_a), (i_start_b, i_end_b), (i_start_x, i_end_x)) 422 | 423 | def get_board_size(self): 424 | 425 | return (self.dataset.get_n_speakers(), 426 | self.dataset.get_n_phone(), 427 | self.dataset.get_n_phone(), 428 | self.dataset.get_n_context(), 429 | self.dataset.get_n_speakers()) 430 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/ABX_src/dtw.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import torch 3 | import numpy as np 4 | cimport numpy as np 5 | cimport cython 6 | from cpython cimport bool 7 | ctypedef np.float32_t CTYPE_t # cost type 8 | ctypedef np.intp_t IND_t # array index type 9 | CTYPE = np.float32 # cost type 10 | 11 | 12 | 13 | def dtw_batch(x,y, sx, sy, dist_mat, ignore_diag=False, symetric=False): 14 | 15 | Nx = dist_mat.shape[0] 16 | Ny = dist_mat.shape[1] 17 | 18 | out = torch.zeros((Nx, Ny)) 19 | 20 | for i in range(Nx): 21 | start_index = i if symetric else 0 22 | i_sx = sx[i] 23 | for j in range(start_index, Ny): 24 | 25 | j_sy = sy[j] 26 | if ignore_diag and i == j: 27 | continue 28 | distance = _dtw(i_sx, j_sy, dist_mat[i,j,:i_sx,:j_sy],True) 29 | out[i][j] = distance 30 | if symetric and i != j: 31 | out[j][i] = out[i][j] 32 | 33 | return out 34 | 35 | 36 | 37 | cpdef _dtw(IND_t N, IND_t M, CTYPE_t[:,:] dist_array, bool normalized): 38 | cdef IND_t i, j 39 | cdef CTYPE_t[:,:] cost = np.empty((N, M), dtype=CTYPE) 40 | cdef CTYPE_t final_cost, c_diag, c_left, c_up 41 | # initialization 42 | cost[0,0] = dist_array[0,0] 43 | for i in range(1,N): 44 | cost[i,0] = dist_array[i,0] + cost[i-1,0] 45 | for j in range(1,M): 46 | cost[0,j] = dist_array[0,j] + cost[0,j-1] 47 | # the dynamic programming loop 48 | for i in range(1,N): 49 | for j in range(1,M): 50 | cost[i,j] = dist_array[i,j] + min(cost[i-1,j], cost[i-1,j-1], cost[i,j-1]) 51 | 52 | final_cost = cost[N-1, M-1] 53 | if normalized: 54 | path_len = 1 55 | i = N-1 56 | j = M-1 57 | while i > 0 and j > 0: 58 | c_up = cost[i - 1, j] 59 | c_left = cost[i, j-1] 60 | c_diag = cost[i-1, j-1] 61 | if c_diag <= c_left and c_diag <= c_up: 62 | i -= 1 63 | j -= 1 64 | elif c_left <= c_up: 65 | j -= 1 66 | else: 67 | i -= 1 68 | path_len += 1 69 | if i == 0: 70 | path_len += j 71 | if j == 0: 72 | path_len += i 73 | final_cost /= path_len 74 | return final_cost 75 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/CPC_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import argparse 3 | import torch 4 | import torchaudio 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def download_state_dict(model_name): 10 | 11 | base_url = "https://dl.fbaipublicfiles.com/librilight/CPC_checkpoints" 12 | return torch.hub.load_state_dict_from_url(f"{base_url}/{model_name}") 13 | 14 | 15 | def load_cpc_features(state_dict): 16 | 17 | config = state_dict["config"] 18 | weights = state_dict["weights"] 19 | encoder = CPCEncoder(config["hiddenEncoder"]) 20 | ar_net = CPCAR(config["hiddenEncoder"], config["hiddenGar"], False, 21 | config["nLevelsGRU"]) 22 | 23 | model = CPCModel(encoder, ar_net) 24 | model.load_state_dict(weights, strict=False) 25 | output = FeatureModule(model, False) 26 | output.config = config 27 | return output 28 | 29 | 30 | def get_features_state_dict(feature_module): 31 | config = feature_module.config 32 | if config is None: 33 | raise ValueError("The input feature_module should have config defined") 34 | weights = feature_module.model.state_dict() 35 | return {"config": config, "weights": weights} 36 | 37 | 38 | def build_feature_from_file(file_path, feature_maker, max_size_seq=64000): 39 | r""" 40 | Apply the featureMaker to the given file. 
41 | Arguments: 42 | - feature_maker (FeatureModule): model to apply 43 | - file_path (string): path of the sequence to load 44 | (normalization of the output along the time dimension, to get chunks 45 | of mean zero and var 1, is handled by the feature maker itself via its seq_norm attribute) 46 | - max_size_seq (int): maximal size of a chunk 47 | Return: 48 | a torch tensor of size Seq_size x Feature_dim 49 | """ 50 | seq = torchaudio.load(file_path)[0] 51 | sizeSeq = seq.size(1) 52 | start = 0 53 | out = [] 54 | while start < sizeSeq: 55 | if start + max_size_seq > sizeSeq: 56 | break 57 | end = min(sizeSeq, start + max_size_seq) 58 | subseq = (seq[:, start:end]).view(1, 1, -1).cuda(device=0) 59 | with torch.no_grad(): 60 | features = feature_maker(subseq) 61 | out.append(features.detach().cpu()) 62 | start += max_size_seq 63 | 64 | if start < sizeSeq: 65 | subseq = (seq[:, -max_size_seq:]).view(1, 1, -1).cuda(device=0) 66 | with torch.no_grad(): 67 | features = feature_maker(subseq) 68 | df = subseq.size(2) // features.size(1) 69 | delta = (sizeSeq - start) // df 70 | out.append(features[:, -delta:].detach().cpu()) 71 | 72 | out = torch.cat(out, dim=1) 73 | return out.view(out.size(1), out.size(2)) 74 | 75 | ############################################################################## 76 | # Minimal code to load a CPC checkpoint 77 | ############################################################################## 78 | 79 | 80 | class ChannelNorm(nn.Module): 81 | 82 | def __init__(self, 83 | numFeatures, 84 | epsilon=1e-05, 85 | affine=True): 86 | 87 | super(ChannelNorm, self).__init__() 88 | if affine: 89 | self.weight = nn.parameter.Parameter( 90 | torch.Tensor(1, numFeatures, 1)) 91 | self.bias = nn.parameter.Parameter(torch.Tensor(1, numFeatures, 1)) 92 | else: 93 | self.weight = None 94 | self.bias = None 95 | self.epsilon = epsilon 96 | self.p = 0 97 | self.affine = affine 98 | self.reset_parameters() 99 | 100 | def reset_parameters(self): 101 | if self.affine: 102 | torch.nn.init.ones_(self.weight) 103 | torch.nn.init.zeros_(self.bias) 104 | 105 | def forward(self, x): 106 | 107 | cumMean = x.mean(dim=1, keepdim=True) 108 | cumVar = x.var(dim=1, keepdim=True) 109 | x = (x - cumMean)*torch.rsqrt(cumVar + self.epsilon) 110 | 111 | if self.weight is not None: 112 | x = x * self.weight + self.bias 113 | return x 114 | 115 | 116 | class CPCEncoder(nn.Module): 117 | 118 | def __init__(self, 119 | sizeHidden=512): 120 | 121 | super(CPCEncoder, self).__init__() 122 | normLayer = ChannelNorm 123 | 124 | self.conv0 = nn.Conv1d(1, sizeHidden, 10, stride=5, padding=3) 125 | self.batchNorm0 = normLayer(sizeHidden) 126 | self.conv1 = nn.Conv1d(sizeHidden, sizeHidden, 8, stride=4, padding=2) 127 | self.batchNorm1 = normLayer(sizeHidden) 128 | self.conv2 = nn.Conv1d(sizeHidden, sizeHidden, 4, 129 | stride=2, padding=1) 130 | self.batchNorm2 = normLayer(sizeHidden) 131 | self.conv3 = nn.Conv1d(sizeHidden, sizeHidden, 4, stride=2, padding=1) 132 | self.batchNorm3 = normLayer(sizeHidden) 133 | self.conv4 = nn.Conv1d(sizeHidden, sizeHidden, 4, stride=2, padding=1) 134 | self.batchNorm4 = normLayer(sizeHidden) 135 | self.DOWNSAMPLING = 160 136 | 137 | def getDimOutput(self): 138 | return self.conv4.out_channels 139 | 140 | def forward(self, x): 141 | x = F.relu(self.batchNorm0(self.conv0(x))) 142 | x = F.relu(self.batchNorm1(self.conv1(x))) 143 | x = F.relu(self.batchNorm2(self.conv2(x))) 144 | x = F.relu(self.batchNorm3(self.conv3(x))) 145 | x = F.relu(self.batchNorm4(self.conv4(x))) 146 | return x 147 | 148 |
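# ----------------------------------------------------------------------------
# An illustrative sketch (not part of the module API): chaining the helpers
# above to extract CPC features from a single wav file. The wav path is
# hypothetical, and a GPU is assumed, as in build_feature_from_file().
#
#   state_dict = download_state_dict("60k_epoch4-d0f474de.pt")
#   feature_maker = load_cpc_features(state_dict)
#   feature_maker.cuda()
#   features = build_feature_from_file("/path/to/utterance.wav", feature_maker)
#   # features: tensor of shape (n_frames, feature_dim); the encoder
#   # downsamples 16 kHz audio by a factor of 160, i.e. one frame per 10 ms
# ----------------------------------------------------------------------------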
149 | class CPCAR(nn.Module): 150 | 151 | def __init__(self, 152 | dimEncoded, 153 | dimOutput, 154 | keepHidden, 155 | nLevelsGRU): 156 | 157 | super(CPCAR, self).__init__() 158 | self.baseNet = nn.LSTM(dimEncoded, dimOutput, 159 | num_layers=nLevelsGRU, batch_first=True) 160 | self.hidden = None 161 | self.keepHidden = keepHidden 162 | 163 | def getDimOutput(self): 164 | return self.baseNet.hidden_size 165 | 166 | def forward(self, x): 167 | 168 | try: 169 | self.baseNet.flatten_parameters() 170 | except RuntimeError: 171 | pass 172 | x, h = self.baseNet(x, self.hidden) 173 | if self.keepHidden: 174 | if isinstance(h, tuple): 175 | self.hidden = tuple(x.detach() for x in h) 176 | else: 177 | self.hidden = h.detach() 178 | return x 179 | 180 | 181 | class CPCModel(nn.Module): 182 | 183 | def __init__(self, 184 | encoder, 185 | AR): 186 | 187 | super(CPCModel, self).__init__() 188 | self.gEncoder = encoder 189 | self.gAR = AR 190 | 191 | def forward(self, batchData, label): 192 | encodedData = self.gEncoder(batchData).permute(0, 2, 1) 193 | cFeature = self.gAR(encodedData) 194 | return cFeature, encodedData, label 195 | 196 | 197 | class FeatureModule(torch.nn.Module): 198 | r""" 199 | A simpler interface to handle CPC models. Useful for a smooth workflow when 200 | working with CPC trained features. 201 | """ 202 | 203 | def __init__(self, featureMaker, get_encoded, 204 | seq_norm=True): 205 | super(FeatureModule, self).__init__() 206 | self.get_encoded = get_encoded 207 | self.model = featureMaker 208 | self.seq_norm = seq_norm 209 | self.config = None 210 | 211 | def forward(self, batch_data): 212 | # Input Size : BatchSize x 1 x SeqSize 213 | # Feature size: BatchSize x SeqSize x ChannelSize 214 | if self.is_cuda: 215 | batch_data = batch_data.cuda() 216 | cFeature, encoded, _ = self.model(batch_data, None) 217 | if self.get_encoded: 218 | cFeature = encoded 219 | if self.seq_norm: 220 | mean = cFeature.mean(dim=1, keepdim=True) 221 | var = cFeature.var(dim=1, keepdim=True) 222 | cFeature = (cFeature - mean) / torch.sqrt(var + 1e-08) 223 | return cFeature 224 | 225 | def cuda(self): 226 | self.is_cuda = True 227 | super(FeatureModule, self).cuda() 228 | 229 | def cpu(self): 230 | self.is_cuda = False 231 | super(FeatureModule, self).cpu() 232 | 233 | def get_output_dim(self): 234 | if self.get_encoded: 235 | return self.config["hiddenEncoder"] 236 | return self.config["hiddenGar"] 237 | 238 | 239 | if __name__ == "__main__": 240 | 241 | parser = argparse.ArgumentParser(description='Download model') 242 | parser.add_argument('model_name', type=str, 243 | choices=["600h", "6kh", "60kh"]) 244 | parser.add_argument('output', type=str) 245 | args = parser.parse_args() 246 | 247 | CPC_MODELS_NAMES = {"60kh": "60k_epoch4-d0f474de.pt", 248 | "600h": "600h-bdd7ced6.pt", 249 | "6kh": "6k_epoch30-9df0493c.pt"} 250 | state_dict = download_state_dict(CPC_MODELS_NAMES[args.model_name]) 251 | torch.save(state_dict, args.output) 252 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/LICENCE.txt: -------------------------------------------------------------------------------- 1 | The original libri_light_eval module can be found at https://github.com/facebookresearch/libri-light/tree/master/eval 2 | 3 | This module is licensed under the MIT licence; all credit goes to the original creators. 4 | 5 | MIT License 6 | 7 | Copyright (c) Facebook, Inc. and its affiliates.
8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/README.md: -------------------------------------------------------------------------------- 1 | # Note from ZR2021 organizers 2 | 3 | This is a modified version of the libri-light evaluation code available at 4 | https://github.com/facebookresearch/libri-light/tree/master/eval. 5 | 6 | Modifications consist only of file renaming and package reorganization to 7 | facilitate installation and integration with the zerospeech2021 package. 8 | 9 | 10 | # Eval 11 | 12 | You will find here all relevant evaluations launched on the LibriLight dataset. 13 | 14 | ## ABX 15 | 16 | ABX is an evaluation metric for unsupervised representation learning. It evaluates feature files based on their ability to distinguish sounds like /i/ and /e/ as in "bit" versus "bet". 17 | 18 | ### Setup 19 | 20 | To set up the ABX evaluation script you need to: 21 | 22 | 1. Compile the cython code. Just do: 23 | 24 | ```console 25 | cd ABX_src 26 | python setup.py build_ext --inplace 27 | ``` 28 | 29 | 2. Check that everything works properly with: 30 | ```console 31 | cd ABX_src 32 | nosetests -d 33 | ``` 34 | 35 | 3. Download the Librilight `.item` files here: [ABX_data.tgz](https://dl.fbaipublicfiles.com/librilight/data/ABX_data.tgz). 36 | 37 | This archive contains four `.item` files constructed from the Librispeech dev and test set: `dev-clean.item`, `dev-other.item`, `test-clean.item`, and `test-other.item`, which provide the labels for the ABX evaluation. 38 | 39 | ### How to run the ABX evaluation? 40 | 41 | Dump your features in .pt (torch) or .npy (numpy) format somewhere. Your features dataset should look like this: 42 | 43 | ```console 44 | \data_dir 45 | file_name_0.extension 46 | file_name_1.extension 47 | ... 48 | ``` 49 | 50 | Each file should contain a 2D array of shape Sequence_size x Feature_dimension. 51 | 52 | Then run: 53 | ```console 54 | python eval_ABX.py $PATH_FEATURE_DIR $PATH_TO_ABX_ITEMS/$DB_NAME.item --file_extension $EXTENSION --out $OUTPUT_DIR --feature_size $FEATURE_SIZE 55 | ``` 56 | 57 | Where `$DB_NAME` is one of the 4 evaluation datasets (`dev-clean`, `dev-other`, `test-clean`, `test-other`) and `$FEATURE_SIZE` is the duration (in s) of one feature of the model (for a `10ms` frame rate, this would be `0.01`).
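For reference, here is a minimal sketch of how such a feature directory could be produced from your own encoder (everything below is illustrative: `my_encoder` and the directory names are placeholders, not part of this package):

```python
import pathlib
import torch

from my_encoder import encode_wav  # placeholder: your own feature extractor

audio_dir = pathlib.Path("wavs")        # hypothetical input directory
feature_dir = pathlib.Path("data_dir")  # the directory passed to eval_ABX.py
feature_dir.mkdir(exist_ok=True)

for wav in sorted(audio_dir.glob("*.wav")):
    # encode_wav is assumed to return a (Sequence_size, Feature_dimension) tensor
    features = encode_wav(wav)
    torch.save(features.cpu(), feature_dir / f"{wav.stem}.pt")
```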
58 | 59 | 60 | ## Pre-computed checkpoints 61 | 62 | Some pre-computed models trained with CPC are available for use! To load a model, just use CPC_loader.py; for example, to retrieve the model trained on the 60k hours dataset: 63 | 64 | ```console 65 | python CPC_loader.py 60kh $PATH_OUTPUT_CHECKPOINT 66 | ``` 67 | 68 | You can directly evaluate the ABX score on this checkpoint by running: 69 | ```console 70 | python eval_ABX.py $PATH_AUDIO_DIR ABX_data/$DB_NAME.item --file_extension $EXTENSION --out $OUTPUT_DIR --path_checkpoint $PATH_OUTPUT_CHECKPOINT 71 | ``` 72 | 73 | Where $EXTENSION corresponds to an audio format (.wav, .flac, ...). 74 | 75 | ## Linear Classification PER 76 | 77 | Representations can also be evaluated by how easy it is to train a linear phoneme classifier. 78 | 79 | ### Setup 80 | 81 | To set up the PER evaluation script you need to compile the cython code it relies on. Just do: 82 | ```console 83 | cd PER_src 84 | python setup.py build_ext --inplace 85 | ``` 86 | 87 | You will also need to download the [10h labelled data](https://dl.fbaipublicfiles.com/librilight/data/librispeech_finetuning.tgz). 88 | 89 | ### How to run the PER evaluation? 90 | 91 | First you need to train a linear classifier on your features. For example, if you want to evaluate a model fine-tuned on the 10h dataset, just run: 92 | ```console 93 | python eval_PER.py train $PATH_TO_10h_AUDIO_DATA_DIR $PATH_TO_10h_PHONE_DATA $PATH_TO_THE_JSON_PHONE_CONVERTER $PATH_TO_THE_CPC_MODEL -o $PATH_OUT 94 | ``` 95 | 96 | Then you can run the PER computation, for example on librispeech100/test-clean: 97 | ```console 98 | python eval_PER.py per $PATH_OUT/checkpoint.pt $PATH_TO_TEST_CLEAN $PATH_TO_TEST_CLEAN_PHONES --file_extension .flac 99 | ``` 100 | 101 | 102 | ## WER 103 | 104 | We provide here a test of representations based on word error rate. 105 | 106 | ### Setup 107 | * wav2letter python bindings: [(how-to)](https://github.com/facebookresearch/wav2letter/tree/master/bindings/python). 108 | * KenLM-based Librispeech language model, can be found [here](http://www.openslr.org/11/) or downloaded [here](https://dl.fbaipublicfiles.com/librilight/data/4-gram.bin); it should be placed into `WER_data/`. 109 | * lexicon, [download](https://dl.fbaipublicfiles.com/librilight/data/lexicon.txt.gz); it should be placed into `WER_data/`. 110 | * jiwer, installable via `pip install jiwer`. 111 | 112 | ### How to run the WER evaluation?
113 | 114 | Training a letter classifier on top of a pre-trained CPC model: 115 | ```console 116 | python eval_WER.py --path_train=$PATH_FINETUNING --path_val=$PATH_TO_DEV_CLEAN --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR 117 | 118 | ``` 119 | Evaluating it with wav2letter decoder: 120 | ```console 121 | python eval_WER.py --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR --path_wer=$PATH_TO_TEST_CLEAN 122 | ``` 123 | 124 | You can also train and evaluate afterwards, in a single command: 125 | ```console 126 | python eval_WER.py --path_train=$PATH_FINETUNING --path_val=$PATH_TO_DEV_CLEAN --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR --path_wer=$PATH_TO_TEST_CLEAN 127 | ``` 128 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | warnings.simplefilter("ignore") -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/eval_ABX.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import argparse 3 | import os 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | 9 | import zerospeech2021.phonetic_eval.ABX_src.abx_group_computation as abx_g 10 | import zerospeech2021.phonetic_eval.ABX_src.abx_iterators as abx_it 11 | from zerospeech2021.phonetic_eval.CPC_loader import load_cpc_features, build_feature_from_file 12 | 13 | 14 | def find_all_files(path_dir, extension): 15 | out = [] 16 | for root, dirs, filenames in os.walk(path_dir): 17 | for f in filenames: 18 | if f.endswith(extension): 19 | out.append(((str(Path(f).stem)), os.path.join(root, f))) 20 | return out 21 | 22 | 23 | def reduce_sparse_data(quotient, divisor): 24 | return quotient / (1e-08 * (divisor == 0) + divisor) 25 | 26 | 27 | def load_pt(x): 28 | data = torch.load(x, 'cpu') 29 | assert(len(data.size()) == 2) 30 | return data 31 | 32 | 33 | def load_npy(x): 34 | data = torch.tensor(np.load(x)) 35 | assert(len(data.size()) == 2) 36 | return data 37 | 38 | 39 | def load_txt(x): 40 | data = torch.tensor(np.loadtxt(x)) 41 | assert (len(data.size()) == 2) 42 | return data 43 | 44 | 45 | def ABX(feature_function, 46 | path_item_file, 47 | seq_list, 48 | distance_mode, 49 | step_feature, 50 | modes, 51 | cuda=False, 52 | max_x_across=5, 53 | max_size_group=30): 54 | 55 | # ABX dataset 56 | ABXDataset = abx_it.ABXFeatureLoader(path_item_file, seq_list, 57 | feature_function, step_feature, True) 58 | 59 | if cuda: 60 | ABXDataset.cuda() 61 | 62 | # Distance function 63 | distance_function = abx_g.get_distance_function_from_name(distance_mode) 64 | 65 | # Output 66 | scores = {} 67 | 68 | # ABX within 69 | if 'within' in modes: 70 | print(" > Computing ABX within speakers...") 71 | ABXIterator = ABXDataset.get_iterator('within', max_size_group) 72 | group_confusion = abx_g.get_abx_scores_dtw_on_group(ABXIterator, 73 | distance_function, 74 | ABXIterator.symmetric) 75 | n_data = group_confusion._values().size(0) 76 | index_ = torch.sparse.LongTensor(group_confusion._indices(), 77 | torch.ones((n_data), 78 | dtype=torch.float), 79 | group_confusion.size()) 80 | divisor_context = torch.sparse.sum(index_, 
dim=3).to_dense() 81 | group_confusion = torch.sparse.sum(group_confusion, dim=3).to_dense() 82 | group_confusion = reduce_sparse_data(group_confusion, divisor_context) 83 | S, p1, p2 = group_confusion.size() 84 | 85 | index_speaker = divisor_context > 0 86 | divisor_speaker = index_speaker.sum(dim=0) 87 | phone_confusion = reduce_sparse_data(group_confusion.sum(dim=0), 88 | divisor_speaker) 89 | 90 | scores['within'] = (phone_confusion.sum() / 91 | (divisor_speaker > 0).sum()).item() 92 | print(f" > ...done. ABX within : {scores['within']}") 93 | 94 | # ABX across 95 | if 'across' in modes: 96 | print(" > Computing ABX across speakers...") 97 | ABXIterator = ABXDataset.get_iterator('across', max_size_group) 98 | ABXIterator.max_x = max_x_across 99 | group_confusion = abx_g.get_abx_scores_dtw_on_group(ABXIterator, 100 | distance_function, 101 | ABXIterator.symmetric) 102 | n_data = group_confusion._values().size(0) 103 | index_ = torch.sparse.LongTensor(group_confusion._indices(), 104 | torch.ones((n_data), 105 | dtype=torch.float), 106 | group_confusion.size()) 107 | divisor_context = torch.sparse.sum(index_, dim=[3, 4]).to_dense() 108 | group_confusion = torch.sparse.sum( 109 | group_confusion, dim=[3, 4]).to_dense() 110 | group_confusion = reduce_sparse_data(group_confusion, divisor_context) 111 | S, p1, p2 = group_confusion.size() 112 | 113 | index_speaker = divisor_context > 0 114 | divisor_speaker = index_speaker.sum(dim=0) 115 | phone_confusion = reduce_sparse_data(group_confusion.sum(dim=0), 116 | divisor_speaker) 117 | scores['across'] = (phone_confusion.sum() / 118 | (divisor_speaker > 0).sum()).item() 119 | print(f" > ...done. ABX across : {scores['across']}") 120 | 121 | return scores 122 | 123 | 124 | def parse_args(argv): 125 | 126 | parser = argparse.ArgumentParser(description='ABX metric') 127 | 128 | parser.add_argument('path_data', type=str, 129 | help="Path to directory containing the data") 130 | parser.add_argument('path_item_file', type=str, 131 | help="Path to the .item file") 132 | parser.add_argument('--path_checkpoint', type=str, default=None, 133 | help="Path to a CPC checkpoint. If set, apply the " 134 | "model to the input data to compute the features") 135 | parser.add_argument('--file_extension', type=str, default='.pt', 136 | choices=['.pt', '.npy', '.txt', '.wav', '.flac', '.mp3']) 137 | parser.add_argument('--feature_size', type=float, default=0.01, 138 | help="Size (in s) of one feature") 139 | parser.add_argument('--cuda', action='store_true', 140 | help="Use the GPU to compute distances") 141 | parser.add_argument('--mode', type=str, default='all', 142 | choices=['all', 'within', 'across'], 143 | help="Choose the mode of the ABX score to compute") 144 | parser.add_argument('--distance_mode', type=str, default='cosine', 145 | choices=['euclidian', 'cosine', 'kl', 'kl_symmetric'], 146 | help="Choose the kind of distance to use to compute " 147 | "the ABX score.") 148 | parser.add_argument("--max_size_group", type=int, default=10, 149 | help="Max size of a group while computing the " 150 | "ABX score. A small value will make the code " 151 | "faster but less precise.") 152 | parser.add_argument("--max_x_across", type=int, default=5, 153 | help="When computing the ABX across score, maximum " 154 | "number of speaker X to sample per couple A,B. " 155 | " A small value will make the code faster but " 156 | "less precise.") 157 | parser.add_argument("--out", type=str, default=None, 158 | help="Path where the results should be saved") 159 | 160 | # multi-gpu / multi-node 161 | return parser.parse_args(argv) 162 | 163 |
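# An illustrative sketch (not part of the CLI): the same evaluation can be
# driven programmatically through main() below; the feature and item paths
# here are hypothetical.
#
#   scores = main(argv=['features/dev-clean', 'ABX_data/dev-clean.item',
#                       '--file_extension', '.pt', '--feature_size', '0.01'])
#   # scores == {'within': ..., 'across': ...}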
" 155 | " A small value will make the code faster but " 156 | "less precise.") 157 | parser.add_argument("--out", type=str, default=None, 158 | help="Path where the results should be saved") 159 | 160 | # multi-gpu / multi-node 161 | return parser.parse_args(argv) 162 | 163 | 164 | def main(argv=None, arg_obj=None): 165 | 166 | if argv: 167 | args = parse_args(argv) 168 | else: 169 | args = arg_obj 170 | 171 | if args.path_checkpoint is None: 172 | if args.file_extension == '.pt': 173 | feature_function = load_pt 174 | elif args.file_extension == '.npy': 175 | feature_function = load_npy 176 | elif args.file_extension == '.txt': 177 | feature_function = load_txt 178 | else: 179 | state_dict = torch.load(args.path_checkpoint) 180 | feature_maker = load_cpc_features(state_dict) 181 | feature_maker.cuda() 182 | feature_function = lambda x: build_feature_from_file(x, feature_maker) 183 | 184 | # Modes 185 | if args.mode == 'all': 186 | modes = ["within", "across"] 187 | else: 188 | modes = [args.mode] 189 | 190 | step_feature = 1 / args.feature_size 191 | 192 | # Get the list of sequences 193 | seq_list = find_all_files(args.path_data, args.file_extension) 194 | 195 | scores = ABX(feature_function, args.path_item_file, 196 | seq_list, args.distance_mode, 197 | step_feature, modes, 198 | cuda=args.cuda, 199 | max_x_across=args.max_x_across, 200 | max_size_group=args.max_size_group) 201 | 202 | return scores 203 | -------------------------------------------------------------------------------- /zerospeech2021/semantic.py: -------------------------------------------------------------------------------- 1 | """Semantic part of the ZR2021 (validation and evaluation)""" 2 | 3 | import pathlib 4 | 5 | import numpy as np 6 | import pandas 7 | import scipy.spatial 8 | import scipy.stats 9 | import joblib 10 | 11 | from zerospeech2021.exception import ( 12 | MismatchError, FileFormatError, ValidationError, EntryMissingError) 13 | 14 | 15 | def _validate_file(source_file, submission): 16 | """Verifies that a feature file is a 2D numpy array of floats 17 | 18 | :param source_file: input file 19 | :param submission: location of submitted files 20 | :return: a pair (error, ncols) 21 | 22 | """ 23 | try: 24 | target_file = submission / (source_file + '.txt') 25 | if not target_file.is_file(): 26 | raise EntryMissingError(source=source_file, expected=target_file) 27 | 28 | try: 29 | array = np.loadtxt(str(target_file)) 30 | except Exception: 31 | raise FileFormatError(target_file, 'not a valid numpy array') 32 | 33 | if array.dtype != np.dtype('float'): 34 | raise FileFormatError(target_file, "not a float array") 35 | 36 | if array.ndim != 2: 37 | raise FileFormatError(target_file, 'not a 2D array') 38 | 39 | except ValidationError as error: 40 | return str(error), None 41 | 42 | return None, array.shape[1] 43 | 44 | 45 | def validate(submission, dataset, kind, subset, njobs=1): 46 | """Raises a ValidationError if the `submission` is not valid 47 | 48 | The submission folder must include .txt files, each file 49 | containing a matrix of floats. Each .wav file in the dataset must 50 | have its .txt equivalent in the submission directory. 51 | 52 | Parameters 53 | ---------- 54 | submisison: path 55 | The submisison directory to validate. 56 | dataset: path 57 | The root path of the ZR2021 dataset. 58 | kind: str 59 | Must be 'dev' or 'test'. 
45 | def validate(submission, dataset, kind, subset, njobs=1): 46 | """Raises a ValidationError if the `submission` is not valid 47 | 48 | The submission folder must include .txt files, each file 49 | containing a matrix of floats. Each .wav file in the dataset must 50 | have its .txt equivalent in the submission directory. 51 | 52 | Parameters 53 | ---------- 54 | submission: path 55 | The submission directory to validate. 56 | dataset: path 57 | The root path of the ZR2021 dataset. 58 | kind: str 59 | Must be 'dev' or 'test'. 60 | subset: str 61 | Must be 'synthetic' or 'librispeech' 62 | njobs : int 63 | Number of parallel processes to use 64 | 65 | Raises 66 | ------ 67 | ValueError 68 | If `kind` is not 'dev' or 'test', or if `submission` or `dataset` is not 69 | an existing directory. 70 | ValidationError 71 | If one line of the submission file is not valid or if the submitted 72 | filenames do not match the required ones. 73 | 74 | """ 75 | if kind not in ('dev', 'test'): 76 | raise ValueError( 77 | f'kind must be "dev" or "test", it is {kind}') 78 | 79 | if subset not in ('librispeech', 'synthetic'): 80 | raise ValueError( 81 | f'subset must be "librispeech" or "synthetic", it is {subset}') 82 | 83 | submission = pathlib.Path(submission) / kind / subset 84 | if not submission.is_dir(): 85 | raise ValueError( 86 | f'{kind} submission directory not found: {submission}') 87 | 88 | dataset = pathlib.Path(dataset) / f'semantic/{kind}/{subset}' 89 | if not dataset.is_dir(): 90 | raise ValueError(f'dataset not found: {dataset}') 91 | 92 | # retrieve the required filenames that must be present in the submission 93 | required = set(f.stem for f in dataset.glob('*.wav')) 94 | if not required: 95 | raise ValidationError(f'{dataset} contains no .wav files') 96 | 97 | # retrieve the submitted files 98 | submitted = set(submission.glob('*')) 99 | if not submitted: 100 | raise ValidationError(f'{submission} contains no files') 101 | 102 | # ensure we have only .txt files in submission 103 | no_txt_files = [str(f) for f in submitted if f.suffix != '.txt'] 104 | if no_txt_files: 105 | raise MismatchError('extra files found', [], no_txt_files) 106 | 107 | # ensure each required file is present in the submission 108 | submitted = set(f.stem for f in submitted) 109 | if submitted != required: 110 | raise MismatchError('files mismatch', required, submitted) 111 | 112 | # ensure each submitted file has a correct format and the number of columns 113 | # is constant across files 114 | errors, ncols = zip(*joblib.Parallel(n_jobs=njobs)( 115 | joblib.delayed(_validate_file)(f, submission) for f in submitted)) 116 | 117 | # ensure there are no detected errors 118 | errors = [e for e in errors if e] 119 | if errors: 120 | for e in errors[:10]: 121 | print(f'ERROR: {e}') 122 | if len(errors) > 10: 123 | print(f'ERROR: ...
and {len(errors) - 10} more!') 124 | raise ValidationError(f'error detected in semantic {kind}') 125 | 126 | # ensure all submitted files have the same number of columns 127 | if len(set(ncols)) != 1: 128 | raise ValidationError( 129 | f'all files must have the same number of columns ' 130 | f'but have: {set(ncols)}') 131 | 132 | 133 | def _compute_distance(pair, gold, pool, metric): 134 | """Returns the mean distance between a pair of words""" 135 | function = { 136 | 'librispeech': _compute_distance_librispeech, 137 | 'synthetic': _compute_distance_synthetic}[pair['type']] 138 | 139 | return function(pair, gold, pool, metric) 140 | 141 | 142 | def _compute_distance_librispeech(pair, gold, pool, metric): 143 | # filter out 'synthetic' data from gold 144 | assert pair['type'] == 'librispeech' 145 | gold = gold[gold['type'] == 'librispeech'] 146 | 147 | # get the list of tokens corresponding to the given pair of words 148 | tokens_1 = gold['filename'][gold['word'] == pair['word_1']] 149 | tokens_2 = gold['filename'][gold['word'] == pair['word_2']] 150 | assert 0 < len(tokens_1) <= 10 and 0 < len(tokens_2) <= 10 151 | 152 | X = np.asarray(pool[pool['filename'].isin(tokens_1)]['pooling'].tolist()) 153 | Y = np.asarray(pool[pool['filename'].isin(tokens_2)]['pooling'].tolist()) 154 | 155 | # compute the mean distance across all pairs of tokens after pooling 156 | return scipy.spatial.distance.cdist(X, Y, metric=metric).mean() 157 | 158 | 159 | def _compute_distance_synthetic(pair, gold, pool, metric): 160 | # filter out 'librispeech' data from gold 161 | assert pair['type'] == 'synthetic' 162 | gold = gold[gold['type'] == 'synthetic'] 163 | 164 | # get the list of tokens corresponding to the given pair of words 165 | tokens_1 = gold[['filename', 'voice']][gold['word'] == pair['word_1']] 166 | tokens_2 = gold[['filename', 'voice']][gold['word'] == pair['word_2']] 167 | tokens = tokens_1.merge(tokens_2, on='voice').drop(['voice'], axis=1) 168 | 169 | # compute the mean of distances within a given voice 170 | dist = 0 171 | for _, (filename_x, filename_y) in tokens.iterrows(): 172 | X = pool[pool['filename'] == filename_x]['pooling'].item() 173 | Y = pool[pool['filename'] == filename_y]['pooling'].item() 174 | dist += scipy.spatial.distance.cdist( 175 | np.atleast_2d(X), np.atleast_2d(Y), metric=metric)[0][0] 176 | return dist / len(tokens) 177 | 178 | 179 | def _correlation(df): 180 | # choose 'similarity' or 'relatedness' column (the one with no NaN) 181 | human = df.similarity if df.relatedness.hasnans else df.relatedness 182 | assert not human.hasnans 183 | 184 | # return Spearman correlation.
Human scores are similarities (high when 185 | # close) so we take the opposite to have a quantity close to a distance 186 | # (low when close) 187 | return 100 * scipy.stats.spearmanr( 188 | - human.to_numpy(), df.score.to_numpy())[0] 189 | 190 | 191 | def _compute_correlation(pairs): 192 | """Returns the Spearman correlation between human and machine scores""" 193 | # for each (type/dataset) combination, compute spearman correlation 194 | serie = pairs.groupby([pairs['type'], pairs['dataset']]).apply(_correlation) 195 | 196 | # transform raw result into a usable dataframe 197 | return serie.to_frame().rename(columns={0: 'correlation'}).reset_index() 198 | 199 | 200 | def evaluate(gold_file, pairs_file, submission_dir, metric, pooling, njobs=1): 201 | """Returns the distance of each word pair and the overall correlations 202 | 203 | Parameters 204 | ---------- 205 | gold_file : path 206 | The gold file (csv format) for the dev or test semantic dataset. 207 | pairs_file : path 208 | The pairs file (csv format) corresponding to `gold_file` (dev or test). 209 | submission_dir : path 210 | The submission directory containing the embeddings to evaluate. 211 | metric : str 212 | The metric to use for distance computation, must be a metric supported 213 | by `scipy.spatial.distance.cdist` (see 214 | https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html) 215 | pooling : str 216 | The pooling method to use, must be 'min', 'max', 'mean', 'sum', 'last', 217 | 'lastlast' or 'off'. 218 | 219 | Returns 220 | ------- 221 | pairs : pandas.DataFrame 222 | The same content as in `pairs_file` with an additional 'score' column 223 | containing the evaluated machine scores for each pair of words. 224 | correlation : pandas.DataFrame 225 | The Spearman correlation between human judgements and machine scores on 226 | each dataset. The frame contains the columns 'type', 'dataset' and 227 | 'correlation'. 228 | 229 | Raises 230 | ------ 231 | ValueError 232 | If one of the input parameters is not valid. 233 | OSError 234 | If a file defined in `gold_file` is not found in `submission_dir`.
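Examples
--------
An illustrative sketch (all file and directory names are hypothetical):

>>> pairs, correlation = evaluate(
...     'gold.csv', 'pairs.csv', pathlib.Path('submission/semantic/dev'),
...     metric='cosine', pooling='mean')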
235 | 236 | """ 237 | # ensures input arguments are correct 238 | for input_file in (gold_file, pairs_file): 239 | if not pathlib.Path(input_file).is_file(): 240 | raise ValueError(f'file not found: {input_file}') 241 | if not pathlib.Path(submission_dir).is_dir(): 242 | raise ValueError(f'{submission_dir} is not a directory') 243 | 244 | # get the pooling function 245 | try: 246 | _pooling_function = { 247 | 'max': lambda x: np.max(x, axis=0), 248 | 'mean': lambda x: np.mean(x, axis=0), 249 | 'min': lambda x: np.min(x, axis=0), 250 | 'sum': lambda x: np.sum(x, axis=0), 251 | 'last': lambda x: x[-1], 252 | 'lastlast': lambda x: x[-2], 253 | 'off': lambda x: x}[pooling] 254 | except KeyError: 255 | raise ValueError( 256 | 'pooling method must be "max", "min", "mean", "sum", ' 257 | '"last", "lastlast" or "off"') 258 | 259 | # load the pairs and gold files 260 | pairs = pandas.read_csv(pairs_file, header=0) 261 | gold = pandas.read_csv(gold_file, header=0) 262 | 263 | # a data frame [filename, type, pooling] computed in parallel 264 | print(f' > Computing {pooling} pooling...') 265 | pool = pandas.DataFrame( 266 | joblib.Parallel(n_jobs=njobs)( 267 | joblib.delayed( 268 | lambda x: (x[1], x[0], _pooling_function( 269 | np.loadtxt(submission_dir / x[0] / (x[1] + '.txt'))))) 270 | (x) for _, x in gold.iterrows()), 271 | columns=['filename', 'type', 'pooling']) 272 | 273 | print(f' > Computing {metric} distances...') 274 | pairs['score'] = [ 275 | _compute_distance(pair, gold, pool, metric) 276 | for _, pair in pairs.iterrows()] 277 | 278 | # compute correlations 279 | print(' > Computing Spearman correlations...') 280 | correlation = _compute_correlation(pairs) 281 | return pairs, correlation 282 | -------------------------------------------------------------------------------- /zerospeech2021/syntactic.py: -------------------------------------------------------------------------------- 1 | """Syntactic part of the ZR2021 (validation and evaluation)""" 2 | 3 | import collections 4 | import pathlib 5 | import sys 6 | 7 | import pandas 8 | from zerospeech2021.exception import FormatError, MismatchError 9 | 10 | 11 | def _validate_line(index, line): 12 | """Auxiliary function to validate() 13 | 14 | Returns the filename in `line`, checks the score and raises FormatError if 15 | the line is not valid. 16 | 17 | """ 18 | # ensure the line has two fields separated by a space 19 | line = line.strip() 20 | fields = line.split(' ') 21 | if len(fields) != 2: 22 | raise FormatError( 23 | index, f'must be "<filename> <score>" but is "{line}"') 24 | 25 | filename, score = tuple(fields) 26 | 27 | # ensure the second field is a positive float 28 | try: 29 | float(score) 30 | except ValueError: 31 | raise FormatError( 32 | index, f'<score> must be a float but is "{score}"') 33 | 34 | return filename 35 | 36 | 37 | def validate(submission, dataset, kind): 38 | """Raises a ValidationError if the `submission` file is not valid 39 | 40 | * The submission file must be in text format, each line as: 41 | 42 | <filename> <score> 43 | 44 | * The <filename> is the name of a wav file in the syntactic dataset, 45 | without path nor extension ("xKtnLJYiWGt", not 46 | "syntactic/dev/xKtnLJYiWGt.wav") 47 | 48 | * The <score> is a positive float 49 | 50 | Parameters 51 | ---------- 52 | submission: path 53 | The submission file to validate, each line must be formatted as "<filename> <score>".
54 | dataset: path 55 | The root path of the ZR2021 dataset 56 | kind: str, optional 57 | Must be 'dev' or 'test' 58 | 59 | Raises 60 | ------ 61 | ValueError 62 | If `kind` is not 'dev' or 'test', if `submission` is not a file or if 63 | the dataset is not an existing directory. 64 | ValidationError 65 | If one line of the submission file is not valid or if the submitted 66 | filenames do not match the required ones. 67 | 68 | """ 69 | if kind not in ('dev', 'test'): 70 | raise ValueError( 71 | f'kind must be "dev" or "test", it is {kind}') 72 | 73 | if not pathlib.Path(submission).is_file(): 74 | raise ValueError( 75 | f'{kind} submission file not found: {submission}') 76 | 77 | # retrieve the required filenames that must be present in the submission 78 | dataset = pathlib.Path(dataset) / f'syntactic/{kind}' 79 | if not dataset.is_dir(): 80 | raise ValueError(f'dataset not found: {dataset}') 81 | required_files = set(w.stem for w in dataset.glob('*.wav')) 82 | 83 | # ensure each line in the submission is valid and retrieve the filenames 84 | submitted_files = list( 85 | _validate_line(index + 1, line) 86 | for index, line in enumerate(open(submission, 'r'))) 87 | 88 | # ensure there are no duplicates in the filenames 89 | duplicates = [ 90 | f for f, n in collections.Counter(submitted_files).items() if n > 1] 91 | if duplicates: 92 | raise MismatchError('duplicates found', [], duplicates) 93 | 94 | # ensure all the required files are here and there is no extra filename 95 | if required_files != set(submitted_files): 96 | raise MismatchError( 97 | 'mismatch in filenames', required_files, submitted_files) 98 | 99 | 100 | def load_data(gold_file, submission_file): 101 | """Returns the data required for evaluation as a pandas data frame 102 | 103 | Each line of the returned data frame contains a pair of (correct, 104 | incorrect) sentences and has the following columns: 'id', 'voice', 'type', 105 | 'sentence', 'score sentence', 'non sentence', 'score non sentence'. 106 | 107 | Parameters 108 | ---------- 109 | gold_file : path 110 | The gold file for the syntactic dataset (test or dev). 111 | submission_file : path 112 | The submission corresponding to the provided gold file. 113 | 114 | Returns 115 | ------- 116 | data : pandas.DataFrame 117 | The data ready for evaluation 118 | 119 | Raises 120 | ------ 121 | ValueError 122 | If the input files cannot be opened or in case of data mismatch between 123 | the two files.
124 | 125 | """ 126 | # ensures the two input files are here 127 | for input_file in (gold_file, submission_file): 128 | if not pathlib.Path(input_file).is_file(): 129 | raise ValueError(f'file not found: {input_file}') 130 | 131 | # load them as data frames indexed by filenames 132 | gold = pandas.read_csv( 133 | gold_file, header=0, index_col='filename') 134 | score = pandas.read_csv( 135 | submission_file, sep=' ', header=None, 136 | names=['filename', 'score'], index_col='filename') 137 | 138 | # ensures the filenames in gold and submission are the same 139 | if set(gold.index) != set(score.index): 140 | has_less_files = set(gold.index) - set(score.index) 141 | has_more_files = set(score.index) - set(gold.index) 142 | print("MismatchError:", file=sys.stderr) 143 | if len(has_more_files) > 0: 144 | print('submission has extra files', file=sys.stderr) 145 | print(f'extra files: {has_more_files}', file=sys.stderr) 146 | 147 | if len(has_less_files) > 0: 148 | print('submission is missing files', file=sys.stderr) 149 | print(f'missing files: {has_less_files}', file=sys.stderr) 150 | 151 | sys.exit(1) 152 | 153 | # merge the gold and score using filenames, then drop the filename index 154 | # as we don't use it for evaluation 155 | data = pandas.concat([gold, score], axis=1) 156 | data.reset_index(drop=True, inplace=True) 157 | 158 | # going from a sentence per line to a pair (sentence, non sentence) per line 159 | data = pandas.concat([ 160 | data.loc[data['correct'] == 1].reset_index().rename( 161 | lambda x: 's_' + x, axis=1), 162 | data.loc[data['correct'] == 0].reset_index().rename( 163 | lambda x: 'ns_' + x, axis=1)], axis=1) 164 | data.drop( 165 | ['s_index', 'ns_index', 'ns_voice', 'ns_type', 'ns_subtype', 166 | 's_correct', 'ns_correct', 'ns_id'], 167 | axis=1, inplace=True) 168 | 169 | data.rename( 170 | {'s_id': 'id', 171 | 's_voice': 'voice', 172 | 's_type': 'type', 173 | 's_subtype': 'subtype', 174 | 's_transcription': 'sentence', 175 | 'ns_transcription': 'non sentence', 176 | 's_score': 'score sentence', 177 | 'ns_score': 'score non sentence'}, 178 | axis=1, inplace=True) 179 | 180 | return data 181 | 182 | 183 | def evaluate_by_pair(data): 184 | """Returns a data frame with the scores by (sentence, non sentence) pair 185 | 186 | Parameters 187 | ---------- 188 | data : pandas.DataFrame 189 | The result of `load_data` 190 | 191 | Returns 192 | ------- 193 | by_pair : pandas.DataFrame 194 | The evaluated (sentence, non sentence) pairs, the data frame has the 195 | columns: 'sentence', 'non sentence', 'type' and 'score'.
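For each pair the score is 1.0 when the sentence is scored above the non
sentence, 0.5 on a tie and 0.0 otherwise, averaged across voices. As an
illustrative case: one voice preferring the sentence and another voice
giving a tie yields (1.0 + 0.5) / 2 = 0.75.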
196 | 197 | """ 198 | # compute the score for each pair in an additional 'score' column, then 199 | # delete the 'score sentence' and 'score non sentence' columns that become useless 200 | score = data.loc[:, ['score sentence', 'score non sentence']].to_numpy() 201 | data['score'] = ( 202 | 0.5 * (score[:, 0] == score[:, 1]) 203 | + (score[:, 0] > score[:, 1])) 204 | data.drop(columns=['score sentence', 'score non sentence'], inplace=True) 205 | 206 | # finally get the mean score across voices for all pairs 207 | score = data.groupby(['type', 'subtype', 'id']).apply(lambda x: ( 208 | x.iat[0, 2], # type 209 | x.iat[0, 3], # subtype 210 | x.iat[0, 4], # sentence 211 | x.iat[0, 5], # non sentence 212 | x['score'].mean())) 213 | return pandas.DataFrame( 214 | score.to_list(), 215 | columns=['type', 'subtype', 'sentence', 'non sentence', 'score']) 216 | 217 | 218 | def evaluate_by_type(by_pair): 219 | """Returns a data frame with mean scores by syntax error type 220 | 221 | Parameters 222 | ---------- 223 | by_pair: pandas.DataFrame 224 | The output of `evaluate_by_pair` 225 | 226 | Returns 227 | ------- 228 | by_type : pandas.DataFrame 229 | The score collapsed on types, the data frame has the 230 | following columns: 'type', 'score'. 231 | 232 | """ 233 | return by_pair.score.groupby([by_pair['type']]).agg( 234 | n='count', score='mean', std='std').reset_index() 235 | 236 | 237 | def evaluate(gold_file, submission_file): 238 | """Returns the score by sentence pair and by syntax type 239 | 240 | Parameters 241 | ---------- 242 | gold_file : path 243 | The gold file (csv format) for the syntactic dataset (test or dev). 244 | submission_file : path 245 | The submission corresponding to the provided gold file. 246 | 247 | Returns 248 | ------- 249 | by_pair : pandas.DataFrame 250 | The evaluated pairs, the data frame has the columns: 251 | 'sentence', 'non sentence' and 'score'. 252 | by_type : pandas.DataFrame 253 | The score collapsed on syntax error types, the data frame has the 254 | following columns: 'type', 'score'. 255 | 256 | Raises 257 | ------ 258 | ValueError 259 | If the input files cannot be opened or in case of data mismatch between 260 | the two files. 261 | 262 | """ 263 | data = load_data(gold_file, submission_file) 264 | by_pair = evaluate_by_pair(data) 265 | by_type = evaluate_by_type(by_pair) 266 | by_pair.drop(['type', 'subtype'], axis=1, inplace=True) 267 | 268 | return by_pair, by_type 269 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/__init__.py: -------------------------------------------------------------------------------- 1 | from . import api_fn, auth, model, upload, split 2 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/api_fn.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import requests 4 | 5 | from rich.console import Console 6 | from rich.table import Table 7 | 8 | 9 | from .
import model 10 | 11 | console = Console() 12 | 13 | 14 | def view_challenges(): 15 | """ Fetch and display the list of available challenges.""" 16 | response = requests.get( 17 | f"{model.SERVER_LOCATION}/challenges/", params={"include_inactive": "false"}) 18 | if response.status_code != 200: 19 | raise ValueError('Request to server failed!') 20 | 21 | challenges = response.json() 22 | 23 | table = Table(show_header=True, header_style="bold magenta") 24 | table.add_column("Challenge") 25 | table.add_column("ID") 26 | 27 | for item in challenges: 28 | table.add_row(f"{item.get('label', '')}", f"{item.get('id', 'XX')}") 29 | 30 | console.print(table) 31 | 32 | 33 | def create_multipart_submission(challenge_id: int, file_meta: dict, _token: str): 34 | """ Create a multipart upload submission session on the server via the API.""" 35 | data = { 36 | "filename": file_meta["filename"], 37 | "hash": file_meta["hash"], 38 | "multipart": True, 39 | "index": file_meta['index'] 40 | } 41 | 42 | return requests.post( 43 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/create', 44 | json=data, 45 | headers={ 46 | 'Authorization': f'Bearer {_token}' 47 | }) 48 | 49 | 50 | def create_single_part_submission(challenge_id: int, filename: Path, _hash: str, _token: str): 51 | """ Create a single part submission upload session on the server via the API.""" 52 | return requests.post( 53 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/create', 54 | json={ 55 | "filename": f"{filename}", 56 | "hash": _hash, 57 | "multipart": False, 58 | }, 59 | headers={ 60 | 'Authorization': f'Bearer {_token}' 61 | }) 62 | 63 | 64 | def submission_upload(challenge_id: int, submission_id: str, file: Path, _token: str): 65 | """Upload a file (or part) to an existing upload session.""" 66 | response = requests.put( 67 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/upload', 68 | params={ 69 | "part_name": file.name, 70 | "submission_id": f"{submission_id}" 71 | }, 72 | files={'file_data': file.open('rb').read()}, 73 | headers={ 74 | 'Authorization': f'Bearer {_token}' 75 | } 76 | ) 77 | return response 78 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/auth.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import requests 5 | from rich.console import Console 6 | 7 | from .
import model 8 | 9 | # Fancy console 10 | console = Console() 11 | 12 | 13 | def login(username: str, password: str): 14 | """ Create an auth session on zerospeech.com 15 | 16 | :returns: the token used to authenticate the current session 17 | """ 18 | 19 | # request login from server 20 | response = requests.post( 21 | f'{model.SERVER_LOCATION}/auth/login', 22 | data={ 23 | "grant_type": "password", 24 | "username": username, 25 | "password": password, 26 | "scopes": [], 27 | "client_id": model.CLIENT_ID, 28 | "client_secret": model.CLIENT_SECRET 29 | } 30 | ) 31 | if response.status_code != 200: 32 | console.print(f"[red]:x:{response.status_code}[/red]: {response.json().get('detail')}") 33 | sys.exit(1) 34 | 35 | return response.json().get("access_token") 36 | 37 | 38 | def logout(_token): 39 | """ Clears the given auth session on the back-end """ 40 | return requests.delete( 41 | f'{model.SERVER_LOCATION}/auth/logout', 42 | headers={ 43 | 'Authorization': f'Bearer {_token}' 44 | }) 45 | 46 | 47 | def clear_session(): 48 | """ Clear the current session locally and on the server.""" 49 | token_file = Path(model.AUTH_FILE).expanduser().resolve() 50 | if token_file.is_file(): 51 | with token_file.open() as fp: 52 | token = fp.read().replace("\n", "") 53 | 54 | # clear 55 | token_file.unlink(missing_ok=True) 56 | logout(token) 57 | console.print(f"Session saved @ {token_file} was removed.", style='green bold') 58 | 59 | 60 | def create_session(token: str): 61 | """ Creates a new auth session & saves it locally """ 62 | token_file = Path(model.AUTH_FILE).expanduser().resolve() 63 | 64 | with token_file.open('w') as fp: 65 | fp.write(token) 66 | 67 | 68 | def get_session(): 69 | """ Get or create a new auth session """ 70 | token_file = Path(model.AUTH_FILE).expanduser().resolve() 71 | 72 | if not token_file.is_file(): 73 | console.print("No session found, use the login command to create one.", style='red bold') 74 | sys.exit(1) 75 | 76 | with token_file.open() as fp: 77 | return fp.read().replace("\n", "") 78 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/model.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import Optional, List 6 | 7 | SERVER_LOCATION: str = "https://api.zerospeech.com" 8 | CLIENT_ID: str = "cli_uploader" 9 | CLIENT_SECRET: str = 'TaX9K1WtryizOTr5pLUM4OoqXZE5QGlj3Xo6dkh3CcI=' 10 | NB_RETRY_ATTEMPTS: int = 2 11 | MULTIPART_THRESHOLD: int = 500000000 # in bytes (500MB) 12 | AUTH_FILE: str = "~/.zerospeech-token" 13 | CHALLENGE_ID = 7 14 | 15 | 16 | def get_challenge_id(): 17 | """ Get the current challenge id from the current environment or return the default.
""" 18 | return os.environ.get("CHALLENGE_ID", CHALLENGE_ID) 19 | 20 | 21 | class ZrApiException(Exception): 22 | pass 23 | 24 | 25 | @dataclass 26 | class ManifestFileIndexItem: 27 | """ Upload File Manifest Item """ 28 | file_name: str 29 | file_size: int 30 | file_hash: Optional[str] = None 31 | 32 | def dict(self): 33 | return {f"{x}": getattr(self, x) for x in self.__dataclass_fields__.keys()} 34 | 35 | @classmethod 36 | def from_dict(cls, data): 37 | return cls(**data) 38 | 39 | 40 | @dataclass 41 | class SplitManifest: 42 | """ A class containing information about archive split""" 43 | filename: str 44 | tmp_location: Path 45 | hash: str 46 | index: Optional[List[ManifestFileIndexItem]] 47 | multipart: bool = True 48 | hashed_parts: bool = True 49 | completed: int = 0 50 | 51 | def dict(self): 52 | data = {f"{x}": f"{getattr(self, x)}" for x in self.__dataclass_fields__.keys()} 53 | if "index" in data.keys(): 54 | data["index"] = [ 55 | item.dict() for item in self.index 56 | ] 57 | 58 | return data 59 | 60 | @classmethod 61 | def from_dict(cls, data): 62 | if "index" in data.keys(): 63 | data["index"] = [ 64 | ManifestFileIndexItem.from_dict(item) for item in data["index"] 65 | ] 66 | return cls(**data) 67 | 68 | 69 | class UploadManifest: 70 | """ Fail-safe multi-part upload""" 71 | 72 | @classmethod 73 | def load(cls, filename: Path, retries: int = 2): 74 | with filename.open('r') as fp: 75 | dd = json.load(fp) 76 | return cls(dd["manifest"], filename, metadata=dd["metadata"], retries=retries) 77 | 78 | def __init__(self, list_manifest, save_file: Path, metadata=None, retries: int = 2): 79 | if isinstance(list_manifest, dict): 80 | self.man = list_manifest 81 | else: 82 | self.man = { 83 | f"{name}": 'todo' 84 | for name in list_manifest 85 | } 86 | self.save_file = save_file 87 | self.retries = retries 88 | if metadata: 89 | self._metadata = metadata 90 | else: 91 | self._metadata = {} 92 | self.save() 93 | 94 | def __iter__(self): 95 | return self 96 | 97 | @property 98 | def metadata(self): 99 | return self._metadata 100 | 101 | @metadata.setter 102 | def metadata(self, data): 103 | self._metadata.update(data) 104 | self.save() 105 | 106 | def __next__(self): 107 | for k, v in self.man.items(): 108 | if v == 'todo': 109 | return k 110 | for k, v in self.man.items(): 111 | if v == 'waiting': 112 | return k 113 | for k, v in self.man.items(): 114 | if 'retry' in v: 115 | return k 116 | raise StopIteration 117 | 118 | def status(self, key): 119 | return self.man[key] 120 | 121 | def set_waiting(self, key): 122 | if self.man[key] == 'todo': 123 | self.man[key] = "waiting" 124 | self.save() 125 | 126 | def set_done(self, key): 127 | self.man[key] = "done" 128 | self.save() 129 | 130 | def set_failed(self, key): 131 | k = self.man[key] 132 | if k in ["waiting", "todo"]: 133 | self.man[key] = "retry_1" 134 | elif "retry" in k: 135 | nb = int(k.split('_')[1]) 136 | nb += 1 137 | if nb > self.retries: 138 | st = 'failed' 139 | else: 140 | st = f"retry_{nb}" 141 | self.man[key] = st 142 | self.save() 143 | 144 | def save(self): 145 | with self.save_file.open('w') as fp: 146 | json.dump({ 147 | "manifest": self.man, 148 | "metadata": self.metadata 149 | }, fp) 150 | 151 | def is_complete(self): 152 | for k, v in self.man.items(): 153 | if v != "done": 154 | return False 155 | return True 156 | 157 | def get_failed(self): 158 | return [k for k, v in self.man.items() if v == 'failed'] 159 | 160 | def clear(self): 161 | # remove checkpoint file 162 | self.save_file.unlink() 163 | 
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/split.py:
--------------------------------------------------------------------------------
import tempfile
from pathlib import Path
from typing import List

import pandas as pd
from Crypto.Hash import MD5
from fsplit.filesplit import Filesplit

from .model import SplitManifest, ManifestFileIndexItem


def md5sum(file_path: Path, chunk_size: int = 8192):
    """ Return the md5 hash of a file's content """
    h = MD5.new()

    with file_path.open('rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            h.update(chunk)
    return h.hexdigest()


def split_zip_v2(zipfile: Path, chunk_max_size: int = 500000000, hash_parts: bool = True):
    """ Split a zip archive into chunks of at most chunk_max_size bytes and
    return a SplitManifest describing the parts. """
    assert zipfile.is_file(), f"entry file ({zipfile}) was not found"
    print(f"splitting {zipfile} into chunks...")

    tmp_loc = Path(tempfile.mkdtemp(dir=f"{zipfile.parent}"))
    fs = Filesplit()
    fs.split(file=f"{zipfile}", split_size=chunk_max_size, output_dir=str(tmp_loc))
    df = pd.read_csv(tmp_loc / 'fs_manifest.csv')
    if hash_parts:
        df['hash'] = df.apply(lambda row: md5sum(tmp_loc / row['filename']), axis=1)
        index: List[ManifestFileIndexItem] = [
            ManifestFileIndexItem(file_name=x[0], file_size=x[1], file_hash=x[2])
            for x in zip(df['filename'], df['filesize'], df['hash'])
        ]
    else:
        index: List[ManifestFileIndexItem] = [
            ManifestFileIndexItem(file_name=x[0], file_size=x[1])
            for x in zip(df['filename'], df['filesize'])
        ]

    return SplitManifest(
        filename=zipfile.name,
        tmp_location=tmp_loc,
        hash=md5sum(zipfile),
        index=index,
        hashed_parts=hash_parts
    )
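# --- Illustrative usage sketch (not part of the library): split an archive
# --- and verify each part against its recorded hash; "submission.zip" is a
# --- placeholder path.
#
#   manifest = split_zip_v2(Path("submission.zip"))
#   for item in manifest.index:
#       part = manifest.tmp_location / item.file_name
#       assert md5sum(part) == item.file_hash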
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/upload.py:
--------------------------------------------------------------------------------
import shutil
import sys
from pathlib import Path

from rich import inspect, print
from rich.console import Console
from rich.progress import Progress
from rich.prompt import Prompt

from . import model
from .api_fn import (
    create_multipart_submission, submission_upload, create_single_part_submission
)
from .split import split_zip_v2, md5sum

# Fancy console
console = Console()


def multipart_upload(challenge_id: int, zipfile: Path, _token: str, checkpoint: Path):
    print("preparing metadata....")

    # resume from a checkpoint when one exists, otherwise split the archive
    if checkpoint.is_file():
        file_list = model.UploadManifest.load(checkpoint, retries=model.NB_RETRY_ATTEMPTS)
        tmp_location = Path(file_list.metadata.get("tmp_location"))
        _token = file_list.metadata.get('token')
        challenge_id = file_list.metadata.get("challenge_id")
    else:
        manifest = split_zip_v2(zipfile)
        part_names = [i.file_name for i in manifest.index]
        tmp_location = manifest.tmp_location
        meta = {
            "tmp_location": f"{tmp_location}",
            "filename": manifest.filename,
            "hash": manifest.hash,
            "index": [i.dict() for i in manifest.index],
            "token": _token,
            "challenge_id": challenge_id
        }
        file_list = model.UploadManifest(part_names, checkpoint, meta, retries=model.NB_RETRY_ATTEMPTS)

    # reuse the submission session if one was already created
    if "submission_id" in file_list.metadata:
        submission_id = file_list.metadata.get('submission_id')
    else:
        response = create_multipart_submission(challenge_id, file_list.metadata, _token)
        if response.status_code != 200:
            print(f'[red]:x:[/red][bold]Submission creation failed with code [red]{response.status_code}[/red][/bold]')
            inspect(response.json())
            sys.exit(1)

        submission_id = response.text.replace('"', '').replace("'", "")
        file_list.metadata = {"submission_id": submission_id}

    with Progress() as progress:
        task1 = progress.add_task("[red]Uploading parts...", total=len(file_list.man))

        # advance half a step when a part starts, the other half when it succeeds
        for item in file_list:
            file_list.set_waiting(item)
            progress.update(task1, advance=0.5)
            file_path = tmp_location / item
            print(f'uploading : {file_path.name}...')
            response = submission_upload(
                challenge_id=challenge_id,
                submission_id=submission_id,
                file=file_path,
                _token=_token
            )

            if response.status_code == 200:
                print(f'[green]:heavy_check_mark: {file_path}')
                file_list.set_done(item)
                progress.update(task1, advance=0.5)
            else:
                # roll back the half step so the bar reflects the failed part
                progress.update(task1, advance=-0.5)
                file_list.set_failed(item)

    if file_list.is_complete():
        checkpoint.unlink()
        shutil.rmtree(tmp_location)
        return []
    else:
        return file_list.get_failed()


def single_part_upload(challenge_id: int, zipfile: Path, _token: str):
    zip_hash = md5sum(zipfile)
    response = create_single_part_submission(challenge_id, filename=zipfile, _hash=zip_hash, _token=_token)

    if response.status_code != 200:
        print(f'[red]:x:[/red][bold]Submission creation failed with code [red]{response.status_code}[/red][/bold]')
        inspect(response.json())
        sys.exit(1)

    # the API returns the submission id as a quoted string: strip the quotes
    submission_id = response.text.replace('"', '').replace("'", "")
    print(f'submission id: {submission_id}')
    response = submission_upload(
        challenge_id=challenge_id,
        submission_id=submission_id,
        file=zipfile,
        _token=_token
    )

    if response.status_code != 200:
        print(f'[red]:x:[/red][bold]Archive upload failed with code [red]{response.status_code}[/red][/bold]')
        print(response.json())
        sys.exit(1)
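# --- Illustrative usage sketch (not part of the library): choosing between
# --- the two upload paths; zipfile, token and checkpoint are placeholders,
# --- and ask_resume is defined just below.
#
#   checkpoint = zipfile.with_suffix('.checkpoint.json')
#   if zipfile.stat().st_size > model.MULTIPART_THRESHOLD:
#       ask_resume(checkpoint)        # drops any stale checkpoint on "No"
#       failed = multipart_upload(model.get_challenge_id(), zipfile, token, checkpoint)
#   else:
#       single_part_upload(model.get_challenge_id(), zipfile, token)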
def ask_resume(file: Path):
    """ Ask the user whether to resume a previous upload from its checkpoint file """
    choice = "No"
    if file.is_file():
        choice = Prompt.ask("A checkpoint file was found. Do you wish to resume?",
                            choices=["Yes", "No"])
        if choice == "No":
            file.unlink()

    return choice == "Yes"
--------------------------------------------------------------------------------