├── .gitignore
├── LICENSE.txt
├── README.md
├── environment.yml
├── setup.py
└── zerospeech2021
    ├── __init__.py
    ├── cli
    │   ├── __init__.py
    │   ├── evaluate.py
    │   ├── leaderboard.py
    │   ├── upload.py
    │   └── validate.py
    ├── exception.py
    ├── leaderboard.py
    ├── lexical.py
    ├── meta.py
    ├── phonetic.py
    ├── phonetic_eval
    │   ├── ABX_src
    │   │   ├── __init__.py
    │   │   ├── abx_group_computation.py
    │   │   ├── abx_iterators.py
    │   │   ├── dtw.c
    │   │   └── dtw.pyx
    │   ├── CPC_loader.py
    │   ├── LICENCE.txt
    │   ├── README.md
    │   ├── __init__.py
    │   └── eval_ABX.py
    ├── semantic.py
    ├── syntactic.py
    └── zr_upload_lib
        ├── __init__.py
        ├── api_fn.py
        ├── auth.py
        ├── model.py
        ├── split.py
        └── upload.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__/
3 | /zerospeech2021.egg-info/
4 | build/
5 | dist/
6 | .idea/
7 | .DS_Store
8 | *.so
9 | zerospeech2021/libri_light_eval/ABX_src/dtw.c
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ZeroSpeech Challenge 2021 Python package
2 |
3 |
4 | This repository bundles all the scripts required to evaluate and validate a
5 | submission to the [ZeroSpeech Challenge 2021](https://zerospeech.com/2021).
6 |
7 | ## Installation
8 |
9 | * First clone this repository
10 |
11 | git clone https://github.com/bootphon/zerospeech2021.git
12 | cd zerospeech2021
13 |
14 | * Setup a conda environment:
15 |
16 | conda env create -f environment.yml
17 |
18 | * Activate the created environment:
19 |
20 | conda activate zerospeech2021
21 |
22 | * Install the package:
23 |
24 | python setup.py install
25 |
26 | ## Usage
27 |
28 | The `zerospeech2021` package provides 4 command-line tools:
29 |
30 | * `zerospeech2021-validate` which validates a submission, ensuring all the
31 |   required files are present and correctly formatted.
32 |
33 | * `zerospeech2021-evaluate` which evaluates a submission (assumed to be
34 |   valid). Only the development datasets are evaluated; the test datasets can
35 |   only be evaluated by making an official submission to the challenge.
36 |
37 | * `zerospeech2021-leaderboard` which generates leaderboard entries from
38 |   evaluation scores.
38 |
39 | * `zerospeech2021-upload` which uploads a submission to zerospeech.com.
40 |
41 | Each tool comes with a `--help` option describing the possible arguments (e.g.
42 | `zerospeech2021-validate --help`).
43 |
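44 | For example, a typical session might look as follows (all paths below are
45 | illustrative):
46 | 
47 |     zerospeech2021-validate ./dataset ./submission.zip
48 |     zerospeech2021-evaluate ./dataset ./submission.zip -o ./scores
49 |     zerospeech2021-leaderboard ./submission ./dataset ./scores
50 | 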
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: zerospeech2021
2 | channels:
3 | - pytorch
4 | - defaults
5 | dependencies:
6 | - python=3
7 | - click
8 | - cudatoolkit=9.2
9 | - cython
10 | - joblib
11 | - numpy
12 | - pandas
13 | - pip
14 | - pytorch
15 | - pyyaml
16 | - scipy
17 | - torchaudio
18 | - tqdm
19 | - pip:
20 | - progressbar2
21 | - sox
22 |
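23 | # A usage reminder (see README.md): the environment above can be created and
24 | # activated with:
25 | #
26 | #   conda env create -f environment.yml
27 | #   conda activate zerospeech2021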
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Setup script for the zerospeech2021 Python package"""
3 |
4 | import codecs
5 | import numpy
6 | import setuptools
7 |
8 | import zerospeech2021
9 |
10 |
11 | setuptools.setup(
12 | # general description
13 | name='zerospeech2021',
14 | description="Evaluation and validation tools for ZeroSpeech2021",
15 | version=zerospeech2021.__version__,
16 |
17 | # python package dependencies
18 | setup_requires=['cython', 'numpy'],
19 |
20 | # include Python code
21 | packages=setuptools.find_packages(),
22 |
23 | # build cython extension
24 | ext_modules=[setuptools.Extension(
25 | 'libri_light_dtw',
26 | sources=['zerospeech2021/phonetic_eval/ABX_src/dtw.pyx'],
27 | extra_compile_args=['-O3'],
28 | include_dirs=[numpy.get_include()])],
29 |
30 | # needed for cython/setuptools, see
31 | # http://docs.cython.org/en/latest/src/quickstart/build.html
32 | zip_safe=False,
33 |
34 | # the command-line scripts to export
35 | entry_points={
36 | 'console_scripts': [
37 | 'zerospeech2021-validate = zerospeech2021.cli.validate:validate',
38 | 'zerospeech2021-evaluate = zerospeech2021.cli.evaluate:evaluate',
39 | 'zerospeech2021-leaderboard = zerospeech2021.cli.leaderboard:leaderboard',
40 | 'zerospeech2021-upload = zerospeech2021.cli.upload:upload_cmd'
41 | ]},
42 |
43 | # metadata
44 | author='CoML team',
45 | author_email='zerospeech2021@gmail.com',
46 | license='GPL3',
47 | url='https://zerospeech.com/2021',
48 | long_description=codecs.open('README.md', encoding='utf-8').read(),
49 | long_description_content_type="text/markdown",
50 | python_requires='>=3.7',
51 | )
52 |
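53 | # Development note: the Cython extension declared above can be compiled in
54 | # place with the standard setuptools command (assuming Cython and numpy are
55 | # installed):
56 | #
57 | #     python setup.py build_ext --inplace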
--------------------------------------------------------------------------------
/zerospeech2021/__init__.py:
--------------------------------------------------------------------------------
1 | """Evaluation and validation tools for the ZeroSpeech Challenge 2021"""
2 |
3 |
4 | __version__ = '0.5'
5 |
--------------------------------------------------------------------------------
/zerospeech2021/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zerospeech/zerospeech2021/199624adfba52901bab564b076fe7d4a63f47ddb/zerospeech2021/cli/__init__.py
--------------------------------------------------------------------------------
/zerospeech2021/cli/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluation program for ZR2021 submissions"""
2 |
3 | import atexit
4 | import os
5 | import pathlib
6 | import shutil
7 | import sys
8 | import tempfile
9 | import zipfile
10 |
11 | import click
12 | import pandas
13 | import yaml
14 |
15 | from zerospeech2021 import phonetic, lexical, syntactic, semantic
16 |
17 |
18 | def write_csv(frame, filename):
19 | frame.to_csv(filename, index=False, float_format='%.4f')
20 | print(f' > Wrote {filename}')
21 |
22 |
23 | def eval_lexical(dataset, submission, output, kinds):
24 | for kind in kinds: # 'dev' or 'test'
25 | print(f'Evaluating lexical {kind}...')
26 |
27 | gold_file = dataset / 'lexical' / kind / 'gold.csv'
28 | submission_file = submission / 'lexical' / f'{kind}.txt'
29 |
30 | by_pair, by_frequency, by_length = lexical.evaluate(
31 | gold_file, submission_file)
32 |
33 | write_csv(
34 | by_pair, output / f'score_lexical_{kind}_by_pair.csv')
35 | write_csv(
36 | by_frequency, output / f'score_lexical_{kind}_by_frequency.csv')
37 | write_csv(
38 | by_length, output / f'score_lexical_{kind}_by_length.csv')
39 |
40 |
41 | def eval_semantic(dataset, submission, output, kinds, njobs):
42 |     # load metric and pooling parameters from meta.yaml
43 | meta = yaml.safe_load((submission / 'meta.yaml').open('r').read())
44 | metric = meta['parameters']['semantic']['metric']
45 | pooling = meta['parameters']['semantic']['pooling']
46 |
47 | for kind in kinds: # 'dev' or 'test'
48 | print(f'Evaluating semantic {kind} '
49 | f'(metric={metric}, pooling={pooling})...')
50 |
51 | gold_file = dataset / 'semantic' / kind / 'gold.csv'
52 | pairs_file = dataset / 'semantic' / kind / 'pairs.csv'
53 | pairs, correlation = semantic.evaluate(
54 | gold_file, pairs_file, submission / 'semantic' / kind,
55 | metric, pooling, njobs=njobs)
56 |
57 | write_csv(
58 | pairs, output / f'score_semantic_{kind}_pairs.csv')
59 | write_csv(
60 | correlation, output / f'score_semantic_{kind}_correlation.csv')
61 |
62 |
63 | def eval_syntactic(dataset, submission, output, kinds):
64 | for kind in kinds: # 'dev' or 'test'
65 | print(f'Evaluating syntactic {kind}...')
66 |
67 | gold_file = dataset / 'syntactic' / kind / 'gold.csv'
68 | submission_file = submission / 'syntactic' / f'{kind}.txt'
69 |
70 | by_pair, by_type = syntactic.evaluate(gold_file, submission_file)
71 |
72 | write_csv(
73 | by_pair, output / f'score_syntactic_{kind}_by_pair.csv')
74 | write_csv(
75 | by_type, output / f'score_syntactic_{kind}_by_type.csv')
76 |
77 |
78 | def eval_phonetic(dataset, submission, output, kinds, force_cpu):
79 | meta = yaml.safe_load((submission / 'meta.yaml').open('r').read())
80 | metric = meta['parameters']['phonetic']['metric']
81 | frame_shift = meta['parameters']['phonetic']['frame_shift']
82 |
83 | results = []
84 | for kind in kinds: # 'dev' or 'test'
85 | results.append(phonetic.evaluate(
86 | submission / 'phonetic', dataset / 'phonetic',
87 | kind, metric, frame_shift, force_cpu=force_cpu))
88 |
89 | write_csv(pandas.concat(results), output / 'score_phonetic.csv')
90 |
91 |
92 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
93 | @click.argument('dataset', type=pathlib.Path)
94 | @click.argument('submission', type=pathlib.Path)
95 | @click.option(
96 | '-j', '--njobs', default=1, type=int,
97 |     help='Parallel jobs to use for the semantic part (defaults to 1)')
98 | @click.option(
99 | '--force-cpu', help='Do not use GPU for phonetic part', is_flag=True)
100 | @click.option(
101 | '-o', '--output-directory', type=pathlib.Path,
102 | default='.', show_default=True,
103 | help="Directory to store output results")
104 | @click.option('--no-phonetic', help="Skip phonetic part", is_flag=True)
105 | @click.option('--no-lexical', help="Skip lexical part", is_flag=True)
106 | @click.option('--no-syntactic', help="Skip syntactic part", is_flag=True)
107 | @click.option('--no-semantic', help="Skip semantic part", is_flag=True)
108 | def evaluate(
109 | dataset, submission, njobs, force_cpu, output_directory,
110 | no_phonetic, no_lexical, no_syntactic, no_semantic):
111 | """Evaluate a submission to the Zero Resource Speech Challenge 2021
112 |
113 | DATASET is the root directory of the ZR2021 dataset, as downloaded from
114 | https://zerospeech.com/2021.
115 |
116 | SUBMISSION is the submission to evaluate, it can be a .zip file or a
117 | directory.
118 |
119 | """
120 | try:
121 |         # regular participants can only evaluate the dev datasets; the test
122 |         # datasets can only be evaluated by making an official submission to
123 |         # the challenge. The ZEROSPEECH2021_TEST_GOLD environment variable is
124 |         # used by the organizers to provide the test gold files while keeping
125 |         # the program as simple as possible for participants.
126 | kinds = ['dev']
127 | if 'ZEROSPEECH2021_TEST_GOLD' in os.environ:
128 | kinds.append('test')
129 | dataset = pathlib.Path(os.environ['ZEROSPEECH2021_TEST_GOLD'])
130 |
131 | # ensures the dataset exists
132 | dataset = dataset.resolve(strict=True)
133 | if not dataset.is_dir():
134 | raise ValueError(f'dataset not found: {dataset}')
135 |
136 |         # ensure the submission exists; if it is a zip, uncompress it
137 | submission = submission.resolve(strict=True)
138 | if submission.is_file() and zipfile.is_zipfile(submission):
139 | # create a temp directory we remove at exit
140 | submission_unzip = tempfile.mkdtemp()
141 | atexit.register(shutil.rmtree, submission_unzip)
142 |
143 | # uncompress to the temp directory
144 | print(f'Unzip submission to {submission_unzip}...')
145 | zipfile.ZipFile(submission, 'r').extractall(submission_unzip)
146 | submission = pathlib.Path(submission_unzip)
147 | elif not submission.is_dir():
148 |             raise ValueError(
149 |                 f'submission is not a zip file or a directory: {submission}')
150 |
151 | if not output_directory.is_dir():
152 | output_directory.mkdir(exist_ok=True, parents=True)
153 |
154 | if not no_lexical:
155 | eval_lexical(dataset, submission, output_directory, kinds)
156 |
157 | if not no_semantic:
158 | eval_semantic(dataset, submission, output_directory, kinds, njobs)
159 |
160 | if not no_syntactic:
161 | eval_syntactic(dataset, submission, output_directory, kinds)
162 |
163 | if not no_phonetic:
164 | eval_phonetic(
165 | dataset, submission, output_directory, kinds, force_cpu)
166 |
167 | except ValueError as error:
168 | print(f'ERROR: {error}')
169 | sys.exit(-1)
170 |
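171 | # Example invocations (paths are illustrative). Regular participants evaluate
172 | # the dev sets only:
173 | #
174 | #     zerospeech2021-evaluate ./dataset ./submission.zip -o ./scores
175 | #
176 | # Organizers can also evaluate the test sets by pointing the program to the
177 | # test gold files:
178 | #
179 | #     ZEROSPEECH2021_TEST_GOLD=/path/to/gold zerospeech2021-evaluate \
180 | #         ./dataset ./submission.zip -o ./scores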
--------------------------------------------------------------------------------
/zerospeech2021/cli/leaderboard.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from zerospeech2021.leaderboard import get_semantic_size, ZeroSpeechSubmission
8 |
9 |
10 | def create(submission_location: Path, dataset_location, score_location: Path,
11 | user_meta, leaderboard_file: Path):
12 | """ Function that builds a leaderboard entry from the computed scores of evaluation
13 |
14 | ARGS:
15 |         submission_location: location of the submission entry files (as described in ...)
16 | dataset_location: location of the test set
17 | score_location: location of the scores computed by evaluation
18 | user_meta: file containing platform metadata (user, submission date etc.)
19 | leaderboard_file: location & name to write result file
20 | """
21 | print("Building leaderboard entry from scores...")
22 |     if not submission_location.is_dir():
23 |         print("SUBMISSION folder not found", file=sys.stderr)
24 |         sys.exit(-1)
25 | 
26 |     if not dataset_location.is_dir():
27 |         print("DATASET folder not found", file=sys.stderr)
28 |         sys.exit(-1)
29 | 
30 |     if not score_location.is_dir():
31 |         print("SCORE folder not found", file=sys.stderr)
32 |         sys.exit(-1)
33 | 
34 |     semantic_size = get_semantic_size(dataset_location)
35 |
36 | if leaderboard_file.is_file():
37 | print(f"WARNING: leaderboard specified already exists: [OVERWRITING] {leaderboard_file}", file=sys.stderr)
38 |
39 | subs = ZeroSpeechSubmission(
40 | submission_location=submission_location, external_meta_file=user_meta,
41 | _semantic_size=semantic_size, score_location=score_location,
42 | )
43 |
44 | leaderboard_file = leaderboard_file.with_suffix(".json")
45 | with leaderboard_file.open('w') as fp:
46 | json.dump(subs.leaderboard(), fp, indent=4)
47 | print(f"\t> Wrote {leaderboard_file}")
48 |
49 |
50 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
51 | @click.argument('submission', type=Path)
52 | @click.argument('dataset', type=Path)
53 | @click.argument('scores', type=Path)
54 | @click.option('-u', '--user-meta', type=Path, help="Location of platform metadata")
55 | @click.option('-o', '--output-file', type=Path, help="Location & name of the leaderboard file")
56 | def leaderboard(submission: Path, dataset: Path, scores: Path, user_meta, output_file):
57 | """ CLI wrapper to build leaderboard entry """
58 | try:
59 | create(submission, dataset, scores, user_meta, output_file)
60 | except ValueError as error:
61 | print(f'ERROR: {error}')
62 | sys.exit(-1)
63 |
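64 | # Example invocation (file and directory names are illustrative):
65 | #
66 | #     zerospeech2021-leaderboard ./submission ./dataset ./scores \
67 | #         --user-meta user_meta.json --output-file entry.json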
--------------------------------------------------------------------------------
/zerospeech2021/cli/upload.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from getpass import getpass
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from rich.console import Console
8 | from rich.progress import Progress, BarColumn
9 |
10 | from zerospeech2021 import zr_upload_lib as zr_up
11 |
12 | # Fancy console
13 | console = Console()
14 |
15 | # The challenge to use for uploads
16 | # ID 6 => zerospeech2021
17 | CHALLENGE_ID: int = 6
18 |
19 |
20 | @click.group(epilog='See https://zerospeech.com/2021 for more details')
21 | @click.option('--debug', help="Print debug info", is_flag=True)
22 | @click.pass_context
23 | def upload_cmd(ctx, debug):
24 | ctx.debug = debug
25 |
26 |
27 | @upload_cmd.command()
28 | @click.option('-u', '--username', type=str)
29 | @click.option('-p', '--password', type=str)
30 | @click.option('--clear', is_flag=True)
31 | @click.pass_obj
32 | def login(debug, username, password, clear):
33 | # clear session
34 | if clear:
35 | zr_up.auth.clear_session()
36 |         sys.exit(0)
37 |
38 | if not username:
39 | username = input('Username: ')
40 |
41 | if not password:
42 | password = getpass("Password: ")
43 |
44 | # login
45 | token = zr_up.auth.login(username, password)
46 | # save session
47 | zr_up.auth.create_session(token)
48 | console.print(f'Successfully logged in as {username}', style='green bold')
49 |
50 |
51 | @upload_cmd.command()
52 | @click.argument('archive_file', type=Path)
53 | @click.pass_obj
54 | def multipart(debug, archive_file):
55 | """ Upload an archive using multipart upload """
56 |     if not archive_file.is_file() or archive_file.suffix != ".zip":
57 |         console.print(f"ERROR: given file {archive_file} was not found or is not a .zip file!",
58 |                       style="red bold")
59 | sys.exit(1)
60 |
61 | # check if file is large enough for splitting
62 | will_split = archive_file.stat().st_size > zr_up.model.MULTIPART_THRESHOLD * 2
63 |
64 | checkpoint_file = archive_file.parent / f"{archive_file.stem}.checkpoint.json"
65 | zr_up.upload.ask_resume(checkpoint_file)
66 | token = zr_up.auth.get_session()
67 |
68 | with Progress(
69 | "[progress.description]{task.description}", BarColumn(),
70 | ) as progress:
71 | task = progress.add_task("[red]Uploading...", start=False, total=100)
72 |
73 | if will_split:
74 | zr_up.upload.multipart_upload(CHALLENGE_ID, archive_file, token, checkpoint_file)
75 | else:
76 | zr_up.upload.single_part_upload(CHALLENGE_ID, archive_file, token)
77 |
78 | progress.advance(task, advance=100)
79 |
80 | console.print(f"Successfully uploaded archive {archive_file} to zerospeech.com", style="green")
81 |
82 |
83 | @upload_cmd.command()
84 | @click.argument('archive_file', type=Path)
85 | @click.pass_obj
86 | def simple(debug, archive_file):
87 | """ Upload an archive using simple upload """
88 |     if not archive_file.is_file() or archive_file.suffix != ".zip":
89 |         console.print(f"ERROR: given file {archive_file} was not found or is not a .zip file!",
90 |                       style="red bold")
91 | sys.exit(1)
92 |
93 | token = zr_up.auth.get_session()
94 | with Progress(
95 | "[progress.description]{task.description}", BarColumn(),
96 | ) as progress:
97 | task = progress.add_task("[red]Uploading...", start=False, total=100)
98 |
99 | # upload
100 | zr_up.upload.single_part_upload(CHALLENGE_ID, archive_file, token)
101 |
102 | progress.advance(task, advance=100)
103 |
104 | console.print(f"Successfully uploaded archive {archive_file} to zerospeech.com", style="green")
--------------------------------------------------------------------------------
/zerospeech2021/cli/validate.py:
--------------------------------------------------------------------------------
1 | """Validation program for ZR2021 submissions"""
2 |
3 | import atexit
4 | import pathlib
5 | import shutil
6 | import sys
7 | import tempfile
8 | import zipfile
9 |
10 | import click
11 |
12 | from zerospeech2021 import (
13 | exception, meta, phonetic, lexical, syntactic, semantic)
14 |
15 |
16 | def _validate_directory(directory, expected):
17 | """Ensures the expected content is present in the directory"""
18 | expected = set(expected)
19 | observed = set(
20 | str(f.relative_to(directory))
21 | for f in pathlib.Path(directory).glob('*'))
22 |
23 | if expected != observed:
24 | raise exception.MismatchError(
25 | f'mismatch in directory {directory}', expected, observed)
26 |
27 |
28 | def _validate_phonetic(submission, dataset, only_dev, njobs):
29 | print('Validating phonetic...')
30 | _validate_directory(
31 | submission / 'phonetic',
32 | ['dev-clean', 'dev-other'] if only_dev
33 | else ['dev-clean', 'dev-other', 'test-clean', 'test-other'])
34 |
35 | print(' > phonetic/dev')
36 | phonetic.validate(
37 | submission / 'phonetic',
38 | dataset / 'phonetic', 'dev',
39 | njobs=njobs)
40 |
41 | if not only_dev:
42 | print(' > phonetic/test')
43 | phonetic.validate(
44 | submission / 'phonetic',
45 | dataset / 'phonetic', 'test',
46 | njobs=njobs)
47 |
48 |
49 | def _validate_lexical(submission, dataset, only_dev):
50 | print('Validating lexical...')
51 | _validate_directory(
52 | submission / 'lexical',
53 | ['dev.txt'] if only_dev else ['dev.txt', 'test.txt'])
54 |
55 | print(' > lexical/dev')
56 | lexical.validate(
57 | submission / 'lexical' / 'dev.txt',
58 | dataset, 'dev')
59 |
60 | if not only_dev:
61 | print(' > lexical/test')
62 | lexical.validate(
63 | submission / 'lexical' / 'test.txt',
64 | dataset, 'test')
65 |
66 |
67 | def _validate_syntactic(submission, dataset, only_dev):
68 | print('Validating syntactic...')
69 | _validate_directory(
70 | submission / 'syntactic',
71 | ['dev.txt'] if only_dev else ['dev.txt', 'test.txt'])
72 |
73 | print(' > syntactic/dev')
74 | syntactic.validate(
75 | submission / 'syntactic' / 'dev.txt',
76 | dataset, 'dev')
77 |
78 | if not only_dev:
79 | print(' > syntactic/test')
80 | syntactic.validate(
81 | submission / 'syntactic' / 'test.txt',
82 | dataset, 'test')
83 |
84 |
85 | def _validate_semantic(submission, dataset, only_dev, njobs):
86 | print('Validating semantic...')
87 | semantic_content = ['dev'] if only_dev else ['dev', 'test']
88 | _validate_directory(submission / 'semantic', semantic_content)
89 |
90 | for subdir in semantic_content:
91 | _validate_directory(
92 | submission / 'semantic' / subdir,
93 | ['librispeech', 'synthetic'])
94 |
95 | print(' > semantic/dev/synthetic')
96 | semantic.validate(
97 | submission / 'semantic', dataset, 'dev', 'synthetic', njobs=njobs)
98 |
99 | print(' > semantic/dev/librispeech')
100 | semantic.validate(
101 | submission / 'semantic', dataset, 'dev', 'librispeech', njobs=njobs)
102 |
103 | if not only_dev:
104 | print(' > semantic/test/synthetic')
105 | semantic.validate(
106 | submission / 'semantic', dataset, 'test', 'synthetic', njobs=njobs)
107 |
108 | print(' > semantic/test/librispeech')
109 | semantic.validate(
110 | submission / 'semantic', dataset, 'test', 'librispeech', njobs=njobs)
111 |
112 |
113 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
114 | @click.argument('dataset', type=pathlib.Path)
115 | @click.argument('submission', type=pathlib.Path)
116 | @click.option(
117 | '-j', '--njobs', default=1, type=int,
118 | help='Number of parallel jobs (default to 1)')
119 | @click.option('--only-dev', help='Skip test part', is_flag=True)
120 | @click.option('--no-phonetic', help="Skip phonetic part", is_flag=True)
121 | @click.option('--no-lexical', help="Skip lexical part", is_flag=True)
122 | @click.option('--no-syntactic', help="Skip syntactic part", is_flag=True)
123 | @click.option('--no-semantic', help="Skip semantic part", is_flag=True)
124 | def validate(
125 | dataset, submission, njobs, only_dev,
126 | no_phonetic, no_lexical, no_syntactic, no_semantic):
127 | """Validate a submission to the Zero Resource Speech Challenge 2021
128 |
129 | DATASET is the root directory of the ZR2021 dataset, as downloaded with the
130 | zerospeech2021-download tool.
131 |
132 | SUBMISSION is the submission to validate, it can be a .zip file or a
133 | directory.
134 |
135 | """
136 | try:
137 | # ensures the dataset exists
138 | dataset = dataset.resolve(strict=True)
139 | if not dataset.is_dir():
140 | raise ValueError(f'dataset not found: {dataset}')
141 |
142 |         # ensures the submission exists; if it is a zip, uncompress it
143 | submission = submission.resolve(strict=True)
144 |
145 | print('Prepare input...')
146 | print(f' > dataset: {dataset}')
147 | print(f' > submission: {submission}')
148 |
149 | if submission.is_file() and zipfile.is_zipfile(submission):
150 | # create a temp directory we remove at exit
151 | submission_unzip = tempfile.mkdtemp()
152 | atexit.register(shutil.rmtree, submission_unzip)
153 |
154 | # uncompress to the temp directory
155 | print(f' > unzip submission to {submission_unzip}...')
156 | zipfile.ZipFile(submission, 'r').extractall(submission_unzip)
157 | submission = pathlib.Path(submission_unzip)
158 | elif not submission.is_dir():
159 | raise ValueError(
160 |                 f'submission is not a zip file or a directory: {submission}')
161 |
162 | print('Validating root folder...')
163 | print(' > meta.yaml')
164 | is_open_source = meta.validate(submission)
165 |
166 | print(' > root folder')
167 | root_content = [
168 | 'meta.yaml', 'phonetic', 'lexical', 'syntactic', 'semantic']
169 | if is_open_source:
170 | root_content.append('code')
171 | _validate_directory(submission, root_content)
172 |
173 | if is_open_source:
174 | if not (submission / 'code').is_dir():
175 | raise exception.ValidationError(
176 | 'submission specified as open source but '
177 | 'code folder is missing')
178 | if not list((submission / 'code').iterdir()):
179 | raise exception.ValidationError(
180 | 'submission specified as open source but '
181 | 'code folder is empty')
182 | print(' > code folder detected: submission will be manually '
183 | 'inspected to ensure it is open source')
184 |
185 | if not no_phonetic:
186 | _validate_phonetic(submission, dataset, only_dev, njobs)
187 |
188 | if not no_lexical:
189 | _validate_lexical(submission, dataset, only_dev)
190 |
191 | if not no_syntactic:
192 | _validate_syntactic(submission, dataset, only_dev)
193 |
194 | if not no_semantic:
195 | _validate_semantic(submission, dataset, only_dev, njobs)
196 |
197 | except (exception.ValidationError, ValueError, FileNotFoundError) as error:
198 | print(f'ERROR: {error}')
199 | print('Validation failed, please fix it and try again!')
200 | sys.exit(-1)
201 |
202 | print('Success!')
203 | sys.exit(0)
204 |
--------------------------------------------------------------------------------
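
For reference, a sketch of the submission layout that `validate` checks, with a programmatic invocation through click's testing runner (all paths are hypothetical):

from click.testing import CliRunner
from zerospeech2021.cli.validate import validate

# expected submission content ('code' only when meta.yaml
# declares open_source: true):
#   meta.yaml
#   phonetic/{dev-clean,dev-other,test-clean,test-other}/
#   lexical/{dev.txt,test.txt}
#   syntactic/{dev.txt,test.txt}
#   semantic/{dev,test}/{librispeech,synthetic}/
runner = CliRunner()
result = runner.invoke(
    validate, ['/path/to/dataset', 'submission.zip', '--only-dev', '-j', '4'])
print(result.output)
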
/zerospeech2021/exception.py:
--------------------------------------------------------------------------------
1 | """Custom exceptions for ZR2021 validation steps"""
2 |
3 |
4 | def _print_sublist(entries, num=3):
5 | """Returns a string containing the `n` first elements of `entries`"""
6 | if len(entries) <= num:
7 | return '[' + ', '.join(str(e) for e in entries) + ']'
8 |
9 | return (
10 | '[' + ', '.join(list(str(e) for e in entries)[:num]) +
11 | f', ...] and {len(entries) - num} more')
12 |
13 |
14 | class ValidationError(Exception):
15 | """Raised when detecting a validation error"""
16 |
17 |
18 | class FormatError(ValidationError):
19 | """Raised when detecting a bad format in submission file"""
20 | def __init__(self, line, message):
21 | super().__init__(message)
22 | self._line = line
23 |
24 | def __str__(self):
25 | return f'bad format (line {self._line}): ' + super().__str__()
26 |
27 |
28 | class FileFormatError(ValidationError):
29 | """Raised when detecting a bad format in submission file"""
30 | def __init__(self, file, message):
31 | super().__init__(message)
32 | self._file = file
33 |
34 | def __str__(self):
35 | return f'bad format (file {self._file}): ' + super().__str__()
36 |
37 |
38 | class MismatchError(ValidationError):
39 | """Raised when detecting a mismatch between two sets"""
40 | def __init__(self, message, expected, observed):
41 | super().__init__()
42 | self._message = message
43 |
44 | expected = set(expected)
45 | observed = set(observed)
46 |
47 | missing = expected - observed
48 | extra = observed - expected
49 |
50 | if missing or extra:
51 | self._message += ': '
52 | if missing:
53 | self._message += f'missing {_print_sublist(missing)}'
54 | if missing and extra:
55 | self._message += ', '
56 | if extra:
57 | self._message += f'extra {_print_sublist(extra)}'
58 |
59 | def __str__(self):
60 | return self._message
61 |
62 |
63 | class EntryMissingError(ValidationError):
64 | """Raised when an entry is missing from the result set """
65 |
66 | def __init__(self, expected, source):
67 | super().__init__()
68 | self._message = f"Input file ({source} does not have a matching feature ({expected})!!!"
69 |
70 | def __str__(self):
71 | return self._message
72 |
--------------------------------------------------------------------------------
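
To illustrate, `MismatchError` renders the symmetric difference of the expected and observed sets, truncating long lists through `_print_sublist`:

from zerospeech2021.exception import MismatchError

try:
    raise MismatchError(
        'mismatch in directory sub', {'a.txt', 'b.txt'}, {'a.txt', 'c.txt'})
except MismatchError as error:
    print(error)  # mismatch in directory sub: missing [b.txt], extra [c.txt]
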
/zerospeech2021/leaderboard.py:
--------------------------------------------------------------------------------
1 | import json
2 | from dataclasses import dataclass
3 | from datetime import datetime
4 | from pathlib import Path
5 | from typing import Dict, Optional
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import yaml
10 |
11 |
12 | class LexicalScores:
13 | """ Class that extracts lexical scores resume from a scores directory """
14 | # score files
15 | __dev_pairs = 'score_lexical_dev_by_pair.csv'
16 | __test_pairs = 'score_lexical_test_by_pair.csv'
17 | __dev_frequency = 'score_lexical_dev_by_frequency.csv'
18 | __test_frequency = 'score_lexical_test_by_frequency.csv'
19 | __dev_length = 'score_lexical_dev_by_length.csv'
20 | __test_length = 'score_lexical_test_by_length.csv'
21 |
22 | def is_valid(self, location: Path):
23 | """ Verify that all files are present """
24 |
25 |         if not (location / self.__dev_length).is_file():
26 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_length score file!")
27 |         if not (location / self.__test_length).is_file():
28 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_length score file!")
29 |         if not (location / self.__dev_frequency).is_file():
30 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_frequency score file!")
31 |         if not (location / self.__test_frequency).is_file():
32 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_frequency score file!")
33 |         if not (location / self.__dev_pairs).is_file():
34 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_pair score file!")
35 |         if not (location / self.__test_pairs).is_file():
36 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_pair score file!")
37 |
38 | def __init__(self, location: Path):
39 | """ Initialise lexical score object """
40 | self.is_valid(location)
41 | self.location = location
42 |
43 | @staticmethod
44 | def _score_invocab(frame):
45 | """Weighted mean of scores by frequency, excluding OOVs"""
46 | # filter out OOVs
47 | frame = frame[frame['frequency'] != 'oov']
48 |
49 | # weighted mean
50 | return np.average(
51 | frame['score'].to_numpy(),
52 | weights=frame['n'].to_numpy())
53 |
54 | def general(self):
55 | """ Extract general lexical score """
56 | dev_score = pd.read_csv(self.location / self.__dev_pairs)['score'].mean()
57 | test_score = pd.read_csv(self.location / self.__test_pairs)['score'].mean()
58 | # weighted scores
59 | dev_score_invocab = self._score_invocab(
60 | pd.read_csv(self.location / self.__dev_frequency)
61 | )
62 |
63 | test_score_invocab = self._score_invocab(
64 | pd.read_csv(self.location / self.__test_frequency)
65 | )
66 |
67 | return {
68 | 'lexical_all': [dev_score, test_score],
69 | 'lexical_invocab': [dev_score_invocab, test_score_invocab]
70 | }
71 |
72 | def detailed(self):
73 | """ Extract detailed lexical score """
74 | frequency_dev = pd.read_csv(self.location / self.__dev_frequency)
75 | frequency_test = pd.read_csv(self.location / self.__test_frequency)
76 |
77 | by_frequency = pd.merge(frequency_dev, frequency_test,
78 | how="outer", on=['frequency'], suffixes=("_dev", "_test"))
79 |
80 | length_dev = pd.read_csv(self.location / self.__dev_length)
81 | length_test = pd.read_csv(self.location / self.__test_length)
82 |
83 | by_length = pd.merge(length_dev, length_test, how="outer", on=['length'], suffixes=['_dev', '_test'])
84 |
85 | return {
86 | "by_length": by_length.to_dict(orient='records'),
87 | "by_frequency": by_frequency.to_dict(orient='records')
88 | }
89 |
90 |
91 | class SemanticScore:
92 | """ Class that extracts lexical scores resume from a scores directory """
93 | # score files
94 | __dev_correlation = 'score_semantic_dev_correlation.csv'
95 | __test_correlation = 'score_semantic_test_correlation.csv'
96 |
97 | def is_valid(self, location: Path):
98 | """ Verify that all files are present """
99 |
100 |         if not (location / self.__dev_correlation).is_file():
101 |             raise FileNotFoundError(f"Score folder {location} is missing semantic_dev_correlation score file!")
102 |         if not (location / self.__test_correlation).is_file():
103 |             raise FileNotFoundError(f"Score folder {location} is missing semantic_test_correlation score file!")
104 |
105 | def __init__(self, location: Path, size: Dict):
106 | """ Initialise semantic score object """
107 | self.is_valid(location)
108 | self.location = location
109 | self.size = size
110 |
111 | def general(self):
112 | """ Extract general semantic score """
113 | dev_correlations = pd.read_csv(self.location / self.__dev_correlation)
114 | dev_librispeech_mean = dev_correlations[dev_correlations['type'] == 'librispeech']['correlation'].mean()
115 | dev_synthetic_mean = dev_correlations[dev_correlations['type'] == 'synthetic']['correlation'].mean()
116 |
117 | dev_correlations['size'] = self.size['dev']['size']
118 | dev_librispeech_wmean = np.average(
119 | dev_correlations[dev_correlations['type'] == 'librispeech']['correlation'].to_numpy(),
120 | weights=dev_correlations[dev_correlations['type'] == 'librispeech']['size'].to_numpy())
121 | dev_synthetic_wmean = np.average(
122 | dev_correlations[dev_correlations['type'] == 'synthetic']['correlation'].to_numpy(),
123 | weights=dev_correlations[dev_correlations['type'] == 'synthetic']['size'].to_numpy())
124 |
125 | test_correlations = pd.read_csv(self.location / self.__test_correlation)
126 | test_librispeech_mean = test_correlations[test_correlations['type'] == 'librispeech']['correlation'].mean()
127 | test_synthetic_mean = test_correlations[test_correlations['type'] == 'synthetic']['correlation'].mean()
128 |
129 | test_correlations['size'] = self.size['test']['size']
130 | test_librispeech_wmean = np.average(
131 | test_correlations[test_correlations['type'] == 'librispeech']['correlation'].to_numpy(),
132 | weights=test_correlations[test_correlations['type'] == 'librispeech']['size'].to_numpy())
133 | test_synthetic_wmean = np.average(
134 | test_correlations[test_correlations['type'] == 'synthetic']['correlation'].to_numpy(),
135 | weights=test_correlations[test_correlations['type'] == 'synthetic']['size'].to_numpy())
136 |
137 | return {
138 | "semantic_synthetic": [
139 | dev_synthetic_mean, test_synthetic_mean],
140 | "semantic_librispeech": [
141 | dev_librispeech_mean, test_librispeech_mean],
142 | "weighted_semantic_synthetic": [
143 | dev_synthetic_wmean, test_synthetic_wmean],
144 | "weighted_semantic_librispeech": [
145 | dev_librispeech_wmean, test_librispeech_wmean]
146 | }
147 |
148 | def detailed(self):
149 | """ Extract detailed semantic score """
150 | dev_correlations = pd.read_csv(self.location / self.__dev_correlation)
151 | test_correlations = pd.read_csv(self.location / self.__test_correlation)
152 |
153 | ndev_correlations = dev_correlations \
154 | .set_index(['dataset', dev_correlations.groupby('dataset').cumcount()])['correlation'] \
155 | .unstack() \
156 | .reset_index()
157 | ndev_correlations.columns = ['dataset', 'librispeech', 'synthetic']
158 | ndev_correlations["set"] = "dev"
159 |
160 | ntest_correlations = test_correlations \
161 | .set_index(['dataset', test_correlations.groupby('dataset').cumcount()])['correlation'] \
162 | .unstack() \
163 | .reset_index()
164 | ntest_correlations.columns = ['dataset', 'librispeech', 'synthetic']
165 | ntest_correlations["set"] = "test"
166 |
167 | # DeprecationWarning from pandas: append is to be replaced by concat
168 | correlations = pd.concat([ndev_correlations, ntest_correlations], axis=0)
169 | # correlations = ndev_correlations.append(ntest_correlations)
170 |
171 | return correlations.to_dict(orient='records')
172 |
173 |
174 | class SyntacticScores:
175 | """ Class that extracts syntactic scores resume from a scores directory """
176 | # score files
177 | __dev_pairs = 'score_syntactic_dev_by_pair.csv'
178 | __test_pairs = 'score_syntactic_test_by_pair.csv'
179 | __dev_types = 'score_syntactic_dev_by_type.csv'
180 | __test_types = 'score_syntactic_test_by_type.csv'
181 |
182 | def is_valid(self, location: Path):
183 | """ Verify that all files are present """
184 |
185 |         if not (location / self.__dev_pairs).is_file():
186 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_dev_by_pair score file!")
187 |         if not (location / self.__test_pairs).is_file():
188 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_test_by_pair score file!")
189 |         if not (location / self.__dev_types).is_file():
190 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_dev_by_type score file!")
191 |         if not (location / self.__test_types).is_file():
192 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_test_by_type score file!")
193 |
194 | def __init__(self, location: Path):
195 | """ Initialise syntactic score object """
196 | self.is_valid(location)
197 | self.location = location
198 |
199 | def general(self):
200 | """ Extract general semantic score """
201 | dev_mean = pd.read_csv(self.location / self.__dev_pairs)['score'].mean()
202 | test_mean = pd.read_csv(self.location / self.__test_pairs)['score'].mean()
203 | return [dev_mean, test_mean]
204 |
205 | def detailed(self):
206 | """ Extract detailed semantic score """
207 | dev_types = pd.read_csv(self.location / self.__dev_types)
208 | test_types = pd.read_csv(self.location / self.__test_types)
209 |
210 | merged = pd.merge(dev_types, test_types, how="outer", on=["type"], suffixes=("_dev", "_test"))
211 |
212 | return merged.to_dict(orient='records')
213 |
214 |
215 | class PhoneticScores:
216 | """ Class that extracts syntactic scores resume from a scores directory """
217 | # score files
218 | __scores = 'score_phonetic.csv'
219 |
220 | def is_valid(self, location: Path):
221 | """ Verify that all files are present """
222 |
223 |         if not (location / self.__scores).is_file():
224 |             raise FileNotFoundError(f"Score folder {location} is missing phonetic score file!")
225 |
226 | def __init__(self, location: Path):
227 | """ Initialise phonetic score object """
228 | self.is_valid(location)
229 | self.location = location
230 |
231 | def general(self):
232 | """ Extract general semantic score """
233 |
234 | def e(d):
235 | return {s['type']: s['score'] for s in d}
236 |
237 | frame = pd.read_csv(self.location / self.__scores)
238 | dev_clean = frame[(frame["dataset"] == 'dev') & (frame["sub-dataset"] == 'clean')][['type', 'score']] \
239 | .to_dict(orient='records')
240 | dev_other = frame[(frame["dataset"] == 'dev') & (frame["sub-dataset"] == 'other')][['type', 'score']] \
241 | .to_dict(orient='records')
242 | test_clean = frame[(frame["dataset"] == 'test') & (frame["sub-dataset"] == 'clean')][['type', 'score']] \
243 | .to_dict(orient='records')
244 | test_other = frame[(frame["dataset"] == 'test') & (frame["sub-dataset"] == 'other')][['type', 'score']] \
245 | .to_dict(orient='records')
246 |
247 | return {
248 | "phonetic_clean_within": [e(dev_clean)['within'], e(test_clean)['within']],
249 | "phonetic_clean_across": [e(dev_clean)['across'], e(test_clean)['across']],
250 | "phonetic_other_within": [e(dev_other)['within'], e(test_other)['within']],
251 | "phonetic_other_across": [e(dev_other)['across'], e(test_other)['across']]
252 | }
253 |
254 | @staticmethod
255 | def detailed():
256 | """ Extract detailed semantic score """
257 | # phonetic task has no detailed view of scores
258 | return {}
259 |
260 |
261 | @dataclass
262 | class Metadata:
263 | author: str
264 | affiliation: str
265 | description: str
266 | open_source: bool
267 | train_set: str
268 | gpu_budget: float
269 | parameters: Dict
270 | visually_grounded: bool = False
271 | submission_id: Optional[str] = None
272 | submission_date: Optional[datetime] = None
273 | submitted_by: Optional[str] = None
274 |
275 | @staticmethod
276 | def parse_external_meta(filepath: Path) -> Dict:
277 | if filepath is None or not filepath.is_file():
278 | return {}
279 | elif filepath.suffix == '.json':
280 | with filepath.open() as fp:
281 | return json.load(fp)
282 | else:
283 | # old txt based file
284 | submitted_at = None
285 | with filepath.open() as fp:
286 | for line in fp.readlines():
287 | line = line.rstrip()
288 | if line.startswith('submitted-at:'):
289 | submitted_at = line.replace('submitted-at:', '').replace(' ', '')
290 | return {"submitted-at": submitted_at}
291 |
292 | @staticmethod
293 | def filter_external_meta(data: Dict):
294 | try:
295 | sub_data = datetime.fromisoformat(data.get("submitted-at", None))
296 | except (ValueError, TypeError):
297 | sub_data = None
298 |
299 | return {
300 | "submission_date": sub_data,
301 | "submitted_by": data.get("user", None),
302 | "submission_id": data.get("submission_id", None)
303 | }
304 |
305 | @classmethod
306 | def create_from(cls, filepath: Path, external_meta_file: Path):
307 | with (filepath / 'meta.yaml').open() as fp:
308 | meta = yaml.load(fp, Loader=yaml.SafeLoader)
309 |
310 | # parse & filter items of platform metadata
311 | external_meta = cls.filter_external_meta(cls.parse_external_meta(external_meta_file))
312 |
313 | return cls(**meta, **external_meta)
314 |
315 | def to_dict(self):
316 | if self.submission_date:
317 | sub_date = self.submission_date.isoformat()
318 | else:
319 | sub_date = datetime.now().isoformat()
320 |
321 | return {
322 | "submitted_at": sub_date,
323 | "author": self.author,
324 | "affiliation": self.affiliation,
325 | "submitted_by": self.submitted_by,
326 | "submission_id": self.submission_id,
327 | "description": self.description,
328 | "visually_grounded": self.visually_grounded,
329 | "open_source": self.open_source,
330 | "train_set": self.train_set,
331 | "gpu_budget": self.gpu_budget,
332 | "parameters": self.parameters
333 | }
334 |
335 |
336 | class ZeroSpeechSubmission:
337 |
338 | def __init__(self, submission_location: Path, _semantic_size: Dict,
339 | score_location: Path, external_meta_file: Path):
340 |
341 | # fetch metadata
342 | self.description = Metadata.create_from(submission_location, external_meta_file)
343 |
344 | # create scores
345 | self.lexical = LexicalScores(score_location)
346 | self.semantic = SemanticScore(score_location, _semantic_size)
347 | self.syntactic = SyntacticScores(score_location)
348 | self.phonetic = PhoneticScores(score_location)
349 |
350 | def leaderboard(self):
351 | """ Build leaderboard object """
352 | ph = self.phonetic.general()
353 | le = self.lexical.general()
354 | se = self.semantic.general()
355 | sy = self.syntactic.general()
356 | more = {
357 | "description": self.description.to_dict(),
358 | "lexical": self.lexical.detailed(),
359 | "syntactic": self.syntactic.detailed(),
360 | "semantic": self.semantic.detailed(),
361 | }
362 | return {
363 | "author_label": self.description.author,
364 | "set": ['dev', 'test'],
365 | **le,
366 | "syntactic": sy,
367 | **ph,
368 | **se,
369 | "more": more
370 | }
371 |
372 |
373 | def get_semantic_size(dataset: Path):
374 | test_size = pd.read_csv(dataset / 'semantic/test/pairs.csv', header=0) \
375 | .groupby(['type', 'dataset'], as_index=False).size()
376 | dev_size = pd.read_csv(dataset / 'semantic/dev/pairs.csv', header=0) \
377 | .groupby(['type', 'dataset'], as_index=False).size()
378 | return {'dev': dev_size, 'test': test_size}
379 |
--------------------------------------------------------------------------------
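
A minimal sketch of assembling a leaderboard entry from the classes above, assuming hypothetical `dataset`, `submission` and `scores` directories produced by the evaluation tools:

from pathlib import Path
from zerospeech2021.leaderboard import ZeroSpeechSubmission, get_semantic_size

submission = ZeroSpeechSubmission(
    submission_location=Path('submission'),
    _semantic_size=get_semantic_size(Path('dataset')),
    score_location=Path('scores'),
    external_meta_file=Path('scores/meta.json'))
entry = submission.leaderboard()
print(entry['lexical_all'])  # [dev score, test score]
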
/zerospeech2021/lexical.py:
--------------------------------------------------------------------------------
1 | """Lexical part of the ZR2021 (validation and evaluation)"""
2 |
3 | import collections
4 | import pathlib
5 | import sys
6 |
7 | import pandas
8 | from zerospeech2021.exception import FormatError, MismatchError
9 |
10 |
11 | def _validate_line(index, line):
12 | """Auxiliary function to validate()
13 |
14 | Returns the filename in `line`, checks the score and raises FormatError if
15 | the line is not valid.
16 |
17 | """
18 | # ensure the line has two fields separated by a space
19 | line = line.strip()
20 | fields = line.split(' ')
21 | if len(fields) != 2:
22 | raise FormatError(
23 |             index, f'must be "<filename> <score>" but is "{line}"')
24 |
25 | filename, score = tuple(fields)
26 |
27 | # ensure the second field is a positive float
28 | try:
29 | float(score)
30 | except ValueError:
31 | raise FormatError(
32 |             index, f'<score> must be a float but is "{score}"')
33 |
34 | return filename
35 |
36 |
37 | def validate(submission, dataset, kind):
38 | """Raises a ValidationError if the `submission` file is not valid
39 |
40 |     * The submission file must be in text format, each line as:
41 |       "<filename> <score>"
42 |
43 |     * The <filename> is the name of a wav file in the lexical dataset, without
44 |       path nor extension ("xKtnLJYiWGt", not "lexical/dev/xKtnLJYiWGt.wav")
45 |
46 |     * The <score> is a positive float
47 |
48 |     Parameters
49 |     ----------
50 |     submission: path
51 |         The submission file to validate, each line must be formatted as
52 |         "<filename> <score>".
53 |     dataset: path
54 |         The root path of the ZR2021 dataset
55 |     kind: str
56 |         Must be 'dev' or 'test'
57 |
58 |     Raises
59 |     ------
60 |     ValueError
61 |         If `kind` is not 'dev' or 'test', if `submission` is not a file or if
62 |         the dataset is not an existing directory.
63 |     ValidationError
64 |         If one line of the submission file is not valid or if the submitted
65 |         filenames do not fit the required ones.
66 |
67 | """
68 | if kind not in ('dev', 'test'):
69 | raise ValueError(
70 | f'kind must be "dev" or "test", it is {kind}')
71 |
72 | if not pathlib.Path(submission).is_file():
73 | raise ValueError(
74 | f'{kind} submission file not found: {submission}')
75 |
76 | # retrieve the required filenames that must be present in the submission
77 | dataset = pathlib.Path(dataset) / f'lexical/{kind}'
78 | if not dataset.is_dir():
79 | raise ValueError(f'dataset not found: {dataset}')
80 | required_files = set(w.stem for w in dataset.glob('*.wav'))
81 |
82 | # ensure each line in the submission is valid and retrieve the filenames
83 | submitted_files = list(
84 | _validate_line(index + 1, line)
85 | for index, line in enumerate(open(submission, 'r')))
86 |
87 |     # ensures there is no duplicate in the filenames
88 | duplicates = [
89 | f for f, n in collections.Counter(submitted_files).items() if n > 1]
90 | if duplicates:
91 | raise MismatchError('duplicates found', [], duplicates)
92 |
93 | # ensure all the required files are here and there is no extra filename
94 | if required_files != set(submitted_files):
95 | raise MismatchError(
96 | 'mismatch in filenames', required_files, submitted_files)
97 |
98 |
99 | def load_data(gold_file, submission_file):
100 | """Returns the data required for evaluation as a pandas data frame
101 |
102 | Each line of the returned data frame contains a pair (word, non word) and
103 | has the following columns: 'id', 'voice', 'frequency', 'word', 'score
104 | word', 'non word', 'score non word'.
105 |
106 | Parameters
107 | ----------
108 | gold_file : path
109 | The gold file for the lexical dataset (test or dev).
110 | submission_file : path
111 | The submission corresponding to the provided gold file.
112 |
113 | Returns
114 | -------
115 | data : pandas.DataFrame
116 | The data ready for evaluation
117 |
118 |     Raises
119 |     ------
120 | ValueError
121 | If the input files cannot be opened or in case of data mismatch between
122 | the two files.
123 |
124 | """
125 | # ensures the two input files are here
126 | for input_file in (gold_file, submission_file):
127 | if not pathlib.Path(input_file).is_file():
128 | raise ValueError(f'file not found: {input_file}')
129 |
130 | # load them as data frames indexed by filenames
131 | gold = pandas.read_csv(
132 | gold_file, header=0, index_col='filename').astype(
133 | {'frequency': pandas.Int64Dtype()})
134 | score = pandas.read_csv(
135 | submission_file, sep=' ', header=None,
136 | names=['filename', 'score'], index_col='filename')
137 |
138 | # ensures the filenames in gold and submission are the same
139 | if set(gold.index) != set(score.index):
140 | has_less_files = set(gold.index) - set(score.index)
141 | has_more_files = set(score.index) - set(gold.index)
142 | print("MismatchError:", file=sys.stderr)
143 | if len(has_more_files) > 0:
144 | print('submission has extra files', file=sys.stderr)
145 | print(f'extra files: {has_more_files}', file=sys.stderr)
146 |
147 | if len(has_less_files) > 0:
148 | print('submission is missing files', file=sys.stderr)
149 | print(f'missing files: {has_less_files}:', file=sys.stderr)
150 | sys.exit(1)
151 |
152 | # merge the gold and score using filenames, then remove the columns
153 | # 'phones' and 'filename' as we don't use them for evaluation
154 | data = pandas.merge(gold, score, on='filename', how='inner')
155 | data.reset_index(inplace=True)
156 | # if all non words have their textual version set to NaN, we take their phonemic version instead.
157 |     if data[data.correct == 0]['word'].isnull().sum() == len(data[data.correct == 0]):
158 | data['word'] = data['phones']
159 | data.drop(columns=['phones', 'filename'], inplace=True)
160 |
161 | # going from a word per line to a pair (word, non word) per line
162 | words = data.loc[data['correct'] == 1].reset_index().rename(lambda x: 'w_' + x, axis=1)
163 | non_words = data.loc[data['correct'] == 0].reset_index().rename(lambda x: 'nw_' + x, axis=1)
164 | data = pandas.merge(words, non_words, left_on=['w_voice', 'w_id'], right_on=['nw_voice', 'nw_id'])
165 |
166 | data.drop(
167 | ['w_index', 'nw_index', 'nw_voice', 'nw_frequency',
168 | 'w_correct', 'nw_correct', 'nw_id', 'nw_length'],
169 | axis=1, inplace=True)
170 | data.rename(
171 | {'w_id': 'id', 'w_voice': 'voice', 'w_frequency': 'frequency',
172 | 'w_word': 'word', 'nw_word': 'non word', 'w_length': 'length',
173 | 'w_score': 'score word', 'nw_score': 'score non word'},
174 | axis=1, inplace=True)
175 |
176 | return data
177 |
178 |
179 | def evaluate_by_pair(data):
180 | """Returns a data frame with the computed scores by (word, non word) pair
181 |
182 | Parameters
183 | ----------
184 | data : pandas.DataFrame
185 | The result of `load_data`
186 |
187 | Returns
188 | -------
189 | by_pair : pandas.DataFrame
190 | The evaluated (word, non word) pairs, the data frame has the columns:
191 | 'word', 'non word' 'frequency', 'length' and 'score'.
192 |
193 | """
194 | # compute the score for each pair in an additional 'score' column, then
195 | # delete the 'score word' and 'score non word' columns that become useless
196 | score = data.loc[:, ['score word', 'score non word']].to_numpy()
197 | data['score'] = (
198 | 0.5 * (score[:, 0] == score[:, 1])
199 | + (score[:, 0] > score[:, 1]))
200 | data.drop(columns=['score word', 'score non word'], inplace=True)
201 |
202 | # finally get the mean score across voices for all pairs
203 | score = data.groupby('id').apply(lambda x: (
204 | x.iat[0, 3], # word
205 | x.iat[0, 5], # non word
206 | x.iat[0, 2], # frequency
207 | x.iat[0, 4], # length
208 | x['score'].mean()))
209 | return pandas.DataFrame(
210 | score.to_list(),
211 | columns=['word', 'non word', 'frequency', 'length', 'score'])
212 |
213 |
214 | def evaluate_by_frequency(by_pair):
215 | """Returns a data frame with mean scores by frequency bands
216 |
217 |     The frequency is defined as the number of occurrences of the word in the
218 |     LibriSpeech dataset. The following frequency bands are considered: oov,
219 | 1-5, 6-20, 21-100 and >100.
220 |
221 | Parameters
222 | ----------
223 | by_pair: pandas.DataFrame
224 | The output of `evaluate_by_pair`
225 |
226 | Returns
227 | -------
228 | by_frequency : pandas.DataFrame
229 | The score collapsed on frequency bands, the data frame has the
230 | following columns: 'frequency', 'score'.
231 |
232 | """
233 | bands = pandas.cut(
234 | by_pair.frequency,
235 | [0, 1, 5, 20, 100, float('inf')],
236 | labels=['oov', '1-5', '6-20', '21-100', '>100'],
237 | right=False)
238 |
239 | return by_pair.score.groupby(bands).agg(
240 | n='count', score='mean', std='std').reset_index()
241 |
242 |
243 | def evaluate_by_length(by_pair):
244 | """Returns a data frame with mean scores by word length
245 |
246 | Parameters
247 | ----------
248 | by_pair: pandas.DataFrame
249 | The output of `evaluate_by_pair`
250 |
251 | Returns
252 | -------
253 | by_length : pandas.DataFrame
254 | The score collapsed on word length, the data frame has the
255 | following columns: 'length', 'score'.
256 |
257 | """
258 | return by_pair.score.groupby(by_pair.length).agg(
259 | n='count', score='mean', std='std').reset_index()
260 |
261 |
262 | def evaluate(gold_file, submission_file):
263 | """Returns the score by (word, non word) pair, by frequency and by length
264 |
265 | Parameters
266 | ----------
267 | gold_file : path
268 | The gold file (csv format) for the lexical dataset (test or dev).
269 | submission_file : path
270 | The submission corresponding to the provided gold file.
271 |
272 | Returns
273 | -------
274 | by_pair : pandas.DataFrame
275 | The evaluated (word, non word) pairs, the data frame has the columns:
276 | 'word', 'non word' and 'score'.
277 | by_frequency : pandas.DataFrame
278 | The score collapsed on frequency bands, the data frame has the
279 | following columns: 'frequency', 'score'.
280 | by_length : pandas.DataFrame
281 | The score collapsed on word length (in number of phones), the data
282 | frame has the following columns: 'length', 'score'.
283 |
284 |     Raises
285 |     ------
286 | ValueError
287 | If the input files cannot be opened or in case of data mismatch between
288 | the two files.
289 |
290 | """
291 | data = load_data(gold_file, submission_file)
292 |
293 | by_pair = evaluate_by_pair(data)
294 | by_frequency = evaluate_by_frequency(by_pair)
295 | by_length = evaluate_by_length(by_pair)
296 | by_pair.drop(['frequency', 'length'], axis=1, inplace=True)
297 |
298 | return by_pair, by_frequency, by_length
299 |
--------------------------------------------------------------------------------
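
A toy check of the pair scoring rule used by `evaluate_by_pair` above: a pair scores 1 when the word outscores the non word, 0.5 on a tie, 0 otherwise:

import numpy as np

score_word = np.array([2.0, 1.0, 0.5])
score_non_word = np.array([1.0, 1.0, 1.5])
pair_score = (0.5 * (score_word == score_non_word)
              + (score_word > score_non_word))
print(pair_score)  # [1.  0.5 0. ]
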
/zerospeech2021/meta.py:
--------------------------------------------------------------------------------
1 | """Validation of meta.yaml"""
2 |
3 | import numbers
4 | import numpy as np
5 | import scipy.spatial
6 | import yaml
7 |
8 | from zerospeech2021.exception import ValidationError, MismatchError
9 |
10 |
11 | def _validate_entries(meta, entries, prefix=None):
12 | if sorted(meta.keys()) != sorted(entries.keys()):
13 | message = 'invalid entries'
14 | if prefix:
15 | message += f' in {prefix}'
16 | raise MismatchError(message, entries.keys(), meta.keys())
17 |
18 | for key, value in entries.items():
19 | _validate_entry(meta, key, value[0], values=value[1], prefix=prefix)
20 |
21 |
22 | def _validate_entry(meta, name, expected_type, values=None, prefix=None):
23 | prefix = prefix + '/' if prefix else ''
24 |
25 | if name not in meta:
26 | raise ValidationError(f'{prefix}{name} section missing')
27 |
28 | value = meta[name]
29 | if not isinstance(value, expected_type):
30 | raise ValidationError(
31 | f'{prefix}{name} must be a {expected_type}, it is {type(value)}')
32 |
33 | if values and value not in values:
34 | raise ValidationError(
35 | f'{prefix}{name} must be in ({", ".join(values)}) but is {value}')
36 |
37 | if expected_type == str and not value:
38 | raise ValidationError(f'{prefix}{name} must not be an empty string')
39 |
40 |
41 | def _validate_scipy_metric(metric):
42 | """"Raises a ValidationError if `metric` is not a valid metric in scipy"""
43 | try:
44 | scipy.spatial.distance.cdist(
45 | np.ones((5, 2)), np.ones((5, 2)), metric)
46 |     except Exception:
47 | raise ValidationError(f'invalid metric for semantic: {metric}')
48 |
49 |
50 | def validate(submission):
51 | """Validation of the meta.yaml in submission
52 |
53 | Testing that submission/meta.yaml is a valid yaml file and corresponds to
54 | the following format:
55 |
56 |     author: <str>
57 |     affiliation: <str>
58 |     description: <str>
59 |     open_source: <bool>
60 |     train_set: <str>
61 |     visually_grounded: <bool>
62 |     gpu_budget: <float>
63 |     parameters:
64 |       phonetic:
65 |         metric: <str>, "cosine", "euclidean", "kl" or "kl_symmetric"
66 |         frame_shift: <float>
67 |       semantic:
68 |         metric: <str>
69 |         pooling: <str>, "min", "max", "mean", "sum", "last", "lastlast" or
70 | "off"
71 |
72 | Raises
73 | ------
74 | exception.ValidationError
75 | For any item not corresponding to prototype.
76 |
77 | """
78 | meta_file = submission / 'meta.yaml'
79 |
80 | if not meta_file.is_file():
81 | raise ValidationError("missing meta.yaml file")
82 |
83 | try:
84 | meta = yaml.safe_load(meta_file.open('r').read().replace('\t', ' '))
85 | except yaml.YAMLError as err:
86 | raise ValidationError(f'failed to parse {meta_file}: {err}')
87 |
88 | if not meta or not isinstance(meta, dict):
89 | raise ValidationError("meta.yaml file is not valid")
90 |
91 | # top level entries
92 | _validate_entries(
93 | meta,
94 | {'author': (str, None),
95 | 'affiliation': (str, None),
96 | 'description': (str, None),
97 | 'open_source': (bool, None),
98 | 'train_set': (str, None),
99 | 'visually_grounded': (bool, None),
100 | 'gpu_budget': (numbers.Number, None),
101 | 'parameters': (dict, None)})
102 |
103 | # parameters entries
104 | _validate_entries(
105 | meta['parameters'],
106 | {'phonetic': (dict, None), 'semantic': (dict, None)},
107 | prefix='parameters')
108 |
109 | # parameters/phonetic level
110 | _validate_entries(
111 | meta['parameters']['phonetic'],
112 | {'metric': (str, ['cosine', 'euclidean', 'kl', 'kl_symmetric']),
113 | 'frame_shift': (numbers.Number, None)},
114 | prefix='parameters/phonetic')
115 |
116 | # parameters/semantic level
117 | _validate_entries(
118 | meta['parameters']['semantic'],
119 | {'metric': (str, None),
120 | 'pooling': (str, [
121 | 'min', 'max', 'mean', 'sum', 'last', 'lastlast', 'off'])},
122 | prefix='parameters/semantic')
123 |
124 | _validate_scipy_metric(meta['parameters']['semantic']['metric'])
125 |
126 | return meta['open_source']
127 |
--------------------------------------------------------------------------------
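
A minimal meta.yaml accepted by the validation above (field values are purely illustrative):

import pathlib
import tempfile

from zerospeech2021 import meta

CONTENT = '''\
author: Jane Doe
affiliation: ACME University
description: CPC baseline features
open_source: true
train_set: LibriSpeech 960h
visually_grounded: false
gpu_budget: 60
parameters:
  phonetic:
    metric: cosine
    frame_shift: 0.01
  semantic:
    metric: cosine
    pooling: max
'''

submission = pathlib.Path(tempfile.mkdtemp())
(submission / 'meta.yaml').write_text(CONTENT)
print(meta.validate(submission))  # True (the open_source flag)
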
/zerospeech2021/phonetic.py:
--------------------------------------------------------------------------------
1 | """ Phonetic task zerospeech 2021 """
2 | import collections
3 | from dataclasses import dataclass
4 | from itertools import chain
5 | from typing import Optional
6 | from enum import Enum
7 |
8 | import numpy as np
9 | import pandas
10 | import joblib
11 |
12 | from zerospeech2021 import exception
13 | from zerospeech2021.phonetic_eval import eval_ABX
14 |
15 | LIBRISPEECH_SETS = {
16 | 'dev': ['dev-clean', 'dev-other'],
17 | 'test': ['test-clean', 'test-other']}
18 |
19 |
20 | ABXFileTypes = Enum('ABXFileTypes',
21 | '.pt .npy .txt .wav .flac .mp3')
22 | ABXMode = Enum('ABXMode', 'all within across')
23 |
24 | ABXDistanceMode = Enum('ABXDistanceMode',
25 | 'euclidian cosine kl kl_symmetric')
26 |
27 |
28 | @dataclass
29 | class AbxArguments:
30 | """ List of arguments to provide to abx in phonetic_eval.abx"""
31 | # path to input data
32 | path_data: str
33 | # path to item file
34 | path_item_file: str
35 | # Path to a CPC checkpoint
36 | path_checkpoint: Optional[str] = None
37 | # size of a single feature
38 | feature_size: Optional[float] = float(0.1)
39 | # Use the GPU to compute distances
40 | cuda: bool = True
41 |     # extension of the input feature files
42 | file_extension: ABXFileTypes = '.txt'
43 | # Choose the mode of the ABX score to compute
44 | mode: ABXMode = 'all'
45 | # Choose the kind of distance to use to compute
46 | distance_mode: ABXDistanceMode = 'cosine'
47 | # Max size of a group while computing the ABX score
48 | max_size_group: int = 10
49 | # When computing the ABX across score, maximum
50 | # number of speaker X to sample per couple A,B.
51 | max_x_across: int = 5
52 | # location to output the results
53 | out: Optional[str] = None
54 |
55 |
56 | def get_input_files(dataset_directory, _set, file_type):
57 | """ Returns a list of all the files in a set """
58 | res = []
59 | for s in LIBRISPEECH_SETS[_set]:
60 | res.append((dataset_directory / s).rglob(f"*.{file_type}"))
61 | return list(chain(*res))
62 |
63 |
64 | def get_submitted_files(submission_directory, _set):
65 | """ Returns a list of all the files in a set """
66 | res = []
67 | for s in LIBRISPEECH_SETS[_set]:
68 | res.append((submission_directory / s).rglob("*"))
69 | return list(chain(*res))
70 |
71 |
72 | def _validate_file(source_file, submission, dataset):
73 | """Ensure a file has the correct format
74 |
75 | Verifies that a feature file is a 2D numpy array of floats and it matches a
76 | file in the dataset.
77 |
78 | :param source_file: input file from dataset
79 | :param submission: location of submitted files
80 | :param dataset: location of dataset
81 |
82 | :return: a pair (target_file, ncols), where target_file is the file in the
83 | submission directory and ncols is the number of columns in the array.
84 |
85 | :raises exception.EntryMissingError if an entry is not present
86 |
87 | """
88 | try:
89 | target_file = submission / source_file.relative_to(dataset)
90 | target_file = target_file.with_suffix('.txt')
91 | if not target_file.is_file():
92 | raise exception.EntryMissingError(
93 | source=source_file, expected=target_file)
94 |
95 | try:
96 | array = np.loadtxt(str(target_file))
97 | except Exception:
98 | raise exception.FileFormatError(
99 | target_file, 'not a valid numpy array')
100 |
101 | if array.dtype != np.dtype('float'):
102 | raise exception.FileFormatError(
103 | target_file, "array loaded is not dtype = float")
104 |
105 | if array.ndim != 2:
106 | raise exception.FileFormatError(
107 | target_file, 'not a 2D array')
108 | except exception.ValidationError as error:
109 | return str(error), None, None
110 |
111 | return None, target_file, array.shape[1]
112 |
113 |
114 | def validate(submission, dataset, kind, njobs=1):
115 | """Validate a subset of the submissions for the phonetic task
116 |
117 |     :param submission: location of the submitted files
118 |     :param dataset: location of the dataset
119 |     :param kind: subset type (dev | test)
120 |     :param njobs: number of parallel processes to use for validation
121 |
122 | :raise ValidationError: if the submission is not valid
123 |
124 | """
125 | if kind not in LIBRISPEECH_SETS.keys():
126 | raise ValueError(f'kind must be "dev" or "test", it is {kind}')
127 |
128 | input_files = get_input_files(dataset, kind, "wav")
129 | if not input_files:
130 | raise exception.ValidationError(
131 | f'found no wav files in {dataset}')
132 |
133 | submitted_files = get_submitted_files(submission, kind)
134 | if not submitted_files:
135 | raise exception.ValidationError(
136 | f'found no files in {submission}')
137 |
138 | # ensure we have only .txt files in submission
139 | no_txt_files = [str(f) for f in submitted_files if f.suffix != '.txt']
140 | if no_txt_files:
141 | raise exception.MismatchError('extra files found', [], no_txt_files)
142 |
143 | # ensure that there are no duplicates
144 | duplicates = [
145 | f for f, n in collections.Counter(submitted_files).items() if n > 1
146 | ]
147 | if duplicates:
148 | raise exception.MismatchError('duplicates found', [], duplicates)
149 |
150 | # check that necessary files are present and valid
151 | valid_entries = joblib.Parallel(n_jobs=njobs)(
152 | joblib.delayed(_validate_file)(f, submission, dataset)
153 | for f in input_files)
154 | errors, valid_entries, ncols = zip(*valid_entries)
155 |
156 | # ensure there are no detected errors
157 | errors = [e for e in errors if e]
158 | if errors:
159 | for e in errors[:10]:
160 | print(f'ERROR: {e}')
161 | if len(errors) > 10:
162 | print(f'ERROR: ... and {len(errors) - 10} more!')
163 | raise exception.ValidationError(f'error detected in phonetic {kind}')
164 |
165 | # ensure all submitted files have the same number of columns
166 | if len(set(ncols)) != 1:
167 | raise exception.ValidationError(
168 | f'all files must have the same number of columns '
169 | f'but have: {set(ncols)}')
170 |
171 | if collections.Counter(submitted_files) != collections.Counter(valid_entries):
172 | raise exception.MismatchError(
173 | 'mismatch in filenames', valid_entries, submitted_files)
174 |
175 |
176 | def evaluate(submission, dataset, kind, metric, frame_shift, force_cpu=False):
177 | """Writes the phonetic evaluation results to `output_dir`
178 |
179 | Parameters
180 | ----------
181 | submission : path
182 | The directory where the phonetic submission is stored (expect
183 | subdirectories dev-clean, dev-other, etc)
184 | dataset : path
185 |         The directory where the phonetic dataset is stored (expects
186 |         subdirectories dev-clean, dev-other, etc, each with its own
187 |         .item file, e.g. dev-clean/dev-clean.item)
188 | kind : str
189 | Must be 'dev' or 'test'
190 | metric : str
191 | Must be 'cosine', 'euclidean', 'kl' or 'kl_symmetric'
192 | frame_shift : float
193 | The shift between two features frames in s.
194 | force_cpu: bool, optional
195 |         When True use the CPU, otherwise use the GPU (defaults to False)
196 |
197 | Returns
198 | -------
199 | score : pandas.DataFrame
200 | A data frame with the ABX score obtained for each combination of
201 | {dev, test}, {clean, other} and {across, within}.
202 |
203 | """
204 | results = {}
205 | for subkind in LIBRISPEECH_SETS[kind]:
206 | print(
207 | f'Evaluating phonetic {subkind} '
208 | f'(metric={metric}, frame_shift={frame_shift})')
209 |
210 | arg_obj = AbxArguments(
211 | path_data=str(submission / subkind),
212 | path_item_file=str(dataset / subkind / f'{subkind}.item'),
213 | distance_mode=metric,
214 | feature_size=frame_shift,
215 | cuda=not force_cpu)
216 |
217 | results[subkind] = eval_ABX.main(arg_obj=arg_obj)
218 |
219 | results2 = [
220 |         (dset.split('-')[0], dset.split('-')[1], abx_type, score)
221 |         for dset, v in results.items() for abx_type, score in v.items()]
222 | return pandas.DataFrame(
223 | results2, columns=['dataset', 'sub-dataset', 'type', 'score'])
224 |
--------------------------------------------------------------------------------
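
A short sketch of running the phonetic evaluation directly, assuming hypothetical `submission/phonetic` and `dataset/phonetic` directories and the metric/frame_shift values declared in the submission's meta.yaml:

from pathlib import Path

from zerospeech2021 import phonetic

scores = phonetic.evaluate(
    Path('submission/phonetic'), Path('dataset/phonetic'),
    kind='dev', metric='cosine', frame_shift=0.01, force_cpu=True)
print(scores)  # columns: dataset, sub-dataset, type, score
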
/zerospeech2021/phonetic_eval/ABX_src/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/ABX_src/abx_group_computation.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import torch
3 | import math
4 | import libri_light_dtw as dtw
5 | import progressbar
6 |
7 |
8 | def get_distance_function_from_name(name_str):
9 | if name_str == 'euclidean':
10 | return get_euclidian_distance_batch
11 | if name_str == 'cosine':
12 | return get_cosine_distance_batch
13 | if name_str == 'kl':
14 | return get_kl_distance_batch
15 | if name_str == 'kl_symmetric':
16 | return get_kl_distance_symmetric_batch
17 | raise ValueError(f"Invalid distance mode")
18 |
19 |
20 | def check_dtw_group_validity(a, b, x):
21 | assert (len(a.size()) == len(b.size()))
22 | assert (len(a.size()) == len(x.size()))
23 | assert (a.size(2) == x.size(2))
24 | assert (a.size(2) == b.size(2))
25 |
26 |
27 | def get_kl_distance_batch(a1, a2, epsilon=1e-6):
28 | N1, S1, D = a1.size() # Batch x Seq x Channel
29 | N2, S2, D = a2.size() # Batch x Seq x Channel
30 |
31 | # (P * (P / Q).log()).sum()
32 | div = (a1.view(N1, 1, S1, 1, D) + epsilon) / (a2.view(1, N2, 1, S2, D) + epsilon)
33 | prod = (a1.view(N1, 1, S1, 1, D)) * div.log()
34 |
35 | return prod.sum(dim=4)
36 |
37 |
38 | def get_kl_distance_symmetric_batch(a1, a2, epsilon=1e-6):
39 | N1, S1, D = a1.size()
40 | N2, S2, D = a2.size()
41 |
42 | div1 = (a1.view(N1, 1, S1, 1, D) + epsilon) / (a2.view(1, N2, 1, S2, D) + epsilon)
43 | div2 = (a2.view(1, N2, 1, S2, D) + epsilon) / (a1.view(N1, 1, S1, 1, D) + epsilon)
44 |
45 | prod1 = (a1.view(N1, 1, S1, 1, D)) * div1.log()
46 | prod2 = (a2.view(1, N2, 1, S2, D)) * div2.log()
47 |
48 | return (0.5 * prod1 + 0.5 * prod2).sum(dim=4)
49 |
50 |
51 | def get_cosine_distance_batch(a1, a2, epsilon=1e-8):
52 | r""" a1 and a2 must be normalized"""
53 | N1, S1, D = a1.size() # Batch x Seq x Channel
54 | N2, S2, D = a2.size() # Batch x Seq x Channel
55 |
56 | prod = (a1.view(N1, 1, S1, 1, D)) * (a2.view(1, N2, 1, S2, D))
57 |     # Sum across the channel dimension
58 | prod = torch.clamp(prod.sum(dim=4), -1, 1).acos() / math.pi
59 |
60 | return prod
61 |
62 |
63 | def get_euclidian_distance_batch(a1, a2):
64 | N1, S1, D = a1.size()
65 | N2, S2, D = a2.size()
66 | diff = a1.view(N1, 1, S1, 1, D) - a2.view(1, N2, 1, S2, D)
67 | return torch.sqrt((diff ** 2).sum(dim=4))
68 |
69 |
70 | def get_distance_group_dtw(a1, a2, size1, size2,
71 | ignore_diag=False, symmetric=False,
72 | distance_function=get_cosine_distance_batch):
73 | N1, S1, D = a1.size()
74 | N2, S2, D = a2.size()
75 | if size1.size(0) != N1:
76 | print(a1.size(), size1.size())
77 | print(a2.size(), size2.size())
78 | assert (size1.size(0) == N1)
79 | assert (size2.size(0) == N2)
80 |
81 | distance_mat = distance_function(a1, a2).detach().cpu().numpy()
82 | return dtw.dtw_batch(a1, a2, size1, size2,
83 | distance_mat,
84 | ignore_diag, symmetric)
85 |
86 |
87 | def get_theta_group_dtw(a, b, x, sa, sb, sx, distance_function, symmetric):
88 | check_dtw_group_validity(a, b, x)
89 |
90 | dxb = get_distance_group_dtw(
91 | x, b, sx, sb, distance_function=distance_function)
92 | dxa = get_distance_group_dtw(x, a, sx, sa, ignore_diag=symmetric,
93 | symmetric=symmetric,
94 | distance_function=distance_function)
95 |
96 | Nx, Na = dxa.size()
97 | Nx, Nb = dxb.size()
98 |
99 | if symmetric:
100 | n_pos = Na * (Na - 1)
101 | max_val = dxb.max().item()
102 | for i in range(Na):
103 | dxa[i, i] = max_val + 1
104 | else:
105 | n_pos = Na * Nx
106 |
107 | dxb = dxb.view(Nx, 1, Nb).expand(Nx, Na, Nb)
108 | dxa = dxa.view(Nx, Na, 1).expand(Nx, Na, Nb)
109 |
110 | sc = (dxa < dxb).sum() + 0.5 * (dxa == dxb).sum()
111 | sc /= (n_pos * Nb)
112 |
113 | return sc.item()
114 |
115 |
116 | def loc_dtw(data, distance_function, symmetric):
117 | coords, group_a, group_b, group_x = data
118 | group_a_data, group_a_size = group_a
119 | group_b_data, group_b_size = group_b
120 | group_x_data, group_x_size = group_x
121 | theta = get_theta_group_dtw(group_a_data,
122 | group_b_data,
123 | group_x_data,
124 | group_a_size,
125 | group_b_size,
126 | group_x_size,
127 | distance_function,
128 | symmetric)
129 |
130 | return (coords, 1 - theta)
131 |
132 |
133 | def get_abx_scores_dtw_on_group(group_iterator,
134 | distance_function,
135 | symmetric):
136 | data_list = []
137 | coords_list = []
138 | bar = progressbar.ProgressBar(prefix=' > ', maxval=len(group_iterator))
139 | bar.start()
140 |
141 | with torch.no_grad():
142 | for index, group in enumerate(group_iterator):
143 | bar.update(index)
144 | coords, abx = loc_dtw(group, distance_function, symmetric)
145 | data_list.append(abx)
146 | coords_list.append(coords)
147 | bar.finish()
148 |
149 | return torch.sparse.FloatTensor(torch.LongTensor(coords_list).t(),
150 | torch.FloatTensor(data_list),
151 | group_iterator.get_board_size())
152 |
--------------------------------------------------------------------------------
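
A toy illustration of the aggregation at the core of `get_theta_group_dtw` above: once the DTW distances are computed, theta is the fraction of comparisons where x is closer to a than to b, ties counting one half (the distances below are made up):

import torch

dxa = torch.tensor([[0.2], [0.9]])  # distances x -> a (Nx=2, Na=1)
dxb = torch.tensor([[0.5], [0.4]])  # distances x -> b (Nx=2, Nb=1)
Nx, Na = dxa.size()
_, Nb = dxb.size()
dxb = dxb.view(Nx, 1, Nb).expand(Nx, Na, Nb)
dxa = dxa.view(Nx, Na, 1).expand(Nx, Na, Nb)
theta = ((dxa < dxb).sum() + 0.5 * (dxa == dxb).sum()) / (Na * Nx * Nb)
print(theta)  # tensor(0.5000): x0 is closer to a, x1 is closer to b
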
/zerospeech2021/phonetic_eval/ABX_src/abx_iterators.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import torch
3 | import progressbar
4 | import math
5 | import random
6 |
7 |
8 | def normalize_with_singularity(x):
9 | r"""
10 | Normalize the given vector across the third dimension.
11 | Extend all vectors by eps=1e-12 to put the null vector at the maximal
12 | cosine distance from any non-null vector.
13 | """
14 | S, H = x.size()
15 | norm_x = (x**2).sum(dim=1, keepdim=True)
16 |
17 | x /= torch.sqrt(norm_x)
18 | zero_vals = (norm_x == 0).view(S)
19 | x[zero_vals] = 1 / math.sqrt(H)
20 | border_vect = torch.zeros((S, 1),
21 | dtype=x.dtype,
22 | device=x.device) + 1e-12
23 | border_vect[zero_vals] = -2*1e12
24 | return torch.cat([x, border_vect], dim=1)
25 |
26 |
27 | def load_item_file(path_item_file):
28 | r""" Load a .item file indicating the triplets for the ABX score. The
29 |     input file must have the following format:
30 |     line 0 : whatever (not read)
31 |     line > 0: #file_ID onset offset #phone prev-phone next-phone speaker
32 |     onset : beginning of the triplet (in s)
33 |     offset : end of the triplet (in s)
34 | """
35 | with open(path_item_file, 'r') as file:
36 | data = file.readlines()[1:]
37 |
38 | data = [x.replace('\n', '') for x in data]
39 |
40 | out = {}
41 |
42 | phone_match = {}
43 | speaker_match = {}
44 | context_match = {}
45 |
46 | for line in data:
47 | items = line.split()
48 | assert(len(items) == 7)
49 | fileID = items[0]
50 | if fileID not in out:
51 | out[fileID] = []
52 |
53 | onset, offset = float(items[1]), float(items[2])
54 | context = '+'.join([items[4], items[5]])
55 | phone = items[3]
56 | speaker = items[6]
57 |
58 | if phone not in phone_match:
59 | s = len(phone_match)
60 | phone_match[phone] = s
61 | phone_id = phone_match[phone]
62 |
63 | if context not in context_match:
64 | s = len(context_match)
65 | context_match[context] = s
66 | context_id = context_match[context]
67 |
68 | if speaker not in speaker_match:
69 | s = len(speaker_match)
70 | speaker_match[speaker] = s
71 | speaker_id = speaker_match[speaker]
72 |
73 | out[fileID].append([onset, offset, context_id, phone_id, speaker_id])
74 |
75 | return out, context_match, phone_match, speaker_match
76 |
77 |
78 | def get_features_group(in_data, index_order):
79 |
80 | in_index = list(range(len(in_data)))
81 | in_index.sort(key=lambda x: [in_data[x][i] for i in index_order])
82 | out_groups = []
83 | last_values = [in_data[in_index[0]][i] for i in index_order]
84 | i_s = 0
85 | curr_group = [[] for i in index_order]
86 | n_orders = len(index_order) - 1
87 | tmp = [in_data[i] for i in in_index]
88 |
89 | for index, item in enumerate(tmp):
90 | for order_index, order in enumerate(index_order):
91 | if item[order] != last_values[order_index]:
92 | curr_group[-1].append((i_s, index))
93 | for i in range(n_orders, order_index, -1):
94 | curr_group[i-1].append(curr_group[i])
95 | curr_group[i] = []
96 | if order_index == 0:
97 | out_groups += curr_group[0]
98 | curr_group[0] = []
99 | last_values = [item[i] for i in index_order]
100 | i_s = index
101 | break
102 |
103 | if i_s < len(in_data):
104 | curr_group[-1].append((i_s, len(in_data)))
105 | for i in range(n_orders, 0, -1):
106 | curr_group[i-1].append(curr_group[i])
107 | out_groups += curr_group[0]
108 |
109 | return in_index, out_groups
110 |
111 |
112 | class ABXFeatureLoader:
113 |
114 | def __init__(self,
115 | path_item_file,
116 | seqList,
117 | featureMaker,
118 | stepFeature,
119 | normalize):
120 | r"""
121 | Args:
122 | path_item_file (str): path to the .item files containing the ABX
123 | triplets
124 | seqList (list): list of items (fileID, path) where fileID refers to
125 | the file's ID as used in path_item_file, and path
126 | is the actual path to the input audio sequence
127 | featureMaker (function): either a function or a callable object.
128 | Takes a path as input and outputs the
129 | feature sequence corresponding to the
130 | given file.
131 |             normalize (bool): if True all input features will be normalized
132 | across the channels dimension.
133 |
134 | Note:
135 | You can use this dataset with pre-computed features. For example, if
136 | you have a collection of features files in the torch .pt format then
137 | you can just set featureMaker = torch.load.
138 | """
139 |
140 | files_data, self.context_match, self.phone_match, self.speaker_match = \
141 | load_item_file(path_item_file)
142 | self.seqNorm = True
143 | self.stepFeature = stepFeature
144 | self.loadFromFileData(files_data, seqList, featureMaker, normalize)
145 |
146 | def loadFromFileData(self, files_data, seqList, feature_maker, normalize):
147 |
148 | # self.features[i]: index_start, size, context_id, phone_id, speaker_id
149 | self.features = []
150 | self.INDEX_CONTEXT = 2
151 | self.INDEX_PHONE = 3
152 | self.INDEX_SPEAKER = 4
153 | data = []
154 |
155 | totSize = 0
156 |
157 | print(" > Building the input features...")
158 | bar = progressbar.ProgressBar(prefix=' > ', maxval=len(seqList))
159 | bar.start()
160 |
161 | for index, vals in enumerate(seqList):
162 |
163 | fileID, file_path = vals
164 | bar.update(index)
165 | if fileID not in files_data:
166 | continue
167 |
168 | features = feature_maker(file_path)
169 | if normalize:
170 | features = normalize_with_singularity(features)
171 |
172 | features = features.detach().cpu()
173 |
174 | phone_data = files_data[fileID]
175 |
176 | for phone_start, phone_end, context_id, phone_id, speaker_id in phone_data:
177 |
178 | index_start = max(
179 | 0, int(math.ceil(self.stepFeature * phone_start - 0.5)))
180 | index_end = min(features.size(0),
181 | int(math.floor(self.stepFeature * phone_end - 0.5)))
182 |
183 | if index_start >= features.size(0) or index_end <= index_start:
184 | continue
185 |
186 | loc_size = index_end - index_start
187 | self.features.append([totSize, loc_size, context_id,
188 | phone_id, speaker_id])
189 | data.append(features[index_start:index_end])
190 | totSize += loc_size
191 |
192 | bar.finish()
193 |
194 | self.data = torch.cat(data, dim=0)
195 | self.feature_dim = self.data.size(1)
196 |
197 | def get_data_device(self):
198 | return self.data.device
199 |
200 | def cuda(self):
201 | self.data = self.data.cuda()
202 |
203 | def cpu(self):
204 | self.data = self.data.cpu()
205 |
206 | def get_max_group_size(self, i_group, i_sub_group):
207 | id_start, id_end = self.group_index[i_group][i_sub_group]
208 | return max([self.features[i][1] for i in range(id_start, id_end)])
209 |
210 | def get_ids(self, index):
211 | context_id, phone_id, speaker_id = self.features[index][2:]
212 | return context_id, phone_id, speaker_id
213 |
214 | def __getitem__(self, index):
215 | i_data, out_size, context_id, phone_id, speaker_id = self.features[index]
216 | return self.data[i_data:(i_data + out_size)], out_size, (context_id, phone_id, speaker_id)
217 |
218 | def __len__(self):
219 | return len(self.features)
220 |
221 | def get_n_speakers(self):
222 | return len(self.speaker_match)
223 |
224 | def get_n_context(self):
225 | return len(self.context_match)
226 |
227 | def get_n_phone(self):
228 | return len(self.phone_match)
229 |
230 | def get_n_groups(self):
231 | return len(self.group_index)
232 |
233 | def get_n_sub_group(self, index_sub_group):
234 | return len(self.group_index[index_sub_group])
235 |
236 | def get_iterator(self, mode, max_size_group):
237 | if mode == 'within':
238 | return ABXWithinGroupIterator(self, max_size_group)
239 | if mode == 'across':
240 | return ABXAcrossGroupIterator(self, max_size_group)
241 | raise ValueError(f"Invalid mode: {mode}")
242 |
243 |
244 | class ABXIterator:
245 | r"""
246 | Base class building ABX's triplets.
247 | """
248 |
249 | def __init__(self, abxDataset, max_size_group):
250 | self.max_size_group = max_size_group
251 | self.dataset = abxDataset
252 | self.len = 0
253 |
254 | self.index_csp, self.groups_csp = \
255 | get_features_group(abxDataset.features,
256 | [abxDataset.INDEX_CONTEXT,
257 | abxDataset.INDEX_SPEAKER,
258 | abxDataset.INDEX_PHONE])
259 |
260 | def get_group(self, i_start, i_end):
261 | data = []
262 | max_size = 0
263 | to_take = list(range(i_start, i_end))
264 | if i_end - i_start > self.max_size_group:
265 | to_take = random.sample(to_take, k=self.max_size_group)
266 | for i in to_take:
267 | loc_data, loc_size, loc_id = self.dataset[self.index_csp[i]]
268 | max_size = max(loc_size, max_size)
269 | data.append(loc_data)
270 |
271 | N = len(to_take)
272 | out_data = torch.zeros(N, max_size,
273 | self.dataset.feature_dim,
274 | device=self.dataset.get_data_device())
275 | out_size = torch.zeros(N, dtype=torch.long,
276 | device=self.dataset.get_data_device())
277 |
278 | for i in range(N):
279 | size = data[i].size(0)
280 | out_data[i, :size] = data[i]
281 | out_size[i] = size
282 |
283 | return out_data, out_size, loc_id
284 |
285 | def __len__(self):
286 | return self.len
287 |
288 | def get_board_size(self):
289 | r"""
290 | Get the output dimension of the triplet's space.
291 | """
292 | pass
293 |
294 |
295 | class ABXWithinGroupIterator(ABXIterator):
296 | r"""
297 | Iterator giving the triplets for the ABX within score.
298 | """
299 |
300 | def __init__(self, abxDataset, max_size_group):
301 |
302 | super(ABXWithinGroupIterator, self).__init__(abxDataset,
303 | max_size_group)
304 | self.symmetric = True
305 |
306 | for context_group in self.groups_csp:
307 | for speaker_group in context_group:
308 | if len(speaker_group) > 1:
309 | for i_start, i_end in speaker_group:
310 | if i_end - i_start > 1:
311 | self.len += (len(speaker_group) - 1)
312 |
313 | def __iter__(self):
314 | for i_c, context_group in enumerate(self.groups_csp):
315 | for i_s, speaker_group in enumerate(context_group):
316 | n_phones = len(speaker_group)
317 | if n_phones == 1:
318 | continue
319 |
320 | for i_a in range(n_phones):
321 | i_start_a, i_end_a = self.groups_csp[i_c][i_s][i_a]
322 | if i_end_a - i_start_a == 1:
323 | continue
324 |
325 | for i_b in range(n_phones):
326 | if i_b == i_a:
327 | continue
328 |
329 | i_start_b, i_end_b = self.groups_csp[i_c][i_s][i_b]
330 | data_b, size_b, id_b = self.get_group(i_start_b,
331 | i_end_b)
332 | data_a, size_a, id_a = self.get_group(i_start_a,
333 | i_end_a)
334 |
335 | out_coords = id_a[2], id_a[1], id_b[1], id_a[0]
336 | yield out_coords, (data_a, size_a), (data_b, size_b), \
337 | (data_a, size_a)
338 |
339 | def get_board_size(self):
340 |
341 | return (self.dataset.get_n_speakers(),
342 | self.dataset.get_n_phone(),
343 | self.dataset.get_n_phone(),
344 | self.dataset.get_n_context())
345 |
346 |
347 | class ABXAcrossGroupIterator(ABXIterator):
348 | r"""
349 | Iterator giving the triplets for the ABX across score.
350 | """
351 |
352 | def __init__(self, abxDataset, max_size_group):
353 |
354 | super(ABXAcrossGroupIterator, self).__init__(abxDataset,
355 | max_size_group)
356 | self.symmetric = False
357 | self.get_speakers_from_cp = {}
358 | self.max_x = 5
359 |
360 | for context_group in self.groups_csp:
361 | for speaker_group in context_group:
362 | for i_start, i_end in speaker_group:
363 | c_id, p_id, s_id = self.dataset.get_ids(
364 | self.index_csp[i_start])
365 | if c_id not in self.get_speakers_from_cp:
366 | self.get_speakers_from_cp[c_id] = {}
367 | if p_id not in self.get_speakers_from_cp[c_id]:
368 | self.get_speakers_from_cp[c_id][p_id] = {}
369 | self.get_speakers_from_cp[c_id][p_id][s_id] = (
370 | i_start, i_end)
371 |
372 | for context_group in self.groups_csp:
373 | for speaker_group in context_group:
374 | if len(speaker_group) > 1:
375 | for i_start, i_end in speaker_group:
376 | c_id, p_id, s_id = self.dataset.get_ids(
377 | self.index_csp[i_start])
378 | self.len += (len(speaker_group) - 1) * (min(self.max_x,
379 | len(self.get_speakers_from_cp[c_id][p_id]) - 1))
380 |
381 | def get_other_speakers_in_group(self, i_start_group):
382 | c_id, p_id, s_id = self.dataset.get_ids(self.index_csp[i_start_group])
383 | return [v for k, v in self.get_speakers_from_cp[c_id][p_id].items() if k != s_id]
384 |
385 | def get_abx_triplet(self, i_a, i_b, i_x):
386 | i_start_a, i_end_a = i_a
387 | data_a, size_a, id_a = self.get_group(i_start_a, i_end_a)
388 |
389 | i_start_b, i_end_b = i_b
390 | data_b, size_b, id_b = self.get_group(i_start_b, i_end_b)
391 |
392 | i_start_x, i_end_x = i_x
393 | data_x, size_x, id_x = self.get_group(i_start_x, i_end_x)
394 |
395 | out_coords = id_a[2], id_a[1], id_b[1], id_a[0], id_x[2]
396 | return out_coords, (data_a, size_a), (data_b, size_b), \
397 | (data_x, size_x)
398 |
399 | def __iter__(self):
400 | for i_c, context_group in enumerate(self.groups_csp):
401 | for i_s, speaker_group in enumerate(context_group):
402 | n_phones = len(speaker_group)
403 | if n_phones == 1:
404 | continue
405 |
406 | for i_a in range(n_phones):
407 | i_start_a, i_end_a = self.groups_csp[i_c][i_s][i_a]
408 | ref = self.get_other_speakers_in_group(i_start_a)
409 | if len(ref) > self.max_x:
410 | speakers_a = random.sample(ref, k=self.max_x)
411 | else:
412 | speakers_a = ref
413 |
414 | for i_start_x, i_end_x in speakers_a:
415 |
416 | for i_b in range(n_phones):
417 | if i_b == i_a:
418 | continue
419 |
420 | i_start_b, i_end_b = self.groups_csp[i_c][i_s][i_b]
421 | yield self.get_abx_triplet((i_start_a, i_end_a), (i_start_b, i_end_b), (i_start_x, i_end_x))
422 |
423 | def get_board_size(self):
424 |
425 | return (self.dataset.get_n_speakers(),
426 | self.dataset.get_n_phone(),
427 | self.dataset.get_n_phone(),
428 | self.dataset.get_n_context(),
429 | self.dataset.get_n_speakers())
430 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/ABX_src/dtw.pyx:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import torch
3 | import numpy as np
4 | cimport numpy as np
5 | cimport cython
6 | from cpython cimport bool
7 | ctypedef np.float32_t CTYPE_t # cost type
8 | ctypedef np.intp_t IND_t # array index type
9 | CTYPE = np.float32 # cost type
10 |
11 |
12 |
13 | def dtw_batch(x, y, sx, sy, dist_mat, ignore_diag=False, symetric=False):
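    """Computes the DTW distance between all pairs of sequences in a batch.

    sx and sy hold the true lengths of the padded sequences, and
    dist_mat[i, j] is the frame-level distance matrix between sequence i of
    the first batch and sequence j of the second. Returns an Nx x Ny matrix
    of path-normalized DTW costs.
    """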
14 |
15 | Nx = dist_mat.shape[0]
16 | Ny = dist_mat.shape[1]
17 |
18 | out = torch.zeros((Nx, Ny))
19 |
20 | for i in range(Nx):
21 | start_index = i if symetric else 0
22 | i_sx = sx[i]
23 | for j in range(start_index, Ny):
24 |
25 | j_sy = sy[j]
26 | if ignore_diag and i == j:
27 | continue
28 |             distance = _dtw(i_sx, j_sy, dist_mat[i, j, :i_sx, :j_sy], True)
29 | out[i][j] = distance
30 | if symetric and i != j:
31 | out[j][i] = out[i][j]
32 |
33 | return out
34 |
35 |
36 |
37 | cpdef _dtw(IND_t N, IND_t M, CTYPE_t[:,:] dist_array, bool normalized):
38 | cdef IND_t i, j
39 | cdef CTYPE_t[:,:] cost = np.empty((N, M), dtype=CTYPE)
40 | cdef CTYPE_t final_cost, c_diag, c_left, c_up
41 | # initialization
42 | cost[0,0] = dist_array[0,0]
43 | for i in range(1,N):
44 | cost[i,0] = dist_array[i,0] + cost[i-1,0]
45 | for j in range(1,M):
46 | cost[0,j] = dist_array[0,j] + cost[0,j-1]
47 | # the dynamic programming loop
48 | for i in range(1,N):
49 | for j in range(1,M):
50 | cost[i,j] = dist_array[i,j] + min(cost[i-1,j], cost[i-1,j-1], cost[i,j-1])
51 |
52 | final_cost = cost[N-1, M-1]
53 | if normalized:
54 | path_len = 1
55 | i = N-1
56 | j = M-1
57 | while i > 0 and j > 0:
58 | c_up = cost[i - 1, j]
59 | c_left = cost[i, j-1]
60 | c_diag = cost[i-1, j-1]
61 | if c_diag <= c_left and c_diag <= c_up:
62 | i -= 1
63 | j -= 1
64 | elif c_left <= c_up:
65 | j -= 1
66 | else:
67 | i -= 1
68 | path_len += 1
69 | if i == 0:
70 | path_len += j
71 | if j == 0:
72 | path_len += i
73 | final_cost /= path_len
74 | return final_cost
75 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/CPC_loader.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import argparse
3 | import torch
4 | import torchaudio
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 |
9 | def download_state_dict(model_name):
10 |
11 | base_url = "https://dl.fbaipublicfiles.com/librilight/CPC_checkpoints"
12 | return torch.hub.load_state_dict_from_url(f"{base_url}/{model_name}")
13 |
14 |
15 | def load_cpc_features(state_dict):
16 |
17 | config = state_dict["config"]
18 | weights = state_dict["weights"]
19 | encoder = CPCEncoder(config["hiddenEncoder"])
20 | ar_net = CPCAR(config["hiddenEncoder"], config["hiddenGar"], False,
21 | config["nLevelsGRU"])
22 |
23 | model = CPCModel(encoder, ar_net)
24 | model.load_state_dict(weights, strict=False)
25 | output = FeatureModule(model, False)
26 | output.config = config
27 | return output
28 |
29 |
30 | def get_features_state_dict(feature_module):
31 | config = feature_module.config
32 | if config is None:
33 | raise ValueError("The input feature_module should have config defined")
34 | weights = feature_module.model.state_dict()
35 | return {"config": config, "weights": weights}
36 |
37 |
38 | def build_feature_from_file(file_path, feature_maker, max_size_seq=64000):
39 | r"""
40 | Apply the featureMaker to the given file.
41 | Arguments:
42 |     - file_path (string): path of the sequence to load
43 |     - feature_maker (FeatureModule): model to apply
44 |     - max_size_seq (int): maximal size of a chunk, in samples; the
45 |       sequence is processed in successive chunks of this size and the
46 |       resulting features are concatenated
47 | Return:
48 | a torch vector of size 1 x Seq_size x Feature_dim
49 | """
50 | seq = torchaudio.load(file_path)[0]
51 | sizeSeq = seq.size(1)
52 | start = 0
53 | out = []
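    # process the sequence in successive chunks of max_size_seq samples; the
    # trailing remainder (shorter than a full chunk) is handled separately below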
54 | while start < sizeSeq:
55 | if start + max_size_seq > sizeSeq:
56 | break
57 | end = min(sizeSeq, start + max_size_seq)
58 | subseq = (seq[:, start:end]).view(1, 1, -1).cuda(device=0)
59 | with torch.no_grad():
60 | features = feature_maker(subseq)
61 | out.append(features.detach().cpu())
62 | start += max_size_seq
63 |
64 | if start < sizeSeq:
65 | subseq = (seq[:, -max_size_seq:]).view(1, 1, -1).cuda(device=0)
66 | with torch.no_grad():
67 | features = feature_maker(subseq)
68 | df = subseq.size(2) // features.size(1)
69 | delta = (sizeSeq - start) // df
70 | out.append(features[:, -delta:].detach().cpu())
71 |
72 | out = torch.cat(out, dim=1)
73 | return out.view(out.size(1), out.size(2))
74 |
75 | ##############################################################################
76 | # Minimal code to load a CPC checkpoint
77 | ##############################################################################
78 |
79 |
80 | class ChannelNorm(nn.Module):
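    """Normalizes each time step of a (batch, channels, time) tensor across
    the channel dimension, with an optional learned affine rescaling."""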
81 |
82 | def __init__(self,
83 | numFeatures,
84 | epsilon=1e-05,
85 | affine=True):
86 |
87 | super(ChannelNorm, self).__init__()
88 | if affine:
89 | self.weight = nn.parameter.Parameter(
90 | torch.Tensor(1, numFeatures, 1))
91 | self.bias = nn.parameter.Parameter(torch.Tensor(1, numFeatures, 1))
92 | else:
93 | self.weight = None
94 | self.bias = None
95 | self.epsilon = epsilon
96 | self.p = 0
97 | self.affine = affine
98 | self.reset_parameters()
99 |
100 | def reset_parameters(self):
101 | if self.affine:
102 | torch.nn.init.ones_(self.weight)
103 | torch.nn.init.zeros_(self.bias)
104 |
105 | def forward(self, x):
106 |
107 | cumMean = x.mean(dim=1, keepdim=True)
108 | cumVar = x.var(dim=1, keepdim=True)
109 | x = (x - cumMean)*torch.rsqrt(cumVar + self.epsilon)
110 |
111 | if self.weight is not None:
112 | x = x * self.weight + self.bias
113 | return x
114 |
115 |
116 | class CPCEncoder(nn.Module):
117 |
118 | def __init__(self,
119 | sizeHidden=512):
120 |
121 | super(CPCEncoder, self).__init__()
122 | normLayer = ChannelNorm
123 |
124 | self.conv0 = nn.Conv1d(1, sizeHidden, 10, stride=5, padding=3)
125 | self.batchNorm0 = normLayer(sizeHidden)
126 | self.conv1 = nn.Conv1d(sizeHidden, sizeHidden, 8, stride=4, padding=2)
127 | self.batchNorm1 = normLayer(sizeHidden)
128 | self.conv2 = nn.Conv1d(sizeHidden, sizeHidden, 4,
129 | stride=2, padding=1)
130 | self.batchNorm2 = normLayer(sizeHidden)
131 | self.conv3 = nn.Conv1d(sizeHidden, sizeHidden, 4, stride=2, padding=1)
132 | self.batchNorm3 = normLayer(sizeHidden)
133 | self.conv4 = nn.Conv1d(sizeHidden, sizeHidden, 4, stride=2, padding=1)
134 | self.batchNorm4 = normLayer(sizeHidden)
135 | self.DOWNSAMPLING = 160
136 |
137 | def getDimOutput(self):
138 | return self.conv4.out_channels
139 |
140 | def forward(self, x):
141 | x = F.relu(self.batchNorm0(self.conv0(x)))
142 | x = F.relu(self.batchNorm1(self.conv1(x)))
143 | x = F.relu(self.batchNorm2(self.conv2(x)))
144 | x = F.relu(self.batchNorm3(self.conv3(x)))
145 | x = F.relu(self.batchNorm4(self.conv4(x)))
146 | return x
147 |
148 |
149 | class CPCAR(nn.Module):
150 |
151 | def __init__(self,
152 | dimEncoded,
153 | dimOutput,
154 | keepHidden,
155 | nLevelsGRU):
156 |
157 | super(CPCAR, self).__init__()
158 | self.baseNet = nn.LSTM(dimEncoded, dimOutput,
159 | num_layers=nLevelsGRU, batch_first=True)
160 | self.hidden = None
161 | self.keepHidden = keepHidden
162 |
163 | def getDimOutput(self):
164 | return self.baseNet.hidden_size
165 |
166 | def forward(self, x):
167 |
168 | try:
169 | self.baseNet.flatten_parameters()
170 | except RuntimeError:
171 | pass
172 | x, h = self.baseNet(x, self.hidden)
173 | if self.keepHidden:
174 | if isinstance(h, tuple):
175 | self.hidden = tuple(x.detach() for x in h)
176 | else:
177 | self.hidden = h.detach()
178 | return x
179 |
180 |
181 | class CPCModel(nn.Module):
182 |
183 | def __init__(self,
184 | encoder,
185 | AR):
186 |
187 | super(CPCModel, self).__init__()
188 | self.gEncoder = encoder
189 | self.gAR = AR
190 |
191 | def forward(self, batchData, label):
192 | encodedData = self.gEncoder(batchData).permute(0, 2, 1)
193 | cFeature = self.gAR(encodedData)
194 | return cFeature, encodedData, label
195 |
196 |
197 | class FeatureModule(torch.nn.Module):
198 | r"""
199 | A simpler interface to handle CPC models. Useful for a smooth workflow when
200 | working with CPC trained features.
201 | """
202 |
203 | def __init__(self, featureMaker, get_encoded,
204 | seq_norm=True):
205 | super(FeatureModule, self).__init__()
206 | self.get_encoded = get_encoded
207 | self.model = featureMaker
208 | self.seq_norm = seq_norm
209 | self.config = None
210 |
211 | def forward(self, batch_data):
212 | # Input Size : BatchSize x 1 x SeqSize
213 | # Feature size: BatchSize x SeqSize x ChannelSize
214 |         if getattr(self, 'is_cuda', False):  # unset if .cuda()/.cpu() never called
215 | batch_data = batch_data.cuda()
216 | cFeature, encoded, _ = self.model(batch_data, None)
217 | if self.get_encoded:
218 | cFeature = encoded
219 | if self.seq_norm:
220 | mean = cFeature.mean(dim=1, keepdim=True)
221 | var = cFeature.var(dim=1, keepdim=True)
222 | cFeature = (cFeature - mean) / torch.sqrt(var + 1e-08)
223 | return cFeature
224 |
225 | def cuda(self):
226 | self.is_cuda = True
227 | super(FeatureModule, self).cuda()
228 |
229 | def cpu(self):
230 | self.is_cuda = False
231 |         super(FeatureModule, self).cpu()
232 |
233 | def get_output_dim(self):
234 | if self.get_encoded:
235 | return self.config["hiddenEncoder"]
236 | return self.config["hiddenGar"]
237 |
238 |
239 | if __name__ == "__main__":
240 |
241 | parser = argparse.ArgumentParser(description='Download model')
242 | parser.add_argument('model_name', type=str,
243 | choices=["600h", "6kh", "60kh"])
244 | parser.add_argument('output', type=str)
245 | args = parser.parse_args()
246 |
247 | CPC_MODELS_NAMES = {"60kh": "60k_epoch4-d0f474de.pt",
248 | "600h": "600h-bdd7ced6.pt",
249 | "6kh":"6k_epoch30-9df0493c.pt"}
250 | state_dict = download_state_dict(CPC_MODELS_NAMES[args.model_name])
251 | torch.save(state_dict, args.output)
252 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/LICENCE.txt:
--------------------------------------------------------------------------------
1 | The original libri_light_eval module can be found at https://github.com/facebookresearch/libri-light/tree/master/eval
2 |
3 | This module is licensed under the MIT licence; all credit goes to the original creators.
4 |
5 | MIT License
6 |
7 | Copyright (c) Facebook, Inc. and its affiliates.
8 |
9 | Permission is hereby granted, free of charge, to any person obtaining a copy
10 | of this software and associated documentation files (the "Software"), to deal
11 | in the Software without restriction, including without limitation the rights
12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 | copies of the Software, and to permit persons to whom the Software is
14 | furnished to do so, subject to the following conditions:
15 |
16 | The above copyright notice and this permission notice shall be included in all
17 | copies or substantial portions of the Software.
18 |
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/README.md:
--------------------------------------------------------------------------------
1 | # Note from ZR2021 organizers
2 |
3 | This is a modified version of the libri-light evaluation code available at
4 | https://github.com/facebookresearch/libri-light/tree/master/eval.
5 |
6 | The modifications only consist of file renaming and package reorganization to
7 | facilitate installation and integration with the zerospeech2021 package.
8 |
9 |
10 | # Eval
11 |
12 | You will find here all the relevant evaluations run on the LibriLight dataset.
13 |
14 | ## ABX
15 |
16 | ABX is an evaluation metric for unsupervised representation learning. It evaluates feature files based on their ability to distinguish sounds such as /i/ and /e/, as in "bit" versus "bet".
17 |
18 | ### Setup
19 |
20 | To setup the ABX evaluation script you need to:
21 |
22 | 1. compile the cython code. Just do:
23 |
24 | ```console
25 | cd ABX_src
26 | python setup.py build_ext --inplace
27 | ```
28 |
29 | 2. Check that everything works properly with:
30 | ```console
31 | cd ABX_src
32 | nosetests -d
33 | ```
34 |
35 | 3. Download the Librilight `.item` files here: [ABX_data.tgz](https://dl.fbaipublicfiles.com/librilight/data/ABX_data.tgz).
36 |
37 | This archive contains four `.item` files constructed from the Librispeech dev and test set: `dev-clean.item`, `dev-other.item`, `test-clean.item`, and `test-other.item`, which provide the labels for the ABX evaluation.
38 |
39 | ### How to run the ABX evaluation?
40 |
41 | Dump your features somewhere in .pt (torch), .npy (numpy) or .txt (text) format. Your features dataset should look like this:
42 |
43 | ```console
44 | \data_dir
45 | file_name_0.extension
46 | file_name_1.extension
47 | ...
48 | ```
49 |
50 | Each file should contain a 2D array of shape Sequence_size x Feature_dimension.
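
As a minimal sketch (the file and directory names here are hypothetical), a feature file in the .pt format could be produced like this:

```python
import torch

features = torch.randn(500, 256)  # Sequence_size x Feature_dimension
torch.save(features, 'data_dir/file_name_0.pt')
```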
51 |
52 | Then run:
53 | ```console
54 | python eval_ABX.py $PATH_FEATURE_DIR $PATH_TO_ABX_ITEMS/$DB_NAME.item --file_extension $EXTENSION --out $OUTPUT_DIR --feature_size $FEATURE_SIZE
55 | ```
56 |
57 | Where `$DB_NAME` is one of the 4 evaluation datasets (`dev-clean`, `dev-other`, `test-clean`, `test-other`) and `$FEATURE_SIZE` is the duration (in s) of one feature of the model (for a `10ms` frame rate, this would be `0.01`).
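
For instance, a complete invocation (with hypothetical paths) for 10 ms-rate features stored as .pt files would be:

```console
python eval_ABX.py ./my_features ABX_data/dev-clean.item --file_extension .pt --out ./abx_results --feature_size 0.01
```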
58 |
59 |
60 | ## Pre-computed checkpoints
61 |
62 | Some pre-computed models trained with CPC are available for use! To load a model, just use CPC_loader.py; for example, to retrieve the model trained on the 60k hours dataset:
63 |
64 | ```console
65 | python CPC_loader.py 60kh $PATH_OUTPUT_CHECKPOINT
66 | ```
67 |
68 | You can directly evaluate the ABX score on this checkpoint by running:
69 | ```console
70 | python eval_ABX.py $PATH_AUDIO_DIR ABX_data/$DB_NAME.item --file_extension $EXTENSION --out $OUTPUT_DIR --path_checkpoint $PATH_OUTPUT_CHECKPOINT
71 | ```
72 |
73 | Where $EXTENSION corresponds to an audio format (.wav, .flac, ...).
74 |
75 | ## Linear Classification PER
76 |
77 | Representations can also be evaluated by how easy it is to train a linear phoneme classifier.
78 |
79 | ### Setup
80 |
81 | To setup the PER evaluation script you need to compile the cython code it relies on. Just do:
82 | ```console
83 | cd PER_src
84 | python setup.py build_ext --inplace
85 | ```
86 |
87 | You will also need to download the [10h labelled data](https://dl.fbaipublicfiles.com/librilight/data/librispeech_finetuning.tgz).
88 |
89 | ### How to run the PER evaluation?
90 |
91 | First you need to train a linear classifier on your features. For example, if you want to evaluate a model fine-tuned on the 10h dataset, just run:
92 | ```console
93 | python eval_PER.py train $PATH_TO_10h_AUDIO_DATA_DIR $PATH_TO_10h_PHONE_DATA $PATH_TO_THE_JSON_PHONE_CONVERTER $PATH_TO_THE_CPC_MODEL -o $PATH_OUT
94 | ```
95 |
96 | Then you can run the PER computation, for example on librispeech100/test-clean:
97 | ```console
98 | python eval_PER.py per $PATH_OUT/checkpoint.pt $PATH_TO_TEST_CLEAN $PATH_TO_TEST_CLEAN_PHONES --file_extension .flac
99 | ```
100 |
101 |
102 | ## WER
103 |
104 | We provide here a test of representations based on word error rate.
105 |
106 | ### Setup
107 | * wav2letter python bindings: [(how-to)](https://github.com/facebookresearch/wav2letter/tree/master/bindings/python).
108 | * KenLM-based Librispeech language model, can be found [here](http://www.openslr.org/11/) or downloaded [here](https://dl.fbaipublicfiles.com/librilight/data/4-gram.bin); it should be placed into `WER_data/`.
109 | * lexicon, [download](https://dl.fbaipublicfiles.com/librilight/data/lexicon.txt.gz); it should be placed into `WER_data/`.
110 | * jiwer, installable via `pip install jiwer`.
111 |
112 | ### How to run the WER evaluation?
113 |
114 | Training a letter classifier on top of a pre-trained CPC model:
115 | ```console
116 | python eval_WER.py --path_train=$PATH_FINETUNING --path_val=$PATH_TO_DEV_CLEAN --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR
117 |
118 | ```
119 | Evaluating it with wav2letter decoder:
120 | ```console
121 | python eval_WER.py --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR --path_wer=$PATH_TO_TEST_CLEAN
122 | ```
123 |
124 | You can also train and evaluate afterwards, in a single command:
125 | ```console
126 | python eval_WER.py --path_train=$PATH_FINETUNING --path_val=$PATH_TO_DEV_CLEAN --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR --path_wer=$PATH_TO_TEST_CLEAN
127 | ```
128 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | warnings.simplefilter("ignore")
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/eval_ABX.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | import argparse
3 | import os
4 | from pathlib import Path
5 |
6 | import numpy as np
7 | import torch
8 |
9 | import zerospeech2021.phonetic_eval.ABX_src.abx_group_computation as abx_g
10 | import zerospeech2021.phonetic_eval.ABX_src.abx_iterators as abx_it
11 | from zerospeech2021.phonetic_eval.CPC_loader import load_cpc_features, build_feature_from_file
12 |
13 |
14 | def find_all_files(path_dir, extension):
15 | out = []
16 | for root, dirs, filenames in os.walk(path_dir):
17 | for f in filenames:
18 | if f.endswith(extension):
19 | out.append(((str(Path(f).stem)), os.path.join(root, f)))
20 | return out
21 |
22 |
23 | def reduce_sparse_data(quotient, divisor):
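    # element-wise division that avoids dividing by zero: wherever the divisor
    # is 0, it is replaced by 1e-08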
24 | return quotient / (1e-08 * (divisor == 0) + divisor)
25 |
26 |
27 | def load_pt(x):
28 | data = torch.load(x, 'cpu')
29 | assert(len(data.size()) == 2)
30 | return data
31 |
32 |
33 | def load_npy(x):
34 | data = torch.tensor(np.load(x))
35 | assert(len(data.size()) == 2)
36 | return data
37 |
38 |
39 | def load_txt(x):
40 | data = torch.tensor(np.loadtxt(x))
41 | assert (len(data.size()) == 2)
42 | return data
43 |
44 |
45 | def ABX(feature_function,
46 | path_item_file,
47 | seq_list,
48 | distance_mode,
49 | step_feature,
50 | modes,
51 | cuda=False,
52 | max_x_across=5,
53 | max_size_group=30):
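    """Computes the ABX score of the given features.

    Returns a dict mapping each requested mode ('within' and/or 'across') to
    its ABX error rate.
    """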
54 |
55 | # ABX dataset
56 | ABXDataset = abx_it.ABXFeatureLoader(path_item_file, seq_list,
57 | feature_function, step_feature, True)
58 |
59 | if cuda:
60 | ABXDataset.cuda()
61 |
62 | # Distance function
63 | distance_function = abx_g.get_distance_function_from_name(distance_mode)
64 |
65 | # Output
66 | scores = {}
67 |
68 | # ABX within
69 | if 'within' in modes:
70 | print(" > Computing ABX within speakers...")
71 | ABXIterator = ABXDataset.get_iterator('within', max_size_group)
72 | group_confusion = abx_g.get_abx_scores_dtw_on_group(ABXIterator,
73 | distance_function,
74 | ABXIterator.symmetric)
75 | n_data = group_confusion._values().size(0)
76 | index_ = torch.sparse.LongTensor(group_confusion._indices(),
77 | torch.ones((n_data),
78 | dtype=torch.float),
79 | group_confusion.size())
80 | divisor_context = torch.sparse.sum(index_, dim=3).to_dense()
81 | group_confusion = torch.sparse.sum(group_confusion, dim=3).to_dense()
82 | group_confusion = reduce_sparse_data(group_confusion, divisor_context)
83 | S, p1, p2 = group_confusion.size()
84 |
85 | index_speaker = divisor_context > 0
86 | divisor_speaker = index_speaker.sum(dim=0)
87 | phone_confusion = reduce_sparse_data(group_confusion.sum(dim=0),
88 | divisor_speaker)
89 |
90 | scores['within'] = (phone_confusion.sum() /
91 | (divisor_speaker > 0).sum()).item()
92 | print(f" > ...done. ABX within : {scores['within']}")
93 |
94 | # ABX across
95 | if 'across' in modes:
96 | print(" > Computing ABX across speakers...")
97 | ABXIterator = ABXDataset.get_iterator('across', max_size_group)
98 | ABXIterator.max_x = max_x_across
99 | group_confusion = abx_g.get_abx_scores_dtw_on_group(ABXIterator,
100 | distance_function,
101 | ABXIterator.symmetric)
102 | n_data = group_confusion._values().size(0)
103 | index_ = torch.sparse.LongTensor(group_confusion._indices(),
104 | torch.ones((n_data),
105 | dtype=torch.float),
106 | group_confusion.size())
107 | divisor_context = torch.sparse.sum(index_, dim=[3, 4]).to_dense()
108 | group_confusion = torch.sparse.sum(
109 | group_confusion, dim=[3, 4]).to_dense()
110 | group_confusion = reduce_sparse_data(group_confusion, divisor_context)
111 | S, p1, p2 = group_confusion.size()
112 |
113 | index_speaker = divisor_context > 0
114 | divisor_speaker = index_speaker.sum(dim=0)
115 | phone_confusion = reduce_sparse_data(group_confusion.sum(dim=0),
116 | divisor_speaker)
117 | scores['across'] = (phone_confusion.sum() /
118 | (divisor_speaker > 0).sum()).item()
119 | print(f" > ...done. ABX across : {scores['across']}")
120 |
121 | return scores
122 |
123 |
124 | def parse_args(argv):
125 |
126 | parser = argparse.ArgumentParser(description='ABX metric')
127 |
128 | parser.add_argument('path_data', type=str,
129 | help="Path to directory containing the data")
130 | parser.add_argument('path_item_file', type=str,
131 | help="Path to the .item file")
132 | parser.add_argument('--path_checkpoint', type=str, default=None,
133 | help="Path to a CPC checkpoint. If set, the apply the "
134 | "model to the input data to compute the features")
135 | parser.add_argument('--file_extension', type=str, default='.pt',
136 |                         choices=['.pt', '.npy', '.txt', '.wav', '.flac', '.mp3'])
137 | parser.add_argument('--feature_size', type=float, default=0.01,
138 | help="Size (in s) of one feature")
139 | parser.add_argument('--cuda', action='store_true',
140 | help="Use the GPU to compute distances")
141 | parser.add_argument('--mode', type=str, default='all',
142 | choices=['all', 'within', 'across'],
143 | help="Choose the mode of the ABX score to compute")
144 | parser.add_argument('--distance_mode', type=str, default='cosine',
145 | choices=['euclidian', 'cosine', 'kl', 'kl_symmetric'],
146 | help="Choose the kind of distance to use to compute "
147 | "the ABX score.")
148 | parser.add_argument("--max_size_group", type=int, default=10,
149 | help="Max size of a group while computing the"
150 | "ABX score. A small value will make the code "
151 | "faster but less precise.")
152 | parser.add_argument("--max_x_across", type=int, default=5,
153 | help="When computing the ABX across score, maximum"
154 | "number of speaker X to sample per couple A,B. "
155 | " A small value will make the code faster but "
156 | "less precise.")
157 | parser.add_argument("--out", type=str, default=None,
158 | help="Path where the results should be saved")
159 |
160 | # multi-gpu / multi-node
161 | return parser.parse_args(argv)
162 |
163 |
164 | def main(argv=None, arg_obj=None):
165 |
166 | if argv:
167 | args = parse_args(argv)
168 | else:
169 | args = arg_obj
170 |
171 |     if args.path_checkpoint is None:
172 |         # pre-computed feature loaders; audio input requires a checkpoint
173 |         loaders = {'.pt': load_pt, '.npy': load_npy, '.txt': load_txt}
174 |         if args.file_extension not in loaders:
175 |             raise ValueError(
176 |                 f'{args.file_extension} input requires --path_checkpoint')
177 |         feature_function = loaders[args.file_extension]
178 |     else:
179 | state_dict = torch.load(args.path_checkpoint)
180 | feature_maker = load_cpc_features(state_dict)
181 | feature_maker.cuda()
182 | feature_function = lambda x: build_feature_from_file(x, feature_maker)
183 |
184 | # Modes
185 | if args.mode == 'all':
186 | modes = ["within", "across"]
187 | else:
188 | modes = [args.mode]
189 |
190 | step_feature = 1 / args.feature_size
191 |
192 | # Get the list of sequences
193 | seq_list = find_all_files(args.path_data, args.file_extension)
194 |
195 | scores = ABX(feature_function, args.path_item_file,
196 | seq_list, args.distance_mode,
197 | step_feature, modes,
198 | cuda=args.cuda,
199 | max_x_across=args.max_x_across,
200 | max_size_group=args.max_size_group)
201 |
202 | return scores
203 |
--------------------------------------------------------------------------------
/zerospeech2021/semantic.py:
--------------------------------------------------------------------------------
1 | """Semantic part of the ZR2021 (validation and evaluation)"""
2 |
3 | import pathlib
4 |
5 | import numpy as np
6 | import pandas
7 | import scipy.spatial
8 | import scipy.stats
9 | import joblib
10 |
11 | from zerospeech2021.exception import (
12 | MismatchError, FileFormatError, ValidationError, EntryMissingError)
13 |
14 |
15 | def _validate_file(source_file, submission):
16 | """Verifies that a feature file is a 2D numpy array of floats
17 |
18 | :param source_file: input file
19 | :param submission: location of submitted files
20 | :return: a pair (error, ncols)
21 |
22 | """
23 | try:
24 | target_file = submission / (source_file + '.txt')
25 | if not target_file.is_file():
26 | raise EntryMissingError(source=source_file, expected=target_file)
27 |
28 | try:
29 | array = np.loadtxt(str(target_file))
30 | except Exception:
31 | raise FileFormatError(target_file, 'not a valid numpy array')
32 |
33 | if array.dtype != np.dtype('float'):
34 | raise FileFormatError(target_file, "not a float array")
35 |
36 | if array.ndim != 2:
37 | raise FileFormatError(target_file, 'not a 2D array')
38 |
39 | except ValidationError as error:
40 | return str(error), None
41 |
42 | return None, array.shape[1]
43 |
44 |
45 | def validate(submission, dataset, kind, subset, njobs=1):
46 | """Raises a ValidationError if the `submission` is not valid
47 |
48 | The submission folder must include .txt files, each file
49 | containing a matrix of floats. Each .wav file in the dataset must
50 | have its .txt equivalent in the submission directory.
51 |
52 | Parameters
53 | ----------
54 |     submission: path
55 |         The submission directory to validate.
56 | dataset: path
57 | The root path of the ZR2021 dataset.
58 | kind: str
59 | Must be 'dev' or 'test'.
60 | subset: str
61 | Must be 'synthetic' or 'librispeech'
62 | njobs : int
63 | Number of parallel processes to use
64 |
65 | Raises
66 | ------
67 | ValueError
68 |         If `kind` is not 'dev' or 'test', or if `submission` or `dataset` is
69 |         not an existing directory.
70 | ValidationError
71 |         If one line of the submission file is not valid or if the submitted
72 |         filenames do not match the required ones.
73 |
74 | """
75 | if kind not in ('dev', 'test'):
76 | raise ValueError(
77 | f'kind must be "dev" or "test", it is {kind}')
78 |
79 | if subset not in ('librispeech', 'synthetic'):
80 | raise ValueError(
81 | f'subset must be "librispeech" or "synthetic", it is {subset}')
82 |
83 | submission = pathlib.Path(submission) / kind / subset
84 | if not submission.is_dir():
85 | raise ValueError(
86 | f'{kind} submission directory not found: {submission}')
87 |
88 | dataset = pathlib.Path(dataset) / f'semantic/{kind}/{subset}'
89 | if not dataset.is_dir():
90 | raise ValueError(f'dataset not found: {dataset}')
91 |
92 | # retrieve the required filenames that must be present in the submission
93 | required = set(f.stem for f in dataset.glob('*.wav'))
94 | if not required:
95 | raise ValidationError(f'{dataset} contains no .wav files')
96 |
97 | # retrieve the submitted files
98 | submitted = set(submission.glob('*'))
99 | if not submitted:
100 | raise ValidationError(f'{submission} contains no files')
101 |
102 | # ensure we have only .txt files in submission
103 | no_txt_files = [str(f) for f in submitted if f.suffix != '.txt']
104 | if no_txt_files:
105 | raise MismatchError('extra files found', [], no_txt_files)
106 |
107 | # ensure each required file is present in the submission
108 | submitted = set(f.stem for f in submitted)
109 | if submitted != required:
110 | raise MismatchError('files mismatch', required, submitted)
111 |
112 |     # ensure each submitted file has a correct format and the number of columns
113 | # is constant across files
114 | errors, ncols = zip(*joblib.Parallel(n_jobs=njobs)(
115 | joblib.delayed(_validate_file)(f, submission) for f in submitted))
116 |
117 | # ensure there are no detected errors
118 | errors = [e for e in errors if e]
119 | if errors:
120 | for e in errors[:10]:
121 | print(f'ERROR: {e}')
122 | if len(errors) > 10:
123 |             print(f'ERROR: ... and {len(errors) - 10} more!')
124 |         raise ValidationError(f'error detected in semantic {kind}')
125 |
126 | # ensure all submitted files have the same number of columns
127 | if len(set(ncols)) != 1:
128 | raise ValidationError(
129 | f'all files must have the same number of columns '
130 | f'but have: {set(ncols)}')
131 |
132 |
133 | def _compute_distance(pair, gold, pool, metric):
134 | """Returns the mean distance between a pair of words"""
135 | function = {
136 | 'librispeech': _compute_distance_librispeech,
137 | 'synthetic': _compute_distance_synthetic}[pair['type']]
138 |
139 | return function(pair, gold, pool, metric)
140 |
141 |
142 | def _compute_distance_librispeech(pair, gold, pool, metric):
143 | # filter out 'synthetic' data from gold
144 | assert pair['type'] == 'librispeech'
145 | gold = gold[gold['type'] == 'librispeech']
146 |
147 | # get the list of tokens corresponding to the given pair of words
148 | tokens_1 = gold['filename'][gold['word'] == pair['word_1']]
149 | tokens_2 = gold['filename'][gold['word'] == pair['word_2']]
150 | assert 0 < len(tokens_1) <= 10 and 0 < len(tokens_2) <= 10
151 |
152 | X = np.asarray(pool[pool['filename'].isin(tokens_1)]['pooling'].tolist())
153 | Y = np.asarray(pool[pool['filename'].isin(tokens_2)]['pooling'].tolist())
154 |
155 | # compute the mean distance across all pairs of tokens after pooling
156 | return scipy.spatial.distance.cdist(X, Y, metric=metric).mean()
157 |
158 |
159 | def _compute_distance_synthetic(pair, gold, pool, metric):
160 | # filter out 'librispeech' data from gold
161 | assert pair['type'] == 'synthetic'
162 | gold = gold[gold['type'] == 'synthetic']
163 |
164 | # get the list of tokens corresponding to the given pair of words
165 | tokens_1 = gold[['filename', 'voice']][gold['word'] == pair['word_1']]
166 | tokens_2 = gold[['filename', 'voice']][gold['word'] == pair['word_2']]
167 | tokens = tokens_1.merge(tokens_2, on='voice').drop(['voice'], axis=1)
168 |
169 | # compute the mean of distances within a given voice
170 | dist = 0
171 | for _, (filename_x, filename_y) in tokens.iterrows():
172 | X = pool[pool['filename'] == filename_x]['pooling'].item()
173 | Y = pool[pool['filename'] == filename_y]['pooling'].item()
174 | dist += scipy.spatial.distance.cdist(
175 | np.atleast_2d(X), np.atleast_2d(Y), metric=metric)[0][0]
176 | return dist / len(tokens)
177 |
178 |
179 | def _correlation(df):
180 | # choose 'similarity' or 'relatedness' column (the one with no NaN)
181 | human = df.similarity if df.relatedness.hasnans else df.relatedness
182 | assert not human.hasnans
183 |
184 |     # return the Spearman correlation. Human scores are similarities (high
185 |     # when close), so we take their opposite to obtain a quantity that
186 |     # behaves like a distance (low when close)
187 | return 100 * scipy.stats.spearmanr(
188 | - human.to_numpy(), df.score.to_numpy())[0]
189 |
190 |
191 | def _compute_correlation(pairs):
192 | """"Returns the Spearman's correlation between human and machine scores"""
193 | # for each (type/dataset) combination, compute spearman correlation
194 | serie = pairs.groupby([pairs['type'], pairs['dataset']]).apply(_correlation)
195 |
196 |     # transform the raw result into a usable dataframe
197 | return serie.to_frame().rename(columns={0: 'correlation'}).reset_index()
198 |
199 |
200 | def evaluate(gold_file, pairs_file, submission_dir, metric, pooling, njobs=1):
201 | """Returns the distance of each words pair and overall correlations
202 |
203 | Parameters
204 | ----------
205 | gold_file : path
206 | The gold file (csv format) for the dev or test semantic dataset.
207 | pairs_file : path
208 | The pairs file (csv format) corresponding to `gold_file` (dev or test).
209 | submission_dir : path
210 |         The submission directory containing the embeddings to evaluate.
211 | metric : str
212 | The metric to use for distance computation, must be a metric supported
213 | by `scipy.spatial.distance.cdist` (see
214 | https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html)
215 | pooling : str
216 | The pooling method to use, must be 'min', 'max', 'mean', 'sum', 'last',
217 | 'lastlast' or 'off'.
218 |
219 | Returns
220 | -------
221 | pairs : pandas.DataFrame
222 | The same content as in `pairs_file` with an additional 'score' column
223 | containing the evaluated machine scores for each pair of words.
224 | correlation : pandas.DataFrame
225 | The Spearman correlation between human judgements and machine scores on
226 | each dataset. The frame contains the columns 'type', 'dataset' and
227 | 'correlation'.
228 |
229 | Raises
230 | ------
231 | ValueError
232 | If one of the input parameters is not valid.
233 | OSError
234 | If a file defined in `gold_file` is not found in `submission_dir`.
235 |
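    Example (a minimal sketch, with hypothetical paths):
        >>> pairs, correlation = evaluate(
        ...     'semantic/dev/gold.csv', 'semantic/dev/pairs.csv',
        ...     pathlib.Path('submission/semantic/dev'),
        ...     metric='cosine', pooling='mean')
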
236 | """
237 | # ensures input arguments are correct
238 | for input_file in (gold_file, pairs_file):
239 | if not pathlib.Path(input_file).is_file():
240 | raise ValueError(f'file not found: {input_file}')
241 | if not pathlib.Path(submission_dir).is_dir():
242 | raise ValueError(f'{submission_dir} is not a directory')
243 |
244 | # get the pooling function
245 | try:
246 | _pooling_function = {
247 | 'max': lambda x: np.max(x, axis=0),
248 | 'mean': lambda x: np.mean(x, axis=0),
249 | 'min': lambda x: np.min(x, axis=0),
250 | 'sum': lambda x: np.sum(x, axis=0),
251 | 'last': lambda x: x[-1],
252 | 'lastlast': lambda x: x[-2],
253 | 'off': lambda x: x}[pooling]
254 | except KeyError:
255 | raise ValueError(
256 |             'pooling method must be "max", "min", "mean", "sum", '
257 |             '"last", "lastlast" or "off"')
258 |
259 | # load the pairs and gold files
260 | pairs = pandas.read_csv(pairs_file, header=0)
261 | gold = pandas.read_csv(gold_file, header=0)
262 |
263 | # a data frame [filename, type, pooling] computed in parallel
264 | print(f' > Computing {pooling} pooling...')
265 | pool = pandas.DataFrame(
266 | joblib.Parallel(n_jobs=njobs)(
267 | joblib.delayed(
268 | lambda x: (x[1], x[0], _pooling_function(
269 | np.loadtxt(submission_dir / x[0] / (x[1] + '.txt')))))
270 | (x) for _, x in gold.iterrows()),
271 | columns=['filename', 'type', 'pooling'])
272 |
273 | print(f' > Computing {metric} distances...')
274 | pairs['score'] = [
275 | _compute_distance(pair, gold, pool, metric)
276 | for _, pair in pairs.iterrows()]
277 |
278 | # compute correlations
279 | print(' > Computing Spearman correlations...')
280 | correlation = _compute_correlation(pairs)
281 | return pairs, correlation
282 |
--------------------------------------------------------------------------------
/zerospeech2021/syntactic.py:
--------------------------------------------------------------------------------
1 | """Syntactic part of the ZR2021 (validation and evaluation)"""
2 |
3 | import collections
4 | import pathlib
5 | import sys
6 |
7 | import pandas
8 | from zerospeech2021.exception import FormatError, MismatchError
9 |
10 |
11 | def _validate_line(index, line):
12 | """Auxiliary function to validate()
13 |
14 | Returns the filename in `line`, checks the score and raises FormatError if
15 | the line is not valid.
16 |
17 | """
18 | # ensure the line has two fields separated by a space
19 | line = line.strip()
20 | fields = line.split(' ')
21 | if len(fields) != 2:
22 | raise FormatError(
23 | index, f'must be " " but is "{line}"')
24 |
25 | filename, score = tuple(fields)
26 |
27 |     # ensure the second field is a float
28 | try:
29 | float(score)
30 | except ValueError:
31 | raise FormatError(
32 |             index, f'<score> must be a float but is "{score}"')
33 |
34 | return filename
35 |
36 |
37 | def validate(submission, dataset, kind):
38 | """Raises a ValidationError if the `submisison` file is not valid
39 |
40 | * The submission file must be in text format, each line as:
41 |
42 |
43 | * The is the name of a wav file in the syntactic dataset,
44 | without path nor extension ("xKtnLJYiWGt", not
45 | "syntactic/dev/xKtnLJYiWGt.wav")
46 |
47 | * The is a positive float
48 |
49 | Parameters
50 | ----------
51 |     submission: path
52 |         The submission file to validate, each line must be formatted as
53 |         "<filename> <score>".
54 | dataset: path
55 | The root path of the ZR2021 dataset
56 |     kind: str
57 | Must be 'dev' or 'test'
58 |
59 | Raises
60 | ------
61 | ValueError
62 |         If `kind` is not 'dev' or 'test', if `submission` is not a file or if
63 |         the dataset is not an existing directory.
64 |     ValidationError
65 |         If one line of the submission file is not valid or if the submitted
66 |         filenames do not match the required ones.
67 |
68 | """
69 | if kind not in ('dev', 'test'):
70 | raise ValueError(
71 | f'kind must be "dev" or "test", it is {kind}')
72 |
73 | if not pathlib.Path(submission).is_file():
74 | raise ValueError(
75 | f'{kind} submission file not found: {submission}')
76 |
77 | # retrieve the required filenames that must be present in the submission
78 | dataset = pathlib.Path(dataset) / f'syntactic/{kind}'
79 | if not dataset.is_dir():
80 | raise ValueError(f'dataset not found: {dataset}')
81 | required_files = set(w.stem for w in dataset.glob('*.wav'))
82 |
83 | # ensure each line in the submission is valid and retrieve the filenames
84 | submitted_files = list(
85 | _validate_line(index + 1, line)
86 | for index, line in enumerate(open(submission, 'r')))
87 |
88 |     # ensure there is no duplicate in the filenames
89 | duplicates = [
90 | f for f, n in collections.Counter(submitted_files).items() if n > 1]
91 | if duplicates:
92 | raise MismatchError('duplicates found', [], duplicates)
93 |
94 | # ensure all the required files are here and there is no extra filename
95 | if required_files != set(submitted_files):
96 | raise MismatchError(
97 | 'mismatch in filenames', required_files, submitted_files)
98 |
99 |
100 | def load_data(gold_file, submission_file):
101 | """Returns the data required for evaluation as a pandas data frame
102 |
103 | Each line of the returned data frame contains a pair of (correct,
104 | incorrect) sentences and has the following columns: 'id', 'voice', 'type',
105 | 'sentence', 'score sentence', 'non sentence', 'score non sentence'.
106 |
107 | Parameters
108 | ----------
109 | gold_file : path
110 |         The gold file for the syntactic dataset (test or dev).
111 | submission_file : path
112 | The submission corresponding to the provided gold file.
113 |
114 | Returns
115 | -------
116 | data : pandas.DataFrame
117 | The data ready for evaluation
118 |
119 | Raise
120 | -----
121 | ValueError
122 | If the input files cannot be opened or in case of data mismatch between
123 | the two files.
124 |
125 | """
126 | # ensures the two input files are here
127 | for input_file in (gold_file, submission_file):
128 | if not pathlib.Path(input_file).is_file():
129 | raise ValueError(f'file not found: {input_file}')
130 |
131 | # load them as data frames indexed by filenames
132 | gold = pandas.read_csv(
133 | gold_file, header=0, index_col='filename')
134 | score = pandas.read_csv(
135 | submission_file, sep=' ', header=None,
136 | names=['filename', 'score'], index_col='filename')
137 |
138 | # ensures the filenames in gold and submission are the same
139 | if set(gold.index) != set(score.index):
140 | has_less_files = set(gold.index) - set(score.index)
141 | has_more_files = set(score.index) - set(gold.index)
142 | print("MismatchError:", file=sys.stderr)
143 | if len(has_more_files) > 0:
144 | print('submission has extra files', file=sys.stderr)
145 | print(f'extra files: {has_more_files}', file=sys.stderr)
146 |
147 | if len(has_less_files) > 0:
148 | print('submission is missing files', file=sys.stderr)
149 | print(f'missing files: {has_less_files}:', file=sys.stderr)
150 |
151 | sys.exit(1)
152 |
153 |     # merge the gold and score frames using filenames, then drop the
154 |     # filename index as we don't use it for evaluation
155 | data = pandas.concat([gold, score], axis=1)
156 | data.reset_index(drop=True, inplace=True)
157 |
158 | # going from a word per line to a pair (word, non word) per line
159 | data = pandas.concat([
160 | data.loc[data['correct'] == 1].reset_index().rename(
161 | lambda x: 's_' + x, axis=1),
162 | data.loc[data['correct'] == 0].reset_index().rename(
163 | lambda x: 'ns_' + x, axis=1)], axis=1)
164 | data.drop(
165 | ['s_index', 'ns_index', 'ns_voice', 'ns_type', 'ns_subtype',
166 | 's_correct', 'ns_correct', 'ns_id'],
167 | axis=1, inplace=True)
168 |
169 | data.rename(
170 | {'s_id': 'id',
171 | 's_voice': 'voice',
172 | 's_type': 'type',
173 | 's_subtype': 'subtype',
174 | 's_transcription': 'sentence',
175 | 'ns_transcription': 'non sentence',
176 | 's_score': 'score sentence',
177 | 'ns_score': 'score non sentence'},
178 | axis=1, inplace=True)
179 |
180 | return data
181 |
182 |
183 | def evaluate_by_pair(data):
184 | """Returns a data frame with the scores by (sentence, non sentence) pair
185 |
186 | Parameters
187 | ----------
188 | data : pandas.DataFrame
189 | The result of `load_data`
190 |
191 | Returns
192 | -------
193 | by_pair : pandas.DataFrame
194 | The evaluated (sentence, non sentence) pairs, the data frame has the
195 | columns: 'sentence', 'non sentence' 'type' and 'score'.
196 |
197 | """
198 |     # compute the score of each pair in an additional 'score' column, then
199 |     # drop the 'score sentence' and 'score non sentence' columns
200 | score = data.loc[:, ['score sentence', 'score non sentence']].to_numpy()
201 | data['score'] = (
202 | 0.5 * (score[:, 0] == score[:, 1])
203 | + (score[:, 0] > score[:, 1]))
204 | data.drop(columns=['score sentence', 'score non sentence'], inplace=True)
205 |
206 | # finally get the mean score across voices for all pairs
207 | score = data.groupby(['type', 'subtype', 'id']).apply(lambda x: (
208 | x.iat[0, 2], # type
209 | x.iat[0, 3], # subtype
210 | x.iat[0, 4], # sentence
211 | x.iat[0, 5], # non sentence
212 | x['score'].mean()))
213 | return pandas.DataFrame(
214 | score.to_list(),
215 | columns=['type', 'subtype', 'sentence', 'non sentence', 'score'])
216 |
217 |
218 | def evaluate_by_type(by_pair):
219 | """Returns a data frame with mean scores by syntax error type
220 |
221 | Parameters
222 | ----------
223 | by_pair: pandas.DataFrame
224 | The output of `evaluate_by_pair`
225 |
226 | Returns
227 | -------
228 | by_type : pandas.DataFrame
229 | The score collapsed on types, the data frame has the
230 | following columns: 'type', 'score'.
231 |
232 | """
233 | return by_pair.score.groupby([by_pair['type']]).agg(
234 | n='count', score='mean', std='std').reset_index()
235 |
236 |
237 | def evaluate(gold_file, submission_file):
238 | """Returns the score by sentences pair and by syntax type
239 |
240 | Parameters
241 | ----------
242 | gold_file : path
243 |         The gold file (csv format) for the syntactic dataset (test or dev).
244 | submission_file : path
245 | The submission corresponding to the provided gold file.
246 |
247 | Returns
248 | -------
249 | by_pair : pandas.DataFrame
250 | The evaluated pairs, the data frame has the columns:
251 | 'sentence', 'non sentence' and 'score'.
252 | by_type : pandas.DataFrame
253 | The score collapsed on syntax errors types, the data frame has the
254 | following columns: 'type', 'score'.
255 |
256 | Raise
257 | -----
258 | ValueError
259 | If the input files cannot be opened or in case of data mismatch between
260 | the two files.
261 |
262 | """
263 | data = load_data(gold_file, submission_file)
264 | by_pair = evaluate_by_pair(data)
265 | by_type = evaluate_by_type(by_pair)
266 | by_pair.drop(['type', 'subtype'], axis=1, inplace=True)
267 |
268 | return by_pair, by_type
269 |
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/__init__.py:
--------------------------------------------------------------------------------
1 | from . import api_fn, auth, model, upload, split
2 |
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/api_fn.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | import requests
4 |
5 | from rich.console import Console
6 | from rich.table import Table
7 |
8 |
9 | from . import model
10 |
11 | console = Console()
12 |
13 |
14 | def view_challenges():
15 | """ Fetches the list of available challenges and allows selecting one."""
16 | response = requests.get(
17 | f"{model.SERVER_LOCATION}/challenges/", params={"include_inactive": "false"})
18 | if response.status_code != 200:
19 |         raise ValueError('Request to the server failed!')
20 |
21 | challenges = response.json()
22 |
23 | table = Table(show_header=True, header_style="bold magenta")
24 | table.add_column("Challenge")
25 | table.add_column("ID")
26 |
27 | for item in challenges:
28 | table.add_row(f"{item.get('label', '')}", f"{item.get('id', 'XX')}")
29 |
30 | console.print(table)
31 |
32 |
33 | def create_multipart_submission(challenge_id: int, file_meta: dict, _token: str):
34 | """ Create a multipart upload submission session on the server via the API."""
35 | data = {
36 | "filename": file_meta["filename"],
37 | "hash": file_meta["hash"],
38 | "multipart": True,
39 | "index": file_meta['index']
40 | }
41 |
42 | return requests.post(
43 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/create',
44 | json=data,
45 | headers={
46 | 'Authorization': f'Bearer {_token}'
47 | })
48 |
49 |
50 | def create_single_part_submission(challenge_id: int, filename: Path, _hash: str, _token: str):
51 | """ Create a single part submission upload session on the server via the API."""
52 | return requests.post(
53 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/create',
54 | json={
55 | "filename": f"{filename}",
56 | "hash": _hash,
57 | "multipart": False,
58 | },
59 | headers={
60 | 'Authorization': f'Bearer {_token}'
61 | })
62 |
63 |
64 | def submission_upload(challenge_id: int, submission_id: str, file: Path, _token: str):
65 | """Upload a file (or part) to an existing upload session."""
66 | response = requests.put(
67 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/upload',
68 | params={
69 | "part_name": file.name,
70 | "submission_id": f"{submission_id}"
71 | },
72 |         files={'file_data': file.read_bytes()},
73 | headers={
74 | 'Authorization': f'Bearer {_token}'
75 | }
76 | )
77 | return response
78 |
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/auth.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pathlib import Path
3 |
4 | import requests
5 | from rich.console import Console
6 |
7 | from . import model
8 |
9 | # Fancy console
10 | console = Console()
11 |
12 |
13 | def login(username: str, password: str):
14 | """ Create an auth session in zerospeech.com
15 |
16 |     :returns: the token used to authenticate the current session
17 | """
18 |
19 | # request login from server
20 | response = requests.post(
21 | f'{model.SERVER_LOCATION}/auth/login',
22 | data={
23 | "grant_type": "password",
24 | "username": username,
25 | "password": password,
26 | "scopes": [],
27 | "client_id": model.CLIENT_ID,
28 | "client_secret": model.CLIENT_SECRET
29 | }
30 | )
31 | if response.status_code != 200:
32 | console.print(f"[red]:x:{response.status_code}[/red]: {response.json().get('detail')}")
33 | sys.exit(1)
34 |
35 | return response.json().get("access_token")
36 |
37 |
38 | def logout(_token):
39 | """ Clears the given auth session on the back-end """
40 | return requests.delete(
41 | f'{model.SERVER_LOCATION}/auth/logout',
42 | headers={
43 | 'Authorization': f'Bearer {_token}'
44 | })
45 |
46 |
47 | def clear_session():
48 | """ Clear the current session locally and on the server."""
49 | token_file = Path(model.AUTH_FILE).expanduser().resolve()
50 | if token_file.is_file():
51 | with token_file.open() as fp:
52 | token = fp.read().replace("\n", "")
53 |
54 | # clear
55 | token_file.unlink(missing_ok=True)
56 | logout(token)
57 | console.print(f"Session saved @ {token_file} was removed.", style='green bold')
58 |
59 |
60 | def create_session(token: str):
61 |     """ Create a new auth session & save it locally """
62 | token_file = Path(model.AUTH_FILE).expanduser().resolve()
63 |
64 | with token_file.open('w') as fp:
65 | fp.write(token)
66 |
67 |
68 | def get_session():
69 | """ Get or Create a new auth session """
70 | token_file = Path(model.AUTH_FILE).expanduser().resolve()
71 |
72 | if not token_file.is_file():
73 |         console.print("No session found; use the login command to create one.", style='red bold')
74 | sys.exit(1)
75 |
76 | with token_file.open() as fp:
77 | return fp.read().replace("\n", "")
78 |
--------------------------------------------------------------------------------
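
A minimal sketch of the session life cycle implemented above (credentials are placeholders):

    from zerospeech2021.zr_upload_lib import auth

    token = auth.login('myuser', 'mypassword')  # exits the process on failure
    auth.create_session(token)                  # persist token to ~/.zerospeech-token
    token = auth.get_session()                  # read it back in a later run
    auth.clear_session()                        # logout on the server, delete the file
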
/zerospeech2021/zr_upload_lib/model.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from dataclasses import dataclass
4 | from pathlib import Path
5 | from typing import Optional, List
6 |
7 | SERVER_LOCATION: str = "https://api.zerospeech.com"
8 | CLIENT_ID: str = "cli_uploader"
9 | CLIENT_SECRET: str = 'TaX9K1WtryizOTr5pLUM4OoqXZE5QGlj3Xo6dkh3CcI='
10 | NB_RETRY_ATTEMPTS: int = 2
11 | MULTIPART_THRESHOLD: int = 500000000 # in bytes (500MB)
12 | AUTH_FILE: str = "~/.zerospeech-token"
13 | CHALLENGE_ID = 7
14 |
15 |
16 | def get_challenge_id():
17 |     """ Get the challenge id from the environment (as an int), or return the default. """
18 |     return int(os.environ.get("CHALLENGE_ID", CHALLENGE_ID))
19 |
20 |
21 | class ZrApiException(Exception):
22 | pass
23 |
24 |
25 | @dataclass
26 | class ManifestFileIndexItem:
27 | """ Upload File Manifest Item """
28 | file_name: str
29 | file_size: int
30 | file_hash: Optional[str] = None
31 |
32 | def dict(self):
33 | return {f"{x}": getattr(self, x) for x in self.__dataclass_fields__.keys()}
34 |
35 | @classmethod
36 | def from_dict(cls, data):
37 | return cls(**data)
38 |
39 |
40 | @dataclass
41 | class SplitManifest:
42 | """ A class containing information about archive split"""
43 | filename: str
44 | tmp_location: Path
45 | hash: str
46 | index: Optional[List[ManifestFileIndexItem]]
47 | multipart: bool = True
48 | hashed_parts: bool = True
49 | completed: int = 0
50 |
51 | def dict(self):
52 | data = {f"{x}": f"{getattr(self, x)}" for x in self.__dataclass_fields__.keys()}
53 |         if self.index is not None:
54 | data["index"] = [
55 | item.dict() for item in self.index
56 | ]
57 |
58 | return data
59 |
60 | @classmethod
61 | def from_dict(cls, data):
62 | if "index" in data.keys():
63 | data["index"] = [
64 | ManifestFileIndexItem.from_dict(item) for item in data["index"]
65 | ]
66 | return cls(**data)
67 |
68 |
69 | class UploadManifest:
70 |     """ Fail-safe manifest tracking the state of a multi-part upload """
71 |
72 | @classmethod
73 | def load(cls, filename: Path, retries: int = 2):
74 | with filename.open('r') as fp:
75 | dd = json.load(fp)
76 | return cls(dd["manifest"], filename, metadata=dd["metadata"], retries=retries)
77 |
78 | def __init__(self, list_manifest, save_file: Path, metadata=None, retries: int = 2):
79 | if isinstance(list_manifest, dict):
80 | self.man = list_manifest
81 | else:
82 | self.man = {
83 | f"{name}": 'todo'
84 | for name in list_manifest
85 | }
86 | self.save_file = save_file
87 | self.retries = retries
88 | if metadata:
89 | self._metadata = metadata
90 | else:
91 | self._metadata = {}
92 | self.save()
93 |
94 | def __iter__(self):
95 | return self
96 |
97 | @property
98 | def metadata(self):
99 | return self._metadata
100 |
101 | @metadata.setter
102 | def metadata(self, data):
103 | self._metadata.update(data)
104 | self.save()
105 |
106 | def __next__(self):
107 | for k, v in self.man.items():
108 | if v == 'todo':
109 | return k
110 | for k, v in self.man.items():
111 | if v == 'waiting':
112 | return k
113 | for k, v in self.man.items():
114 | if 'retry' in v:
115 | return k
116 | raise StopIteration
117 |
118 | def status(self, key):
119 | return self.man[key]
120 |
121 | def set_waiting(self, key):
122 | if self.man[key] == 'todo':
123 | self.man[key] = "waiting"
124 | self.save()
125 |
126 | def set_done(self, key):
127 | self.man[key] = "done"
128 | self.save()
129 |
130 | def set_failed(self, key):
131 | k = self.man[key]
132 | if k in ["waiting", "todo"]:
133 | self.man[key] = "retry_1"
134 | elif "retry" in k:
135 | nb = int(k.split('_')[1])
136 | nb += 1
137 | if nb > self.retries:
138 | st = 'failed'
139 | else:
140 | st = f"retry_{nb}"
141 | self.man[key] = st
142 | self.save()
143 |
144 | def save(self):
145 | with self.save_file.open('w') as fp:
146 | json.dump({
147 | "manifest": self.man,
148 | "metadata": self.metadata
149 | }, fp)
150 |
151 | def is_complete(self):
152 | for k, v in self.man.items():
153 | if v != "done":
154 | return False
155 | return True
156 |
157 | def get_failed(self):
158 | return [k for k, v in self.man.items() if v == 'failed']
159 |
160 | def clear(self):
161 | # remove checkpoint file
162 | self.save_file.unlink()
163 |
--------------------------------------------------------------------------------
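
UploadManifest is a small persistent state machine: each part moves todo -> waiting -> done, failures go through retry_1 .. retry_N before landing on failed, and every transition is written back to the checkpoint file so an interrupted run can resume. A minimal sketch (part names and checkpoint path are placeholders):

    from pathlib import Path
    from zerospeech2021.zr_upload_lib.model import UploadManifest

    manifest = UploadManifest(['part_1', 'part_2'], Path('checkpoint.json'), retries=2)
    for item in manifest:              # yields pending parts until all are done or failed
        manifest.set_waiting(item)
        if item == 'part_1':           # stand-in for a real upload attempt
            manifest.set_done(item)
        else:
            manifest.set_failed(item)  # retry_1, retry_2, then failed

    print(manifest.is_complete())      # False: part_2 exhausted its retries
    print(manifest.get_failed())       # ['part_2']
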
/zerospeech2021/zr_upload_lib/split.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | from pathlib import Path
3 | from typing import List
4 |
5 | import pandas as pd
6 | from Crypto.Hash import MD5
7 | from fsplit.filesplit import Filesplit
8 |
9 | from .model import SplitManifest, ManifestFileIndexItem
10 |
11 |
12 | def md5sum(file_path: Path, chunk_size: int = 8192):
13 |     """ Return the MD5 hash of a file's content """
14 | h = MD5.new()
15 |
16 | with file_path.open('rb') as f:
17 | while True:
18 | chunk = f.read(chunk_size)
19 | if len(chunk):
20 | h.update(chunk)
21 | else:
22 | break
23 | return h.hexdigest()
24 |
25 |
26 | def split_zip_v2(zipfile: Path, chunk_max_size: int = 500000000, hash_parts: bool = True):
27 |     """ Split a zip archive into chunks and return a SplitManifest describing the parts. """
28 | assert zipfile.is_file(), f"entry file ({zipfile}) was not found"
29 | print(f"splitting {zipfile} into chunks...")
30 |
31 | tmp_loc = Path(tempfile.mkdtemp(dir=f"{zipfile.parents[0]}"))
32 | fs = Filesplit()
33 | fs.split(file=f"{zipfile}", split_size=chunk_max_size, output_dir=str(tmp_loc))
34 | df = pd.read_csv(tmp_loc / 'fs_manifest.csv')
35 | if hash_parts:
36 | df['hash'] = df.apply(lambda row: md5sum(
37 | (tmp_loc / row['filename'])), axis=1)
38 |         index: List[ManifestFileIndexItem] = [
39 |             ManifestFileIndexItem(file_name=name, file_size=size, file_hash=part_hash)
40 |             for name, size, part_hash in zip(df['filename'], df['filesize'], df['hash'])
41 |         ]
42 |     else:
43 |         index: List[ManifestFileIndexItem] = [
44 |             ManifestFileIndexItem(file_name=name, file_size=size)
45 |             for name, size in zip(df['filename'], df['filesize'])
46 |         ]
47 |
48 |     return SplitManifest(
49 |         filename=zipfile.name,
50 |         tmp_location=tmp_loc,
51 |         hash=md5sum(zipfile),
52 |         index=index,
53 |         hashed_parts=hash_parts
54 |     )
--------------------------------------------------------------------------------
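
A minimal sketch of the split/verify round trip (the archive path is a placeholder; parts are hashed by default):

    from pathlib import Path
    from zerospeech2021.zr_upload_lib.split import split_zip_v2, md5sum

    manifest = split_zip_v2(Path('submission.zip'))
    for part in manifest.index:
        part_path = manifest.tmp_location / part.file_name
        assert md5sum(part_path) == part.file_hash
    print(f'{len(manifest.index)} parts, archive hash {manifest.hash}')
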
/zerospeech2021/zr_upload_lib/upload.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import sys
3 | from pathlib import Path
4 |
5 | from rich import inspect, print
6 | from rich.console import Console
7 | from rich.progress import Progress
8 | from rich.prompt import Prompt
9 |
10 | from . import model
11 | from .api_fn import (
12 | create_multipart_submission, submission_upload, create_single_part_submission
13 | )
14 | from .split import split_zip_v2, md5sum
15 |
16 | # Fancy console
17 | console = Console()
18 |
19 |
20 | def multipart_upload(challenge_id: int, zipfile: Path, _token: str, checkpoint: Path):
21 |     print("preparing metadata...")
22 |
23 | # check for checkpoint
24 | if checkpoint.is_file():
25 | file_list = model.UploadManifest.load(checkpoint, retries=model.NB_RETRY_ATTEMPTS)
26 | tmp_location = Path(file_list.metadata.get("tmp_location"))
27 | _token = file_list.metadata.get('token')
28 | challenge_id = file_list.metadata.get("challenge_id")
29 | else:
30 | manifest = split_zip_v2(zipfile)
31 | file_list = [i.file_name for i in manifest.index]
32 | tmp_location = manifest.tmp_location
33 | meta = {
34 | "tmp_location": f"{tmp_location}",
35 | "filename": manifest.filename,
36 | "hash": manifest.hash,
37 | "index": [i.dict() for i in manifest.index],
38 | "token": _token,
39 | "challenge_id": challenge_id
40 | }
41 | file_list = model.UploadManifest(file_list, checkpoint, meta, retries=model.NB_RETRY_ATTEMPTS)
42 |
43 | # check if submission session exists
44 | if "submission_id" in file_list.metadata:
45 | submission_id = file_list.metadata.get('submission_id')
46 | else:
47 | response = create_multipart_submission(challenge_id, file_list.metadata, _token)
48 | if response.status_code != 200:
49 |             print(f'[red]:x:[/red][bold]Submission creation failed with code [red]{response.status_code}[/red][/bold]')
50 | inspect(response.json())
51 | sys.exit(1)
52 |
53 | submission_id = response.text.replace('"', '').replace("'", "")
54 | file_list.metadata = {"submission_id": submission_id}
55 |
56 | with Progress() as progress:
57 | task1 = progress.add_task("[red]Uploading parts...", total=len(file_list.man))
58 |
59 | for item in file_list:
60 | file_list.set_waiting(item)
61 | progress.update(task1, advance=0.5)
62 | file_path = tmp_location / item
63 | print(f'uploading : {file_path.name}...')
64 | response = submission_upload(
65 | challenge_id=challenge_id,
66 | submission_id=submission_id,
67 | file=file_path,
68 | _token=_token
69 | )
70 |
71 | if response.status_code == 200:
72 | print(f'[green]:heavy_check_mark: {file_path}')
73 | file_list.set_done(item)
74 | progress.update(task1, advance=0.5)
75 | else:
76 | progress.update(task1, advance=-0.5)
77 | file_list.set_failed(item)
78 |
79 | if file_list.is_complete():
80 | checkpoint.unlink()
81 | shutil.rmtree(tmp_location)
82 | return []
83 | else:
84 | return file_list.get_failed()
85 |
86 |
87 | def single_part_upload(challenge_id: int, zipfile: Path, _token: str):
88 | zip_hash = md5sum(zipfile)
89 | response = create_single_part_submission(challenge_id, filename=zipfile, _hash=zip_hash, _token=_token)
90 |
91 | if response.status_code != 200:
92 |         print(f'[red]:x:[/red][bold]Submission creation failed with code [red]{response.status_code}[/red][/bold]')
93 | inspect(response.json())
94 | sys.exit(1)
95 |
96 | submission_id = response.text.replace('"', '').replace("'", "")
97 | print(f'submission id: {submission_id}')
98 | response = submission_upload(
99 | challenge_id=challenge_id,
100 | submission_id=submission_id,
101 | file=zipfile,
102 | _token=_token
103 | )
104 |
105 | if response.status_code != 200:
106 |         print(f'[red]:x:[/red][bold]Archive upload failed with code [red]{response.status_code}[/red][/bold]')
107 | print(response.json())
108 | sys.exit(1)
109 |
110 |
111 | def ask_resume(file: Path):
112 |     """ Ask the user whether to resume a previously interrupted upload """
113 | choice = "No"
114 | if file.is_file():
115 |         choice = Prompt.ask("A checkpoint file was found. Do you wish to resume?",
116 | choices=["Yes", "No"])
117 | if choice == "No":
118 | file.unlink()
119 |
120 | return choice == "Yes"
121 |
--------------------------------------------------------------------------------
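
A minimal sketch tying these helpers together, mirroring how a CLI entry point might drive them (archive path, checkpoint name and token are placeholders):

    from pathlib import Path
    from zerospeech2021.zr_upload_lib import model, upload

    archive = Path('submission.zip')
    checkpoint = archive.with_suffix('.checkpoint.json')
    token = '<token>'
    challenge_id = model.get_challenge_id()

    if archive.stat().st_size > model.MULTIPART_THRESHOLD:
        upload.ask_resume(checkpoint)  # offers to resume; deletes the checkpoint on "No"
        failed = upload.multipart_upload(challenge_id, archive, token, checkpoint)
        if failed:
            print(f'parts that failed after retries: {failed}')
    else:
        upload.single_part_upload(challenge_id, archive, token)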