├── .gitignore
├── LICENSE.txt
├── README.md
├── environment.yml
├── setup.py
└── zerospeech2021
    ├── __init__.py
    ├── cli
    │   ├── __init__.py
    │   ├── evaluate.py
    │   ├── leaderboard.py
    │   ├── upload.py
    │   └── validate.py
    ├── exception.py
    ├── leaderboard.py
    ├── lexical.py
    ├── meta.py
    ├── phonetic.py
    ├── phonetic_eval
    │   ├── ABX_src
    │   │   ├── __init__.py
    │   │   ├── abx_group_computation.py
    │   │   ├── abx_iterators.py
    │   │   ├── dtw.c
    │   │   └── dtw.pyx
    │   ├── CPC_loader.py
    │   ├── LICENCE.txt
    │   ├── README.md
    │   ├── __init__.py
    │   └── eval_ABX.py
    ├── semantic.py
    ├── syntactic.py
    └── zr_upload_lib
        ├── __init__.py
        ├── api_fn.py
        ├── auth.py
        ├── model.py
        ├── split.py
        └── upload.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__/
3 | /zerospeech2021.egg-info/
4 | build/
5 | dist/
6 | .idea/
7 | .DS_Store
8 | *.so
9 | zerospeech2021/phonetic_eval/ABX_src/dtw.c
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |                     GNU GENERAL PUBLIC LICENSE
2 |                        Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |                             Preamble
9 |
10 |   The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 |   The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works.  By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users.  We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors.  You can apply it to
20 | your programs, too.
21 |
22 |   When we speak of free software, we are referring to freedom, not
23 | price.  Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 |   To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights.  Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 |   For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received.  You must make sure that they, too, receive
37 | or can get the source code.  And you must show them these terms so they
38 | know their rights.
39 |
40 |   Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 |   For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software.
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. 
If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. 
Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 
234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 
296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 
414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. 
The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. 
You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 
583 |
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 |   15. Disclaimer of Warranty.
590 |
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 |   16. Limitation of Liability.
601 |
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 |   17. Interpretation of Sections 15 and 16.
613 |
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 |                      END OF TERMS AND CONDITIONS
622 |
623 |             How to Apply These Terms to Your New Programs
624 |
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 |
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 |
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 |
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ZeroSpeech Challenge 2021 Python package
2 |
3 |
4 | This repository bundles all the scripts required to evaluate and validate a
5 | submission to the [ZeroSpeech Challenge 2021](https://zerospeech.com/2021).
6 |
7 | ## Installation
8 |
9 | * First clone this repository:
10 |
11 |       git clone https://github.com/bootphon/zerospeech2021.git
12 |       cd zerospeech2021
13 |
14 | * Setup a conda environment:
15 |
16 |       conda env create -f environment.yml
17 |
18 | * Activate the created environment:
19 |
20 |       conda activate zerospeech2021
21 |
22 | * Install the package:
23 |
24 |       python setup.py install
25 |
26 | ## Usage
27 |
28 | The `zerospeech2021` package provides four command-line tools:
29 |
30 | * `zerospeech2021-validate`, which validates a submission, ensuring all the
31 |   required files are present and correctly formatted.
32 |
33 | * `zerospeech2021-evaluate`, which evaluates a submission (assumed to be valid). Only
34 |   the development datasets are evaluated. The test datasets can only be
35 |   evaluated by doing an official submission to the challenge.
36 |
37 | * `zerospeech2021-leaderboard`, which generates leaderboard entries from evaluation scores.
38 |
39 | * ![VERSION](https://img.shields.io/badge/-WIP-red) `zerospeech2021-upload`, a utility for uploading a submission to zerospeech.com.
40 |
41 | Each tool comes with a `--help` option describing the possible arguments (e.g.
42 | `zerospeech2021-validate --help`).
43 |
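A typical session first validates a submission and then evaluates it; the
dataset and submission paths below are placeholders:

    zerospeech2021-validate /path/to/dataset /path/to/submission.zip --njobs 4
    zerospeech2021-evaluate /path/to/dataset /path/to/submission.zip -o scores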
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: zerospeech2021
2 | channels:
3 |   - pytorch
4 |   - defaults
5 | dependencies:
6 |   - python=3
7 |   - click
8 |   - cudatoolkit=9.2
9 |   - cython
10 |   - joblib
11 |   - numpy
12 |   - pandas
13 |   - pip
14 |   - pytorch
15 |   - pyyaml
16 |   - scipy
17 |   - torchaudio
18 |   - tqdm
19 |   - pip:
20 |     - progressbar2
21 |     - sox
22 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Setup script for the zerospeech2021 Python package"""
3 |
4 | import codecs
5 | import numpy
6 | import setuptools
7 |
8 | import zerospeech2021
9 |
10 |
11 | setuptools.setup(
12 |     # general description
13 |     name='zerospeech2021',
14 |     description="Evaluation and validation tools for ZeroSpeech2021",
15 |     version=zerospeech2021.__version__,
16 |
17 |     # python package dependencies
18 |     setup_requires=['cython', 'numpy'],
19 |
20 |     # include Python code
21 |     packages=setuptools.find_packages(),
22 |
23 |     # build cython extension
24 |     ext_modules=[setuptools.Extension(
25 |         'libri_light_dtw',
26 |         sources=['zerospeech2021/phonetic_eval/ABX_src/dtw.pyx'],
27 |         extra_compile_args=['-O3'],
28 |         include_dirs=[numpy.get_include()])],
29 |
30 |     # needed for cython/setuptools, see
31 |     # http://docs.cython.org/en/latest/src/quickstart/build.html
32 |     zip_safe=False,
33 |
34 |     # the command-line scripts to export
35 |     entry_points={
36 |         'console_scripts': [
37 |             'zerospeech2021-validate = zerospeech2021.cli.validate:validate',
38 |             'zerospeech2021-evaluate = zerospeech2021.cli.evaluate:evaluate',
39 |             'zerospeech2021-leaderboard = zerospeech2021.cli.leaderboard:leaderboard',
40 |             'zerospeech2021-upload = zerospeech2021.cli.upload:upload_cmd'
41 |         ]},
42 |
43 |     # metadata
44 |     author='CoML team',
45 |     author_email='zerospeech2021@gmail.com',
46 |     license='GPL3',
47 |     url='https://zerospeech.com/2021',
48 |     long_description=codecs.open('README.md', encoding='utf-8').read(),
49 |     long_description_content_type="text/markdown",
50 |     python_requires='>=3.7',
51 | )
52 |
--------------------------------------------------------------------------------
/zerospeech2021/__init__.py:
--------------------------------------------------------------------------------
1 | """Evaluation and validation tools for the ZeroSpeech Challenge 2021"""
2 |
3 |
4 | __version__ = '0.5'
5 |
--------------------------------------------------------------------------------
/zerospeech2021/cli/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zerospeech/zerospeech2021/199624adfba52901bab564b076fe7d4a63f47ddb/zerospeech2021/cli/__init__.py
--------------------------------------------------------------------------------
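Note on the build: `setup.py` compiles the `libri_light_dtw` Cython extension
declared above. Assuming a standard setuptools workflow, the extension can be
compiled in place and smoke-tested before a full install:

    python setup.py build_ext --inplace
    python -c "import libri_light_dtw"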
/zerospeech2021/cli/evaluate.py:
--------------------------------------------------------------------------------
1 | """Evaluation program for ZR2021 submissions"""
2 |
3 | import atexit
4 | import os
5 | import pathlib
6 | import shutil
7 | import sys
8 | import tempfile
9 | import zipfile
10 |
11 | import click
12 | import pandas
13 | import yaml
14 |
15 | from zerospeech2021 import phonetic, lexical, syntactic, semantic
16 |
17 |
18 | def write_csv(frame, filename):
19 |     frame.to_csv(filename, index=False, float_format='%.4f')
20 |     print(f'  > Wrote {filename}')
21 |
22 |
23 | def eval_lexical(dataset, submission, output, kinds):
24 |     for kind in kinds:  # 'dev' or 'test'
25 |         print(f'Evaluating lexical {kind}...')
26 |
27 |         gold_file = dataset / 'lexical' / kind / 'gold.csv'
28 |         submission_file = submission / 'lexical' / f'{kind}.txt'
29 |
30 |         by_pair, by_frequency, by_length = lexical.evaluate(
31 |             gold_file, submission_file)
32 |
33 |         write_csv(
34 |             by_pair, output / f'score_lexical_{kind}_by_pair.csv')
35 |         write_csv(
36 |             by_frequency, output / f'score_lexical_{kind}_by_frequency.csv')
37 |         write_csv(
38 |             by_length, output / f'score_lexical_{kind}_by_length.csv')
39 |
40 |
41 | def eval_semantic(dataset, submission, output, kinds, njobs):
42 |     # load metric and pooling parameters from meta.yaml
43 |     meta = yaml.safe_load((submission / 'meta.yaml').open('r').read())
44 |     metric = meta['parameters']['semantic']['metric']
45 |     pooling = meta['parameters']['semantic']['pooling']
46 |
47 |     for kind in kinds:  # 'dev' or 'test'
48 |         print(f'Evaluating semantic {kind} '
49 |               f'(metric={metric}, pooling={pooling})...')
50 |
51 |         gold_file = dataset / 'semantic' / kind / 'gold.csv'
52 |         pairs_file = dataset / 'semantic' / kind / 'pairs.csv'
53 |         pairs, correlation = semantic.evaluate(
54 |             gold_file, pairs_file, submission / 'semantic' / kind,
55 |             metric, pooling, njobs=njobs)
56 |
57 |         write_csv(
58 |             pairs, output / f'score_semantic_{kind}_pairs.csv')
59 |         write_csv(
60 |             correlation, output / f'score_semantic_{kind}_correlation.csv')
61 |
62 |
63 | def eval_syntactic(dataset, submission, output, kinds):
64 |     for kind in kinds:  # 'dev' or 'test'
65 |         print(f'Evaluating syntactic {kind}...')
66 |
67 |         gold_file = dataset / 'syntactic' / kind / 'gold.csv'
68 |         submission_file = submission / 'syntactic' / f'{kind}.txt'
69 |
70 |         by_pair, by_type = syntactic.evaluate(gold_file, submission_file)
71 |
72 |         write_csv(
73 |             by_pair, output / f'score_syntactic_{kind}_by_pair.csv')
74 |         write_csv(
75 |             by_type, output / f'score_syntactic_{kind}_by_type.csv')
76 |
77 |
78 | def eval_phonetic(dataset, submission, output, kinds, force_cpu):
79 |     meta = yaml.safe_load((submission / 'meta.yaml').open('r').read())
80 |     metric = meta['parameters']['phonetic']['metric']
81 |     frame_shift = meta['parameters']['phonetic']['frame_shift']
82 |
83 |     results = []
84 |     for kind in kinds:  # 'dev' or 'test'
85 |         results.append(phonetic.evaluate(
86 |             submission / 'phonetic', dataset / 'phonetic',
87 |             kind, metric, frame_shift, force_cpu=force_cpu))
88 |
89 |     write_csv(pandas.concat(results), output / 'score_phonetic.csv')
90 |
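# The functions above read their parameters from the submission's meta.yaml.
# A minimal sketch of the expected structure (the key names match the lookups
# in eval_semantic() and eval_phonetic(); the values are illustrative only):
#
#   parameters:
#     semantic:
#       metric: cosine
#       pooling: max
#     phonetic:
#       metric: cosine
#       frame_shift: 0.01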
91 |
92 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
93 | @click.argument('dataset', type=pathlib.Path)
94 | @click.argument('submission', type=pathlib.Path)
95 | @click.option(
96 |     '-j', '--njobs', default=1, type=int,
97 |     help='Parallel jobs to use for semantic part (defaults to 1)')
98 | @click.option(
99 |     '--force-cpu', help='Do not use GPU for phonetic part', is_flag=True)
100 | @click.option(
101 |     '-o', '--output-directory', type=pathlib.Path,
102 |     default='.', show_default=True,
103 |     help="Directory to store output results")
104 | @click.option('--no-phonetic', help="Skip phonetic part", is_flag=True)
105 | @click.option('--no-lexical', help="Skip lexical part", is_flag=True)
106 | @click.option('--no-syntactic', help="Skip syntactic part", is_flag=True)
107 | @click.option('--no-semantic', help="Skip semantic part", is_flag=True)
108 | def evaluate(
109 |         dataset, submission, njobs, force_cpu, output_directory,
110 |         no_phonetic, no_lexical, no_syntactic, no_semantic):
111 |     """Evaluate a submission to the Zero Resource Speech Challenge 2021
112 |
113 |     DATASET is the root directory of the ZR2021 dataset, as downloaded from
114 |     https://zerospeech.com/2021.
115 |
116 |     SUBMISSION is the submission to evaluate; it can be a .zip file or a
117 |     directory.
118 |
119 |     """
120 |     try:
121 |         # regular participants can only evaluate the dev datasets; test can
122 |         # only be evaluated by doing an official submission to the challenge.
123 |         # The ZEROSPEECH2021_TEST_GOLD environment variable is used by
124 |         # organizers to provide test gold files to the evaluation program
125 |         # while keeping the program as simple as possible for participants.
126 |         kinds = ['dev']
127 |         if 'ZEROSPEECH2021_TEST_GOLD' in os.environ:
128 |             kinds.append('test')
129 |             dataset = pathlib.Path(os.environ['ZEROSPEECH2021_TEST_GOLD'])
130 |
131 |         # ensure the dataset exists
132 |         dataset = dataset.resolve(strict=True)
133 |         if not dataset.is_dir():
134 |             raise ValueError(f'dataset not found: {dataset}')
135 |
136 |         # ensure the submission exists; if it is a zip, uncompress it
137 |         submission = submission.resolve(strict=True)
138 |         if submission.is_file() and zipfile.is_zipfile(submission):
139 |             # create a temp directory we remove at exit
140 |             submission_unzip = tempfile.mkdtemp()
141 |             atexit.register(shutil.rmtree, submission_unzip)
142 |
143 |             # uncompress to the temp directory
144 |             print(f'Unzip submission to {submission_unzip}...')
145 |             zipfile.ZipFile(submission, 'r').extractall(submission_unzip)
146 |             submission = pathlib.Path(submission_unzip)
147 |         elif not submission.is_dir():
148 |             raise ValueError(
149 |                 f'submission is not a zip file or a directory: {submission}')
150 |
151 |         if not output_directory.is_dir():
152 |             output_directory.mkdir(exist_ok=True, parents=True)
153 |
154 |         if not no_lexical:
155 |             eval_lexical(dataset, submission, output_directory, kinds)
156 |
157 |         if not no_semantic:
158 |             eval_semantic(dataset, submission, output_directory, kinds, njobs)
159 |
160 |         if not no_syntactic:
161 |             eval_syntactic(dataset, submission, output_directory, kinds)
162 |
163 |         if not no_phonetic:
164 |             eval_phonetic(
165 |                 dataset, submission, output_directory, kinds, force_cpu)
166 |
167 |     except ValueError as error:
168 |         print(f'ERROR: {error}')
169 |         sys.exit(-1)
--------------------------------------------------------------------------------
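As the comment in evaluate() explains, organizers can additionally score the
test sets by pointing the evaluator at the gold files through an environment
variable (the gold path shown is a placeholder):

    ZEROSPEECH2021_TEST_GOLD=/path/to/test-gold zerospeech2021-evaluate ./dataset ./submission.zip -o scores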
/zerospeech2021/cli/leaderboard.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from zerospeech2021.leaderboard import get_semantic_size, ZeroSpeechSubmission
8 |
9 |
10 | def create(submission_location: Path, dataset_location, score_location: Path,
11 |            user_meta, leaderboard_file: Path):
12 |     """Build a leaderboard entry from the scores computed by the evaluation.
13 |
14 |     ARGS:
15 |         submission_location: location of the submission entry files (as described in ...)
16 |         dataset_location: location of the test set
17 |         score_location: location of the scores computed by the evaluation
18 |         user_meta: file containing platform metadata (user, submission date, etc.)
19 |         leaderboard_file: location & name of the result file to write
20 |     """
21 |     print("Building leaderboard entry from scores...")
22 |     semantic_size = get_semantic_size(dataset_location)
23 |
24 |     if not submission_location.is_dir():
25 |         print("SUBMISSION folder not found", file=sys.stderr)
26 |         sys.exit(-1)
27 |
28 |     if not dataset_location.is_dir():
29 |         print("DATASET folder not found", file=sys.stderr)
30 |         sys.exit(-1)
31 |
32 |     if not score_location.is_dir():
33 |         print("SCORE folder not found", file=sys.stderr)
34 |         sys.exit(-1)
35 |
36 |     if leaderboard_file.is_file():
37 |         print(f"WARNING: leaderboard specified already exists: [OVERWRITING] {leaderboard_file}", file=sys.stderr)
38 |
39 |     subs = ZeroSpeechSubmission(
40 |         submission_location=submission_location, external_meta_file=user_meta,
41 |         _semantic_size=semantic_size, score_location=score_location,
42 |     )
43 |
44 |     leaderboard_file = leaderboard_file.with_suffix(".json")
45 |     with leaderboard_file.open('w') as fp:
46 |         json.dump(subs.leaderboard(), fp, indent=4)
47 |     print(f"\t> Wrote {leaderboard_file}")
48 |
49 |
50 | @click.command(epilog='See https://zerospeech.com/2021 for more details')
51 | @click.argument('submission', type=Path)
52 | @click.argument('dataset', type=Path)
53 | @click.argument('scores', type=Path)
54 | @click.option('-u', '--user-meta', type=Path, help="Location of platform metadata")
55 | @click.option('-o', '--output-file', type=Path, help="Location & name of the leaderboard file")
56 | def leaderboard(submission: Path, dataset: Path, scores: Path, user_meta, output_file):
57 |     """CLI wrapper to build a leaderboard entry"""
58 |     try:
59 |         create(submission, dataset, scores, user_meta, output_file)
60 |     except ValueError as error:
61 |         print(f'ERROR: {error}')
62 |         sys.exit(-1)
--------------------------------------------------------------------------------
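Assuming `zerospeech2021-evaluate` wrote its CSV scores to a `scores/`
directory, a leaderboard entry can then be generated along these lines (the
file names are illustrative):

    zerospeech2021-leaderboard ./submission ./dataset ./scores -u user_meta.json -o entry.json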
/zerospeech2021/cli/upload.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from getpass import getpass
3 | from pathlib import Path
4 |
5 | import click
6 |
7 | from rich.console import Console
8 | from rich.progress import Progress, BarColumn
9 |
10 | from zerospeech2021 import zr_upload_lib as zr_up
11 |
12 | # Fancy console
13 | console = Console()
14 |
15 | # The challenge to use for uploads
16 | # ID 6 => zerospeech2021
17 | CHALLENGE_ID: int = 6
18 |
19 |
20 | @click.group(epilog='See https://zerospeech.com/2021 for more details')
21 | @click.option('--debug', help="Print debug info", is_flag=True)
22 | @click.pass_context
23 | def upload_cmd(ctx, debug):
24 |     ctx.debug = debug
25 |
26 |
27 | @upload_cmd.command()
28 | @click.option('-u', '--username', type=str)
29 | @click.option('-p', '--password', type=str)
30 | @click.option('--clear', is_flag=True)
31 | @click.pass_obj
32 | def login(debug, username, password, clear):
33 |     # clear session, then exit successfully: nothing more to do
34 |     if clear:
35 |         zr_up.auth.clear_session()
36 |         sys.exit(0)
37 |
38 |     if not username:
39 |         username = input('Username: ')
40 |
41 |     if not password:
42 |         password = getpass("Password: ")
43 |
44 |     # login
45 |     token = zr_up.auth.login(username, password)
46 |     # save session
47 |     zr_up.auth.create_session(token)
48 |     console.print(f'Successfully logged in as {username}', style='green bold')
49 |
50 |
51 | @upload_cmd.command()
52 | @click.argument('archive_file', type=Path)
53 | @click.pass_obj
54 | def multipart(debug, archive_file):
55 |     """Upload an archive using multipart upload"""
56 |     # reject missing files and non-zip archives, as the message states
57 |     if not archive_file.is_file() or archive_file.suffix != ".zip":
58 |         console.print(f"ERROR: given file: {archive_file} was not found or is not a .zip file !!",
59 |                       style="red bold")
60 |         sys.exit(1)
61 |
62 |     # check if file is large enough for splitting
63 |     will_split = archive_file.stat().st_size > zr_up.model.MULTIPART_THRESHOLD * 2
64 |
65 |     checkpoint_file = archive_file.parent / f"{archive_file.stem}.checkpoint.json"
66 |     zr_up.upload.ask_resume(checkpoint_file)
67 |     token = zr_up.auth.get_session()
68 |
69 |     with Progress(
70 |             "[progress.description]{task.description}", BarColumn(),
71 |     ) as progress:
72 |         task = progress.add_task("[red]Uploading...", start=False, total=100)
73 |
74 |         if will_split:
75 |             zr_up.upload.multipart_upload(CHALLENGE_ID, archive_file, token, checkpoint_file)
76 |         else:
77 |             zr_up.upload.single_part_upload(CHALLENGE_ID, archive_file, token)
78 |
79 |         progress.advance(task, advance=100)
80 |
81 |     console.print(f"Successfully uploaded archive {archive_file} to zerospeech.com", style="green")
82 |
83 |
84 | @upload_cmd.command()
85 | @click.argument('archive_file', type=Path)
86 | @click.pass_obj
87 | def simple(debug, archive_file):
88 |     """Upload an archive using simple upload"""
89 |     # reject missing files and non-zip archives, as the message states
90 |     if not archive_file.is_file() or archive_file.suffix != ".zip":
91 |         console.print(f"ERROR: given file: {archive_file} was not found or is not a .zip file !!",
92 |                       style="red bold")
93 |         sys.exit(1)
94 |
95 |     token = zr_up.auth.get_session()
96 |     with Progress(
97 |             "[progress.description]{task.description}", BarColumn(),
98 |     ) as progress:
99 |         task = progress.add_task("[red]Uploading...", start=False, total=100)
100 |
101 |         # upload
102 |         zr_up.upload.single_part_upload(CHALLENGE_ID, archive_file, token)
103 |
104 |         progress.advance(task, advance=100)
105 |
106 |     console.print(f"Successfully uploaded archive {archive_file} to zerospeech.com", style="green")
--------------------------------------------------------------------------------
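A typical upload session with this (work-in-progress) tool logs in once, then
sends the archive; `multipart` is the safer choice for large files, as it
writes a checkpoint file that lets an interrupted upload resume:

    zerospeech2021-upload login -u myusername
    zerospeech2021-upload multipart ./submission.zip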
print(' > lexical/dev') 56 | lexical.validate( 57 | submission / 'lexical' / 'dev.txt', 58 | dataset, 'dev') 59 | 60 | if not only_dev: 61 | print(' > lexical/test') 62 | lexical.validate( 63 | submission / 'lexical' / 'test.txt', 64 | dataset, 'test') 65 | 66 | 67 | def _validate_syntactic(submission, dataset, only_dev): 68 | print('Validating syntactic...') 69 | _validate_directory( 70 | submission / 'syntactic', 71 | ['dev.txt'] if only_dev else ['dev.txt', 'test.txt']) 72 | 73 | print(' > syntactic/dev') 74 | syntactic.validate( 75 | submission / 'syntactic' / 'dev.txt', 76 | dataset, 'dev') 77 | 78 | if not only_dev: 79 | print(' > syntactic/test') 80 | syntactic.validate( 81 | submission / 'syntactic' / 'test.txt', 82 | dataset, 'test') 83 | 84 | 85 | def _validate_semantic(submission, dataset, only_dev, njobs): 86 | print('Validating semantic...') 87 | semantic_content = ['dev'] if only_dev else ['dev', 'test'] 88 | _validate_directory(submission / 'semantic', semantic_content) 89 | 90 | for subdir in semantic_content: 91 | _validate_directory( 92 | submission / 'semantic' / subdir, 93 | ['librispeech', 'synthetic']) 94 | 95 | print(' > semantic/dev/synthetic') 96 | semantic.validate( 97 | submission / 'semantic', dataset, 'dev', 'synthetic', njobs=njobs) 98 | 99 | print(' > semantic/dev/librispeech') 100 | semantic.validate( 101 | submission / 'semantic', dataset, 'dev', 'librispeech', njobs=njobs) 102 | 103 | if not only_dev: 104 | print(' > semantic/test/synthetic') 105 | semantic.validate( 106 | submission / 'semantic', dataset, 'test', 'synthetic', njobs=njobs) 107 | 108 | print(' > semantic/test/librispeech') 109 | semantic.validate( 110 | submission / 'semantic', dataset, 'test', 'librispeech', njobs=njobs) 111 | 112 | 113 | @click.command(epilog='See https://zerospeech.com/2021 for more details') 114 | @click.argument('dataset', type=pathlib.Path) 115 | @click.argument('submission', type=pathlib.Path) 116 | @click.option( 117 | '-j', '--njobs', default=1, type=int, 118 | help='Number of parallel jobs (default to 1)') 119 | @click.option('--only-dev', help='Skip test part', is_flag=True) 120 | @click.option('--no-phonetic', help="Skip phonetic part", is_flag=True) 121 | @click.option('--no-lexical', help="Skip lexical part", is_flag=True) 122 | @click.option('--no-syntactic', help="Skip syntactic part", is_flag=True) 123 | @click.option('--no-semantic', help="Skip semantic part", is_flag=True) 124 | def validate( 125 | dataset, submission, njobs, only_dev, 126 | no_phonetic, no_lexical, no_syntactic, no_semantic): 127 | """Validate a submission to the Zero Resource Speech Challenge 2021 128 | 129 | DATASET is the root directory of the ZR2021 dataset, as downloaded with the 130 | zerospeech2021-download tool. 131 | 132 | SUBMISSION is the submission to validate, it can be a .zip file or a 133 | directory. 
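    A typical call looks like this (illustrative only; the actual console
    entry point name is defined by setup.py and may differ):

        zerospeech2021-validate ./dataset ./submission.zip --only-dev -j4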
134 |
135 |     """
136 |     try:
137 |         # ensures the dataset exists
138 |         dataset = dataset.resolve(strict=True)
139 |         if not dataset.is_dir():
140 |             raise ValueError(f'dataset not found: {dataset}')
141 |
142 |         # ensures the submission exists, if it is a zip, uncompress it
143 |         submission = submission.resolve(strict=True)
144 |
145 |         print('Prepare input...')
146 |         print(f' > dataset: {dataset}')
147 |         print(f' > submission: {submission}')
148 |
149 |         if submission.is_file() and zipfile.is_zipfile(submission):
150 |             # create a temp directory we remove at exit
151 |             submission_unzip = tempfile.mkdtemp()
152 |             atexit.register(shutil.rmtree, submission_unzip)
153 |
154 |             # uncompress to the temp directory
155 |             print(f' > unzip submission to {submission_unzip}...')
156 |             zipfile.ZipFile(submission, 'r').extractall(submission_unzip)
157 |             submission = pathlib.Path(submission_unzip)
158 |         elif not submission.is_dir():
159 |             raise ValueError(
160 |                 f'submission is not a zip file or a directory: {submission}')
161 |
162 |         print('Validating root folder...')
163 |         print(' > meta.yaml')
164 |         is_open_source = meta.validate(submission)
165 |
166 |         print(' > root folder')
167 |         root_content = [
168 |             'meta.yaml', 'phonetic', 'lexical', 'syntactic', 'semantic']
169 |         if is_open_source:
170 |             root_content.append('code')
171 |         _validate_directory(submission, root_content)
172 |
173 |         if is_open_source:
174 |             if not (submission / 'code').is_dir():
175 |                 raise exception.ValidationError(
176 |                     'submission specified as open source but '
177 |                     'code folder is missing')
178 |             if not list((submission / 'code').iterdir()):
179 |                 raise exception.ValidationError(
180 |                     'submission specified as open source but '
181 |                     'code folder is empty')
182 |             print(' > code folder detected: submission will be manually '
183 |                   'inspected to ensure it is open source')
184 |
185 |         if not no_phonetic:
186 |             _validate_phonetic(submission, dataset, only_dev, njobs)
187 |
188 |         if not no_lexical:
189 |             _validate_lexical(submission, dataset, only_dev)
190 |
191 |         if not no_syntactic:
192 |             _validate_syntactic(submission, dataset, only_dev)
193 |
194 |         if not no_semantic:
195 |             _validate_semantic(submission, dataset, only_dev, njobs)
196 |
197 |     except (exception.ValidationError, ValueError, FileNotFoundError) as error:
198 |         print(f'ERROR: {error}')
199 |         print('Validation failed, please fix it and try again!')
200 |         sys.exit(-1)
201 |
202 |     print('Success!')
203 |     sys.exit(0)
204 |
--------------------------------------------------------------------------------
/zerospeech2021/exception.py:
--------------------------------------------------------------------------------
1 | """Custom exceptions for ZR2021 validation steps"""
2 |
3 |
4 | def _print_sublist(entries, num=3):
5 |     """Returns a string containing the `num` first elements of `entries`"""
6 |     if len(entries) <= num:
7 |         return '[' + ', '.join(str(e) for e in entries) + ']'
8 |
9 |     return (
10 |         '[' + ', '.join(list(str(e) for e in entries)[:num]) +
11 |         f', ...] and {len(entries) - num} more')
12 |
13 |
14 | class ValidationError(Exception):
15 |     """Raised when detecting a validation error"""
16 |
17 |
18 | class FormatError(ValidationError):
19 |     """Raised when detecting a bad format in a submission file"""
20 |     def __init__(self, line, message):
21 |         super().__init__(message)
22 |         self._line = line
23 |
24 |     def __str__(self):
25 |         return f'bad format (line {self._line}): ' + super().__str__()
26 |
27 |
28 | class FileFormatError(ValidationError):
29 |     """Raised when detecting a bad format in a submission file"""
30 |     def __init__(self, file, message):
31 |         super().__init__(message)
32 |         self._file = file
33 |
34 |     def __str__(self):
35 |         return f'bad format (file {self._file}): ' + super().__str__()
36 |
37 |
38 | class MismatchError(ValidationError):
39 |     """Raised when detecting a mismatch between two sets"""
40 |     def __init__(self, message, expected, observed):
41 |         super().__init__()
42 |         self._message = message
43 |
44 |         expected = set(expected)
45 |         observed = set(observed)
46 |
47 |         missing = expected - observed
48 |         extra = observed - expected
49 |
50 |         if missing or extra:
51 |             self._message += ': '
52 |             if missing:
53 |                 self._message += f'missing {_print_sublist(missing)}'
54 |             if missing and extra:
55 |                 self._message += ', '
56 |             if extra:
57 |                 self._message += f'extra {_print_sublist(extra)}'
58 |
59 |     def __str__(self):
60 |         return self._message
61 |
62 |
63 | class EntryMissingError(ValidationError):
64 |     """Raised when an entry is missing from the result set"""
65 |
66 |     def __init__(self, expected, source):
67 |         super().__init__()
68 |         self._message = f"Input file ({source}) does not have a matching feature ({expected})!"
69 |
70 |     def __str__(self):
71 |         return self._message
72 |
--------------------------------------------------------------------------------
/zerospeech2021/leaderboard.py:
--------------------------------------------------------------------------------
1 | import json
2 | from dataclasses import dataclass
3 | from datetime import datetime
4 | from pathlib import Path
5 | from typing import Dict, Optional
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import yaml
10 |
11 |
12 | class LexicalScores:
13 |     """ Class that extracts a summary of lexical scores from a scores directory """
14 |     # score files
15 |     __dev_pairs = 'score_lexical_dev_by_pair.csv'
16 |     __test_pairs = 'score_lexical_test_by_pair.csv'
17 |     __dev_frequency = 'score_lexical_dev_by_frequency.csv'
18 |     __test_frequency = 'score_lexical_test_by_frequency.csv'
19 |     __dev_length = 'score_lexical_dev_by_length.csv'
20 |     __test_length = 'score_lexical_test_by_length.csv'
21 |
22 |     def is_valid(self, location: Path):
23 |         """ Verify that all files are present """
24 |
25 |         if not (location / self.__dev_length).is_file():
26 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_length score file!")
27 |         if not (location / self.__test_length).is_file():
28 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_length score file!")
29 |         if not (location / self.__dev_frequency).is_file():
30 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_frequency score file!")
31 |         if not (location / self.__test_frequency).is_file():
32 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_frequency score file!")
33 |         if not (location / self.__dev_pairs).is_file():
34 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_dev_by_pairs score file!")
35 |         if not (location / self.__test_pairs).is_file():
36 |             raise FileNotFoundError(f"Score folder {location} is missing lexical_test_by_pairs score file!")
37 |
38 |     def __init__(self, location: Path):
39 |         """ Initialise lexical score object """
40 |         self.is_valid(location)
41 |         self.location = location
42 |
43 |     @staticmethod
44 |     def _score_invocab(frame):
45 |         """Weighted mean of scores by frequency, excluding OOVs"""
46 |         # filter out OOVs
47 |         frame = frame[frame['frequency'] != 'oov']
48 |
49 |         # weighted mean
50 |         return np.average(
51 |             frame['score'].to_numpy(),
52 |             weights=frame['n'].to_numpy())
53 |
54 |     def general(self):
55 |         """ Extract general lexical scores """
56 |         dev_score = pd.read_csv(self.location / self.__dev_pairs)['score'].mean()
57 |         test_score = pd.read_csv(self.location / self.__test_pairs)['score'].mean()
58 |         # weighted scores
59 |         dev_score_invocab = self._score_invocab(
60 |             pd.read_csv(self.location / self.__dev_frequency)
61 |         )
62 |
63 |         test_score_invocab = self._score_invocab(
64 |             pd.read_csv(self.location / self.__test_frequency)
65 |         )
66 |
67 |         return {
68 |             'lexical_all': [dev_score, test_score],
69 |             'lexical_invocab': [dev_score_invocab, test_score_invocab]
70 |         }
71 |
72 |     def detailed(self):
73 |         """ Extract detailed lexical scores """
74 |         frequency_dev = pd.read_csv(self.location / self.__dev_frequency)
75 |         frequency_test = pd.read_csv(self.location / self.__test_frequency)
76 |
77 |         by_frequency = pd.merge(frequency_dev, frequency_test,
78 |                                 how="outer", on=['frequency'], suffixes=("_dev", "_test"))
79 |
80 |         length_dev = pd.read_csv(self.location / self.__dev_length)
81 |         length_test = pd.read_csv(self.location / self.__test_length)
82 |
83 |         by_length = pd.merge(length_dev, length_test, how="outer", on=['length'], suffixes=("_dev", "_test"))
84 |
85 |         return {
86 |             "by_length": by_length.to_dict(orient='records'),
87 |             "by_frequency": by_frequency.to_dict(orient='records')
88 |         }
89 |
90 |
91 | class SemanticScore:
92 |     """ Class that extracts a summary of semantic scores from a scores directory """
93 |     # score files
94 |     __dev_correlation = 'score_semantic_dev_correlation.csv'
95 |     __test_correlation = 'score_semantic_test_correlation.csv'
96 |
97 |     def is_valid(self, location: Path):
98 |         """ Verify that all files are present """
99 |
100 |         if not (location / self.__dev_correlation).is_file():
101 |             raise FileNotFoundError(f"Score folder {location} is missing semantic_dev_correlation score file!")
102 |         if not (location / self.__test_correlation).is_file():
103 |             raise FileNotFoundError(f"Score folder {location} is missing semantic_test_correlation score file!")
104 |
105 |     def __init__(self, location: Path, size: Dict):
106 |         """ Initialise semantic score object """
107 |         self.is_valid(location)
108 |         self.location = location
109 |         self.size = size
110 |
111 |     def general(self):
112 |         """ Extract general semantic scores """
113 |         dev_correlations = pd.read_csv(self.location / self.__dev_correlation)
114 |         dev_librispeech_mean = dev_correlations[dev_correlations['type'] == 'librispeech']['correlation'].mean()
115 |         dev_synthetic_mean = dev_correlations[dev_correlations['type'] == 'synthetic']['correlation'].mean()
116 |
117 |         dev_correlations['size'] = self.size['dev']['size']
118 |         dev_librispeech_wmean = np.average(
119 |             dev_correlations[dev_correlations['type'] == 'librispeech']['correlation'].to_numpy(),
120 |             weights=dev_correlations[dev_correlations['type'] == 'librispeech']['size'].to_numpy())
121 |         dev_synthetic_wmean = np.average(
122 |             dev_correlations[dev_correlations['type'] == 'synthetic']['correlation'].to_numpy(),
123 |             weights=dev_correlations[dev_correlations['type'] == 'synthetic']['size'].to_numpy())
124 |
125 |         test_correlations = pd.read_csv(self.location / self.__test_correlation)
126 |         test_librispeech_mean = test_correlations[test_correlations['type'] == 'librispeech']['correlation'].mean()
127 |         test_synthetic_mean = test_correlations[test_correlations['type'] == 'synthetic']['correlation'].mean()
128 |
129 |         test_correlations['size'] = self.size['test']['size']
130 |         test_librispeech_wmean = np.average(
131 |             test_correlations[test_correlations['type'] == 'librispeech']['correlation'].to_numpy(),
132 |             weights=test_correlations[test_correlations['type'] == 'librispeech']['size'].to_numpy())
133 |         test_synthetic_wmean = np.average(
134 |             test_correlations[test_correlations['type'] == 'synthetic']['correlation'].to_numpy(),
135 |             weights=test_correlations[test_correlations['type'] == 'synthetic']['size'].to_numpy())
136 |
137 |         return {
138 |             "semantic_synthetic": [
139 |                 dev_synthetic_mean, test_synthetic_mean],
140 |             "semantic_librispeech": [
141 |                 dev_librispeech_mean, test_librispeech_mean],
142 |             "weighted_semantic_synthetic": [
143 |                 dev_synthetic_wmean, test_synthetic_wmean],
144 |             "weighted_semantic_librispeech": [
145 |                 dev_librispeech_wmean, test_librispeech_wmean]
146 |         }
147 |
148 |     def detailed(self):
149 |         """ Extract detailed semantic scores """
150 |         dev_correlations = pd.read_csv(self.location / self.__dev_correlation)
151 |         test_correlations = pd.read_csv(self.location / self.__test_correlation)
152 |
153 |         ndev_correlations = dev_correlations \
154 |             .set_index(['dataset', dev_correlations.groupby('dataset').cumcount()])['correlation'] \
155 |             .unstack() \
156 |             .reset_index()
157 |         ndev_correlations.columns = ['dataset', 'librispeech', 'synthetic']
158 |         ndev_correlations["set"] = "dev"
159 |
160 |         ntest_correlations = test_correlations \
161 |             .set_index(['dataset', test_correlations.groupby('dataset').cumcount()])['correlation'] \
162 |             .unstack() \
163 |             .reset_index()
164 |         ntest_correlations.columns = ['dataset', 'librispeech', 'synthetic']
165 |         ntest_correlations["set"] = "test"
166 |
167 |         # DeprecationWarning from pandas: append is to be replaced by concat
168 |         correlations = pd.concat([ndev_correlations, ntest_correlations], axis=0)
169 |         # correlations = ndev_correlations.append(ntest_correlations)
170 |
171 |         return correlations.to_dict(orient='records')
172 |
173 |
174 | class SyntacticScores:
175 |     """ Class that extracts a summary of syntactic scores from a scores directory """
176 |     # score files
177 |     __dev_pairs = 'score_syntactic_dev_by_pair.csv'
178 |     __test_pairs = 'score_syntactic_test_by_pair.csv'
179 |     __dev_types = 'score_syntactic_dev_by_type.csv'
180 |     __test_types = 'score_syntactic_test_by_type.csv'
181 |
182 |     def is_valid(self, location: Path):
183 |         """ Verify that all files are present """
184 |
185 |         if not (location / self.__dev_pairs).is_file():
186 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_dev_by_pair score file!")
187 |         if not (location / self.__test_pairs).is_file():
188 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_test_by_pair score file!")
189 |         if not (location / self.__dev_types).is_file():
190 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_dev_by_type score file!")
191 |         if not (location / self.__test_types).is_file():
192 |             raise FileNotFoundError(f"Score folder {location} is missing syntactic_test_by_type score file!")
193 |
194 |     def __init__(self, location: Path):
195 |         """ Initialise syntactic score object """
196 |         self.is_valid(location)
197 |         self.location = location
198 |
199 |     def general(self):
200 |         """ Extract general syntactic scores """
201 |         dev_mean = pd.read_csv(self.location / self.__dev_pairs)['score'].mean()
202 |         test_mean = pd.read_csv(self.location / self.__test_pairs)['score'].mean()
203 |         return [dev_mean, test_mean]
204 |
205 |     def detailed(self):
206 |         """ Extract detailed syntactic scores """
207 |         dev_types = pd.read_csv(self.location / self.__dev_types)
208 |         test_types = pd.read_csv(self.location / self.__test_types)
209 |
210 |         merged = pd.merge(dev_types, test_types, how="outer", on=["type"], suffixes=("_dev", "_test"))
211 |
212 |         return merged.to_dict(orient='records')
213 |
214 |
215 | class PhoneticScores:
216 |     """ Class that extracts a summary of phonetic scores from a scores directory """
217 |     # score files
218 |     __scores = 'score_phonetic.csv'
219 |
220 |     def is_valid(self, location: Path):
221 |         """ Verify that all files are present """
222 |
223 |         if not (location / self.__scores).is_file():
224 |             raise FileNotFoundError(f"Score folder {location} is missing phonetic score file!")
225 |
226 |     def __init__(self, location: Path):
227 |         """ Initialise phonetic score object """
228 |         self.is_valid(location)
229 |         self.location = location
230 |
231 |     def general(self):
232 |         """ Extract general phonetic scores """
233 |
234 |         def e(d):
235 |             return {s['type']: s['score'] for s in d}
236 |
237 |         frame = pd.read_csv(self.location / self.__scores)
238 |         dev_clean = frame[(frame["dataset"] == 'dev') & (frame["sub-dataset"] == 'clean')][['type', 'score']] \
239 |             .to_dict(orient='records')
240 |         dev_other = frame[(frame["dataset"] == 'dev') & (frame["sub-dataset"] == 'other')][['type', 'score']] \
241 |             .to_dict(orient='records')
242 |         test_clean = frame[(frame["dataset"] == 'test') & (frame["sub-dataset"] == 'clean')][['type', 'score']] \
243 |             .to_dict(orient='records')
244 |         test_other = frame[(frame["dataset"] == 'test') & (frame["sub-dataset"] == 'other')][['type', 'score']] \
245 |             .to_dict(orient='records')
246 |
247 |         return {
248 |             "phonetic_clean_within": [e(dev_clean)['within'], e(test_clean)['within']],
249 |             "phonetic_clean_across": [e(dev_clean)['across'], e(test_clean)['across']],
250 |             "phonetic_other_within": [e(dev_other)['within'], e(test_other)['within']],
251 |             "phonetic_other_across": [e(dev_other)['across'], e(test_other)['across']]
252 |         }
253 |
254 |     @staticmethod
255 |     def detailed():
256 |         """ Extract detailed phonetic scores """
257 |         # the phonetic task has no detailed view of scores
258 |         return {}
259 |
260 |
261 | @dataclass
262 | class Metadata:
263 |     author: str
264 |     affiliation: str
265 |     description: str
266 |     open_source: bool
267 |     train_set: str
268 |     gpu_budget: float
269 |     parameters: Dict
270 |     visually_grounded: bool = False
271 |     submission_id: Optional[str] = None
272 |     submission_date: Optional[datetime] = None
273 |     submitted_by: Optional[str] = None
274 |
275 |     @staticmethod
276 |     def parse_external_meta(filepath: Path) -> Dict:
277 |         if filepath is None or not filepath.is_file():
278 |             return {}
279 |         elif filepath.suffix == '.json':
280 |             with filepath.open() as fp:
281 |                 return json.load(fp)
282 |         else:
283 |             # old txt based file
284 |             submitted_at = None
285 |             with filepath.open() as fp:
286 |                 for line in fp.readlines():
287 |                     line = line.rstrip()
288 |                     if line.startswith('submitted-at:'):
289 |                         submitted_at
= line.replace('submitted-at:', '').replace(' ', '') 290 | return {"submitted-at": submitted_at} 291 | 292 | @staticmethod 293 | def filter_external_meta(data: Dict): 294 | try: 295 | sub_data = datetime.fromisoformat(data.get("submitted-at", None)) 296 | except (ValueError, TypeError): 297 | sub_data = None 298 | 299 | return { 300 | "submission_date": sub_data, 301 | "submitted_by": data.get("user", None), 302 | "submission_id": data.get("submission_id", None) 303 | } 304 | 305 | @classmethod 306 | def create_from(cls, filepath: Path, external_meta_file: Path): 307 | with (filepath / 'meta.yaml').open() as fp: 308 | meta = yaml.load(fp, Loader=yaml.SafeLoader) 309 | 310 | # parse & filter items of platform metadata 311 | external_meta = cls.filter_external_meta(cls.parse_external_meta(external_meta_file)) 312 | 313 | return cls(**meta, **external_meta) 314 | 315 | def to_dict(self): 316 | if self.submission_date: 317 | sub_date = self.submission_date.isoformat() 318 | else: 319 | sub_date = datetime.now().isoformat() 320 | 321 | return { 322 | "submitted_at": sub_date, 323 | "author": self.author, 324 | "affiliation": self.affiliation, 325 | "submitted_by": self.submitted_by, 326 | "submission_id": self.submission_id, 327 | "description": self.description, 328 | "visually_grounded": self.visually_grounded, 329 | "open_source": self.open_source, 330 | "train_set": self.train_set, 331 | "gpu_budget": self.gpu_budget, 332 | "parameters": self.parameters 333 | } 334 | 335 | 336 | class ZeroSpeechSubmission: 337 | 338 | def __init__(self, submission_location: Path, _semantic_size: Dict, 339 | score_location: Path, external_meta_file: Path): 340 | 341 | # fetch metadata 342 | self.description = Metadata.create_from(submission_location, external_meta_file) 343 | 344 | # create scores 345 | self.lexical = LexicalScores(score_location) 346 | self.semantic = SemanticScore(score_location, _semantic_size) 347 | self.syntactic = SyntacticScores(score_location) 348 | self.phonetic = PhoneticScores(score_location) 349 | 350 | def leaderboard(self): 351 | """ Build leaderboard object """ 352 | ph = self.phonetic.general() 353 | le = self.lexical.general() 354 | se = self.semantic.general() 355 | sy = self.syntactic.general() 356 | more = { 357 | "description": self.description.to_dict(), 358 | "lexical": self.lexical.detailed(), 359 | "syntactic": self.syntactic.detailed(), 360 | "semantic": self.semantic.detailed(), 361 | } 362 | return { 363 | "author_label": self.description.author, 364 | "set": ['dev', 'test'], 365 | **le, 366 | "syntactic": sy, 367 | **ph, 368 | **se, 369 | "more": more 370 | } 371 | 372 | 373 | def get_semantic_size(dataset: Path): 374 | test_size = pd.read_csv(dataset / 'semantic/test/pairs.csv', header=0) \ 375 | .groupby(['type', 'dataset'], as_index=False).size() 376 | dev_size = pd.read_csv(dataset / 'semantic/dev/pairs.csv', header=0) \ 377 | .groupby(['type', 'dataset'], as_index=False).size() 378 | return {'dev': dev_size, 'test': test_size} 379 | -------------------------------------------------------------------------------- /zerospeech2021/lexical.py: -------------------------------------------------------------------------------- 1 | """Lexical part of the ZR2021 (validation and evaluation)""" 2 | 3 | import collections 4 | import pathlib 5 | import sys 6 | 7 | import pandas 8 | from zerospeech2021.exception import FormatError, MismatchError 9 | 10 | 11 | def _validate_line(index, line): 12 | """Auxiliary function to validate() 13 | 14 | Returns the filename in `line`, 
checks the score and raises FormatError if
15 |     the line is not valid.
16 |
17 |     """
18 |     # ensure the line has two fields separated by a space
19 |     line = line.strip()
20 |     fields = line.split(' ')
21 |     if len(fields) != 2:
22 |         raise FormatError(
23 |             index, f'must be "<filename> <score>" but is "{line}"')
24 |
25 |     filename, score = tuple(fields)
26 |
27 |     # ensure the second field is a positive float
28 |     try:
29 |         float(score)
30 |     except ValueError:
31 |         raise FormatError(
32 |             index, f'<score> must be a float but is "{score}"')
33 |
34 |     return filename
35 |
36 |
37 | def validate(submission, dataset, kind):
38 |     """Raises a ValidationError if the `submission` file is not valid
39 |
40 |     * The submission file must be in text format, each line as:
41 |
42 |           <filename> <score>
43 |     * The <filename> is the name of a wav file in the lexical dataset, without
44 |       path nor extension ("xKtnLJYiWGt", not "lexical/dev/xKtnLJYiWGt.wav")
45 |
46 |     * The <score> is a positive float
47 |
48 |     Parameters
49 |     ----------
50 |     submission: path
51 |         The submission file to validate, each line must be formatted as
52 |         "<filename> <score>".
53 |     dataset: path
54 |         The root path of the ZR2021 dataset
55 |     kind: str
56 |         Must be 'dev' or 'test'
57 |
58 |     Raises
59 |     ------
60 |     ValueError
61 |         If `kind` is not 'dev' or 'test', if `submission` is not a file or if
62 |         the dataset is not an existing directory.
63 |     ValidationError
64 |         If one line of the submission file is not valid or if the submitted
65 |         filenames do not match the required ones.
66 |
67 |     """
68 |     if kind not in ('dev', 'test'):
69 |         raise ValueError(
70 |             f'kind must be "dev" or "test", it is {kind}')
71 |
72 |     if not pathlib.Path(submission).is_file():
73 |         raise ValueError(
74 |             f'{kind} submission file not found: {submission}')
75 |
76 |     # retrieve the required filenames that must be present in the submission
77 |     dataset = pathlib.Path(dataset) / f'lexical/{kind}'
78 |     if not dataset.is_dir():
79 |         raise ValueError(f'dataset not found: {dataset}')
80 |     required_files = set(w.stem for w in dataset.glob('*.wav'))
81 |
82 |     # ensure each line in the submission is valid and retrieve the filenames
83 |     submitted_files = list(
84 |         _validate_line(index + 1, line)
85 |         for index, line in enumerate(open(submission, 'r')))
86 |
87 |     # ensure there is no duplicate in the filenames
88 |     duplicates = [
89 |         f for f, n in collections.Counter(submitted_files).items() if n > 1]
90 |     if duplicates:
91 |         raise MismatchError('duplicates found', [], duplicates)
92 |
93 |     # ensure all the required files are here and there is no extra filename
94 |     if required_files != set(submitted_files):
95 |         raise MismatchError(
96 |             'mismatch in filenames', required_files, submitted_files)
97 |
98 |
99 | def load_data(gold_file, submission_file):
100 |     """Returns the data required for evaluation as a pandas data frame
101 |
102 |     Each line of the returned data frame contains a pair (word, non word) and
103 |     has the following columns: 'id', 'voice', 'frequency', 'word', 'score
104 |     word', 'non word', 'score non word'.
105 |
106 |     Parameters
107 |     ----------
108 |     gold_file : path
109 |         The gold file for the lexical dataset (test or dev).
110 |     submission_file : path
111 |         The submission corresponding to the provided gold file.
112 |
113 |     Returns
114 |     -------
115 |     data : pandas.DataFrame
116 |         The data ready for evaluation
117 |
118 |     Raises
119 |     ------
120 |     ValueError
121 |         If the input files cannot be opened or in case of data mismatch between
122 |         the two files.
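    Example
    -------
    A sketch with hypothetical paths (the gold file ships with the dataset,
    the submission file comes from your submission):

        data = load_data(
            'dataset/lexical/dev/gold.csv', 'submission/lexical/dev.txt')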
123 |
124 |     """
125 |     # ensures the two input files are here
126 |     for input_file in (gold_file, submission_file):
127 |         if not pathlib.Path(input_file).is_file():
128 |             raise ValueError(f'file not found: {input_file}')
129 |
130 |     # load them as data frames indexed by filenames
131 |     gold = pandas.read_csv(
132 |         gold_file, header=0, index_col='filename').astype(
133 |             {'frequency': pandas.Int64Dtype()})
134 |     score = pandas.read_csv(
135 |         submission_file, sep=' ', header=None,
136 |         names=['filename', 'score'], index_col='filename')
137 |
138 |     # ensures the filenames in gold and submission are the same
139 |     if set(gold.index) != set(score.index):
140 |         has_less_files = set(gold.index) - set(score.index)
141 |         has_more_files = set(score.index) - set(gold.index)
142 |         print("MismatchError:", file=sys.stderr)
143 |         if len(has_more_files) > 0:
144 |             print('submission has extra files', file=sys.stderr)
145 |             print(f'extra files: {has_more_files}', file=sys.stderr)
146 |
147 |         if len(has_less_files) > 0:
148 |             print('submission is missing files', file=sys.stderr)
149 |             print(f'missing files: {has_less_files}', file=sys.stderr)
150 |         sys.exit(1)
151 |
152 |     # merge the gold and score using filenames, then remove the columns
153 |     # 'phones' and 'filename' as we don't use them for evaluation
154 |     data = pandas.merge(gold, score, on='filename', how='inner')
155 |     data.reset_index(inplace=True)
156 |     # if all non words have their textual version set to NaN, we take their phonemic version instead
157 |     if data[data.correct == 0]['word'].isnull().sum() == len(data[data.correct == 0]):
158 |         data['word'] = data['phones']
159 |     data.drop(columns=['phones', 'filename'], inplace=True)
160 |
161 |     # going from a word per line to a pair (word, non word) per line
162 |     words = data.loc[data['correct'] == 1].reset_index().rename(lambda x: 'w_' + x, axis=1)
163 |     non_words = data.loc[data['correct'] == 0].reset_index().rename(lambda x: 'nw_' + x, axis=1)
164 |     data = pandas.merge(words, non_words, left_on=['w_voice', 'w_id'], right_on=['nw_voice', 'nw_id'])
165 |
166 |     data.drop(
167 |         ['w_index', 'nw_index', 'nw_voice', 'nw_frequency',
168 |          'w_correct', 'nw_correct', 'nw_id', 'nw_length'],
169 |         axis=1, inplace=True)
170 |     data.rename(
171 |         {'w_id': 'id', 'w_voice': 'voice', 'w_frequency': 'frequency',
172 |          'w_word': 'word', 'nw_word': 'non word', 'w_length': 'length',
173 |          'w_score': 'score word', 'nw_score': 'score non word'},
174 |         axis=1, inplace=True)
175 |
176 |     return data
177 |
178 |
179 | def evaluate_by_pair(data):
180 |     """Returns a data frame with the computed scores by (word, non word) pair
181 |
182 |     Parameters
183 |     ----------
184 |     data : pandas.DataFrame
185 |         The result of `load_data`
186 |
187 |     Returns
188 |     -------
189 |     by_pair : pandas.DataFrame
190 |         The evaluated (word, non word) pairs, the data frame has the columns:
191 |         'word', 'non word', 'frequency', 'length' and 'score'.
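    A single pair presentation scores 1 when the word gets a strictly higher
    score than its matched non word, 0.5 on ties and 0 otherwise; the reported
    score is the mean of these values across voices.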
192 |
193 |     """
194 |     # compute the score for each pair in an additional 'score' column, then
195 |     # delete the 'score word' and 'score non word' columns that become useless
196 |     score = data.loc[:, ['score word', 'score non word']].to_numpy()
197 |     data['score'] = (
198 |         0.5 * (score[:, 0] == score[:, 1])
199 |         + (score[:, 0] > score[:, 1]))
200 |     data.drop(columns=['score word', 'score non word'], inplace=True)
201 |
202 |     # finally get the mean score across voices for all pairs
203 |     score = data.groupby('id').apply(lambda x: (
204 |         x.iat[0, 3],  # word
205 |         x.iat[0, 5],  # non word
206 |         x.iat[0, 2],  # frequency
207 |         x.iat[0, 4],  # length
208 |         x['score'].mean()))
209 |     return pandas.DataFrame(
210 |         score.to_list(),
211 |         columns=['word', 'non word', 'frequency', 'length', 'score'])
212 |
213 |
214 | def evaluate_by_frequency(by_pair):
215 |     """Returns a data frame with mean scores by frequency bands
216 |
217 |     The frequency is defined as the number of occurrences of the word in the
218 |     LibriSpeech dataset. The following frequency bands are considered: oov,
219 |     1-5, 6-20, 21-100 and >100.
220 |
221 |     Parameters
222 |     ----------
223 |     by_pair: pandas.DataFrame
224 |         The output of `evaluate_by_pair`
225 |
226 |     Returns
227 |     -------
228 |     by_frequency : pandas.DataFrame
229 |         The score collapsed on frequency bands, the data frame has the
230 |         following columns: 'frequency', 'score'.
231 |
232 |     """
233 |     bands = pandas.cut(
234 |         by_pair.frequency,
235 |         [0, 1, 5, 20, 100, float('inf')],
236 |         labels=['oov', '1-5', '6-20', '21-100', '>100'],
237 |         right=False)
238 |
239 |     return by_pair.score.groupby(bands).agg(
240 |         n='count', score='mean', std='std').reset_index()
241 |
242 |
243 | def evaluate_by_length(by_pair):
244 |     """Returns a data frame with mean scores by word length
245 |
246 |     Parameters
247 |     ----------
248 |     by_pair: pandas.DataFrame
249 |         The output of `evaluate_by_pair`
250 |
251 |     Returns
252 |     -------
253 |     by_length : pandas.DataFrame
254 |         The score collapsed on word length, the data frame has the
255 |         following columns: 'length', 'score'.
256 |
257 |     """
258 |     return by_pair.score.groupby(by_pair.length).agg(
259 |         n='count', score='mean', std='std').reset_index()
260 |
261 |
262 | def evaluate(gold_file, submission_file):
263 |     """Returns the score by (word, non word) pair, by frequency and by length
264 |
265 |     Parameters
266 |     ----------
267 |     gold_file : path
268 |         The gold file (csv format) for the lexical dataset (test or dev).
269 |     submission_file : path
270 |         The submission corresponding to the provided gold file.
271 |
272 |     Returns
273 |     -------
274 |     by_pair : pandas.DataFrame
275 |         The evaluated (word, non word) pairs, the data frame has the columns:
276 |         'word', 'non word' and 'score'.
277 |     by_frequency : pandas.DataFrame
278 |         The score collapsed on frequency bands, the data frame has the
279 |         following columns: 'frequency', 'score'.
280 |     by_length : pandas.DataFrame
281 |         The score collapsed on word length (in number of phones), the data
282 |         frame has the following columns: 'length', 'score'.
283 |
284 |     Raises
285 |     ------
286 |     ValueError
287 |         If the input files cannot be opened or in case of data mismatch between
288 |         the two files.
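    Example
    -------
    A sketch with hypothetical paths:

        by_pair, by_frequency, by_length = evaluate(
            'dataset/lexical/dev/gold.csv', 'submission/lexical/dev.txt')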
289 |
290 |     """
291 |     data = load_data(gold_file, submission_file)
292 |
293 |     by_pair = evaluate_by_pair(data)
294 |     by_frequency = evaluate_by_frequency(by_pair)
295 |     by_length = evaluate_by_length(by_pair)
296 |     by_pair.drop(['frequency', 'length'], axis=1, inplace=True)
297 |
298 |     return by_pair, by_frequency, by_length
299 |
--------------------------------------------------------------------------------
/zerospeech2021/meta.py:
--------------------------------------------------------------------------------
1 | """Validation of meta.yaml"""
2 |
3 | import numbers
4 | import numpy as np
5 | import scipy.spatial
6 | import yaml
7 |
8 | from zerospeech2021.exception import ValidationError, MismatchError
9 |
10 |
11 | def _validate_entries(meta, entries, prefix=None):
12 |     if sorted(meta.keys()) != sorted(entries.keys()):
13 |         message = 'invalid entries'
14 |         if prefix:
15 |             message += f' in {prefix}'
16 |         raise MismatchError(message, entries.keys(), meta.keys())
17 |
18 |     for key, value in entries.items():
19 |         _validate_entry(meta, key, value[0], values=value[1], prefix=prefix)
20 |
21 |
22 | def _validate_entry(meta, name, expected_type, values=None, prefix=None):
23 |     prefix = prefix + '/' if prefix else ''
24 |
25 |     if name not in meta:
26 |         raise ValidationError(f'{prefix}{name} section missing')
27 |
28 |     value = meta[name]
29 |     if not isinstance(value, expected_type):
30 |         raise ValidationError(
31 |             f'{prefix}{name} must be a {expected_type}, it is {type(value)}')
32 |
33 |     if values and value not in values:
34 |         raise ValidationError(
35 |             f'{prefix}{name} must be in ({", ".join(values)}) but is {value}')
36 |
37 |     if expected_type == str and not value:
38 |         raise ValidationError(f'{prefix}{name} must not be an empty string')
39 |
40 |
41 | def _validate_scipy_metric(metric):
42 |     """Raises a ValidationError if `metric` is not a valid metric in scipy"""
43 |     try:
44 |         scipy.spatial.distance.cdist(
45 |             np.ones((5, 2)), np.ones((5, 2)), metric)
46 |     except Exception:
47 |         raise ValidationError(f'invalid metric for semantic: {metric}')
48 |
49 |
50 | def validate(submission):
51 |     """Validation of the meta.yaml in submission
52 |
53 |     Testing that submission/meta.yaml is a valid yaml file and corresponds to
54 |     the following format:
55 |
56 |         author: <str>
57 |         affiliation: <str>
58 |         description: <str>
59 |         open_source: <bool>
60 |         train_set: <str>
61 |         visually_grounded: <bool>
62 |         gpu_budget: <float>
63 |         parameters:
64 |           phonetic:
65 |             metric: <str>, "cosine", "euclidean", "kl" or "kl_symmetric"
66 |             frame_shift: <float>
67 |           semantic:
68 |             metric: <str>
69 |             pooling: <str>, "min", "max", "mean", "sum", "last", "lastlast" or
70 |                      "off"
71 |
72 |     Raises
73 |     ------
74 |     exception.ValidationError
75 |         For any item not corresponding to the prototype.
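    As an illustration, a meta.yaml passing this validation could look like
    this (all values are made up):

        author: Jane Doe
        affiliation: Some University
        description: CPC baseline features
        open_source: true
        train_set: LibriSpeech 960h
        visually_grounded: false
        gpu_budget: 60
        parameters:
          phonetic:
            metric: cosine
            frame_shift: 0.01
          semantic:
            metric: cosine
            pooling: max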
76 | 77 | """ 78 | meta_file = submission / 'meta.yaml' 79 | 80 | if not meta_file.is_file(): 81 | raise ValidationError("missing meta.yaml file") 82 | 83 | try: 84 | meta = yaml.safe_load(meta_file.open('r').read().replace('\t', ' ')) 85 | except yaml.YAMLError as err: 86 | raise ValidationError(f'failed to parse {meta_file}: {err}') 87 | 88 | if not meta or not isinstance(meta, dict): 89 | raise ValidationError("meta.yaml file is not valid") 90 | 91 | # top level entries 92 | _validate_entries( 93 | meta, 94 | {'author': (str, None), 95 | 'affiliation': (str, None), 96 | 'description': (str, None), 97 | 'open_source': (bool, None), 98 | 'train_set': (str, None), 99 | 'visually_grounded': (bool, None), 100 | 'gpu_budget': (numbers.Number, None), 101 | 'parameters': (dict, None)}) 102 | 103 | # parameters entries 104 | _validate_entries( 105 | meta['parameters'], 106 | {'phonetic': (dict, None), 'semantic': (dict, None)}, 107 | prefix='parameters') 108 | 109 | # parameters/phonetic level 110 | _validate_entries( 111 | meta['parameters']['phonetic'], 112 | {'metric': (str, ['cosine', 'euclidean', 'kl', 'kl_symmetric']), 113 | 'frame_shift': (numbers.Number, None)}, 114 | prefix='parameters/phonetic') 115 | 116 | # parameters/semantic level 117 | _validate_entries( 118 | meta['parameters']['semantic'], 119 | {'metric': (str, None), 120 | 'pooling': (str, [ 121 | 'min', 'max', 'mean', 'sum', 'last', 'lastlast', 'off'])}, 122 | prefix='parameters/semantic') 123 | 124 | _validate_scipy_metric(meta['parameters']['semantic']['metric']) 125 | 126 | return meta['open_source'] 127 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic.py: -------------------------------------------------------------------------------- 1 | """ Phonetic task zerospeech 2021 """ 2 | import collections 3 | from dataclasses import dataclass 4 | from itertools import chain 5 | from typing import Optional 6 | from enum import Enum 7 | 8 | import numpy as np 9 | import pandas 10 | import joblib 11 | 12 | from zerospeech2021 import exception 13 | from zerospeech2021.phonetic_eval import eval_ABX 14 | 15 | LIBRISPEECH_SETS = { 16 | 'dev': ['dev-clean', 'dev-other'], 17 | 'test': ['test-clean', 'test-other']} 18 | 19 | 20 | ABXFileTypes = Enum('ABXFileTypes', 21 | '.pt .npy .txt .wav .flac .mp3') 22 | ABXMode = Enum('ABXMode', 'all within across') 23 | 24 | ABXDistanceMode = Enum('ABXDistanceMode', 25 | 'euclidian cosine kl kl_symmetric') 26 | 27 | 28 | @dataclass 29 | class AbxArguments: 30 | """ List of arguments to provide to abx in phonetic_eval.abx""" 31 | # path to input data 32 | path_data: str 33 | # path to item file 34 | path_item_file: str 35 | # Path to a CPC checkpoint 36 | path_checkpoint: Optional[str] = None 37 | # size of a single feature 38 | feature_size: Optional[float] = float(0.1) 39 | # Use the GPU to compute distances 40 | cuda: bool = True 41 | # extension (of input files ?) 42 | file_extension: ABXFileTypes = '.txt' 43 | # Choose the mode of the ABX score to compute 44 | mode: ABXMode = 'all' 45 | # Choose the kind of distance to use to compute 46 | distance_mode: ABXDistanceMode = 'cosine' 47 | # Max size of a group while computing the ABX score 48 | max_size_group: int = 10 49 | # When computing the ABX across score, maximum 50 | # number of speaker X to sample per couple A,B. 
51 | max_x_across: int = 5 52 | # location to output the results 53 | out: Optional[str] = None 54 | 55 | 56 | def get_input_files(dataset_directory, _set, file_type): 57 | """ Returns a list of all the files in a set """ 58 | res = [] 59 | for s in LIBRISPEECH_SETS[_set]: 60 | res.append((dataset_directory / s).rglob(f"*.{file_type}")) 61 | return list(chain(*res)) 62 | 63 | 64 | def get_submitted_files(submission_directory, _set): 65 | """ Returns a list of all the files in a set """ 66 | res = [] 67 | for s in LIBRISPEECH_SETS[_set]: 68 | res.append((submission_directory / s).rglob("*")) 69 | return list(chain(*res)) 70 | 71 | 72 | def _validate_file(source_file, submission, dataset): 73 | """Ensure a file has the correct format 74 | 75 | Verifies that a feature file is a 2D numpy array of floats and it matches a 76 | file in the dataset. 77 | 78 | :param source_file: input file from dataset 79 | :param submission: location of submitted files 80 | :param dataset: location of dataset 81 | 82 | :return: a pair (target_file, ncols), where target_file is the file in the 83 | submission directory and ncols is the number of columns in the array. 84 | 85 | :raises exception.EntryMissingError if an entry is not present 86 | 87 | """ 88 | try: 89 | target_file = submission / source_file.relative_to(dataset) 90 | target_file = target_file.with_suffix('.txt') 91 | if not target_file.is_file(): 92 | raise exception.EntryMissingError( 93 | source=source_file, expected=target_file) 94 | 95 | try: 96 | array = np.loadtxt(str(target_file)) 97 | except Exception: 98 | raise exception.FileFormatError( 99 | target_file, 'not a valid numpy array') 100 | 101 | if array.dtype != np.dtype('float'): 102 | raise exception.FileFormatError( 103 | target_file, "array loaded is not dtype = float") 104 | 105 | if array.ndim != 2: 106 | raise exception.FileFormatError( 107 | target_file, 'not a 2D array') 108 | except exception.ValidationError as error: 109 | return str(error), None, None 110 | 111 | return None, target_file, array.shape[1] 112 | 113 | 114 | def validate(submission, dataset, kind, njobs=1): 115 | """Validate a subset of the submissions for the phonetic task 116 | 117 | :param submission_directory: location of submissions 118 | :param dataset_directory: location of data 119 | :param kind: subset type (dev | test) 120 | :param njobs: number of paralle processes to use for validation 121 | 122 | :raise ValidationError: if the submission is not valid 123 | 124 | """ 125 | if kind not in LIBRISPEECH_SETS.keys(): 126 | raise ValueError(f'kind must be "dev" or "test", it is {kind}') 127 | 128 | input_files = get_input_files(dataset, kind, "wav") 129 | if not input_files: 130 | raise exception.ValidationError( 131 | f'found no wav files in {dataset}') 132 | 133 | submitted_files = get_submitted_files(submission, kind) 134 | if not submitted_files: 135 | raise exception.ValidationError( 136 | f'found no files in {submission}') 137 | 138 | # ensure we have only .txt files in submission 139 | no_txt_files = [str(f) for f in submitted_files if f.suffix != '.txt'] 140 | if no_txt_files: 141 | raise exception.MismatchError('extra files found', [], no_txt_files) 142 | 143 | # ensure that there are no duplicates 144 | duplicates = [ 145 | f for f, n in collections.Counter(submitted_files).items() if n > 1 146 | ] 147 | if duplicates: 148 | raise exception.MismatchError('duplicates found', [], duplicates) 149 | 150 | # check that necessary files are present and valid 151 | valid_entries = 
joblib.Parallel(n_jobs=njobs)(
152 |         joblib.delayed(_validate_file)(f, submission, dataset)
153 |         for f in input_files)
154 |     errors, valid_entries, ncols = zip(*valid_entries)
155 |
156 |     # ensure there are no detected errors
157 |     errors = [e for e in errors if e]
158 |     if errors:
159 |         for e in errors[:10]:
160 |             print(f'ERROR: {e}')
161 |         if len(errors) > 10:
162 |             print(f'ERROR: ... and {len(errors) - 10} more!')
163 |         raise exception.ValidationError(f'error detected in phonetic {kind}')
164 |
165 |     # ensure all submitted files have the same number of columns
166 |     if len(set(ncols)) != 1:
167 |         raise exception.ValidationError(
168 |             f'all files must have the same number of columns '
169 |             f'but have: {set(ncols)}')
170 |
171 |     if collections.Counter(submitted_files) != collections.Counter(valid_entries):
172 |         raise exception.MismatchError(
173 |             'mismatch in filenames', valid_entries, submitted_files)
174 |
175 |
176 | def evaluate(submission, dataset, kind, metric, frame_shift, force_cpu=False):
177 |     """Returns the phonetic evaluation (ABX scores) as a data frame
178 |
179 |     Parameters
180 |     ----------
181 |     submission : path
182 |         The directory where the phonetic submission is stored (expects
183 |         subdirectories dev-clean, dev-other, etc)
184 |     dataset : path
185 |         The directory where the phonetic dataset is stored
186 |     kind : str
187 |         Must be 'dev' or 'test'
188 |     metric : str
189 |         Must be 'cosine', 'euclidean', 'kl' or 'kl_symmetric'
190 |     frame_shift : float
191 |         The shift between two features frames in s.
192 |     force_cpu : bool, optional
193 |         When True use the CPU, otherwise use the GPU (default to False)
194 |
195 |     Returns
196 |     -------
197 |     score : pandas.DataFrame
198 |         A data frame with the ABX score obtained for each combination of
199 |         {dev, test}, {clean, other} and {across, within}.
200 |
201 |     """
202 |     results = {}
203 |     for subkind in LIBRISPEECH_SETS[kind]:
204 |         print(
205 |             f'Evaluating phonetic {subkind} '
206 |             f'(metric={metric}, frame_shift={frame_shift})')
207 |
208 |         arg_obj = AbxArguments(
209 |             path_data=str(submission / subkind),
210 |             path_item_file=str(dataset / subkind / f'{subkind}.item'),
211 |             distance_mode=metric,
212 |             feature_size=frame_shift,
213 |             cuda=not force_cpu)
214 |
215 |         results[subkind] = eval_ABX.main(arg_obj=arg_obj)
216 |
217 |     results2 = [
218 |         (dset.split('-')[0], dset.split('-')[1], kind, score)
219 |         for dset, v in results.items() for kind, score in v.items()]
220 |     return pandas.DataFrame(
221 |         results2, columns=['dataset', 'sub-dataset', 'type', 'score'])
222 |
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/ABX_src/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
--------------------------------------------------------------------------------
/zerospeech2021/phonetic_eval/ABX_src/abx_group_computation.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import torch 3 | import math 4 | import libri_light_dtw as dtw 5 | import progressbar 6 | 7 | 8 | def get_distance_function_from_name(name_str): 9 | if name_str == 'euclidean': 10 | return get_euclidian_distance_batch 11 | if name_str == 'cosine': 12 | return get_cosine_distance_batch 13 | if name_str == 'kl': 14 | return get_kl_distance_batch 15 | if name_str == 'kl_symmetric': 16 | return get_kl_distance_symmetric_batch 17 | raise ValueError(f"Invalid distance mode") 18 | 19 | 20 | def check_dtw_group_validity(a, b, x): 21 | assert (len(a.size()) == len(b.size())) 22 | assert (len(a.size()) == len(x.size())) 23 | assert (a.size(2) == x.size(2)) 24 | assert (a.size(2) == b.size(2)) 25 | 26 | 27 | def get_kl_distance_batch(a1, a2, epsilon=1e-6): 28 | N1, S1, D = a1.size() # Batch x Seq x Channel 29 | N2, S2, D = a2.size() # Batch x Seq x Channel 30 | 31 | # (P * (P / Q).log()).sum() 32 | div = (a1.view(N1, 1, S1, 1, D) + epsilon) / (a2.view(1, N2, 1, S2, D) + epsilon) 33 | prod = (a1.view(N1, 1, S1, 1, D)) * div.log() 34 | 35 | return prod.sum(dim=4) 36 | 37 | 38 | def get_kl_distance_symmetric_batch(a1, a2, epsilon=1e-6): 39 | N1, S1, D = a1.size() 40 | N2, S2, D = a2.size() 41 | 42 | div1 = (a1.view(N1, 1, S1, 1, D) + epsilon) / (a2.view(1, N2, 1, S2, D) + epsilon) 43 | div2 = (a2.view(1, N2, 1, S2, D) + epsilon) / (a1.view(N1, 1, S1, 1, D) + epsilon) 44 | 45 | prod1 = (a1.view(N1, 1, S1, 1, D)) * div1.log() 46 | prod2 = (a2.view(1, N2, 1, S2, D)) * div2.log() 47 | 48 | return (0.5 * prod1 + 0.5 * prod2).sum(dim=4) 49 | 50 | 51 | def get_cosine_distance_batch(a1, a2, epsilon=1e-8): 52 | r""" a1 and a2 must be normalized""" 53 | N1, S1, D = a1.size() # Batch x Seq x Channel 54 | N2, S2, D = a2.size() # Batch x Seq x Channel 55 | 56 | prod = (a1.view(N1, 1, S1, 1, D)) * (a2.view(1, N2, 1, S2, D)) 57 | # Sum accross the channel dimension 58 | prod = torch.clamp(prod.sum(dim=4), -1, 1).acos() / math.pi 59 | 60 | return prod 61 | 62 | 63 | def get_euclidian_distance_batch(a1, a2): 64 | N1, S1, D = a1.size() 65 | N2, S2, D = a2.size() 66 | diff = a1.view(N1, 1, S1, 1, D) - a2.view(1, N2, 1, S2, D) 67 | return torch.sqrt((diff ** 2).sum(dim=4)) 68 | 69 | 70 | def get_distance_group_dtw(a1, a2, size1, size2, 71 | ignore_diag=False, symmetric=False, 72 | distance_function=get_cosine_distance_batch): 73 | N1, S1, D = a1.size() 74 | N2, S2, D = a2.size() 75 | if size1.size(0) != N1: 76 | print(a1.size(), size1.size()) 77 | print(a2.size(), size2.size()) 78 | assert (size1.size(0) == N1) 79 | assert (size2.size(0) == N2) 80 | 81 | distance_mat = distance_function(a1, a2).detach().cpu().numpy() 82 | return dtw.dtw_batch(a1, a2, size1, size2, 83 | distance_mat, 84 | ignore_diag, symmetric) 85 | 86 | 87 | def get_theta_group_dtw(a, b, x, sa, sb, sx, distance_function, symmetric): 88 | check_dtw_group_validity(a, b, x) 89 | 90 | dxb = get_distance_group_dtw( 91 | x, b, sx, sb, distance_function=distance_function) 92 | dxa = get_distance_group_dtw(x, a, sx, sa, ignore_diag=symmetric, 93 | symmetric=symmetric, 94 | distance_function=distance_function) 95 | 96 | Nx, Na = dxa.size() 97 | Nx, Nb = dxb.size() 98 | 99 | if symmetric: 100 | n_pos = Na * (Na - 1) 101 | max_val = dxb.max().item() 102 | for i in range(Na): 103 | dxa[i, i] = max_val + 1 104 | else: 105 | n_pos = Na * Nx 106 | 107 | dxb = dxb.view(Nx, 1, Nb).expand(Nx, Na, Nb) 108 | dxa = dxa.view(Nx, Na, 1).expand(Nx, Na, Nb) 109 | 110 | sc = (dxa < dxb).sum() + 0.5 * (dxa == dxb).sum() 111 | sc /= (n_pos * Nb) 112 | 113 | 
return sc.item() 114 | 115 | 116 | def loc_dtw(data, distance_function, symmetric): 117 | coords, group_a, group_b, group_x = data 118 | group_a_data, group_a_size = group_a 119 | group_b_data, group_b_size = group_b 120 | group_x_data, group_x_size = group_x 121 | theta = get_theta_group_dtw(group_a_data, 122 | group_b_data, 123 | group_x_data, 124 | group_a_size, 125 | group_b_size, 126 | group_x_size, 127 | distance_function, 128 | symmetric) 129 | 130 | return (coords, 1 - theta) 131 | 132 | 133 | def get_abx_scores_dtw_on_group(group_iterator, 134 | distance_function, 135 | symmetric): 136 | data_list = [] 137 | coords_list = [] 138 | bar = progressbar.ProgressBar(prefix=' > ', maxval=len(group_iterator)) 139 | bar.start() 140 | 141 | with torch.no_grad(): 142 | for index, group in enumerate(group_iterator): 143 | bar.update(index) 144 | coords, abx = loc_dtw(group, distance_function, symmetric) 145 | data_list.append(abx) 146 | coords_list.append(coords) 147 | bar.finish() 148 | 149 | return torch.sparse.FloatTensor(torch.LongTensor(coords_list).t(), 150 | torch.FloatTensor(data_list), 151 | group_iterator.get_board_size()) 152 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/ABX_src/abx_iterators.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | import progressbar 4 | import math 5 | import random 6 | 7 | 8 | def normalize_with_singularity(x): 9 | r""" 10 | Normalize the given vector across the third dimension. 11 | Extend all vectors by eps=1e-12 to put the null vector at the maximal 12 | cosine distance from any non-null vector. 13 | """ 14 | S, H = x.size() 15 | norm_x = (x**2).sum(dim=1, keepdim=True) 16 | 17 | x /= torch.sqrt(norm_x) 18 | zero_vals = (norm_x == 0).view(S) 19 | x[zero_vals] = 1 / math.sqrt(H) 20 | border_vect = torch.zeros((S, 1), 21 | dtype=x.dtype, 22 | device=x.device) + 1e-12 23 | border_vect[zero_vals] = -2*1e12 24 | return torch.cat([x, border_vect], dim=1) 25 | 26 | 27 | def load_item_file(path_item_file): 28 | r""" Load a .item file indicating the triplets for the ABX score. 
The
29 |     input file must have the following format:
30 |     line 0 : whatever (not read)
31 |     line > 0: #file_ID onset offset #phone prev-phone next-phone speaker
32 |     onset : beginning of the triplet (in s)
33 |     offset : end of the triplet (in s)
34 |     """
35 |     with open(path_item_file, 'r') as file:
36 |         data = file.readlines()[1:]
37 |
38 |     data = [x.replace('\n', '') for x in data]
39 |
40 |     out = {}
41 |
42 |     phone_match = {}
43 |     speaker_match = {}
44 |     context_match = {}
45 |
46 |     for line in data:
47 |         items = line.split()
48 |         assert (len(items) == 7)
49 |         fileID = items[0]
50 |         if fileID not in out:
51 |             out[fileID] = []
52 |
53 |         onset, offset = float(items[1]), float(items[2])
54 |         context = '+'.join([items[4], items[5]])
55 |         phone = items[3]
56 |         speaker = items[6]
57 |
58 |         if phone not in phone_match:
59 |             s = len(phone_match)
60 |             phone_match[phone] = s
61 |         phone_id = phone_match[phone]
62 |
63 |         if context not in context_match:
64 |             s = len(context_match)
65 |             context_match[context] = s
66 |         context_id = context_match[context]
67 |
68 |         if speaker not in speaker_match:
69 |             s = len(speaker_match)
70 |             speaker_match[speaker] = s
71 |         speaker_id = speaker_match[speaker]
72 |
73 |         out[fileID].append([onset, offset, context_id, phone_id, speaker_id])
74 |
75 |     return out, context_match, phone_match, speaker_match
76 |
77 |
78 | def get_features_group(in_data, index_order):
79 |
80 |     in_index = list(range(len(in_data)))
81 |     in_index.sort(key=lambda x: [in_data[x][i] for i in index_order])
82 |     out_groups = []
83 |     last_values = [in_data[in_index[0]][i] for i in index_order]
84 |     i_s = 0
85 |     curr_group = [[] for i in index_order]
86 |     n_orders = len(index_order) - 1
87 |     tmp = [in_data[i] for i in in_index]
88 |
89 |     for index, item in enumerate(tmp):
90 |         for order_index, order in enumerate(index_order):
91 |             if item[order] != last_values[order_index]:
92 |                 curr_group[-1].append((i_s, index))
93 |                 for i in range(n_orders, order_index, -1):
94 |                     curr_group[i-1].append(curr_group[i])
95 |                     curr_group[i] = []
96 |                 if order_index == 0:
97 |                     out_groups += curr_group[0]
98 |                     curr_group[0] = []
99 |                 last_values = [item[i] for i in index_order]
100 |                 i_s = index
101 |                 break
102 |
103 |     if i_s < len(in_data):
104 |         curr_group[-1].append((i_s, len(in_data)))
105 |         for i in range(n_orders, 0, -1):
106 |             curr_group[i-1].append(curr_group[i])
107 |         out_groups += curr_group[0]
108 |
109 |     return in_index, out_groups
110 |
111 |
112 | class ABXFeatureLoader:
113 |
114 |     def __init__(self,
115 |                  path_item_file,
116 |                  seqList,
117 |                  featureMaker,
118 |                  stepFeature,
119 |                  normalize):
120 |         r"""
121 |         Args:
122 |             path_item_file (str): path to the .item files containing the ABX
123 |                                   triplets
124 |             seqList (list): list of items (fileID, path) where fileID refers to
125 |                             the file's ID as used in path_item_file, and path
126 |                             is the actual path to the input audio sequence
127 |             featureMaker (function): either a function or a callable object.
128 |                                      Takes a path as input and outputs the
129 |                                      feature sequence corresponding to the
130 |                                      given file.
131 |             normalize (bool): if True all input features will be normalized
132 |                               across the channels dimension.
133 |
134 |         Note:
135 |             You can use this dataset with pre-computed features. For example, if
136 |             you have a collection of features files in the torch .pt format then
137 |             you can just set featureMaker = torch.load.
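        Example:
            A sketch with pre-computed .pt features (file IDs and paths are
            hypothetical; stepFeature is the number of feature frames per
            second, so 100 corresponds to one frame every 10 ms):

                loader = ABXFeatureLoader(
                    'dev-clean.item',
                    [('84-121123-0000', 'features/84-121123-0000.pt')],
                    featureMaker=torch.load,
                    stepFeature=100,
                    normalize=True)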
138 | """ 139 | 140 | files_data, self.context_match, self.phone_match, self.speaker_match = \ 141 | load_item_file(path_item_file) 142 | self.seqNorm = True 143 | self.stepFeature = stepFeature 144 | self.loadFromFileData(files_data, seqList, featureMaker, normalize) 145 | 146 | def loadFromFileData(self, files_data, seqList, feature_maker, normalize): 147 | 148 | # self.features[i]: index_start, size, context_id, phone_id, speaker_id 149 | self.features = [] 150 | self.INDEX_CONTEXT = 2 151 | self.INDEX_PHONE = 3 152 | self.INDEX_SPEAKER = 4 153 | data = [] 154 | 155 | totSize = 0 156 | 157 | print(" > Building the input features...") 158 | bar = progressbar.ProgressBar(prefix=' > ', maxval=len(seqList)) 159 | bar.start() 160 | 161 | for index, vals in enumerate(seqList): 162 | 163 | fileID, file_path = vals 164 | bar.update(index) 165 | if fileID not in files_data: 166 | continue 167 | 168 | features = feature_maker(file_path) 169 | if normalize: 170 | features = normalize_with_singularity(features) 171 | 172 | features = features.detach().cpu() 173 | 174 | phone_data = files_data[fileID] 175 | 176 | for phone_start, phone_end, context_id, phone_id, speaker_id in phone_data: 177 | 178 | index_start = max( 179 | 0, int(math.ceil(self.stepFeature * phone_start - 0.5))) 180 | index_end = min(features.size(0), 181 | int(math.floor(self.stepFeature * phone_end - 0.5))) 182 | 183 | if index_start >= features.size(0) or index_end <= index_start: 184 | continue 185 | 186 | loc_size = index_end - index_start 187 | self.features.append([totSize, loc_size, context_id, 188 | phone_id, speaker_id]) 189 | data.append(features[index_start:index_end]) 190 | totSize += loc_size 191 | 192 | bar.finish() 193 | 194 | self.data = torch.cat(data, dim=0) 195 | self.feature_dim = self.data.size(1) 196 | 197 | def get_data_device(self): 198 | return self.data.device 199 | 200 | def cuda(self): 201 | self.data = self.data.cuda() 202 | 203 | def cpu(self): 204 | self.data = self.data.cpu() 205 | 206 | def get_max_group_size(self, i_group, i_sub_group): 207 | id_start, id_end = self.group_index[i_group][i_sub_group] 208 | return max([self.features[i][1] for i in range(id_start, id_end)]) 209 | 210 | def get_ids(self, index): 211 | context_id, phone_id, speaker_id = self.features[index][2:] 212 | return context_id, phone_id, speaker_id 213 | 214 | def __getitem__(self, index): 215 | i_data, out_size, context_id, phone_id, speaker_id = self.features[index] 216 | return self.data[i_data:(i_data + out_size)], out_size, (context_id, phone_id, speaker_id) 217 | 218 | def __len__(self): 219 | return len(self.features) 220 | 221 | def get_n_speakers(self): 222 | return len(self.speaker_match) 223 | 224 | def get_n_context(self): 225 | return len(self.context_match) 226 | 227 | def get_n_phone(self): 228 | return len(self.phone_match) 229 | 230 | def get_n_groups(self): 231 | return len(self.group_index) 232 | 233 | def get_n_sub_group(self, index_sub_group): 234 | return len(self.group_index[index_sub_group]) 235 | 236 | def get_iterator(self, mode, max_size_group): 237 | if mode == 'within': 238 | return ABXWithinGroupIterator(self, max_size_group) 239 | if mode == 'across': 240 | return ABXAcrossGroupIterator(self, max_size_group) 241 | raise ValueError(f"Invalid mode: {mode}") 242 | 243 | 244 | class ABXIterator: 245 | r""" 246 | Base class building ABX's triplets. 
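    Concrete subclasses (ABXWithinGroupIterator and ABXAcrossGroupIterator
    below) implement `__iter__` and `get_board_size`.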
247 | """ 248 | 249 | def __init__(self, abxDataset, max_size_group): 250 | self.max_size_group = max_size_group 251 | self.dataset = abxDataset 252 | self.len = 0 253 | 254 | self.index_csp, self.groups_csp = \ 255 | get_features_group(abxDataset.features, 256 | [abxDataset.INDEX_CONTEXT, 257 | abxDataset.INDEX_SPEAKER, 258 | abxDataset.INDEX_PHONE]) 259 | 260 | def get_group(self, i_start, i_end): 261 | data = [] 262 | max_size = 0 263 | to_take = list(range(i_start, i_end)) 264 | if i_end - i_start > self.max_size_group: 265 | to_take = random.sample(to_take, k=self.max_size_group) 266 | for i in to_take: 267 | loc_data, loc_size, loc_id = self.dataset[self.index_csp[i]] 268 | max_size = max(loc_size, max_size) 269 | data.append(loc_data) 270 | 271 | N = len(to_take) 272 | out_data = torch.zeros(N, max_size, 273 | self.dataset.feature_dim, 274 | device=self.dataset.get_data_device()) 275 | out_size = torch.zeros(N, dtype=torch.long, 276 | device=self.dataset.get_data_device()) 277 | 278 | for i in range(N): 279 | size = data[i].size(0) 280 | out_data[i, :size] = data[i] 281 | out_size[i] = size 282 | 283 | return out_data, out_size, loc_id 284 | 285 | def __len__(self): 286 | return self.len 287 | 288 | def get_board_size(self): 289 | r""" 290 | Get the output dimension of the triplet's space. 291 | """ 292 | pass 293 | 294 | 295 | class ABXWithinGroupIterator(ABXIterator): 296 | r""" 297 | Iterator giving the triplets for the ABX within score. 298 | """ 299 | 300 | def __init__(self, abxDataset, max_size_group): 301 | 302 | super(ABXWithinGroupIterator, self).__init__(abxDataset, 303 | max_size_group) 304 | self.symmetric = True 305 | 306 | for context_group in self.groups_csp: 307 | for speaker_group in context_group: 308 | if len(speaker_group) > 1: 309 | for i_start, i_end in speaker_group: 310 | if i_end - i_start > 1: 311 | self.len += (len(speaker_group) - 1) 312 | 313 | def __iter__(self): 314 | for i_c, context_group in enumerate(self.groups_csp): 315 | for i_s, speaker_group in enumerate(context_group): 316 | n_phones = len(speaker_group) 317 | if n_phones == 1: 318 | continue 319 | 320 | for i_a in range(n_phones): 321 | i_start_a, i_end_a = self.groups_csp[i_c][i_s][i_a] 322 | if i_end_a - i_start_a == 1: 323 | continue 324 | 325 | for i_b in range(n_phones): 326 | if i_b == i_a: 327 | continue 328 | 329 | i_start_b, i_end_b = self.groups_csp[i_c][i_s][i_b] 330 | data_b, size_b, id_b = self.get_group(i_start_b, 331 | i_end_b) 332 | data_a, size_a, id_a = self.get_group(i_start_a, 333 | i_end_a) 334 | 335 | out_coords = id_a[2], id_a[1], id_b[1], id_a[0] 336 | yield out_coords, (data_a, size_a), (data_b, size_b), \ 337 | (data_a, size_a) 338 | 339 | def get_board_size(self): 340 | 341 | return (self.dataset.get_n_speakers(), 342 | self.dataset.get_n_phone(), 343 | self.dataset.get_n_phone(), 344 | self.dataset.get_n_context()) 345 | 346 | 347 | class ABXAcrossGroupIterator(ABXIterator): 348 | r""" 349 | Iterator giving the triplets for the ABX across score. 
350 | """ 351 | 352 | def __init__(self, abxDataset, max_size_group): 353 | 354 | super(ABXAcrossGroupIterator, self).__init__(abxDataset, 355 | max_size_group) 356 | self.symmetric = False 357 | self.get_speakers_from_cp = {} 358 | self.max_x = 5 359 | 360 | for context_group in self.groups_csp: 361 | for speaker_group in context_group: 362 | for i_start, i_end in speaker_group: 363 | c_id, p_id, s_id = self.dataset.get_ids( 364 | self.index_csp[i_start]) 365 | if c_id not in self.get_speakers_from_cp: 366 | self.get_speakers_from_cp[c_id] = {} 367 | if p_id not in self.get_speakers_from_cp[c_id]: 368 | self.get_speakers_from_cp[c_id][p_id] = {} 369 | self.get_speakers_from_cp[c_id][p_id][s_id] = ( 370 | i_start, i_end) 371 | 372 | for context_group in self.groups_csp: 373 | for speaker_group in context_group: 374 | if len(speaker_group) > 1: 375 | for i_start, i_end in speaker_group: 376 | c_id, p_id, s_id = self.dataset.get_ids( 377 | self.index_csp[i_start]) 378 | self.len += (len(speaker_group) - 1) * (min(self.max_x, 379 | len(self.get_speakers_from_cp[c_id][p_id]) - 1)) 380 | 381 | def get_other_speakers_in_group(self, i_start_group): 382 | c_id, p_id, s_id = self.dataset.get_ids(self.index_csp[i_start_group]) 383 | return [v for k, v in self.get_speakers_from_cp[c_id][p_id].items() if k != s_id] 384 | 385 | def get_abx_triplet(self, i_a, i_b, i_x): 386 | i_start_a, i_end_a = i_a 387 | data_a, size_a, id_a = self.get_group(i_start_a, i_end_a) 388 | 389 | i_start_b, i_end_b = i_b 390 | data_b, size_b, id_b = self.get_group(i_start_b, i_end_b) 391 | 392 | i_start_x, i_end_x = i_x 393 | data_x, size_x, id_x = self.get_group(i_start_x, i_end_x) 394 | 395 | out_coords = id_a[2], id_a[1], id_b[1], id_a[0], id_x[2] 396 | return out_coords, (data_a, size_a), (data_b, size_b), \ 397 | (data_x, size_x) 398 | 399 | def __iter__(self): 400 | for i_c, context_group in enumerate(self.groups_csp): 401 | for i_s, speaker_group in enumerate(context_group): 402 | n_phones = len(speaker_group) 403 | if n_phones == 1: 404 | continue 405 | 406 | for i_a in range(n_phones): 407 | i_start_a, i_end_a = self.groups_csp[i_c][i_s][i_a] 408 | ref = self.get_other_speakers_in_group(i_start_a) 409 | if len(ref) > self.max_x: 410 | speakers_a = random.sample(ref, k=self.max_x) 411 | else: 412 | speakers_a = ref 413 | 414 | for i_start_x, i_end_x in speakers_a: 415 | 416 | for i_b in range(n_phones): 417 | if i_b == i_a: 418 | continue 419 | 420 | i_start_b, i_end_b = self.groups_csp[i_c][i_s][i_b] 421 | yield self.get_abx_triplet((i_start_a, i_end_a), (i_start_b, i_end_b), (i_start_x, i_end_x)) 422 | 423 | def get_board_size(self): 424 | 425 | return (self.dataset.get_n_speakers(), 426 | self.dataset.get_n_phone(), 427 | self.dataset.get_n_phone(), 428 | self.dataset.get_n_context(), 429 | self.dataset.get_n_speakers()) 430 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/ABX_src/dtw.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import torch 3 | import numpy as np 4 | cimport numpy as np 5 | cimport cython 6 | from cpython cimport bool 7 | ctypedef np.float32_t CTYPE_t # cost type 8 | ctypedef np.intp_t IND_t # array index type 9 | CTYPE = np.float32 # cost type 10 | 11 | 12 | 13 | def dtw_batch(x,y, sx, sy, dist_mat, ignore_diag=False, symetric=False): 14 | 15 | Nx = dist_mat.shape[0] 16 | Ny = dist_mat.shape[1] 17 | 18 | out = torch.zeros((Nx, Ny)) 19 | 20 | for i in range(Nx): 21 | start_index = i if symetric else 0 22 | i_sx = sx[i] 23 | for j in range(start_index, Ny): 24 | 25 | j_sy = sy[j] 26 | if ignore_diag and i == j: 27 | continue 28 | distance = _dtw(i_sx, j_sy, dist_mat[i,j,:i_sx,:j_sy],True) 29 | out[i][j] = distance 30 | if symetric and i != j: 31 | out[j][i] = out[i][j] 32 | 33 | return out 34 | 35 | 36 | 37 | cpdef _dtw(IND_t N, IND_t M, CTYPE_t[:,:] dist_array, bool normalized): 38 | cdef IND_t i, j 39 | cdef CTYPE_t[:,:] cost = np.empty((N, M), dtype=CTYPE) 40 | cdef CTYPE_t final_cost, c_diag, c_left, c_up 41 | # initialization 42 | cost[0,0] = dist_array[0,0] 43 | for i in range(1,N): 44 | cost[i,0] = dist_array[i,0] + cost[i-1,0] 45 | for j in range(1,M): 46 | cost[0,j] = dist_array[0,j] + cost[0,j-1] 47 | # the dynamic programming loop 48 | for i in range(1,N): 49 | for j in range(1,M): 50 | cost[i,j] = dist_array[i,j] + min(cost[i-1,j], cost[i-1,j-1], cost[i,j-1]) 51 | 52 | final_cost = cost[N-1, M-1] 53 | if normalized: 54 | path_len = 1 55 | i = N-1 56 | j = M-1 57 | while i > 0 and j > 0: 58 | c_up = cost[i - 1, j] 59 | c_left = cost[i, j-1] 60 | c_diag = cost[i-1, j-1] 61 | if c_diag <= c_left and c_diag <= c_up: 62 | i -= 1 63 | j -= 1 64 | elif c_left <= c_up: 65 | j -= 1 66 | else: 67 | i -= 1 68 | path_len += 1 69 | if i == 0: 70 | path_len += j 71 | if j == 0: 72 | path_len += i 73 | final_cost /= path_len 74 | return final_cost 75 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/CPC_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import argparse 3 | import torch 4 | import torchaudio 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def download_state_dict(model_name): 10 | 11 | base_url = "https://dl.fbaipublicfiles.com/librilight/CPC_checkpoints" 12 | return torch.hub.load_state_dict_from_url(f"{base_url}/{model_name}") 13 | 14 | 15 | def load_cpc_features(state_dict): 16 | 17 | config = state_dict["config"] 18 | weights = state_dict["weights"] 19 | encoder = CPCEncoder(config["hiddenEncoder"]) 20 | ar_net = CPCAR(config["hiddenEncoder"], config["hiddenGar"], False, 21 | config["nLevelsGRU"]) 22 | 23 | model = CPCModel(encoder, ar_net) 24 | model.load_state_dict(weights, strict=False) 25 | output = FeatureModule(model, False) 26 | output.config = config 27 | return output 28 | 29 | 30 | def get_features_state_dict(feature_module): 31 | config = feature_module.config 32 | if config is None: 33 | raise ValueError("The input feature_module should have config defined") 34 | weights = feature_module.model.state_dict() 35 | return {"config": config, "weights": weights} 36 | 37 | 38 | def build_feature_from_file(file_path, feature_maker, max_size_seq=64000): 39 | r""" 40 | Apply the featureMaker to the given file. 
41 | Arguments: 42 | - feature_maker (FeatureModule): model to apply 43 | - file_path (string): path of the sequence to load 44 | (normalization of the output along the time dimension, to get chunks 45 | of mean zero and var 1, is handled by the feature maker itself via its seq_norm attribute) 46 | - max_size_seq (int): maximal size of a chunk 47 | Return: 48 | a torch tensor of size Seq_size x Feature_dim 49 | """ 50 | seq = torchaudio.load(file_path)[0] 51 | sizeSeq = seq.size(1) 52 | start = 0 53 | out = [] 54 | while start < sizeSeq: 55 | if start + max_size_seq > sizeSeq: 56 | break 57 | end = min(sizeSeq, start + max_size_seq) 58 | subseq = (seq[:, start:end]).view(1, 1, -1).cuda(device=0) 59 | with torch.no_grad(): 60 | features = feature_maker(subseq) 61 | out.append(features.detach().cpu()) 62 | start += max_size_seq 63 | 64 | if start < sizeSeq: 65 | subseq = (seq[:, -max_size_seq:]).view(1, 1, -1).cuda(device=0) 66 | with torch.no_grad(): 67 | features = feature_maker(subseq) 68 | df = subseq.size(2) // features.size(1) 69 | delta = (sizeSeq - start) // df 70 | out.append(features[:, -delta:].detach().cpu()) 71 | 72 | out = torch.cat(out, dim=1) 73 | return out.view(out.size(1), out.size(2)) 74 | 75 | ############################################################################## 76 | # Minimal code to load a CPC checkpoint 77 | ############################################################################## 78 | 79 | 80 | class ChannelNorm(nn.Module): 81 | 82 | def __init__(self, 83 | numFeatures, 84 | epsilon=1e-05, 85 | affine=True): 86 | 87 | super(ChannelNorm, self).__init__() 88 | if affine: 89 | self.weight = nn.parameter.Parameter( 90 | torch.Tensor(1, numFeatures, 1)) 91 | self.bias = nn.parameter.Parameter(torch.Tensor(1, numFeatures, 1)) 92 | else: 93 | self.weight = None 94 | self.bias = None 95 | self.epsilon = epsilon 96 | self.p = 0 97 | self.affine = affine 98 | self.reset_parameters() 99 | 100 | def reset_parameters(self): 101 | if self.affine: 102 | torch.nn.init.ones_(self.weight) 103 | torch.nn.init.zeros_(self.bias) 104 | 105 | def forward(self, x): 106 | 107 | cumMean = x.mean(dim=1, keepdim=True) 108 | cumVar = x.var(dim=1, keepdim=True) 109 | x = (x - cumMean)*torch.rsqrt(cumVar + self.epsilon) 110 | 111 | if self.weight is not None: 112 | x = x * self.weight + self.bias 113 | return x 114 | 115 | 116 | class CPCEncoder(nn.Module): 117 | 118 | def __init__(self, 119 | sizeHidden=512): 120 | 121 | super(CPCEncoder, self).__init__() 122 | normLayer = ChannelNorm 123 | 124 | self.conv0 = nn.Conv1d(1, sizeHidden, 10, stride=5, padding=3) 125 | self.batchNorm0 = normLayer(sizeHidden) 126 | self.conv1 = nn.Conv1d(sizeHidden, sizeHidden, 8, stride=4, padding=2) 127 | self.batchNorm1 = normLayer(sizeHidden) 128 | self.conv2 = nn.Conv1d(sizeHidden, sizeHidden, 4, 129 | stride=2, padding=1) 130 | self.batchNorm2 = normLayer(sizeHidden) 131 | self.conv3 = nn.Conv1d(sizeHidden, sizeHidden, 4, stride=2, padding=1) 132 | self.batchNorm3 = normLayer(sizeHidden) 133 | self.conv4 = nn.Conv1d(sizeHidden, sizeHidden, 4, stride=2, padding=1) 134 | self.batchNorm4 = normLayer(sizeHidden) 135 | self.DOWNSAMPLING = 160 136 | 137 | def getDimOutput(self): 138 | return self.conv4.out_channels 139 | 140 | def forward(self, x): 141 | x = F.relu(self.batchNorm0(self.conv0(x))) 142 | x = F.relu(self.batchNorm1(self.conv1(x))) 143 | x = F.relu(self.batchNorm2(self.conv2(x))) 144 | x = F.relu(self.batchNorm3(self.conv3(x))) 145 | x = F.relu(self.batchNorm4(self.conv4(x))) 146 | return x 147 | 148 |
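# ----------------------------------------------------------------------------
# An illustrative sketch (not part of the module API): chaining the helpers
# above to extract CPC features from a single wav file. The wav path is
# hypothetical, and a GPU is assumed, as in build_feature_from_file().
#
#   state_dict = download_state_dict("60k_epoch4-d0f474de.pt")
#   feature_maker = load_cpc_features(state_dict)
#   feature_maker.cuda()
#   features = build_feature_from_file("/path/to/utterance.wav", feature_maker)
#   # features: tensor of shape (n_frames, feature_dim); the encoder
#   # downsamples 16 kHz audio by a factor of 160, i.e. one frame per 10 ms
# ----------------------------------------------------------------------------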
149 | class CPCAR(nn.Module): 150 | 151 | def __init__(self, 152 | dimEncoded, 153 | dimOutput, 154 | keepHidden, 155 | nLevelsGRU): 156 | 157 | super(CPCAR, self).__init__() 158 | self.baseNet = nn.LSTM(dimEncoded, dimOutput, 159 | num_layers=nLevelsGRU, batch_first=True) 160 | self.hidden = None 161 | self.keepHidden = keepHidden 162 | 163 | def getDimOutput(self): 164 | return self.baseNet.hidden_size 165 | 166 | def forward(self, x): 167 | 168 | try: 169 | self.baseNet.flatten_parameters() 170 | except RuntimeError: 171 | pass 172 | x, h = self.baseNet(x, self.hidden) 173 | if self.keepHidden: 174 | if isinstance(h, tuple): 175 | self.hidden = tuple(x.detach() for x in h) 176 | else: 177 | self.hidden = h.detach() 178 | return x 179 | 180 | 181 | class CPCModel(nn.Module): 182 | 183 | def __init__(self, 184 | encoder, 185 | AR): 186 | 187 | super(CPCModel, self).__init__() 188 | self.gEncoder = encoder 189 | self.gAR = AR 190 | 191 | def forward(self, batchData, label): 192 | encodedData = self.gEncoder(batchData).permute(0, 2, 1) 193 | cFeature = self.gAR(encodedData) 194 | return cFeature, encodedData, label 195 | 196 | 197 | class FeatureModule(torch.nn.Module): 198 | r""" 199 | A simpler interface to handle CPC models. Useful for a smooth workflow when 200 | working with CPC trained features. 201 | """ 202 | 203 | def __init__(self, featureMaker, get_encoded, 204 | seq_norm=True): 205 | super(FeatureModule, self).__init__() 206 | self.get_encoded = get_encoded 207 | self.model = featureMaker 208 | self.seq_norm = seq_norm 209 | self.config = None 210 | 211 | def forward(self, batch_data): 212 | # Input Size : BatchSize x 1 x SeqSize 213 | # Feature size: BatchSize x SeqSize x ChannelSize 214 | if self.is_cuda: 215 | batch_data = batch_data.cuda() 216 | cFeature, encoded, _ = self.model(batch_data, None) 217 | if self.get_encoded: 218 | cFeature = encoded 219 | if self.seq_norm: 220 | mean = cFeature.mean(dim=1, keepdim=True) 221 | var = cFeature.var(dim=1, keepdim=True) 222 | cFeature = (cFeature - mean) / torch.sqrt(var + 1e-08) 223 | return cFeature 224 | 225 | def cuda(self): 226 | self.is_cuda = True 227 | super(FeatureModule, self).cuda() 228 | 229 | def cpu(self): 230 | self.is_cuda = False 231 | super(FeatureModule, self).cpu() 232 | 233 | def get_output_dim(self): 234 | if self.get_encoded: 235 | return self.config["hiddenEncoder"] 236 | return self.config["hiddenGar"] 237 | 238 | 239 | if __name__ == "__main__": 240 | 241 | parser = argparse.ArgumentParser(description='Download model') 242 | parser.add_argument('model_name', type=str, 243 | choices=["600h", "6kh", "60kh"]) 244 | parser.add_argument('output', type=str) 245 | args = parser.parse_args() 246 | 247 | CPC_MODELS_NAMES = {"60kh": "60k_epoch4-d0f474de.pt", 248 | "600h": "600h-bdd7ced6.pt", 249 | "6kh": "6k_epoch30-9df0493c.pt"} 250 | state_dict = download_state_dict(CPC_MODELS_NAMES[args.model_name]) 251 | torch.save(state_dict, args.output) 252 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/LICENCE.txt: -------------------------------------------------------------------------------- 1 | The original libri_light_eval module can be found at https://github.com/facebookresearch/libri-light/tree/master/eval 2 | 3 | This module is licensed under the MIT licence; all credit goes to the original creators. 4 | 5 | MIT License 6 | 7 | Copyright (c) Facebook, Inc. and its affiliates.
8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/README.md: -------------------------------------------------------------------------------- 1 | # Note from ZR2021 organizers 2 | 3 | This is a modified version of the libri-light evaluation code available at 4 | https://github.com/facebookresearch/libri-light/tree/master/eval. 5 | 6 | Modifications consist only of file renaming and package reorganization to 7 | facilitate installation and integration with the zerospeech2021 package. 8 | 9 | 10 | # Eval 11 | 12 | You will find here all relevant evaluations launched on the LibriLight dataset. 13 | 14 | ## ABX 15 | 16 | ABX is an evaluation metric for unsupervised representation learning. It evaluates feature files based on their ability to distinguish sounds like /i/ and /e/ as in "bit" versus "bet". 17 | 18 | ### Setup 19 | 20 | To set up the ABX evaluation script you need to: 21 | 22 | 1. Compile the cython code. Just do: 23 | 24 | ```console 25 | cd ABX_src 26 | python setup.py build_ext --inplace 27 | ``` 28 | 29 | 2. Check that everything works properly with: 30 | ```console 31 | cd ABX_src 32 | nosetests -d 33 | ``` 34 | 35 | 3. Download the Librilight `.item` files here: [ABX_data.tgz](https://dl.fbaipublicfiles.com/librilight/data/ABX_data.tgz). 36 | 37 | This archive contains four `.item` files constructed from the Librispeech dev and test set: `dev-clean.item`, `dev-other.item`, `test-clean.item`, and `test-other.item`, which provide the labels for the ABX evaluation. 38 | 39 | ### How to run the ABX evaluation? 40 | 41 | Dump your features in .pt (torch) or .npy (numpy) format somewhere. Your features dataset should look like this: 42 | 43 | ```console 44 | \data_dir 45 | file_name_0.extension 46 | file_name_1.extension 47 | ... 48 | ``` 49 | 50 | Each file should contain a 2D array of shape Sequence_size x Feature_dimension. 51 | 52 | Then run: 53 | ```console 54 | python eval_ABX.py $PATH_FEATURE_DIR $PATH_TO_ABX_ITEMS/$DB_NAME.item --file_extension $EXTENSION --out $OUTPUT_DIR --feature_size $FEATURE_SIZE 55 | ``` 56 | 57 | Where `$DB_NAME` is one of the 4 evaluation datasets (`dev-clean`, `dev-other`, `test-clean`, `test-other`) and `$FEATURE_SIZE` is the duration (in s) of one feature of the model (for a `10ms` frame rate, this would be `0.01`).
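For reference, here is a minimal sketch of how such a feature directory could be produced from your own encoder (everything below is illustrative: `my_encoder` and the directory names are placeholders, not part of this package):

```python
import pathlib
import torch

from my_encoder import encode_wav  # placeholder: your own feature extractor

audio_dir = pathlib.Path("wavs")        # hypothetical input directory
feature_dir = pathlib.Path("data_dir")  # the directory passed to eval_ABX.py
feature_dir.mkdir(exist_ok=True)

for wav in sorted(audio_dir.glob("*.wav")):
    # encode_wav is assumed to return a (Sequence_size, Feature_dimension) tensor
    features = encode_wav(wav)
    torch.save(features.cpu(), feature_dir / f"{wav.stem}.pt")
```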
58 | 59 | 60 | ## Pre-computed checkpoints 61 | 62 | Some pre-computed models trained with CPC are available for use! To load a model, just use CPC_loader.py; for example, to retrieve the model trained on the 60k hours dataset: 63 | 64 | ```console 65 | python CPC_loader.py 60kh $PATH_OUTPUT_CHECKPOINT 66 | ``` 67 | 68 | You can directly evaluate the ABX score on this checkpoint by running: 69 | ```console 70 | python eval_ABX.py $PATH_AUDIO_DIR ABX_data/$DB_NAME.item --file_extension $EXTENSION --out $OUTPUT_DIR --path_checkpoint $PATH_OUTPUT_CHECKPOINT 71 | ``` 72 | 73 | Where $EXTENSION corresponds to an audio format (.wav, .flac, ...). 74 | 75 | ## Linear Classification PER 76 | 77 | Representations can also be evaluated by how easy it is to train a linear phoneme classifier. 78 | 79 | ### Setup 80 | 81 | To set up the PER evaluation script you need to compile the cython code it relies on. Just do: 82 | ```console 83 | cd PER_src 84 | python setup.py build_ext --inplace 85 | ``` 86 | 87 | You will also need to download the [10h labelled data](https://dl.fbaipublicfiles.com/librilight/data/librispeech_finetuning.tgz). 88 | 89 | ### How to run the PER evaluation? 90 | 91 | First you need to train a linear classifier on your features. For example, if you want to evaluate a model fine-tuned on the 10h dataset, just run: 92 | ```console 93 | python eval_PER.py train $PATH_TO_10h_AUDIO_DATA_DIR $PATH_TO_10h_PHONE_DATA $PATH_TO_THE_JSON_PHONE_CONVERTER $PATH_TO_THE_CPC_MODEL -o $PATH_OUT 94 | ``` 95 | 96 | Then you can run the PER computation, for example on librispeech100/test-clean: 97 | ```console 98 | python eval_PER.py per $PATH_OUT/checkpoint.pt $PATH_TO_TEST_CLEAN $PATH_TO_TEST_CLEAN_PHONES --file_extension .flac 99 | ``` 100 | 101 | 102 | ## WER 103 | 104 | We provide here a test of representations based on word error rate. 105 | 106 | ### Setup 107 | * wav2letter python bindings: [(how-to)](https://github.com/facebookresearch/wav2letter/tree/master/bindings/python). 108 | * KenLM-based Librispeech language model, can be found [here](http://www.openslr.org/11/) or downloaded [here](https://dl.fbaipublicfiles.com/librilight/data/4-gram.bin); it should be placed into `WER_data/`. 109 | * lexicon, [download](https://dl.fbaipublicfiles.com/librilight/data/lexicon.txt.gz); it should be placed into `WER_data/`. 110 | * jiwer, installable via `pip install jiwer`. 111 | 112 | ### How to run the WER evaluation?
113 | 114 | Training a letter classifier on top of a pre-trained CPC model: 115 | ```console 116 | python eval_WER.py --path_train=$PATH_FINETUNING --path_val=$PATH_TO_DEV_CLEAN --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR 117 | 118 | ``` 119 | Evaluating it with wav2letter decoder: 120 | ```console 121 | python eval_WER.py --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR --path_wer=$PATH_TO_TEST_CLEAN 122 | ``` 123 | 124 | You can also train and evaluate afterwards, in a single command: 125 | ```console 126 | python eval_WER.py --path_train=$PATH_FINETUNING --path_val=$PATH_TO_DEV_CLEAN --path_checkpoint=$PATH_OUT/checkpoint.pt --lr=1e-3 --n_epochs=50 --p_dropout=0.1 --output=$OUTPUT_DIR --path_wer=$PATH_TO_TEST_CLEAN 127 | ``` 128 | -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | warnings.simplefilter("ignore") -------------------------------------------------------------------------------- /zerospeech2021/phonetic_eval/eval_ABX.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import argparse 3 | import os 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import torch 8 | 9 | import zerospeech2021.phonetic_eval.ABX_src.abx_group_computation as abx_g 10 | import zerospeech2021.phonetic_eval.ABX_src.abx_iterators as abx_it 11 | from zerospeech2021.phonetic_eval.CPC_loader import load_cpc_features, build_feature_from_file 12 | 13 | 14 | def find_all_files(path_dir, extension): 15 | out = [] 16 | for root, dirs, filenames in os.walk(path_dir): 17 | for f in filenames: 18 | if f.endswith(extension): 19 | out.append(((str(Path(f).stem)), os.path.join(root, f))) 20 | return out 21 | 22 | 23 | def reduce_sparse_data(quotient, divisor): 24 | return quotient / (1e-08 * (divisor == 0) + divisor) 25 | 26 | 27 | def load_pt(x): 28 | data = torch.load(x, 'cpu') 29 | assert(len(data.size()) == 2) 30 | return data 31 | 32 | 33 | def load_npy(x): 34 | data = torch.tensor(np.load(x)) 35 | assert(len(data.size()) == 2) 36 | return data 37 | 38 | 39 | def load_txt(x): 40 | data = torch.tensor(np.loadtxt(x)) 41 | assert (len(data.size()) == 2) 42 | return data 43 | 44 | 45 | def ABX(feature_function, 46 | path_item_file, 47 | seq_list, 48 | distance_mode, 49 | step_feature, 50 | modes, 51 | cuda=False, 52 | max_x_across=5, 53 | max_size_group=30): 54 | 55 | # ABX dataset 56 | ABXDataset = abx_it.ABXFeatureLoader(path_item_file, seq_list, 57 | feature_function, step_feature, True) 58 | 59 | if cuda: 60 | ABXDataset.cuda() 61 | 62 | # Distance function 63 | distance_function = abx_g.get_distance_function_from_name(distance_mode) 64 | 65 | # Output 66 | scores = {} 67 | 68 | # ABX within 69 | if 'within' in modes: 70 | print(" > Computing ABX within speakers...") 71 | ABXIterator = ABXDataset.get_iterator('within', max_size_group) 72 | group_confusion = abx_g.get_abx_scores_dtw_on_group(ABXIterator, 73 | distance_function, 74 | ABXIterator.symmetric) 75 | n_data = group_confusion._values().size(0) 76 | index_ = torch.sparse.LongTensor(group_confusion._indices(), 77 | torch.ones((n_data), 78 | dtype=torch.float), 79 | group_confusion.size()) 80 | divisor_context = torch.sparse.sum(index_, 
dim=3).to_dense() 81 | group_confusion = torch.sparse.sum(group_confusion, dim=3).to_dense() 82 | group_confusion = reduce_sparse_data(group_confusion, divisor_context) 83 | S, p1, p2 = group_confusion.size() 84 | 85 | index_speaker = divisor_context > 0 86 | divisor_speaker = index_speaker.sum(dim=0) 87 | phone_confusion = reduce_sparse_data(group_confusion.sum(dim=0), 88 | divisor_speaker) 89 | 90 | scores['within'] = (phone_confusion.sum() / 91 | (divisor_speaker > 0).sum()).item() 92 | print(f" > ...done. ABX within : {scores['within']}") 93 | 94 | # ABX across 95 | if 'across' in modes: 96 | print(" > Computing ABX across speakers...") 97 | ABXIterator = ABXDataset.get_iterator('across', max_size_group) 98 | ABXIterator.max_x = max_x_across 99 | group_confusion = abx_g.get_abx_scores_dtw_on_group(ABXIterator, 100 | distance_function, 101 | ABXIterator.symmetric) 102 | n_data = group_confusion._values().size(0) 103 | index_ = torch.sparse.LongTensor(group_confusion._indices(), 104 | torch.ones((n_data), 105 | dtype=torch.float), 106 | group_confusion.size()) 107 | divisor_context = torch.sparse.sum(index_, dim=[3, 4]).to_dense() 108 | group_confusion = torch.sparse.sum( 109 | group_confusion, dim=[3, 4]).to_dense() 110 | group_confusion = reduce_sparse_data(group_confusion, divisor_context) 111 | S, p1, p2 = group_confusion.size() 112 | 113 | index_speaker = divisor_context > 0 114 | divisor_speaker = index_speaker.sum(dim=0) 115 | phone_confusion = reduce_sparse_data(group_confusion.sum(dim=0), 116 | divisor_speaker) 117 | scores['across'] = (phone_confusion.sum() / 118 | (divisor_speaker > 0).sum()).item() 119 | print(f" > ...done. ABX across : {scores['across']}") 120 | 121 | return scores 122 | 123 | 124 | def parse_args(argv): 125 | 126 | parser = argparse.ArgumentParser(description='ABX metric') 127 | 128 | parser.add_argument('path_data', type=str, 129 | help="Path to directory containing the data") 130 | parser.add_argument('path_item_file', type=str, 131 | help="Path to the .item file") 132 | parser.add_argument('--path_checkpoint', type=str, default=None, 133 | help="Path to a CPC checkpoint. If set, apply the " 134 | "model to the input data to compute the features") 135 | parser.add_argument('--file_extension', type=str, default='.pt', 136 | choices=['.pt', '.npy', '.txt', '.wav', '.flac', '.mp3']) 137 | parser.add_argument('--feature_size', type=float, default=0.01, 138 | help="Size (in s) of one feature") 139 | parser.add_argument('--cuda', action='store_true', 140 | help="Use the GPU to compute distances") 141 | parser.add_argument('--mode', type=str, default='all', 142 | choices=['all', 'within', 'across'], 143 | help="Choose the mode of the ABX score to compute") 144 | parser.add_argument('--distance_mode', type=str, default='cosine', 145 | choices=['euclidian', 'cosine', 'kl', 'kl_symmetric'], 146 | help="Choose the kind of distance to use to compute " 147 | "the ABX score.") 148 | parser.add_argument("--max_size_group", type=int, default=10, 149 | help="Max size of a group while computing the " 150 | "ABX score. A small value will make the code " 151 | "faster but less precise.") 152 | parser.add_argument("--max_x_across", type=int, default=5, 153 | help="When computing the ABX across score, maximum " 154 | "number of speaker X to sample per couple A,B. " 155 | " A small value will make the code faster but " 156 | "less precise.") 157 | parser.add_argument("--out", type=str, default=None, 158 | help="Path where the results should be saved") 159 | 160 | # multi-gpu / multi-node 161 | return parser.parse_args(argv) 162 | 163 |
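# An illustrative sketch (not part of the CLI): the same evaluation can be
# driven programmatically through main() below; the feature and item paths
# here are hypothetical.
#
#   scores = main(argv=['features/dev-clean', 'ABX_data/dev-clean.item',
#                       '--file_extension', '.pt', '--feature_size', '0.01'])
#   # scores == {'within': ..., 'across': ...}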
" 155 | " A small value will make the code faster but " 156 | "less precise.") 157 | parser.add_argument("--out", type=str, default=None, 158 | help="Path where the results should be saved") 159 | 160 | # multi-gpu / multi-node 161 | return parser.parse_args(argv) 162 | 163 | 164 | def main(argv=None, arg_obj=None): 165 | 166 | if argv: 167 | args = parse_args(argv) 168 | else: 169 | args = arg_obj 170 | 171 | if args.path_checkpoint is None: 172 | if args.file_extension == '.pt': 173 | feature_function = load_pt 174 | elif args.file_extension == '.npy': 175 | feature_function = load_npy 176 | elif args.file_extension == '.txt': 177 | feature_function = load_txt 178 | else: 179 | state_dict = torch.load(args.path_checkpoint) 180 | feature_maker = load_cpc_features(state_dict) 181 | feature_maker.cuda() 182 | feature_function = lambda x: build_feature_from_file(x, feature_maker) 183 | 184 | # Modes 185 | if args.mode == 'all': 186 | modes = ["within", "across"] 187 | else: 188 | modes = [args.mode] 189 | 190 | step_feature = 1 / args.feature_size 191 | 192 | # Get the list of sequences 193 | seq_list = find_all_files(args.path_data, args.file_extension) 194 | 195 | scores = ABX(feature_function, args.path_item_file, 196 | seq_list, args.distance_mode, 197 | step_feature, modes, 198 | cuda=args.cuda, 199 | max_x_across=args.max_x_across, 200 | max_size_group=args.max_size_group) 201 | 202 | return scores 203 | -------------------------------------------------------------------------------- /zerospeech2021/semantic.py: -------------------------------------------------------------------------------- 1 | """Semantic part of the ZR2021 (validation and evaluation)""" 2 | 3 | import pathlib 4 | 5 | import numpy as np 6 | import pandas 7 | import scipy.spatial 8 | import scipy.stats 9 | import joblib 10 | 11 | from zerospeech2021.exception import ( 12 | MismatchError, FileFormatError, ValidationError, EntryMissingError) 13 | 14 | 15 | def _validate_file(source_file, submission): 16 | """Verifies that a feature file is a 2D numpy array of floats 17 | 18 | :param source_file: input file 19 | :param submission: location of submitted files 20 | :return: a pair (error, ncols) 21 | 22 | """ 23 | try: 24 | target_file = submission / (source_file + '.txt') 25 | if not target_file.is_file(): 26 | raise EntryMissingError(source=source_file, expected=target_file) 27 | 28 | try: 29 | array = np.loadtxt(str(target_file)) 30 | except Exception: 31 | raise FileFormatError(target_file, 'not a valid numpy array') 32 | 33 | if array.dtype != np.dtype('float'): 34 | raise FileFormatError(target_file, "not a float array") 35 | 36 | if array.ndim != 2: 37 | raise FileFormatError(target_file, 'not a 2D array') 38 | 39 | except ValidationError as error: 40 | return str(error), None 41 | 42 | return None, array.shape[1] 43 | 44 | 45 | def validate(submission, dataset, kind, subset, njobs=1): 46 | """Raises a ValidationError if the `submission` is not valid 47 | 48 | The submission folder must include .txt files, each file 49 | containing a matrix of floats. Each .wav file in the dataset must 50 | have its .txt equivalent in the submission directory. 51 | 52 | Parameters 53 | ---------- 54 | submisison: path 55 | The submisison directory to validate. 56 | dataset: path 57 | The root path of the ZR2021 dataset. 58 | kind: str 59 | Must be 'dev' or 'test'. 
45 | def validate(submission, dataset, kind, subset, njobs=1): 46 | """Raises a ValidationError if the `submission` is not valid 47 | 48 | The submission folder must include .txt files, each file 49 | containing a matrix of floats. Each .wav file in the dataset must 50 | have its .txt equivalent in the submission directory. 51 | 52 | Parameters 53 | ---------- 54 | submission: path 55 | The submission directory to validate. 56 | dataset: path 57 | The root path of the ZR2021 dataset. 58 | kind: str 59 | Must be 'dev' or 'test'. 60 | subset: str 61 | Must be 'synthetic' or 'librispeech' 62 | njobs : int 63 | Number of parallel processes to use 64 | 65 | Raises 66 | ------ 67 | ValueError 68 | If `kind` is not 'dev' or 'test', or if `submission` or `dataset` is not 69 | an existing directory. 70 | ValidationError 71 | If one line of the submission file is not valid or if the submitted 72 | filenames do not match the required ones. 73 | 74 | """ 75 | if kind not in ('dev', 'test'): 76 | raise ValueError( 77 | f'kind must be "dev" or "test", it is {kind}') 78 | 79 | if subset not in ('librispeech', 'synthetic'): 80 | raise ValueError( 81 | f'subset must be "librispeech" or "synthetic", it is {subset}') 82 | 83 | submission = pathlib.Path(submission) / kind / subset 84 | if not submission.is_dir(): 85 | raise ValueError( 86 | f'{kind} submission directory not found: {submission}') 87 | 88 | dataset = pathlib.Path(dataset) / f'semantic/{kind}/{subset}' 89 | if not dataset.is_dir(): 90 | raise ValueError(f'dataset not found: {dataset}') 91 | 92 | # retrieve the required filenames that must be present in the submission 93 | required = set(f.stem for f in dataset.glob('*.wav')) 94 | if not required: 95 | raise ValidationError(f'{dataset} contains no .wav files') 96 | 97 | # retrieve the submitted files 98 | submitted = set(submission.glob('*')) 99 | if not submitted: 100 | raise ValidationError(f'{submission} contains no files') 101 | 102 | # ensure we have only .txt files in submission 103 | no_txt_files = [str(f) for f in submitted if f.suffix != '.txt'] 104 | if no_txt_files: 105 | raise MismatchError('extra files found', [], no_txt_files) 106 | 107 | # ensure each required file is present in the submission 108 | submitted = set(f.stem for f in submitted) 109 | if submitted != required: 110 | raise MismatchError('files mismatch', required, submitted) 111 | 112 | # ensure each submitted file has a correct format and the number of columns 113 | # is constant across files 114 | errors, ncols = zip(*joblib.Parallel(n_jobs=njobs)( 115 | joblib.delayed(_validate_file)(f, submission) for f in submitted)) 116 | 117 | # ensure there are no detected errors 118 | errors = [e for e in errors if e] 119 | if errors: 120 | for e in errors[:10]: 121 | print(f'ERROR: {e}') 122 | if len(errors) > 10: 123 | print(f'ERROR: ...
and {len(errors) - 10} more!') 124 | raise ValidationError(f'error detected in semantic {kind}') 125 | 126 | # ensure all submitted files have the same number of columns 127 | if len(set(ncols)) != 1: 128 | raise ValidationError( 129 | f'all files must have the same number of columns ' 130 | f'but have: {set(ncols)}') 131 | 132 | 133 | def _compute_distance(pair, gold, pool, metric): 134 | """Returns the mean distance between a pair of words""" 135 | function = { 136 | 'librispeech': _compute_distance_librispeech, 137 | 'synthetic': _compute_distance_synthetic}[pair['type']] 138 | 139 | return function(pair, gold, pool, metric) 140 | 141 | 142 | def _compute_distance_librispeech(pair, gold, pool, metric): 143 | # filter out 'synthetic' data from gold 144 | assert pair['type'] == 'librispeech' 145 | gold = gold[gold['type'] == 'librispeech'] 146 | 147 | # get the list of tokens corresponding to the given pair of words 148 | tokens_1 = gold['filename'][gold['word'] == pair['word_1']] 149 | tokens_2 = gold['filename'][gold['word'] == pair['word_2']] 150 | assert 0 < len(tokens_1) <= 10 and 0 < len(tokens_2) <= 10 151 | 152 | X = np.asarray(pool[pool['filename'].isin(tokens_1)]['pooling'].tolist()) 153 | Y = np.asarray(pool[pool['filename'].isin(tokens_2)]['pooling'].tolist()) 154 | 155 | # compute the mean distance across all pairs of tokens after pooling 156 | return scipy.spatial.distance.cdist(X, Y, metric=metric).mean() 157 | 158 | 159 | def _compute_distance_synthetic(pair, gold, pool, metric): 160 | # filter out 'librispeech' data from gold 161 | assert pair['type'] == 'synthetic' 162 | gold = gold[gold['type'] == 'synthetic'] 163 | 164 | # get the list of tokens corresponding to the given pair of words 165 | tokens_1 = gold[['filename', 'voice']][gold['word'] == pair['word_1']] 166 | tokens_2 = gold[['filename', 'voice']][gold['word'] == pair['word_2']] 167 | tokens = tokens_1.merge(tokens_2, on='voice').drop(['voice'], axis=1) 168 | 169 | # compute the mean of distances within a given voice 170 | dist = 0 171 | for _, (filename_x, filename_y) in tokens.iterrows(): 172 | X = pool[pool['filename'] == filename_x]['pooling'].item() 173 | Y = pool[pool['filename'] == filename_y]['pooling'].item() 174 | dist += scipy.spatial.distance.cdist( 175 | np.atleast_2d(X), np.atleast_2d(Y), metric=metric)[0][0] 176 | return dist / len(tokens) 177 | 178 | 179 | def _correlation(df): 180 | # choose 'similarity' or 'relatedness' column (the one with no NaN) 181 | human = df.similarity if df.relatedness.hasnans else df.relatedness 182 | assert not human.hasnans 183 | 184 | # return Spearman correlation.
Human scores are similarities (high when 185 | # close) so we take the opposite to have a quantity close to a distance 186 | # (low when close) 187 | return 100 * scipy.stats.spearmanr( 188 | - human.to_numpy(), df.score.to_numpy())[0] 189 | 190 | 191 | def _compute_correlation(pairs): 192 | """Returns the Spearman correlation between human and machine scores""" 193 | # for each (type/dataset) combination, compute spearman correlation 194 | serie = pairs.groupby([pairs['type'], pairs['dataset']]).apply(_correlation) 195 | 196 | # transform raw result into a usable dataframe 197 | return serie.to_frame().rename(columns={0: 'correlation'}).reset_index() 198 | 199 | 200 | def evaluate(gold_file, pairs_file, submission_dir, metric, pooling, njobs=1): 201 | """Returns the distance of each word pair and the overall correlations 202 | 203 | Parameters 204 | ---------- 205 | gold_file : path 206 | The gold file (csv format) for the dev or test semantic dataset. 207 | pairs_file : path 208 | The pairs file (csv format) corresponding to `gold_file` (dev or test). 209 | submission_dir : path 210 | The submission directory containing the embeddings to evaluate. 211 | metric : str 212 | The metric to use for distance computation, must be a metric supported 213 | by `scipy.spatial.distance.cdist` (see 214 | https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html) 215 | pooling : str 216 | The pooling method to use, must be 'min', 'max', 'mean', 'sum', 'last', 217 | 'lastlast' or 'off'. 218 | 219 | Returns 220 | ------- 221 | pairs : pandas.DataFrame 222 | The same content as in `pairs_file` with an additional 'score' column 223 | containing the evaluated machine scores for each pair of words. 224 | correlation : pandas.DataFrame 225 | The Spearman correlation between human judgements and machine scores on 226 | each dataset. The frame contains the columns 'type', 'dataset' and 227 | 'correlation'. 228 | 229 | Raises 230 | ------ 231 | ValueError 232 | If one of the input parameters is not valid. 233 | OSError 234 | If a file defined in `gold_file` is not found in `submission_dir`.
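Examples
--------
An illustrative sketch (all file and directory names are hypothetical):

>>> pairs, correlation = evaluate(
...     'gold.csv', 'pairs.csv', pathlib.Path('submission/semantic/dev'),
...     metric='cosine', pooling='mean')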
235 | 236 | """ 237 | # ensures input arguments are correct 238 | for input_file in (gold_file, pairs_file): 239 | if not pathlib.Path(input_file).is_file(): 240 | raise ValueError(f'file not found: {input_file}') 241 | if not pathlib.Path(submission_dir).is_dir(): 242 | raise ValueError(f'{submission_dir} is not a directory') 243 | 244 | # get the pooling function 245 | try: 246 | _pooling_function = { 247 | 'max': lambda x: np.max(x, axis=0), 248 | 'mean': lambda x: np.mean(x, axis=0), 249 | 'min': lambda x: np.min(x, axis=0), 250 | 'sum': lambda x: np.sum(x, axis=0), 251 | 'last': lambda x: x[-1], 252 | 'lastlast': lambda x: x[-2], 253 | 'off': lambda x: x}[pooling] 254 | except KeyError: 255 | raise ValueError( 256 | 'pooling method must be "max", "min", "mean", "sum", ' 257 | '"last", "lastlast" or "off"') 258 | 259 | # load the pairs and gold files 260 | pairs = pandas.read_csv(pairs_file, header=0) 261 | gold = pandas.read_csv(gold_file, header=0) 262 | 263 | # a data frame [filename, type, pooling] computed in parallel 264 | print(f' > Computing {pooling} pooling...') 265 | pool = pandas.DataFrame( 266 | joblib.Parallel(n_jobs=njobs)( 267 | joblib.delayed( 268 | lambda x: (x[1], x[0], _pooling_function( 269 | np.loadtxt(submission_dir / x[0] / (x[1] + '.txt'))))) 270 | (x) for _, x in gold.iterrows()), 271 | columns=['filename', 'type', 'pooling']) 272 | 273 | print(f' > Computing {metric} distances...') 274 | pairs['score'] = [ 275 | _compute_distance(pair, gold, pool, metric) 276 | for _, pair in pairs.iterrows()] 277 | 278 | # compute correlations 279 | print(' > Computing Spearman correlations...') 280 | correlation = _compute_correlation(pairs) 281 | return pairs, correlation 282 | -------------------------------------------------------------------------------- /zerospeech2021/syntactic.py: -------------------------------------------------------------------------------- 1 | """Syntactic part of the ZR2021 (validation and evaluation)""" 2 | 3 | import collections 4 | import pathlib 5 | import sys 6 | 7 | import pandas 8 | from zerospeech2021.exception import FormatError, MismatchError 9 | 10 | 11 | def _validate_line(index, line): 12 | """Auxiliary function to validate() 13 | 14 | Returns the filename in `line`, checks the score and raises FormatError if 15 | the line is not valid. 16 | 17 | """ 18 | # ensure the line has two fields separated by a space 19 | line = line.strip() 20 | fields = line.split(' ') 21 | if len(fields) != 2: 22 | raise FormatError( 23 | index, f'must be "<filename> <score>" but is "{line}"') 24 | 25 | filename, score = tuple(fields) 26 | 27 | # ensure the second field is a positive float 28 | try: 29 | float(score) 30 | except ValueError: 31 | raise FormatError( 32 | index, f'<score> must be a float but is "{score}"') 33 | 34 | return filename 35 | 36 | 37 | def validate(submission, dataset, kind): 38 | """Raises a ValidationError if the `submission` file is not valid 39 | 40 | * The submission file must be in text format, each line as: 41 | 42 | <filename> <score> 43 | 44 | * The <filename> is the name of a wav file in the syntactic dataset, 45 | without path nor extension ("xKtnLJYiWGt", not 46 | "syntactic/dev/xKtnLJYiWGt.wav") 47 | 48 | * The <score> is a positive float 49 | 50 | Parameters 51 | ---------- 52 | submission: path 53 | The submission file to validate, each line must be formatted as "<filename> <score>".
54 | dataset: path 55 | The root path of the ZR2021 dataset 56 | kind: str, optional 57 | Must be 'dev' or 'test' 58 | 59 | Raises 60 | ------ 61 | ValueError 62 | If `kind` is not 'dev' or 'test', if `submission` is not a file or if 63 | the dataset is not an existing directory. 64 | ValidationError 65 | If one line of the submission file is not valid or if the submitted 66 | filenames do not match the required ones. 67 | 68 | """ 69 | if kind not in ('dev', 'test'): 70 | raise ValueError( 71 | f'kind must be "dev" or "test", it is {kind}') 72 | 73 | if not pathlib.Path(submission).is_file(): 74 | raise ValueError( 75 | f'{kind} submission file not found: {submission}') 76 | 77 | # retrieve the required filenames that must be present in the submission 78 | dataset = pathlib.Path(dataset) / f'syntactic/{kind}' 79 | if not dataset.is_dir(): 80 | raise ValueError(f'dataset not found: {dataset}') 81 | required_files = set(w.stem for w in dataset.glob('*.wav')) 82 | 83 | # ensure each line in the submission is valid and retrieve the filenames 84 | submitted_files = list( 85 | _validate_line(index + 1, line) 86 | for index, line in enumerate(open(submission, 'r'))) 87 | 88 | # ensure there are no duplicates in the filenames 89 | duplicates = [ 90 | f for f, n in collections.Counter(submitted_files).items() if n > 1] 91 | if duplicates: 92 | raise MismatchError('duplicates found', [], duplicates) 93 | 94 | # ensure all the required files are here and there is no extra filename 95 | if required_files != set(submitted_files): 96 | raise MismatchError( 97 | 'mismatch in filenames', required_files, submitted_files) 98 | 99 | 100 | def load_data(gold_file, submission_file): 101 | """Returns the data required for evaluation as a pandas data frame 102 | 103 | Each line of the returned data frame contains a pair of (correct, 104 | incorrect) sentences and has the following columns: 'id', 'voice', 'type', 105 | 'sentence', 'score sentence', 'non sentence', 'score non sentence'. 106 | 107 | Parameters 108 | ---------- 109 | gold_file : path 110 | The gold file for the syntactic dataset (test or dev). 111 | submission_file : path 112 | The submission corresponding to the provided gold file. 113 | 114 | Returns 115 | ------- 116 | data : pandas.DataFrame 117 | The data ready for evaluation 118 | 119 | Raises 120 | ------ 121 | ValueError 122 | If the input files cannot be opened or in case of data mismatch between 123 | the two files.
124 | 125 | """ 126 | # ensures the two input files are here 127 | for input_file in (gold_file, submission_file): 128 | if not pathlib.Path(input_file).is_file(): 129 | raise ValueError(f'file not found: {input_file}') 130 | 131 | # load them as data frames indexed by filenames 132 | gold = pandas.read_csv( 133 | gold_file, header=0, index_col='filename') 134 | score = pandas.read_csv( 135 | submission_file, sep=' ', header=None, 136 | names=['filename', 'score'], index_col='filename') 137 | 138 | # ensures the filenames in gold and submission are the same 139 | if set(gold.index) != set(score.index): 140 | has_less_files = set(gold.index) - set(score.index) 141 | has_more_files = set(score.index) - set(gold.index) 142 | print("MismatchError:", file=sys.stderr) 143 | if len(has_more_files) > 0: 144 | print('submission has extra files', file=sys.stderr) 145 | print(f'extra files: {has_more_files}', file=sys.stderr) 146 | 147 | if len(has_less_files) > 0: 148 | print('submission is missing files', file=sys.stderr) 149 | print(f'missing files: {has_less_files}', file=sys.stderr) 150 | 151 | sys.exit(1) 152 | 153 | # merge the gold and score using filenames, then drop the filename index 154 | # as we don't use it for evaluation 155 | data = pandas.concat([gold, score], axis=1) 156 | data.reset_index(drop=True, inplace=True) 157 | 158 | # going from a sentence per line to a pair (sentence, non sentence) per line 159 | data = pandas.concat([ 160 | data.loc[data['correct'] == 1].reset_index().rename( 161 | lambda x: 's_' + x, axis=1), 162 | data.loc[data['correct'] == 0].reset_index().rename( 163 | lambda x: 'ns_' + x, axis=1)], axis=1) 164 | data.drop( 165 | ['s_index', 'ns_index', 'ns_voice', 'ns_type', 'ns_subtype', 166 | 's_correct', 'ns_correct', 'ns_id'], 167 | axis=1, inplace=True) 168 | 169 | data.rename( 170 | {'s_id': 'id', 171 | 's_voice': 'voice', 172 | 's_type': 'type', 173 | 's_subtype': 'subtype', 174 | 's_transcription': 'sentence', 175 | 'ns_transcription': 'non sentence', 176 | 's_score': 'score sentence', 177 | 'ns_score': 'score non sentence'}, 178 | axis=1, inplace=True) 179 | 180 | return data 181 | 182 | 183 | def evaluate_by_pair(data): 184 | """Returns a data frame with the scores by (sentence, non sentence) pair 185 | 186 | Parameters 187 | ---------- 188 | data : pandas.DataFrame 189 | The result of `load_data` 190 | 191 | Returns 192 | ------- 193 | by_pair : pandas.DataFrame 194 | The evaluated (sentence, non sentence) pairs, the data frame has the 195 | columns: 'sentence', 'non sentence', 'type' and 'score'.
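For each pair the score is 1.0 when the sentence is scored above the non
sentence, 0.5 on a tie and 0.0 otherwise, averaged across voices. As an
illustrative case: one voice preferring the sentence and another voice
giving a tie yields (1.0 + 0.5) / 2 = 0.75.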
196 | 197 | """ 198 | # compute the score for each pair in an additional 'score' column, then 199 | # delete the 'score sentence' and 'score non sentence' columns that become useless 200 | score = data.loc[:, ['score sentence', 'score non sentence']].to_numpy() 201 | data['score'] = ( 202 | 0.5 * (score[:, 0] == score[:, 1]) 203 | + (score[:, 0] > score[:, 1])) 204 | data.drop(columns=['score sentence', 'score non sentence'], inplace=True) 205 | 206 | # finally get the mean score across voices for all pairs 207 | score = data.groupby(['type', 'subtype', 'id']).apply(lambda x: ( 208 | x.iat[0, 2], # type 209 | x.iat[0, 3], # subtype 210 | x.iat[0, 4], # sentence 211 | x.iat[0, 5], # non sentence 212 | x['score'].mean())) 213 | return pandas.DataFrame( 214 | score.to_list(), 215 | columns=['type', 'subtype', 'sentence', 'non sentence', 'score']) 216 | 217 | 218 | def evaluate_by_type(by_pair): 219 | """Returns a data frame with mean scores by syntax error type 220 | 221 | Parameters 222 | ---------- 223 | by_pair: pandas.DataFrame 224 | The output of `evaluate_by_pair` 225 | 226 | Returns 227 | ------- 228 | by_type : pandas.DataFrame 229 | The score collapsed on types, the data frame has the 230 | following columns: 'type', 'score'. 231 | 232 | """ 233 | return by_pair.score.groupby([by_pair['type']]).agg( 234 | n='count', score='mean', std='std').reset_index() 235 | 236 | 237 | def evaluate(gold_file, submission_file): 238 | """Returns the score by sentence pair and by syntax type 239 | 240 | Parameters 241 | ---------- 242 | gold_file : path 243 | The gold file (csv format) for the syntactic dataset (test or dev). 244 | submission_file : path 245 | The submission corresponding to the provided gold file. 246 | 247 | Returns 248 | ------- 249 | by_pair : pandas.DataFrame 250 | The evaluated pairs, the data frame has the columns: 251 | 'sentence', 'non sentence' and 'score'. 252 | by_type : pandas.DataFrame 253 | The score collapsed on syntax error types, the data frame has the 254 | following columns: 'type', 'score'. 255 | 256 | Raises 257 | ------ 258 | ValueError 259 | If the input files cannot be opened or in case of data mismatch between 260 | the two files. 261 | 262 | """ 263 | data = load_data(gold_file, submission_file) 264 | by_pair = evaluate_by_pair(data) 265 | by_type = evaluate_by_type(by_pair) 266 | by_pair.drop(['type', 'subtype'], axis=1, inplace=True) 267 | 268 | return by_pair, by_type 269 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/__init__.py: -------------------------------------------------------------------------------- 1 | from . import api_fn, auth, model, upload, split 2 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/api_fn.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import requests 4 | 5 | from rich.console import Console 6 | from rich.table import Table 7 | 8 | 9 | from .
import model 10 | 11 | console = Console() 12 | 13 | 14 | def view_challenges(): 15 | """ Fetch and display the list of available challenges.""" 16 | response = requests.get( 17 | f"{model.SERVER_LOCATION}/challenges/", params={"include_inactive": "false"}) 18 | if response.status_code != 200: 19 | raise ValueError('Request to server failed!') 20 | 21 | challenges = response.json() 22 | 23 | table = Table(show_header=True, header_style="bold magenta") 24 | table.add_column("Challenge") 25 | table.add_column("ID") 26 | 27 | for item in challenges: 28 | table.add_row(f"{item.get('label', '')}", f"{item.get('id', 'XX')}") 29 | 30 | console.print(table) 31 | 32 | 33 | def create_multipart_submission(challenge_id: int, file_meta: dict, _token: str): 34 | """ Create a multipart upload submission session on the server via the API.""" 35 | data = { 36 | "filename": file_meta["filename"], 37 | "hash": file_meta["hash"], 38 | "multipart": True, 39 | "index": file_meta['index'] 40 | } 41 | 42 | return requests.post( 43 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/create', 44 | json=data, 45 | headers={ 46 | 'Authorization': f'Bearer {_token}' 47 | }) 48 | 49 | 50 | def create_single_part_submission(challenge_id: int, filename: Path, _hash: str, _token: str): 51 | """ Create a single part submission upload session on the server via the API.""" 52 | return requests.post( 53 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/create', 54 | json={ 55 | "filename": f"{filename}", 56 | "hash": _hash, 57 | "multipart": False, 58 | }, 59 | headers={ 60 | 'Authorization': f'Bearer {_token}' 61 | }) 62 | 63 | 64 | def submission_upload(challenge_id: int, submission_id: str, file: Path, _token: str): 65 | """Upload a file (or part) to an existing upload session.""" 66 | response = requests.put( 67 | f'{model.SERVER_LOCATION}/challenges/{challenge_id}/submission/upload', 68 | params={ 69 | "part_name": file.name, 70 | "submission_id": f"{submission_id}" 71 | }, 72 | files={'file_data': file.open('rb').read()}, 73 | headers={ 74 | 'Authorization': f'Bearer {_token}' 75 | } 76 | ) 77 | return response 78 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/auth.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | import requests 5 | from rich.console import Console 6 | 7 | from .
import model 8 | 9 | # Fancy console 10 | console = Console() 11 | 12 | 13 | def login(username: str, password: str): 14 | """ Create an auth session on zerospeech.com 15 | 16 | :returns: the token used to authenticate the current session 17 | """ 18 | 19 | # request login from server 20 | response = requests.post( 21 | f'{model.SERVER_LOCATION}/auth/login', 22 | data={ 23 | "grant_type": "password", 24 | "username": username, 25 | "password": password, 26 | "scopes": [], 27 | "client_id": model.CLIENT_ID, 28 | "client_secret": model.CLIENT_SECRET 29 | } 30 | ) 31 | if response.status_code != 200: 32 | console.print(f"[red]:x:{response.status_code}[/red]: {response.json().get('detail')}") 33 | sys.exit(1) 34 | 35 | return response.json().get("access_token") 36 | 37 | 38 | def logout(_token): 39 | """ Clears the given auth session on the back-end """ 40 | return requests.delete( 41 | f'{model.SERVER_LOCATION}/auth/logout', 42 | headers={ 43 | 'Authorization': f'Bearer {_token}' 44 | }) 45 | 46 | 47 | def clear_session(): 48 | """ Clear the current session locally and on the server.""" 49 | token_file = Path(model.AUTH_FILE).expanduser().resolve() 50 | if token_file.is_file(): 51 | with token_file.open() as fp: 52 | token = fp.read().replace("\n", "") 53 | 54 | # clear 55 | token_file.unlink(missing_ok=True) 56 | logout(token) 57 | console.print(f"Session saved @ {token_file} was removed.", style='green bold') 58 | 59 | 60 | def create_session(token: str): 61 | """ Creates a new auth session & saves it locally """ 62 | token_file = Path(model.AUTH_FILE).expanduser().resolve() 63 | 64 | with token_file.open('w') as fp: 65 | fp.write(token) 66 | 67 | 68 | def get_session(): 69 | """ Get or create a new auth session """ 70 | token_file = Path(model.AUTH_FILE).expanduser().resolve() 71 | 72 | if not token_file.is_file(): 73 | console.print("No session found, use the login command to create one.", style='red bold') 74 | sys.exit(1) 75 | 76 | with token_file.open() as fp: 77 | return fp.read().replace("\n", "") 78 | -------------------------------------------------------------------------------- /zerospeech2021/zr_upload_lib/model.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from dataclasses import dataclass 4 | from pathlib import Path 5 | from typing import Optional, List 6 | 7 | SERVER_LOCATION: str = "https://api.zerospeech.com" 8 | CLIENT_ID: str = "cli_uploader" 9 | CLIENT_SECRET: str = 'TaX9K1WtryizOTr5pLUM4OoqXZE5QGlj3Xo6dkh3CcI=' 10 | NB_RETRY_ATTEMPTS: int = 2 11 | MULTIPART_THRESHOLD: int = 500000000 # in bytes (500MB) 12 | AUTH_FILE: str = "~/.zerospeech-token" 13 | CHALLENGE_ID = 7 14 | 15 | 16 | def get_challenge_id(): 17 | """ Get the current challenge id from the current environment or return the default.
""" 18 | return os.environ.get("CHALLENGE_ID", CHALLENGE_ID) 19 | 20 | 21 | class ZrApiException(Exception): 22 | pass 23 | 24 | 25 | @dataclass 26 | class ManifestFileIndexItem: 27 | """ Upload File Manifest Item """ 28 | file_name: str 29 | file_size: int 30 | file_hash: Optional[str] = None 31 | 32 | def dict(self): 33 | return {f"{x}": getattr(self, x) for x in self.__dataclass_fields__.keys()} 34 | 35 | @classmethod 36 | def from_dict(cls, data): 37 | return cls(**data) 38 | 39 | 40 | @dataclass 41 | class SplitManifest: 42 | """ A class containing information about archive split""" 43 | filename: str 44 | tmp_location: Path 45 | hash: str 46 | index: Optional[List[ManifestFileIndexItem]] 47 | multipart: bool = True 48 | hashed_parts: bool = True 49 | completed: int = 0 50 | 51 | def dict(self): 52 | data = {f"{x}": f"{getattr(self, x)}" for x in self.__dataclass_fields__.keys()} 53 | if "index" in data.keys(): 54 | data["index"] = [ 55 | item.dict() for item in self.index 56 | ] 57 | 58 | return data 59 | 60 | @classmethod 61 | def from_dict(cls, data): 62 | if "index" in data.keys(): 63 | data["index"] = [ 64 | ManifestFileIndexItem.from_dict(item) for item in data["index"] 65 | ] 66 | return cls(**data) 67 | 68 | 69 | class UploadManifest: 70 | """ Fail-safe multi-part upload""" 71 | 72 | @classmethod 73 | def load(cls, filename: Path, retries: int = 2): 74 | with filename.open('r') as fp: 75 | dd = json.load(fp) 76 | return cls(dd["manifest"], filename, metadata=dd["metadata"], retries=retries) 77 | 78 | def __init__(self, list_manifest, save_file: Path, metadata=None, retries: int = 2): 79 | if isinstance(list_manifest, dict): 80 | self.man = list_manifest 81 | else: 82 | self.man = { 83 | f"{name}": 'todo' 84 | for name in list_manifest 85 | } 86 | self.save_file = save_file 87 | self.retries = retries 88 | if metadata: 89 | self._metadata = metadata 90 | else: 91 | self._metadata = {} 92 | self.save() 93 | 94 | def __iter__(self): 95 | return self 96 | 97 | @property 98 | def metadata(self): 99 | return self._metadata 100 | 101 | @metadata.setter 102 | def metadata(self, data): 103 | self._metadata.update(data) 104 | self.save() 105 | 106 | def __next__(self): 107 | for k, v in self.man.items(): 108 | if v == 'todo': 109 | return k 110 | for k, v in self.man.items(): 111 | if v == 'waiting': 112 | return k 113 | for k, v in self.man.items(): 114 | if 'retry' in v: 115 | return k 116 | raise StopIteration 117 | 118 | def status(self, key): 119 | return self.man[key] 120 | 121 | def set_waiting(self, key): 122 | if self.man[key] == 'todo': 123 | self.man[key] = "waiting" 124 | self.save() 125 | 126 | def set_done(self, key): 127 | self.man[key] = "done" 128 | self.save() 129 | 130 | def set_failed(self, key): 131 | k = self.man[key] 132 | if k in ["waiting", "todo"]: 133 | self.man[key] = "retry_1" 134 | elif "retry" in k: 135 | nb = int(k.split('_')[1]) 136 | nb += 1 137 | if nb > self.retries: 138 | st = 'failed' 139 | else: 140 | st = f"retry_{nb}" 141 | self.man[key] = st 142 | self.save() 143 | 144 | def save(self): 145 | with self.save_file.open('w') as fp: 146 | json.dump({ 147 | "manifest": self.man, 148 | "metadata": self.metadata 149 | }, fp) 150 | 151 | def is_complete(self): 152 | for k, v in self.man.items(): 153 | if v != "done": 154 | return False 155 | return True 156 | 157 | def get_failed(self): 158 | return [k for k, v in self.man.items() if v == 'failed'] 159 | 160 | def clear(self): 161 | # remove checkpoint file 162 | self.save_file.unlink() 163 | 
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/split.py:
--------------------------------------------------------------------------------
import tempfile
from pathlib import Path
from typing import List

import pandas as pd
from Crypto.Hash import MD5
from fsplit.filesplit import Filesplit

from .model import SplitManifest, ManifestFileIndexItem


def md5sum(file_path: Path, chunk_size: int = 8192):
    """ Return the md5 hash of a file's content """
    h = MD5.new()

    with file_path.open('rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            h.update(chunk)
    return h.hexdigest()


def split_zip_v2(zipfile: Path, chunk_max_size: int = 500000000, hash_parts: bool = True):
    """ Split a zip archive into chunks of at most chunk_max_size bytes and
    return a SplitManifest describing the parts. """
    assert zipfile.is_file(), f"entry file ({zipfile}) was not found"
    print(f"splitting {zipfile} into chunks...")

    tmp_loc = Path(tempfile.mkdtemp(dir=f"{zipfile.parent}"))
    fs = Filesplit()
    fs.split(file=f"{zipfile}", split_size=chunk_max_size, output_dir=str(tmp_loc))
    df = pd.read_csv(tmp_loc / 'fs_manifest.csv')
    if hash_parts:
        df['hash'] = df.apply(lambda row: md5sum(tmp_loc / row['filename']), axis=1)
        index: List[ManifestFileIndexItem] = [
            ManifestFileIndexItem(file_name=x[0], file_size=x[1], file_hash=x[2])
            for x in zip(df['filename'], df['filesize'], df['hash'])
        ]
    else:
        index: List[ManifestFileIndexItem] = [
            ManifestFileIndexItem(file_name=x[0], file_size=x[1])
            for x in zip(df['filename'], df['filesize'])
        ]

    return SplitManifest(
        filename=zipfile.name,
        tmp_location=tmp_loc,
        hash=md5sum(zipfile),
        index=index,
        hashed_parts=hash_parts
    )
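# --- Illustrative usage sketch (not part of the library): split an archive
# --- and verify each part against its recorded hash; "submission.zip" is a
# --- placeholder path.
#
#   manifest = split_zip_v2(Path("submission.zip"))
#   for item in manifest.index:
#       part = manifest.tmp_location / item.file_name
#       assert md5sum(part) == item.file_hash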
--------------------------------------------------------------------------------
/zerospeech2021/zr_upload_lib/upload.py:
--------------------------------------------------------------------------------
import shutil
import sys
from pathlib import Path

from rich import inspect, print
from rich.console import Console
from rich.progress import Progress
from rich.prompt import Prompt

from . import model
from .api_fn import (
    create_multipart_submission, submission_upload, create_single_part_submission
)
from .split import split_zip_v2, md5sum

# Fancy console
console = Console()


def multipart_upload(challenge_id: int, zipfile: Path, _token: str, checkpoint: Path):
    print("preparing metadata....")

    # resume from a checkpoint when one exists, otherwise split the archive
    if checkpoint.is_file():
        file_list = model.UploadManifest.load(checkpoint, retries=model.NB_RETRY_ATTEMPTS)
        tmp_location = Path(file_list.metadata.get("tmp_location"))
        _token = file_list.metadata.get('token')
        challenge_id = file_list.metadata.get("challenge_id")
    else:
        manifest = split_zip_v2(zipfile)
        part_names = [i.file_name for i in manifest.index]
        tmp_location = manifest.tmp_location
        meta = {
            "tmp_location": f"{tmp_location}",
            "filename": manifest.filename,
            "hash": manifest.hash,
            "index": [i.dict() for i in manifest.index],
            "token": _token,
            "challenge_id": challenge_id
        }
        file_list = model.UploadManifest(part_names, checkpoint, meta, retries=model.NB_RETRY_ATTEMPTS)

    # reuse the submission session if one was already created
    if "submission_id" in file_list.metadata:
        submission_id = file_list.metadata.get('submission_id')
    else:
        response = create_multipart_submission(challenge_id, file_list.metadata, _token)
        if response.status_code != 200:
            print(f'[red]:x:[/red][bold]Submission creation failed with code [red]{response.status_code}[/red][/bold]')
            inspect(response.json())
            sys.exit(1)

        submission_id = response.text.replace('"', '').replace("'", "")
        file_list.metadata = {"submission_id": submission_id}

    with Progress() as progress:
        task1 = progress.add_task("[red]Uploading parts...", total=len(file_list.man))

        # advance half a step when a part starts, the other half when it succeeds
        for item in file_list:
            file_list.set_waiting(item)
            progress.update(task1, advance=0.5)
            file_path = tmp_location / item
            print(f'uploading : {file_path.name}...')
            response = submission_upload(
                challenge_id=challenge_id,
                submission_id=submission_id,
                file=file_path,
                _token=_token
            )

            if response.status_code == 200:
                print(f'[green]:heavy_check_mark: {file_path}')
                file_list.set_done(item)
                progress.update(task1, advance=0.5)
            else:
                # roll back the half step so the bar reflects the failed part
                progress.update(task1, advance=-0.5)
                file_list.set_failed(item)

    if file_list.is_complete():
        checkpoint.unlink()
        shutil.rmtree(tmp_location)
        return []
    else:
        return file_list.get_failed()


def single_part_upload(challenge_id: int, zipfile: Path, _token: str):
    zip_hash = md5sum(zipfile)
    response = create_single_part_submission(challenge_id, filename=zipfile, _hash=zip_hash, _token=_token)

    if response.status_code != 200:
        print(f'[red]:x:[/red][bold]Submission creation failed with code [red]{response.status_code}[/red][/bold]')
        inspect(response.json())
        sys.exit(1)

    # the API returns the submission id as a quoted string: strip the quotes
    submission_id = response.text.replace('"', '').replace("'", "")
    print(f'submission id: {submission_id}')
    response = submission_upload(
        challenge_id=challenge_id,
        submission_id=submission_id,
        file=zipfile,
        _token=_token
    )

    if response.status_code != 200:
        print(f'[red]:x:[/red][bold]Archive upload failed with code [red]{response.status_code}[/red][/bold]')
        print(response.json())
        sys.exit(1)
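# --- Illustrative usage sketch (not part of the library): choosing between
# --- the two upload paths; zipfile, token and checkpoint are placeholders,
# --- and ask_resume is defined just below.
#
#   checkpoint = zipfile.with_suffix('.checkpoint.json')
#   if zipfile.stat().st_size > model.MULTIPART_THRESHOLD:
#       ask_resume(checkpoint)        # drops any stale checkpoint on "No"
#       failed = multipart_upload(model.get_challenge_id(), zipfile, token, checkpoint)
#   else:
#       single_part_upload(model.get_challenge_id(), zipfile, token)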
def ask_resume(file: Path):
    """ Ask the user whether to resume a previous upload from its checkpoint file """
    choice = "No"
    if file.is_file():
        choice = Prompt.ask("A checkpoint file was found. Do you wish to resume?",
                            choices=["Yes", "No"])
        if choice == "No":
            file.unlink()

    return choice == "Yes"
--------------------------------------------------------------------------------