├── .gitignore
├── LICENSE
├── README.md
├── classification
│   ├── attention_lstm.py
│   ├── attention_utils.py
│   ├── cnn_bilstm.py
│   ├── conv_1d_model.py
│   ├── conv_1d_model_aws.py
│   ├── conv_1d_model_run.py
│   ├── log.txt
│   ├── logs
│   │   └── split_0.15_batchsize_10
│   │       ├── events.out.tfevents.1569179240.elem
│   │       └── events.out.tfevents.1569217478.elem
│   ├── lstm_cnn.py
│   ├── model_5epochs_rnn.h5
│   ├── model_hin_tel_38_samples.h5
│   ├── rnn_example.py
│   ├── testing.py
│   └── testing.pyc
├── data
│   ├── all_accents
│   │   ├── accent_trim_gen_x.py
│   │   ├── accent_trim_gen_y.py
│   │   ├── all_accents_split.log.save
│   │   ├── all_accents_trim.sh
│   │   ├── alt_split.py
│   │   ├── mfcc.py
│   │   └── split_to_wav.sh
│   ├── folder_structure
│   ├── non_trained_accents
│   │   ├── accent_trim_gen_x.py
│   │   ├── accent_trim_gen_y.py
│   │   └── mfcc.py
│   └── numpy_vectors
│       ├── conv1d.ipynb - Colaboratory3:1.pdf
│       ├── conv1d.ipynb - Colaboratoryacc99ep10.pdf
│       ├── conv1d.ipynb - Colaboratoryacc99ep10new.pdf
│       ├── conv1d.ipynb - Colaboratoryacc99ep12.pdf
│       └── conv1d.ipynb acc1 splithalf - Colaboratory.pdf
├── harvard_sentences.txt
├── helpers
│   ├── __init__.py
│   ├── alt_split.py
│   ├── convert_raw_to_processed.py
│   ├── file_manager.py
│   ├── run.py
│   ├── split_to_wav.sh.save
│   └── split_wav.py
├── ipynb-htmls
│   ├── all_accents.html
│   ├── all_accents_2.html
│   ├── conv1d (1).html
│   ├── conv1d (1).ipynb
│   ├── conv1d (2).ipynb
│   ├── conv1d (3).ipynb
│   ├── conv1d.html
│   ├── conv1d.ipynb
│   ├── train_on_one_person_test_on_other_conv1d (1).html
│   ├── train_on_one_person_test_on_other_conv1d (1).ipynb
│   └── train_on_one_person_test_on_other_conv1d.ipynb
├── notebooks
│   ├── AccentDB_Classification_Colab.ipynb
│   └── pase.ipynb
├── repo.tree
└── speech2vec
    ├── all_split.sh
    ├── gen_x.py
    ├── gen_y.py
    ├── mfcc.py
    └── mp3_getter.py

/.gitignore:
--------------------------------------------------------------------------------

###Python###

# dataset and processed files
*.wav
*.npy

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | .static_storage/ 62 | .media/ 63 | local_settings.py 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | 113 | ###IntelliJ### 114 | 115 | *.iml 116 | *.ipr 117 | *.iws 118 | .idea/ 119 | 120 | 121 | ###VisualStudio### 122 | 123 | ## Ignore Visual Studio temporary files, build results, and 124 | ## files generated by popular Visual Studio add-ons. 125 | ## 126 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 127 | 128 | # User-specific files 129 | *.suo 130 | *.user 131 | *.userosscache 132 | *.sln.docstates 133 | 134 | # User-specific files (MonoDevelop/Xamarin Studio) 135 | *.userprefs 136 | 137 | # Build results 138 | [Dd]ebug/ 139 | [Dd]ebugPublic/ 140 | [Rr]elease/ 141 | [Rr]eleases/ 142 | x64/ 143 | x86/ 144 | bld/ 145 | [Bb]in/ 146 | [Oo]bj/ 147 | [Ll]og/ 148 | 149 | # Visual Studio 2015/2017 cache/options directory 150 | .vs/ 151 | # Uncomment if you have tasks that create the project's static files in wwwroot 152 | #wwwroot/ 153 | 154 | # Visual Studio 2017 auto generated files 155 | Generated\ Files/ 156 | 157 | # MSTest test Results 158 | [Tt]est[Rr]esult*/ 159 | [Bb]uild[Ll]og.* 160 | 161 | # NUNIT 162 | *.VisualState.xml 163 | TestResult.xml 164 | 165 | # Build Results of an ATL Project 166 | [Dd]ebugPS/ 167 | [Rr]eleasePS/ 168 | dlldata.c 169 | 170 | # Benchmark Results 171 | BenchmarkDotNet.Artifacts/ 172 | 173 | # .NET Core 174 | project.lock.json 175 | project.fragment.lock.json 176 | artifacts/ 177 | **/Properties/launchSettings.json 178 | 179 | # StyleCop 180 | StyleCopReport.xml 181 | 182 | # Files built by Visual Studio 183 | *_i.c 184 | *_p.c 185 | *_i.h 186 | *.ilk 187 | *.meta 188 | *.obj 189 | *.pch 190 | *.pdb 191 | *.pgc 192 | *.pgd 193 | *.rsp 194 | *.sbr 195 | *.tlb 196 | *.tli 197 | *.tlh 198 | *.tmp 199 | *.tmp_proj 200 | *.log 201 | *.vspscc 202 | *.vssscc 203 | .builds 204 | *.pidb 205 | *.svclog 206 | *.scc 207 | 208 | # Chutzpah Test files 209 | _Chutzpah* 210 | 211 | # Visual C++ cache files 212 | ipch/ 213 | *.aps 214 | *.ncb 215 | *.opendb 216 | *.opensdf 217 | *.sdf 218 | *.cachefile 219 | *.VC.db 220 | *.VC.VC.opendb 221 | 222 | # Visual Studio profiler 223 | *.psess 224 | *.vsp 225 | *.vspx 226 | *.sap 227 | 228 | # Visual Studio Trace Files 229 | *.e2e 230 | 231 | # TFS 2012 Local Workspace 232 | $tf/ 233 | 234 | # Guidance Automation Toolkit 235 | *.gpState 236 | 237 | # ReSharper is a .NET coding add-in 238 | _ReSharper*/ 239 | *.[Rr]e[Ss]harper 240 | *.DotSettings.user 241 | 242 
| # JustCode is a .NET coding add-in 243 | .JustCode 244 | 245 | # TeamCity is a build add-in 246 | _TeamCity* 247 | 248 | # DotCover is a Code Coverage Tool 249 | *.dotCover 250 | 251 | # AxoCover is a Code Coverage Tool 252 | .axoCover/* 253 | !.axoCover/settings.json 254 | 255 | # Visual Studio code coverage results 256 | *.coverage 257 | *.coveragexml 258 | 259 | # NCrunch 260 | _NCrunch_* 261 | .*crunch*.local.xml 262 | nCrunchTemp_* 263 | 264 | # MightyMoose 265 | *.mm.* 266 | AutoTest.Net/ 267 | 268 | # Web workbench (sass) 269 | .sass-cache/ 270 | 271 | # Installshield output folder 272 | [Ee]xpress/ 273 | 274 | # DocProject is a documentation generator add-in 275 | DocProject/buildhelp/ 276 | DocProject/Help/*.HxT 277 | DocProject/Help/*.HxC 278 | DocProject/Help/*.hhc 279 | DocProject/Help/*.hhk 280 | DocProject/Help/*.hhp 281 | DocProject/Help/Html2 282 | DocProject/Help/html 283 | 284 | # Click-Once directory 285 | publish/ 286 | 287 | # Publish Web Output 288 | *.[Pp]ublish.xml 289 | *.azurePubxml 290 | # Note: Comment the next line if you want to checkin your web deploy settings, 291 | # but database connection strings (with potential passwords) will be unencrypted 292 | *.pubxml 293 | *.publishproj 294 | 295 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 296 | # checkin your Azure Web App publish settings, but sensitive information contained 297 | # in these scripts will be unencrypted 298 | PublishScripts/ 299 | 300 | # NuGet Packages 301 | *.nupkg 302 | # The packages folder can be ignored because of Package Restore 303 | **/[Pp]ackages/* 304 | # except build/, which is used as an MSBuild target. 305 | !**/[Pp]ackages/build/ 306 | # Uncomment if necessary however generally it will be regenerated when needed 307 | #!**/[Pp]ackages/repositories.config 308 | # NuGet v3's project.json files produces more ignorable files 309 | *.nuget.props 310 | *.nuget.targets 311 | 312 | # Microsoft Azure Build Output 313 | csx/ 314 | *.build.csdef 315 | 316 | # Microsoft Azure Emulator 317 | ecf/ 318 | rcf/ 319 | 320 | # Windows Store app package directories and files 321 | AppPackages/ 322 | BundleArtifacts/ 323 | Package.StoreAssociation.xml 324 | _pkginfo.txt 325 | *.appx 326 | 327 | # Visual Studio cache files 328 | # files ending in .cache can be ignored 329 | *.[Cc]ache 330 | # but keep track of directories ending in .cache 331 | !*.[Cc]ache/ 332 | 333 | # Others 334 | ClientBin/ 335 | ~$* 336 | *~ 337 | *.dbmdl 338 | *.dbproj.schemaview 339 | *.jfm 340 | *.pfx 341 | *.publishsettings 342 | orleans.codegen.cs 343 | 344 | # Including strong name files can present a security risk 345 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 346 | #*.snk 347 | 348 | # Since there are multiple workflows, uncomment next line to ignore bower_components 349 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 350 | #bower_components/ 351 | 352 | # RIA/Silverlight projects 353 | Generated_Code/ 354 | 355 | # Backup & report files from converting an old project file 356 | # to a newer Visual Studio version. 
Backup files are not needed, 357 | # because we have git ;-) 358 | _UpgradeReport_Files/ 359 | Backup*/ 360 | UpgradeLog*.XML 361 | UpgradeLog*.htm 362 | ServiceFabricBackup/ 363 | 364 | # SQL Server files 365 | *.mdf 366 | *.ldf 367 | *.ndf 368 | 369 | # Business Intelligence projects 370 | *.rdl.data 371 | *.bim.layout 372 | *.bim_*.settings 373 | 374 | # Microsoft Fakes 375 | FakesAssemblies/ 376 | 377 | # GhostDoc plugin setting file 378 | *.GhostDoc.xml 379 | 380 | # Node.js Tools for Visual Studio 381 | .ntvs_analysis.dat 382 | node_modules/ 383 | 384 | # TypeScript v1 declaration files 385 | typings/ 386 | 387 | # Visual Studio 6 build log 388 | *.plg 389 | 390 | # Visual Studio 6 workspace options file 391 | *.opt 392 | 393 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 394 | *.vbw 395 | 396 | # Visual Studio LightSwitch build output 397 | **/*.HTMLClient/GeneratedArtifacts 398 | **/*.DesktopClient/GeneratedArtifacts 399 | **/*.DesktopClient/ModelManifest.xml 400 | **/*.Server/GeneratedArtifacts 401 | **/*.Server/ModelManifest.xml 402 | _Pvt_Extensions 403 | 404 | # Paket dependency manager 405 | .paket/paket.exe 406 | paket-files/ 407 | 408 | # FAKE - F# Make 409 | .fake/ 410 | 411 | # JetBrains Rider 412 | .idea/ 413 | *.sln.iml 414 | 415 | # CodeRush 416 | .cr/ 417 | 418 | # Python Tools for Visual Studio (PTVS) 419 | __pycache__/ 420 | *.pyc 421 | 422 | # Cake - Uncomment if you are using it 423 | # tools/** 424 | # !tools/packages.config 425 | 426 | # Tabs Studio 427 | *.tss 428 | 429 | # Telerik's JustMock configuration file 430 | *.jmconfig 431 | 432 | # BizTalk build output 433 | *.btp.cs 434 | *.btm.cs 435 | *.odx.cs 436 | *.xsd.cs 437 | 438 | # OpenCover UI analysis results 439 | OpenCover/ 440 | 441 | # Azure Stream Analytics local run output 442 | ASALocalRun/ 443 | 444 | # MSBuild Binary and Structured Log 445 | *.binlog -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | "AccentDB: A Database of Non-Native English Accents to Assist Neural 2 | Speech Recognition" (c) by Ahamad, Afroz; Anand, Ankit; Bhargava, 3 | Pranesh. 4 | 5 | "AccentDB: A Database of Non-Native English Accents to Assist Neural 6 | Speech Recognition" is licensed under a Creative Commons 7 | Attribution-NonCommercial 4.0 International License. 8 | 9 | Attribution-NonCommercial 4.0 International 10 | 11 | ======================================================================= 12 | 13 | Creative Commons Corporation ("Creative Commons") is not a law firm and 14 | does not provide legal services or legal advice. Distribution of 15 | Creative Commons public licenses does not create a lawyer-client or 16 | other relationship. Creative Commons makes its licenses and related 17 | information available on an "as-is" basis. Creative Commons gives no 18 | warranties regarding its licenses, any material licensed under their 19 | terms and conditions, or any related information. Creative Commons 20 | disclaims all liability for damages resulting from their use to the 21 | fullest extent possible. 22 | 23 | Using Creative Commons Public Licenses 24 | 25 | Creative Commons public licenses provide a standard set of terms and 26 | conditions that creators and other rights holders may use to share 27 | original works of authorship and other material subject to copyright 28 | and certain other rights specified in the public license below. 
The 29 | following considerations are for informational purposes only, are not 30 | exhaustive, and do not form part of our licenses. 31 | 32 | Considerations for licensors: Our public licenses are 33 | intended for use by those authorized to give the public 34 | permission to use material in ways otherwise restricted by 35 | copyright and certain other rights. Our licenses are 36 | irrevocable. Licensors should read and understand the terms 37 | and conditions of the license they choose before applying it. 38 | Licensors should also secure all rights necessary before 39 | applying our licenses so that the public can reuse the 40 | material as expected. Licensors should clearly mark any 41 | material not subject to the license. This includes other CC- 42 | licensed material, or material used under an exception or 43 | limitation to copyright. More considerations for licensors: 44 | wiki.creativecommons.org/Considerations_for_licensors 45 | 46 | Considerations for the public: By using one of our public 47 | licenses, a licensor grants the public permission to use the 48 | licensed material under specified terms and conditions. If 49 | the licensor's permission is not necessary for any reason--for 50 | example, because of any applicable exception or limitation to 51 | copyright--then that use is not regulated by the license. Our 52 | licenses grant only permissions under copyright and certain 53 | other rights that a licensor has authority to grant. Use of 54 | the licensed material may still be restricted for other 55 | reasons, including because others have copyright or other 56 | rights in the material. A licensor may make special requests, 57 | such as asking that all changes be marked or described. 58 | Although not required by our licenses, you are encouraged to 59 | respect those requests where reasonable. More considerations 60 | for the public: 61 | wiki.creativecommons.org/Considerations_for_licensees 62 | 63 | ======================================================================= 64 | 65 | Creative Commons Attribution-NonCommercial 4.0 International Public 66 | License 67 | 68 | By exercising the Licensed Rights (defined below), You accept and agree 69 | to be bound by the terms and conditions of this Creative Commons 70 | Attribution-NonCommercial 4.0 International Public License ("Public 71 | License"). To the extent this Public License may be interpreted as a 72 | contract, You are granted the Licensed Rights in consideration of Your 73 | acceptance of these terms and conditions, and the Licensor grants You 74 | such rights in consideration of benefits the Licensor receives from 75 | making the Licensed Material available under these terms and 76 | conditions. 77 | 78 | 79 | Section 1 -- Definitions. 80 | 81 | a. Adapted Material means material subject to Copyright and Similar 82 | Rights that is derived from or based upon the Licensed Material 83 | and in which the Licensed Material is translated, altered, 84 | arranged, transformed, or otherwise modified in a manner requiring 85 | permission under the Copyright and Similar Rights held by the 86 | Licensor. For purposes of this Public License, where the Licensed 87 | Material is a musical work, performance, or sound recording, 88 | Adapted Material is always produced where the Licensed Material is 89 | synched in timed relation with a moving image. 90 | 91 | b. 
Adapter's License means the license You apply to Your Copyright 92 | and Similar Rights in Your contributions to Adapted Material in 93 | accordance with the terms and conditions of this Public License. 94 | 95 | c. Copyright and Similar Rights means copyright and/or similar rights 96 | closely related to copyright including, without limitation, 97 | performance, broadcast, sound recording, and Sui Generis Database 98 | Rights, without regard to how the rights are labeled or 99 | categorized. For purposes of this Public License, the rights 100 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 101 | Rights. 102 | d. Effective Technological Measures means those measures that, in the 103 | absence of proper authority, may not be circumvented under laws 104 | fulfilling obligations under Article 11 of the WIPO Copyright 105 | Treaty adopted on December 20, 1996, and/or similar international 106 | agreements. 107 | 108 | e. Exceptions and Limitations means fair use, fair dealing, and/or 109 | any other exception or limitation to Copyright and Similar Rights 110 | that applies to Your use of the Licensed Material. 111 | 112 | f. Licensed Material means the artistic or literary work, database, 113 | or other material to which the Licensor applied this Public 114 | License. 115 | 116 | g. Licensed Rights means the rights granted to You subject to the 117 | terms and conditions of this Public License, which are limited to 118 | all Copyright and Similar Rights that apply to Your use of the 119 | Licensed Material and that the Licensor has authority to license. 120 | 121 | h. Licensor means the individual(s) or entity(ies) granting rights 122 | under this Public License. 123 | 124 | i. NonCommercial means not primarily intended for or directed towards 125 | commercial advantage or monetary compensation. For purposes of 126 | this Public License, the exchange of the Licensed Material for 127 | other material subject to Copyright and Similar Rights by digital 128 | file-sharing or similar means is NonCommercial provided there is 129 | no payment of monetary compensation in connection with the 130 | exchange. 131 | 132 | j. Share means to provide material to the public by any means or 133 | process that requires permission under the Licensed Rights, such 134 | as reproduction, public display, public performance, distribution, 135 | dissemination, communication, or importation, and to make material 136 | available to the public including in ways that members of the 137 | public may access the material from a place and at a time 138 | individually chosen by them. 139 | 140 | k. Sui Generis Database Rights means rights other than copyright 141 | resulting from Directive 96/9/EC of the European Parliament and of 142 | the Council of 11 March 1996 on the legal protection of databases, 143 | as amended and/or succeeded, as well as other essentially 144 | equivalent rights anywhere in the world. 145 | 146 | l. You means the individual or entity exercising the Licensed Rights 147 | under this Public License. Your has a corresponding meaning. 148 | 149 | 150 | Section 2 -- Scope. 151 | 152 | a. License grant. 153 | 154 | 1. Subject to the terms and conditions of this Public License, 155 | the Licensor hereby grants You a worldwide, royalty-free, 156 | non-sublicensable, non-exclusive, irrevocable license to 157 | exercise the Licensed Rights in the Licensed Material to: 158 | 159 | a. 
reproduce and Share the Licensed Material, in whole or 160 | in part, for NonCommercial purposes only; and 161 | 162 | b. produce, reproduce, and Share Adapted Material for 163 | NonCommercial purposes only. 164 | 165 | 2. Exceptions and Limitations. For the avoidance of doubt, where 166 | Exceptions and Limitations apply to Your use, this Public 167 | License does not apply, and You do not need to comply with 168 | its terms and conditions. 169 | 170 | 3. Term. The term of this Public License is specified in Section 171 | 6(a). 172 | 173 | 4. Media and formats; technical modifications allowed. The 174 | Licensor authorizes You to exercise the Licensed Rights in 175 | all media and formats whether now known or hereafter created, 176 | and to make technical modifications necessary to do so. The 177 | Licensor waives and/or agrees not to assert any right or 178 | authority to forbid You from making technical modifications 179 | necessary to exercise the Licensed Rights, including 180 | technical modifications necessary to circumvent Effective 181 | Technological Measures. For purposes of this Public License, 182 | simply making modifications authorized by this Section 2(a) 183 | (4) never produces Adapted Material. 184 | 185 | 5. Downstream recipients. 186 | 187 | a. Offer from the Licensor -- Licensed Material. Every 188 | recipient of the Licensed Material automatically 189 | receives an offer from the Licensor to exercise the 190 | Licensed Rights under the terms and conditions of this 191 | Public License. 192 | 193 | b. No downstream restrictions. You may not offer or impose 194 | any additional or different terms or conditions on, or 195 | apply any Effective Technological Measures to, the 196 | Licensed Material if doing so restricts exercise of the 197 | Licensed Rights by any recipient of the Licensed 198 | Material. 199 | 200 | 6. No endorsement. Nothing in this Public License constitutes or 201 | may be construed as permission to assert or imply that You 202 | are, or that Your use of the Licensed Material is, connected 203 | with, or sponsored, endorsed, or granted official status by, 204 | the Licensor or others designated to receive attribution as 205 | provided in Section 3(a)(1)(A)(i). 206 | 207 | b. Other rights. 208 | 209 | 1. Moral rights, such as the right of integrity, are not 210 | licensed under this Public License, nor are publicity, 211 | privacy, and/or other similar personality rights; however, to 212 | the extent possible, the Licensor waives and/or agrees not to 213 | assert any such rights held by the Licensor to the limited 214 | extent necessary to allow You to exercise the Licensed 215 | Rights, but not otherwise. 216 | 217 | 2. Patent and trademark rights are not licensed under this 218 | Public License. 219 | 220 | 3. To the extent possible, the Licensor waives any right to 221 | collect royalties from You for the exercise of the Licensed 222 | Rights, whether directly or through a collecting society 223 | under any voluntary or waivable statutory or compulsory 224 | licensing scheme. In all other cases the Licensor expressly 225 | reserves any right to collect such royalties, including when 226 | the Licensed Material is used other than for NonCommercial 227 | purposes. 228 | 229 | 230 | Section 3 -- License Conditions. 231 | 232 | Your exercise of the Licensed Rights is expressly made subject to the 233 | following conditions. 234 | 235 | a. Attribution. 236 | 237 | 1. 
If You Share the Licensed Material (including in modified 238 | form), You must: 239 | 240 | a. retain the following if it is supplied by the Licensor 241 | with the Licensed Material: 242 | 243 | i. identification of the creator(s) of the Licensed 244 | Material and any others designated to receive 245 | attribution, in any reasonable manner requested by 246 | the Licensor (including by pseudonym if 247 | designated); 248 | 249 | ii. a copyright notice; 250 | 251 | iii. a notice that refers to this Public License; 252 | 253 | iv. a notice that refers to the disclaimer of 254 | warranties; 255 | 256 | v. a URI or hyperlink to the Licensed Material to the 257 | extent reasonably practicable; 258 | 259 | b. indicate if You modified the Licensed Material and 260 | retain an indication of any previous modifications; and 261 | 262 | c. indicate the Licensed Material is licensed under this 263 | Public License, and include the text of, or the URI or 264 | hyperlink to, this Public License. 265 | 266 | 2. You may satisfy the conditions in Section 3(a)(1) in any 267 | reasonable manner based on the medium, means, and context in 268 | which You Share the Licensed Material. For example, it may be 269 | reasonable to satisfy the conditions by providing a URI or 270 | hyperlink to a resource that includes the required 271 | information. 272 | 273 | 3. If requested by the Licensor, You must remove any of the 274 | information required by Section 3(a)(1)(A) to the extent 275 | reasonably practicable. 276 | 277 | 4. If You Share Adapted Material You produce, the Adapter's 278 | License You apply must not prevent recipients of the Adapted 279 | Material from complying with this Public License. 280 | 281 | 282 | Section 4 -- Sui Generis Database Rights. 283 | 284 | Where the Licensed Rights include Sui Generis Database Rights that 285 | apply to Your use of the Licensed Material: 286 | 287 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 288 | to extract, reuse, reproduce, and Share all or a substantial 289 | portion of the contents of the database for NonCommercial purposes 290 | only; 291 | 292 | b. if You include all or a substantial portion of the database 293 | contents in a database in which You have Sui Generis Database 294 | Rights, then the database in which You have Sui Generis Database 295 | Rights (but not its individual contents) is Adapted Material; and 296 | 297 | c. You must comply with the conditions in Section 3(a) if You Share 298 | all or a substantial portion of the contents of the database. 299 | 300 | For the avoidance of doubt, this Section 4 supplements and does not 301 | replace Your obligations under this Public License where the Licensed 302 | Rights include other Copyright and Similar Rights. 303 | 304 | 305 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 306 | 307 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 308 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 309 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 310 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 311 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 312 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 313 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 314 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 315 | KNOWN OR DISCOVERABLE. 
WHERE DISCLAIMERS OF WARRANTIES ARE NOT 316 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 317 | 318 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 319 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 320 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 321 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 322 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 323 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 324 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 325 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 326 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 327 | 328 | c. The disclaimer of warranties and limitation of liability provided 329 | above shall be interpreted in a manner that, to the extent 330 | possible, most closely approximates an absolute disclaimer and 331 | waiver of all liability. 332 | 333 | 334 | Section 6 -- Term and Termination. 335 | 336 | a. This Public License applies for the term of the Copyright and 337 | Similar Rights licensed here. However, if You fail to comply with 338 | this Public License, then Your rights under this Public License 339 | terminate automatically. 340 | 341 | b. Where Your right to use the Licensed Material has terminated under 342 | Section 6(a), it reinstates: 343 | 344 | 1. automatically as of the date the violation is cured, provided 345 | it is cured within 30 days of Your discovery of the 346 | violation; or 347 | 348 | 2. upon express reinstatement by the Licensor. 349 | 350 | For the avoidance of doubt, this Section 6(b) does not affect any 351 | right the Licensor may have to seek remedies for Your violations 352 | of this Public License. 353 | 354 | c. For the avoidance of doubt, the Licensor may also offer the 355 | Licensed Material under separate terms or conditions or stop 356 | distributing the Licensed Material at any time; however, doing so 357 | will not terminate this Public License. 358 | 359 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 360 | License. 361 | 362 | 363 | Section 7 -- Other Terms and Conditions. 364 | 365 | a. The Licensor shall not be bound by any additional or different 366 | terms or conditions communicated by You unless expressly agreed. 367 | 368 | b. Any arrangements, understandings, or agreements regarding the 369 | Licensed Material not stated herein are separate from and 370 | independent of the terms and conditions of this Public License. 371 | 372 | 373 | Section 8 -- Interpretation. 374 | 375 | a. For the avoidance of doubt, this Public License does not, and 376 | shall not be interpreted to, reduce, limit, restrict, or impose 377 | conditions on any use of the Licensed Material that could lawfully 378 | be made without permission under this Public License. 379 | 380 | b. To the extent possible, if any provision of this Public License is 381 | deemed unenforceable, it shall be automatically reformed to the 382 | minimum extent necessary to make it enforceable. If the provision 383 | cannot be reformed, it shall be severed from this Public License 384 | without affecting the enforceability of the remaining terms and 385 | conditions. 386 | 387 | c. No term or condition of this Public License will be waived and no 388 | failure to comply consented to unless expressly agreed to by the 389 | Licensor. 390 | 391 | d. 
Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.

=======================================================================

Creative Commons is not a party to its public
licenses. Notwithstanding, Creative Commons may elect to apply one of
its public licenses to material it publishes and in those instances
will be considered the “Licensor.” The text of the Creative Commons
public licenses is dedicated to the public domain under the CC0 Public
Domain Dedication. Except for the limited purpose of indicating that
material is shared under a Creative Commons public license or as
otherwise permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the
public licenses.

Creative Commons may be contacted at creativecommons.org.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# AccentDB
[A Database of Non-Native English Accents to Assist Neural Speech Recognition](https://accentdb.github.io/)

## Dataset
The current release **v1.0** of AccentDB has three datasets that can be downloaded from [here](https://accentdb.github.io/#dataset).

| Title | Description | Notes |
|:--------- | :---------- | --------: |
|**accentdb_core**| 4 non-native Indian English accents collected by the authors. | 6,587 files |
|**accentdb_extended**| Samples for 5 English accents + the 4 accents from accentdb_core. | 19,111 files|
|**accentdb_raw**| Raw and unprocessed recordings for the core dataset. | 11 files |

## Embedding Visualization
The 600 one-speaker-per-accent sample vectors and their metadata can be found at [AccentDB/embedding-150](https://github.com/AccentDB/embedding-150); the projection is at [Embedding Projector](https://projector.tensorflow.org/?config=https://raw.githubusercontent.com/AccentDB/embedding-150/master/template_projector_config.json).

Larger vector and metadata files can be downloaded from here:
- [accents-4-samples-250](https://drive.google.com/drive/folders/1ECGDOxcFAMp9y-yCBTy4d1M2Bb8fkp3r?usp=sharing): 1,000 rows.
- [accents-4-samples-700](https://drive.google.com/drive/folders/1d7pyl2AwmnEgVvGTeNjOYgKRa_awUnjN?usp=sharing): 2,800 rows.
- [accents-9-samples-250](https://drive.google.com/drive/folders/16vkVq36zTFGB2p0-QL7PZQRLGQkk9yQ7?usp=sharing): 22,500 rows.
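For a quick local sanity check of any of these downloads, a minimal sketch is below. It assumes the vectors and metadata are tab-separated files named `vectors.tsv` and `metadata.tsv` (names assumed for illustration; substitute the actual file names from the folder):

```python
# Sanity-check projector-style embedding files (file names are assumed).
import numpy as np
import pandas as pd

vectors = np.loadtxt("vectors.tsv", delimiter="\t")  # one embedding per row
metadata = pd.read_csv("metadata.tsv", sep="\t")     # one label row per embedding
print(vectors.shape, len(metadata))                  # row counts should match
```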
## Colab

Run the following Colab notebook to experiment with the classification model on a smaller AccentDB dataset:
[conv_classfication_multi_setup.ipynb](https://colab.research.google.com/drive/1J_pTtmY98vtWHWoIs9WhRuWK0b0uBuxU)

A static version of the notebook can be accessed [here](./notebooks/AccentDB_Classification_Colab.ipynb).

---------

## Code
The steps below are required only if you want to work with the raw recordings. We share the scripts that we used to clean and preprocess the recordings, as well as the code to train and test the different models.

`repo.tree` contains the structure of the repo, including the `.npy` and `.wav` files that are not tracked by git.

> We are thankful to https://github.com/dwww2012/Accent-Classifier for providing the code for preprocessing and generating MFCC vectors.

### Preprocessing .wav recordings

#### Step 1: Convert .mp3 files to .wav
Use the following script to convert all .mp3 files to .wav files.
```
for file in *.mp3
do
    ffmpeg -i "$file" "$file".wav
done
```
This produces .wav files with `.mp3.wav` names; the redundant `.mp3` part can then be removed with a bulk rename via `qmv` (from renameutils):
```
$ qmv -f do
```

#### Step 2: Split hour-long .wav recordings to sentence level

This is done using `split_to_wav.py` present in the corresponding folders, or the generic `helpers/alt_split.py`. The splitting is based on silence, detected with thresholds on energy and duration. The thresholds used for the experiments and the number of sentence-level files produced per recording are summarized below:

| Recording | Energy below | For longer than | Files produced |
| :--- | :---: | :---: | ---: |
| Bangla_Arc.wav | 1.0% | 2.0 s | 778 |
| Bangla_Jay.wav | 1.0% | 2.0 s | 750 |
| Malayalam_Hab.wav | 1.0% | 2.0 s | 751 |
| Malayalam_Sal.wav | 1.0% | 2.0 s | 28 |
| Malayalam_Sha.wav | 1.0% | 2.0 s | 895 |
| Odiya_Suc.wav | 1.0% | 2.0 s | 120 |
| Telugu_Nav.wav | 1.0% | 2.0 s | 766 |
| Telugu_Tho.wav | 1.0% | 2.0 s | 749 |

Two recordings split poorly at the 1.0% energy threshold and were re-split with a lower threshold:

| Recording | Energy below | For longer than | Files produced |
| :--- | :---: | :---: | ---: |
| Malayalam_Sal.wav | 0.1% | 2.0 s | 747 |
| Odiya_Suc.wav | 0.1% | 2.0 s | 747 |

Most of the resulting files are around 5 seconds long, while some run to 13 or 14 seconds.
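For illustration only, here is a minimal sketch of this kind of silence-based splitting using `pydub`. It is not the script used for the experiments (see `helpers/alt_split.py` for that); the 2.0 s minimum-silence length mirrors the table above, while the dB cutoff is an assumed stand-in for the percentage energy threshold:

```python
# Illustrative sketch only; the repo's own splitter is helpers/alt_split.py.
from pydub import AudioSegment
from pydub.silence import split_on_silence

audio = AudioSegment.from_wav("Bangla_Arc.wav")
chunks = split_on_silence(
    audio,
    min_silence_len=2000,            # silence longer than 2.0 s marks a boundary
    silence_thresh=audio.dBFS - 20,  # assumed dB stand-in for the 1.0% energy cutoff
    keep_silence=200,                # keep 0.2 s of padding around each sentence
)
for i, chunk in enumerate(chunks):
    chunk.export("sentence_{:04d}.wav".format(i), format="wav")
```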
#### Step 3: Trim all files to 5s
Run a script similar to `/data/all_accents/all_accents_trim.sh` to trim all files to less than 5 s. The trimming is done with `sox`; details here: https://stackoverflow.com/questions/9667081/how-do-you-trim-the-audio-files-end-using-sox
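A minimal sketch of such a trim loop is below (the actual `all_accents_trim.sh` may differ; `trim 0 5` keeps the first five seconds of each file):
```
for f in *.wav
do
    sox "$f" "trimmed_$f" trim 0 5
done
```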
#### Step 4: Generate X and Y vectors for training

└── `speech2vec`
    ├── `all_split.sh`: bash script to run all models on given X and Y npy vectors.
    ├── `gen_x.py`: generate MFCC vectors for all files in the specified folder.
    ├── `gen_y.py`: generate class labels for all files in the specified folder.
    └── `mfcc.py`: MFCC utility.

`.npy` files are stored in `/data/numpy_vectors`, or in the corresponding folder for some experiments.

--------
#### Step 5: Initial classification runs with MFCC features

Run the following Colab notebooks for the two experimental setups.
1. [conv1d on all_accents](https://colab.research.google.com/drive/1Z5vg1eRU3zCskrlTc2kp1y9xzUx8P9H8?authuser=2#scrollTo=Zz0tpQ_kiQNo) (requires an access request)
2. [train_on_one_person_and_test_on_other](https://colab.research.google.com/drive/1dMZxbFCPBc2gJkNM47F_j7lDtvVaDhxb?authuser=2#scrollTo=koL6wrhIq_em) (requires an access request)

The results can be found inside `data/numpy_vectors/terminal.log`.

Models run:
> ├── `classification`
│     ├── `attention_lstm.py`
│     ├── `attention_utils.py`
│     ├── `cnn_bilstm.py`
│     ├── `conv_1d_model_aws.py`
│     ├── `conv_1d_model.py`
│     ├── `conv_1d_model_run.py`

---------
## Citation
If you have found our dataset or models useful, please cite us as below.
```
@InProceedings{ahamad-anand-bhargava:2020:LREC,
  author    = {Ahamad, Afroz and Anand, Ankit and Bhargava, Pranesh},
  title     = {AccentDB: A Database of Non-Native English Accents to Assist Neural Speech Recognition},
  booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference},
  month     = {May},
  year      = {2020},
  address   = {Marseille, France},
  publisher = {European Language Resources Association},
  pages     = {5353--5360},
  url       = {https://www.aclweb.org/anthology/2020.lrec-1.659}
}
```
-----
## LICENSE
AccentDB is licensed under a [CC BY-NC 4.0 License](./LICENSE).

--------------------------------------------------------------------------------
/classification/attention_lstm.py:
--------------------------------------------------------------------------------

from __future__ import print_function
from keras.layers import multiply
from keras.layers.core import *
from keras.layers.recurrent import LSTM
from keras.models import *

import numpy as np

from keras.optimizers import SGD


np.random.seed(1337)  # for reproducibility
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from attention_utils import get_activations, get_data_recurrent

nb_classes = 2
INPUT_DIM = 13
TIME_STEPS = 2999
# if True, the attention vector is shared across the input dimensions where the attention is applied.
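# SINGLE_ATTENTION_VECTOR=True averages the per-dimension attention maps into one
# attention-over-time vector shared across all input dimensions (the
# K.mean/RepeatVector branch in attention_3d_block below).
# APPLY_ATTENTION_BEFORE_LSTM selects which of the two model builders below is
# used: attention on the raw inputs before the LSTM, or on the LSTM outputs.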
26 | SINGLE_ATTENTION_VECTOR = False 27 | APPLY_ATTENTION_BEFORE_LSTM = True 28 | 29 | 30 | print('Loading data...') 31 | X = np.load('../data/numpy_vectors/x_label_splits.npy') 32 | y = np.load('../data/numpy_vectors/y_label_splits.npy') 33 | N = X.shape[0] 34 | X = X.repeat(2).repeat(2) 35 | y = y.repeat(2).repeat(2) 36 | X = X.reshape(4*N, 2999, 13) 37 | print(X.shape) 38 | print(y.shape) 39 | 40 | #X = X[:200] 41 | #y = y[:200] 42 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10) 43 | batch_size = 50 44 | 45 | Y_train = np_utils.to_categorical(y_train, nb_classes) 46 | Y_test = np_utils.to_categorical(y_test, nb_classes) 47 | #split__ = int((len(X_train)//batch_size)*batch_size) 48 | #X_train = X_train[:split__] 49 | #y_train = y_train[:split__] 50 | 51 | def attention_3d_block(inputs): 52 | # inputs.shape = (batch_size, time_steps, input_dim) 53 | input_dim = int(inputs.shape[2]) 54 | a = Permute((2, 1))(inputs) 55 | a = Reshape((input_dim, TIME_STEPS))(a) # this line is not useful. It's just to know which dimension is what. 56 | a = Dense(TIME_STEPS, activation='softmax')(a) 57 | if SINGLE_ATTENTION_VECTOR: 58 | a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a) 59 | a = RepeatVector(input_dim)(a) 60 | a_probs = Permute((2, 1), name='attention_vec')(a) 61 | output_attention_mul = multiply([inputs, a_probs], name='attention_mul') 62 | return output_attention_mul 63 | 64 | 65 | def model_attention_applied_after_lstm(): 66 | inputs = Input(shape=(TIME_STEPS, INPUT_DIM,)) 67 | lstm_units = 100 68 | lstm_out = LSTM(lstm_units, return_sequences=True)(inputs) 69 | attention_mul = attention_3d_block(lstm_out) 70 | attention_mul = Flatten()(attention_mul) 71 | output = Dense(nb_classes, activation='sigmoid')(attention_mul) 72 | model = Model(input=[inputs], output=output) 73 | return model 74 | 75 | 76 | def model_attention_applied_before_lstm(): 77 | inputs = Input(shape=(TIME_STEPS, INPUT_DIM,)) 78 | attention_mul = attention_3d_block(inputs) 79 | lstm_units = 100 80 | attention_mul = LSTM(lstm_units, return_sequences=False)(attention_mul) 81 | output = Dense(nb_classes, activation='sigmoid')(attention_mul) 82 | model = Model(input=[inputs], output=output) 83 | return model 84 | 85 | 86 | if __name__ == '__main__': 87 | 88 | N = 300000 89 | # N = 300 -> too few = no training 90 | inputs_1, outputs = X_train, Y_train 91 | 92 | if APPLY_ATTENTION_BEFORE_LSTM: 93 | m = model_attention_applied_before_lstm() 94 | else: 95 | m = model_attention_applied_after_lstm() 96 | 97 | m.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) 98 | print(m.summary()) 99 | 100 | m.fit([inputs_1], outputs, epochs=6, batch_size=64, validation_split=0.1) 101 | 102 | y_pred=m.predict(X_test, batch_size=batch_size) 103 | for i in range(len(y_pred)): 104 | print(y_pred[i], Y_test[i]) 105 | # print(classification_report(Y_test, y_pred)) 106 | # attention_vectors = [] 107 | # for i in range(X_test.shape[0]): 108 | # testing_inputs_1, testing_outputs = X_test[i], y_test[i] 109 | # attention_vector = np.mean(get_activations(m, 110 | # testing_inputs_1, 111 | # print_shape_only=True, 112 | # layer_name='attention_vec')[0], axis=2).squeeze() 113 | # print('attention =', attention_vector) 114 | # assert (np.sum(attention_vector) - 1.0) < 1e-5 115 | # attention_vectors.append(attention_vector) 116 | 117 | # attention_vector_final = np.mean(np.array(attention_vectors), axis=0) 118 | # # plot part. 
# import matplotlib.pyplot as plt
# import pandas as pd

# pd.DataFrame(attention_vector_final, columns=['attention (%)']).plot(kind='bar',
#                                                                      title='Attention Mechanism as '
#                                                                            'a function of input'
#                                                                            ' dimensions.')
# plt.show()

--------------------------------------------------------------------------------
/classification/attention_utils.py:
--------------------------------------------------------------------------------

import keras.backend as K
import numpy as np


def get_activations(model, inputs, print_shape_only=False, layer_name=None):
    # Documentation is available online on GitHub at the address below.
    # From: https://github.com/philipperemy/keras-visualize-activations
    print('----- activations -----')
    activations = []
    inp = model.input
    if layer_name is None:
        outputs = [layer.output for layer in model.layers]
    else:
        outputs = [layer.output for layer in model.layers if layer.name == layer_name]  # all layer outputs
    funcs = [K.function([inp] + [K.learning_phase()], [out]) for out in outputs]  # evaluation functions
    layer_outputs = [func([inputs, 1.])[0] for func in funcs]
    for layer_activations in layer_outputs:
        activations.append(layer_activations)
        if print_shape_only:
            print(layer_activations.shape)
        else:
            print(layer_activations)
    return activations


def get_data(n, input_dim, attention_column=1):
    """
    Data generation. x is purely random except that the value at attention_column equals the target y.
    In practice, the network should learn that the target = x[attention_column].
    Therefore, most of its attention should be focused on the value addressed by attention_column.
    :param n: the number of samples to retrieve.
    :param input_dim: the number of dimensions of each element in the series.
    :param attention_column: the column linked to the target. Everything else is purely random.
    :return: x: model inputs, y: model targets
    """
    x = np.random.standard_normal(size=(n, input_dim))
    y = np.random.randint(low=0, high=2, size=(n, 1))
    x[:, attention_column] = y[:, 0]
    return x, y


def get_data_recurrent(n, time_steps, input_dim, attention_column=10):
    """
    Data generation. x is purely random except that the value at attention_column equals the target y.
    In practice, the network should learn that the target = x[attention_column].
    Therefore, most of its attention should be focused on the value addressed by attention_column.
    :param n: the number of samples to retrieve.
    :param time_steps: the number of time steps of your series.
    :param input_dim: the number of dimensions of each element in the series.
    :param attention_column: the column linked to the target. Everything else is purely random.
    :return: x: model inputs, y: model targets
    """
    x = np.random.standard_normal(size=(n, time_steps, input_dim))
    y = np.random.randint(low=0, high=2, size=(n, 1))
    x[:, attention_column, :] = np.tile(y[:], (1, input_dim))
    return x, y

--------------------------------------------------------------------------------
/classification/cnn_bilstm.py:
--------------------------------------------------------------------------------

from __future__ import print_function
import numpy as np

from keras.optimizers import SGD
from keras.callbacks import TensorBoard

np.random.seed(1337)  # for reproducibility
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Bidirectional
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.recurrent import LSTM
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# parameters
test_dim = 2999
maxlen = 100
batch_size = 100
nb_filter = 64
filter_length_1 = 50
filter_length_2 = 25
hidden_dims = 250
nb_epoch = 16
nb_classes = 2

print('Loading data...')
X = np.load('../data/numpy_vectors/x_label_splits.npy')
y = np.load('../data/numpy_vectors/y_label_splits.npy')
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# create the model
embedding_vector_length = 32
model = Sequential()

# model.add(Conv1D(filters=32, input_shape=(test_dim, 13), kernel_size=3, padding='same', activation='relu'))
model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_1,
                        input_shape=(test_dim, 13),
                        border_mode='valid',
                        activation='relu'))

model.add(BatchNormalization())

model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_2,
                        border_mode='same',
                        activation='relu'))

model.add(BatchNormalization())

model.add(MaxPooling1D(pool_length=2))

model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length_2,
                        border_mode='same',
                        activation='relu'))

model.add(BatchNormalization())

model.add(MaxPooling1D(pool_length=2))

# No Flatten here: the bidirectional LSTM stack consumes the conv output sequence.
# model.add(Dense(hidden_dims))
model.add(Dropout(0.25))

model.add(Bidirectional(LSTM(100, return_sequences=True)))
model.add(Bidirectional(LSTM(100, return_sequences=True)))
model.add(Bidirectional(LSTM(100)))

model.add(Dropout(0.25))

model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(nb_classes, activation='softmax'))
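# Labels were one-hot encoded with np_utils.to_categorical above, so categorical
# cross-entropy is the matching loss for the nb_classes-way softmax output.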
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model.summary())

model.fit(X_train, Y_train, batch_size=batch_size,
          nb_epoch=nb_epoch, verbose=1, validation_split=0.15)

y_preds = model.predict(X_test)
for i in range(len(y_preds)):
    print(y_preds[i], y_test[i])

# Final evaluation of the model
scores = model.evaluate(X_test, Y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))

--------------------------------------------------------------------------------
/classification/conv_1d_model.py:
--------------------------------------------------------------------------------

from __future__ import print_function
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from time import time
# np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.utils import np_utils
from keras.callbacks import TensorBoard


# set parameters:
test_dim = 2999
maxlen = 100
nb_filter = 64
filter_length_1 = 50
filter_length_2 = 25
hidden_dims = 250
nb_epoch = 20
nb_classes = 2
split_ratio = 0.15

print('Loading data...')

X = np.load('../data/numpy_vectors/x_label_splits.npy')
y = np.load('../data/numpy_vectors/y_label_splits.npy')
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_ratio)

xts = X_train.shape
# X_train = np.reshape(X_train, (xts[0], xts[1], 1))
xtss = X_test.shape
# X_test = np.reshape(X_test, (xtss[0], xtss[1], 1))
yts = y_train.shape
# y_train = np.reshape(y_train, (yts[0], 1))
ytss = y_test.shape
# y_test = np.reshape(y_test, (ytss[0], 1))

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# print('Pad sequences (samples x time)')
# X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
# X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
# print('X_train shape:', X_train.shape)
# print('X_test shape:', X_test.shape)

for batch_size in range(10, 11, 5):
    print('Build model...')
    model = Sequential()

    # we start off with an efficient embedding layer which maps
    # our vocab indices into embedding_dims dimensions
    # model.add(Embedding(max_features, embedding_dims, input_length=maxlen))
    # model.add(Dropout(0.25))

    # we add a Convolution1D, which will learn nb_filter
    # filters of size filter_length_1 over the MFCC time series:
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length_1,
                            input_shape=(test_dim, 13),
                            border_mode='valid',
                            activation='relu'))
    # normalize the conv activations (max pooling comes after the next conv block):
    model.add(BatchNormalization())

    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length_2,
                            border_mode='same',
                            activation='relu'))
85 | model.add(BatchNormalization()) 86 | 87 | model.add(MaxPooling1D(pool_length=2)) 88 | 89 | model.add(Convolution1D(nb_filter=nb_filter, 90 | filter_length=filter_length_2, 91 | border_mode='same', 92 | activation='relu' 93 | )) 94 | 95 | model.add(BatchNormalization()) 96 | 97 | model.add(MaxPooling1D(pool_length=2)) 98 | 99 | # We flatten the output of the conv layer, 100 | # so that we can add a vanilla dense layer: 101 | model.add(Flatten()) 102 | 103 | # We add a vanilla hidden layer: 104 | # model.add(Dense(hidden_dims)) 105 | model.add(Dropout(0.25)) 106 | # model.add(Activation('relu')) 107 | 108 | # We project onto an nb_classes-unit output layer, and squash it with a softmax: 109 | model.add(Dense(nb_classes)) 110 | model.add(Activation('softmax')) 111 | 112 | model.compile(loss='categorical_crossentropy', 113 | optimizer='adam', metrics=['accuracy']) 114 | 115 | print("model/split = {} <> batchsize = {}".format(split_ratio, batch_size)) 116 | tensorboard = TensorBoard(log_dir="logs/split_{}_batchsize_{}".format(split_ratio, batch_size)) 117 | 118 | model.fit(X_train, Y_train, batch_size=batch_size, 119 | nb_epoch=nb_epoch, verbose=1, callbacks=[tensorboard] ) 120 | 121 | # model.save('model_hin_tel_38_samples.h5') 122 | 123 | y_preds = model.predict(X_test) 124 | for i in range(len(y_preds)): 125 | print(y_preds[i], y_test[i]) 126 | 127 | score = model.evaluate(X_test, Y_test, verbose=1) 128 | print(score) 129 | print("\n**********************************\n") 130 | 131 | # print(classification_report(Y_test, Y_preds)) 132 |
-------------------------------------------------------------------------------- /classification/conv_1d_model_aws.py: --------------------------------------------------------------------------------
1 | 2 | from __future__ import print_function 3 | import numpy as np 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.metrics import classification_report 6 | np.random.seed(1337) # for reproducibility 7 | 8 | from keras.preprocessing import sequence 9 | from keras.layers.noise import GaussianNoise 10 | from keras.models import Sequential 11 | from keras.layers.core import Dense, Dropout, Activation, Flatten 12 | from keras.layers.normalization import BatchNormalization 13 | from keras.layers.convolutional import Convolution1D, MaxPooling1D, AveragePooling1D 14 | from keras.utils import np_utils 15 | 16 | 17 | # set parameters: 18 | test_dim = 999 19 | maxlen = 100 20 | batch_size = 50 21 | nb_filter = 512 22 | filter_length_1 = 100 23 | filter_length_2 = 30 24 | filter_length_3 = 15 25 | hidden_dims = 10 26 | nb_epoch = 5 27 | nb_classes = 3 28 | 29 | print('Loading data...') 30 | X = np.load('top_3_100_split_mfcc.npy') 31 | y = np.load('top_3_100_split_y.npy') 32 | 33 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) 34 | 35 | # in case the passed in data is 2d and not 3d 36 | ''' 37 | xts = X_train.shape 38 | X_train = np.reshape(X_train, (xts[0], xts[1], 1)) 39 | xtss = X_test.shape 40 | X_test = np.reshape(X_test, (xtss[0], xtss[1], 1)) 41 | yts = y_train.shape 42 | y_train = np.reshape(y_train, (yts[0], 1)) 43 | ytss = y_test.shape 44 | y_test = np.reshape(y_test, (ytss[0], 1)) 45 | ''' 46 | 47 | print(len(X_train), 'train sequences') 48 | print(len(X_test), 'test sequences') 49 | 50 | Y_train = np_utils.to_categorical(y_train, nb_classes) 51 | Y_test = np_utils.to_categorical(y_test, nb_classes) 52 | 53 | 54 | print('Build model...') 55 | model = Sequential() 56 |
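# (Sanity check, not part of the original script: the first Conv1D below declares
# input_shape=(test_dim, 13), so the loaded arrays are expected to arrive as
# (n_samples, 999, 13) -- 999 MFCC frames of 13 coefficients per clip.)
assert X_train.shape[1:] == (test_dim, 13), X_train.shape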
57 | # we add a Convolution1D, which will learn nb_filter mfcc groups: 58 | model.add(Convolution1D(nb_filter=nb_filter, 59 | filter_length=filter_length_1, 60 | input_shape=(test_dim, 13), 61 | init = 'glorot_normal', 62 | border_mode='valid', 63 | activation='relu' 64 | )) 65 | 66 | # batch normalization to keep the layer activations well-scaled during training 67 | model.add(BatchNormalization()) 68 | 69 | # add more layers 70 | model.add(Convolution1D(nb_filter=nb_filter, 71 | filter_length=filter_length_2, 72 | border_mode='valid', 73 | activation='relu' 74 | )) 75 | 76 | model.add(BatchNormalization()) 77 | 78 | # we use standard max pooling (halving the output of the previous layer) 79 | model.add(MaxPooling1D(pool_length=2)) 80 | 81 | 82 | model.add(Convolution1D(nb_filter=nb_filter, 83 | filter_length=filter_length_2, 84 | border_mode='valid', 85 | activation='relu' 86 | )) 87 | 88 | model.add(BatchNormalization()) 89 | 90 | model.add(MaxPooling1D(pool_length=2)) 91 | 92 | model.add(Convolution1D(nb_filter=nb_filter, 93 | filter_length=filter_length_2, 94 | border_mode='valid', 95 | activation='relu' 96 | )) 97 | 98 | model.add(BatchNormalization()) 99 | 100 | model.add(MaxPooling1D(pool_length=2)) 101 | 102 | # Dropout reduces overfitting 103 | model.add(Dropout(.1)) 104 | 105 | model.add(Convolution1D(nb_filter=nb_filter, 106 | filter_length=filter_length_2, 107 | border_mode='valid', 108 | activation='relu' 109 | )) 110 | 111 | model.add(BatchNormalization()) 112 | 113 | model.add(MaxPooling1D(pool_length=2)) 114 | 115 | model.add(Dropout(.1)) 116 | 117 | model.add(Convolution1D(nb_filter=nb_filter, 118 | filter_length=filter_length_3, 119 | border_mode='valid', 120 | activation='relu' 121 | )) 122 | 123 | model.add(BatchNormalization()) 124 | 125 | model.add(MaxPooling1D(pool_length=2)) 126 | 127 | # We flatten the output of the conv layer, 128 | # so that we can add a vanilla dense layer: 129 | model.add(Flatten()) 130 | 131 | # We project onto an nb_classes-unit output layer, and squash it with a softmax into 0-1 probability space: 132 | model.add(Dense(nb_classes)) 133 | model.add(Activation('softmax')) 134 | 135 | model.compile(loss='categorical_crossentropy', 136 | optimizer='adam', metrics = ["accuracy"]) 137 | model.fit(X_train, Y_train, batch_size=batch_size, 138 | nb_epoch=nb_epoch, verbose=1, 139 | validation_data=(X_test, Y_test)) 140 | 141 | # print report of recall, precision, f1 score 142 | y_pred = model.predict_classes(X_test) 143 | print(classification_report(y_test, y_pred)) 144 |
-------------------------------------------------------------------------------- /classification/conv_1d_model_run.py: --------------------------------------------------------------------------------
1 | import numpy as np 2 | from gen_y import generate_y 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.metrics import classification_report 5 | from keras.models import load_model 6 | from keras.utils import np_utils 7 | 8 | nb_classes = 3 9 | 10 | X_test = np.load('test_mfcc_merge_spanish_test.npy') 11 | #print(X.shape) 12 | # y = generate_y('/media/enigmaeth/My Passport/Datasets/Accent/sounds_wav') 13 | #print(y.shape) 14 | 15 | # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15) 16 | 17 | # Y_train = np_utils.to_categorical(y_train, nb_classes) 18 | # Y_test = np_utils.to_categorical(y_test, nb_classes) 19 | 20 | #X_test = np.load('test_mfcc.npy') 21 | print(X_test.shape) 22 | y = np.array([2]*380) 23 | #print(y) 24 | Y_test = np_utils.to_categorical(y, nb_classes) 25 | print(Y_test.shape) 26 |
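# (Aside, not part of the original script: np_utils.to_categorical turns class
# indices into one-hot rows, e.g. index 2 with nb_classes=3 -> [0., 0., 1.],
# so Y_test above has shape (380, 3).)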
27 | model = load_model('model_20epochs.h5') 28 | prediction = model.predict(X_test) 29 | 30 | # pick the class with the highest softmax score for each test clip 31 | ans = np.argmax(prediction, axis=1).tolist() 32 | 33 | print(ans, len(ans)) 34 | 35 | score = model.evaluate(X_test, Y_test, verbose=1) 36 | print(score)
-------------------------------------------------------------------------------- /classification/log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/classification/log.txt -------------------------------------------------------------------------------- /classification/logs/split_0.15_batchsize_10/events.out.tfevents.1569179240.elem: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/classification/logs/split_0.15_batchsize_10/events.out.tfevents.1569179240.elem -------------------------------------------------------------------------------- /classification/logs/split_0.15_batchsize_10/events.out.tfevents.1569217478.elem: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/classification/logs/split_0.15_batchsize_10/events.out.tfevents.1569217478.elem -------------------------------------------------------------------------------- /classification/lstm_cnn.py: --------------------------------------------------------------------------------
1 | '''Train a recurrent convolutional network on the IMDB sentiment classification task. 2 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU. 3 | ''' 4 | from __future__ import print_function 5 | 6 | from keras.preprocessing import sequence 7 | from keras.models import Sequential 8 | from keras.layers import Dense, Dropout, Activation 9 | from keras.layers import Embedding 10 | from keras.layers import LSTM 11 | from keras.layers import Conv1D, MaxPooling1D 12 | from keras.datasets import imdb 13 | 14 | # Embedding 15 | max_features = 20000 16 | maxlen = 100 17 | embedding_size = 128 18 | 19 | # Convolution 20 | kernel_size = 5 21 | filters = 64 22 | pool_size = 4 23 | 24 | # LSTM 25 | lstm_output_size = 70 26 | 27 | # Training 28 | batch_size = 30 29 | epochs = 2 30 | 31 | ''' 32 | Note: 33 | batch_size is highly sensitive. 34 | Only 2 epochs are needed as the dataset is very small. 35 | ''' 36 |
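# (Aside, not part of the original script: sequence.pad_sequences pads and
# truncates on the left by default, e.g.
# pad_sequences([[1, 2, 3], [4]], maxlen=2) -> [[2, 3], [0, 4]].)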
37 | print('Loading data...') 38 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 39 | print(len(x_train), 'train sequences') 40 | print(len(x_test), 'test sequences') 41 | 42 | print('Pad sequences (samples x time)') 43 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 44 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 45 | print('x_train shape:', x_train.shape) 46 | print('x_test shape:', x_test.shape) 47 | 48 | print('Build model...') 49 | 50 | model = Sequential() 51 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 52 | model.add(Dropout(0.25)) 53 | model.add(Conv1D(filters, 54 | kernel_size, 55 | padding='valid', 56 | activation='relu', 57 | strides=1)) 58 | model.add(MaxPooling1D(pool_size=pool_size)) 59 | model.add(LSTM(lstm_output_size)) 60 | model.add(Dense(1)) 61 | model.add(Activation('sigmoid')) 62 | 63 | model.compile(loss='binary_crossentropy', 64 | optimizer='adam', 65 | metrics=['accuracy']) 66 | 67 | print('Train...') 68 | model.fit(x_train, y_train, 69 | batch_size=batch_size, 70 | epochs=epochs, 71 | validation_data=(x_test, y_test)) 72 | score, acc = model.evaluate(x_test, y_test, batch_size=batch_size) 73 | print('Test score:', score) 74 | print('Test accuracy:', acc) 75 |
-------------------------------------------------------------------------------- /classification/model_5epochs_rnn.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/classification/model_5epochs_rnn.h5 -------------------------------------------------------------------------------- /classification/model_hin_tel_38_samples.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/classification/model_hin_tel_38_samples.h5 -------------------------------------------------------------------------------- /classification/rnn_example.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function 2 | import numpy as np 3 | 4 | # from .attention_lstm import goo  # unused, and not importable when this file is run as a script 5 | from keras.optimizers import SGD 6 | 7 | 8 | np.random.seed(1337) # for reproducibility 9 | from keras.preprocessing import sequence 10 | from keras.utils import np_utils 11 | from keras.models import Sequential 12 | from keras.layers.core import Dense, Dropout, Activation 13 | from keras.layers.recurrent import LSTM 14 | from sklearn.model_selection import train_test_split 15 | from sklearn.metrics import classification_report 16 | from gen_y import generate_y 17 | 18 | hidden_units = 100 19 | nb_classes = 2 20 | print('Loading data...') 21 | X = np.load('x_test_mfcc_split_wav_30sec.npy') 22 | y = generate_y('/media/enigmaeth/My Passport/Datasets/linguistics data/split_wav_30sec') 23 | 24 | X = X[:200] 25 | y = y[:200] 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20) 27 | batch_size = 20 28 | # trim the training set to a whole number of batches 29 | split__ = int((len(X_train)//batch_size)*batch_size) 30 | X_train = X_train[:split__] 31 | y_train = y_train[:split__] 32 | 33 | 34 | print(len(X_train), 'train sequences') 35 | print(len(X_test), 'test sequences') 36 | print('X_train shape:', X_train.shape) 37 | print('X_test shape:', X_test.shape) 38 | print('y_train shape:', y_train.shape) 39 | print('y_test shape:', y_test.shape) 40 | print('Build model...') 41 | 42 | print(y_train)
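# (Aside, not part of the original script: the first LSTM below is built with
# batch_input_shape=(batch_size, timesteps, features), e.g. (20, 2999, 13) if the
# clips are 2999-frame MFCC matrices -- an illustrative shape, since the .npy
# contents are not shown here. A fixed batch dimension is only required when
# stateful=True; with stateful=False a plain input_shape would also work.)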
43 | Y_train = np_utils.to_categorical(y_train, nb_classes) 44 | Y_test = np_utils.to_categorical(y_test, nb_classes) 45 | 46 | model = Sequential() 47 | 48 | #batch_input_shape= (batch_size, X_train.shape[1], X_train.shape[2]) 49 | 50 | # note that it is necessary to pass in 3d batch_input_shape if stateful=True 51 | model.add(LSTM(64, return_sequences=True, stateful=False, 52 | batch_input_shape= (batch_size, X_train.shape[1], X_train.shape[2]))) 53 | model.add(LSTM(64, return_sequences=True, stateful=False)) 54 | model.add(LSTM(64, stateful=False)) 55 | 56 | 57 | # add dropout to control for overfitting 58 | model.add(Dropout(.25)) 59 | 60 | # squash output onto number of classes in probability space 61 | model.add(Dense(nb_classes, activation='softmax')) 62 | 63 | 64 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"]) 65 | 66 | print("Train...") 67 | model.fit(X_train, Y_train, batch_size=batch_size, epochs=5, validation_data=(X_test, Y_test)) 68 | 69 | y_pred=model.predict_classes(X_test, batch_size=batch_size) 70 | print(classification_report(y_test, y_pred)) 71 | 72 | model.save('model_5epochs_rnn.h5') 73 |
-------------------------------------------------------------------------------- /classification/testing.py: --------------------------------------------------------------------------------
1 | import numpy as np 2 | import pandas as pd 3 | from python_speech_features import mfcc 4 | from python_speech_features import logfbank 5 | import scipy.io.wavfile as wav 6 | from scipy.io.wavfile import write as wav_write 7 | import librosa 8 | import scipy 9 | from tqdm import tqdm 10 | import shutil # used by copy_files_from_csv below 11 | import os 12 | 13 | 14 | ''' 15 | mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True) 16 | ''' 17 | # read in wav file, get out signal (np array) and sampling rate (int) 18 | def read_in_audio(filename): 19 | (rate, sig) = wav.read(filename) 20 | return sig, rate 21 | 22 | 23 | # read in signal, take absolute value and slice seconds 1-3 from beginning 24 | def get_two_secs(filename): 25 | sig, rate = read_in_audio(filename) 26 | abs_sig = np.abs(sig) 27 | two_secs = abs_sig[rate:3*rate] 28 | return two_secs 29 | 30 | # calculates moving average for a specified window (number of samples) 31 | def take_moving_average(sig, window_width): 32 | cumsum_vec = np.cumsum(np.insert(sig, 0, 0)) 33 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width])/float(window_width) 34 | return ma_vec 35 | 36 | # read in signal, change sample rate to outrate (samples/sec), use write_wav=True to save wav file to disk 37 | def downsample(filename, outrate=8000, write_wav = False): 38 | print(filename) 39 | (rate, sig) = wav.read(filename) 40 | down_sig = librosa.core.resample(sig * 1., rate, outrate, scale=True) 41 | if not write_wav: 42 | return down_sig, outrate 43 | if write_wav: 44 | wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig) 45 | 46 | def librosa_downsample(filename, outrate=8000): 47 | y, s = librosa.load(filename, sr=outrate) 48 | return y, s 49 | 50 | def custom_downsample(filename, outrate=8000): 51 | (rate, sig) = wav.read(filename) 52 | len_in_samps = len(sig) 53 | secs = len_in_samps/rate # number of seconds in the signal 54 | samps = secs*outrate # number of samples after downsampling 55 | print(secs, samps) 56 | Y = scipy.signal.resample(sig , int(samps)) 57 | return Y, outrate 58 |
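# (Usage sketch, not part of the original file; 'sample.wav' is a made-up name:
#   sig = make_standard_length('sample.wav')   # 240000 samples = 30 s at 8 kHz
#   feats = make_normed_mfcc('sample.wav')     # (13, 2999): 13 coefficients x 2999 frames,
# which make_class_array then stacks and transposes into the (n, 2999, 13)
# arrays the Conv1D models above expect.)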
59 | # change total number of samps for downsampled file to n_samps by trimming or zero-padding and standardize them 60 | def make_standard_length(filename, n_samps=240000): 61 | down_sig, rate = librosa_downsample(filename) 62 | normed_sig = librosa.util.fix_length(down_sig, n_samps) 63 | normed_sig = (normed_sig - np.mean(normed_sig))/np.std(normed_sig) 64 | return normed_sig 65 | 66 | # from a folder containing wav files, normalize each, divide into num_splits-1 chunks and write the resulting np.arrays to a single matrix 67 | def make_split_audio_array(folder, num_splits = 5): 68 | """ 69 | returns numpy array of split audio for a folder 70 | """ 71 | lst = [] 72 | for filename in tqdm(os.listdir(folder)) : 73 | if filename.endswith('wav'): 74 | normed_sig = make_standard_length(os.path.join(folder, filename)) 75 | chunk = normed_sig.shape[0]//num_splits # integer chunk size so the slices below stay valid indices 76 | for i in range(num_splits - 1): 77 | lst.append(normed_sig[i*chunk:(i+2)*chunk]) 78 | lst = np.array(lst) 79 | lst = lst.reshape(lst.shape[0], -1) 80 | return lst 81 | 82 | # for input wav file outputs (13, 2999) mfcc np array 83 | def make_normed_mfcc(filename, outrate=8000): 84 | normed_sig = make_standard_length(filename) 85 | normed_mfcc_feat = mfcc(normed_sig, outrate) 86 | normed_mfcc_feat = normed_mfcc_feat.T 87 | return normed_mfcc_feat 88 | 89 | # make mfcc np array from wav file using librosa package 90 | def make_librosa_mfcc(filename): 91 | y, sr = librosa.load(filename) 92 | mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) 93 | return mfcc_feat 94 | 95 | # make mfcc np array from wav file using speech features package 96 | def make_mfcc(filename): 97 | (rate, sig) = wav.read(filename) 98 | mfcc_feat = mfcc(sig, rate) 99 | mfcc_feat = mfcc_feat.T 100 | return mfcc_feat 101 | 102 | # for folder containing wav files, output numpy array of normed mfcc 103 | def make_class_array(folder): 104 | lst = [] 105 | files = os.listdir(folder) 106 | count_files = len(files) 107 | for idx, file_path in tqdm(enumerate(files)): 108 | filename = os.path.join(folder, file_path) 109 | lst.append(make_normed_mfcc(filename)) 110 | class_array = np.array(lst) 111 | class_array = np.reshape(class_array, (class_array.shape[0], class_array.shape[2], class_array.shape[1])) 112 | return class_array 113 | 114 | # read in wav file, output (1,13) numpy array of mean mfccs for each of 13 features 115 | def make_mean_mfcc(filename): 116 | try: 117 | (rate, sig) = wav.read(filename) 118 | mfcc_feat = mfcc(sig, rate) 119 | avg_mfcc = np.mean(mfcc_feat, axis = 0) 120 | return avg_mfcc 121 | except: # unreadable wav: skip and return None 122 | pass 123 | 124 | # write new csv corresponding to dataframe of given language and gender 125 | def make_df_language_gender(df, language, gender): 126 | newdf = df.query("native_language == @language").query("sex == @gender") 127 | newdf.to_csv('df_{}_{}.csv'.format(language, gender)) 128 | 129 | # write new directories to disk containing the male and female speakers from the most common languages 130 | def make_folders_from_csv(): 131 | top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian', 'portuguese', 'dutch', 'turkish', 'italian', 'polish', 'japanese', 'vietnamese'] 132 | for lang in top_15_langs: 133 | os.makedirs('{}/{}_male'.format(lang, lang)) 134 | os.makedirs('{}/{}_female'.format(lang, lang)) 135 | 136 | # copy files to the corresponding directories 137 | def copy_files_from_csv(): 138 | top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian', 'portuguese', 'dutch', 'turkish', 'italian', 'polish',
'japanese', 'vietnamese'] 139 | for lang in top_15_langs: 140 | df_male = pd.read_csv('df_{}_male.csv'.format(lang)) 141 | df_female = pd.read_csv('df_{}_female.csv'.format(lang)) 142 | m_list = df_male['filename'].values 143 | f_list = df_female['filename'].values 144 | for filename in f_list: 145 | shutil.copy2('big_langs/{}/{}.wav'.format(lang, filename), 'big_langs/{}/{}_female/{}.wav'.format(lang, lang, filename)) 146 | 147 | # input folder of wav files, output pandas dataframe of mean mfcc values 148 | def make_mean_mfcc_df(folder): 149 | norms = [] 150 | for file_path in os.listdir(folder): 151 | filename = os.path.join(folder, file_path) 152 | (rate, sig) = wav.read(filename) 153 | mfcc_feat = mfcc(sig, rate) 154 | mean_mfcc = np.mean(mfcc_feat, axis = 0) 155 | #mean_mfcc = np.reshape(mean_mfcc, (1,13)) 156 | norms.append(mean_mfcc) 157 | flat = [a.ravel() for a in norms] 158 | stacked = np.vstack(flat) 159 | df = pd.DataFrame(stacked) 160 | return df 161 | -------------------------------------------------------------------------------- /classification/testing.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/classification/testing.pyc -------------------------------------------------------------------------------- /data/all_accents/accent_trim_gen_x.py: -------------------------------------------------------------------------------- 1 | from mfcc import * 2 | import numpy as np 3 | 4 | folder = '/home/enigmaeth/accentPhase2/data/all_accents/all_accents_trim' 5 | 6 | x = make_class_array(folder) 7 | print(x.shape) 8 | X_file = '/home/enigmaeth/accentPhase2/data/numpy_vectors/x_' + (folder.split('/'))[-1] 9 | 10 | print("saving labels to ", X_file) 11 | np.save(X_file, x) 12 | 13 | 14 | 15 | # filename = "english1.wav" 16 | 17 | # with open(filename, 'rb') as f: 18 | # print(read_in_audio(f)) 19 | 20 | # cd = make_class_array('/media/enigmaeth/My Passport/Datasets/Accent/clean_data') 21 | # print(cd.shape) 22 | # np.save('top_3_100_split_mfcc.npy', cd) 23 | # mf = make_mean_mfcc_df('/media/enigmaeth/My Passport/Datasets/Accent/sounds_wav') 24 | # print(mf.shape) 25 | # np.save('top_3_100_split_y.npy', mf) 26 | -------------------------------------------------------------------------------- /data/all_accents/accent_trim_gen_y.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | def generate_y(folder): 5 | accents = {} 6 | counts = {} 7 | y = [] 8 | index = 0 9 | 10 | for filename in os.listdir(folder): 11 | name = ''.join([i for i in filename if not i.isdigit()]) 12 | name = name.split('_')[0] 13 | if name not in accents: 14 | accents[name] = index 15 | index += 1 16 | counts[name] = 0 17 | 18 | counts[name] += 1 19 | y.append(accents[name]) 20 | 21 | print(counts) 22 | print(accents) 23 | 24 | sorted_counts = sorted(counts, key=counts.get, reverse=True) 25 | for r in sorted_counts: 26 | print(r, counts[r]) 27 | 28 | np_y = np.reshape(np.array(y), (len(y), 1)) 29 | 30 | Y_file = '/home/enigmaeth/accentPhase2/data/numpy_vectors/y_'+ (folder.split('/'))[-1] 31 | print("saving labels to ", Y_file) 32 | np.save(Y_file, y) 33 | 34 | folder = "/home/enigmaeth/accentPhase2/data/all_accents/all_accents_trim" 35 | generate_y(folder) -------------------------------------------------------------------------------- /data/all_accents/all_accents_split.log.save: 
-------------------------------------------------------------------------------- 1 | Splitting Bangla_Arc.wav where energy is below 1.0% for longer than 2.0s. Splitting Bangla_Jay.wav where energy is below 1.0% for longer than 2.0s. Splitting Malayalam_Hab.wav where energy is below 1.0% for longer than 2.0s. Splitting Malayalam_Sal.wav where energy is below 1.0% for longer than 2.0s. Splitting Malayalam_Sha.wav where energy is below 1.0% for longer than 2.0s. Splitting Odiya_Suc.wav where energy is below 1.0% for longer than 2.0s. Splitting Telugu_Nav.wav where energy is below 1.0% for longer than 2.0s.
Splitting Telugu_Tho.wav where energy is below 1.0% for longer than 2.0s.
-------------------------------------------------------------------------------- /data/all_accents/all_accents_trim.sh: --------------------------------------------------------------------------------
1 | cd all_accents_out 2 | for i in *.wav 3 | do 4 | sox "$i" ../all_accents_trim/"$i" trim 0 300 5 | done 6 |
-------------------------------------------------------------------------------- /data/all_accents/alt_split.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python 2 | 3 | from scipy.io import wavfile 4 | import os 5 | import numpy as np 6 | import argparse 7 | from tqdm import tqdm 8 | 9 | # Utility functions 10 | xrange = range 11 | def windows(signal, window_size, step_size): 12 | if type(window_size) is not int: 13 | raise AttributeError("Window size must be an integer.") 14 | if type(step_size) is not int: 15 | raise AttributeError("Step size must be an integer.") 16 | for i_start in xrange(0, len(signal), step_size): 17 | i_end = i_start + window_size 18 | if i_end >= len(signal): 19 | break 20 | yield signal[i_start:i_end] 21 | 22 | def energy(samples): 23 | return np.sum(np.power(samples, 2.)) / float(len(samples)) 24 | 25 | def rising_edges(binary_signal): 26 | previous_value = 0 27 | index = 0 28 | for x in binary_signal: 29 | if x and not previous_value: 30 | yield index 31 | previous_value = x 32 | index += 1 33 | 34 | # Process command line arguments 35 | 36 | parser = argparse.ArgumentParser(description='Split a WAV file at silence.') 37 | parser.add_argument('input_file', type=str, help='The WAV file to split.') 38 | parser.add_argument('--output-dir', '-o', type=str, default='.', help='The output folder. Defaults to the current folder.') 39 | parser.add_argument('--min-silence-length', '-m', type=float, default=3., help='The minimum length of silence at which a split may occur [seconds]. Defaults to 3 seconds.') 40 | parser.add_argument('--silence-threshold', '-t', type=float, default=1e-6, help='The energy level (between 0.0 and 1.0) below which the signal is regarded as silent. Defaults to 1e-6 == 0.0001%.') 41 | parser.add_argument('--step-duration', '-s', type=float, default=None, help='The amount of time to step forward in the input file after calculating energy. Smaller value = slower, but more accurate silence detection. Larger value = faster, but might miss some split opportunities. Defaults to (min-silence-length / 10.).')
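# (Worked example, not part of the original script: split_to_wav.sh below calls
# this with --min-silence-length=2 and --silence-threshold=0.01, so for a
# 44.1 kHz recording -- an assumed rate, for illustration --
# window_size = int(2 * 44100) = 88200 samples and, with the default step of
# 2 / 10 = 0.2 s, step_size = int(0.2 * 44100) = 8820 samples.)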
42 | parser.add_argument('--dry-run', '-n', action='store_true', help='Don\'t actually write any output files.') 43 | 44 | args = parser.parse_args() 45 | 46 | input_filename = args.input_file 47 | window_duration = args.min_silence_length 48 | if args.step_duration is None: 49 | step_duration = window_duration / 10. 50 | else: 51 | step_duration = args.step_duration 52 | silence_threshold = args.silence_threshold 53 | output_dir = args.output_dir 54 | output_filename_prefix = os.path.splitext(os.path.basename(input_filename))[0] 55 | dry_run = args.dry_run 56 | 57 | print(f"Splitting {input_filename} where energy is below {silence_threshold * 100}% for longer than {window_duration}s.") 58 | 59 | sample_rate, samples = wavfile.read(filename=input_filename, mmap=True) 60 | 61 | max_amplitude = np.iinfo(samples.dtype).max 62 | max_energy = energy([max_amplitude]) 63 | 64 | window_size = int(window_duration * sample_rate) 65 | step_size = int(step_duration * sample_rate) 66 | 67 | signal_windows = windows( 68 | signal=samples, 69 | window_size=window_size, 70 | step_size=step_size 71 | ) 72 | 73 | window_energy = (energy(w) / max_energy for w in tqdm( 74 | signal_windows, 75 | total=int(len(samples) / float(step_size)) 76 | )) 77 | 78 | window_silence = (e > silence_threshold for e in window_energy) # True where the window is above the threshold, i.e. voiced 79 | 80 | cut_times = (r * step_duration for r in rising_edges(window_silence)) 81 | 82 | # This is the step that takes long, since we force the generators to run. 83 | cut_samples = [int(t * sample_rate) for t in cut_times] 84 | cut_samples.append(-1) 85 | 86 | cut_ranges = [(i, cut_samples[i], cut_samples[i+1]) for i in xrange(len(cut_samples) - 1)] 87 | 88 | for i, start, stop in tqdm(cut_ranges): 89 | output_file_path = "{}_{:03d}.wav".format( 90 | os.path.join(output_dir, output_filename_prefix), 91 | i 92 | ) 93 | if not dry_run: 94 | wavfile.write( 95 | filename=output_file_path, 96 | rate=sample_rate, 97 | data=samples[start:stop] 98 | ) 99 |
-------------------------------------------------------------------------------- /data/all_accents/mfcc.py: --------------------------------------------------------------------------------
1 | import numpy as np 2 | import pandas as pd 3 | from python_speech_features import mfcc 4 | from python_speech_features import logfbank 5 | import scipy.io.wavfile as wav 6 | from scipy.io.wavfile import write as wav_write 7 | import librosa 8 | import scipy 9 | from tqdm import tqdm 10 | import shutil # used by copy_files_from_csv below 11 | import os 12 | 13 | 14 | ''' 15 | mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True) 16 | ''' 17 | # read in wav file, get out signal (np array) and sampling rate (int) 18 | def read_in_audio(filename): 19 | (rate, sig) = wav.read(filename) 20 | return sig, rate 21 | 22 | 23 | # read in signal, take absolute value and slice seconds 1-3 from beginning 24 | def get_two_secs(filename): 25 | sig, rate = read_in_audio(filename) 26 | abs_sig = np.abs(sig) 27 | two_secs = abs_sig[rate:3*rate] 28 | return two_secs 29 | 30 | # calculates moving average for a specified window (number of samples) 31 | def take_moving_average(sig, window_width): 32 | cumsum_vec = np.cumsum(np.insert(sig, 0, 0)) 33 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width])/float(window_width) 34 | return ma_vec 35 |
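# (Worked example, not part of the original file: for sig = [1, 2, 3, 4] and
# window_width = 2, cumsum of [0, 1, 2, 3, 4] -> [0, 1, 3, 6, 10], and the
# differences give [3, 5, 7] / 2 = [1.5, 2.5, 3.5] -- the same result as
# np.convolve(sig, [0.5, 0.5], 'valid').)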
36 | # read in signal, change sample rate to outrate (samples/sec), use write_wav=True to save wav file to disk 37 | def downsample(filename, outrate=8000, write_wav = False): 38 | print(filename) 39 | (rate, sig) = wav.read(filename) 40 | down_sig = librosa.core.resample(sig * 1., rate, outrate, scale=True) 41 | if not write_wav: 42 | return down_sig, outrate 43 | if write_wav: 44 | wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig) 45 | 46 | def librosa_downsample(filename, outrate=8000): 47 | y, s = librosa.load(filename, sr=outrate) 48 | return y, s 49 | 50 | # trim or zero-pad the downsampled file to a fixed 40000 samples (5 s at 8 kHz) and standardize it 51 | def make_standard_length(filename): 52 | down_sig, rate = librosa_downsample(filename) 53 | normed_sig = librosa.util.fix_length(down_sig, 40000) 54 | # start = 1 * 8000 55 | # end = 4 * 8000 56 | # normed_sig = down_sig[start:end] 57 | normed_sig = (normed_sig - np.mean(normed_sig))/np.std(normed_sig) 58 | return normed_sig 59 | 60 | # from a folder containing wav files, normalize each, divide into num_splits-1 chunks and write the resulting np.arrays to a single matrix 61 | def make_split_audio_array(folder, num_splits = 5): 62 | """ 63 | returns numpy array of split audio for a folder 64 | """ 65 | lst = [] 66 | for filename in tqdm(os.listdir(folder)) : 67 | if filename.endswith('wav'): 68 | normed_sig = make_standard_length(os.path.join(folder, filename)) 69 | chunk = normed_sig.shape[0]//num_splits # integer chunk size so the slices below stay valid indices 70 | for i in range(num_splits - 1): 71 | lst.append(normed_sig[i*chunk:(i+2)*chunk]) 72 | lst = np.array(lst) 73 | lst = lst.reshape(lst.shape[0], -1) 74 | return lst 75 | 76 | # for input wav file outputs a (13, n_frames) mfcc np array 77 | def make_normed_mfcc(filename, outrate=8000): 78 | normed_sig = make_standard_length(filename) 79 | normed_mfcc_feat = mfcc(normed_sig, outrate) 80 | normed_mfcc_feat = normed_mfcc_feat.T 81 | return normed_mfcc_feat 82 | 83 | # make mfcc np array from wav file using librosa package 84 | def make_librosa_mfcc(filename): 85 | y, sr = librosa.load(filename) 86 | mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) 87 | return mfcc_feat 88 | 89 | # make mfcc np array from wav file using speech features package 90 | def make_mfcc(filename): 91 | (rate, sig) = wav.read(filename) 92 | mfcc_feat = mfcc(sig, rate) 93 | mfcc_feat = mfcc_feat.T 94 | return mfcc_feat 95 | 96 | # for folder containing wav files, output numpy array of normed mfcc 97 | def make_class_array(folder): 98 | lst = [] 99 | files = os.listdir(folder) 100 | count_files = len(files) 101 | for file_path in tqdm(files): 102 | filename = os.path.join(folder, file_path) 103 | lst.append(make_normed_mfcc(filename)) 104 | class_array = np.array(lst) 105 | class_array = np.reshape(class_array, (class_array.shape[0], class_array.shape[2], class_array.shape[1])) 106 | return class_array 107 | 108 | # read in wav file, output (1,13) numpy array of mean mfccs for each of 13 features 109 | def make_mean_mfcc(filename): 110 | try: 111 | (rate, sig) = wav.read(filename) 112 | mfcc_feat = mfcc(sig, rate) 113 | avg_mfcc = np.mean(mfcc_feat, axis = 0) 114 | return avg_mfcc 115 | except: # unreadable wav: skip and return None 116 | pass 117 | 118 | # write new csv corresponding to dataframe of given language and gender 119 | def make_df_language_gender(df, language, gender): 120 | newdf = df.query("native_language == @language").query("sex == @gender") 121 | newdf.to_csv('df_{}_{}.csv'.format(language, gender)) 122 | 123 | # write new directories to disk containing the male and female speakers from the most common languages 124 | def make_folders_from_csv(): 125 |
top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian', 'portuguese', 'dutch', 'turkish', 'italian', 'polish', 'japanese', 'vietnamese'] 126 | for lang in top_15_langs: 127 | os.makedirs('{}/{}_male'.format(lang, lang)) 128 | os.makedirs('{}/{}_female'.format(lang, lang)) 129 | 130 | # copy files to the corresponding directories 131 | def copy_files_from_csv(): 132 | top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian', 'portuguese', 'dutch', 'turkish', 'italian', 'polish', 'japanese', 'vietnamese'] 133 | for lang in top_15_langs: 134 | df_male = pd.read_csv('df_{}_male.csv'.format(lang)) 135 | df_female = pd.read_csv('df_{}_female.csv'.format(lang)) 136 | m_list = df_male['filename'].values 137 | f_list = df_female['filename'].values 138 | for filename in f_list: 139 | shutil.copy2('big_langs/{}/{}.wav'.format(lang, filename), 'big_langs/{}/{}_female/{}.wav'.format(lang, lang, filename)) 140 | 141 | # input folder of wav files, output pandas dataframe of mean mfcc values 142 | def make_mean_mfcc_df(folder): 143 | norms = [] 144 | for file_path in os.listdir(folder): 145 | filename = os.path.join(folder, file_path) 146 | (rate, sig) = wav.read(filename) 147 | mfcc_feat = mfcc(sig, rate) 148 | mean_mfcc = np.mean(mfcc_feat, axis = 0) 149 | #mean_mfcc = np.reshape(mean_mfcc, (1,13)) 150 | norms.append(mean_mfcc) 151 | flat = [a.ravel() for a in norms] 152 | stacked = np.vstack(flat) 153 | df = pd.DataFrame(stacked) 154 | return df 155 | -------------------------------------------------------------------------------- /data/all_accents/split_to_wav.sh: -------------------------------------------------------------------------------- 1 | for file in *.wav 2 | do 3 | python3 alt_split.py "$file" --output-dir all_accents_out/ --min-silence-length=2 --silence-threshold=0.01 4 | done 5 | -------------------------------------------------------------------------------- /data/folder_structure: -------------------------------------------------------------------------------- 1 | data [error opening dir] 2 | 3 | 0 directories, 0 files 4 | -------------------------------------------------------------------------------- /data/non_trained_accents/accent_trim_gen_x.py: -------------------------------------------------------------------------------- 1 | from mfcc import * 2 | import numpy as np 3 | 4 | folder = '/home/enigmaeth/accentPhase2/data/non_trained_accents/bangla' 5 | 6 | x = make_class_array(folder) 7 | print(x.shape) 8 | X_file = '/home/enigmaeth/accentPhase2/data/non_trained_accents/numpy_vectors/x_' + (folder.split('/'))[-1] 9 | 10 | print("saving labels to ", X_file) 11 | np.save(X_file, x) 12 | 13 | 14 | 15 | # filename = "english1.wav" 16 | 17 | # with open(filename, 'rb') as f: 18 | # print(read_in_audio(f)) 19 | 20 | # cd = make_class_array('/media/enigmaeth/My Passport/Datasets/Accent/clean_data') 21 | # print(cd.shape) 22 | # np.save('top_3_100_split_mfcc.npy', cd) 23 | # mf = make_mean_mfcc_df('/media/enigmaeth/My Passport/Datasets/Accent/sounds_wav') 24 | # print(mf.shape) 25 | # np.save('top_3_100_split_y.npy', mf) 26 | -------------------------------------------------------------------------------- /data/non_trained_accents/accent_trim_gen_y.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | def generate_y(folder): 5 | accents = {} 6 | counts = {} 7 | y = [] 8 | index = 0 9 | 10 | for filename in os.listdir(folder): 11 
| name = ''.join([i for i in filename if not i.isdigit()]) 12 | name = name.split('_')[0] 13 | if name not in accents: 14 | accents[name] = index 15 | index += 1 16 | counts[name] = 0 17 | 18 | counts[name] += 1 19 | y.append(accents[name]+1) 20 | 21 | print(counts) 22 | print(accents) 23 | 24 | sorted_counts = sorted(counts, key=counts.get, reverse=True) 25 | for r in sorted_counts: 26 | print(r, counts[r]) 27 | 28 | np_y = np.reshape(np.array(y), (len(y), 1)) 29 | 30 | Y_file = '/home/enigmaeth/accentPhase2/data/non_trained_accents/numpy_vectors/y_'+ (folder.split('/'))[-1] 31 | print("saving labels to ", Y_file) 32 | np.save(Y_file, y) 33 | 34 | folder = "/home/enigmaeth/accentPhase2/data/non_trained_accents/bangla" 35 | generate_y(folder)
-------------------------------------------------------------------------------- /data/non_trained_accents/mfcc.py: --------------------------------------------------------------------------------
1 | import numpy as np 2 | import pandas as pd 3 | from python_speech_features import mfcc 4 | from python_speech_features import logfbank 5 | import scipy.io.wavfile as wav 6 | from scipy.io.wavfile import write as wav_write 7 | import librosa 8 | import scipy 9 | from tqdm import tqdm 10 | import shutil # used by copy_files_from_csv below 11 | import os 12 | 13 | 14 | ''' 15 | mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True) 16 | ''' 17 | # read in wav file, get out signal (np array) and sampling rate (int) 18 | def read_in_audio(filename): 19 | (rate, sig) = wav.read(filename) 20 | return sig, rate 21 | 22 | 23 | # read in signal, take absolute value and slice seconds 1-3 from beginning 24 | def get_two_secs(filename): 25 | sig, rate = read_in_audio(filename) 26 | abs_sig = np.abs(sig) 27 | two_secs = abs_sig[rate:3*rate] 28 | return two_secs 29 | 30 | # calculates moving average for a specified window (number of samples) 31 | def take_moving_average(sig, window_width): 32 | cumsum_vec = np.cumsum(np.insert(sig, 0, 0)) 33 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width])/float(window_width) 34 | return ma_vec 35 | 36 | # read in signal, change sample rate to outrate (samples/sec), use write_wav=True to save wav file to disk 37 | def downsample(filename, outrate=8000, write_wav = False): 38 | print(filename) 39 | (rate, sig) = wav.read(filename) 40 | down_sig = librosa.core.resample(sig * 1., rate, outrate, scale=True) 41 | if not write_wav: 42 | return down_sig, outrate 43 | if write_wav: 44 | wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig) 45 | 46 | def librosa_downsample(filename, outrate=8000): 47 | y, s = librosa.load(filename, sr=outrate) 48 | return y, s 49 | 50 | # trim or zero-pad the downsampled file to a fixed 40000 samples (5 s at 8 kHz) and standardize it 51 | def make_standard_length(filename): 52 | down_sig, rate = librosa_downsample(filename) 53 | normed_sig = librosa.util.fix_length(down_sig, 40000) 54 | # start = 1 * 8000 55 | # end = 4 * 8000 56 | # normed_sig = down_sig[start:end] 57 | normed_sig = (normed_sig - np.mean(normed_sig))/np.std(normed_sig) 58 | return normed_sig 59 | 60 | # from a folder containing wav files, normalize each, divide into num_splits-1 chunks and write the resulting np.arrays to a single matrix 61 | def make_split_audio_array(folder, num_splits = 5): 62 | """ 63 | returns numpy array of split audio for a folder 64 | """ 65 | lst = []
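    # (Worked example, not part of the original file: make_standard_length above
    # fixes every signal to 40000 samples, so with num_splits=5, chunk = 8000 and
    # the loop below yields num_splits-1 = 4 overlapping windows [0:16000],
    # [8000:24000], [16000:32000], [24000:40000] -- 2 s each at 8 kHz, 50% overlap.)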
66 | for filename in tqdm(os.listdir(folder)) : 67 | if filename.endswith('wav'): 68 | normed_sig = make_standard_length(os.path.join(folder, filename)) 69 | chunk = normed_sig.shape[0]//num_splits # integer chunk size so the slices below stay valid indices 70 | for i in range(num_splits - 1): 71 | lst.append(normed_sig[i*chunk:(i+2)*chunk]) 72 | lst = np.array(lst) 73 | lst = lst.reshape(lst.shape[0], -1) 74 | return lst 75 | 76 | # for input wav file outputs a (13, n_frames) mfcc np array 77 | def make_normed_mfcc(filename, outrate=8000): 78 | normed_sig = make_standard_length(filename) 79 | normed_mfcc_feat = mfcc(normed_sig, outrate) 80 | normed_mfcc_feat = normed_mfcc_feat.T 81 | return normed_mfcc_feat 82 | 83 | # make mfcc np array from wav file using librosa package 84 | def make_librosa_mfcc(filename): 85 | y, sr = librosa.load(filename) 86 | mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) 87 | return mfcc_feat 88 | 89 | # make mfcc np array from wav file using speech features package 90 | def make_mfcc(filename): 91 | (rate, sig) = wav.read(filename) 92 | mfcc_feat = mfcc(sig, rate) 93 | mfcc_feat = mfcc_feat.T 94 | return mfcc_feat 95 | 96 | # for folder containing wav files, output numpy array of normed mfcc 97 | def make_class_array(folder): 98 | lst = [] 99 | files = os.listdir(folder) 100 | count_files = len(files) 101 | for file_path in tqdm(files): 102 | filename = os.path.join(folder, file_path) 103 | lst.append(make_normed_mfcc(filename)) 104 | class_array = np.array(lst) 105 | class_array = np.reshape(class_array, (class_array.shape[0], class_array.shape[2], class_array.shape[1])) 106 | return class_array 107 | 108 | # read in wav file, output (1,13) numpy array of mean mfccs for each of 13 features 109 | def make_mean_mfcc(filename): 110 | try: 111 | (rate, sig) = wav.read(filename) 112 | mfcc_feat = mfcc(sig, rate) 113 | avg_mfcc = np.mean(mfcc_feat, axis = 0) 114 | return avg_mfcc 115 | except: # unreadable wav: skip and return None 116 | pass 117 | 118 | # write new csv corresponding to dataframe of given language and gender 119 | def make_df_language_gender(df, language, gender): 120 | newdf = df.query("native_language == @language").query("sex == @gender") 121 | newdf.to_csv('df_{}_{}.csv'.format(language, gender)) 122 | 123 | # write new directories to disk containing the male and female speakers from the most common languages 124 | def make_folders_from_csv(): 125 | top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian', 'portuguese', 'dutch', 'turkish', 'italian', 'polish', 'japanese', 'vietnamese'] 126 | for lang in top_15_langs: 127 | os.makedirs('{}/{}_male'.format(lang, lang)) 128 | os.makedirs('{}/{}_female'.format(lang, lang)) 129 | 130 | # copy files to the corresponding directories 131 | def copy_files_from_csv(): 132 | top_15_langs = ['english', 'spanish', 'arabic', 'mandarin', 'french', 'german', 'korean', 'russian', 'portuguese', 'dutch', 'turkish', 'italian', 'polish', 'japanese', 'vietnamese'] 133 | for lang in top_15_langs: 134 | df_male = pd.read_csv('df_{}_male.csv'.format(lang)) 135 | df_female = pd.read_csv('df_{}_female.csv'.format(lang)) 136 | m_list = df_male['filename'].values 137 | f_list = df_female['filename'].values 138 | for filename in f_list: 139 | shutil.copy2('big_langs/{}/{}.wav'.format(lang, filename), 'big_langs/{}/{}_female/{}.wav'.format(lang, lang, filename)) 140 | 141 | # input folder of wav files, output pandas dataframe of mean mfcc values 142 | def make_mean_mfcc_df(folder): 143 | norms = [] 144 | for file_path in os.listdir(folder): 145 | filename = os.path.join(folder, file_path) 146 | (rate, sig) =
wav.read(filename) 147 | mfcc_feat = mfcc(sig, rate) 148 | mean_mfcc = np.mean(mfcc_feat, axis = 0) 149 | #mean_mfcc = np.reshape(mean_mfcc, (1,13)) 150 | norms.append(mean_mfcc) 151 | flat = [a.ravel() for a in norms] 152 | stacked = np.vstack(flat) 153 | df = pd.DataFrame(stacked) 154 | return df 155 | -------------------------------------------------------------------------------- /data/numpy_vectors/conv1d.ipynb - Colaboratory3:1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/data/numpy_vectors/conv1d.ipynb - Colaboratory3:1.pdf -------------------------------------------------------------------------------- /data/numpy_vectors/conv1d.ipynb - Colaboratoryacc99ep10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/data/numpy_vectors/conv1d.ipynb - Colaboratoryacc99ep10.pdf -------------------------------------------------------------------------------- /data/numpy_vectors/conv1d.ipynb - Colaboratoryacc99ep10new.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/data/numpy_vectors/conv1d.ipynb - Colaboratoryacc99ep10new.pdf -------------------------------------------------------------------------------- /data/numpy_vectors/conv1d.ipynb - Colaboratoryacc99ep12.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/data/numpy_vectors/conv1d.ipynb - Colaboratoryacc99ep12.pdf -------------------------------------------------------------------------------- /data/numpy_vectors/conv1d.ipynb acc1 splithalf - Colaboratory.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/data/numpy_vectors/conv1d.ipynb acc1 splithalf - Colaboratory.pdf -------------------------------------------------------------------------------- /harvard_sentences.txt: -------------------------------------------------------------------------------- 1 | "The birch canoe slid on the smooth planks.", 2 | "Glue the sheet to the dark blue background", 3 | "It's easy to tell the depth of a well.", 4 | "These days a chicken leg is a rare dish.", 5 | "Rice is often served in round bowls", 6 | "The juice of lemons makes fine punch.", 7 | "The box was thrown beside the parked truck.", 8 | "The hogs were fed chopped corn and garbage.", 9 | "Four hours of steady work faced us.", 10 | "A large size in stockings is hard to sell.", 11 | "The boy was there when the sun rose", 12 | "A rod is used to catch pink salmon.", 13 | "The source of the huge river is the clear spring.", 14 | "Kick the ball straight and follow through", 15 | "Help the woman get back to her feet.", 16 | "A pot of tea helps to pass the evening.", 17 | "Smoky fires lack flame and heat.", 18 | "The soft cushion broke the man's fall.", 19 | "The salt breeze came across from the sea.", 20 | "The girl at the booth sold fifty bonds.", 21 | "The small pup gnawed a hole in the sock.", 22 | "The fish twisted and turned on the bent hook.", 23 | "Press the pants and sew a button on the vest.", 24 | "The swan dive was far short of perfect.", 25 | "The beauty of the view 
stunned the young boy.", 26 | "Two blue fish swam in the tank.", 27 | "Her purse was full of useless trash.", 28 | "The colt reared and threw the tall rider.", 29 | "It snowed, rained, and hailed the same morning.", 30 | "Read verse out loud for pleasure", 31 | "Hoist the load to your left shoulder.", 32 | "Take the winding path to reach the lake.", 33 | "Note closely the size of the gas tank.", 34 | "Wipe the grease off his dirty face.", 35 | "Mend the coat before you go out.", 36 | "The wrist was badly strained and hung limp.", 37 | "The stray cat gave birth to kittens.", 38 | "The young girl gave no clear response.", 39 | "The meal was cooked before the bell rang", 40 | "What joy there is in living.", 41 | "A king ruled the state in the early days.", 42 | "The ship was torn apart on the sharp reef.", 43 | "Sickness kept him home the third week.", 44 | "The wide road shimmered in the hot sun.", 45 | "The lazy cow lay in the cool grass.", 46 | "Lift the square stone over the fence.", 47 | "The rope will bind the seven books at once.", 48 | "Hop over the fence and plunge in.", 49 | "The friendly gang left the drug store.", 50 | "Mesh wire keeps chicks inside.", 51 | "The frosty air passed through the coat.", 52 | "The crooked maze failed to fool the mouse.", 53 | "Adding fast leads to wrong sums.", 54 | "The show was a flop from the very start.", 55 | "A saw is a tool used for making boards", 56 | "The wagon moved on well oiled wheels", 57 | "March the soldiers past the next hill.", 58 | "A cup of sugar makes sweet fudge.", 59 | "Place a rosebush near the porch steps.", 60 | "Both lost their lives in the raging storm.", 61 | "We talked of the side show in the circus", 62 | "Use a pencil to write the first draft", 63 | "He ran half way to the hardware store.", 64 | "The clock struck to mark the third period", 65 | "A small creek cut across the field", 66 | "Cars and busses stalled in snow drifts", 67 | "The set of china hit the floor with a crash.", 68 | "This is a grand season for hikes on the road", 69 | "The dune rose from the edge of the water.", 70 | "Those words were the cue for the actor to leave", 71 | "A yacht slid around the point into the bay", 72 | "The two met while playing on the sand", 73 | "The ink stain dried on the finished page", 74 | "The walled town was seized without a fight.", 75 | "The lease ran out in sixteen weeks.", 76 | "A tame squirrel makes a nice pet.", 77 | "The horn of the car woke the sleeping cop.", 78 | "The heart beat strongly and with firm strokes.", 79 | "The pearl was worn in a thin silver ring.", 80 | "The fruit peel was cut in thick slices", 81 | "The Navy attacked the big task force", 82 | "See the cat glaring at the scared mouse.", 83 | "There are more than two factors here", 84 | "The hat brim was wide and too droopy", 85 | "The lawyer tried to lose his case", 86 | "The grass curled around the fence post", 87 | "Cut the pie into large parts", 88 | "Men strive but seldom get rich.", 89 | "Always close the barn door tight", 90 | "He lay prone and hardly moved a limb", 91 | "The slush lay deep along the street.", 92 | "A wisp of cloud hung in the blue air.", 93 | "A pound of sugar costs more than eggs", 94 | "The fin was sharp and cut the clear water", 95 | "The play seems dull and quite stupid", 96 | "Bail the boat to stop it from sinking", 97 | "The term ended in late june that year", 98 | "A Tusk is used to make costly gifts", 99 | "Ten pins were set in order", 100 | "The bill was paid every third week", 101 | "Oak is strong and also gives 
shade", 102 | "Cats and Dogs each hate the other", 103 | "The pipe began to rust while new", 104 | "Open the crate but don't break the glass", 105 | "Add the sum to the product of these three", 106 | "Thieves who rob friends deserve jail", 107 | "The ripe taste of cheese improves with age", 108 | "Act on these orders with great speed", 109 | "The hog crawled under the high fence", 110 | "Move the vat over the hot fire", 111 | "The bark of the pine tree was shiny and dark", 112 | "Leaves turn brown and yellow in the fall.", 113 | "The pennant waved when the wind blew", 114 | "Split the log with a quick, sharp blow", 115 | "Burn peat after the logs give out", 116 | "He ordered peach pie with ice cream", 117 | "Weave the carpet on the right hand side", 118 | "Hemp is a weed found in parts of the tropics", 119 | "A lame back kept his score low", 120 | "We find joy in the simplest things", 121 | "Type out three lists of orders", 122 | "The harder he tried the less he got done", 123 | "The boss ran the show with a watchful eye", 124 | "The cup cracked and spilled its contents", 125 | "Paste can cleanse the most dirty brass", 126 | "The slang word for raw whiskey is booze", 127 | "It caught its hind paw in a rusty trap", 128 | "The wharf could be seen at the farther shore", 129 | "Feel the heat of the weak dying flame", 130 | "The tiny girl took off her hat", 131 | "A cramp is no small danger on a swim", 132 | "He said the same phrase thirty times", 133 | "Pluck the bright rose without leaves", 134 | "Two plus seven is less than ten", 135 | "The glow deepened in the eyes of the sweet girl", 136 | "Bring your problems to the wise chief", 137 | "Write a fond note to the friend you cherish", 138 | "Clothes and lodging are free to new men", 139 | "We frown when events take a bad turn", 140 | "Port is a strong wine with a smoky taste", 141 | "The young kid jumped the rusty gate", 142 | "Guess the result from the first scores.", 143 | "A salt pickle tastes fine with ham", 144 | "The just claim got the right verdict", 145 | "Those thistles bend in a high wind", 146 | "Pure bred poodles have curls", 147 | "The tree top waved in a graceful way.", 148 | "The spot on the blotter was made by green ink.", 149 | "Mud was spattered on the front of his white shirt", 150 | "The cigar burned a hole in the desk top.", 151 | "The empty flask stood on the tin tray", 152 | "A speedy man can beat this track mark.", 153 | "He broke a new shoelace that day", 154 | "The coffee stand is too high for the couch.", 155 | "The urge to write short stories is rare.", 156 | "The pencils have all been used", 157 | "The pirates seized the crew of the lost ship", 158 | "We tried to replace the coin but failed.", 159 | "She sewed the torn coat quite neatly.", 160 | "The sofa cushion is red and of light weight", 161 | "The jacket hung on the back of the wide chair", 162 | "At that high level the air is pure", 163 | "Drop the two when you add the figures", 164 | "A filing case is now hard to buy", 165 | "An abrupt start does not win the prize.", 166 | "Wood is best for making toys and blocks", 167 | "The office paint was a dull, sad tan", 168 | "He knew the skill of the great young actress", 169 | "A rag will soak up spilled water", 170 | "A shower of dirt fell from the hot pipes", 171 | "Steam hissed from the broken valve.", 172 | "The child almost hurt the small dog", 173 | "There was a sound of dry leaves outside", 174 | "The sky that morning was clear and bright blue", 175 | "Torn scraps littered the stone floor", 176 | "Sunday is 
the best part of the week.", 177 | "The doctor cured him with these pills", 178 | "The new girl was fired today at noon", 179 | "They felt gay when the ship arrived in port", 180 | "Add the store's account to the last cent", 181 | "Acid burns holes in wool cloth", 182 | "Fairy tales should be fun to write", 183 | "Eight miles of woodland burned to waste", 184 | "The third act was dull and tired the players", 185 | "A young child should not suffer fright", 186 | "Add the column and put the sum here", 187 | "We admire and love a good cook", 188 | "There the flood mark is ten inches", 189 | "He carved a head from the round block of marble", 190 | "She has a smart way of wearing clothes", 191 | "The fruit of a fig tree is apple shaped", 192 | "Corn cobs can be used to kindle a fire.", 193 | "Where were they when the noise started", 194 | "The paper box is full of thumb tacks", 195 | "Sell your gift to a buyer at a good gain", 196 | "The tongs lay beside the ice pail", 197 | "The petals fall with the next puff of wind", 198 | "Bring your best compass to the third class", 199 | "They could laugh although they were sad", 200 | "Farmers came in to thresh the oat crop", 201 | "The brown house was on fire to the attic", 202 | "The lure is used to catch trout and flounder", 203 | "Float the soap on top of the bath water", 204 | "A blue crane is a tall wading bird", 205 | "A fresh start will work such wonders", 206 | "The club rented the rink for the fifth night", 207 | "After the dance, they went straight home", 208 | "The hostess taught the new maid to serve", 209 | "He wrote his last novel there at the inn", 210 | "Even the worst will beat his low score", 211 | "The cement had dried when he moved it", 212 | "The loss of the second ship was hard to take", 213 | "The fly made its way along the wall", 214 | "Do that with a wooden stick", 215 | "Live wires should be kept covered", 216 | "The large house had hot water taps", 217 | "It is hard to erase blue or red ink", 218 | "Write at once or you may forget it", 219 | "The doorknob was made of bright clean brass", 220 | "The wreck occurred by the bank on Main Street", 221 | "A pencil with black lead writes best", 222 | "Coax a young calf to drink from a bucket", 223 | "Schools for ladies teach charm and grace", 224 | "The lamp shone with a steady green flame", 225 | "They took the axe and the saw to the forest", 226 | "The ancient coin was quite dull and worn", 227 | "The shaky barn fell with a loud crash.", 228 | "Jazz and swing fans like fast music", 229 | "Rake the rubbish up and then burn it", 230 | "Slash the gold cloth into fine ribbons", 231 | "Try to have the court decide the case", 232 | "They are pushed back each time they attack", 233 | "He broke his ties with groups of former friends", 234 | "They floated on the raft to sun their white backs", 235 | "The map had an X that meant nothing", 236 | "Whitings are small fish caught in nets", 237 | "Some ads serve to cheat buyers", 238 | "Jerk the rope and the bell rings weakly", 239 | "A waxed floor makes us lose balance", 240 | "Madam, this is the best brand of corn", 241 | "On the islands the sea breeze is soft and mild", 242 | "The play began as soon as we sat down", 243 | "This will lead the world to more sound and fury", 244 | "Add salt before you fry the egg", 245 | "The rush for funds reached its peak Tuesday", 246 | "The birch looked stark white and lonesome", 247 | "The box is held by a bright red snapper", 248 | "To make pure ice, you freeze water", 249 | "The first worm gets snapped 
early", 250 | "Jump the fence and hurry up the bank", 251 | "Yell and clap as the curtain slides back", 252 | "They are men who walk the middle of the road", 253 | "Both brothers wear the same size", 254 | "In some form or other we need fun", 255 | "The prince ordered his head chopped off", 256 | "The houses are built of red clay bricks", 257 | "Ducks fly north but lack a compass", 258 | "Fruit flavors are used in fizz drinks", 259 | "These pills do less good than others", 260 | "Canned pears lack full flavor", 261 | "The dark pot hung in the front closet", 262 | "Carry the pail to the wall and spill it there", 263 | "The train brought our hero to the big town", 264 | "We are sure that one war is enough", 265 | "Gray paint stretched for miles around", 266 | "The rude laugh filled the empty room", 267 | "High seats are best for football fans", 268 | "Tea served from the brown jug is tasty", 269 | "A dash of pepper spoils beef stew", 270 | "A zestful food is the hot-cross bun", 271 | "The horse trotted around the field at a brisk pace", 272 | "Find the twin who stole the pearl necklace", 273 | "Cut the cord that binds the box tightly", 274 | "The red tape bound the smuggled food", 275 | "Look in the corner to find the tan shirt", 276 | "The cold drizzle will halt the bond drive", 277 | "Nine men were hired to dig the ruins", 278 | "The junk yard had a mouldy smell", 279 | "The flint sputtered and lit a pine torch", 280 | "Soak the cloth and drown the sharp odor", 281 | "The shelves were bare of both jam or crackers", 282 | "A joy to every child is the swan boat", 283 | "All sat frozen and watched the screen", 284 | "A cloud of dust stung his tender eyes", 285 | "To reach the end he needs much courage", 286 | "Shape the clay gently into block form", 287 | "A ridge on a smooth surface is a bump or flaw", 288 | "Hedge apples may stain your hands green", 289 | "Quench your thirst, then eat the crackers", 290 | "Tight curls get limp on rainy days", 291 | "The mute muffled the high tones of the horn", 292 | "The gold ring fits only a pierced ear", 293 | "The old pan was covered with hard fudge", 294 | "Watch the log float in the wide river", 295 | "The node on the stalk of wheat grew daily", 296 | "The heap of fallen leaves was set on fire", 297 | "Write fast if you want to finish early", 298 | "His shirt was clean but one button was gone", 299 | "The barrel of beer was a brew of malt and hops", 300 | "Tin cans are absent from store shelves", 301 | "Slide the box into that empty space", 302 | "The plant grew large and green in the window", 303 | "The beam dropped down on the workman's head", 304 | "Pink clouds floated with the breeze", 305 | "She danced like a swan, tall and graceful", 306 | "The tube was blown and the tire flat and useless", 307 | "It is late morning on the old wall clock", 308 | "Let's all join as we sing the last chorus", 309 | "The last switch cannot be turned off", 310 | "The fight will end in just six minutes", 311 | "The store walls were lined with colored frocks", 312 | "The peace league met to discuss their plans.", 313 | "The rise to fame of a person takes luck", 314 | "Paper is scarce, so write with much care", 315 | "The quick fox jumped on the sleeping cat", 316 | "The nozzle of the fire hose was bright brass", 317 | "Screw the round cap on as tight as needed", 318 | "Time brings us many changes", 319 | "The purple tie was ten years old", 320 | "Men think and plan and sometimes act", 321 | "Fill the ink jar with sticky glue", 322 | "He smoke a big pipe with strong 
contents", 323 | "We need grain to keep our mules healthy", 324 | "Pack the records in a neat thin case", 325 | "The crunch of feet in the snow was the only sound", 326 | "The copper bowl shone in the sun's rays", 327 | "Boards will warp unless kept dry.", 328 | "The plush chair leaned against the wall.", 329 | "Glass will clink when struck by metal", 330 | "Bathe and relax in the cool green grass", 331 | "Nine rows of soldiers stood in a line", 332 | "The beach is dry and shallow at low tide", 333 | "The idea is to sew both edges straight", 334 | "The kitten chased the dog down the street", 335 | "Pages bound in cloth make a book", 336 | "Try to trace the fine lines of the painting", 337 | "Women form less than half of the group.", 338 | "The zones merge in the central part of town", 339 | "A gem in the rough needs work to polish", 340 | "Code is used when secrets are sent", 341 | "Most of the news is easy for us to hear", 342 | "He used the lathe to make brass objects", 343 | "The vane on top of the pole revolved in the wind", 344 | "Mince pie is a dish served to children", 345 | "The clan gathered on each dull night", 346 | "Let it burn, it gives us warmth and comfort", 347 | "A castle built from sand fails to endure", 348 | "A child's wit saved the day for us", 349 | "Tack the strip of carpet to the worn floor", 350 | "Next Tuesday we must vote", 351 | "Pour the stew from the pot into the plate", 352 | "Each penny shone like new", 353 | "The man went to the woods to gather sticks", 354 | "The dirt piles were lines along the road", 355 | "The logs fell and tumbled into the clear stream", 356 | "Just hoist it up and take it away", 357 | "A ripe plum is fit for a king's palate", 358 | "Our plans right now are hazy.", 359 | "Brass rings are sold by these natives", 360 | "It takes a good trap to capture a bear", 361 | "Feed the white mouse some flower seeds", 362 | "The thaw came early and freed the stream", 363 | "He took the lead and kept it the whole distance", 364 | "The key you designed will fit the lock", 365 | "Plead to the council to free the poor thief", 366 | "Better hash is made of rare beef", 367 | "This plank was made for walking on ", 368 | "The lake sparkled in the red hot sun", 369 | "He crawled with care along the ledge", 370 | "Tend the sheep while the dog wanders", 371 | "It takes a lot of help to finish these", 372 | "Mark the spot with a sign painted red", 373 | "Take two shares as a fair profit.", 374 | "The fur of cats goes by many names", 375 | "North winds bring colds and fevers", 376 | "He asks no person to vouch for him", 377 | "Go now and come here later", 378 | "A sash of gold silk will trim her dress", 379 | "Soap can wash most dirt away", 380 | "That move means the game is over", 381 | "He wrote down a long list of items", 382 | "A siege will crack the strong defense", 383 | "Grape juice and water mix well", 384 | "Roads are paved with sticky tar", 385 | "Fake stones shine but cost little", 386 | "The drip of the rain made a pleasant sound.", 387 | "Smoke poured out of every crack.", 388 | "Serve the hot rum to the tired heroes", 389 | "Much of the story makes good sense.", 390 | "The sun came up to light the eastern sky", 391 | "Heave the line over the port side", 392 | "A lathe cuts and trims any wood", 393 | "It's a dense crowd in two distinct ways", 394 | "His hip struck the knee of the next player", 395 | "The stale smell of old beer lingers", 396 | "The desk was firm on the shaky floor", 397 | "It takes heat to bring out the odor", 398 | "Beef is scarcer 
than some lamb", 399 | "Raise the sail and steer the ship northward", 400 | "A cone costs five cents on Mondays", 401 | "A pod is what peas always grow in", 402 | "Jerk that dart from the cork target", 403 | "No cement will hold hard wood", 404 | "We now have a new base for shipping", 405 | "A list of names is carved around the base", 406 | "The sheep were led home by a dog", 407 | "Three for a dime, the young peddler cried", 408 | "The sense of smell is better than that of touch", 409 | "No hardship seemed to make him sad", 410 | "Grace makes up for lack of beauty", 411 | "Nudge gently but wake her now", 412 | "The news struck doubt into restless minds", 413 | "Once we stood beside the shore", 414 | "A chink in the wall allowed a draft to blow", 415 | "Fasten two pins on each side", 416 | "A cold dip restores health and zest", 417 | "He takes the oath of office each March", 418 | "The sand drifts over the sills of the old house", 419 | "The point of the steel pen was bent and twisted", 420 | "There is a lag between thought and act", 421 | "Seed is needed to plant the spring corn", 422 | "Draw the chart with heavy black lines", 423 | "The boy owed his pal thirty cents", 424 | "The chap slipped into the crowd and was lost", 425 | "Hats are worn to tea and not to dinner", 426 | "The ramp led up to the wide highway", 427 | "Beat the dust from the rug onto the lawn", 428 | "Say it slowly but make it ring clear", 429 | "The straw nest housed five robins", 430 | "Screen the porch with woven straw mats", 431 | "This horse will nose his way to the finish", 432 | "The dry wax protects the deep scratch", 433 | "He picked up the dice for a second roll", 434 | "These coins will be needed to pay his debt", 435 | "The nag pulled the frail cart along", 436 | "Twist the valve and release hot steam", 437 | "The vamp of the shoe had a gold buckle", 438 | "The smell of burned rags itches my nose", 439 | "New pants lack cuffs and pockets", 440 | "The marsh will freeze when cold enough", 441 | "They slice the sausage thin with a knife", 442 | "The bloom of the rose lasts a few days", 443 | "A gray mare walked before the colt", 444 | "Breakfast buns are fine with a hot drink", 445 | "Bottles hold four kinds of rum", 446 | "The man wore a feather in his felt hat", 447 | "He wheeled the bike past the winding road", 448 | "Drop the ashes on the worn old rug", 449 | "The desk and both chairs were painted tan", 450 | "Throw out the used paper cup and plate", 451 | "A clean neck means a neat collar", 452 | "The couch cover and hall drapes were blue", 453 | "The stems of the tall glasses cracked and broke", 454 | "The wall phone rang loud and often", 455 | "The clothes dried on a thin wooden rack", 456 | "Turn out the lantern which gives us light", 457 | "The cleat sank deeply into the soft turf", 458 | "The bills were mailed promptly on the tenth of the month", 459 | "To have is better than to wait and hope", 460 | "The price is fair for a good antique clock", 461 | "The music played on while they talked", 462 | "Dispense with a vest on a day like this", 463 | "The bunch of grapes was pressed into wine", 464 | "He sent the figs, but kept the ripe cherries", 465 | "The hinge on the door creaked with old age", 466 | "The screen before the fire kept in the sparks", 467 | "Fly by night and you waste little time", 468 | "Thick glasses helped him read the print", 469 | "Birth and death marks the limits of life", 470 | "The chair looked strong but had no bottom", 471 | "The kite flew wildly in the high wind", 472 | "A fur 
muff is stylish once more", 473 | "The tin box held priceless stones", 474 | "We need an end of all such matter", 475 | "The case was puzzling to the old and wise", 476 | "The bright lanterns were gay on the dark lawn", 477 | "We don't get much money but we have fun", 478 | "The youth drove with zest, but little skill", 479 | "Five years he lived with a shaggy dog", 480 | "A fence cuts through the corner lot", 481 | "The way to save money is not to spend much", 482 | "Shut the hatch before the waves push it in", 483 | "The odor of spring makes young hearts jump", 484 | "Crack the walnut with your sharp side teeth", 485 | "He offered proof in the form of a large chart", 486 | "Send the stuff in a thick paper bag", 487 | "A quart of milk is water for the most part", 488 | "They told wild tales to frighten him", 489 | "The three story house was built of stone", 490 | "In the rear of the ground floor was a large passage", 491 | "A man in a blue sweater sat at the desk", 492 | "Oats are a food eaten by horse and man", 493 | "Their eyelids droop for want of sleep", 494 | "A sip of tea revives his tired friend", 495 | "There are many ways to do these things", 496 | "Tuck the sheet under the edge of the mat", 497 | "A force equal to that would move the earth", 498 | "We like to see clear weather", 499 | "The work of the tailor is seen on each side", 500 | "Take a chance and win a china doll", 501 | "Shake the dust from your shoes, stranger", 502 | "She was kind to sick old people", 503 | "The square wooden crate was packed to be shipped", 504 | "The dusty bench stood by the stone wall", 505 | "We dress to suit the weather of most days", 506 | "Smile when you say nasty words", 507 | "A bowl of rice is free with chicken stew", 508 | "The water in this well is a source of good health", 509 | "Take shelter in this tent, but keep still", 510 | "That guy is the writer of a few banned books", 511 | "The little tales they tell are false", 512 | "The door was barred, locked, and bolted as well", 513 | "Ripe pears are fit for a queen's table", 514 | "A big wet stain was on the round carpet", 515 | "The kite dipped and swayed, but stayed aloft", 516 | "The pleasant hours fly by much too soon", 517 | "The room was crowded with a wild mob", 518 | "This strong arm shall shield your honor", 519 | "She blushed when he gave her a white orchid", 520 | "The beetle droned in the hot June sun", 521 | "Press the pedal with your left foot", 522 | "Neat plans fail without luck", 523 | "The black trunk fell from the landing", 524 | "The bank pressed for payment of the debt", 525 | "The theft of the pearl pin was kept secret", 526 | "Shake hands with this friendly child", 527 | "The vast space stretched into the far distance", 528 | "A rich farm is rare in this sandy waste", 529 | "His wide grin earned many friends", 530 | "Flax makes a fine brand of paper", 531 | "Hurdle the pit with the aid of a long pole", 532 | "A strong bid may scare your partner stiff", 533 | "Even a just cause needs power to win", 534 | "Peep under the tent and see the clowns", 535 | "The leaf drifts along with a slow spin", 536 | "Cheap clothes are flashy but don't last", 537 | "A thing of small note can cause despair", 538 | "Flood the mails with requests for this book", 539 | "A thick coat of black paint covered all", 540 | "The pencil was cut to be sharp at both ends", 541 | "Those last words were a strong statement", 542 | "He wrote his name boldly at the top of the sheet", 543 | "Dill pickles are sour but taste fine", 544 | "Down that road is 
the way to the grain farmer", 545 | "Either mud or dust are found at all times", 546 | "The best method is to fix it in place with clips", 547 | "If you mumble your speech will be lost", 548 | "At night the alarm roused him from a deep sleep", 549 | "Read just what the meter says", 550 | "Fill your pack with bright trinkets for the poor", 551 | "The small red neon lamp went out", 552 | "Clams are small, round, soft, and tasty", 553 | "The fan whirled its round blades softly", 554 | "The line where the edges join was clean", 555 | "Breathe deep and smell the piny air", 556 | "It matters not if he reads these words or those", 557 | "A brown leather bag hung from its strap", 558 | "A toad and a frog are hard to tell apart", 559 | "A white silk jacket goes with any shoes", 560 | "A break in the dam almost caused a flood", 561 | "Paint the sockets in the wall dull green", 562 | "The child crawled into the dense grass", 563 | "Bribes fail where honest men work", 564 | "Trample the spark, else the flames will spread", 565 | "The hilt of the sword was carved with fine designs", 566 | "A round hole was drilled through the thin board", 567 | "Footprints showed the path he took up the beach", 568 | "She was waiting at my front lawn", 569 | "A vent near the edge brought in fresh air", 570 | "Prod the old mule with a crooked stick", 571 | "It is a band of steel three inches wide", 572 | "The pipe ran almost the length of the ditch", 573 | "It was hidden from sight by a mass of leaves and shrubs", 574 | "The weight of the package was seen on the high scale", 575 | "Wake and rise, and step into the green outdoors", 576 | "The green light in the brown box flickered", 577 | "The brass tube circled the high wall", 578 | "The lobes of her ears were pierced to hold rings", 579 | "Hold the hammer near the end to drive the nail", 580 | "Next Sunday is the twelfth of the month", 581 | "Every word and phrase he speaks is true", 582 | "He put his last cartridge into the gun and fired", 583 | "They took their kids from the public school", 584 | "Drive the screw straight into the wood", 585 | "Keep the hatch tight and the watch constant", 586 | "Sever the twine with a quick snip of the knife", 587 | "Paper will dry out when wet", 588 | "Slide the catch back and open the desk", 589 | "Help the weak to preserve their strength", 590 | "A sullen smile gets few friends", 591 | "Stop whistling and watch the boys march", 592 | "Jerk the cord, and out tumbles the gold", 593 | "Slide the tray across the glass top", 594 | "The cloud moved in a stately way and was gone", 595 | "Light maple makes for a swell room", 596 | "Set the piece here and say nothing", 597 | "Dull stories make her laugh", 598 | "A stiff cord will do to fasten your shoe", 599 | "Get the trust fund to the bank early", 600 | "Choose between the high road and the low", 601 | "A plea for funds seems to come again", 602 | "He lent his coat to the tall gaunt stranger", 603 | "There is a strong chance it will happen once more", 604 | "The duke left the park in a silver coach", 605 | "Greet the new guests and leave quickly", 606 | "When the frost has come it is time for turkey", 607 | "Sweet words work better than fierce", 608 | "A thin stripe runs down the middle", 609 | "A six comes up more often than a ten", 610 | "Lush ferns grow on the lofty rocks", 611 | "The ram scared the school children off", 612 | "The team with the best timing looks good", 613 | "The farmer swapped his horse for a brown ox", 614 | "Sit on the perch and tell the others what to do", 615 | 
"A steep trail is painful for our feet", 616 | "The early phase of life moves fast", 617 | "Green moss grows on the northern side", 618 | "Tea in thin china has a sweet taste", 619 | "Pitch the straw through the door of the stable", 620 | "The latch on the back gate needed a nail", 621 | "The goose was brought straight from the old market", 622 | "The sink is the thing in which we pile dishes", 623 | "A whiff of it will cure the most stubborn cold", 624 | "The facts don't always show who is right", 625 | "She flaps her cape as she parades the street", 626 | "The loss of the cruiser was a blow to the fleet", 627 | "Loop the braid to the left and then over", 628 | "Plead with the lawyer to drop the lost cause", 629 | "Calves thrive on tender spring grass", 630 | "Post no bills on this office wall", 631 | "Tear a thin sheet from the yellow pad", 632 | "A cruise in warm waters in a sleek yacht is fun", 633 | "A streak of color ran down the left edge", 634 | "It was done before the boy could see it", 635 | "Crouch before you jump or miss the mark", 636 | "Pack the kits and don't forget the salt", 637 | "The square peg will settle in the round hole", 638 | "Fine soap saves tender skin", 639 | "Poached eggs and tea must suffice", 640 | "Bad nerves are jangled by a door slam", 641 | "Ship maps are different from those for planes", 642 | "Dimes showered down from all sides", 643 | "They sang the same tunes at each party", 644 | "The sky in the west is tinged with orange red", 645 | "The pods of peas ferment in bare fields", 646 | "The horse balked and threw the tall rider", 647 | "The hitch between the horse and cart broke", 648 | "Pile the coal high in the shed corner", 649 | "A gold vase is both rare and costly", 650 | "The knife was hung inside its bright sheath", 651 | "The rarest spice comes from the far East", 652 | "The roof should be tilted at a sharp slant", 653 | "A smatter of French is worse than none", 654 | "The mule trod the treadmill day and night", 655 | "The aim of the contest is to raise a great fund", 656 | "To send it now in large amounts is bad", 657 | "There is a fine hard tang in salty air", 658 | "Cod is the main business of the north shore", 659 | "The slab was hewn from heavy blocks of slate", 660 | "Dunk the stale biscuits into strong drink", 661 | "Hang tinsel from both branches", 662 | "Cap the jar with a tight brass cover", 663 | "The poor boy missed the boat again", 664 | "Be sure to set that lamp firmly in the hole", 665 | "Pick a card and slip it under the pack", 666 | "A round mat will cover the dull spot", 667 | "The first part of the plan needs changing", 668 | "A good book informs of what we ought to know", 669 | "The mail comes in three batches per day", 670 | "You cannot brew tea in a cold pot", 671 | "Dots of light betrayed the black cat", 672 | "Put the chart on the mantel and tack it down", 673 | "The night shift men rate extra pay", 674 | "The red paper brightened the dim stage", 675 | "See the player scoot to third base", 676 | "Slide the bill between the two leaves", 677 | "Many hands help get the job done", 678 | "We don't like to admit our small faults", 679 | "No doubt about the way the wind blows", 680 | "Dig deep in the earth for pirate's gold", 681 | "The steady drip is worse than a drenching rain", 682 | "A flat pack takes less luggage space", 683 | "Green ice frosted the punch bowl", 684 | "A stuffed chair slipped from the moving van", 685 | "The stitch will serve but needs to be shortened", 686 | "A thin book fits in the side pocket", 687 | "The 
gloss on top made it unfit to read", 688 | "The hail pattered on the burnt brown grass", 689 | "Seven seals were stamped on great sheets", 690 | "Our troops are set to strike heavy blows", 691 | "The store was jammed before the sale could start", 692 | "It was a bad error on the part of the new judge", 693 | "One step more and the board will collapse", 694 | "Take the match and strike it against your shoe", 695 | "The pot boiled but the contents failed to jell", 696 | "The baby puts his right foot in his mouth", 697 | "The bombs left most of the town in ruins", 698 | "Stop and stare at the hard working man", 699 | "The streets are narrow and full of sharp turns", 700 | "The pup jerked the leash as he saw a feline shape", 701 | "Open your book to the first page", 702 | "Fish evade the net and swim off", 703 | "Dip the pail once and let it settle", 704 | "Will you please answer that phone", 705 | "The big red apple fell to the ground", 706 | "The curtain rose and the show was on", 707 | "The young prince became heir to the throne", 708 | "He sent the boy on a short errand", 709 | "Leave now and you will arrive on time", 710 | "The corner store was robbed last night", 711 | "A gold ring will please most any girl", 712 | "The long journey home took a year", 713 | "She saw a cat in the neighbor's house", 714 | "A pink shell was found on the sandy beach", 715 | "Small children came to see him", 716 | "The grass and bushes were wet with dew", 717 | "The blind man counted his old coins", 718 | "A severe storm tore down the barn", 719 | "She called his name many times", 720 | "When you hear the bell, come quickly", 721 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AccentDB/code/1b1b0a6fba57e94a3e4549e31340e5a39851e2eb/helpers/__init__.py -------------------------------------------------------------------------------- /helpers/alt_split.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from scipy.io import wavfile 4 | import os 5 | import numpy as np 6 | import argparse 7 | from tqdm import tqdm 8 | 9 | # Utility functions 10 | xrange = range 11 | def windows(signal, window_size, step_size): 12 | if type(window_size) is not int: 13 | raise AttributeError("Window size must be an integer.") 14 | if type(step_size) is not int: 15 | raise AttributeError("Step size must be an integer.") 16 | for i_start in xrange(0, len(signal), step_size): 17 | i_end = i_start + window_size 18 | if i_end >= len(signal): 19 | break 20 | yield signal[i_start:i_end] 21 | 22 | def energy(samples): 23 | return np.sum(np.power(samples, 2.)) / float(len(samples)) 24 | 25 | def rising_edges(binary_signal): 26 | previous_value = 0 27 | index = 0 28 | for x in binary_signal: 29 | if x and not previous_value: 30 | yield index 31 | previous_value = x 32 | index += 1 33 | 34 | # Process command line arguments 35 | 36 | parser = argparse.ArgumentParser(description='Split a WAV file at silence.') 37 | parser.add_argument('input_file', type=str, help='The WAV file to split.') 38 | parser.add_argument('--output-dir', '-o', type=str, default='.', help='The output folder. Defaults to the current folder.') 39 | parser.add_argument('--min-silence-length', '-m', type=float, default=3., help='The minimum length of silence at which a split may occur [seconds]. 
Defaults to 3 seconds.') 40 | parser.add_argument('--silence-threshold', '-t', type=float, default=1e-6, help='The energy level (between 0.0 and 1.0) below which the signal is regarded as silent. Defaults to 1e-6 == 0.0001%.') 41 | parser.add_argument('--step-duration', '-s', type=float, default=None, help='The amount of time to step forward in the input file after calculating energy. Smaller value = slower, but more accurate silence detection. Larger value = faster, but might miss some split opportunities. Defaults to (min-silence-length / 10.).') 42 | parser.add_argument('--dry-run', '-n', action='store_true', help='Don\'t actually write any output files.') 43 | 44 | args = parser.parse_args() 45 | 46 | input_filename = args.input_file 47 | window_duration = args.min_silence_length 48 | if args.step_duration is None: 49 | step_duration = window_duration / 10. 50 | else: 51 | step_duration = args.step_duration 52 | silence_threshold = args.silence_threshold 53 | output_dir = args.output_dir 54 | output_filename_prefix = os.path.splitext(os.path.basename(input_filename))[0] 55 | dry_run = args.dry_run 56 | 57 | # print("Splitting {} where energy is below {}% for longer than {}s.".format( 58 | # input_filename, 59 | # silence_threshold * 100., 60 | # window_duration 61 | # )) 62 | 63 | # Read and split the file 64 | 65 | sample_rate, samples = wavfile.read(filename=input_filename, mmap=True) 66 | 67 | max_amplitude = np.iinfo(samples.dtype).max 68 | max_energy = energy([max_amplitude]) 69 | 70 | window_size = int(window_duration * sample_rate) 71 | step_size = int(step_duration * sample_rate) 72 | 73 | signal_windows = windows( 74 | signal=samples, 75 | window_size=window_size, 76 | step_size=step_size 77 | ) 78 | 79 | window_energy = (energy(w) / max_energy for w in tqdm( 80 | signal_windows, 81 | total=int(len(samples) / float(step_size)) 82 | )) 83 | 84 | window_silence = (e > silence_threshold for e in window_energy) 85 | 86 | cut_times = (r * step_duration for r in rising_edges(window_silence)) 87 | 88 | # This is the step that takes long, since we force the generators to run. 89 | print("Finding silences...") 90 | cut_samples = [int(t * sample_rate) for t in cut_times] 91 | cut_samples.append(-1) 92 | 93 | cut_ranges = [(i, cut_samples[i], cut_samples[i+1]) for i in xrange(len(cut_samples) - 1)] 94 | 95 | for i, start, stop in tqdm(cut_ranges): 96 | output_file_path = "{}_{:03d}.wav".format( 97 | os.path.join(output_dir, output_filename_prefix), 98 | i 99 | ) 100 | if not dry_run: 101 | print("Writing file ", output_file_path) 102 | wavfile.write( 103 | filename=output_file_path, 104 | rate=sample_rate, 105 | data=samples[start:stop] 106 | ) 107 | -------------------------------------------------------------------------------- /helpers/convert_raw_to_processed.py: -------------------------------------------------------------------------------- 1 | # from file_manager import FileManager 2 | 3 | # root = '/home/enigmaeth/Videos/accentPhase2/raw' 4 | # accepted_formats = ['wav', 'mp3'] 5 | # FM = FileManager(root, accepted_formats) 6 | 7 | # all_files = FM.get_all_files() 8 | 9 | # for file in all_files: 10 | # language = file.split(' ')[0].split('/')[-1] 11 | 12 | # Import the AudioSegment class for processing audio and the 13 | # split_on_silence function for separating out silent chunks. 14 | from pydub import AudioSegment 15 | from pydub.silence import split_on_silence 16 | 17 | # Define a function to normalize a chunk to a target amplitude.
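# (dBFS is decibels relative to full scale, so the gain applied below is simply the difference between the target level and the chunk's current loudness.)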
18 | def match_target_amplitude(aChunk, target_dBFS): 19 | ''' Normalize given audio chunk ''' 20 | change_in_dBFS = target_dBFS - aChunk.dBFS 21 | return aChunk.apply_gain(change_in_dBFS) 22 | 23 | # Load your audio. 24 | song = AudioSegment.from_file("/home/enigmaeth/Videos/accentPhase2/raw/sb.wav", format="wav") 25 | 26 | # Split track where the silence is 2 seconds or more and get chunks using 27 | # the imported function. 28 | chunks = split_on_silence ( 29 | # Use the loaded audio. 30 | song, 31 | # Specify that a silent chunk must be at least 2 seconds or 2000 ms long. 32 | min_silence_len = 2000, 33 | # Consider a chunk silent if it's quieter than -16 dBFS. 34 | # (You may want to adjust this parameter.) 35 | silence_thresh = -16 36 | ) 37 | print("split ", len(chunks)) 38 | 39 | # Process each chunk with your parameters 40 | for i, chunk in enumerate(chunks): 41 | # Create a silence chunk that's 0.5 seconds (or 500 ms) long for padding. 42 | silence_chunk = AudioSegment.silent(duration=500) 43 | 44 | # Add the padding chunk to beginning and end of the entire chunk. 45 | audio_chunk = silence_chunk + chunk + silence_chunk 46 | 47 | # Normalize the entire chunk. 48 | normalized_chunk = match_target_amplitude(audio_chunk, -20.0) 49 | 50 | # Export the audio chunk with new bitrate. 51 | print("Exporting chunk{0}.mp3".format(i)) 52 | normalized_chunk.export( 53 | ".//chunk{0}.mp3".format(i), 54 | bitrate = "192k", 55 | format = "mp3" 56 | ) -------------------------------------------------------------------------------- /helpers/file_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | class FileManager: 4 | """ 5 | This class provides functions related to file management required for the indexer 6 | """ 7 | 8 | def __init__(self, root, accepted_formats): 9 | """ 10 | initialize variables: root path and accepted formats for the indexer 11 | """ 12 | self.root = root 13 | self.accepted_formats = accepted_formats 14 | 15 | 16 | def get_all_files(self): 17 | """ 18 | List all files recursively under the root directory 19 | """ 20 | files_list = [] 21 | for path, subdirs, files in os.walk(self.root): 22 | for name in files: 23 | files_list.append(os.path.join(path, name)) 24 | return files_list 25 | 26 | 27 | def get_files_to_be_processed(self): 28 | """ 29 | returns list of files to be included in the index 30 | set `root` variable to the desired root 31 | :return: list of files to be processed 32 | """ 33 | files = self.get_all_files() 34 | files_list = [] 35 | for name in files: 36 | if(name.split('.')[-1] in self.accepted_formats and os.stat(name).st_size < 5000000): 37 | files_list.append(name) 38 | return files_list[0:-1] -------------------------------------------------------------------------------- /helpers/run.py: -------------------------------------------------------------------------------- 1 | # Import the AudioSegment class for processing audio and the 2 | # split_on_silence function for separating out silent chunks. 3 | from pydub import AudioSegment 4 | from pydub.silence import split_on_silence 5 | import os 6 | 7 | FORMAT = 'mp3' 8 | 9 | # Define a function to normalize a chunk to a target amplitude. 10 | def match_target_amplitude(aChunk, target_dBFS): 11 | ''' Normalize given audio chunk ''' 12 | change_in_dBFS = target_dBFS - aChunk.dBFS 13 | return aChunk.apply_gain(change_in_dBFS) 14 | 15 | # Load your audio.
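# (AudioSegment.from_file selects the decoder from the `format` argument, so the same call handles wav and mp3 sources alike.)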
16 | song = AudioSegment.from_file("Bangla Jaya REC20190704135235.mp3", format=FORMAT) 17 | 18 | print(song) 19 | 20 | # Split track where the silence is 1 second or more and get chunks using 21 | # the imported function. 22 | chunks = split_on_silence ( 23 | # Use the loaded audio. 24 | song, 25 | # Specify that a silent chunk must be at least 1 second or 1000 ms long. 26 | min_silence_len = 1000, 27 | # Consider a chunk silent if it's quieter than -16 dBFS. 28 | # (You may want to adjust this parameter.) 29 | silence_thresh = -16 30 | ) 31 | 32 | print(chunks) 33 | 34 | # Process each chunk with your parameters 35 | for i, chunk in enumerate(chunks): 36 | # Create a silence chunk that's 0.5 seconds (or 500 ms) long for padding. 37 | silence_chunk = AudioSegment.silent(duration=500) 38 | 39 | # Add the padding chunk to beginning and end of the entire chunk. 40 | audio_chunk = silence_chunk + chunk + silence_chunk 41 | 42 | # Normalize the entire chunk. 43 | normalized_chunk = match_target_amplitude(audio_chunk, -20.0) 44 | 45 | # Export the audio chunk with new bitrate. 46 | print("Exporting chunk{0}.{1}".format(i, FORMAT)) 47 | normalized_chunk.export( 48 | ".//chunk{0}.{1}".format(i, FORMAT), 49 | bitrate = "192k", 50 | format = FORMAT 51 | ) 52 | -------------------------------------------------------------------------------- /helpers/split_to_wav.sh.save: -------------------------------------------------------------------------------- 1 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 2 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 3 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 4 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 5 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 6 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 7 | python alt_split.py /home/enigmaeth/accentPhase2/data/all_accents/Bangla_Jay.wav --output-dir out/ --min-silence-length=2 --silence-threshold=0.01 8 | -------------------------------------------------------------------------------- /helpers/split_wav.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from scipy.io import wavfile 4 | import os 5 | import numpy as np 6 | import argparse 7 | from tqdm import tqdm 8 | 9 | # Utility functions 10 | xrange = range 11 | def windows(signal, window_size, step_size): 12 | if type(window_size) is not int: 13 | raise AttributeError("Window size must be an integer.") 14 | if type(step_size) is not int: 15 | raise AttributeError("Step size must be an integer.") 16 | for i_start in xrange(0, len(signal), step_size): 17 | i_end = i_start + window_size 18 | if i_end >= len(signal): 19 | break 20 | yield signal[i_start:i_end] 21 | 22 | def energy(samples): 23 | return np.sum(np.power(samples, 2.)) / float(len(samples)) 24 | 25 | def rising_edges(binary_signal): 26 | previous_value = 0 27 | index = 0 28 | for x in binary_signal: 29 | if x and not previous_value:
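            # a 0 -> 1 transition marks the first window whose energy rose above the threshold, i.e. the start of a non-silent region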
30 | yield index 31 | previous_value = x 32 | index += 1 33 | 34 | # Process command line arguments 35 | 36 | parser = argparse.ArgumentParser(description='Split a WAV file at silence.') 37 | parser.add_argument('--input_file', type=str, required=True, help='The WAV file to split.') 38 | parser.add_argument('--output-dir', '-o', type=str, default='.', help='The output folder. Defaults to the current folder.') 39 | parser.add_argument('--min-silence-length', '-m', type=float, default=3., help='The minimum length of silence at which a split may occur [seconds]. Defaults to 3 seconds.') 40 | parser.add_argument('--silence-threshold', '-t', type=float, default=1e-6, help='The energy level (between 0.0 and 1.0) below which the signal is regarded as silent. Defaults to 1e-6 == 0.0001%.') 41 | parser.add_argument('--step-duration', '-s', type=float, default=None, help='The amount of time to step forward in the input file after calculating energy. Smaller value = slower, but more accurate silence detection. Larger value = faster, but might miss some split opportunities. Defaults to (min-silence-length / 10.).') 42 | parser.add_argument('--dry-run', '-n', action='store_true', help='Don\'t actually write any output files.') 43 | 44 | args = parser.parse_args() 45 | 46 | input_filename = args.input_file 47 | window_duration = args.min_silence_length 48 | if args.step_duration is None: 49 | step_duration = window_duration / 10. 50 | else: 51 | step_duration = args.step_duration 52 | silence_threshold = args.silence_threshold 53 | output_dir = args.output_dir 54 | output_filename_prefix = os.path.splitext(os.path.basename(input_filename))[0] 55 | dry_run = args.dry_run 56 | 57 | # print("Splitting {} where energy is below {}% for longer than {}s.".format( 58 | # input_filename, 59 | # silence_threshold * 100., 60 | # window_duration 61 | # )) 62 | 63 | # Read and split the file 64 | 65 | sample_rate, samples = wavfile.read(filename=input_filename, mmap=True) 66 | 67 | max_amplitude = np.iinfo(samples.dtype).max 68 | max_energy = energy([max_amplitude]) 69 | 70 | window_size = int(window_duration * sample_rate) 71 | step_size = int(step_duration * sample_rate) 72 | 73 | signal_windows = windows( 74 | signal=samples, 75 | window_size=window_size, 76 | step_size=step_size 77 | ) 78 | 79 | window_energy = (energy(w) / max_energy for w in tqdm( 80 | signal_windows, 81 | total=int(len(samples) / float(step_size)) 82 | )) 83 | 84 | window_silence = (e > silence_threshold for e in window_energy) 85 | 86 | cut_times = (r * step_duration for r in rising_edges(window_silence)) 87 | 88 | # This is the step that takes long, since we force the generators to run.
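# (window_energy, window_silence and cut_times above are lazy generator expressions; no audio is actually scanned until the list comprehension below pulls values through the whole chain.)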
89 | print("Finding silences...") 90 | cut_samples = [int(t * sample_rate) for t in cut_times] 91 | cut_samples.append(-1) 92 | 93 | cut_ranges = [(i, cut_samples[i], cut_samples[i+1]) for i in xrange(len(cut_samples) - 1)] 94 | 95 | for i, start, stop in tqdm(cut_ranges): 96 | output_file_path = "{}_{:03d}.wav".format( 97 | os.path.join(output_dir, output_filename_prefix), 98 | i 99 | ) 100 | if not dry_run: 101 | print("Writing file ", output_file_path) 102 | wavfile.write( 103 | filename=output_file_path, 104 | rate=sample_rate, 105 | data=samples[start:stop] 106 | ) 107 | else: 108 | print("Not Writing file ", output_file_path) 109 | -------------------------------------------------------------------------------- /ipynb-htmls/conv1d (1).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "conv1d.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "OTCB8AL-osBL", 21 | "colab_type": "code", 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 35 25 | }, 26 | "outputId": "42e4be27-f0b0-4d9d-8353-c1b410aeca82" 27 | }, 28 | "source": [ 29 | "from google.colab import drive\n", 30 | "drive.mount('/content/drive')" 31 | ], 32 | "execution_count": 1, 33 | "outputs": [ 34 | { 35 | "output_type": "stream", 36 | "text": [ 37 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 38 | ], 39 | "name": "stdout" 40 | } 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "koL6wrhIq_em", 47 | "colab_type": "code", 48 | "colab": { 49 | "base_uri": "https://localhost:8080/", 50 | "height": 138 51 | }, 52 | "outputId": "1d690b45-0ba9-42de-82bc-476a9b55bccb" 53 | }, 54 | "source": [ 55 | "\n", 56 | "from __future__ import print_function\n", 57 | "import numpy as np\n", 58 | "from sklearn.model_selection import train_test_split\n", 59 | "from sklearn.metrics import classification_report\n", 60 | "from time import time\n", 61 | "#np.random.seed(1337) # for reproducibility\n", 62 | "\n", 63 | "from keras.preprocessing import sequence\n", 64 | "from keras.models import Sequential\n", 65 | "from keras.layers.core import Dense, Dropout, Activation, Flatten\n", 66 | "from keras.layers.normalization import BatchNormalization\n", 67 | "from keras.layers.convolutional import Convolution1D, MaxPooling1D\n", 68 | "from keras.utils import np_utils\n", 69 | "from keras.callbacks import TensorBoard\n", 70 | "\n", 71 | "\n", 72 | "# set parameters:\n", 73 | "test_dim = 499\n", 74 | "maxlen = 100\n", 75 | "nb_filter = 256\n", 76 | "filter_length_1 = 10\n", 77 | "filter_length_2 = 5\n", 78 | "hidden_dims = 750\n", 79 | "nb_epoch = 12\n", 80 | "nb_classes = 2\n", 81 | "split_ratio = 0.15\n", 82 | "\n", 83 | "print('Loading data...')\n", 84 | "\n", 85 | "# X = np.load('/content/drive/My Drive/Colab Notebooks/data/numpy_vectors/x_test_mfcc_500_50:50_samples_sliced_out.npy')\n", 86 | "# y = np.load('/content/drive/My Drive/Colab Notebooks/data/numpy_vectors/y_label_500_50:50_samples_sliced_out.npy')\n", 87 | "X = np.load('/content/drive/My Drive/Colab Notebooks/data/numpy_vectors/x_3:1_samples_out.npy')\n", 88 | "y = np.load('/content/drive/My Drive/Colab 
Notebooks/data/numpy_vectors/y_3:1_samples_out.npy')\n", 89 | "print(X.shape)\n", 90 | "print(y.shape)" 91 | ], 92 | "execution_count": 2, 93 | "outputs": [ 94 | { 95 | "output_type": "stream", 96 | "text": [ 97 | "Using TensorFlow backend.\n" 98 | ], 99 | "name": "stderr" 100 | }, 101 | { 102 | "output_type": "display_data", 103 | "data": { 104 | "text/html": [ 105 | "
\n", 106 | "The default version of TensorFlow in Colab will soon switch to TensorFlow 2.x.\n", 107 | "We recommend you upgrade now \n", 108 | "or ensure your notebook will continue to use TensorFlow 1.x via the %tensorflow_version 1.x magic:\n", 109 | "more info.
\n" 110 | ], 111 | "text/plain": [ 112 | "" 113 | ] 114 | }, 115 | "metadata": { 116 | "tags": [] 117 | } 118 | }, 119 | { 120 | "output_type": "stream", 121 | "text": [ 122 | "Loading data...\n", 123 | "(3155, 499, 13)\n", 124 | "(3155,)\n" 125 | ], 126 | "name": "stdout" 127 | } 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "metadata": { 133 | "id": "D9lzWC2zrkch", 134 | "colab_type": "code", 135 | "colab": {} 136 | }, 137 | "source": [ 138 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_ratio)\n", 139 | "Y_train = y_train\n", 140 | "Y_test = y_test" 141 | ], 142 | "execution_count": 0, 143 | "outputs": [] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "metadata": { 148 | "id": "Zz0tpQ_kiQNo", 149 | "colab_type": "code", 150 | "colab": { 151 | "base_uri": "https://localhost:8080/", 152 | "height": 1141 153 | }, 154 | "outputId": "e3bec454-7207-4661-ae41-c37f0d82f754" 155 | }, 156 | "source": [ 157 | "import keras\n", 158 | "from keras.preprocessing.image import ImageDataGenerator\n", 159 | "from keras.models import Sequential\n", 160 | "from keras.layers import Conv1D, MaxPooling1D\n", 161 | "from keras.layers import Activation, Dropout, Flatten, Dense\n", 162 | "nb_train_samples = X.shape\n", 163 | "input_shape = (test_dim, 13)\n", 164 | "for batch_size in range(25, 26, 5):\n", 165 | " print('Build model...')\n", 166 | " model = Sequential()\n", 167 | "\n", 168 | " model = Sequential()\n", 169 | " model.add(Conv1D(32, (3), input_shape=input_shape))\n", 170 | " model.add(Activation('relu'))\n", 171 | " model.add(MaxPooling1D(pool_size=(2)))\n", 172 | "\n", 173 | " model.add(Conv1D(32, (3)))\n", 174 | " model.add(Activation('relu'))\n", 175 | " model.add(MaxPooling1D(pool_size=(2)))\n", 176 | "\n", 177 | " model.add(Conv1D(64, (3)))\n", 178 | " model.add(Activation('relu'))\n", 179 | " model.add(MaxPooling1D(pool_size=(2)))\n", 180 | "\n", 181 | " model.add(Flatten())\n", 182 | " model.add(Dense(64))\n", 183 | " model.add(Activation('relu'))\n", 184 | " model.add(Dropout(0.5))\n", 185 | " model.add(Dense(1))\n", 186 | " model.add(Activation('sigmoid'))\n", 187 | "\n", 188 | " model.compile(loss='binary_crossentropy',\n", 189 | " optimizer='rmsprop',\n", 190 | " metrics=['accuracy'])\n", 191 | " \n", 192 | " model.fit(X_train, Y_train, steps_per_epoch=nb_train_samples[0] // batch_size,\n", 193 | " nb_epoch=10, shuffle='true', verbose=1)\n", 194 | "\n", 195 | " Y_preds = model.predict(X_test)\n", 196 | " # for i in range(len(Y_preds)):\n", 197 | " # print(Y_preds[i], Y_test[i])\n", 198 | " score = model.evaluate(X_test, Y_test, verbose=1)\n", 199 | " print(score)" 200 | ], 201 | "execution_count": 4, 202 | "outputs": [ 203 | { 204 | "output_type": "stream", 205 | "text": [ 206 | "Build model...\n", 207 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.\n", 208 | "\n", 209 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n", 210 | "\n", 211 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. 
Please use tf.random.uniform instead.\n", 212 | "\n", 213 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4267: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n", 214 | "\n", 215 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:148: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.\n", 216 | "\n", 217 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3733: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n", 218 | "Instructions for updating:\n", 219 | "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n", 220 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.\n", 221 | "\n", 222 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3657: The name tf.log is deprecated. Please use tf.math.log instead.\n", 223 | "\n", 224 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n", 225 | "Instructions for updating:\n", 226 | "Use tf.where in 2.0, which has the same broadcast rule as np.where\n" 227 | ], 228 | "name": "stdout" 229 | }, 230 | { 231 | "output_type": "stream", 232 | "text": [ 233 | "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:37: UserWarning: The `nb_epoch` argument in `fit` has been renamed `epochs`.\n" 234 | ], 235 | "name": "stderr" 236 | }, 237 | { 238 | "output_type": "stream", 239 | "text": [ 240 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1033: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n", 241 | "\n", 242 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1020: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", 243 | "\n", 244 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3005: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n", 245 | "\n", 246 | "Epoch 1/10\n", 247 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.\n", 248 | "\n", 249 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:197: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.\n", 250 | "\n", 251 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:207: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n", 252 | "\n", 253 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:216: The name tf.is_variable_initialized is deprecated. 
Please use tf.compat.v1.is_variable_initialized instead.\n", 254 | "\n", 255 | "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:223: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.\n", 256 | "\n", 257 | "126/126 [==============================] - 20s 160ms/step - loss: 0.7433 - acc: 0.9211\n", 258 | "Epoch 2/10\n", 259 | "126/126 [==============================] - 18s 142ms/step - loss: 0.0579 - acc: 0.9902\n", 260 | "Epoch 3/10\n", 261 | "126/126 [==============================] - 18s 142ms/step - loss: 0.0239 - acc: 0.9971\n", 262 | "Epoch 4/10\n", 263 | "126/126 [==============================] - 18s 142ms/step - loss: 0.0182 - acc: 0.9978\n", 264 | "Epoch 5/10\n", 265 | "126/126 [==============================] - 18s 142ms/step - loss: 0.0271 - acc: 0.9969\n", 266 | "Epoch 6/10\n", 267 | "126/126 [==============================] - 18s 141ms/step - loss: 0.0440 - acc: 0.9947\n", 268 | "Epoch 7/10\n", 269 | "126/126 [==============================] - 18s 141ms/step - loss: 2.6129e-04 - acc: 0.9999\n", 270 | "Epoch 8/10\n", 271 | "126/126 [==============================] - 18s 141ms/step - loss: 0.0508 - acc: 0.9953\n", 272 | "Epoch 9/10\n", 273 | "126/126 [==============================] - 18s 141ms/step - loss: 0.0255 - acc: 0.9980\n", 274 | "Epoch 10/10\n", 275 | "126/126 [==============================] - 18s 141ms/step - loss: 0.0212 - acc: 0.9983\n", 276 | "474/474 [==============================] - 0s 247us/step\n", 277 | "[1.2402050405118628e-07, 1.0]\n" 278 | ], 279 | "name": "stdout" 280 | } 281 | ] 282 | } 283 | ] 284 | } -------------------------------------------------------------------------------- /ipynb-htmls/conv1d.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "conv1d.ipynb", 7 | "provenance": [], 8 | "private_outputs": true, 9 | "collapsed_sections": [] 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "OTCB8AL-osBL", 22 | "colab_type": "code", 23 | "colab": {} 24 | }, 25 | "source": [ 26 | "from google.colab import drive\n", 27 | "drive.mount('/content/drive')" 28 | ], 29 | "execution_count": 0, 30 | "outputs": [] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "metadata": { 35 | "id": "koL6wrhIq_em", 36 | "colab_type": "code", 37 | "colab": {} 38 | }, 39 | "source": [ 40 | "\n", 41 | "from __future__ import print_function\n", 42 | "import numpy as np\n", 43 | "from sklearn.model_selection import train_test_split\n", 44 | "from sklearn.metrics import classification_report\n", 45 | "from time import time\n", 46 | "#np.random.seed(1337) # for reproducibility\n", 47 | "\n", 48 | "from keras.preprocessing import sequence\n", 49 | "from keras.models import Sequential\n", 50 | "from keras.layers.core import Dense, Dropout, Activation, Flatten\n", 51 | "from keras.layers.normalization import BatchNormalization\n", 52 | "from keras.layers.convolutional import Convolution1D, MaxPooling1D\n", 53 | "from keras.utils import np_utils\n", 54 | "from keras.callbacks import TensorBoard\n", 55 | "\n", 56 | "\n", 57 | "# set parameters:\n", 58 | "test_dim = 499\n", 59 | "maxlen = 100\n", 60 | "nb_filter = 512\n", 61 | "filter_length_1 = 10\n", 62 | "filter_length_2 = 5\n", 63 | "hidden_dims = 
750\n", 64 | "nb_epoch = 20\n", 65 | "nb_classes = 2\n", 66 | "split_ratio = 0.15\n", 67 | "\n", 68 | "print('Loading data...')\n", 69 | "\n", 70 | "X = np.load('/content/drive/My Drive/Colab Notebooks/data/numpy_vectors/x_test_mfcc_500_50:50_samples_sliced_out.npy')\n", 71 | "y = np.load('/content/drive/My Drive/Colab Notebooks/data/numpy_vectors/y_label_500_50:50_samples_sliced_out.npy')\n", 72 | "print(X.shape)\n", 73 | "print(y.shape)" 74 | ], 75 | "execution_count": 0, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "metadata": { 81 | "id": "D9lzWC2zrkch", 82 | "colab_type": "code", 83 | "colab": {} 84 | }, 85 | "source": [ 86 | "\n", 87 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_ratio)\n", 88 | "\n", 89 | "xts = X_train.shape\n", 90 | "#X_train = np.reshape(X_train, (xts[0], xts[1], 1))\n", 91 | "xtss = X_test.shape\n", 92 | "#X_test = np.reshape(X_test, (xtss[0], xtss[1], 1))\n", 93 | "yts = y_train.shape\n", 94 | "#y_train = np.reshape(y_train, (yts[0], 1))\n", 95 | "ytss = y_test.shape\n", 96 | "#y_test = np.reshape(y_test, (ytss[0], 1))\n", 97 | "\n", 98 | "print(len(X_train), 'train sequences')\n", 99 | "print(len(X_test), 'test sequences')\n", 100 | "\n", 101 | "Y_train = np_utils.to_categorical(y_train, nb_classes)\n", 102 | "Y_test = np_utils.to_categorical(y_test, nb_classes)\n", 103 | "\n", 104 | "# print('Pad sequences (samples x time)')\n", 105 | "# X_train = sequence.pad_sequences(X_train, maxlen=maxlen)\n", 106 | "# X_test = sequence.pad_sequences(X_test, maxlen=maxlen)\n", 107 | "# print('X_train shape:', X_train.shape)\n", 108 | "# print('X_test shape:', X_test.shape)\n" 109 | ], 110 | "execution_count": 0, 111 | "outputs": [] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "metadata": { 116 | "id": "9yKzDEgVroJf", 117 | "colab_type": "code", 118 | "colab": {} 119 | }, 120 | "source": [ 121 | "\n", 122 | "for batch_size in range(10, 11, 5):\n", 123 | " print('Build model...')\n", 124 | " model = Sequential()\n", 125 | "\n", 126 | " # we start off with an efficient embedding layer which maps\n", 127 | " # our vocab indices into embedding_dims dimensions\n", 128 | " # model.add(Embedding(max_features, embedding_dims, input_length=maxlen))\n", 129 | " # model.add(Dropout(0.25))\n", 130 | "\n", 131 | " # we add a Convolution1D, which will learn nb_filter\n", 132 | " # word group filters of size filter_length:\n", 133 | " model.add(Convolution1D(nb_filter=nb_filter,\n", 134 | " filter_length=filter_length_1,\n", 135 | " input_shape=(test_dim, 13),\n", 136 | " border_mode='valid',\n", 137 | " activation='relu'\n", 138 | " ))\n", 139 | " # we use standard max pooling (halving the output of the previous layer):\n", 140 | " model.add(BatchNormalization())\n", 141 | "\n", 142 | " model.add(Convolution1D(nb_filter=nb_filter,\n", 143 | " filter_length=5,\n", 144 | " border_mode='valid',\n", 145 | " activation='relu'\n", 146 | " ))\n", 147 | "\n", 148 | " model.add(BatchNormalization())\n", 149 | "\n", 150 | " model.add(MaxPooling1D(pool_length=2))\n", 151 | "\n", 152 | " model.add(Convolution1D(nb_filter=nb_filter,\n", 153 | " filter_length=25,\n", 154 | " border_mode='same',\n", 155 | " activation='relu'\n", 156 | " ))\n", 157 | "\n", 158 | " model.add(BatchNormalization())\n", 159 | "\n", 160 | " model.add(MaxPooling1D(pool_length=2))\n", 161 | "\n", 162 | " model.add(Convolution1D(nb_filter=nb_filter,\n", 163 | " filter_length=50,\n", 164 | " border_mode='same',\n", 165 | " activation='relu'\n", 166 | " ))\n", 167 | "\n", 
168 | " model.add(BatchNormalization())\n", 169 | "\n", 170 | " model.add(MaxPooling1D(pool_length=2))\n", 171 | "\n", 172 | " model.add(Convolution1D(nb_filter=nb_filter,\n", 173 | " filter_length=2,\n", 174 | " border_mode='same',\n", 175 | " activation='relu'\n", 176 | " ))\n", 177 | "\n", 178 | " model.add(BatchNormalization())\n", 179 | "\n", 180 | " model.add(MaxPooling1D(pool_length=2))\n", 181 | "\n", 182 | " # We flatten the output of the conv layer,\n", 183 | " # so that we can add a vanilla dense layer:\n", 184 | " model.add(Flatten())\n", 185 | "\n", 186 | " # We add a vanilla hidden layer:\n", 187 | " # model.add(Dense(hidden_dims))\n", 188 | " model.add(Dropout(0.25))\n", 189 | " # model.add(Activation('relu'))\n", 190 | "\n", 191 | " model.add(Dense(1000))\n", 192 | " model.add(Activation('relu'))\n", 193 | " model.add(Dense(750))\n", 194 | " model.add(Activation('relu'))\n", 195 | " model.add(Dense(50))\n", 196 | " model.add(Activation('relu'))\n", 197 | " # We project onto a single unit output layer, and squash it with a sigmoid:\n", 198 | " model.add(Dense(nb_classes))\n", 199 | " model.add(Activation('softmax'))\n", 200 | "\n", 201 | " model.compile(loss='binary_crossentropy',\n", 202 | " optimizer='adam', metrics=['accuracy'])\n", 203 | "\n", 204 | " print(\"model/split = {} <> batchsize = {}\".format(split_ratio, batch_size))\n", 205 | " tensorboard = TensorBoard(log_dir=\"logs/split_{}_batchsize_{}\".format(split_ratio, batch_size))\n", 206 | "\n", 207 | " model.fit(X_train, Y_train, batch_size=batch_size,\n", 208 | " nb_epoch=nb_epoch, verbose=1, callbacks=[tensorboard]\t)\n", 209 | "\n", 210 | " # model.save('model_hin_tel_38_samples.h5')\n", 211 | "\n", 212 | " y_preds = model.predict(X_test)\n", 213 | " for i in range(len(y_preds)):\n", 214 | " print(y_preds[i], y_test[i])\n", 215 | " \n", 216 | " score = model.evaluate(X_test, Y_test, verbose=1)\n", 217 | " print(score)\n", 218 | " print(\"\\n**********************************\\n\")\n", 219 | "\n", 220 | "# print(classification_report(Y_test, Y_preds))" 221 | ], 222 | "execution_count": 0, 223 | "outputs": [] 224 | } 225 | ] 226 | } -------------------------------------------------------------------------------- /notebooks/pase.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 75, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Current Model keys: 69\n", 13 | "Current Pt keys: 69\n", 14 | "Loading matching keys: ['blocks.0.conv.low_hz_', 'blocks.0.conv.band_hz_', 'blocks.0.norm.weight', 'blocks.0.norm.bias', 'blocks.0.norm.running_mean', 'blocks.0.norm.running_var', 'blocks.0.norm.num_batches_tracked', 'blocks.0.act.weight', 'blocks.1.conv.weight', 'blocks.1.conv.bias', 'blocks.1.norm.weight', 'blocks.1.norm.bias', 'blocks.1.norm.running_mean', 'blocks.1.norm.running_var', 'blocks.1.norm.num_batches_tracked', 'blocks.1.act.weight', 'blocks.2.conv.weight', 'blocks.2.conv.bias', 'blocks.2.norm.weight', 'blocks.2.norm.bias', 'blocks.2.norm.running_mean', 'blocks.2.norm.running_var', 'blocks.2.norm.num_batches_tracked', 'blocks.2.act.weight', 'blocks.3.conv.weight', 'blocks.3.conv.bias', 'blocks.3.norm.weight', 'blocks.3.norm.bias', 'blocks.3.norm.running_mean', 'blocks.3.norm.running_var', 'blocks.3.norm.num_batches_tracked', 'blocks.3.act.weight', 'blocks.4.conv.weight', 'blocks.4.conv.bias', 'blocks.4.norm.weight', 'blocks.4.norm.bias', 
'blocks.4.norm.running_mean', 'blocks.4.norm.running_var', 'blocks.4.norm.num_batches_tracked', 'blocks.4.act.weight', 'blocks.5.conv.weight', 'blocks.5.conv.bias', 'blocks.5.norm.weight', 'blocks.5.norm.bias', 'blocks.5.norm.running_mean', 'blocks.5.norm.running_var', 'blocks.5.norm.num_batches_tracked', 'blocks.5.act.weight', 'blocks.6.conv.weight', 'blocks.6.conv.bias', 'blocks.6.norm.weight', 'blocks.6.norm.bias', 'blocks.6.norm.running_mean', 'blocks.6.norm.running_var', 'blocks.6.norm.num_batches_tracked', 'blocks.6.act.weight', 'blocks.7.conv.weight', 'blocks.7.conv.bias', 'blocks.7.norm.weight', 'blocks.7.norm.bias', 'blocks.7.norm.running_mean', 'blocks.7.norm.running_var', 'blocks.7.norm.num_batches_tracked', 'blocks.7.act.weight', 'W.weight', 'W.bias', 'norm_out.running_mean', 'norm_out.running_var', 'norm_out.num_batches_tracked']\n", 15 | "torch.Size([1, 1, 100000])\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "from pase.models.frontend import wf_builder\n", 21 | "pase = wf_builder('pase/cfg/PASE.cfg')\n", 22 | "pase.eval()\n", 23 | "pase.load_pretrained('pase/PASE.ckpt', load_last=True, verbose=True)\n", 24 | "\n", 25 | "# Now we can forward waveforms as Torch tensors\n", 26 | "import torch\n", 27 | "x = torch.randn(1, 1, 100000)\n", 28 | "# y size will be (1, 100, 625), which are 625 frames of 100 dims each\n", 29 | "print(x.shape)\n", 30 | "y = pase(x)\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 41, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import tempfile\n", 40 | "import os\n", 41 | "import pydub\n", 42 | "import scipy\n", 43 | "import scipy.io.wavfile\n", 44 | "\n", 45 | "\n", 46 | "def read_mp3(file_path, as_float = False):\n", 47 | " \"\"\"\n", 48 | " Read an MP3 File into numpy data.\n", 49 | " :param file_path: String path to a file\n", 50 | " :param as_float: Cast data to float and normalize to [-1, 1]\n", 51 | " :return: Tuple(rate, data), where\n", 52 | " rate is an integer indicating samples/s\n", 53 | " data is an ndarray(n_samples, 2)[int16] if as_float = False\n", 54 | " otherwise ndarray(n_samples, 2)[float] in range [-1, 1]\n", 55 | " \"\"\"\n", 56 | "\n", 57 | " path, ext = os.path.splitext(file_path)\n", 58 | " assert ext=='.wav'\n", 59 | " mp3 = pydub.AudioSegment.from_wav(file_path)\n", 60 | " _, path = tempfile.mkstemp()\n", 61 | " mp3.export(path, format=\"wav\")\n", 62 | " rate, data = scipy.io.wavfile.read(path)\n", 63 | " os.remove(path)\n", 64 | " if as_float:\n", 65 | " data = data/(2**15)\n", 66 | " return rate, data" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 56, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "root = '/home/enigmaeth/Videos/accentPhase2/splits'\n", 76 | "def get_all_files():\n", 77 | " \"\"\"\n", 78 | " List all files recursively in the root specified by root\n", 79 | " \"\"\"\n", 80 | " files_list = []\n", 81 | " dirs = []\n", 82 | " import os\n", 83 | "\n", 84 | " for dirname, dirnames, filenames in os.walk(root):\n", 85 | " for filename in filenames:\n", 86 | " files_list.append(os.path.join(dirname, filename))\n", 87 | "\n", 88 | " # Advanced usage:\n", 89 | " # editing the 'dirnames' list will stop os.walk() from recursing into there.\n", 90 | " if '.git' in dirnames:\n", 91 | " # don't go into any .git directories.\n", 92 | " dirnames.remove('.git')\n", 93 | " \n", 94 | " return files_list\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 57, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | 
"name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "['/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_738.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_273.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_508.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_332.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_629.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_526.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_040.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_631.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_097.wav', '/home/enigmaeth/Videos/accentPhase2/splits/MSl/MSl_581.wav']\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "files = get_all_files()\n", 112 | "files = files[:10]\n", 113 | "print(files)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 78, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "import torch\n", 123 | "def run_pipe(n, as_float):\n", 124 | " \"\"\"\n", 125 | " returns pase vectors for set of files \n", 126 | " \"\"\" \n", 127 | " files = get_all_files()\n", 128 | " files = files[0:n]\n", 129 | " pase_vectors = []\n", 130 | " for file in files:\n", 131 | " file_rate, file_vec = read_mp3(file, as_float=as_float)\n", 132 | " file_vec_tensor = torch.from_numpy(file_vec)\n", 133 | " file_vec_tensor_flat = torch.flatten(file_vec_tensor)\n", 134 | " file_vec_tensor_view = file_vec_tensor_flat.view(1, 1, file_vec_tensor_flat.shape[0])\n", 135 | " print(file_vec_tensor_view.size())\n", 136 | " file_pase = pase(file_vec_tensor_view)\n", 137 | " pase_vectors.append(file_pase)\n", 138 | " \n", 139 | " return pase_vectors " 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 79, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "torch.Size([1, 1, 480000])\n" 152 | ] 153 | }, 154 | { 155 | "ename": "RuntimeError", 156 | "evalue": "\"reflection_pad1d\" not implemented for 'Short'", 157 | "output_type": "error", 158 | "traceback": [ 159 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 160 | "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", 161 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrun_pipe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 162 | "\u001b[0;32m\u001b[0m in \u001b[0;36mrun_pipe\u001b[0;34m(n, as_float)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mfile_vec_tensor_view\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfile_vec_tensor_flat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_vec_tensor_flat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_vec_tensor_view\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mfile_pase\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mpase\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_vec_tensor_view\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mpase_vectors\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_pase\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 163 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 548\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 164 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/PASE-0.1.dev0-py3.6.egg/pase/models/frontend.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0mdskips\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mblock\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 142\u001b[0;31m \u001b[0mh\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mblock\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 143\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdenseskips\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mblocks\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;31m# denseskips happen til the last but one layer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 165 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in 
\u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 546\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 548\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 166 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/PASE-0.1.dev0-py3.6.egg/pase/models/modules.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 824\u001b[0m \u001b[0mP\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mpad\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 825\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mP\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpad_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 826\u001b[0;31m \u001b[0mh\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 827\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'norm'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 828\u001b[0m \u001b[0mh\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mforward_norm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 167 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
546\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 547\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 548\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[0mhook_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 168 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/PASE-0.1.dev0-py3.6.egg/pase/models/modules.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, waveforms)\u001b[0m\n\u001b[1;32m 688\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 689\u001b[0m x_p = F.pad(x, (self.kernel_size // 2,\n\u001b[0;32m--> 690\u001b[0;31m self.kernel_size // 2), mode=self.pad_mode)\n\u001b[0m\u001b[1;32m 691\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 692\u001b[0m \u001b[0mx_p\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 169 | "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mpad\u001b[0;34m(input, pad, mode, value)\u001b[0m\n\u001b[1;32m 2740\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpad\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'3D tensors expect 2 values for padding'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2741\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'reflect'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2742\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreflection_pad1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2743\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'replicate'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2744\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_nn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplication_pad1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 170 | "\u001b[0;31mRuntimeError\u001b[0m: \"reflection_pad1d\" not implemented for 'Short'" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "print(run_pipe(10, False))" 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | 
"kernelspec": { 181 | "display_name": "Python 3", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.6.5" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 2 200 | } 201 | -------------------------------------------------------------------------------- /speech2vec/all_split.sh: -------------------------------------------------------------------------------- 1 | echo "GenX" 2 | # python3 gen_x.py 3 | echo "=======================================" 4 | echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" 5 | 6 | cd ../classification 7 | echo "=======================================" 8 | echo "cnn_bilstm" 9 | echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" 10 | python3 cnn_bilstm.py 11 | echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" 12 | echo "=======================================" 13 | 14 | echo "=======================================" 15 | echo "attention_lstm" 16 | echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" 17 | python3 attention_lstm.py 18 | echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" 19 | echo "=======================================" 20 | 21 | echo "=======================================" 22 | echo "conv_1d_model" 23 | echo "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<" 24 | python3 conv_1d_model.py 25 | echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" 26 | echo "=======================================" 27 | 28 | echo "End" -------------------------------------------------------------------------------- /speech2vec/gen_x.py: -------------------------------------------------------------------------------- 1 | from mfcc import * 2 | import numpy as np 3 | 4 | folder = '../data/splits' 5 | 6 | x = make_class_array(folder) 7 | print(x.shape) 8 | X_file = '../data/numpy_vectors/x_test_mfcc_' + (folder.split('/'))[-1] 9 | 10 | print("saving labels to ", X_file) 11 | np.save(X_file, x) 12 | 13 | 14 | 15 | # filename = "english1.wav" 16 | 17 | # with open(filename, 'rb') as f: 18 | # print(read_in_audio(f)) 19 | 20 | # cd = make_class_array('/media/enigmaeth/My Passport/Datasets/Accent/clean_data') 21 | # print(cd.shape) 22 | # np.save('top_3_100_split_mfcc.npy', cd) 23 | # mf = make_mean_mfcc_df('/media/enigmaeth/My Passport/Datasets/Accent/sounds_wav') 24 | # print(mf.shape) 25 | # np.save('top_3_100_split_y.npy', mf) 26 | -------------------------------------------------------------------------------- /speech2vec/gen_y.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | def generate_y(folder): 5 | accents = {} 6 | counts = {} 7 | y = [] 8 | index = 0 9 | 10 | for filename in os.listdir(folder): 11 | name = ''.join([i for i in filename if not i.isdigit()]) 12 | name = name.split('_')[0] 13 | if name not in accents: 14 | accents[name] = index 15 | index += 1 16 | counts[name] = 0 17 | 18 | counts[name] += 1 19 | y.append(accents[name]) 20 | 21 | print(counts) 22 | print(accents) 23 | 24 | sorted_counts = sorted(counts, key=counts.get, reverse=True) 25 | for r in sorted_counts: 26 | print(r, counts[r]) 27 | 28 | np_y = np.reshape(np.array(y), (len(y), 1)) 29 | 30 | Y_file = '../data/numpy_vectors/y_label_'+ (folder.split('/'))[-1] 31 | print("saving labels to ", Y_file) 32 | np.save(Y_file, y) 33 | 34 | folder = 
"../data/splits" 35 | generate_y(folder) -------------------------------------------------------------------------------- /speech2vec/mfcc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from python_speech_features import mfcc 4 | from python_speech_features import logfbank 5 | import scipy.io.wavfile as wav 6 | from scipy.io.wavfile import write as wav_write 7 | import librosa 8 | import scipy 9 | from tqdm import tqdm 10 | # import scikits.samplerate 11 | import os 12 | 13 | 14 | ''' 15 | mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True) 16 | ''' 17 | # read in wav file, get out signal (np array) and sampling rate (int) 18 | def read_in_audio(filename): 19 | (rate, sig) = wav.read(filename) 20 | return sig, rate 21 | 22 | 23 | # read in signal, take absolute value and slice seconds 1-3 from beginning 24 | def get_two_secs(filename): 25 | sig, rate = read_in_audio(filename) 26 | abs_sig = np.abs(sig) 27 | two_secs = abs_sig[rate:3*rate] 28 | return two_secs 29 | 30 | # calculates moving average for a specified window (number of samples) 31 | def take_moving_average(sig, window_width): 32 | cumsum_vec = np.cumsum(np.insert(sig, 0, 0)) 33 | ma_vec = (cumsum_vec[window_width:] - cumsum_vec[:-window_width])/float(window_width) 34 | return ma_vec 35 | 36 | # read in signal, change sample rate to outrate (samples/sec), use write_wav=True to save wav file to disk 37 | def downsample(filename, outrate=8000, write_wav = False): 38 | print(filename) 39 | (rate, sig) = wav.read(filename) 40 | down_sig = librosa.core.resample(sig * 1., rate, outrate, scale=True) 41 | if not write_wav: 42 | return down_sig, outrate 43 | if write_wav: 44 | wav_write('{}_down_{}.wav'.format(filename, outrate), outrate, down_sig) 45 | 46 | def librosa_downsample(filename, outrate=8000): 47 | y, s = librosa.load(filename, sr=8000) 48 | return y, s 49 | 50 | def custom_downsample(filename, outrate=8000): 51 | (rate, sig) = wav.read(filename) 52 | len_in_secs = len(sig) 53 | secs = len_in_secs/rate # Number of seconds in signal X 54 | samps = secs*outrate # Number of samples to downsample 55 | print(secs, samps) 56 | Y = scipy.signal.resample(sig , int(samps)) 57 | return Y, outrate 58 | 59 | # change total number of samps for downsampled file to n_samps by trimming or zero-padding and standardize them 60 | def make_standard_length(filename, n_samps=240000): 61 | down_sig, rate = librosa_downsample(filename) 62 | normed_sig = librosa.util.fix_length(down_sig, n_samps) 63 | normed_sig = (normed_sig - np.mean(normed_sig))/np.std(normed_sig) 64 | return normed_sig 65 | 66 | # from a folder containing wav files, normalize each, divide into num_splits-1 chunks and write the resulting np.arrays to a single matrix 67 | def make_split_audio_array(folder, num_splits = 5): 68 | """ 69 | returns numpy array of split audio for a folder 70 | """ 71 | lst = [] 72 | for filename in tqdm(os.listdir(folder)) : 73 | if filename.endswith('wav'): 74 | normed_sig = make_standard_length(filename) 75 | chunk = normed_sig.shape[0]/num_splits 76 | for i in range(num_splits - 1): 77 | lst.append(normed_sig[i*chunk:(i+2)*chunk]) 78 | lst = np.array(lst) 79 | lst = lst.reshape(lst.shape[0], -1) 80 | return lst 81 | 82 | # for input wav file outputs (13, 2999) mfcc np array 83 | def make_normed_mfcc(filename, outrate=8000): 84 | normed_sig = 
-------------------------------------------------------------------------------- /speech2vec/mp3_getter.py: --------------------------------------------------------------------------------
1 | import urllib.request  # note: the (commented) urlretrieve call needs urllib.request, not bare urllib
2 | import time
3 | import shutil
4 | from requests import get
5 | from bs4 import BeautifulSoup
6 | import pandas as pd
7 | import numpy as np
8 |
9 |
10 | # from the accent.gmu website, pass in list of languages to scrape mp3 files and save them to disk
11 | def mp3getter(lst):
12 |     links = []
13 |     for j in range(len(lst)):
14 |         for i in range(1,lst[j][1]+1):
15 |             try:
16 |                 print(" fetching record ", i, " for language ", lst[j][0])
17 |                 # urllib.request.urlretrieve("http://accent.gmu.edu/soundtracks/{0}{1}.mp3".format(lst[j][0], i), '../sounds/{0}{1}.mp3'.format(lst[j][0], i))
18 |                 # print("http://accent.gmu.edu/soundtracks/{0}{1}.mp3".format(lst[j][0], i))
19 |                 with open('links.txt', 'a') as f:
20 |                     f.write("http://accent.gmu.edu/soundtracks/{0}{1}.mp3".format(lst[j][0], i))
21 |                     f.write('\n')
22 |             except:  # back off briefly on a failed request; the failed index is not retried
23 |                 time.sleep(2)
24 |
25 |
26 | # from list of languages, return urls of each language landing page
27 | def lang_pages(lst):
28 |     urls=[]
29 |     for lang in lst:
30 |         urls.append('http://accent.gmu.edu/browse_language.php?function=find&language={}'.format(lang))
31 |     return urls
32 |
33 | #output:
34 | #
35 | # ['http://accent.gmu.edu/browse_language.php?function=find&language=amharic',
36 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=arabic',
37 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=bengali',
38 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=bulgarian',
39 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=cantonese',
40 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=dutch',
41 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=english',
42 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=farsi',
43 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=french',
44 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=german',
45 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=greek',
46 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=hindi',
47 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=italian',
48 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=japanese',
49 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=korean',
50 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=kurdish',
51 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=macedonian',
52 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=mandarin',
53 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=miskito',
54 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=nepali',
55 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=pashto',
56 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=polish',
57 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=portuguese',
58 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=punjabi',
59 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=romanian',
60 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=russian',
61 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=serbian',
62 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=spanish',
63
| # 'http://accent.gmu.edu/browse_language.php?function=find&language=swedish', 64 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=tagalog', 65 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=thai', 66 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=turkish', 67 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=ukrainian', 68 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=urdu', 69 | # 'http://accent.gmu.edu/browse_language.php?function=find&language=vietnamese'] 70 | 71 | # from http://accent.gmu.edu/browse_language.php, return list of languages 72 | def get_languages(): 73 | url = "http://accent.gmu.edu/browse_language.php" 74 | html = get(url) 75 | soup = BeautifulSoup(html.content, 'html.parser') 76 | languages = [] 77 | language_lists = soup.findAll('ul', attrs={'class': 'languagelist'}) 78 | for ul in language_lists: 79 | for li in ul.findAll('li'): 80 | languages.append(li.text) 81 | return languages 82 | 83 | # from list of languages, return list of urls 84 | def get_language_urls(lst): 85 | urls = [] 86 | for language in lst: 87 | urls.append('http://accent.gmu.edu/browse_language.php?function=find&language=' + language) 88 | return urls 89 | 90 | # from language, get the number of speakers of that language 91 | def get_num(language): 92 | url = 'http://accent.gmu.edu/browse_language.php?function=find&language=' + language 93 | try: 94 | html = get(url) 95 | soup = BeautifulSoup(html.content, 'html.parser') 96 | test = soup.find_all('div', attrs={'class': 'content'}) 97 | num = int(test[0].find('h5').text.split()[2]) 98 | except: 99 | num = 0 100 | return num 101 | 102 | # from list of languages, return list of tuples (LANGUAGE, LANGUAGE_NUM_SPEAKERS) for mp3getter, ignoring languages 103 | # with 0 speakers 104 | def get_formatted_languages(languages): 105 | formatted_languages = [] 106 | for language in languages: 107 | print("processing language: ", language) 108 | num = get_num(language) 109 | if num != 0: 110 | formatted_languages.append((language,num)) 111 | return formatted_languages 112 | 113 | # from each language whose url is contained in the above list, save the number of speakers of that language to a list 114 | def get_nums(lst): 115 | nums = [] 116 | for url in lst: 117 | html = get(url) 118 | soup = BeautifulSoup(html.content, 'html.parser') 119 | test = soup.find_all('div', attrs={'class': 'content'}) 120 | nums.append(int(test[0].find('h5').text.split()[2])) 121 | return nums 122 | 123 | def get_speaker_info(start, stop): 124 | ''' 125 | Inputs: two integers, corresponding to min and max speaker id number per language 126 | Outputs: Pandas Dataframe containing speaker filename, birthplace, native_language, age, sex, age_onset of English 127 | ''' 128 | 129 | user_data = [] 130 | for num in range(start,stop): 131 | info = {'speakerid': num, 'filename': 0, 'birthplace':1, 'native_language': 2, 'age':3, 'sex':4, 'age_onset':5} 132 | url = "http://accent.gmu.edu/browse_language.php?function=detail&speakerid={}".format(num) 133 | html = get(url) 134 | soup = BeautifulSoup(html.content, 'html.parser') 135 | body = soup.find_all('div', attrs={'class': 'content'}) 136 | try: 137 | info['filename']=str(body[0].find('h5').text.split()[0]) 138 | bio_bar = soup.find_all('ul', attrs={'class':'bio'}) 139 | info['birthplace'] = str(bio_bar[0].find_all('li')[0].text)[13:-6] 140 | info['native_language'] = str(bio_bar[0].find_all('li')[1].text.split()[2]) 141 | info['age'] 
= float(bio_bar[0].find_all('li')[3].text.split()[2].strip(',')) 142 | info['sex'] = str(bio_bar[0].find_all('li')[3].text.split()[3].strip()) 143 | info['age_onset'] = float(bio_bar[0].find_all('li')[4].text.split()[4].strip()) 144 | user_data.append(info) 145 | except: 146 | info['filename'] = '' 147 | info['birthplace'] = '' 148 | info['native_language'] = '' 149 | info['age'] = '' 150 | info['sex'] = '' 151 | info['age_onset'] = '' 152 | user_data.append(info) 153 | df = pd.DataFrame(user_data) 154 | df.to_csv('speaker_info_{}.csv'.format(stop)) 155 | return df 156 | 157 | # copy files from one list of wav files to a specified location 158 | def copy_files(lst, path): 159 | for filename in lst: 160 | shutil.copy2('{}.wav'.format(filename), '{}/{}.wav'.format(path, filename)) 161 | 162 | 163 | if __name__ == '__main__': 164 | # lst = get_languages() 165 | # print(len(lst)) 166 | # lert = get_formatted_languages(lst) 167 | # print(lert) 168 | lert = [('afrikaans', 6), ('agni', 1), ('akan', 13), ('albanian', 11), ('amazigh', 2), ('american sign language', 2), ('amharic', 23), ('anyin', 1), ('arabic', 153), ('armenian', 8), ('azerbaijani', 5), ('azerbaijani, south', 2), ('azeri turk', 2), ('bafang', 2), ('baga', 1), ('bahasa indonesia', 12), ('bai', 1), ('balant', 1), ('balanta ganja', 1), ('bamanankan', 5), ('bambara', 5), ('bamun', 1), ('bangla', 1), ('bari', 2), ('basque', 2), ('bavarian', 2), ('belarusan', 3), ('bengali', 19), ('bosnian', 12), ('bulgarian', 19), ('burmese', 2), ('cameroon creole english', 1), ('cantonese', 31), ('carolinian', 1), ('catalan', 5), ('cebuano', 1), ('chaldean', 2), ('chaldean neo aramaic', 1), ('chamorro', 1), ('chichewa', 1), ('chin, mizo', 1), ('chinese', 167), ('chittagonian', 1), ('croatian', 8), ('danish', 9), ('dari', 8), ('dholuo', 2), ('dinka', 1), ('djola', 1), ('dutch', 50), ('eastern farsi', 2), ('ebira', 1), ('edo', 1), ('english', 618), ('estonian', 17), ('ewe', 3), ('fang', 1), ('fanti', 3), ('faroese', 1), ('farsi', 30), ('fataluku', 1), ('fefe', 1), ('fijian', 3), ('filipino', 2), ('finnish', 15), ('flemish', 5), ('french', 69), ('frisian', 1), ('fulani', 1), ('fulfulde adamawa', 1), ('ga', 6), ('gan', 1), ('ganda', 3), ('garifuna', 2), ('gedeo', 1), ('georgian', 5), ('german', 38), ('greek', 15), ('gujarati', 16), ('gusii', 2), ('hadiyya', 2), ('hainanese', 1), ('haitian creole french', 7), ('hakka', 3), ('hausa', 10), ("hawai'i creole english", 2), ("hawai'ian pidgin", 2), ('hebrew', 9), ('hijazi', 17), ('hiligaynon', 2), ('hindi', 31), ('hindi-urdu', 31), ('hindko', 1), ('hmong', 2), ('hmong daw', 2), ('home sign', 1), ('hungarian', 11), ('ibibio', 3), ('icelandic', 3), ('ife', 1), ('igbo', 3), ('ikalanga', 1), ('ilonggo', 2), ('indonesian', 12), ('irish', 1), ('irish gaelic', 1), ('italian', 37), ('jamaican creole english', 4), ('japanese', 34), ('javanese', 1), ('jola', 1), ('kabyle', 1), ('kalanga', 1), ('kamba', 1), ('kambaata', 3), ('kamtok', 1), ('kannada', 9), ('kanuri', 1), ('kazakh', 4), ('kembata', 3), ('khalkha mongol', 8), ('khasonke', 1), ('khmer', 7), ('kikongo', 2), ('kikuyu', 5), ('kinyarwanda', 1), ('kirghiz', 3), ('kirundi', 1), ('kisii', 2), ('kiswahili', 11), ('klao', 1), ('kongo', 2), ('konkani', 3), ('korean', 90), ('krio', 6), ('kru', 1), ('kurdi', 4), ('kurdish', 10), ('kurmanji', 1), ('kyrgyz', 3), ('lamaholot', 1), ('lamotrekese', 1), ('lao', 3), ('latvian', 3), ('liberian english', 2), ('liberian pidgin english', 2), ('lingala', 1), ('lithuanian', 7), ('luba-kasai', 1), ('luganda', 3), ('luo', 3), 
('luxembourgeois', 1), ('macedonian', 26), ('malagasy', 1), ('malay', 5), ('malayalam', 7), ('maltese', 2), ('mancagne', 1), ('mandarin', 115), ('mandingo', 1), ('mandingue', 1), ('mandinka', 1), ('maninkakan', 1), ('mankanya', 1), ('manual communication', 1), ('marathi', 9), ('mauritian', 3), ('mende', 3), ('miskito', 11), ('mizo', 1), ('moba', 1), ('mongolian', 9), ('montenegrin', 1), ('moore', 1), ('morisyen', 2), ('mortlockese', 1), ('najdi', 26), ('nama', 1), ('nandi', 1), ('naxi', 1), ('ndebele', 1), ('nepali', 14), ('newar', 1), ('newari', 1), ('ngemba', 2), ('nicaragua creole english', 4), ('northern sotho', 1), ('norwegian', 7), ('nuer', 1), ('nyanja', 1), ('omani arabic', 1), ('oriya', 2), ('oromo', 3), ('ossetic', 1), ('pahari', 2), ('panjabi', 12), ('papiamentu', 2), ('pashto', 10), ('patois', 4), ('persian', 27), ('pidgin english', 1), ('pohnpeian', 1), ('polish', 39), ('poonchi', 1), ('portuguese', 60), ('pulaar', 3), ('punjabi', 12), ('quechua', 2), ('romanian', 23), ('rotuman', 2), ('rundi', 1), ('russian', 76), ('rwanda', 1), ("sa'a", 1), ('sardinian', 1), ('sarua', 1), ('satawalese', 2), ('sepedi', 1), ('serbian', 19), ('serer', 1), ('serer sine', 1), ('sesotho', 1), ('setswana', 2), ('shan', 1), ('shilluk', 1), ('shona', 2), ('sicilian', 1), ('sindhi', 1), ('sinhala', 7), ('sinhalese', 7), ('slovak', 6), ('slovenian', 2), ('somali', 7), ('sotho', 1), ('spanish', 206), ('sunda', 1), ('sundanese', 1), ('susu', 1), ('swahili', 11), ('swedish', 22), ('swiss german', 7), ('sylheti', 1), ('synthesized', 4), ('tagalog', 24), ('taishan', 1), ('taiwanese', 9), ('tajiki', 14), ('tamajeq', 2), ('tamazight', 1), ('tamil', 13), ('tatar', 1), ('telugu', 13), ('temne', 1), ('teochew', 1), ('tetum', 1), ('tetun-dili', 1), ('thai', 20), ('tibetan', 4), ('tigre', 1), ('tigrigna', 9), ('tok pisin', 1), ('tshiluba', 1), ('tswana', 2), ('turkish', 38), ('turkmen', 2), ('twi', 9), ('ukrainian', 11), ('urdu', 25), ('uyghur', 5), ('uzbek', 5), ('vietnamese', 27), ('vlaams', 4), ('voro', 3), ('wali', 1), ('woleaian', 1), ('wolof', 6), ('wu', 3), ('xasonga', 1), ('xiang', 4), ('yakut', 1), ('yapese', 1), ('yiddish', 5), ('yoruba', 5), ('yue', 1), ('zulu', 1)] 169 | 170 | mp3getter(lert) --------------------------------------------------------------------------------
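
For completeness, a hedged sketch of how the pieces in mp3_getter.py chain together. download_links is an illustrative helper that is not part of the repository: mp3getter itself only appends soundtrack URLs to links.txt, so something still has to fetch the files.

# Hypothetical driver for mp3_getter.py; download_links is illustrative only.
import urllib.request
from mp3_getter import get_languages, get_formatted_languages, mp3getter

def download_links(links_file='links.txt', out_dir='../sounds'):
    with open(links_file) as f:
        for url in (line.strip() for line in f):
            if url:
                # .../soundtracks/english1.mp3 -> ../sounds/english1.mp3
                urllib.request.urlretrieve(url, '{}/{}'.format(out_dir, url.rsplit('/', 1)[-1]))

languages = get_formatted_languages(get_languages())  # [(language, n_speakers), ...]; one request per language
mp3getter(languages)                                  # writes the soundtrack URLs to links.txt
download_links()                                      # fetch the mp3 files themselves
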