├── .gitignore ├── LICENSE ├── README.md ├── config ├── GLU │ ├── 0Pool │ │ ├── crnn_avgpool.yaml │ │ ├── crnn_convpool.yaml │ │ ├── crnn_gatedpooling_layer.yaml │ │ ├── crnn_lppool.yaml │ │ ├── crnn_maxpool.yaml │ │ ├── crnn_meanmax_alpha.yaml │ │ └── crnn_meanmaxpool.yaml │ ├── 16Pool │ │ ├── crnn_avgpool.yaml │ │ ├── crnn_convpool.yaml │ │ ├── crnn_gatedpooling_layer.yaml │ │ ├── crnn_lppool.yaml │ │ ├── crnn_maxpool.yaml │ │ ├── crnn_meanmax_alpha.yaml │ │ └── crnn_meanmaxpool.yaml │ ├── 2Pool │ │ ├── crnn_avgpool.yaml │ │ ├── crnn_convpool.yaml │ │ ├── crnn_gatedpooling_layer.yaml │ │ ├── crnn_lppool.yaml │ │ ├── crnn_maxpool.yaml │ │ ├── crnn_meanmax_alpha.yaml │ │ └── crnn_meanmaxpool.yaml │ ├── 4Pool │ │ ├── crnn_avgpool.yaml │ │ ├── crnn_convpool.yaml │ │ ├── crnn_gatedpooling_layer.yaml │ │ ├── crnn_lppool.yaml │ │ ├── crnn_maxpool.yaml │ │ ├── crnn_meanmax_alpha.yaml │ │ └── crnn_meanmaxpool.yaml │ └── 8Pool │ │ ├── crnn_avgpool.yaml │ │ ├── crnn_convpool.yaml │ │ ├── crnn_gatedpooling_layer.yaml │ │ ├── crnn_lppool.yaml │ │ ├── crnn_maxpool.yaml │ │ ├── crnn_meanmax_alpha.yaml │ │ └── crnn_meanmaxpool.yaml └── ReLU │ ├── 0Pool │ ├── crnn_avgpool.yaml │ ├── crnn_convpool.yaml │ ├── crnn_gatedpooling_layer.yaml │ ├── crnn_lppool.yaml │ ├── crnn_maxpool.yaml │ ├── crnn_meanmax_alpha.yaml │ └── crnn_meanmaxpool.yaml │ ├── 16Pool │ ├── crnn_avgpool.yaml │ ├── crnn_convpool.yaml │ ├── crnn_gatedpooling_layer.yaml │ ├── crnn_lppool.yaml │ ├── crnn_maxpool.yaml │ ├── crnn_meanmax_alpha.yaml │ └── crnn_meanmaxpool.yaml │ ├── 2Pool │ ├── crnn_avgpool.yaml │ ├── crnn_convpool.yaml │ ├── crnn_gatedpooling_layer.yaml │ ├── crnn_lppool.yaml │ ├── crnn_maxpool.yaml │ ├── crnn_meanmax_alpha.yaml │ └── crnn_meanmaxpool.yaml │ ├── 4Pool │ ├── crnn_avgpool.yaml │ ├── crnn_convpool.yaml │ ├── crnn_gatedpooling_layer.yaml │ ├── crnn_lppool.yaml │ ├── crnn_maxpool.yaml │ ├── crnn_meanmax_alpha.yaml │ └── crnn_meanmaxpool.yaml │ └── 8Pool │ ├── crnn_avgpool.yaml │ ├── crnn_convpool.yaml │ ├── crnn_gatedpooling_layer.yaml │ ├── crnn_lppool.yaml │ ├── crnn_maxpool.yaml │ ├── crnn_meanmax_alpha.yaml │ └── crnn_meanmaxpool.yaml ├── dataset.py ├── feature_extract └── extract_lms.py ├── losses.py ├── merge_csv.py ├── models.py ├── pooling.py ├── print_results.py ├── requirements.txt ├── run.py └── thresholding.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Duration robust sound event detection 2 | 3 | This repo implements our recent ICASSP 2020 work, available on [arXiv]( 4 | http://arxiv.org/abs/1904.03841) and [IEEE Xplore](https://ieeexplore.ieee.org/document/9053459/). 5 | 6 | The pooling methods from the paper can all be found in the script `pooling.py`. 
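For illustration, here is a minimal sketch of a mean-max pooling layer with a learnable blend weight, in the spirit of `MeanMaxPooling` / `MixedPooling_learn_alpha` (assuming PyTorch; this is illustrative only, not the exact implementation in `pooling.py`):

```python
import torch
import torch.nn as nn

class MixedPooling(nn.Module):
    """Illustrative sketch: blend of average and max pooling with a
    learnable mixing weight. See pooling.py for the actual implementations
    used in the paper."""

    def __init__(self, kernel_size):
        super().__init__()
        self.avg_pool = nn.AvgPool2d(kernel_size)
        self.max_pool = nn.MaxPool2d(kernel_size)
        # Unconstrained scalar, squashed to (0, 1) by a sigmoid in forward().
        self._alpha = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        alpha = torch.sigmoid(self._alpha)
        return alpha * self.avg_pool(x) + (1 - alpha) * self.max_pool(x)

# Pool a (batch, channel, time, frequency) map by 2 along frequency only,
# as the [1,2] entries in the configs do.
pool = MixedPooling(kernel_size=(1, 2))
out = pool(torch.randn(4, 16, 500, 64))  # -> torch.Size([4, 16, 500, 32])
```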
7 | 8 | Results of the paper on the development set (measured in F1 score) are: 9 | 10 | | pooltype | 0 | 2 | 4 | 8 | 16 | 11 | |--------------------------|-------|-------|-------|-------|-------| 12 | | AvgPool2d | 30.82 | 31.58 | - | 35.15 | 22.21 | 13 | | ConvPool | - | 23.04 | 32.05 | 24.8 | 16.39 | 14 | | LPPool2d | 28.82 | 32.3 | 35.34 | 33.14 | 21.97 | 15 | | MeanMaxPooling | 30.35 | 35.64 | 27.98 | 31.15 | 20.11 | 16 | | MixedPooling_learn_alpha | 23.22 | 36 | 32.92 | 31.76 | 24.39 | 17 | 18 | 19 | And on the evaluation set: 20 | 21 | | pooltype | 0 | 2 | 4 | 8 | 16 | 22 | |--------------------------|-------|-------|-------|-------|-------| 23 | | AvgPool2d | 26.59 | 25.85 | - | 31.27 | 22.14 | 24 | | ConvPool | - | 19.95 | 22.46 | 21.13 | 17.07 | 25 | | LPPool2d | 23.29 | 27.46 | 30.81 | 28 | 21.65 | 26 | | MaxPool2d | 21.98 | 26.01 | 29.74 | 26.16 | 21.5 | 27 | | MeanMaxPooling | 24.72 | 29.8 | 25.14 | 28.2 | 21.83 | 28 | | MixedPooling_learn_alpha | 20.13 | 27.93 | 30.72 | 27.54 | 23 | 29 | 30 | 31 | Each column header denotes the temporal pooling factor of the network, i.e., the overall time-axis subsampling produced by the $2\times$ pooling layers (e.g., a factor of 16 corresponds to four $2\times$ pools in the time domain). 32 | 33 | # Requirements 34 | 35 | Please see the `requirements.txt` file. Simply install via `pip install -r requirements.txt` or use a conda environment. 36 | 37 | Packages are: 38 | 39 | ``` 40 | librosa==0.6.2 41 | tqdm==4.24.0 42 | fire==0.1.3 43 | sed_eval==0.2.1 44 | tableprint==0.8.0 45 | dcase_util==0.2.5 46 | kaldi_io==0.9.1 47 | tabulate==0.8.2 48 | pandas==0.24.1 49 | scipy==1.2.1 50 | torchnet==0.0.4 51 | torch==0.4.1.post2 52 | numpy==1.16.2 53 | scikit_learn==0.20.3 54 | PyYAML==5.1 55 | ``` 56 | 57 | 58 | Specifically, we use [Kaldi](https://github.com/kaldi-asr/kaldi) for our data format and as our data processing tool. 59 | 60 | ## Dataset 61 | 62 | The data can be downloaded from the [official dcase2018](https://github.com/DCASE-REPO/dcase2018_baseline) repository. The download script can be found in `task4/dataset/download_data.py`. 63 | 64 | After successfully downloading the data, please generate `.scp` files from the dataset by running something like: 65 | 66 | ```bash 67 | for settype in audio/*; do 68 | find ${settype} -type f -name '*.wav' | awk -F/ '{print $NF,$0}' > $(basename ${settype}).scp 69 | done 70 | ``` 71 | 72 | Features can then be extracted with the script `feature_extract/extract_lms.py`. 73 | We recommend putting all the feature files into one directory, e.g., `features/logmel_64/weak.ark`, since the training defaults look for this specific directory. Defaults can be changed by simply passing a `--features` flag. 74 | After creating the required `.scp` files (at least `weak.scp` and `test.scp`), simply run: 75 | 76 | ```bash 77 | for i in weak test; do 78 | python feature_extract/extract_lms.py ${i}.scp features/logmel_64/${i}.ark 79 | done 80 | ``` 81 | 82 | Lastly, symlink the `metadata` directory (provided by the challenge) into the current directory. 83 | 84 | # Running the code 85 | 86 | The meat of the code is in the `run.py` script. Here, [google-fire](https://github.com/google/python-fire) is used to expose most of the functions on the command line. 87 | As can be seen at the bottom of `run.py`, the following functions are available: 88 | 89 | * train: Trains a CRNN model given a configuration. 
90 | * test: Evaluates a given trained model on a given feature set (by default, development features and development labels) using standard median filtering. 91 | * test_double: Evaluates a given trained model on a given feature set (by default, development features and development labels) using the double-threshold method. 92 | * stats: A helper script to analyze the per-class statistics on the training set. 93 | * traintest: A combination of train + test. Prints results to the command line and to a file. 94 | * traintestindomain: Trains and evaluates the model on the original training set, then re-estimates labels from the in-domain dataset and reruns training + evaluation. 95 | * runtests: Runs the development as well as the evaluation tests (just a convenience function). 96 | * calcthres: A dynamic threshold algorithm (not used in this work). 97 | 98 | Most training functions can be tweaked on the fly by adding parameters in `fire` fashion before training. 99 | If one wants to change, e.g., the pooling function for a specific experiment, just pass `--poolingfunction mean` to use mean pooling. 100 | Arguments that are forwarded to other objects (config keys ending in `_args`) can be passed in dict fashion, e.g., `--model_args '{"bidirectional":False, "filters":[1,1,1,1,1]}'`. 101 | 102 | All configurations for all experiments can be found in `config/`. The configs differ mainly in their pooling function and their subsampling factor `P`. 103 | 104 | # Citation 105 | 106 | ``` 107 | @inproceedings{Dinkel2020, 108 | archivePrefix = {arXiv}, 109 | arxivId = {1904.03841}, 110 | author = {Dinkel, Heinrich and Yu, Kai}, 111 | booktitle = {ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)}, 112 | doi = {10.1109/ICASSP40776.2020.9053459}, 113 | eprint = {1904.03841}, 114 | isbn = {978-1-5090-6631-5}, 115 | month = {may}, 116 | pages = {311--315}, 117 | publisher = {IEEE}, 118 | title = {{Duration robust weakly supervised sound event detection}}, 119 | url = {http://arxiv.org/abs/1904.03841}, 120 | year = {2020} 121 | } 122 | ``` 123 | 124 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_avgpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: AvgPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/AvgPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | 
patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_convpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: ConvolutionPool 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/ConvPool_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_gatedpooling_layer.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: GatedPooling1 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/GatedPooling1_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_lppool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: LPPool2d 9 | activation: GLU 10 | labels: 
metadata/train/weak.csv 11 | outputpath: experiments/LPPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_maxpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: MaxPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MaxPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_meanmax_alpha.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: MixedPooling_learn_alpha 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MixedPooling_learn_alpha_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | 
scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/0Pool/crnn_meanmaxpool.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [[1,2],[1,2],[1,2],[1,2]] 8 | pooltype: MeanMaxPooling 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MeanMaxPooling_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_avgpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: AvgPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/AvgPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | 
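Aside on the `pooling` entries in these configs: a scalar `2` subsamples both the time and frequency axes by two, while `[1,2]` subsamples frequency only, so the product of the time-axis entries gives the subsampling factor `P` that names each config directory. A minimal sketch of that arithmetic (the helper name here is hypothetical, not part of the repo):

```python
def time_pooling_factor(pooling):
    """Hypothetical helper: overall time-axis subsampling of a pooling spec.

    A scalar k pools both axes by k; a pair [t, f] pools time by t and
    frequency by f. The per-layer factors multiply.
    """
    factor = 1
    for p in pooling:
        factor *= p if isinstance(p, int) else p[0]
    return factor

assert time_pooling_factor([[1, 2], [1, 2], [1, 2], [1, 2]]) == 1  # 0Pool
assert time_pooling_factor([2, [1, 2], [1, 2], [1, 2]]) == 2       # 2Pool
assert time_pooling_factor([2, 2, 2, 2]) == 16                     # 16Pool
```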
-------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_convpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: ConvolutionPool 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/ConvPool_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_gatedpooling_layer.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: GatedPooling1 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/GatedPooling1_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_lppool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: LPPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/LPPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | 
percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_maxpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: MaxPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MaxPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_meanmax_alpha.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: MixedPooling_learn_alpha 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MixedPooling_learn_alpha_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/16Pool/crnn_meanmaxpool.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,2] 8 | pooltype: MeanMaxPooling 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MeanMaxPooling_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_avgpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: AvgPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/AvgPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_convpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | 
features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: ConvolutionPool 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/ConvPool_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_gatedpooling_layer.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: GatedPooling1 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/GatedPooling1_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_lppool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: LPPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/LPPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 
| weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_maxpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: MaxPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MaxPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_meanmax_alpha.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: MixedPooling_learn_alpha 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MixedPooling_learn_alpha_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | 
loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/2Pool/crnn_meanmaxpool.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,[1,2],[1,2],[1,2]] 8 | pooltype: MeanMaxPooling 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MeanMaxPooling_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/4Pool/crnn_avgpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,[1,2],[1,2]] 8 | pooltype: AvgPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/AvgPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/4Pool/crnn_convpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: 
[2,2,[1,2],[1,2]] 8 | pooltype: ConvolutionPool 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/ConvPool_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/4Pool/crnn_gatedpooling_layer.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,[1,2],[1,2]] 8 | pooltype: GatedPooling1 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/GatedPooling1_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 # Save a checkpoint every 10 epochs 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/GLU/4Pool/crnn_lppool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,[1,2],[1,2]] 8 | pooltype: LPPool2d 9 | activation: GLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/LPPool2d_GLU 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset size (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of 
--------------------------------------------------------------------------------
/config/GLU/4Pool/crnn_gatedpooling_layer.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: GatedPooling1
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/GatedPooling1_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
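Note: GatedPooling1 presumably follows gated pooling in the spirit of Lee et al., "Generalizing Pooling Functions in CNNs" (2016), where a learned gate blends max and average pooling per region; the trailing 1 likely distinguishes one of several variants in pooling.py. A sketch assuming a convolutional gate:

    import torch
    import torch.nn as nn

    class GatedPooling(nn.Module):
        # Sketch: per-region sigmoid gate g mixes max and average pooling.
        def __init__(self, channels, kernel_size):
            super().__init__()
            self.gate = nn.Conv2d(channels, channels, kernel_size, stride=kernel_size)
            self.avg = nn.AvgPool2d(kernel_size)
            self.max = nn.MaxPool2d(kernel_size)

        def forward(self, x):
            g = torch.sigmoid(self.gate(x))
            return g * self.max(x) + (1 - g) * self.avg(x)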
--------------------------------------------------------------------------------
/config/GLU/4Pool/crnn_lppool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: LPPool2d
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/LPPool2d_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
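Note: unlike the custom pool types, LPPool2d matches a stock PyTorch layer. torch.nn.LPPool2d takes the norm exponent ahead of the kernel size and computes (sum over the window of x**p)**(1/p); p = 1 is summation and p -> inf approaches max pooling. Since the config passes no pooltype arguments, the exponent is presumably fixed in models.py:

    import torch.nn as nn

    # e.g. an L2 pool over a (1, 2) frequency-only window, as in the 0Pool configs
    lp = nn.LPPool2d(norm_type=2, kernel_size=(1, 2))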
--------------------------------------------------------------------------------
/config/GLU/4Pool/crnn_maxpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: MaxPool2d
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MaxPool2d_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/4Pool/crnn_meanmax_alpha.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: MixedPooling_learn_alpha
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MixedPooling_learn_alpha_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
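Note: MixedPooling_learn_alpha presumably interpolates max and average pooling with a learnable mixing weight (the "mixed pooling" idea), rather than the fixed 0.5 of MeanMaxPooling. A minimal sketch with one scalar alpha squashed into [0, 1]:

    import torch
    import torch.nn as nn

    class MixedPoolingLearnAlpha(nn.Module):
        # Sketch: out = a * max(x) + (1 - a) * avg(x), with a = sigmoid(alpha) learned.
        def __init__(self, kernel_size):
            super().__init__()
            self.alpha = nn.Parameter(torch.zeros(1))  # sigmoid(0) = 0.5, i.e. mean-max at init
            self.avg = nn.AvgPool2d(kernel_size)
            self.max = nn.MaxPool2d(kernel_size)

        def forward(self, x):
            a = torch.sigmoid(self.alpha)
            return a * self.max(x) + (1 - a) * self.avg(x)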
--------------------------------------------------------------------------------
/config/GLU/4Pool/crnn_meanmaxpool.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: MeanMaxPooling
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MeanMaxPooling_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_avgpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: AvgPool2d
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/AvgPool2d_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
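Note: the `pooling` list holds one (time, frequency) factor per pooled convolutional block: a bare 2 halves both axes, [1,2] halves only frequency. The folder names (0Pool ... 16Pool) track the product of the time factors, with 0Pool meaning no temporal subsampling at all; [2,2,2,[1,2]] above yields one output frame per 8 input frames. A quick check:

    from functools import reduce

    pooling = [2, 2, 2, [1, 2]]
    time = [p if isinstance(p, int) else p[0] for p in pooling]
    freq = [p if isinstance(p, int) else p[1] for p in pooling]
    print(reduce(lambda a, b: a * b, time))  # 8  -> "8Pool": temporal subsampling factor
    print(reduce(lambda a, b: a * b, freq))  # 16 -> the 64 log-mel bands shrink to 4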
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_convpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: ConvolutionPool
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/ConvPool_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_gatedpooling_layer.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: GatedPooling1
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/GatedPooling1_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_lppool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: LPPool2d
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/LPPool2d_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_maxpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: MaxPool2d
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MaxPool2d_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_meanmax_alpha.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: MixedPooling_learn_alpha
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MixedPooling_learn_alpha_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/GLU/8Pool/crnn_meanmaxpool.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,[1,2]]
8 | pooltype: MeanMaxPooling
9 | activation: GLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MeanMaxPooling_GLU
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
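Note: every config up to this point sets `activation: GLU`; the configs below are their ReLU twins, identical except for `activation` and the dropped `_GLU` suffix on outputpath. In a CNN, a GLU is typically realized by doubling the convolution's output channels and gating one half with the sigmoid of the other (torch.nn.GLU(dim=1) performs the same split-and-gate). A sketch of such a block:

    import torch
    import torch.nn as nn

    class GLUConv(nn.Module):
        # Sketch: conv with 2x channels, output = a * sigmoid(b) over the channel split.
        def __init__(self, in_ch, out_ch, kernel_size=3):
            super().__init__()
            self.conv = nn.Conv2d(in_ch, 2 * out_ch, kernel_size, padding=kernel_size // 2)

        def forward(self, x):
            a, b = self.conv(x).chunk(2, dim=1)
            return a * torch.sigmoid(b)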
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_avgpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: AvgPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/AvgPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_convpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: ConvolutionPool
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/ConvPool
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_gatedpooling_layer.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: GatedPooling1
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/GatedPooling1
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_lppool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: LPPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/LPPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_maxpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: MaxPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MaxPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_meanmax_alpha.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: MixedPooling_learn_alpha
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MixedPooling_learn_alpha
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/0Pool/crnn_meanmaxpool.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [[1,2],[1,2],[1,2],[1,2]]
8 | pooltype: MeanMaxPooling
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MeanMaxPooling
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_avgpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: AvgPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/AvgPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_convpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: ConvolutionPool
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/ConvPool
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_gatedpooling_layer.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: GatedPooling1
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/GatedPooling1
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_lppool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: LPPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/LPPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_maxpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: MaxPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MaxPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_meanmax_alpha.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: MixedPooling_learn_alpha
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MixedPooling_learn_alpha
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/16Pool/crnn_meanmaxpool.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,2,2]
8 | pooltype: MeanMaxPooling
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MeanMaxPooling
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
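Note: `optimizer`/`scheduler` and their *_args blocks mirror class names in torch.optim, which suggests run.py resolves them by name. A sketch of that pattern; the stand-in model and the exact lookup are assumptions, not the repository's code (and it presumes the on-disk YAML carries proper indentation):

    import yaml
    import torch

    config = yaml.safe_load(open("config/ReLU/16Pool/crnn_meanmaxpool.yaml"))
    model = torch.nn.Linear(4, 2)  # stand-in for the CRNN
    optimizer = getattr(torch.optim, config["optimizer"])(
        model.parameters(), **config["optimizer_args"])
    scheduler = getattr(torch.optim.lr_scheduler, config["scheduler"])(
        optimizer, **config["scheduler_args"])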
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_avgpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: AvgPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/AvgPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_convpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: ConvolutionPool
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/ConvPool
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_gatedpooling_layer.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: GatedPooling1
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/GatedPooling1
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_lppool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: LPPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/LPPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_maxpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: MaxPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MaxPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
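Note: the scaler comment matters because sklearn's incremental-fitting method is partial_fit, and the .ark features are read utterance by utterance rather than as one matrix. A sketch of incremental standardization, with random matrices standing in for (frames x 64) log-mel utterances:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    scaler = StandardScaler(with_mean=True, with_std=True)
    for utt in (np.random.randn(500, 64) for _ in range(10)):
        scaler.partial_fit(utt)  # accumulate mean/std without loading everything at once
    standardized = scaler.transform(np.random.randn(120, 64))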
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_meanmax_alpha.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: MixedPooling_learn_alpha
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MixedPooling_learn_alpha
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/2Pool/crnn_meanmaxpool.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,[1,2],[1,2],[1,2]]
8 | pooltype: MeanMaxPooling
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/MeanMaxPooling
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/4Pool/crnn_avgpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: AvgPool2d
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/AvgPool2d
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/4Pool/crnn_convpool.yaml:
--------------------------------------------------------------------------------
1 | model: CRNN
2 | features: features/logmel_64/weak.ark
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: ConvolutionPool
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/ConvPool
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
--------------------------------------------------------------------------------
/config/ReLU/4Pool/crnn_gatedpooling_layer.yaml:
--------------------------------------------------------------------------------
1 | features: features/logmel_64/weak.ark
2 | model: CRNN
3 | model_args:
4 | bidirectional: True
5 | filtersizes: [3,3,3,3,3]
6 | filter: [16,32,128,128,128]
7 | pooling: [2,2,[1,2],[1,2]]
8 | pooltype: GatedPooling1
9 | activation: ReLU
10 | labels: metadata/train/weak.csv
11 | outputpath: experiments/GatedPooling1
12 | dataloader_args:
13 | batch_size: 32
14 | num_workers: 1
15 | percent: 90
16 | over_sample_factor: 1 # N times the dataset size (sampled with replacement)
17 | optimizer: Adam
18 | optimizer_args:
19 | lr: 0.001
20 | betas: [0.9, 0.999]
21 | eps: 0.00000001
22 | weight_decay: 0.0
23 | epochs: 300
24 | feature_args:
25 | cmvn: False
26 | delta: False
27 | splice: False
28 | scaler: StandardScaler # Can be any sklearn.preprocessing scaler that supports partial_fit
29 | scaler_args:
30 | with_std: True
31 | with_mean: True
32 | improvecriterion: loss # One of: acc | loss
33 | saveinterval: 10 # Save a checkpoint every 10 epochs
34 | poolingfunction: linear
35 | loss: BCELoss # Can also be FocalLoss
36 | # BCELoss takes no args; FocalLoss accepts 'alpha' and 'gamma'
37 | loss_args: {}
38 | scheduler: ReduceLROnPlateau
39 | scheduler_args:
40 | mode: min
41 | factor: 0.1
42 | patience: 10
43 | cooldown: 1
44 | verbose: False
45 | threshold: 0.001
46 |
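Note: `loss: BCELoss` maps to torch.nn.BCELoss; FocalLoss is the custom alternative in losses.py, and its 'alpha'/'gamma' arguments match the standard binary focal loss. A sketch consistent with those two knobs (the repository's exact weighting may differ):

    import torch
    import torch.nn as nn

    class FocalLoss(nn.Module):
        # Sketch: FL = -alpha * (1 - p_t)**gamma * log(p_t), on sigmoid outputs like BCELoss.
        def __init__(self, alpha=0.25, gamma=2.0):
            super().__init__()
            self.alpha, self.gamma = alpha, gamma

        def forward(self, prob, target):
            p_t = prob * target + (1 - prob) * (1 - target)
            return (-self.alpha * (1 - p_t) ** self.gamma
                    * torch.log(p_t.clamp(min=1e-8))).mean()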
True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,[1,2],[1,2]] 8 | pooltype: MaxPool2d 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MaxPool2d 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset ( replacement ) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports fit_partial 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 #Save every 5 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/4Pool/crnn_meanmax_alpha.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,[1,2],[1,2]] 8 | pooltype: MixedPooling_learn_alpha 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MixedPooling_learn_alpha 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset ( replacement ) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports fit_partial 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 #Save every 5 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/4Pool/crnn_meanmaxpool.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,[1,2],[1,2]] 8 | pooltype: MeanMaxPooling 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MeanMaxPooling 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset ( replacement ) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | 
cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports fit_partial 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 #Save every 5 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_avgpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: AvgPool2d 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/AvgPool2d 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset ( replacement ) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports fit_partial 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 #Save every 5 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_convpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: ConvolutionPool 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/ConvPool 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # N times the usual dataset ( replacement ) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports fit_partial 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can also be acc | loss 33 | saveinterval: 10 #Save every 5 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 
10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_gatedpooling_layer.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: GatedPooling1 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/GatedPooling1 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # Oversample N times the usual dataset (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can be acc | loss 33 | saveinterval: 10 # Save every 10 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_lppool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: LPPool2d 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/LPPool2d 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # Oversample N times the usual dataset (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can be acc | loss 33 | saveinterval: 10 # Save every 10 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_maxpool.yaml: -------------------------------------------------------------------------------- 1 | model: CRNN 2 | features: features/logmel_64/weak.ark 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: MaxPool2d 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MaxPool2d 12 | 
dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # Oversample N times the usual dataset (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can be acc | loss 33 | saveinterval: 10 # Save every 10 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_meanmax_alpha.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: MixedPooling_learn_alpha 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MixedPooling_learn_alpha 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # Oversample N times the usual dataset (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # Can be acc | loss 33 | saveinterval: 10 # Save every 10 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /config/ReLU/8Pool/crnn_meanmaxpool.yaml: -------------------------------------------------------------------------------- 1 | features: features/logmel_64/weak.ark 2 | model: CRNN 3 | model_args: 4 | bidirectional: True 5 | filtersizes: [3,3,3,3,3] 6 | filter: [16,32,128,128,128] 7 | pooling: [2,2,2,[1,2]] 8 | pooltype: MeanMaxPooling 9 | activation: ReLU 10 | labels: metadata/train/weak.csv 11 | outputpath: experiments/MeanMaxPooling 12 | dataloader_args: 13 | batch_size: 32 14 | num_workers: 1 15 | percent: 90 16 | over_sample_factor: 1 # Oversample N times the usual dataset (with replacement) 17 | optimizer: Adam 18 | optimizer_args: 19 | lr: 0.001 20 | betas: [0.9, 0.999] 21 | eps: 0.00000001 22 | weight_decay: 0.0 23 | epochs: 300 24 | feature_args: 25 | cmvn: False 26 | delta: False 27 | splice: False 28 | scaler: StandardScaler # Can be any of sklearn.preprocessing that supports partial_fit 29 | scaler_args: 30 | with_std : True 31 | with_mean : True 32 | improvecriterion: loss # 
Can be acc | loss 33 | saveinterval: 10 # Save every 10 epochs to see what happened 34 | poolingfunction: linear 35 | loss: BCELoss # Can also be FocalLoss 36 | # BCELoss has no args, otherwise FocalLoss can have 'alpha' and 'gamma' 37 | loss_args: {} 38 | scheduler: ReduceLROnPlateau 39 | scheduler_args: 40 | mode: min 41 | factor: 0.1 42 | patience: 10 43 | cooldown: 1 44 | verbose: False 45 | threshold: 0.001 46 | -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: richman 3 | # @Date: 2018-01-18 10:28:31 4 | # @Last Modified by: richman 5 | # @Last Modified time: 2018-04-11 6 | import kaldi_io 7 | import numpy as np 8 | import torch 9 | from sklearn.model_selection import train_test_split 10 | from torch.utils.data import Sampler, WeightedRandomSampler 11 | from torch.utils import data 12 | 13 | 14 | class ListDataset(torch.utils.data.Dataset): 15 | """Dataset wrapping lists. 16 | 17 | Each sample will be retrieved by indexing each list along the first dimension. 18 | 19 | Arguments: 20 | *lists (List): Lists that all have the same size in the first dimension. 21 | """ 22 | 23 | def __init__(self, *lists): 24 | assert all(len(lists[0]) == len(a_list) for a_list in lists) 25 | self.lists = lists 26 | 27 | def __getitem__(self, index): 28 | return tuple(a_list[index] for a_list in self.lists) 29 | 30 | def __len__(self): 31 | return len(self.lists[0]) 32 | 33 | 34 | def seq_collate_fn(data_batches): 35 | """seq_collate_fn 36 | 37 | Helper function for torch.utils.data.Dataloader 38 | 39 | :param data_batches: iterable 40 | """ 41 | data_batches.sort(key=lambda x: len(x[0]), reverse=True) 42 | 43 | def merge_seq(dataseq, dim=0): 44 | lengths = [seq.shape for seq in dataseq] 45 | # Assuming duration is given in the first dimension of each sequence 46 | maxlengths = tuple(np.max(lengths, axis=dim)) 47 | 48 | # For the case that the lengths are 2-dimensional 49 | lengths = np.array(lengths)[:, dim] 50 | # batch_mean = np.mean(np.concatenate(dataseq),axis=0, keepdims=True) 51 | # padded = np.tile(batch_mean, (len(dataseq), maxlengths[0], 1)) 52 | padded = np.zeros((len(dataseq),) + maxlengths) 53 | for i, seq in enumerate(dataseq): 54 | end = lengths[i] 55 | padded[i, :end] = seq[:end] 56 | return padded, lengths 57 | features, targets = zip(*data_batches) 58 | features_seq, feature_lengths = merge_seq(features) 59 | return torch.from_numpy(features_seq), torch.tensor(targets) 60 | 61 | 62 | def create_dataloader_train_cv( 63 | kaldi_string, utt_labels, transform=None, 64 | batch_size: int = 16, num_workers: int = 1, percent: float = 90, 65 | over_sample_factor: int = 1, 66 | ): 67 | def valid_feat(item): 68 | """valid_feat 69 | Checks if feature is in labels 70 | 71 | :param item: key value pair from read_mat_ark 72 | """ 73 | return item[0] in utt_labels 74 | 75 | features = [] 76 | labels = [] 77 | # Directly filter out all utterances without labels 78 | for idx, (k, feat) in enumerate(filter(valid_feat, kaldi_io.read_mat_ark(kaldi_string))): 79 | if transform: 80 | feat = transform(feat) 81 | features.append(feat) 82 | labels.append(utt_labels[k]) 83 | assert len(features) > 0, "No features were found, are the labels correct?" 84 | 85 | assert percent > 0 and percent <= 100, "Percentage needs to be 0
4 76 | traindatasetname = "Train" if not has_indomain else root_split_path[4] 77 | time_pooling_depth = lookforpoolinginfile( 78 | os.path.join(root, args.search)) 79 | 80 | summary.append( 81 | { 82 | "pooltype": pooling_type, 83 | "poolfactor": time_pooling_depth, 84 | "path": root, 85 | "traindataset": traindatasetname, 86 | "f1_macro": f1_macro, 87 | "f1_micro": f1_micro, 88 | "err_macro": error_macro, 89 | "utt_pre": utt_pre, 90 | "utt_re": utt_recall, 91 | "utt_f1": utt_f1, 92 | "onset_pre": onset_pre, 93 | "onset_rec": onset_recall, 94 | "onset_f1": onset_f1, 95 | "offset_pre": offset_pre, 96 | "offset_rec": offset_recall, 97 | "offset_f1": offset_f1, 98 | "onoff_f1": onoff_f1, 99 | "onoff_pre": onoff_pre, 100 | "onoff_rec": onoff_recall, 101 | "alarm_bell_f1": alarm_bell_f1, 102 | "blender_f1": blender_f1, 103 | "cat_f1": cat_f1, 104 | "dishes_f1": dishes_f1, 105 | "dog_f1": dog_f1, 106 | "elec_f1": elec_f1, 107 | "fry_f1": fry_f1, 108 | "water_f1": water_f1, 109 | "speech_f1": speech_f1, 110 | "vac_f1": vac_f1, 111 | } 112 | ) 113 | 114 | assert len(summary) > 0, "Nothing found in search for [{}]".format(args.target) 115 | summary = pd.DataFrame(summary).sort_values('f1_macro', ascending=False) 116 | if args.output: 117 | summary.to_csv(args.output, index=False) 118 | print(tabulate(summary, headers='keys', tablefmt='psql')) 119 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | librosa==0.6.2 2 | tqdm==4.24.0 3 | fire==0.1.3 4 | sed_eval==0.2.1 5 | tableprint==0.8.0 6 | dcase_util==0.2.5 7 | kaldi_io==0.9.1 8 | tabulate==0.8.2 9 | pandas==0.24.1 10 | scipy==1.2.1 11 | torchnet==0.0.4 12 | torch==0.4.1.post2 13 | numpy==1.16.2 14 | scikit_learn==0.20.3 15 | PyYAML==5.4 16 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | import datetime 4 | import torch 5 | from pprint import pformat 6 | import models 7 | from dataset import create_dataloader_train_cv 8 | import fire 9 | import losses 10 | import logging 11 | import pandas as pd 12 | import kaldi_io 13 | import yaml 14 | import os 15 | import numpy as np 16 | from dcase_util.data import ManyHotEncoder, ProbabilityEncoder 17 | from sklearn import metrics 18 | import tableprint as tp 19 | import sklearn.preprocessing as pre 20 | import torchnet as tnt 21 | import sed_eval 22 | from torch._six import container_abcs 23 | from itertools import repeat 24 | 25 | 26 | class AUCMeter(object): 27 | def __init__(self): 28 | self.reset() 29 | 30 | def reset(self): 31 | self.outputs = [] 32 | self.targets = [] 33 | 34 | def add(self, outputs, targets): 35 | outputs, targets = np.atleast_2d( 36 | outputs.cpu().numpy(), targets.cpu().numpy()) 37 | self.outputs.append(outputs) 38 | self.targets.append(targets) 39 | 40 | def value(self): 41 | return metrics.roc_auc_score( 42 | np.concatenate(self.targets, axis=0), 43 | np.concatenate(self.outputs, axis=0), 44 | average='macro') 45 | 46 | 47 | class BinarySimilarMeter(object): 48 | """Only counts ones, does not consider zeros as being correct""" 49 | 50 | def __init__(self, sigmoid_output=False): 51 | super(BinarySimilarMeter, self).__init__() 52 | self.sigmoid_output = sigmoid_output 53 | self.reset() 54 | 55 | def reset(self): 56 | self.correct = 0 57 | self.n = 0 58 | 59 | def add(self, output, 
target): 60 | if self.sigmoid_output: 61 | output = torch.sigmoid(output) 62 | output = output.round() 63 | self.correct += np.sum(np.logical_and(output, target).numpy()) 64 | self.n += (target == 1).nonzero().shape[0] 65 | 66 | def value(self): 67 | if self.n == 0: 68 | return 0 69 | return (self.correct / self.n) * 100. 70 | 71 | 72 | class BinaryAccuracyMeter(object): 73 | """Counts all outputs, including zero""" 74 | 75 | def __init__(self, sigmoid_output=False): 76 | super(BinaryAccuracyMeter, self).__init__() 77 | self.sigmoid_output = sigmoid_output 78 | self.reset() 79 | 80 | def reset(self): 81 | self.correct = 0 82 | self.n = 0 83 | 84 | def add(self, output, target): 85 | if self.sigmoid_output: 86 | output = torch.sigmoid(output) 87 | output = output.round() 88 | self.correct += int((output == target).sum()) 89 | self.n += np.prod(output.shape) 90 | 91 | def value(self): 92 | if self.n == 0: 93 | return 0 94 | return (self.correct / self.n) * 100. 95 | 96 | 97 | def parsecopyfeats(feat, cmvn=False, delta=False, splice=None): 98 | outstr = "copy-feats ark:{} ark:- |".format(feat) 99 | if cmvn: 100 | outstr += "apply-cmvn-sliding --center ark:- ark:- |" 101 | if delta: 102 | outstr += "add-deltas ark:- ark:- |" 103 | if splice and splice > 0: 104 | outstr += "splice-feats --left-context={} --right-context={} ark:- ark:- |".format( 105 | splice, splice) 106 | return outstr 107 | 108 | 109 | def runepoch(dataloader, model, criterion, optimizer=None, dotrain=True, poolfun=lambda x, d: x.mean(d)): 110 | model = model.train() if dotrain else model.eval() 111 | # By default use average pooling 112 | utt_loss_meter = tnt.meter.AverageValueMeter() 113 | utt_acc_meter = BinarySimilarMeter() 114 | auc_meter = AUCMeter() 115 | with torch.set_grad_enabled(dotrain): 116 | for i, (features, utt_targets) in enumerate(dataloader): 117 | features = features.float().to(device) 118 | # Might be a bit taxing on the GPU to put all 500 * 10 labels there 119 | utt_targets = utt_targets.float().cpu() 120 | outputs = torch.sigmoid(model(features)).cpu() 121 | pooled_prob = poolfun(outputs, 1) 122 | loss = criterion(pooled_prob, utt_targets) 123 | utt_loss_meter.add(loss.item()) 124 | auc_meter.add(pooled_prob.data, utt_targets.data) 125 | utt_acc_meter.add(pooled_prob.data, utt_targets.data) 126 | if dotrain: 127 | optimizer.zero_grad() 128 | loss.backward() 129 | optimizer.step() 130 | 131 | return utt_loss_meter.value(), utt_acc_meter.value(), auc_meter.value() 132 | 133 | 134 | def genlogger(outdir, fname): 135 | formatter = logging.Formatter( 136 | "[ %(levelname)s : %(asctime)s ] - %(message)s") 137 | logging.basicConfig( 138 | level=logging.DEBUG, 139 | format="[ %(levelname)s : %(asctime)s ] - %(message)s") 140 | logger = logging.getLogger("Pyobj, f") 141 | # Dump log to file 142 | fh = logging.FileHandler(os.path.join(outdir, fname)) 143 | fh.setFormatter(formatter) 144 | logger.addHandler(fh) 145 | return logger 146 | 147 | 148 | def parse_config_or_kwargs(config_file, **kwargs): 149 | with open(config_file) as con_read: 150 | yaml_config = yaml.load(con_read, Loader=yaml.FullLoader) 151 | # passed kwargs will override yaml config 152 | for key in kwargs.keys(): 153 | assert key in yaml_config, "Parameter {} invalid!".format(key) 154 | return dict(yaml_config, **kwargs) 155 | 156 | 157 | def criterion_improver(mode): 158 | """Returns a function to ascertain if criterion did improve 159 | 160 | :mode: can be either 'loss' or 'acc' 161 | :returns: function that can be called, function returns true if criterion improved 
162 | 163 | """ 164 | assert mode in ('loss', 'acc') 165 | best_value = np.inf if mode == 'loss' else 0 166 | 167 | def comparator(x, best_x): 168 | return x < best_x if mode == 'loss' else x > best_x 169 | 170 | def inner(x): 171 | # rebind parent scope variable 172 | nonlocal best_value 173 | if comparator(x, best_value): 174 | best_value = x 175 | return True 176 | return False 177 | return inner 178 | 179 | 180 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 181 | torch.manual_seed(1) 182 | if device.type == 'cuda': 183 | torch.cuda.manual_seed_all(1) 184 | 185 | 186 | def main(config='config/ReLU/0Pool/crnn_maxpool.yaml', **kwargs): 187 | """Trains a model on the given features and labels. 188 | 189 | :features: str: Input features (set via the config). Needs to be a kaldi formatted file 190 | :config: A training configuration. Note that all parameters in the config can also be manually adjusted with --ARG=VALUE 191 | :returns: None 192 | """ 193 | 194 | config_parameters = parse_config_or_kwargs(config, **kwargs) 195 | outputdir = os.path.join( 196 | config_parameters['outputpath'], 197 | config_parameters['model'], 198 | datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%f')) 199 | try: 200 | os.makedirs(outputdir) 201 | except IOError: 202 | pass 203 | logger = genlogger(outputdir, 'train.log') 204 | logger.info("Storing data at: {}".format(outputdir)) 205 | logger.info("<== Passed Arguments ==>") 206 | # Print arguments into logs 207 | for line in pformat(config_parameters).split('\n'): 208 | logger.info(line) 209 | 210 | kaldi_string = parsecopyfeats( 211 | config_parameters['features'], **config_parameters['feature_args']) 212 | 213 | scaler = getattr( 214 | pre, config_parameters['scaler'])( 215 | **config_parameters['scaler_args']) 216 | inputdim = -1 217 | logger.info( 218 | "<== Estimating Scaler ({}) ==>".format( 219 | scaler.__class__.__name__)) 220 | for kid, feat in kaldi_io.read_mat_ark(kaldi_string): 221 | scaler.partial_fit(feat) 222 | inputdim = feat.shape[-1] 223 | assert inputdim > 0, "Reading inputstream failed" 224 | logger.info( 225 | "Features: {} Input dimension: {}".format( 226 | config_parameters['features'], 227 | inputdim)) 228 | logger.info("<== Labels ==>") 229 | label_df = pd.read_csv(config_parameters['labels'], sep='\t') 230 | label_df.event_labels = label_df.event_labels.str.split(',') 231 | label_df = label_df.set_index('filename') 232 | uniquelabels = list(np.unique( 233 | [item 234 | for row in label_df.event_labels.values 235 | for item in row])) 236 | many_hot_encoder = ManyHotEncoder( 237 | label_list=uniquelabels, 238 | time_resolution=1 239 | ) 240 | label_df['manyhot'] = label_df['event_labels'].apply( 241 | lambda x: many_hot_encoder.encode(x, 1).data.flatten()) 242 | 243 | utt_labels = label_df.loc[:, 'manyhot'].to_dict() 244 | 245 | train_dataloader, cv_dataloader = create_dataloader_train_cv( 246 | kaldi_string, 247 | utt_labels, 248 | transform=scaler.transform, 249 | **config_parameters['dataloader_args']) 250 | model = getattr( 251 | models, 252 | config_parameters['model'])( 253 | inputdim=inputdim, 254 | output_size=len(uniquelabels), 255 | **config_parameters['model_args']) 256 | logger.info("<== Model ==>") 257 | for line in pformat(model).split('\n'): 258 | logger.info(line) 259 | optimizer = getattr( 260 | torch.optim, config_parameters['optimizer'])( 261 | model.parameters(), 262 | **config_parameters['optimizer_args']) 263 | 264 | scheduler = getattr( 265 | torch.optim.lr_scheduler, 266 | config_parameters['scheduler'])( 267 | 
optimizer, 268 | **config_parameters['scheduler_args']) 269 | criterion = getattr(losses, config_parameters['loss'])( 270 | **config_parameters['loss_args']) 271 | 272 | trainedmodelpath = os.path.join(outputdir, 'model.th') 273 | 274 | model = model.to(device) 275 | criterion_improved = criterion_improver( 276 | config_parameters['improvecriterion']) 277 | header = [ 278 | 'Epoch', 279 | 'UttLoss(T)', 280 | 'UttLoss(CV)', 281 | "UttAcc(T)", 282 | "UttAcc(CV)", 283 | "mAUC(CV)"] 284 | for line in tp.header( 285 | header, 286 | style='grid').split('\n'): 287 | logger.info(line) 288 | 289 | poolingfunction_name = config_parameters['poolingfunction'] 290 | pooling_function = parse_poolingfunction(poolingfunction_name) 291 | for epoch in range(1, config_parameters['epochs']+1): 292 | train_utt_loss_mean_std, train_utt_acc, train_auc_utt = runepoch( 293 | train_dataloader, model, criterion, optimizer, dotrain=True, poolfun=pooling_function) 294 | cv_utt_loss_mean_std, cv_utt_acc, cv_auc_utt = runepoch( 295 | cv_dataloader, model, criterion, dotrain=False, poolfun=pooling_function) 296 | logger.info( 297 | tp.row( 298 | (epoch,) + 299 | (train_utt_loss_mean_std[0], 300 | cv_utt_loss_mean_std[0], 301 | train_utt_acc, cv_utt_acc, cv_auc_utt), 302 | style='grid')) 303 | epoch_meanloss = cv_utt_loss_mean_std[0] 304 | if epoch % config_parameters['saveinterval'] == 0: 305 | torch.save({'model': model, 306 | 'scaler': scaler, 307 | 'encoder': many_hot_encoder, 308 | 'config': config_parameters}, 309 | os.path.join(outputdir, 'model_{}.th'.format(epoch))) 310 | # ReduceOnPlateau needs a value to work 311 | schedarg = epoch_meanloss if scheduler.__class__.__name__ == 'ReduceLROnPlateau' else None 312 | scheduler.step(schedarg) 313 | if criterion_improved(epoch_meanloss): 314 | torch.save({'model': model, 315 | 'scaler': scaler, 316 | 'encoder': many_hot_encoder, 317 | 'config': config_parameters}, 318 | trainedmodelpath) 319 | if optimizer.param_groups[0]['lr'] < 1e-7: 320 | break 321 | logger.info(tp.bottom(len(header), style='grid')) 322 | logger.info("Results are in: {}".format(outputdir)) 323 | return outputdir 324 | 325 | 326 | def parse_poolingfunction(poolingfunction_name='mean'): 327 | if poolingfunction_name == 'mean': 328 | def pooling_function(x, d): return x.mean(d) 329 | elif poolingfunction_name == 'max': 330 | def pooling_function(x, d): return x.max(d)[0] 331 | elif poolingfunction_name == 'linear': 332 | def pooling_function(x, d): return (x**2).sum(d) / x.sum(d) 333 | elif poolingfunction_name == 'exp': 334 | def pooling_function(x, d): return ( 335 | x.exp() * x).sum(d) / x.exp().sum(d) 336 | return pooling_function 337 | 338 | 339 | def evaluate_threshold( 340 | model_path: str, features: str = "features/logmel_64/test.ark", 341 | result_filename='dev.txt', 342 | test_labels: 343 | str = "metadata/test/test.csv", 344 | threshold=0.5, 345 | window=1, 346 | hop_size=0.02): 347 | from dcase_util.data import ProbabilityEncoder, DecisionEncoder, ManyHotEncoder 348 | from dcase_util.containers import MetaDataContainer 349 | from scipy.signal import medfilt 350 | modeldump = torch.load( 351 | model_path, 352 | map_location=lambda storage, loc: storage) 353 | model = modeldump['model'] 354 | config_parameters = modeldump['config'] 355 | scaler = modeldump['scaler'] 356 | many_hot_encoder = modeldump['encoder'] 357 | model_dirname = os.path.dirname(model_path) 358 | meta_container_resultfile = os.path.join( 359 | model_dirname, "pred_nowindow.txt") 360 | metacontainer = 
MetaDataContainer(filename=meta_container_resultfile) 361 | 362 | kaldi_string = parsecopyfeats( 363 | features, **config_parameters['feature_args']) 364 | model = model.to(device).eval() 365 | 366 | probability_encoder = ProbabilityEncoder() 367 | decision_encoder = DecisionEncoder( 368 | label_list=many_hot_encoder.label_list 369 | ) 370 | binarization_type = 'global_threshold' if isinstance( 371 | threshold, float) else 'class_threshold' 372 | # If class thresholds are given, then use those 373 | if isinstance(threshold, str): 374 | threshold = torch.load(threshold) 375 | windows = {k: window for k in many_hot_encoder.label_list} 376 | if isinstance(window, str): 377 | windows = torch.load(window) 378 | 379 | with torch.no_grad(): 380 | for k, feat in kaldi_io.read_mat_ark(kaldi_string): 381 | # Add batch dim 382 | feat = torch.from_numpy( 383 | scaler.transform(feat)).to(device).unsqueeze(0) 384 | feat = model(feat) 385 | probabilities = torch.sigmoid(feat).cpu().numpy().squeeze(0) 386 | frame_decisions = probability_encoder.binarization( 387 | probabilities=probabilities, 388 | binarization_type=binarization_type, 389 | threshold=threshold, 390 | time_axis=0, 391 | ) 392 | for i, label in enumerate(many_hot_encoder.label_list): 393 | label_frame_decisions = medfilt( 394 | frame_decisions[:, i], kernel_size=windows[label]) 395 | # Found only zeros, no activity, go on 396 | if (label_frame_decisions == 0).all(): 397 | continue 398 | estimated_events = decision_encoder.find_contiguous_regions( 399 | activity_array=label_frame_decisions 400 | ) 401 | for [onset, offset] in estimated_events: 402 | metacontainer.append({'event_label': label, 403 | 'onset': onset * hop_size, 404 | 'offset': offset * hop_size, 405 | 'filename': os.path.basename(k) 406 | }) 407 | metacontainer.save() 408 | estimated_event_list = MetaDataContainer().load( 409 | filename=meta_container_resultfile) 410 | reference_event_list = MetaDataContainer().load(filename=test_labels) 411 | 412 | event_based_metric = event_based_evaluation( 413 | reference_event_list, estimated_event_list) 414 | onset_scores = precision_recall_fscore_on_offset( 415 | reference_event_list, estimated_event_list, offset=False) 416 | offset_scores = precision_recall_fscore_on_offset( 417 | reference_event_list, estimated_event_list, onset=False) 418 | onset_offset_scores = precision_recall_fscore_on_offset( 419 | reference_event_list, estimated_event_list) 420 | # Utt wise Accuracy 421 | precision_labels = precision_recall_fscore_on_offset( 422 | reference_event_list, estimated_event_list, onset=False, offset=False, label=True) 423 | 424 | print(event_based_metric.__str__()) 425 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("UttLabel", *precision_labels)) 426 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("Onset", *onset_scores)) 427 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("Offset", *offset_scores)) 428 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("On-Offset", *onset_offset_scores)) 429 | 430 | result_filename = os.path.join(model_dirname, result_filename) 431 | 432 | with open(result_filename, 'w') as wp: 433 | wp.write(event_based_metric.__str__()) 434 | wp.write('\n') 435 | wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format( 436 | "UttLabel", *precision_labels)) 437 | wp.write( 438 | "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Onset", *onset_scores)) 439 | wp.write( 440 | "{:>10}: Precision: 
{:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Offset", *offset_scores)) 441 | wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format( 442 | "On-Offset", *onset_offset_scores)) 443 | 444 | 445 | def event_based_evaluation(reference_event_list, estimated_event_list): 446 | """ Calculate sed_eval event based metric for challenge 447 | 448 | Parameters 449 | ---------- 450 | 451 | reference_event_list : MetaDataContainer, list of referenced events 452 | 453 | estimated_event_list : MetaDataContainer, list of estimated events 454 | 455 | Return 456 | ------ 457 | 458 | event_based_metric : EventBasedMetrics 459 | 460 | """ 461 | 462 | files = {} 463 | for event in reference_event_list: 464 | files[event['filename']] = event['filename'] 465 | 466 | evaluated_files = sorted(list(files.keys())) 467 | 468 | event_based_metric = sed_eval.sound_event.EventBasedMetrics( 469 | event_label_list=reference_event_list.unique_event_labels, 470 | # evaluate_onset = False, 471 | # evaluate_offset = False, 472 | t_collar=0.200, 473 | percentage_of_length=0.2, 474 | ) 475 | 476 | for file in evaluated_files: 477 | reference_event_list_for_current_file = [] 478 | # events = [] 479 | for event in reference_event_list: 480 | if event['filename'] == file: 481 | reference_event_list_for_current_file.append(event) 482 | # events.append(event.event_label) 483 | estimated_event_list_for_current_file = [] 484 | for event in estimated_event_list: 485 | if event['filename'] == file: 486 | estimated_event_list_for_current_file.append(event) 487 | 488 | event_based_metric.evaluate( 489 | reference_event_list=reference_event_list_for_current_file, 490 | estimated_event_list=estimated_event_list_for_current_file 491 | ) 492 | 493 | return event_based_metric 494 | 495 | 496 | def precision_recall_fscore_on_offset(reference_event_list, estimated_event_list, onset=True, offset=True, label=False): 497 | files = {} 498 | for event in reference_event_list: 499 | files[event['filename']] = event['filename'] 500 | 501 | evaluated_files = sorted(list(files.keys())) 502 | overall = {'ntp': 0, 'nsys': 0, 'nref': 0} 503 | 504 | for file in evaluated_files: 505 | reference_event_list_for_current_file = [] 506 | # events = [] 507 | for event in reference_event_list: 508 | if event['filename'] == file: 509 | reference_event_list_for_current_file.append(event) 510 | # events.append(event.event_label) 511 | estimated_event_list_for_current_file = [] 512 | for event in estimated_event_list: 513 | if event['filename'] == file: 514 | estimated_event_list_for_current_file.append(event) 515 | 516 | ntp, nsys, nref = _precision_recall_fscore_on_offset( 517 | reference_event_list=reference_event_list_for_current_file, 518 | estimated_event_list=estimated_event_list_for_current_file, 519 | onset=onset, 520 | offset=offset, 521 | label=label 522 | ) 523 | overall['ntp'] += ntp 524 | overall['nsys'] += nsys 525 | overall['nref'] += nref 526 | 527 | precision = sed_eval.metric.precision( 528 | Ntp=overall['ntp'], Nsys=overall['nsys']) 529 | recall = sed_eval.metric.recall(Ntp=overall['ntp'], Nref=overall['nref']) 530 | f_score = sed_eval.metric.f_measure(precision, recall) 531 | return precision, recall, f_score 532 | 533 | 534 | def _precision_recall_fscore_on_offset(reference_event_list, estimated_event_list, onset=True, offset=True, label=False): 535 | # Evaluate only valid events 536 | import dcase_util 537 | valid_reference_event_list = dcase_util.containers.MetaDataContainer() 538 | for item in reference_event_list: 539 | if 
'event_onset' in item and 'event_offset' in item and 'event_label' in item: 540 | valid_reference_event_list.append(item) 541 | 542 | elif 'onset' in item and 'offset' in item and 'event_label' in item: 543 | valid_reference_event_list.append(item) 544 | 545 | reference_event_list = valid_reference_event_list 546 | 547 | valid_estimated_event_list = dcase_util.containers.MetaDataContainer() 548 | for item in estimated_event_list: 549 | if 'event_onset' in item and 'event_offset' in item and 'event_label' in item: 550 | valid_estimated_event_list.append(item) 551 | 552 | elif 'onset' in item and 'offset' in item and 'event_label' in item: 553 | valid_estimated_event_list.append(item) 554 | 555 | estimated_event_list = valid_estimated_event_list 556 | hit_matrix = np.zeros( 557 | (len(reference_event_list), len(estimated_event_list)), dtype=bool) 558 | Nsys = len(estimated_event_list) 559 | Nref = len(reference_event_list) 560 | if label: 561 | label_hit_matrix = np.zeros( 562 | (len(reference_event_list), len(estimated_event_list)), dtype=bool) 563 | for j in range(0, len(reference_event_list)): 564 | for i in range(0, len(estimated_event_list)): 565 | label_hit_matrix[j, i] = reference_event_list[j]['event_label'] == estimated_event_list[i]['event_label'] 566 | hit_matrix = label_hit_matrix 567 | if onset: 568 | onset_hit_matrix = np.zeros( 569 | (len(reference_event_list), len(estimated_event_list)), dtype=bool) 570 | for j in range(0, len(reference_event_list)): 571 | for i in range(0, len(estimated_event_list)): 572 | onset_hit_matrix[j, i] = sed_eval.sound_event.EventBasedMetrics.validate_onset( 573 | reference_event=reference_event_list[j], 574 | estimated_event=estimated_event_list[i], 575 | t_collar=0.200 576 | ) 577 | if label: 578 | hit_matrix *= onset_hit_matrix 579 | else: 580 | hit_matrix = onset_hit_matrix 581 | if offset: 582 | offset_hit_matrix = np.zeros( 583 | (len(reference_event_list), len(estimated_event_list)), dtype=bool) 584 | for j in range(0, len(reference_event_list)): 585 | for i in range(0, len(estimated_event_list)): 586 | offset_hit_matrix[j, i] = sed_eval.sound_event.EventBasedMetrics.validate_offset( 587 | reference_event=reference_event_list[j], 588 | estimated_event=estimated_event_list[i], 589 | t_collar=0.200, 590 | percentage_of_length=0.2 591 | ) 592 | if onset: 593 | hit_matrix *= offset_hit_matrix 594 | else: 595 | hit_matrix = offset_hit_matrix 596 | 597 | hits = np.where(hit_matrix) 598 | G = {} 599 | for ref_i, est_i in zip(*hits): 600 | if est_i not in G: 601 | G[est_i] = [] 602 | 603 | G[est_i].append(ref_i) 604 | matching = sorted(sed_eval.util.event_matching.bipartite_match(G).items()) 605 | ref_correct = np.zeros(Nref, dtype=bool) 606 | sys_correct = np.zeros(Nsys, dtype=bool) 607 | for item in matching: 608 | ref_correct[item[0]] = True 609 | sys_correct[item[1]] = True 610 | 611 | Ntp = len(matching) 612 | return Ntp, Nsys, Nref 613 | 614 | 615 | def get_f_measure_by_class(outputs, nb_tags, threshold=None): 616 | TP = np.zeros(nb_tags) 617 | TN = np.zeros(nb_tags) 618 | FP = np.zeros(nb_tags) 619 | FN = np.zeros(nb_tags) 620 | 621 | binarization_type = 'global_threshold' 622 | probability_encoder = ProbabilityEncoder() 623 | threshold = 0.5 if not threshold else threshold 624 | for predictions, utt_targets in outputs: 625 | predictions = probability_encoder.binarization(predictions, 626 | binarization_type=binarization_type, 627 | threshold=threshold, 628 | time_axis=0 629 | ) 630 | TP += (predictions + utt_targets == 2).sum(axis=0) 631 | 
FP += (predictions - utt_targets == 1).sum(axis=0) 632 | FN += (utt_targets - predictions == 1).sum(axis=0) 633 | TN += (predictions + utt_targets == 0).sum(axis=0) 634 | 635 | macro_f_measure = np.zeros(nb_tags) 636 | mask_f_score = 2*TP + FP + FN != 0 637 | macro_f_measure[mask_f_score] = 2 * \ 638 | TP[mask_f_score] / (2*TP + FP + FN)[mask_f_score] 639 | 640 | return macro_f_measure 641 | 642 | 643 | def dynamic_threshold(model_path: str, 644 | features: str = 'features/logmel_64/weak.ark'): 645 | from tqdm import tqdm 646 | modeldump = torch.load( 647 | model_path, 648 | map_location=lambda storage, loc: storage) 649 | model = modeldump['model'] 650 | config_parameters = modeldump['config'] 651 | scaler = modeldump['scaler'] 652 | many_hot_encoder = modeldump['encoder'] 653 | model_dirname = os.path.dirname(model_path) 654 | thresholds = [] 655 | thresholds_filename = os.path.join(model_dirname, 'thresholds.th') 656 | uniquelabels = many_hot_encoder.label_list 657 | kaldi_string = parsecopyfeats( 658 | features, **config_parameters['feature_args']) 659 | label_df = pd.read_json(config_parameters['labels']) 660 | uniquelabels = list(np.unique( 661 | [item 662 | for row in label_df.event_labels.values 663 | for item in row])) 664 | label_df['manyhot'] = label_df['event_labels'].apply( 665 | lambda x: many_hot_encoder.encode(x, 1).data.flatten()) 666 | label_df['onehot'] = label_df['frame_labels'].apply( 667 | lambda row: [ 668 | many_hot_encoder.encode( 669 | [item], 670 | 1).data.flatten() if item in uniquelabels else np.zeros( 671 | len(uniquelabels)) for item in row]) 672 | 673 | frame_labels = label_df.loc[:, 'onehot'].to_dict() 674 | utt_labels = label_df.loc[:, 'manyhot'].to_dict() 675 | # No CV part 676 | dataloader, _ = create_dataloader_train_cv( 677 | kaldi_string, frame_labels, utt_labels, transform=scaler.transform, percent=100) 678 | pooling_function = parse_poolingfunction( 679 | config_parameters['poolingfunction']) 680 | model = model.eval().to(device) 681 | all_predictions = [] 682 | with torch.no_grad(): 683 | for counter, (X, frame_targets, utt_targets) in enumerate(dataloader): 684 | X = X.float().to(device) 685 | utt_targets = utt_targets.numpy() 686 | # Add sigmoid function to the output 687 | predictions = torch.sigmoid(pooling_function(model(X), 0)).cpu() 688 | if len(predictions.shape) == 3: 689 | predictions = pooling_function(predictions, 1) 690 | predictions = predictions.numpy() 691 | all_predictions.append((predictions, utt_targets)) 692 | 693 | thresholds = [0] * len(uniquelabels) 694 | max_f_measure = [-np.inf] * len(uniquelabels) 695 | # Estimate best thresholds for each class from 0 to 1 in 0.01 steps 696 | for threshold in tqdm(np.arange(0, 1, 0.01)): 697 | # Assign current threshold to each class 698 | current_thresholds = [threshold] * len(uniquelabels) 699 | 700 | # Calculate f_measures with the current thresholds 701 | macro_f_measure = get_f_measure_by_class( 702 | all_predictions, len(uniquelabels), current_thresholds) 703 | # Update thresholds for class with better f_measures 704 | for i, label in enumerate(uniquelabels): 705 | f_measure = macro_f_measure[i] 706 | if f_measure > max_f_measure[i]: 707 | max_f_measure[i] = f_measure 708 | thresholds[i] = threshold 709 | torch.save(thresholds, thresholds_filename) 710 | for i, label in enumerate(uniquelabels): 711 | print('{:30}, threshold : {}'.format( 712 | label, thresholds[i])) 713 | 714 | 715 | def _forward_model(model_path: str, features: str): 716 | modeldump = torch.load( 717 | model_path, 
718 | map_location=lambda storage, loc: storage) 719 | model = modeldump['model'] 720 | config_parameters = modeldump['config'] 721 | scaler = modeldump['scaler'] 722 | many_hot_encoder = modeldump['encoder'] 723 | kaldi_string = parsecopyfeats( 724 | features, **config_parameters['feature_args']) 725 | model = model.eval().to(device) 726 | ret = {} 727 | with torch.no_grad(): 728 | for k, feat in kaldi_io.read_mat_ark(kaldi_string): 729 | # Add batch dim 730 | feat = torch.from_numpy( 731 | scaler.transform(feat)).to(device).unsqueeze(0) 732 | feat = model(feat) 733 | probabilities = torch.sigmoid(feat).cpu().numpy().squeeze(0) 734 | ret[k] = probabilities 735 | return ret, many_hot_encoder 736 | 737 | 738 | def evaluate_double_threshold( 739 | model_path: list, features: str = "features/logmel_64/test.ark", 740 | result_filename='dev_double.txt', 741 | test_labels: 742 | str = "metadata/test/test.csv", 743 | threshold=[0.75, 0.2], 744 | window=1, 745 | hop_size=0.02): 746 | 747 | from dcase_util.data import ProbabilityEncoder, ManyHotEncoder 748 | from dcase_util.containers import MetaDataContainer 749 | from thresholding import activity_detection 750 | from collections import defaultdict 751 | # Put into single list element if model_path is a single string, otherwise evaluate as fusion 752 | model_paths = model_path if type(model_path) == list else [model_path] 753 | 754 | fname_to_probabilities = defaultdict(list) 755 | for path in model_paths: 756 | model_dirname = os.path.dirname(path) 757 | meta_container_resultfile = os.path.join( 758 | model_dirname, "label_outputs_double_threshold.txt") 759 | metacontainer = MetaDataContainer(filename=meta_container_resultfile) 760 | cur_fname_to_probabilities, many_hot_encoder = _forward_model( 761 | path, features) 762 | for k, v in cur_fname_to_probabilities.items(): 763 | fname_to_probabilities[k].append(v) 764 | windows = {k: window for k in many_hot_encoder.label_list} 765 | if isinstance(window, str): 766 | windows = torch.load(window) 767 | # Average all the outputs 768 | for k, probs in fname_to_probabilities.items(): 769 | lengths = tuple(len(prob) for prob in probs) 770 | max_length = max(lengths) 771 | if len(set(lengths)) != 1: 772 | factors = (max_length / np.array(lengths)).astype(int) 773 | idxs = np.where(factors != 1)[0] 774 | for idx in idxs: 775 | probs[idx] = probs[idx].repeat( 776 | factors[idx], axis=0) 777 | left_over_pads = max_length - (factors[idx] * lengths[idx]) 778 | # In case of one array having uneven amount of frames ... 
pad 779 | if left_over_pads != 0: 780 | probs[idx] = np.pad( 781 | probs[idx], ((0, left_over_pads), (0, 0)), mode='reflect') 782 | # Average predictions of the models ( or just return single instance ) 783 | fname_to_probabilities[k] = np.mean(probs, axis=0) 784 | 785 | for k, probabilities in fname_to_probabilities.items(): 786 | for i, label in enumerate(many_hot_encoder.label_list): 787 | window_size = windows[label] 788 | estimated_events = activity_detection( 789 | probabilities[:, i], threshold[0], threshold[1], window_size) 790 | for [onset, offset] in estimated_events: 791 | metacontainer.append({'event_label': label, 792 | 'onset': onset * hop_size, 793 | 'offset': offset * hop_size, 794 | 'filename': os.path.basename(k) 795 | }) 796 | metacontainer.save() 797 | estimated_event_list = MetaDataContainer().load( 798 | filename=meta_container_resultfile) 799 | reference_event_list = MetaDataContainer().load(filename=test_labels) 800 | 801 | event_based_metric = event_based_evaluation( 802 | reference_event_list, estimated_event_list) 803 | onset_scores = precision_recall_fscore_on_offset( 804 | reference_event_list, estimated_event_list, offset=False) 805 | offset_scores = precision_recall_fscore_on_offset( 806 | reference_event_list, estimated_event_list, onset=False) 807 | onset_offset_scores = precision_recall_fscore_on_offset( 808 | reference_event_list, estimated_event_list) 809 | # Utt wise Accuracy 810 | precision_labels = precision_recall_fscore_on_offset( 811 | reference_event_list, estimated_event_list, onset=False, offset=False, label=True) 812 | 813 | print(event_based_metric.__str__()) 814 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("UttLabel", *precision_labels)) 815 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("Onset", *onset_scores)) 816 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("Offset", *offset_scores)) 817 | print("{:>10}-Precision: {:.1%} Recall {:.1%} F-Score {:.1%}".format("On-Offset", *onset_offset_scores)) 818 | 819 | result_filename = os.path.join(model_dirname, result_filename) 820 | 821 | with open(result_filename, 'w') as wp: 822 | wp.write(event_based_metric.__str__()) 823 | wp.write('\n') 824 | wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format( 825 | "UttLabel", *precision_labels)) 826 | wp.write( 827 | "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Onset", *onset_scores)) 828 | wp.write( 829 | "{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format("Offset", *offset_scores)) 830 | wp.write("{:>10}: Precision: {:.1%} Recall {:.1%} F-Score {:.1%}\n".format( 831 | "On-Offset", *onset_offset_scores)) 832 | 833 | 834 | def class_wise_statistics(model_path: str, features: str = "features/logmel_64/weak.ark", 835 | result_filename: str = 'train_stats.txt', 836 | labels: str = "labels/labels.json"): 837 | 838 | modeldump = torch.load( 839 | model_path, map_location=lambda storage, loc: storage) 840 | model = modeldump['model'] 841 | config_parameters = modeldump['config'] 842 | scaler = modeldump['scaler'] 843 | many_hot_encoder = modeldump['encoder'] 844 | 845 | label_df = pd.read_json(labels) 846 | label_df['manyhot'] = label_df['event_labels'].apply( 847 | lambda x: many_hot_encoder.encode(x, 1).data.flatten()) 848 | utt_labels = label_df.loc[:, 'manyhot'].to_dict() 849 | kaldi_string = parsecopyfeats( 850 | features, **config_parameters['feature_args']) 851 | pooling_function = parse_poolingfunction( 852 | 
config_parameters['poolingfunction']) 853 | from sklearn.metrics import precision_recall_fscore_support 854 | y_pred, y_true = [], [] 855 | model.to(device) 856 | with torch.no_grad(): 857 | for k, feat in kaldi_io.read_mat_ark(kaldi_string): 858 | feat = torch.from_numpy( 859 | scaler.transform(feat)).to(device).unsqueeze(0) 860 | # Pool windows ( there is only 1 usually ) 861 | feat = pooling_function(model(feat), 0) 862 | pred = torch.sigmoid(feat) 863 | # Pool in time 864 | pred = pooling_function(pred, 1).cpu().numpy().squeeze(0) 865 | y_pred.append(pred.round()) 866 | y_true.append(utt_labels[k]) 867 | y_pred, y_true = np.array(y_pred), np.array(y_true) 868 | avg_pre, avg_rec, avg_f1 = 0, 0, 0 869 | for i, label in enumerate(many_hot_encoder.label_list): 870 | pre, rec, f1, _ = precision_recall_fscore_support( 871 | y_true[:, i], y_pred[:, i], average='micro') 872 | print("{:<30} {:<3.4f} {:<3.4f} {:<3.4f}".format(label, pre, rec, f1)) 873 | avg_pre += pre 874 | avg_rec += rec 875 | avg_f1 += f1 876 | avg_pre /= len(many_hot_encoder.label_list) 877 | avg_rec /= len(many_hot_encoder.label_list) 878 | avg_f1 /= len(many_hot_encoder.label_list) 879 | print("{:<30} {:<3.4f} {:<3.4f} {:<3.4f}".format( 880 | "Overall", avg_pre, avg_rec, avg_f1)) 881 | 882 | 883 | def addtodataset(model_path: str, threshold: float = 0.9, features: str = "features/logmel_64/indomain.ark", mode='prob'): 884 | modeldump = torch.load( 885 | model_path, 886 | map_location=lambda storage, loc: storage) 887 | model = modeldump['model'] 888 | config_parameters = modeldump['config'] 889 | scaler = modeldump['scaler'] 890 | many_hot_encoder = modeldump['encoder'] 891 | model_dirname = os.path.dirname(model_path) 892 | fname, fname_ext = os.path.splitext(os.path.basename(features)) 893 | outputfile = os.path.join(model_dirname, '{}{}'.format(fname, fname_ext)) 894 | outputlabels = os.path.join(model_dirname, "{}.{}".format(fname, 'csv')) 895 | kaldi_string = parsecopyfeats( 896 | features, **config_parameters['feature_args']) 897 | model = model.to(device).eval() 898 | poolingfunction_name = config_parameters['poolingfunction'] if 'poolingfunction' in config_parameters else 'mean' 899 | pooling_function = parse_poolingfunction(poolingfunction_name) 900 | data_labels = [] 901 | with torch.no_grad(): 902 | with open(outputfile, 'wb') as wp: 903 | for k, feat in kaldi_io.read_mat_ark(kaldi_string): 904 | feat_torch = torch.from_numpy( 905 | scaler.transform(feat)).to(device).unsqueeze(0) 906 | prob = torch.sigmoid(model(feat_torch)).cpu().squeeze(0) 907 | prob_utt = pooling_function(prob, 0).numpy() 908 | if mode != 'prob': 909 | prob_utt = prob_utt / prob_utt.sum(-1) 910 | if any(prob_utt >= threshold): 911 | class_idx = np.where(prob_utt >= threshold)[0] 912 | labels = ','.join([many_hot_encoder.label_list[lab] 913 | for lab in class_idx.tolist()]) 914 | # From NFrames x nClass to class x nframes 915 | kaldi_io.write_mat(wp, feat, k) 916 | data_labels.append((k, labels)) 917 | data_labels = pd.DataFrame(data_labels, columns=[ 918 | 'filename', 'event_labels']).set_index('filename') 919 | data_labels.to_csv(outputlabels, sep='\t') 920 | return outputfile, outputlabels 921 | 922 | 923 | def _ntuple(n): 924 | def parse(x): 925 | if isinstance(x, container_abcs.Iterable): 926 | return x 927 | return tuple(repeat(x, n)) 928 | return parse 929 | 930 | 931 | def test_dev_eval(model_path: str, single_thres_window: int = 1, single_thres: float = 0.5, double_thres_window: int = 1, double_thres: list = [0.75, 0.2]): 932 | 
model_dump = torch.load(model_path, lambda storage, loc: storage) 933 | model = model_dump['model'] 934 | _pair = _ntuple(2) 935 | # Get the pooling factors for time and dimension 936 | poolfactors = np.prod(list(map(_pair, model._pooling)), axis=0) 937 | # Base hop size in the experiments is 20 ms 938 | hop_size = 0.02 * poolfactors[0] 939 | suffix_single = "w{}_t{}".format(single_thres_window, single_thres) 940 | suffix_double = "w{}_t{}".format( 941 | double_thres_window, "-".join(map(str, double_thres))) 942 | # Development stats 943 | evaluate_threshold(model_path, hop_size=hop_size, 944 | result_filename='dev_{}.txt'.format(suffix_single), threshold=single_thres, window=single_thres_window) 945 | evaluate_double_threshold( 946 | model_path, hop_size=hop_size, result_filename='dev_double_{}.txt'.format(suffix_double), threshold=double_thres, window=double_thres_window) 947 | # Evaluation stats 948 | evaluate_threshold(model_path, hop_size=hop_size, 949 | features='features/logmel_64/eval.ark', 950 | result_filename='evaluation_{}.txt'.format( 951 | suffix_single), 952 | test_labels='labels/eval.csv', 953 | window=single_thres_window, threshold=single_thres) 954 | evaluate_double_threshold(model_path, hop_size=hop_size, 955 | features='features/logmel_64/eval.ark', 956 | result_filename='evaluation_double_{}.txt'.format(suffix_double), test_labels='labels/eval.csv', window=double_thres_window, threshold=double_thres) 957 | 958 | 959 | def train_test(config='config/ReLU/0Pool/crnn_maxpool.yaml', **kwargs): 960 | folder_output = main(config=config, **kwargs) 961 | model_path = os.path.join(folder_output, 'model.th') 962 | model_dump = torch.load(model_path, lambda storage, loc: storage) 963 | model = model_dump['model'] 964 | _pair = _ntuple(2) 965 | # Get the pooling factors for time and dimension 966 | poolfactors = np.prod(list(map(_pair, model._pooling)), axis=0) 967 | # Base hop size in the experiments is 20 ms 968 | hop_size = 0.02 * poolfactors[0] 969 | # Development stats 970 | evaluate_threshold(model_path, hop_size=hop_size, 971 | result_filename='dev.txt') 972 | evaluate_double_threshold( 973 | model_path, hop_size=hop_size, result_filename='dev_double.txt') 974 | # Evaluation stats 975 | evaluate_threshold(model_path, hop_size=hop_size, 976 | features='features/logmel_64/eval.ark', 977 | result_filename='evaluation.txt', test_labels='labels/eval.csv') 978 | evaluate_double_threshold(model_path, hop_size=hop_size, 979 | features='features/logmel_64/eval.ark', result_filename='evaluation_double.txt', test_labels='labels/eval.csv') 980 | return folder_output 981 | 982 | 983 | def train_test_indomain(config='config/ReLU/0Pool/crnn_maxpool.yaml', **kwargs): 984 | folder_output = train_test(config, **kwargs) 985 | model_path = os.path.join(folder_output, 'model.th') 986 | indomain_feats, indomain_labels = addtodataset(model_path) 987 | # Outputpath is overwritten in the next function 988 | kwargs.pop('outputpath', None) 989 | indomain_weak_feats = os.path.join(folder_output, 'indomain_weak.ark') 990 | indomain_weak_labels = os.path.join(folder_output, 'indomain_weak.csv') 991 | # Original training features for the model 992 | config_parameters = parse_config_or_kwargs(config) 993 | train_feats = config_parameters['features'] 994 | train_labels = config_parameters['labels'] 995 | from subprocess import call 996 | call("cat {} {} > {}".format(train_feats, 997 | indomain_feats, indomain_weak_feats), shell=True) 998 | call("python3 merge_csv.py {} {} -out {}".format(train_labels, 999 | 
indomain_labels, indomain_weak_labels), shell=True) 1000 | 1001 | indomain_weak_output = os.path.join(folder_output, 'indomain_weak') 1002 | train_test(config, features=indomain_weak_feats, 1003 | labels=indomain_weak_labels, outputpath=indomain_weak_output, **kwargs) 1004 | 1005 | 1006 | if __name__ == '__main__': 1007 | fire.Fire({ 1008 | 'train': main, 1009 | 'test': evaluate_threshold, 1010 | 'stats': class_wise_statistics, 1011 | 'traintest': train_test, 1012 | 'traintestindomain': train_test_indomain, 1013 | 'test_double': evaluate_double_threshold, 1014 | 'runtests': test_dev_eval, 1015 | 'calcthres': dynamic_threshold 1016 | }) 1017 | -------------------------------------------------------------------------------- /thresholding.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dcase_util.data import DecisionEncoder 3 | 4 | 5 | def activity_detection(x, high_thres, low_thres, n_connect=1): 6 | high_locations = np.where(x > high_thres)[0] 7 | locations = x > low_thres 8 | dec_enc = DecisionEncoder() 9 | encoded_pairs = dec_enc.find_contiguous_regions(locations) 10 | 11 | filtered_list = list(filter(lambda pair: ((pair[0] <= high_locations) & 12 | (high_locations <= pair[1])).any(), encoded_pairs)) 13 | 14 | filtered_list = connect_(filtered_list, n_connect) 15 | return filtered_list 16 | 17 | 18 | def connect_(pairs, n=1): 19 | if len(pairs) == 0: 20 | return [] 21 | start_, end_ = pairs[0] 22 | new_pairs = [] 23 | for i, (next_item, cur_item) in enumerate(zip(pairs[1:], pairs[0:])): 24 | end_ = next_item[1] 25 | if next_item[0] - cur_item[1] <= n: 26 | pass 27 | else: 28 | new_pairs.append((start_, cur_item[1])) 29 | start_ = next_item[0] 30 | new_pairs.append((start_, end_)) 31 | return new_pairs 32 | --------------------------------------------------------------------------------
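For reference, a minimal usage sketch of the double-threshold post-processing implemented in thresholding.py above (the probability values below are invented for illustration): frames above low_thres form candidate regions, regions that never exceed high_thres are discarded, and surviving regions at most n_connect frames apart are merged by connect_.

import numpy as np
from thresholding import activity_detection

# Made-up per-frame probabilities for one class (one frame per hop)
probs = np.array([0.1, 0.8, 0.9, 0.1, 0.1, 0.85, 0.3, 0.1, 0.5, 0.1])
# Keep regions over 0.2 that also reach 0.75 somewhere; bridge gaps of <= 2 frames
events = activity_detection(probs, high_thres=0.75, low_thres=0.2, n_connect=2)
for onset, offset in events:
    print(onset * 0.02, offset * 0.02)  # frame indices -> seconds at a 20 ms hop
# Prints roughly 0.02 0.14: the two confident regions are merged into one event,
# while the region around index 8 never crosses the high threshold and is dropped.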
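Similarly, a small sketch with toy values of the clip-level pooling choices that parse_poolingfunction in run.py offers for the poolingfunction config key; the 'linear' option used by the configs above weights each frame's probability by itself, so confident frames dominate the clip score.

import torch

x = torch.tensor([[0.1, 0.9, 0.2, 0.8]])  # (batch, time) per-frame probabilities

mean_pool = x.mean(1)                             # 'mean'   -> 0.50
max_pool = x.max(1)[0]                            # 'max'    -> 0.90
linear_pool = (x ** 2).sum(1) / x.sum(1)          # 'linear' -> 0.75
exp_pool = (x.exp() * x).sum(1) / x.exp().sum(1)  # 'exp': softmax-weighted mean
print(mean_pool, max_pool, linear_pool, exp_pool)
# 'linear' and 'exp' lie between mean and max pooling, emphasizing confident frames
# without being as brittle as a hard max over time.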