├── .flake8 ├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.rst ├── docs ├── .gitignore ├── Makefile ├── conf.py ├── index.rst └── source │ ├── config.rst │ ├── convnet.rst │ ├── evaluation.rst │ ├── features.rst │ ├── file_io.rst │ ├── gated_conv.rst │ ├── inference.rst │ ├── main.rst │ ├── mixup.rst │ ├── modules.rst │ ├── silence.rst │ ├── training.rst │ └── utils.rst ├── metadata ├── test.csv └── training.csv ├── requirements.txt ├── scripts ├── meta_features.py ├── predict_stack.py ├── relabel.py └── select_folds.py └── task2 ├── config ├── __init__.py ├── dcase2018_task2.py ├── logmel.py ├── paths.py ├── prediction.py ├── silence.py └── training.py ├── convnet.py ├── evaluation.py ├── features.py ├── file_io.py ├── gated_conv.py ├── inference.py ├── main.py ├── mixup.py ├── silence.py ├── training.py └── utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = D100, D107, D413 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_*/ 2 | 3 | *.py[cod] 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Turab Iqbal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [dev-packages] 7 | "flake8" = "*" 8 | "flake8-docstrings" = "*" 9 | sphinx = "*" 10 | sphinx-rtd-theme = "*" 11 | 12 | [packages] 13 | "h5py" = "*" 14 | keras = "*" 15 | librosa = "*" 16 | numpy = "*" 17 | pandas = "*" 18 | pydub = "*" 19 | scipy = "*" 20 | sklearn = "*" 21 | tensorflow-gpu = "*" 22 | tqdm = "*" 23 | 24 | [requires] 25 | python_version = "3" 26 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "27667c5cc8fc32b181926c8bd1c8bd8889241b458c314e833947226712a472fc" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "absl-py": { 20 | "hashes": [ 21 | "sha256:87519e3b91a3d573664c6e2ee33df582bb68dca6642ae3cf3a4361b1c0a4e9d6" 22 | ], 23 | "version": "==0.6.1" 24 | }, 25 | "astor": { 26 | "hashes": [ 27 | "sha256:95c30d87a6c2cf89aa628b87398466840f0ad8652f88eb173125a6df8533fb8d", 28 | 
"sha256:fb503b9e2fdd05609fbf557b916b4a7824171203701660f0c55bbf5a7a68713e" 29 | ], 30 | "version": "==0.7.1" 31 | }, 32 | "audioread": { 33 | "hashes": [ 34 | "sha256:b0b9270c20833a75ce0d167fb2fdad52ddcd8e8f300be8afad3ac9715850bc50" 35 | ], 36 | "version": "==2.1.6" 37 | }, 38 | "decorator": { 39 | "hashes": [ 40 | "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82", 41 | "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c" 42 | ], 43 | "version": "==4.3.0" 44 | }, 45 | "gast": { 46 | "hashes": [ 47 | "sha256:7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930" 48 | ], 49 | "version": "==0.2.0" 50 | }, 51 | "grpcio": { 52 | "hashes": [ 53 | "sha256:0b09e82027f27cb540999404acf1be19cb50073d76ab257d7369aa3730bec3c0", 54 | "sha256:0cc5f2d3ee21c642d8982f197c83053fd3a8cbcd6a60240d8c87c6c256b10d57", 55 | "sha256:13b498b0415715a4214574c67ac6d0d7b565a861eb4490238a828fac17a51506", 56 | "sha256:314c557efecec7f901cf394beb184b31414f906785e4811d2392859576d4d7b5", 57 | "sha256:32d2859b68e185d05d6b5f5814121e786088f5e3483da0a7359f5d7fc0401ee3", 58 | "sha256:3bf1b9d72a05a855762c36bd458d3750bedb5fd7b957a44443a62facf80afba4", 59 | "sha256:41614ec2df4776a7d1b46183543d5c508bfc4972f092ec1ea83e98f808e5fa4d", 60 | "sha256:4a7fab9f8ed8352d63585d221ee9c1fc58fb9b3d12535e777e36e855b0cab3db", 61 | "sha256:4b4a2faa53e0f8d2b1479173dbce1523a7daaf2644fb835fb9fff04beb29ed8d", 62 | "sha256:5526bf9f6615e22d0290aa83324f87fcc1fee51c3a9580ebeb2a52271c21a563", 63 | "sha256:5bf2c9ec1d55c28ca1221f7b2d1914f20b2819c44579da89f447789baaba1386", 64 | "sha256:62b24446d447ebe3a7002a6e3bd2c7372159e094868eb61ea2426327fe9f1992", 65 | "sha256:63afda9d946fff727107ebbef25f6b45497f29486e462725dc9942391f3714a8", 66 | "sha256:6dd039527b7333c947b9757ad40adf93b917f3734aed1da4fdeb28fd17ec63f0", 67 | "sha256:6e719d17ca8fa06260a427cd1fab58abfd0672e8e625fcad81595bd125e0e367", 68 | "sha256:76b3dbff4c775f5f8667c405b909ab2f80440c7579ad56f823476b011124a8a5", 69 | 
"sha256:7be774ca3c8faa0e126d1e41e11fd82c9c114efb5437b36f651fe25add7f8c2e", 70 | "sha256:7d74c3c6d8c7aadd505c8cef2b4b5324588bee645e6d20a6493940b24d394603", 71 | "sha256:84afdfbf88c0ed2426a4f029fae3e677e8f1b2f3370feeae939d64670926c981", 72 | "sha256:84d62107eb5bc9fe4e3682b038434c709ca7a2ae19e621e08ed7e8d908046cfb", 73 | "sha256:8a1f4bee826b0edb123157f19843f46ca9ef29f12ed0b54eeffde5ff65101340", 74 | "sha256:93edd492a1c6865e15db1ff7d98228b7351221bf815286a41834e10934c0cde0", 75 | "sha256:9907fcb03a9fd327b114919dbb7a4577d5d5aeed2d6d000e6e6d002ad5cb959d", 76 | "sha256:9dd008cd45a646b0e3761f0963c95b0dcd07d880d278a3c1ce23dd4ecb9cd174", 77 | "sha256:a440935203be2581f68de7a4c5ca7ca22e948a21af70d7279ba9a2e32f73ae40", 78 | "sha256:a9144b8a0f73be76aff348e4d558a5c3f43a8378a17c6327d56dbea8efda4aeb", 79 | "sha256:b14629835e796f7905db2f7d10035958f995bae67bf9e652b13be156ed4a8457", 80 | "sha256:b4fe851428b630bdf6f3a99c3761ce3d304b194162812fc1312bfe7bd138e620", 81 | "sha256:c4318cea2d85f13811655e5d1c30fe97074aeb8105b16cc6da2d1d5d64a9f4f7", 82 | "sha256:e46d3d702198d164474078140e008e8961e95dfb5a100f2890eb201c94c48c6e", 83 | "sha256:e986100947cdafa2817701ffe616f2dc0221cc27eb301d654b9462b98ee62912", 84 | "sha256:f94ae68c43b4bba0272e565882db2709d8827910ccc427f0a89d8cf070180f61" 85 | ], 86 | "version": "==1.16.0" 87 | }, 88 | "h5py": { 89 | "hashes": [ 90 | "sha256:0f8cd2acbacf3177b4427ed42639c911667b1f24d923388ab1f8ad466a12be5e", 91 | "sha256:11277e3879098f921ee9e29105b20591e1dfdd44963357399f2abaa1a280c560", 92 | "sha256:1241dec0c94ac32f3285cac1d6f44beabf80423e422ab03bd2686d731a8a9294", 93 | "sha256:17b8187de0b3a945d8e8d031e7eb6ece2fce90791f9c5fde36f4396bf38fdde1", 94 | "sha256:2f30007d0796788a454c1293262f19f25e6428317d3d386f78138fba2a44e37d", 95 | "sha256:308e0758587ee16d4e73e7f2f8aae8351091e343bf0a43d2f697f9535465c816", 96 | "sha256:37cacddf0e8209905f52537a8cf71da0dd9a4de62bd79247274c97b24a408997", 97 | "sha256:38a23bb599748adf23d77f74885c0de6f4a7d9baa42f74e476bbf90fba2b47dd", 98 | 
"sha256:47ab18b7b7bbc36fd2b606289b703b6f0ee915b923d6ad94dd17ac80ebffc280", 99 | "sha256:486c78330af0bf33f5077b51d1888c0739c3cd1a03d5aade0d48572b3b5690ca", 100 | "sha256:4e2183458d6ef1ae87dfb5d6acd0786359336cd9ac0ece6396c09b59fdaa3bd6", 101 | "sha256:51d0595c3e58814c831f6cd2b664a5bf9590e26262c1d541b380d041e4fcb3c0", 102 | "sha256:56d259d56822b70881760b243957f04a0cf133f0ec65eae6a33f562826aee899", 103 | "sha256:5e6e777653169a3cc24ea56bb3d8c845ea391f8914c35bb6f350b0753a52891c", 104 | "sha256:62bfb0ebb0f59e5dccc0b0dbbc0fc40dd1d1e09d04c0dc71f89790231531d4a2", 105 | "sha256:67d89b64debfa021b54aa6f24bbf008403bd144748a0148596b518bce80d2fc4", 106 | "sha256:6bf38571f555fa214493ec6349d29024cc5f313bf1715b09f236c553fd22ae4d", 107 | "sha256:9214ca445c18a37bfe9c165982c0e317e2f21f035c8d635d1c6d9fcbaf35b7a8", 108 | "sha256:ab0c52850428d2e86029935389379c2c97f752e76b616da851deec8a4484f8ec", 109 | "sha256:b2eff336697d8dfd712c5d93fef9f4e4d3e97d9d8c258801836b8664a239e07a", 110 | "sha256:bb33fabc0b8f3fe3bb0f8d6821b2fad5b2a64c27a0808e8d1c5c1e3362062064", 111 | "sha256:bd5353ab342bae1262b04745934cc1565df4cbc8d6a979a0c98f42209bd5c265", 112 | "sha256:bd73444efd1ac06dac27b8405bbe8791a02fd1bc8a2fa0e575257f90b7b57467", 113 | "sha256:bd932236a2ef91a75fee5d7f4ace80ab494c5a59cd092a67c9785ddb7fdc218c", 114 | "sha256:c45650de228ace7731e4280e14fb687f6d5c29cd666c5b22b42492b035e994d6", 115 | "sha256:d5c0c01da45f901a3d429e7ef9e7e22baa869e1affb8715f1bf94e6a30020740", 116 | "sha256:d75035db5bde802a29f4f29f18bb7548863d29ac90ccbf2c04c11799bbbba2c3", 117 | "sha256:dda88206dc9464923f27f601000bc5b152ac0bd6d0122f098d4f239150a70076", 118 | "sha256:e1c2ac5d0aa232c0f60fecc6bd1122346885086a176f939b91058c4c980cc226", 119 | "sha256:e626c65a8587921ebc7fb8d31a49addfdd0b9a9aa96315ea484c09803337b955" 120 | ], 121 | "index": "pypi", 122 | "version": "==2.8.0" 123 | }, 124 | "joblib": { 125 | "hashes": [ 126 | "sha256:11cdfd38cdb71768149e1373f2509e9b4fc1ec6bc92f874cb515b25f2d69f8f4", 127 | 
"sha256:142e74bee7b9dfad75c7dad24c4295b9084a6788116dd149e2fc7daf0db25675" 128 | ], 129 | "version": "==0.12.5" 130 | }, 131 | "keras": { 132 | "hashes": [ 133 | "sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6", 134 | "sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9" 135 | ], 136 | "index": "pypi", 137 | "version": "==2.2.4" 138 | }, 139 | "keras-applications": { 140 | "hashes": [ 141 | "sha256:721dda4fa4e043e5bbd6f52a2996885c4639a7130ae478059b3798d0706f5ae7", 142 | "sha256:a03af60ddc9c5afdae4d5c9a8dd4ca857550e0b793733a5072e0725829b87017" 143 | ], 144 | "version": "==1.0.6" 145 | }, 146 | "keras-preprocessing": { 147 | "hashes": [ 148 | "sha256:90d04c1750bccceef88ac09475c291b4b5f6aa1eaf0603167061b1aa8b043c61", 149 | "sha256:ef2e482c4336fcf7180244d06f4374939099daa3183816e82aee7755af35b754" 150 | ], 151 | "version": "==1.0.5" 152 | }, 153 | "librosa": { 154 | "hashes": [ 155 | "sha256:2aa868b8aade749b9904eeb7034fcf44115601c367969b6d01f5e1b4b9b6031d" 156 | ], 157 | "index": "pypi", 158 | "version": "==0.6.2" 159 | }, 160 | "llvmlite": { 161 | "hashes": [ 162 | "sha256:05b817a961fa0aacfccc5ecaa2e6fbc826c8988759a6d25243916e74c2082d3e", 163 | "sha256:0ea68d4ab6a1fa73d919b17ab2f4909f4aa9a6730ba32a91c61db8c08626f408", 164 | "sha256:152e1ac25c57d3d686128a8ee899762be95d2bb1418d7b776c55b83c9591cc44", 165 | "sha256:276f6585a7a962d86f0751cc3016afa45c0ba86190e1ebd7b43071507ff18967", 166 | "sha256:2bb68dbe572b021b238677729ed77dc2bda396acf6ede00373bb24124e282383", 167 | "sha256:3a0e5a5c9aa56ca506b576ef6ca6e6c37306a483baf951034dc357cb34beb7c7", 168 | "sha256:696ce74ac7060eba4f8b3cb6122bb8aff5d30a6ddc872b519dcb3ed201c42af2", 169 | "sha256:7a249ec0a3731e789fdffe9772f2ae62303b47554b7e5109360d7fa77be0cb76", 170 | "sha256:950f44f2b33f04c91417b9846271decc66adfaa16849ca3522a4c51e0037a022", 171 | "sha256:964d76903e8298ae3446319d12183dcf64722bdc9deb66159fa97ef68986594b", 172 | 
"sha256:9b1c41413bf5d709c8bd3621e075e19dd9170cb08cbf95319d6cbd28ca2d84ed", 173 | "sha256:b19f69376799848e411fad5c079dedc6ac4aedb032f922b80dcf3ec368a59dc3", 174 | "sha256:b36854919a9b13408d7523e4327a04afc62ce880564466f9f79f506d560b70ff", 175 | "sha256:b445c88e9677e427cc6dc49aa621691f04dbee1c888040413fba26711d0edc68", 176 | "sha256:c27fedc3cd6869c222a36cd7b1db8906bb209a8b7b1cd6e84174e8e53e51c23d", 177 | "sha256:c7f6e7078b1830c8fb15f340af4da5692d53a6590eff03c0afa7e69857ac37c0", 178 | "sha256:d8702b8825349b090f51c358cd1e5f9631865bdab4e9e13bfce1ac7c82e44cfe", 179 | "sha256:e866935bc1236478fe09c741313b1ed322ab34343ef226aa34f75ce80e5188ef", 180 | "sha256:f35c1b9d3efdcf54a1b0599c72995e66e1e4033bf08d13ab450011f0f3ca78a2", 181 | "sha256:f54bbd9e73fabd13b208b71a14a8e4d83400fc59cc5eb3bceeff1a7352a23f2f", 182 | "sha256:fd64def9a51dd7dc61913a7a08eeba5b9785522740bec5a7c5995b2a90525025" 183 | ], 184 | "version": "==0.25.0" 185 | }, 186 | "markdown": { 187 | "hashes": [ 188 | "sha256:c00429bd503a47ec88d5e30a751e147dcb4c6889663cd3e2ba0afe858e009baa", 189 | "sha256:d02e0f9b04c500cde6637c11ad7c72671f359b87b9fe924b2383649d8841db7c" 190 | ], 191 | "version": "==3.0.1" 192 | }, 193 | "numba": { 194 | "hashes": [ 195 | "sha256:1f6ebba0caea938178e6220af12c01db987d0cb8f08436b492f0bd05d0f82018", 196 | "sha256:1fe794735fc26dfa4fda68402cc5d6a3c2457706146db7a36b99ec29a1cdac6d", 197 | "sha256:277412c10d58c946e9204fe6a3caf3c5d4dd9088f4b595f78b6a8cd5fb357cde", 198 | "sha256:31a4f073f40aff7cdd18a2527c45da99e8b6f87930ad22d02d82b2564d5e7907", 199 | "sha256:52d046c13bcf0de79dbfb936874b7228f141b9b8e3447cc35855e9ad3e12aa33", 200 | "sha256:57d54d48b4001c1feb57b03d7ae87c952874f730d36f058d0ac9f94c4c4a42c8", 201 | "sha256:6ba5cf183883c21c3acd2f8a4b67dfe9987c194d40a5f5bef75d956391cb9b74", 202 | "sha256:6d11a6c19f68e5d731354fa2690b3ad1cc39e1c3cff6424ce76bbaa7f146a084", 203 | "sha256:6faf3795d4676948b3bd2d562bc694d01411428ce2e2b76408840f4f2289fbd3", 204 | 
"sha256:710747f81c8ca390a38220ca1c800e25c839ab4632ce6e37cda526d91fffc384", 205 | "sha256:762defd58acc2e5f5a099b1c9865294ce7bebd0b7f877e0c504fe69beca7d757", 206 | "sha256:773b7b064f4b06b0a8a84c8600df12c3db337193159ac04d678c86202165e1db", 207 | "sha256:77989a936dc28ace69f039d47017faba756bae9c8d708a5a0c56a0fdd4241f6e", 208 | "sha256:85108757264e73b02de8b959c4c5913cfbe9b9b697d843158535bcaaeb6541f5", 209 | "sha256:8fc3ed08e0d713cc2a99ad7f26782cf13ef2cd3a8f82b397eaa4f3e114458cea", 210 | "sha256:bd0bee047a5e84c657c1273f479ccd45a30d74b48ae5355ac3e5a90dd67b216b", 211 | "sha256:beb59759b876dc854855d9e963a2b9308693f3ce73064842c64ceb863a4cd92b", 212 | "sha256:c6c254fc8fd32e08dd25fae679e0f39f8a33cb72e4934287d7d1c591df6637e8", 213 | "sha256:c87c842af32c0abdf948a18ec4d9e8d88f8ea34533794a4f83dd3cced46ba842", 214 | "sha256:d64c6dec35914ad53b67272a0505aee1a1d81a75b3e3ef25a0c081719ffba713", 215 | "sha256:f763ff00febb898a67bb1164747e150c5cf60e606ee67b3cd48be0b71ee0ca25" 216 | ], 217 | "version": "==0.40.1" 218 | }, 219 | "numpy": { 220 | "hashes": [ 221 | "sha256:0df89ca13c25eaa1621a3f09af4c8ba20da849692dcae184cb55e80952c453fb", 222 | "sha256:154c35f195fd3e1fad2569930ca51907057ae35e03938f89a8aedae91dd1b7c7", 223 | "sha256:18e84323cdb8de3325e741a7a8dd4a82db74fde363dce32b625324c7b32aa6d7", 224 | "sha256:1e8956c37fc138d65ded2d96ab3949bd49038cc6e8a4494b1515b0ba88c91565", 225 | "sha256:23557bdbca3ccbde3abaa12a6e82299bc92d2b9139011f8c16ca1bb8c75d1e95", 226 | "sha256:24fd645a5e5d224aa6e39d93e4a722fafa9160154f296fd5ef9580191c755053", 227 | "sha256:36e36b6868e4440760d4b9b44587ea1dc1f06532858d10abba98e851e154ca70", 228 | "sha256:3d734559db35aa3697dadcea492a423118c5c55d176da2f3be9c98d4803fc2a7", 229 | "sha256:416a2070acf3a2b5d586f9a6507bb97e33574df5bd7508ea970bbf4fc563fa52", 230 | "sha256:4a22dc3f5221a644dfe4a63bf990052cc674ef12a157b1056969079985c92816", 231 | "sha256:4d8d3e5aa6087490912c14a3c10fbdd380b40b421c13920ff468163bc50e016f", 232 | 
"sha256:4f41fd159fba1245e1958a99d349df49c616b133636e0cf668f169bce2aeac2d", 233 | "sha256:561ef098c50f91fbac2cc9305b68c915e9eb915a74d9038ecf8af274d748f76f", 234 | "sha256:56994e14b386b5c0a9b875a76d22d707b315fa037affc7819cda08b6d0489756", 235 | "sha256:73a1f2a529604c50c262179fcca59c87a05ff4614fe8a15c186934d84d09d9a5", 236 | "sha256:7da99445fd890206bfcc7419f79871ba8e73d9d9e6b82fe09980bc5bb4efc35f", 237 | "sha256:99d59e0bcadac4aa3280616591fb7bcd560e2218f5e31d5223a2e12a1425d495", 238 | "sha256:a4cc09489843c70b22e8373ca3dfa52b3fab778b57cf81462f1203b0852e95e3", 239 | "sha256:a61dc29cfca9831a03442a21d4b5fd77e3067beca4b5f81f1a89a04a71cf93fa", 240 | "sha256:b1853df739b32fa913cc59ad9137caa9cc3d97ff871e2bbd89c2a2a1d4a69451", 241 | "sha256:b1f44c335532c0581b77491b7715a871d0dd72e97487ac0f57337ccf3ab3469b", 242 | "sha256:b261e0cb0d6faa8fd6863af26d30351fd2ffdb15b82e51e81e96b9e9e2e7ba16", 243 | "sha256:c857ae5dba375ea26a6228f98c195fec0898a0fd91bcf0e8a0cae6d9faf3eca7", 244 | "sha256:cf5bb4a7d53a71bb6a0144d31df784a973b36d8687d615ef6a7e9b1809917a9b", 245 | "sha256:db9814ff0457b46f2e1d494c1efa4111ca089e08c8b983635ebffb9c1573361f", 246 | "sha256:df04f4bad8a359daa2ff74f8108ea051670cafbca533bb2636c58b16e962989e", 247 | "sha256:ecf81720934a0e18526177e645cbd6a8a21bb0ddc887ff9738de07a1df5c6b61", 248 | "sha256:edfa6fba9157e0e3be0f40168eb142511012683ac3dc82420bee4a3f3981b30e" 249 | ], 250 | "index": "pypi", 251 | "version": "==1.15.4" 252 | }, 253 | "pandas": { 254 | "hashes": [ 255 | "sha256:11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60", 256 | "sha256:12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31", 257 | "sha256:1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051", 258 | "sha256:242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da", 259 | "sha256:26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7", 260 | "sha256:2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a", 261 | 
"sha256:31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db", 262 | "sha256:4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8", 263 | "sha256:5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4", 264 | "sha256:647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553", 265 | "sha256:66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e", 266 | "sha256:6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f", 267 | "sha256:be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba", 268 | "sha256:bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc", 269 | "sha256:d318d77ab96f66a59e792a481e2701fba879e1a453aefeebdb17444fe204d1ed", 270 | "sha256:d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7", 271 | "sha256:de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c", 272 | "sha256:f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f", 273 | "sha256:f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad", 274 | "sha256:fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0" 275 | ], 276 | "index": "pypi", 277 | "version": "==0.23.4" 278 | }, 279 | "protobuf": { 280 | "hashes": [ 281 | "sha256:10394a4d03af7060fa8a6e1cbf38cea44be1467053b0aea5bbfcb4b13c4b88c4", 282 | "sha256:1489b376b0f364bcc6f89519718c057eb191d7ad6f1b395ffd93d1aa45587811", 283 | "sha256:1931d8efce896981fe410c802fd66df14f9f429c32a72dd9cfeeac9815ec6444", 284 | "sha256:196d3a80f93c537f27d2a19a4fafb826fb4c331b0b99110f985119391d170f96", 285 | "sha256:46e34fdcc2b1f2620172d3a4885128705a4e658b9b62355ae5e98f9ea19f42c2", 286 | "sha256:4b92e235a3afd42e7493b281c8b80c0c65cbef45de30f43d571d1ee40a1f77ef", 287 | "sha256:574085a33ca0d2c67433e5f3e9a0965c487410d6cb3406c83bdaf549bfc2992e", 288 | "sha256:59cd75ded98094d3cf2d79e84cdb38a46e33e7441b2826f3838dcc7c07f82995", 289 | "sha256:5ee0522eed6680bb5bac5b6d738f7b0923b3cafce8c4b1a039a6107f0841d7ed", 290 | 
"sha256:65917cfd5da9dfc993d5684643063318a2e875f798047911a9dd71ca066641c9", 291 | "sha256:685bc4ec61a50f7360c9fd18e277b65db90105adbf9c79938bd315435e526b90", 292 | "sha256:92e8418976e52201364a3174e40dc31f5fd8c147186d72380cbda54e0464ee19", 293 | "sha256:9335f79d1940dfb9bcaf8ec881fb8ab47d7a2c721fb8b02949aab8bbf8b68625", 294 | "sha256:a7ee3bb6de78185e5411487bef8bc1c59ebd97e47713cba3c460ef44e99b3db9", 295 | "sha256:ceec283da2323e2431c49de58f80e1718986b79be59c266bb0509cbf90ca5b9e", 296 | "sha256:fcfc907746ec22716f05ea96b7f41597dfe1a1c088f861efb8a0d4f4196a6f10" 297 | ], 298 | "version": "==3.6.1" 299 | }, 300 | "pydub": { 301 | "hashes": [ 302 | "sha256:07d5eec305110cf1dfe1a0c45eed84bc31f889b1b19feab3859a05dc45b70e7b", 303 | "sha256:c703e220917be686fc83d24231849a11c95ba540963ffb3ae5bb322788974d79" 304 | ], 305 | "index": "pypi", 306 | "version": "==0.23.0" 307 | }, 308 | "python-dateutil": { 309 | "hashes": [ 310 | "sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93", 311 | "sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02" 312 | ], 313 | "version": "==2.7.5" 314 | }, 315 | "pytz": { 316 | "hashes": [ 317 | "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", 318 | "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" 319 | ], 320 | "version": "==2018.7" 321 | }, 322 | "pyyaml": { 323 | "hashes": [ 324 | "sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b", 325 | "sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf", 326 | "sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a", 327 | "sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3", 328 | "sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1", 329 | "sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1", 330 | "sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613", 331 | 
"sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04", 332 | "sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f", 333 | "sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537", 334 | "sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531" 335 | ], 336 | "version": "==3.13" 337 | }, 338 | "resampy": { 339 | "hashes": [ 340 | "sha256:7f6912ca2b746eb9bcdc05c52fcef088f0b7ba1ca6ee0b2d0a359d18fc57f8f8" 341 | ], 342 | "version": "==0.2.1" 343 | }, 344 | "scikit-learn": { 345 | "hashes": [ 346 | "sha256:1ca280bbdeb0f9950f9427c71e29d9f14e63b2ffa3e8fdf95f25e13773e6d898", 347 | "sha256:33ad23aa0928c64567a24aac771aea4e179fab2a20f9f786ab00ca9fe0a13c82", 348 | "sha256:344bc433ccbfbadcac8c16b4cec9d7c4722bcea9ce19f6da42e2c2f805571941", 349 | "sha256:35ee532b5e992a6e8d8a71d325fd9e0b58716894657e7d3da3e7a1d888c2e7d4", 350 | "sha256:37cbbba2d2a3895bba834d50488d22268a511279e053135bb291f637fe30512b", 351 | "sha256:40cf1908ee712545f4286cc21f3ee21f3466c81438320204725ab37c96849f27", 352 | "sha256:4130760ac54f5946523c1a1fb32a6c0925e5245f77285270a8f6fb5901b7b733", 353 | "sha256:46cc8c32496f02affde7abe507af99cd752de0e41aec951a0bc40c693c2a1e07", 354 | "sha256:4a364cf22be381a17c05ada9f9ce102733a0f75893c51b83718cd9358444921e", 355 | "sha256:56aff3fa3417cd69807c1c74db69aee34ce08d7161cbdfebbff9b4023d9d224b", 356 | "sha256:58debb34a15cfc03f4876e450068dbd711d9ec36ae5503ed2868f2c1f88522f7", 357 | "sha256:7bcf7ade62ef3443470af32afb82646640d653f42502cf31a13cc17d3ff85d57", 358 | "sha256:7d4eab203ed260075f47e2bf6a2bd656367e4e8683b3ad46d4651070c5d1e9aa", 359 | "sha256:86697c6e4c2d74fbbf110c6d5979d34196a55108fa9896bf424f9795a8d935ad", 360 | "sha256:911115db6669c9b11efd502dcc5483cd0c53e4e3c4bcdfe2e73bbb27eb5e81da", 361 | "sha256:97d1d971f8ec257011e64b7d655df68081dd3097322690afa1a71a1d755f8c18", 362 | "sha256:99f22c3228ec9ab3933597825dc7d595b6c8c7b9ae725cfa557f16353fac8314", 363 | 
"sha256:a2e18e5a4095b3ca4852eb087d28335f3bb8515df4ccf906d380ee627613837f", 364 | "sha256:a3070f71a4479a9827148609f24f2978f10acffa3b8012fe9606720d271066bd", 365 | "sha256:a6a197499429d2eaa2ae922760aa3966ef353545422d5f47ea2ca9369cbf7d26", 366 | "sha256:a7f6f5b3bc7b8e2066076098788579af12bd507ccea8ca6859e52761aa61eaca", 367 | "sha256:a82b90b6037fcc6b311431395c11b02555a3fbf96921a0667c8f8b0c495991cb", 368 | "sha256:ab2c4266b8cd159a266eb03c709ad5400756dca9c45aa48fb523263344475093", 369 | "sha256:b983a2dfdb9d707c78790608bcfd63692e5c2d996865a9689f3db768d0a2978d", 370 | "sha256:bb33d447f4c6fb164d426467d7bf8a4901c303333c5809b85319b2e0626763cd", 371 | "sha256:bc2a0116a67081167f1fbfed731d361671e5925db291b70e65fa66170045c53f", 372 | "sha256:bd189f6d0c2fdccb7c0d3fd1227c6626dc17d00257edbb63dd7c88f31928db61", 373 | "sha256:d393f810da9cd4746cad7350fb89f0509c3ae702c79d2ba8bd875201be4102d1" 374 | ], 375 | "version": "==0.20.0" 376 | }, 377 | "scipy": { 378 | "hashes": [ 379 | "sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7", 380 | "sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a", 381 | "sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd", 382 | "sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3", 383 | "sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37", 384 | "sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463", 385 | "sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3", 386 | "sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631", 387 | "sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5", 388 | "sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a", 389 | "sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef", 390 | "sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3", 391 | 
"sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f", 392 | "sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559", 393 | "sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692", 394 | "sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1", 395 | "sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76", 396 | "sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac", 397 | "sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a", 398 | "sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2", 399 | "sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac", 400 | "sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01", 401 | "sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552", 402 | "sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40", 403 | "sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020", 404 | "sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae", 405 | "sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40", 406 | "sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694" 407 | ], 408 | "index": "pypi", 409 | "version": "==1.1.0" 410 | }, 411 | "six": { 412 | "hashes": [ 413 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", 414 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" 415 | ], 416 | "version": "==1.11.0" 417 | }, 418 | "sklearn": { 419 | "hashes": [ 420 | "sha256:e23001573aa194b834122d2b9562459bf5ae494a2d59ca6b8aa22c85a44c0e31" 421 | ], 422 | "index": "pypi", 423 | "version": "==0.0" 424 | }, 425 | "tensorboard": { 426 | "hashes": [ 427 | "sha256:537603db949e10d2f5f201d88b073f3f8fb4e4c311d5541e1d4518aa59aa8daa", 428 | "sha256:ca275a7e39797946930d7d4460999369b73968e8191f2256e23bfb7924004d59" 429 | ], 430 | 
"version": "==1.12.0" 431 | }, 432 | "tensorflow-gpu": { 433 | "hashes": [ 434 | "sha256:12902549817d2f093f3045f7861df84a5936e8f14469d11c5a5622c85455b96c", 435 | "sha256:435a9a4a37c1a92f9bc80f577f0328775539c593b9bc9e943712a204ada11db5", 436 | "sha256:6e9e6b73cc6dc6b82a8e09f9688a8806f44dbe02c4e92cb9c36efea30a7cd47e", 437 | "sha256:bf2c1e660c533102db2a81fad21a26213f4e4ff5ce6b841c0d9adc4ac3c5c6bc", 438 | "sha256:ce47aaa4ddf8446c9c9a83d968c2beba93feefaf796f1255ec6e361e4dd0e13a", 439 | "sha256:d02f018e46ee0d45a86bd27c5635b936330ab7e180c43029d1b3c4cebc7c2c45", 440 | "sha256:da799ad89780c21380fdbb99f3ecf73488dbfdca0715493c6931c2710c710e62" 441 | ], 442 | "index": "pypi", 443 | "version": "==1.12.0" 444 | }, 445 | "termcolor": { 446 | "hashes": [ 447 | "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" 448 | ], 449 | "version": "==1.1.0" 450 | }, 451 | "tqdm": { 452 | "hashes": [ 453 | "sha256:3c4d4a5a41ef162dd61f1edb86b0e1c7859054ab656b2e7c7b77e7fbf6d9f392", 454 | "sha256:5b4d5549984503050883bc126280b386f5f4ca87e6c023c5d015655ad75bdebb" 455 | ], 456 | "index": "pypi", 457 | "version": "==4.28.1" 458 | }, 459 | "werkzeug": { 460 | "hashes": [ 461 | "sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c", 462 | "sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b" 463 | ], 464 | "version": "==0.14.1" 465 | }, 466 | "wheel": { 467 | "hashes": [ 468 | "sha256:196c9842d79262bb66fcf59faa4bd0deb27da911dbc7c6cdca931080eb1f0783", 469 | "sha256:c93e2d711f5f9841e17f53b0e6c0ff85593f3b416b6eec7a9452041a59a42688" 470 | ], 471 | "markers": "python_version >= '3'", 472 | "version": "==0.32.2" 473 | } 474 | }, 475 | "develop": { 476 | "alabaster": { 477 | "hashes": [ 478 | "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", 479 | "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" 480 | ], 481 | "version": "==0.7.12" 482 | }, 483 | "babel": { 484 | "hashes": [ 485 | 
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669", 486 | "sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23" 487 | ], 488 | "version": "==2.6.0" 489 | }, 490 | "certifi": { 491 | "hashes": [ 492 | "sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c", 493 | "sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a" 494 | ], 495 | "version": "==2018.10.15" 496 | }, 497 | "chardet": { 498 | "hashes": [ 499 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 500 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 501 | ], 502 | "version": "==3.0.4" 503 | }, 504 | "docutils": { 505 | "hashes": [ 506 | "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", 507 | "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", 508 | "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" 509 | ], 510 | "version": "==0.14" 511 | }, 512 | "flake8": { 513 | "hashes": [ 514 | "sha256:6a35f5b8761f45c5513e3405f110a86bea57982c3b75b766ce7b65217abe1670", 515 | "sha256:c01f8a3963b3571a8e6bd7a4063359aff90749e160778e03817cd9b71c9e07d2" 516 | ], 517 | "index": "pypi", 518 | "version": "==3.6.0" 519 | }, 520 | "flake8-docstrings": { 521 | "hashes": [ 522 | "sha256:4e0ce1476b64e6291520e5570cf12b05016dd4e8ae454b8a8a9a48bc5f84e1cd", 523 | "sha256:8436396b5ecad51a122a2c99ba26e5b4e623bf6e913b0fea0cb6c2c4050f91eb" 524 | ], 525 | "index": "pypi", 526 | "version": "==1.3.0" 527 | }, 528 | "flake8-polyfill": { 529 | "hashes": [ 530 | "sha256:12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9", 531 | "sha256:e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda" 532 | ], 533 | "version": "==1.0.2" 534 | }, 535 | "idna": { 536 | "hashes": [ 537 | "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e", 538 | 
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16" 539 | ], 540 | "version": "==2.7" 541 | }, 542 | "imagesize": { 543 | "hashes": [ 544 | "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8", 545 | "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5" 546 | ], 547 | "version": "==1.1.0" 548 | }, 549 | "jinja2": { 550 | "hashes": [ 551 | "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", 552 | "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" 553 | ], 554 | "version": "==2.10" 555 | }, 556 | "markupsafe": { 557 | "hashes": [ 558 | "sha256:048ef924c1623740e70204aa7143ec592504045ae4429b59c30054cb31e3c432", 559 | "sha256:130f844e7f5bdd8e9f3f42e7102ef1d49b2e6fdf0d7526df3f87281a532d8c8b", 560 | "sha256:19f637c2ac5ae9da8bfd98cef74d64b7e1bb8a63038a3505cd182c3fac5eb4d9", 561 | "sha256:1b8a7a87ad1b92bd887568ce54b23565f3fd7018c4180136e1cf412b405a47af", 562 | "sha256:1c25694ca680b6919de53a4bb3bdd0602beafc63ff001fea2f2fc16ec3a11834", 563 | "sha256:1f19ef5d3908110e1e891deefb5586aae1b49a7440db952454b4e281b41620cd", 564 | "sha256:1fa6058938190ebe8290e5cae6c351e14e7bb44505c4a7624555ce57fbbeba0d", 565 | "sha256:31cbb1359e8c25f9f48e156e59e2eaad51cd5242c05ed18a8de6dbe85184e4b7", 566 | "sha256:3e835d8841ae7863f64e40e19477f7eb398674da6a47f09871673742531e6f4b", 567 | "sha256:4e97332c9ce444b0c2c38dd22ddc61c743eb208d916e4265a2a3b575bdccb1d3", 568 | "sha256:525396ee324ee2da82919f2ee9c9e73b012f23e7640131dd1b53a90206a0f09c", 569 | "sha256:52b07fbc32032c21ad4ab060fec137b76eb804c4b9a1c7c7dc562549306afad2", 570 | "sha256:52ccb45e77a1085ec5461cde794e1aa037df79f473cbc69b974e73940655c8d7", 571 | "sha256:5c3fbebd7de20ce93103cb3183b47671f2885307df4a17a0ad56a1dd51273d36", 572 | "sha256:5e5851969aea17660e55f6a3be00037a25b96a9b44d2083651812c99d53b14d1", 573 | "sha256:5edfa27b2d3eefa2210fb2f5d539fbed81722b49f083b2c6566455eb7422fd7e", 574 | 
"sha256:7d263e5770efddf465a9e31b78362d84d015cc894ca2c131901a4445eaa61ee1", 575 | "sha256:83381342bfc22b3c8c06f2dd93a505413888694302de25add756254beee8449c", 576 | "sha256:857eebb2c1dc60e4219ec8e98dfa19553dae33608237e107db9c6078b1167856", 577 | "sha256:98e439297f78fca3a6169fd330fbe88d78b3bb72f967ad9961bcac0d7fdd1550", 578 | "sha256:bf54103892a83c64db58125b3f2a43df6d2cb2d28889f14c78519394feb41492", 579 | "sha256:d9ac82be533394d341b41d78aca7ed0e0f4ba5a2231602e2f05aa87f25c51672", 580 | "sha256:e982fe07ede9fada6ff6705af70514a52beb1b2c3d25d4e873e82114cf3c5401", 581 | "sha256:edce2ea7f3dfc981c4ddc97add8a61381d9642dc3273737e756517cc03e84dd6", 582 | "sha256:efdc45ef1afc238db84cb4963aa689c0408912a0239b0721cb172b4016eb31d6", 583 | "sha256:f137c02498f8b935892d5c0172560d7ab54bc45039de8805075e19079c639a9c", 584 | "sha256:f82e347a72f955b7017a39708a3667f106e6ad4d10b25f237396a7115d8ed5fd", 585 | "sha256:fb7c206e01ad85ce57feeaaa0bf784b97fa3cad0d4a5737bc5295785f5c613a1" 586 | ], 587 | "version": "==1.1.0" 588 | }, 589 | "mccabe": { 590 | "hashes": [ 591 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 592 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 593 | ], 594 | "version": "==0.6.1" 595 | }, 596 | "packaging": { 597 | "hashes": [ 598 | "sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807", 599 | "sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9" 600 | ], 601 | "version": "==18.0" 602 | }, 603 | "pycodestyle": { 604 | "hashes": [ 605 | "sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83", 606 | "sha256:cbfca99bd594a10f674d0cd97a3d802a1fdef635d4361e1a2658de47ed261e3a" 607 | ], 608 | "version": "==2.4.0" 609 | }, 610 | "pydocstyle": { 611 | "hashes": [ 612 | "sha256:2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8", 613 | "sha256:5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4", 614 | 
"sha256:ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039" 615 | ], 616 | "version": "==3.0.0" 617 | }, 618 | "pyflakes": { 619 | "hashes": [ 620 | "sha256:9a7662ec724d0120012f6e29d6248ae3727d821bba522a0e6b356eff19126a49", 621 | "sha256:f661252913bc1dbe7fcfcbf0af0db3f42ab65aabd1a6ca68fe5d466bace94dae" 622 | ], 623 | "version": "==2.0.0" 624 | }, 625 | "pygments": { 626 | "hashes": [ 627 | "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", 628 | "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc" 629 | ], 630 | "version": "==2.2.0" 631 | }, 632 | "pyparsing": { 633 | "hashes": [ 634 | "sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b", 635 | "sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592" 636 | ], 637 | "version": "==2.3.0" 638 | }, 639 | "pytz": { 640 | "hashes": [ 641 | "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", 642 | "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" 643 | ], 644 | "version": "==2018.7" 645 | }, 646 | "requests": { 647 | "hashes": [ 648 | "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c", 649 | "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279" 650 | ], 651 | "version": "==2.20.0" 652 | }, 653 | "six": { 654 | "hashes": [ 655 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", 656 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" 657 | ], 658 | "version": "==1.11.0" 659 | }, 660 | "snowballstemmer": { 661 | "hashes": [ 662 | "sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128", 663 | "sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89" 664 | ], 665 | "version": "==1.2.1" 666 | }, 667 | "sphinx": { 668 | "hashes": [ 669 | "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17", 670 | 
"sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464" 671 | ], 672 | "index": "pypi", 673 | "version": "==1.8.1" 674 | }, 675 | "sphinx-rtd-theme": { 676 | "hashes": [ 677 | "sha256:02f02a676d6baabb758a20c7a479d58648e0f64f13e07d1b388e9bb2afe86a09", 678 | "sha256:d0f6bc70f98961145c5b0e26a992829363a197321ba571b31b24ea91879e0c96" 679 | ], 680 | "index": "pypi", 681 | "version": "==0.4.2" 682 | }, 683 | "sphinxcontrib-websupport": { 684 | "hashes": [ 685 | "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd", 686 | "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9" 687 | ], 688 | "version": "==1.1.0" 689 | }, 690 | "urllib3": { 691 | "hashes": [ 692 | "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39", 693 | "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22" 694 | ], 695 | "version": "==1.24.1" 696 | } 697 | } 698 | } 699 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Surrey CVSSP DCASE 2018 Task 2 System 2 | ===================================== 3 | 4 | This is the source code for CVSSP's system used in `DCASE 2018 Task 2`__. 5 | 6 | For more details about the system, consider reading the `technical 7 | report`__ or the `workshop paper`__ [1]_. 8 | 9 | __ http://dcase.community/challenge2018/task-general-purpose-audio-tagging 10 | __ http://dcase.community/documents/challenge2018/technical_reports/DCASE2018_Iqbal_89.pdf 11 | __ http://dcase.community/documents/workshop2018/proceedings/DCASE2018Workshop_Iqbal_151.pdf 12 | 13 | 14 | Requirements 15 | ------------ 16 | 17 | This software requires Python 3. 
To install the dependencies, run:: 18 | 19 | pipenv install 20 | 21 | or:: 22 | 23 | pip install -r requirements.txt 24 | 25 | The main functionality of this software also requires the DCASE 2018 Task 2 26 | datasets, which may be downloaded `here`__. After acquiring the datasets, 27 | modify ``task2/config/dcase2018_task2.py`` accordingly. 28 | 29 | For example:: 30 | 31 | _root_dataset_path = ('/path/to/datasets') 32 | """str: Path to root directory containing input audio clips.""" 33 | 34 | training_set = Dataset( 35 | name='training', 36 | path=os.path.join(_root_dataset_path, 'audio_train'), 37 | metadata_path='metadata/training.csv', 38 | ) 39 | """Dataset instance for the training dataset.""" 40 | 41 | You will also want to change the work path in ``task2/config/paths.py``:: 42 | 43 | work_path = '/path/to/workspace' 44 | """str: Path to parent directory containing program output.""" 45 | 46 | __ https://www.kaggle.com/c/freesound-audio-tagging/data 47 | 48 | 49 | Usage 50 | ----- 51 | 52 | In this section, the various commands are described. Using this software, the 53 | user is able to apply preprocessing (silence removal), extract feature vectors, 54 | train the network, generate predictions, and evaluate the predictions. 55 | 56 | Preprocessing 57 | ^^^^^^^^^^^^^ 58 | 59 | Our implementation of preprocessing involves extracting the non-silent sections 60 | of audio clips and saving these to disk separately. A new metadata file is then 61 | created with entries corresponding to the new files. 62 | 63 | To apply preprocessing, run:: 64 | 65 | python task2/main.py preprocess 66 | 67 | Refer to ``task2/silence.py`` for the relevant code. 68 | 69 | Feature Extraction 70 | ^^^^^^^^^^^^^^^^^^ 71 | 72 | To extract feature vectors, run:: 73 | 74 | python task2/main.py extract [--recompute] 75 | 76 | If ``--recompute`` is enabled, the program will recompute existing feature 77 | vectors. This implementaion extracts log-mel spectrogram features. 
See 78 | ``task2/config/logmel.py`` for tweaking the parameters. 79 | 80 | Training 81 | ^^^^^^^^ 82 | 83 | To train a model, run:: 84 | 85 | python task2/main.py train [--model MODEL] [--fold n] [--sample_weight x] [--class_weight] 86 | 87 | The ``--model`` option can be one of the following: 88 | 89 | * ``vgg13`` 90 | * ``gcnn`` 91 | * ``crnn`` 92 | * ``gcrnn`` 93 | 94 | The training set is assumed to be split into several folds, so the ``--fold`` 95 | option specifies which one to use as the validation set. If set to ``-1``, the 96 | program trains on the entire dataset. The ``--sample_weight`` option allows 97 | setting a sample weight to be used for unverified (noisy) examples. Finally, 98 | setting the ``--class_weight`` flag indicates that examples should be weighted 99 | based on the class that they belong to. 100 | 101 | See ``task2/config/training.py`` for tweaking the parameters or 102 | ``task2/training.py`` for further modifications. 103 | 104 | Prediction 105 | ^^^^^^^^^^ 106 | 107 | To generate predictions, run:: 108 | 109 | python task2/main.py predict [--fold n] 110 | 111 | The ``--fold`` option specifies which fold-specific model to use. 112 | 113 | See ``task2/config/predictions.py`` to modify which epochs are selected for 114 | generating the predictions. By default, the top four models based on their MAP 115 | score on the validation set are chosen. 116 | 117 | Evaluation 118 | ^^^^^^^^^^ 119 | 120 | To evaluate the predictions, run:: 121 | 122 | python task2/main.py evaluate [--fold n] 123 | 124 | Stacking 125 | ^^^^^^^^ 126 | Stacking is an ensembling technique that involves creating meta-features based 127 | on the predictions of a number of base classifiers. These meta-features are 128 | then used to train a second-level classifier and generate new predictions. We 129 | provide scripts to do this. 
130 | 131 | To generate meta-features, run:: 132 | 133 | python scripts/meta_features.py 134 | 135 | The argument ``pred_path`` refers to the parent directory in which the 136 | predictions of the base classifiers are stored. ``pred_type`` must be either 137 | ``training`` or ``test``, depending on which dataset the meta-features are for. 138 | ``output_path`` specifies the path of the output HDF5 file. 139 | 140 | To give an example, assume that the directory structure looks like this:: 141 | 142 | workspace 143 | ├── predictions 144 | │   ├── classifier1 145 | │   ├── classifier2 146 | │   ├── classifier3 147 | 148 | In this case, you might run:: 149 | 150 | python scripts/meta_features.py workspace/predictions training training.h5 151 | python scripts/meta_features.py workspace/predictions test test.h5 152 | 153 | For the time being, the script must be edited to select the classifiers. 154 | 155 | To then generate predictions using a second-level classifier, run:: 156 | 157 | python scripts/predict_stack.py --test_path test.h5 training.h5 158 | 159 | The argument ``metadata_path`` is the path to the training set metadata file. 160 | See the script itself for more details. 161 | 162 | Pseudo-labeling 163 | ^^^^^^^^^^^^^^^ 164 | To relabel or promote training examples, run:: 165 | 166 | python scripts/relabel.py [--relabel_threshold t1] [--promote_threshold t2] 167 | 168 | The argument ``metadata_path`` is the path to the training set metadata file 169 | containing the original labels. ``pred_path`` is the path to the predictions 170 | file used for pseudo-labeling. ``output_path`` is the path of the new metadata 171 | file to be written. The threshold options allow constraining which examples are 172 | relabeled or promoted. 173 | 174 | 175 | Citing 176 | ------ 177 | If you wish to cite this work, please cite the following paper: 178 | 179 | .. [1] \T. Iqbal, Q. Kong, M. D. Plumbley, and W. 
Wang, "General-Purpose 180 | Audio Tagging from Noisy Labels using Convolutional Neural 181 | Networks," in Detection and Classification of Acoustic Scenes and 182 | Events 2018 Workshop, Woking, UK, 2018, pp. 212–216. 183 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = dcase2018_task2 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. 
If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../task2')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'dcase2018_task2' 23 | copyright = '2018, Turab Iqbal' 24 | author = 'Turab Iqbal' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.doctest', 44 | 'sphinx.ext.mathjax', 45 | 'sphinx.ext.napoleon', 46 | ] 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = '.rst' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # The language for content autogenerated by Sphinx. Refer to documentation 61 | # for a list of supported languages. 62 | # 63 | # This is also used if you do content translation via gettext catalogs. 64 | # Usually you set "language" from the command line for these cases. 65 | language = None 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | # This pattern also affects html_static_path and html_extra_path . 
70 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 71 | 72 | # The name of the Pygments (syntax highlighting) style to use. 73 | pygments_style = 'sphinx' 74 | 75 | 76 | # -- Options for HTML output ------------------------------------------------- 77 | 78 | # The theme to use for HTML and HTML Help pages. See the documentation for 79 | # a list of builtin themes. 80 | # 81 | html_theme = 'sphinx_rtd_theme' 82 | 83 | # Theme options are theme-specific and customize the look and feel of a theme 84 | # further. For a list of options available for each theme, see the 85 | # documentation. 86 | # 87 | # html_theme_options = {} 88 | 89 | # Add any paths that contain custom static files (such as style sheets) here, 90 | # relative to this directory. They are copied after the builtin static files, 91 | # so a file named "default.css" will overwrite the builtin "default.css". 92 | html_static_path = ['_static'] 93 | 94 | # Custom sidebar templates, must be a dictionary that maps document names 95 | # to template names. 96 | # 97 | # The default sidebars (for documents that don't match any pattern) are 98 | # defined by theme itself. Builtin themes are using these templates by 99 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 100 | # 'searchbox.html']``. 101 | # 102 | # html_sidebars = {} 103 | 104 | 105 | # -- Options for HTMLHelp output --------------------------------------------- 106 | 107 | # Output file base name for HTML help builder. 108 | htmlhelp_basename = 'dcase2018_task2doc' 109 | 110 | 111 | # -- Options for LaTeX output ------------------------------------------------ 112 | 113 | latex_elements = { 114 | # The paper size ('letterpaper' or 'a4paper'). 115 | # 116 | # 'papersize': 'letterpaper', 117 | 118 | # The font size ('10pt', '11pt' or '12pt'). 119 | # 120 | # 'pointsize': '10pt', 121 | 122 | # Additional stuff for the LaTeX preamble. 
123 | # 124 | # 'preamble': '', 125 | 126 | # Latex figure (float) alignment 127 | # 128 | # 'figure_align': 'htbp', 129 | } 130 | 131 | # Grouping the document tree into LaTeX files. List of tuples 132 | # (source start file, target name, title, 133 | # author, documentclass [howto, manual, or own class]). 134 | latex_documents = [ 135 | (master_doc, 'dcase2018_task2.tex', 'dcase2018\\_task2 Documentation', 136 | 'Turab Iqbal', 'manual'), 137 | ] 138 | 139 | 140 | # -- Options for manual page output ------------------------------------------ 141 | 142 | # One entry per manual page. List of tuples 143 | # (source start file, name, description, authors, manual section). 144 | man_pages = [ 145 | (master_doc, 'dcase2018_task2', 'dcase2018_task2 Documentation', 146 | [author], 1) 147 | ] 148 | 149 | 150 | # -- Options for Texinfo output ---------------------------------------------- 151 | 152 | # Grouping the document tree into Texinfo files. List of tuples 153 | # (source start file, target name, title, author, 154 | # dir menu entry, description, category) 155 | texinfo_documents = [ 156 | (master_doc, 'dcase2018_task2', 'dcase2018_task2 Documentation', 157 | author, 'dcase2018_task2', 'Surrey CVSSP DCASE 2018 Task 2 system.', 158 | 'Miscellaneous'), 159 | ] 160 | 161 | 162 | # -- Extension configuration ------------------------------------------------- 163 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. dcase2018_task2 documentation master file, created by 2 | sphinx-quickstart on Wed Aug 8 16:30:23 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to dcase2018_task2's documentation! 7 | =========================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/source/config.rst: -------------------------------------------------------------------------------- 1 | config package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | config.dcase2018\_task2 module 8 | ------------------------------ 9 | 10 | .. automodule:: config.dcase2018_task2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | config.logmel module 16 | -------------------- 17 | 18 | .. automodule:: config.logmel 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | config.paths module 24 | ------------------- 25 | 26 | .. automodule:: config.paths 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | config.prediction module 32 | ------------------------ 33 | 34 | .. automodule:: config.prediction 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | config.silence module 40 | --------------------- 41 | 42 | .. automodule:: config.silence 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | config.training module 48 | ---------------------- 49 | 50 | .. automodule:: config.training 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | Module contents 57 | --------------- 58 | 59 | .. automodule:: config 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | -------------------------------------------------------------------------------- /docs/source/convnet.rst: -------------------------------------------------------------------------------- 1 | convnet module 2 | ============== 3 | 4 | .. 
automodule:: convnet 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/evaluation.rst: -------------------------------------------------------------------------------- 1 | evaluation module 2 | ================= 3 | 4 | .. automodule:: evaluation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/features.rst: -------------------------------------------------------------------------------- 1 | features module 2 | =============== 3 | 4 | .. automodule:: features 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/file_io.rst: -------------------------------------------------------------------------------- 1 | file\_io module 2 | =============== 3 | 4 | .. automodule:: file_io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/gated_conv.rst: -------------------------------------------------------------------------------- 1 | gated\_conv module 2 | ================== 3 | 4 | .. automodule:: gated_conv 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/inference.rst: -------------------------------------------------------------------------------- 1 | inference module 2 | ================ 3 | 4 | .. automodule:: inference 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/main.rst: -------------------------------------------------------------------------------- 1 | main module 2 | =========== 3 | 4 | .. 
automodule:: main 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/mixup.rst: -------------------------------------------------------------------------------- 1 | mixup module 2 | ============ 3 | 4 | .. automodule:: mixup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | task2 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | config 8 | convnet 9 | evaluation 10 | features 11 | file_io 12 | gated_conv 13 | inference 14 | main 15 | mixup 16 | silence 17 | training 18 | utils 19 | -------------------------------------------------------------------------------- /docs/source/silence.rst: -------------------------------------------------------------------------------- 1 | silence module 2 | ============== 3 | 4 | .. automodule:: silence 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/training.rst: -------------------------------------------------------------------------------- 1 | training module 2 | =============== 3 | 4 | .. automodule:: training 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | utils module 2 | ============ 3 | 4 | .. 
automodule:: utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -i https://pypi.org/simple 2 | absl-py==0.6.1 3 | astor==0.7.1 4 | audioread==2.1.6 5 | decorator==4.3.0 6 | gast==0.2.0 7 | grpcio==1.16.0 8 | h5py==2.8.0 9 | joblib==0.12.5 10 | keras-applications==1.0.6 11 | keras-preprocessing==1.0.5 12 | keras==2.2.4 13 | librosa==0.6.2 14 | llvmlite==0.25.0 15 | markdown==3.0.1 16 | numba==0.40.1 17 | numpy==1.15.4 18 | pandas==0.23.4 19 | protobuf==3.6.1 20 | pydub==0.23.0 21 | python-dateutil==2.7.5 22 | pytz==2018.7 23 | pyyaml==3.13 24 | resampy==0.2.1 25 | scikit-learn==0.20.0 26 | scipy==1.1.0 27 | six==1.11.0 28 | sklearn==0.0 29 | tensorboard==1.12.0 30 | tensorflow-gpu==1.12.0 31 | termcolor==1.1.0 32 | tqdm==4.28.1 33 | werkzeug==0.14.1 34 | wheel==0.32.2 ; python_version >= '3' 35 | -------------------------------------------------------------------------------- /scripts/meta_features.py: -------------------------------------------------------------------------------- 1 | """Generate meta-features for stacking. 2 | 3 | After training a model on the cross-validation folds, the user can 4 | generate predictions for the validation sets -- which constitute the 5 | training set -- and the test set. This script generates features based 6 | on these predictions. For example, if we have five models, and each 7 | model outputs an N x K matrix of predictions, where N is the number of 8 | predicted audio clips and K=41 is the number of classes, this script 9 | concatenates these to produce an N x 5K matrix, i.e. N feature vectors. 10 | 11 | This script requires three command-line arguments: 12 | 13 | * pred_path: Path to predictions directory. 14 | * pred_type: Either ``'training'`` or ``'test'``. 15 | * output_path: Output file path of meta-features. 
16 | 17 | It is assumed that the relevant predictions have already been generated 18 | for each fold. This script merges the fold predictions into one. 19 | """ 20 | 21 | import argparse 22 | import os.path 23 | 24 | import h5py 25 | import numpy as np 26 | import pandas as pd 27 | 28 | 29 | MODELS = [ 30 | 'jul28_pydub_gcnn', 31 | 'jul28_pydub_gcnn_1s', 32 | 'jul31_pydub_vgg13', 33 | 'jul31_pydub_vgg13_1s', 34 | 'jul28_pydub_crnn', 35 | 'jul28_pydub_crnn_1s', 36 | 'jul25_pydub_gcrnn', 37 | 'jul30_pydub_gcrnn_1s', 38 | ] 39 | """The training IDs of the models to use.""" 40 | 41 | 42 | def merge_predictions(base_path, pred_type, n_folds=5): 43 | """Merge the predictions of the training folds. 44 | 45 | If the predictions are for the training set, they are collated. If 46 | they are for the test set, they are averaged. 47 | 48 | Args: 49 | base_path (str): Path to predictions directory. 50 | pred_type (str): Either ``'training'`` or ``'test'``. 51 | n_folds (int): Number of training folds. 52 | 53 | Returns: 54 | pd.DataFrame: The merged predictions. 
55 | """ 56 | dfs = [] 57 | for i in range(n_folds): 58 | name = 'fold' if pred_type == 'training' else pred_type 59 | path = os.path.join(base_path, 'predictions_%s%d.csv' % (name, i)) 60 | dfs.append(pd.read_csv(path, index_col=0)) 61 | 62 | df = pd.concat(dfs) 63 | if pred_type == 'training': 64 | metadata_path = '/vol/vssp/msos/ti/dcase2018/task2/metadata/train.csv' 65 | df_train = pd.read_csv(metadata_path, index_col=0) 66 | return df.loc[df_train.index] 67 | if pred_type == 'test': 68 | return df.groupby(level=0).mean() 69 | 70 | 71 | # Parse command line arguments 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument('pred_path', help='path to predictions directory') 74 | parser.add_argument('pred_type', help='either "training" or "test"') 75 | parser.add_argument('output_path', help='output file path') 76 | args = parser.parse_args() 77 | 78 | # Collect predictions for each model 79 | feats = [] 80 | top_preds = [] 81 | for model in MODELS: 82 | path = os.path.join(args.pred_path, model) 83 | df = merge_predictions(path, args.pred_type) 84 | feats.append(df.values) 85 | 86 | top_preds.append(df.idxmax(axis=1).astype('category').cat.codes) 87 | 88 | # Print correlation matrix 89 | print(pd.concat(top_preds, axis=1).corr()) 90 | 91 | # Save meta-features to disk 92 | feats = np.stack(feats, axis=1) 93 | feats = np.reshape(feats, (feats.shape[0], -1)) 94 | with h5py.File(args.output_path, 'w') as f: 95 | f.create_dataset('F', data=feats) 96 | f.create_dataset('names', data=top_preds[0].index.values, 97 | dtype=h5py.special_dtype(vlen=str)) 98 | -------------------------------------------------------------------------------- /scripts/predict_stack.py: -------------------------------------------------------------------------------- 1 | """Predict labels for the test set using a second-level classifier. 2 | 3 | This script trains a logistic regression classifier on the training set 4 | meta-features created using the ``meta_features.py`` script. 
It then 5 | generates predictions for either the training set or the test set. The 6 | former refers to training and predicting each fold. 7 | 8 | This script requires three command-line arguments: 9 | 10 | * train_path: Path to training features. 11 | * metadata_path: Path to training metadata. 12 | * output_path: Output file path. 13 | 14 | It also takes an optional argument: 15 | 16 | * --test_path: Path to test features. If this is specified, the script 17 | will generate predictions for the test set and write them to a 18 | submission file. Otherwise, it will generate predictions for the 19 | training set on a fold-by-fold basis and write them to a csv file. 20 | """ 21 | 22 | import argparse 23 | import sys 24 | 25 | import h5py 26 | import numpy as np 27 | import pandas as pd 28 | 29 | from sklearn.linear_model import LogisticRegression 30 | 31 | sys.path.append('task2') 32 | 33 | import file_io as io 34 | import utils as utils 35 | 36 | 37 | def train(x, df): 38 | """Train a logistic regression classifier. 39 | 40 | Args: 41 | x (np.ndarray): Training data. 42 | df (pd.DataFrame): Training metadata. 43 | 44 | Returns: 45 | The trained classifier. 
46 | """ 47 | y = df.label.astype('category').cat.codes.values 48 | sample_weight = np.ones(len(x)) 49 | sample_weight[df.manually_verified == 0] = 0.65 50 | 51 | clf = LogisticRegression( 52 | penalty='l2', 53 | tol=0.0001, 54 | C=1.0, 55 | random_state=1000, 56 | class_weight='balanced', 57 | ) 58 | clf.fit(x, y, sample_weight=sample_weight) 59 | 60 | return clf 61 | 62 | 63 | # Parse command line arguments 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('train_path', help='path to training features') 66 | parser.add_argument('metadata_path', help='path to training metadata') 67 | parser.add_argument('output_path', help='output file path') 68 | parser.add_argument('--test_path', help='path to test features') 69 | args = parser.parse_args() 70 | 71 | # Load training data 72 | with h5py.File(args.train_path, 'r') as f: 73 | x_train = np.array(f['F']) 74 | 75 | df_train = pd.read_csv(args.metadata_path, index_col=0) 76 | y_train = df_train.label.astype('category').cat.codes.values 77 | 78 | if args.test_path: 79 | # Load test data 80 | with h5py.File(args.test_path, 'r') as f: 81 | x_test = np.array(f['F']) 82 | 83 | index = pd.Index(f['names'], name='fname') 84 | 85 | # Train and predict the test data 86 | clf = train(x_train, df_train) 87 | y_pred = clf.predict_proba(x_test) 88 | 89 | # Write to a submission file. 
90 | df_pred = pd.DataFrame(y_pred, index=index, columns=utils.LABELS) 91 | io.write_predictions(df_pred, args.output_path) 92 | else: 93 | index = pd.Index([], name='fname') 94 | 95 | # Train and predict for each fold and concatenate the predictions 96 | y_preds = [] 97 | for fold in range(5): 98 | mask = df_train.fold == fold 99 | index = index.append(df_train[mask].index) 100 | clf = train(x_train[~mask], df_train[~mask]) 101 | y_preds.append(clf.predict_proba(x_train[mask])) 102 | y_pred = np.concatenate(y_preds) 103 | 104 | # Write to a CSV file 105 | df_pred = pd.DataFrame(y_pred, index=index, columns=utils.LABELS) 106 | df_pred = df_pred.loc[df_train.index] 107 | df_pred.to_csv(args.output_path) 108 | -------------------------------------------------------------------------------- /scripts/relabel.py: -------------------------------------------------------------------------------- 1 | """Relabel/promote training examples based on predicted labels. 2 | 3 | This script is for pseudo-labeling non-verified examples. It can also 4 | promote non-verified examples to verified if the predicted labels match 5 | the ground truth labels. In both cases, the confidence of the prediction 6 | must exceed a certain threshold. 7 | 8 | This script requires three command-line arguments: 9 | 10 | * metadata_path: Path to metadata file containing ground truth. 11 | * pred_path: Path to training predictions. 12 | * output_path: Output file path. 13 | 14 | It also takes optional arguments: 15 | 16 | * relabel_threshold: Confidence threshold for relabeling. 17 | * promote_threshold: Confidence threshold for promotion. 
18 | """ 19 | 20 | import argparse 21 | 22 | import h5py 23 | import numpy as np 24 | import pandas as pd 25 | 26 | 27 | # Parse command line arguments 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('metadata_path', help='path to metadata') 30 | parser.add_argument('pred_path', help='path to predictions') 31 | parser.add_argument('output_path', help='output file path') 32 | parser.add_argument('--relabel_threshold', type=float, default=0, 33 | help='confidence threshold for relabeling') 34 | parser.add_argument('--promote_threshold', type=float, default=1.0, 35 | help='confidence threshold for promotion') 36 | args = parser.parse_args() 37 | 38 | df_true = pd.read_csv(args.metadata_path, index_col=0) 39 | df_pred = pd.read_csv(args.pred_path, index_col=0) 40 | top_label = df_pred.idxmax(axis=1) 41 | confidence = df_pred.max(axis=1) 42 | 43 | # Determine which examples should be relabeled or promoted 44 | relabel_mask = (df_true.manually_verified == 0) \ 45 | & (top_label != df_true.label) \ 46 | & (confidence > args.relabel_threshold) 47 | promote_mask = (df_true.manually_verified == 0) \ 48 | & (top_label == df_true.label) \ 49 | & (confidence > args.promote_threshold) 50 | 51 | df_true.loc[relabel_mask, 'label'] = top_label[relabel_mask] 52 | print('%d examples relabeled' % sum(relabel_mask)) 53 | 54 | df_true.loc[promote_mask, 'manually_verified'] = 2 55 | print('%d examples promoted' % sum(promote_mask)) 56 | 57 | # Save as a new metadata file 58 | df_true.to_csv(args.output_path) 59 | -------------------------------------------------------------------------------- /scripts/select_folds.py: -------------------------------------------------------------------------------- 1 | """Split the training set into K folds. 2 | 3 | This script requires three command-line arguments: 4 | 5 | * metadata_path: Path to training set metadata. 6 | * output_path: Output file path. 7 | * n_folds: Number of folds to use. 
import argparse

import pandas as pd

from sklearn.model_selection import StratifiedKFold


# Parse command line arguments. Note: n_folds is optional and
# defaults to 5.
parser = argparse.ArgumentParser()
parser.add_argument('metadata_path', help='path to training set metadata')
parser.add_argument('output_path', help='output metadata file path')
parser.add_argument('--n_folds', type=int, default=5,
                    help='number of folds to use')
args = parser.parse_args()

# Create dummy labels to ensure each fold has a similar number of
# manually verified examples.
df = pd.read_csv(args.metadata_path, index_col=0)
labels = df.label + df.manually_verified.astype(str)

# Assign a fold number to each example
df['fold'] = -1
skf = StratifiedKFold(args.n_folds)
for i, (_, te) in enumerate(skf.split(df.index, labels)):
    # Look the 'fold' column up by name rather than hard-coding its
    # position (previously iloc column 2), which silently corrupted
    # data if the metadata gained or lost a column.
    df.iloc[te, df.columns.get_loc('fold')] = i

print('Number of verified examples per fold:')
print([sum((df.fold == i) & (df.manually_verified == 1))
       for i in range(args.n_folds)])

# Save new metadata file to disk
df.to_csv(args.output_path)
def to_dataset(name, preprocessed=True):
    """Return the Dataset instance corresponding to the given name.

    Args:
        name (str): Name of dataset.
        preprocessed (bool): Whether to return the preprocessed instance.

    Returns:
        The Dataset instance corresponding to the given name, or
        ``None`` if the name is not recognized.
    """
    # Map each dataset name to its (preprocessed, raw) instance pair
    lookup = {
        'training': (preprocessed_training_set, training_set),
        'test': (preprocessed_test_set, test_set),
    }
    if name not in lookup:
        return None
    preprocessed_instance, raw_instance = lookup[name]
    return preprocessed_instance if preprocessed else raw_instance
'{}_results.csv') 30 | """str: Path to the file containing results.""" 31 | -------------------------------------------------------------------------------- /task2/config/prediction.py: -------------------------------------------------------------------------------- 1 | prediction_epochs = 'val_map' 2 | """Specification for which models (epochs) to select for prediction. 3 | 4 | Either a list of epoch numbers or a string specifying the metric to be 5 | used to select the top epochs. 6 | """ 7 | 8 | threshold = -1 9 | """number: Number for thresholding audio tagging predictions. 10 | 11 | A value of -1 indicates that the most probable label should be selected 12 | instead of selecting labels that surpass a certain threshold. 13 | """ 14 | -------------------------------------------------------------------------------- /task2/config/silence.py: -------------------------------------------------------------------------------- 1 | n_window = 1024 2 | """int: Length of a frame used for silence detection.""" 3 | 4 | default_threshold = -48 5 | """int: Default threshold for silence.""" 6 | 7 | transients_threshold = -56 8 | """int: Threshold for transient audio signals.""" 9 | 10 | min_silence = 500 11 | """int: Minimum length of silence between two non-silent segments.""" 12 | 13 | keep_silence = 400 14 | """int: Amount of start/end silence to keep for each audio segment.""" 15 | -------------------------------------------------------------------------------- /task2/config/training.py: -------------------------------------------------------------------------------- 1 | training_id = 'jul31_vgg13' 2 | """str: A string identifying this particular training instance.""" 3 | 4 | initial_seed = 1000 5 | """int: Fixed seed used prior to training.""" 6 | 7 | batch_size = 128 8 | """int: The number of samples in a mini batch.""" 9 | 10 | n_epochs = 40 11 | """int: The number of epochs to train the network for. 
12 | 13 | A value of -1 indicates an early stopping condition should be used. 14 | """ 15 | 16 | learning_rate = {'initial': 0.0005, 17 | 'decay': 0.90, 18 | 'decay_rate': 2., 19 | } 20 | """dict: Learning rate hyperparameters for SGD. 21 | 22 | Keyword Args: 23 | initial (float): Initial learning rate. 24 | decay (float): Multiplicative factor for learning rate decay. A 25 | value of 1 indicates the learning rate should not be decayed. 26 | decay_rate (float): Number of epochs until learning rate is decayed. 27 | """ 28 | -------------------------------------------------------------------------------- /task2/convnet.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras.layers import BatchNormalization 3 | from keras.layers import Bidirectional 4 | from keras.layers import Conv2D 5 | from keras.layers import Dense 6 | from keras.layers import GRU 7 | from keras.layers import Input 8 | from keras.layers import Lambda 9 | from keras.layers import MaxPooling2D 10 | from keras.layers import GlobalAveragePooling1D 11 | from keras.layers import GlobalAveragePooling2D 12 | from keras.models import Model 13 | 14 | import gated_conv 15 | 16 | 17 | def vgg13(input_shape, n_classes): 18 | """Create a VGG13-style model. 19 | 20 | Args: 21 | input_shape (tuple): Shape of the input tensor. 22 | n_classes (int): Number of classes for classification. 23 | 24 | Returns: 25 | A Keras model of the VGG13 architecture. 
26 | """ 27 | input_tensor = Input(shape=input_shape, name='input_tensor') 28 | 29 | x = _conv_block(input_tensor, n_filters=64) 30 | x = _conv_block(x, n_filters=128) 31 | x = _conv_block(x, n_filters=256) 32 | x = _conv_block(x, n_filters=512) 33 | x = _conv_block(x, n_filters=512) 34 | 35 | x = GlobalAveragePooling2D()(x) 36 | 37 | x = Dense(n_classes, activation='softmax')(x) 38 | return Model(input_tensor, x, name='vgg13') 39 | 40 | 41 | def gcnn(input_shape, n_classes): 42 | """Create a VGG13 model based on gated convolutions. 43 | 44 | Args: 45 | input_shape (tuple): Shape of the input tensor. 46 | n_classes (int): Number of classes for classification. 47 | 48 | Returns: 49 | A Keras model of the GCNN architecture. 50 | """ 51 | input_tensor = Input(shape=input_shape, name='input_tensor') 52 | 53 | x = gated_conv.block(input_tensor, n_filters=64) 54 | x = gated_conv.block(x, n_filters=128) 55 | x = gated_conv.block(x, n_filters=256) 56 | x = gated_conv.block(x, n_filters=512) 57 | x = gated_conv.block(x, n_filters=512) 58 | 59 | x = GlobalAveragePooling2D()(x) 60 | 61 | x = Dense(n_classes, activation='softmax')(x) 62 | return Model(input_tensor, x, name='gcnn') 63 | 64 | 65 | def crnn(input_shape, n_classes): 66 | """Create a convolutional recurrent neural network (CRNN) model. 67 | 68 | Args: 69 | input_shape (tuple): Shape of the input tensor. 70 | n_classes (int): Number of classes for classification. 71 | 72 | Returns: 73 | A Keras model of the CRNN architecture. 
74 | """ 75 | input_tensor = Input(shape=input_shape, name='input_tensor') 76 | 77 | x = _conv_block(input_tensor, n_filters=64) 78 | x = _conv_block(x, n_filters=128) 79 | x = _conv_block(x, n_filters=256) 80 | x = _conv_block(x, n_filters=512) 81 | x = _conv_block(x, n_filters=512) 82 | 83 | x = Lambda(lambda x: K.mean(x, axis=2))(x) 84 | x = Bidirectional(GRU(512, activation='relu', 85 | return_sequences=True))(x) 86 | x = GlobalAveragePooling1D()(x) 87 | 88 | x = Dense(n_classes, activation='softmax')(x) 89 | return Model(input_tensor, x, name='crnn') 90 | 91 | 92 | def gcrnn(input_shape, n_classes): 93 | """Create a CRNN model based on gated convolutions. 94 | 95 | Args: 96 | input_shape (tuple): Shape of the input tensor. 97 | n_classes (int): Number of classes for classification. 98 | 99 | Returns: 100 | A Keras model of the GCRNN architecture. 101 | """ 102 | input_tensor = Input(shape=input_shape, name='input_tensor') 103 | 104 | x = gated_conv.block(input_tensor, n_filters=64) 105 | x = gated_conv.block(x, n_filters=128) 106 | x = gated_conv.block(x, n_filters=256) 107 | x = gated_conv.block(x, n_filters=512) 108 | x = gated_conv.block(x, n_filters=512) 109 | 110 | x = Lambda(lambda x: K.mean(x, axis=2))(x) 111 | x = Bidirectional(GRU(512, activation='relu', 112 | return_sequences=True))(x) 113 | x = GlobalAveragePooling1D()(x) 114 | 115 | x = Dense(n_classes, activation='softmax')(x) 116 | return Model(input_tensor, x, name='crnn') 117 | 118 | 119 | def _conv_block(x, n_filters, kernel_size=(3, 3), pool_size=(2, 2), **kwargs): 120 | """Apply two batch-normalized convolutions followed by max pooling. 121 | 122 | Args: 123 | x (tensor): Input tensor. 124 | n_filters (int): Number of convolution filters. 125 | kernel_size (int or tuple): Convolution kernel size. 126 | pool_size (int or tuple): Max pooling parameter. 127 | kwargs: Other keyword arguments. 128 | 129 | Returns: 130 | tensor: The output tensor. 
131 | """ 132 | x = _conv_bn(x, n_filters, kernel_size, **kwargs) 133 | x = _conv_bn(x, n_filters, kernel_size, **kwargs) 134 | return MaxPooling2D(pool_size=pool_size)(x) 135 | 136 | 137 | def _conv_bn(x, n_filters, kernel_size=(3, 3), **kwargs): 138 | """Apply a convolution operation followed by batch normalization. 139 | 140 | Args: 141 | x (tensor): Input tensor. 142 | n_filters (int): Number of convolution filters. 143 | kernel_size (int or tuple): Convolution kernel size. 144 | kwargs: Other keyword arguments. 145 | 146 | Returns: 147 | tensor: The output tensor. 148 | """ 149 | x = Conv2D(n_filters, 150 | kernel_size=kernel_size, 151 | padding='same', 152 | activation='relu', 153 | **kwargs)(x) 154 | return BatchNormalization(axis=-1)(x) 155 | -------------------------------------------------------------------------------- /task2/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import sklearn.metrics as metrics 4 | 5 | import inference 6 | import utils 7 | 8 | 9 | def evaluate_audio_tagging(y_true, y_pred, threshold=-1): 10 | """Evaluate audio tagging performance. 11 | 12 | Three types of scores are returned: 13 | 14 | * Class-wise 15 | * Macro-averaged 16 | * Micro-averaged 17 | 18 | The ground truth values and predictions should both be passed in a 19 | 2D array in which the first dimension is the sample axis and the 20 | second is the class axis. 21 | 22 | Args: 23 | y_true (np.ndarray): 2D array of ground truth values. 24 | y_pred (np.ndarray): 2D array of predictions. 25 | threshold (number): Threshold used to binarize predictions. 26 | 27 | Returns: 28 | pd.DataFrame: Table of evaluation results. 
29 | """ 30 | y_pred_b = inference.binarize_predictions(y_pred, threshold) 31 | 32 | class_scores = compute_audio_tagging_scores(y_true, y_pred, y_pred_b).T 33 | macro_scores = np.mean(class_scores, axis=0, keepdims=True) 34 | micro_scores = compute_audio_tagging_scores( 35 | y_true, y_pred, y_pred_b, average='micro') 36 | 37 | # Create DataFrame of evaluation results 38 | data = np.concatenate((class_scores, macro_scores, micro_scores[None, :])) 39 | index = utils.LABELS + ['Macro Average', 'Micro Average'] 40 | columns = ['MAP@3', 'F-score', 'Precision', 'Recall'] 41 | return pd.DataFrame(data, pd.Index(index, name='Class'), columns) 42 | 43 | 44 | def compute_audio_tagging_scores(y_true, y_pred, y_pred_b, average=None): 45 | """Compute prediction scores using several performance metrics. 46 | 47 | The following metrics are used: 48 | 49 | * MAP@3 50 | * F1 Score 51 | * Precision 52 | * Recall 53 | 54 | Args: 55 | y_true (np.ndarray): 2D array of ground truth values. 56 | y_pred (np.ndarray): 2D array of prediction probabilities. 57 | y_pred_b (np.ndarray): 2D array of binary predictions. 58 | average (str): The averaging method. Either ``'macro'``, 59 | ``'micro'``, or ``None``, where the latter is used to 60 | disable averaging. 61 | 62 | Returns: 63 | np.ndarray: Scores corresponding to the metrics used. 64 | """ 65 | # Compute MAP@3 66 | map_3 = compute_map(y_true, y_pred, k=3, class_wise=average is None) 67 | 68 | # Compute precision and recall scores 69 | precision, recall, f1_score, _ = metrics.precision_recall_fscore_support( 70 | y_true, y_pred_b, average=average) 71 | 72 | return np.array([map_3, f1_score, precision, recall]) 73 | 74 | 75 | def compute_map(y_true, y_pred, k=3, class_wise=False): 76 | """Compute the mean average precision at k (MAP@k). 77 | 78 | Args: 79 | y_true (np.ndarray): 2D array of ground truth values. 80 | y_pred (np.ndarray): 2D array of predictions. 81 | k (int): The maximum number of predicted elements. 
def extract_dataset(dataset_path,
                    file_names,
                    extractor,
                    output_path,
                    recompute=False,
                    ):
    """Extract features from the audio clips in a dataset.

    Features are written incrementally to an HDF5 file, so an
    interrupted run can be resumed without recomputing entries that
    were already extracted (unless ``recompute`` is set).

    Args:
        dataset_path (str): Path of directory containing dataset.
        file_names (array_like): List of file names for the audio clips.
        extractor: Class instance for feature extraction.
        output_path (str): File path of output HDF5 file.
        recompute (bool): Whether to extract features that already exist
            in the HDF5 file.
    """
    # Create/load the HDF5 file to store the feature vectors
    with h5py.File(output_path, 'a') as f:
        size = len(file_names)  # Size of dataset

        # Create/load feature vector dataset and timestamp dataset.
        # Variable-length dtypes are used because clips differ in
        # duration, so the flattened feature vectors differ in length.
        feats = f.require_dataset('F', (size,),
                                  dtype=h5py.special_dtype(vlen=float))
        timestamps = f.require_dataset('timestamps', (size,),
                                       dtype=h5py.special_dtype(vlen=bytes))

        # Record shape of reference feature vector. Used to infer the
        # original shape of a vector prior to flattening.
        feats.attrs['shape'] = extractor.output_shape(1)[1:]

        for i, name in enumerate(tqdm(file_names)):
            # Skip if existing feature vector should not be recomputed.
            # An empty timestamp means the entry was never written.
            if timestamps[i] and not recompute:
                continue

            path = os.path.join(dataset_path, name)
            x, sample_rate = librosa.load(path, sr=None)
            if sample_rate is None:
                print('Warning: Skipping {}'.format(name))
                continue

            # Extract and save to dataset as flattened array
            feats[i] = extractor.extract(x, sample_rate).flatten()
            # Record timestamp in ISO format
            timestamps[i] = dt.datetime.now().isoformat()
class LogmelExtractor(object):
    """Feature extractor for logmel representations.

    A logmel feature vector is a spectrogram representation that has
    been scaled using a Mel filterbank and a log nonlinearity.

    Args:
        sample_rate (number): Target resampling rate.
        n_window (int): Number of bins in each spectrogram frame.
        hop_length (int): Number of samples between frames.
        n_mels (int): Number of Mel bands.

    Attributes:
        sample_rate (number): Target resampling rate.
        n_window (int): Number of bins in each spectrogram frame.
        hop_length (int): Number of samples between frames.
        mel_fb (np.ndarray): Mel filterbank matrix.
    """

    def __init__(self,
                 sample_rate=16000,
                 n_window=1024,
                 hop_length=512,
                 n_mels=64,
                 ):
        self.sample_rate = sample_rate
        self.n_window = n_window
        self.hop_length = hop_length

        # Precompute the Mel filterbank so each extract() call only
        # needs a matrix multiplication
        self.mel_fb = librosa.filters.mel(sr=sample_rate,
                                          n_fft=n_window,
                                          n_mels=n_mels,
                                          )

    def output_shape(self, clip_duration):
        """Determine the shape of a logmel feature vector.

        Args:
            clip_duration (number): Duration of the input time-series
                signal given in seconds.

        Returns:
            tuple: The shape of a logmel feature vector.
        """
        n_samples = clip_duration * self.sample_rate
        n_frames = n_samples // self.hop_length + 1
        n_bands = self.mel_fb.shape[0]
        return (n_frames, n_bands)

    def extract(self, x, sample_rate):
        """Transform the given signal into a logmel feature vector.

        Args:
            x (np.ndarray): Input time-series signal.
            sample_rate (number): Sampling rate of signal.

        Returns:
            np.ndarray: The logmel feature vector.
        """
        # Bring the signal to the extractor's target sampling rate
        x = librosa.resample(x, sample_rate, self.sample_rate)

        # Power spectrogram via the short-time Fourier transform
        spectrogram = np.abs(
            librosa.stft(x, n_fft=self.n_window,
                         hop_length=self.hop_length)) ** 2
        # Map onto the Mel frequency scale (transposed to time-major)
        mel_spectrogram = np.dot(self.mel_fb, spectrogram).T
        # Apply log nonlinearity, referenced to the spectrogram peak
        return librosa.power_to_db(mel_spectrogram, ref=np.max, top_db=None)
12 | """ 13 | return pd.read_csv(path, index_col=0) 14 | 15 | 16 | def read_training_history(path, ordering=None): 17 | """Read training history from the specified CSV file. 18 | 19 | Args: 20 | path (str): Path of CSV file. 21 | ordering (str): Column name to order the entries with respect to 22 | or ``None`` if the entries should remain unordered. 23 | 24 | Returns: 25 | pd.DataFrame: The training history. 26 | """ 27 | df = pd.read_csv(path, index_col=0) 28 | ascending = ordering not in ['val_acc', 'val_map'] 29 | if ordering: 30 | df.sort_values(by=ordering, ascending=ascending, inplace=True) 31 | return df 32 | 33 | 34 | def write_predictions(y_pred, output_path): 35 | """Write classification predictions to a CSV file. 36 | 37 | Args: 38 | y_pred (pd.DataFrame): Table of predictions. 39 | output_path (str): Output file path. 40 | """ 41 | top_3 = y_pred.apply(lambda x: ' '.join(x.nlargest(3).index), axis=1) 42 | pd.Series(top_3, name='label').to_csv(output_path, header=True) 43 | -------------------------------------------------------------------------------- /task2/gated_conv.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Activation 2 | from keras.layers import BatchNormalization 3 | from keras.layers import Conv2D 4 | from keras.layers import MaxPooling2D 5 | from keras.layers import Multiply 6 | 7 | 8 | def block(x, n_filters=64, pool_size=(2, 2)): 9 | """Apply two gated convolutions followed by a max-pooling operation. 10 | 11 | Batch normalization is applied for regularization. 12 | 13 | Args: 14 | x (tensor): Input tensor to transform. 15 | n_filters (int): Number of filters for each gated convolution. 16 | pool_size (int or tuple): Pool size of max-pooling operation. 17 | 18 | Returns: 19 | A Keras tensor of the resulting output. 
20 | """ 21 | x = GatedConv(n_filters, padding='same')(x) 22 | x = BatchNormalization(axis=-1)(x) 23 | 24 | x = GatedConv(n_filters, padding='same')(x) 25 | x = BatchNormalization(axis=-1)(x) 26 | 27 | return MaxPooling2D(pool_size=pool_size)(x) 28 | 29 | 30 | class GatedConv(Conv2D): 31 | """A Keras layer implementing gated convolutions [1]_. 32 | 33 | Args: 34 | n_filters (int): Number of output filters. 35 | kernel_size (int or tuple): Size of convolution kernel. 36 | strides (int or tuple): Strides of the convolution. 37 | padding (str): One of ``'valid'`` or ``'same'``. 38 | kwargs: Other layer keyword arguments. 39 | 40 | References: 41 | .. [1] Y. N. Dauphin, A. Fan, M. Auli, and D. Grangier, 42 | “Language modeling with gated convolutional networks,” in 43 | Proc. 34th Int. Conf. Mach. Learn. (ICML), vol. 70, 44 | Sydney, Australia, 2017, pp. 933–941. 45 | """ 46 | 47 | def __init__(self, n_filters=64, kernel_size=(3, 3), **kwargs): 48 | super(GatedConv, self).__init__(filters=n_filters * 2, 49 | kernel_size=kernel_size, 50 | **kwargs) 51 | 52 | self.n_filters = n_filters 53 | 54 | def call(self, inputs): 55 | """Apply gated convolution.""" 56 | output = super(GatedConv, self).call(inputs) 57 | 58 | n_filters = self.n_filters 59 | linear = Activation('linear')(output[:, :, :, :n_filters]) 60 | sigmoid = Activation('sigmoid')(output[:, :, :, n_filters:]) 61 | 62 | return Multiply()([linear, sigmoid]) 63 | 64 | def compute_output_shape(self, input_shape): 65 | """Compute shape of layer output.""" 66 | output_shape = super(GatedConv, self).compute_output_shape(input_shape) 67 | return tuple(output_shape[:3]) + (self.n_filters,) 68 | 69 | def get_config(self): 70 | """Return the config of the layer.""" 71 | config = super(GatedConv, self).get_config() 72 | config['n_filters'] = self.n_filters 73 | del config['filters'] 74 | return config 75 | -------------------------------------------------------------------------------- /task2/inference.py: 
def binarize_predictions(y_pred, threshold=-1):
    """Convert prediction probabilities to binary values.

    This function is intended for audio tagging predictions. The
    predictions should be passed in a 2D array in which the first
    dimension is the sample axis and the second is the class axis.

    Args:
        y_pred (np.ndarray): 2D array of predictions.
        threshold (float or list): Threshold used to determine the
            binary values. If a list is given, it must specify a
            threshold for each class. If the value is -1, the label
            with the highest probability is selected.

    Returns:
        np.ndarray: Binarized prediction values.
    """
    # Bug fix: comparing a per-class threshold list against 0 raised
    # a TypeError in Python 3, so list thresholds never worked as
    # documented. A non-scalar threshold is now broadcast across the
    # class axis.
    if np.ndim(threshold) > 0:
        return (y_pred > np.asarray(threshold)).astype(int)
    if threshold > 0:
        return (y_pred > threshold).astype(int)

    # Otherwise select the single most probable label per sample and
    # one-hot encode it
    dtype = pd.api.types.CategoricalDtype(categories=range(y_pred.shape[1]))
    return utils.to_categorical(pd.Series(
        np.argmax(y_pred, axis=1), dtype=dtype))
44 | """ 45 | pred = pd.DataFrame(y_pred, index=index, columns=utils.LABELS) 46 | group = utils.group_by_name(pred) 47 | if op == 'first': 48 | pred = group.first() 49 | elif op == 'mean': 50 | pred = group.mean() 51 | elif op == 'gmean': 52 | # TODO: Improve performance as this operation is slow 53 | pred = group.agg(lambda x: stats.gmean(x + 1e-8)) 54 | 55 | pred.index.name = index.name 56 | 57 | return pred 58 | -------------------------------------------------------------------------------- /task2/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | import config as cfg 11 | import file_io as io 12 | import utils 13 | 14 | 15 | def main(): 16 | """Execute a task based on the given command-line arguments. 17 | 18 | This function is the main entry-point of the program. It allows the 19 | user to extract features, train a model, generate predictions, or 20 | evaluate predictions using the command-line interface. 
21 | """ 22 | parser = argparse.ArgumentParser() 23 | subparsers = parser.add_subparsers(dest='mode') 24 | 25 | parser_preprocess = subparsers.add_parser('preprocess') 26 | parser_preprocess.add_argument('dataset', choices=['training', 'test']) 27 | 28 | # Add sub-parser for feature extraction 29 | parser_extract = subparsers.add_parser('extract') 30 | parser_extract.add_argument('dataset', choices=['training', 'test']) 31 | parser_extract.add_argument('--recompute', action='store_true') 32 | 33 | # Add sub-parser for training 34 | parser_train = subparsers.add_parser('train') 35 | parser_train.add_argument('--model', 36 | choices=['vgg13', 37 | 'gcnn', 38 | 'crnn', 39 | 'gcrnn', 40 | ], 41 | default='gcnn', 42 | ) 43 | parser_train.add_argument('--fold', type=int, default=-1) 44 | parser_train.add_argument('--class_weight', action='store_true') 45 | parser_train.add_argument('--sample_weight', type=float) 46 | 47 | # Add sub-parser for inference 48 | parser_predict = subparsers.add_parser('predict') 49 | parser_predict.add_argument('dataset', choices=['training', 'test']) 50 | parser_predict.add_argument('--fold', type=int, default=-1) 51 | 52 | # Add sub-parser for evaluation 53 | parser_evaluate = subparsers.add_parser('evaluate') 54 | parser_evaluate.add_argument('dataset', choices=['training', 'test']) 55 | parser_evaluate.add_argument('--fold', type=int, default=-1) 56 | 57 | args = parser.parse_args() 58 | if args.mode == 'preprocess': 59 | preprocess(cfg.to_dataset(args.dataset, preprocessed=False)) 60 | elif args.mode == 'extract': 61 | extract(cfg.to_dataset(args.dataset), args.recompute) 62 | elif args.mode == 'train': 63 | train(args.model, args.fold, args.class_weight, args.sample_weight) 64 | elif args.mode == 'predict': 65 | predict(cfg.to_dataset(args.dataset), args.fold) 66 | elif args.mode == 'evaluate': 67 | dataset = cfg.to_dataset(args.dataset, preprocessed=False) 68 | evaluate_audio_tagging(dataset, args.fold) 69 | 70 | 71 | def 
def preprocess(dataset):
    """Apply preprocessing to the audio clips.

    Each clip is split into non-silent segments, which are saved as
    separate WAV files in the preprocessing directory, and a new
    metadata file is written in which every segment inherits the
    metadata of its source clip.

    Args:
        dataset: Dataset to apply preprocessing to.
    """
    import silence

    # Ensure output directory exists
    output_path = os.path.join(cfg.preprocessing_path, dataset.name)
    os.makedirs(output_path, exist_ok=True)

    # Split each audio clip based on silence
    file_names = []
    df = io.read_metadata(dataset.metadata_path)
    for name in tqdm(df.index):
        file_names += silence.split_audio(
            dataset_path=dataset.path,
            file_name=name,
            output_path=output_path,
            n_window=cfg.n_window,
            default_threshold=cfg.default_threshold,
            transients_threshold=cfg.transients_threshold,
            min_silence=cfg.min_silence,
            keep_silence=cfg.keep_silence,
        )

    # Create new metadata DataFrame by looking up each segment's source
    # clip. NOTE(review): assumes original clip names are exactly eight
    # characters plus '.wav' (the same convention used by
    # utils.group_by_name) -- confirm before using other datasets.
    df = df.loc[[s[:8] + '.wav' for s in file_names]]
    df.index = pd.Index(file_names, name=df.index.name)

    # Save metadata to disk
    df.to_csv(os.path.join(cfg.preprocessing_path, '%s.csv' % dataset.name))
112 | """ 113 | import features 114 | 115 | # Use a logmel representation for feature extraction 116 | extractor = features.LogmelExtractor(cfg.sample_rate, 117 | cfg.n_window, 118 | cfg.hop_length, 119 | cfg.n_mels, 120 | ) 121 | 122 | # Ensure output directory exists and set file path 123 | os.makedirs(cfg.extraction_path, exist_ok=True) 124 | output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5') 125 | 126 | # Save free parameters to disk 127 | utils.log_parameters(cfg.logmel, os.path.join(cfg.extraction_path, 128 | 'parameters.json')) 129 | 130 | # Extract features for each audio clip in the dataset 131 | df = io.read_metadata(dataset.metadata_path) 132 | features.extract_dataset(dataset_path=dataset.path, 133 | file_names=df.index.tolist(), 134 | extractor=extractor, 135 | output_path=output_path, 136 | recompute=recompute, 137 | ) 138 | 139 | 140 | def train(model, fold, use_class_weight, noisy_sample_weight): 141 | """Train the neural network model. 142 | 143 | Args: 144 | model (str): The neural network architecture. 145 | fold (int): The fold to use for validation. 146 | use_class_weight (bool): Whether to use class-wise weights. 147 | noisy_sample_weight (float): Examples that are not verified are 148 | weighted according to this value. 149 | 150 | Note: 151 | For reproducibility, the random seed is set to a fixed value. 
152 | """ 153 | import training 154 | 155 | # Try to create reproducible results 156 | np.random.seed(cfg.initial_seed) 157 | 158 | # Load training data and associated metadata 159 | x, df = _load_data(cfg.to_dataset('training')) 160 | # Get one-hot representation of target values 161 | y = utils.to_categorical(df.label) 162 | 163 | # Split training data into training and validation 164 | if fold >= 0: 165 | mask = df.fold == fold 166 | else: 167 | mask = np.zeros(len(df), dtype=bool) 168 | val_mask = mask & (df.manually_verified == 1) 169 | 170 | tr_x = x[~mask] 171 | tr_y = y[~mask] 172 | val_x = x[val_mask] 173 | val_y = y[val_mask] 174 | val_index = df.index[val_mask] 175 | 176 | # Compute class weights based on number of class examples 177 | if use_class_weight: 178 | group = utils.group_by_name(df) 179 | n_examples = group.first().groupby('label').size().values 180 | class_weight = len(group) / (len(n_examples) * n_examples) 181 | else: 182 | class_weight = None 183 | 184 | # Assign a specific sample weight to unverified examples 185 | if noisy_sample_weight: 186 | sample_weight = df[~mask].manually_verified.values.astype(float) 187 | sample_weight[sample_weight == 0] = noisy_sample_weight 188 | else: 189 | sample_weight = None 190 | 191 | # Ensure output directories exist 192 | fold_dir = str(fold) if fold >= 0 else 'all' 193 | os.makedirs(os.path.join(cfg.model_path, fold_dir), exist_ok=True) 194 | os.makedirs(cfg.log_path.format(fold_dir), exist_ok=True) 195 | 196 | # Save free parameters to disk 197 | utils.log_parameters(cfg.training, os.path.join(cfg.model_path, 198 | 'parameters.json')) 199 | 200 | training.train(tr_x, tr_y, val_x, val_y, val_index, model, fold, 201 | class_weight=class_weight, sample_weight=sample_weight) 202 | 203 | 204 | def predict(dataset, fold): 205 | """Generate predictions for audio tagging. 
def predict(dataset, fold):
    """Generate predictions for audio tagging.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
        fold (int): The specific fold to generate predictions for. Only
            applicable for the training dataset.
    """
    import inference

    # Load input data and associated metadata
    x, df = _load_data(dataset)
    dataset_name = dataset.name
    if dataset.name == 'training':
        # Training-set predictions are only meaningful for a held-out
        # fold, so a concrete fold number is required here
        if fold == -1:
            raise ValueError('Invalid fold: %d' % fold)

        dataset_name += str(fold)
        mask = df.fold == fold
        tr_x = x[~mask]
        x = x[mask]
        df = df[mask]
    else:
        tr_x, tr_df = _load_data(cfg.to_dataset('training'))
        if fold >= 0:
            dataset_name += str(fold)
            tr_x = tr_x[tr_df.fold != fold]

    # Standardize the input using statistics of the data the models
    # were trained on
    generator = utils.fit_scaler(tr_x)
    x = generator.standardize(x)

    # Predict class probabilities for each model (epoch)
    preds = []
    for epoch in _determine_epochs(cfg.prediction_epochs, fold, n=4):
        pred = utils.timeit(
            lambda: _load_model(fold, epoch).predict(x),
            '[Epoch %d] Predicted class probabilities' % epoch)

        preds.append(inference.merge_predictions(pred, df.index))

    # Average the clip-level predictions across the selected epochs
    pred_mean = pd.concat(preds).groupby(level=0).mean()

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset_name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    pred_mean.to_csv(predictions_path % 'predictions')
    io.write_predictions(pred_mean, predictions_path % 'submission')
def evaluate_audio_tagging(dataset, fold):
    """Evaluate the audio tagging predictions and write results.

    Args:
        dataset: Dataset for which to evaluate the predictions.
        fold (int): The fold (validation set) to evaluate.
    """
    import evaluation

    # Load ground truth data
    df = io.read_metadata(dataset.metadata_path)
    if dataset.name == 'training':
        df = df[(df.fold == fold) & (df.manually_verified == 1)]
    elif dataset.name == 'test':
        df = df[df.usage != 'Ignored']
    # Binarize the ground truth and align the columns with the class
    # vocabulary. Predictions are saved with utils.LABELS column order
    # (see inference.merge_predictions), so without the reindex a class
    # that happens to be absent from this subset would shift the
    # columns relative to the predictions and corrupt the evaluation.
    y_true = pd.get_dummies(df.label).reindex(columns=utils.LABELS,
                                              fill_value=0)

    name = dataset.name
    if fold >= 0:
        name += str(fold)

    # Load predictions
    y_pred = io.read_metadata(cfg.predictions_path.format('predictions', name))

    # Ensure only elements common to both y_true and y_pred are selected
    index = y_true.index.intersection(y_pred.index)
    y_true = y_true.loc[index].values
    y_pred = y_pred.loc[index].values

    # Evaluate audio tagging performance
    scores = evaluation.evaluate_audio_tagging(
        y_true, y_pred, threshold=cfg.threshold)

    # Ensure output directory exists and write results
    os.makedirs(os.path.dirname(cfg.results_path), exist_ok=True)
    output_path = cfg.results_path.format(name)
    scores.to_csv(output_path)

    # Print scores to 3 decimal places
    pd.options.display.float_format = '{:,.3f}'.format
    print('\n' + str(scores))
317 | """ 318 | import features 319 | 320 | # Load feature vectors and reshape to 4D tensor 321 | features_path = os.path.join(cfg.extraction_path, dataset.name + '.h5') 322 | x, n_chunks = utils.timeit(lambda: features.load_features(features_path), 323 | 'Loaded features of %s dataset' % dataset.name) 324 | x = np.expand_dims(x, -1) 325 | assert x.ndim == 4 326 | 327 | # Load metadata and duplicate entries based on number of chunks 328 | df = io.read_metadata(dataset.metadata_path) 329 | df = df.loc[np.repeat(df.index, n_chunks)] 330 | 331 | return x, df 332 | 333 | 334 | def _determine_epochs(spec, fold, n=5): 335 | """Return a list of epoch numbers based on the given argument. 336 | 337 | If `spec` is a list, this function simply returns the list. 338 | Otherwise, `spec` should be a string, in which case this function 339 | returns the top `n` epochs based on the training history file 340 | and the contents of `spec`. For example, if `spec` is ``'val_acc'``, 341 | the epochs that achieved the highest accuracy are returned. 342 | 343 | Args: 344 | spec (list or str): A list of epoch numbers or a string 345 | specifying how to select the epoch numbers. 346 | fold (int): Fold number, since determining the top epochs 347 | depends on the fold in question. 348 | n (int): Number of epochs to return (if applicable). 349 | 350 | Returns: 351 | list: The relevant epoch numbers. 352 | """ 353 | if type(spec) is list: 354 | return spec 355 | 356 | fold_dir = str(fold) if fold >= 0 else 'all' 357 | path = cfg.history_path.format(fold_dir) 358 | history = io.read_training_history(path, ordering=spec) 359 | return (history.index.values + 1)[:n] 360 | 361 | 362 | def _load_model(fold, epoch): 363 | """Load model based on specified fold and epoch number. 364 | 365 | Args: 366 | fold (int): Fold used to train the model. 367 | epoch (int): Epoch number of the model to load. 368 | 369 | Returns: 370 | An instance of a Keras model. 
371 | """ 372 | import keras.models 373 | 374 | from gated_conv import GatedConv 375 | 376 | fold_dir = str(fold) if fold >= 0 else 'all' 377 | model_path = glob.glob(os.path.join(cfg.model_path, fold_dir, 378 | '*.%.02d*.h5' % epoch))[0] 379 | 380 | custom_objects = { 381 | 'GatedConv': GatedConv, 382 | } 383 | 384 | return keras.models.load_model(model_path, custom_objects) 385 | 386 | 387 | if __name__ == '__main__': 388 | sys.exit(main()) 389 | -------------------------------------------------------------------------------- /task2/mixup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class MixupGenerator(): 5 | """Implementation of mixup [1]_ data augmentation. 6 | 7 | Args: 8 | x_train (np.ndarray): Array of training data. 9 | y_train (np.ndarray): Target values of the training data. 10 | sample_weight (np.ndarray): Weights for the training data. 11 | batch_size (int): Number of examples in a mini-batch. 12 | alpha (float): Parameter for sampling mixing weights. 13 | generator (ImageDataGenerator): Generator for preprocessing. 14 | 15 | Attributes: 16 | x_train (np.ndarray): Array of training data. 17 | y_train (np.ndarray): Target values of the training data. 18 | sample_weight (np.ndarray): Weights for the training data. 19 | batch_size (int): Number of examples in a mini-batch. 20 | alpha (float): Parameter for sampling mixing weights. 21 | generator (ImageDataGenerator): Generator for preprocessing. 22 | 23 | References: 24 | .. [1] Zhang, H. and Cisse, M. and Dauphin, Y.~N. 
and Lopez-Paz, 25 | “mixup: Beyond Empirical Risk Minimization,” 26 | """ 27 | 28 | def __init__(self, x_train, y_train, sample_weight=None, 29 | batch_size=32, alpha=1.0, generator=None): 30 | self.x_train = x_train 31 | self.y_train = y_train 32 | self.sample_weight = sample_weight 33 | self.batch_size = batch_size 34 | self.alpha = alpha 35 | self.generator = generator 36 | 37 | def __call__(self): 38 | batch_size = self.batch_size 39 | n_classes = self.y_train.shape[1] 40 | n_examples = np.sum(self.y_train, axis=0).astype(int) 41 | indexes = [np.where(self.y_train[:, label] == 1)[0] 42 | for label in range(n_classes)] 43 | offsets = [0] * n_classes 44 | 45 | while True: 46 | # Choose which class each mini-batch example will belong to 47 | labels = np.random.choice(n_classes, size=(batch_size * 2,)) 48 | batch_indexes = np.empty(batch_size * 2, dtype=int) 49 | 50 | for i, label in enumerate(labels): 51 | batch_indexes[i] = indexes[label][offsets[label]] 52 | 53 | offsets[label] += 1 54 | if offsets[label] >= n_examples[label]: 55 | np.random.shuffle(indexes[label]) 56 | offsets[label] = 0 57 | 58 | x, y, sample_weight = self._generate(batch_indexes) 59 | 60 | yield x, y, sample_weight 61 | 62 | def _generate(self, indexes): 63 | # Generate mixing weights using beta distribution 64 | mixup_weights = np.random.beta(a=self.alpha, b=self.alpha, 65 | size=self.batch_size) 66 | 67 | # Mix training data and labels 68 | x = self._mixup(self.x_train, indexes, 69 | mixup_weights[:, None, None, None]) 70 | y = self._mixup(self.y_train, indexes, mixup_weights[:, None]) 71 | 72 | # Mix sample weights if applicable 73 | sample_weight = self.sample_weight 74 | if sample_weight is not None: 75 | sample_weight = self._mixup(sample_weight, indexes, mixup_weights) 76 | 77 | # Apply preprocessing to training data 78 | if self.generator: 79 | for i in range(self.batch_size): 80 | x[i] = self.generator.random_transform(x[i]) 81 | x[i] = self.generator.standardize(x[i]) 82 | 83 | 
def split_audio(dataset_path,
                file_name,
                output_path,
                n_window=1024,
                default_threshold=-56,
                transients_threshold=-56,
                min_silence=500,
                keep_silence=500,
                ):
    """Split an audio clip into non-silent segments.

    This function detects the non-silent segments of an audio clip and
    saves them separately as WAV files in the specified directory.
    Silence is detected on a frame-by-frame basis by thresholding the
    RMS energy of each frame. A non-silent segment is defined to be the
    span of non-silent frames such that two such adjacent frames are
    less than `min_silence` ms apart. `keep_silence` ms of silence is
    also kept at the beginning and end of each segment.

    Args:
        dataset_path (str): Path of directory containing dataset.
        file_name (str): File name of audio clip to be split.
        output_path (str): Path of output directory.
        n_window (int): Number of samples in a frame.
        default_threshold (int): Default silence threshold (in dBFS).
        transients_threshold (int): Silence threshold for transient
            audio signals (in dBFS).
        min_silence (int): Minimum length of silence between segments.
        keep_silence (int): Amount of start/end silence to keep (in ms).

    Returns:
        list: The output file names.
    """
    def _export_segments(segments):
        # Write each segment as '<original name>_<i>.wav'
        fnames = []
        for i, seg in enumerate(segments):
            fname = '{}_{}.wav'.format(os.path.splitext(file_name)[0], i)
            seg.export(os.path.join(output_path, fname), format='wav')
            fnames.append(fname)
        return fnames

    try:
        x = AudioSegment.from_wav(os.path.join(dataset_path, file_name))
    except CouldntDecodeError:
        # Unreadable clips are exported as empty audio
        x = AudioSegment.empty()

    # Skip audio clips that are not longer than the padding
    # Padding refers to the silence that is kept for each segment
    padding = keep_silence * 2
    if x.duration_seconds <= padding / 1000:
        return _export_segments([x])

    # Determine silence threshold based on whether the audio signal
    # consists entirely of transients.
    if _is_transients(x.get_array_of_samples(), x.frame_rate, n_window):
        threshold = transients_threshold
    else:
        threshold = default_threshold

    segments = silence.split_on_silence(
        audio_segment=x,
        min_silence_len=min_silence,
        silence_thresh=threshold,
        keep_silence=keep_silence,
    )

    # Export the original clip if no non-silent segments were found
    if len(segments) == 0:
        return _export_segments([x])

    # Discard segments that are too short. Durations are compared in
    # milliseconds (``len()`` of an AudioSegment is its length in ms);
    # the previous code compared ``duration_seconds`` (in seconds)
    # against the millisecond threshold, which disabled the filter.
    durations = [len(seg) for seg in segments]
    discard_threshold = 100 + padding
    if np.mean(durations) > discard_threshold + 500:
        segments = [seg for seg, duration in zip(segments, durations)
                    if duration > discard_threshold]

    return _export_segments(segments)
101 | """ 102 | envelope = _moving_average(np.abs(x), n=n_window) 103 | envelope = librosa.amplitude_to_db(envelope, ref=np.max) 104 | mask = (envelope > -30).astype(int) 105 | diff = np.diff(mask) 106 | start = np.where(diff == 1)[0] 107 | end = np.where(diff == -1)[0] 108 | 109 | if len(end) == 0: 110 | return True 111 | 112 | if mask[0] == 1: 113 | start = np.concatenate(([0], start)) 114 | if len(start) > len(end): 115 | start = start[:-1] 116 | 117 | return max(end - start) / sample_rate < 0.5 118 | 119 | 120 | def _moving_average(x, n=3): 121 | """Compute the moving average of a 1D array. 122 | 123 | Args: 124 | x (array_like): Input 1D array. 125 | n (int): Window size of moving average. 126 | 127 | Returns: 128 | np.ndarray: The averaged version of the array. 129 | """ 130 | ret = np.cumsum(x, dtype=float) 131 | ret[n:] = ret[n:] - ret[:-n] 132 | return ret[n - 1:] / n 133 | -------------------------------------------------------------------------------- /task2/training.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sklearn.metrics as metrics 4 | 5 | from keras.callbacks import Callback 6 | from keras.callbacks import CSVLogger 7 | from keras.callbacks import EarlyStopping 8 | from keras.callbacks import LearningRateScheduler 9 | from keras.callbacks import ModelCheckpoint 10 | from keras.callbacks import TensorBoard 11 | from keras.optimizers import Adam 12 | import keras.utils 13 | 14 | from mixup import MixupGenerator 15 | import config as cfg 16 | import convnet 17 | import evaluation 18 | import inference 19 | import utils 20 | 21 | 22 | def train(tr_x, tr_y, val_x, val_y, val_index, model_id='gcnn', 23 | fold=-1, sample_weight=None, class_weight=None): 24 | """Train a neural network using the given training set. 25 | 26 | Args: 27 | tr_x (np.ndarray): Array of training data. 28 | tr_y (np.ndarray): Target values of the training data. 29 | val_x (np.ndarray): Array of validation data. 
def train(tr_x, tr_y, val_x, val_y, val_index, model_id='gcnn',
          fold=-1, sample_weight=None, class_weight=None):
    """Train a neural network using the given training set.

    Args:
        tr_x (np.ndarray): Array of training data.
        tr_y (np.ndarray): Target values of the training data.
        val_x (np.ndarray): Array of validation data.
        val_y (np.ndarray): Target values of the validation data.
        val_index (pd.Index): File names of validation data. Used to
            group chunks in order to compute clip-level predictions.
        model_id (str): The neural network architecture.
        fold (int): Fold number identifying validation set.
        sample_weight (float): Weights for the training examples.
        class_weight (float): Class-wise weights.
    """
    # Select the model-building function for the given architecture
    if model_id == 'gcnn':
        create_model = convnet.gcnn
    elif model_id == 'vgg13':
        create_model = convnet.vgg13
    elif model_id == 'crnn':
        create_model = convnet.crnn
    elif model_id == 'gcrnn':
        create_model = convnet.gcrnn

    # Create model and print summary
    model = create_model(input_shape=tr_x.shape[1:],
                         n_classes=tr_y.shape[1])
    _print_model_summary(model)

    # Use Adam SGD optimizer
    optimizer = Adam(lr=cfg.learning_rate['initial'])
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'],
                  )

    # Create the appropriate callbacks to use during training. The
    # evaluation callbacks need the clip-level index to merge the
    # predictions of chunks belonging to the same audio clip.
    callbacks = _create_callbacks(fold)
    for callback in callbacks:
        callback.val_index = val_index

    # Set a large value for `n_epochs` if early stopping is used
    n_epochs = cfg.n_epochs
    if n_epochs < 0:
        n_epochs = 10000

    # Standardize validation data using training-set statistics
    generator = utils.fit_scaler(tr_x)
    if len(val_x):
        validation_data = (generator.standardize(val_x), val_y)
    else:
        validation_data = None

    # Redefine generator for mixup data augmentation; the fitted scaler
    # is passed on so that training batches are standardized too
    batch_size = cfg.batch_size
    generator = MixupGenerator(tr_x,
                               tr_y,
                               sample_weight=sample_weight,
                               batch_size=batch_size,
                               alpha=1.0,
                               generator=generator,
                               )

    return model.fit_generator(generator(),
                               steps_per_epoch=len(tr_x) // batch_size,
                               epochs=n_epochs,
                               callbacks=callbacks,
                               validation_data=validation_data,
                               class_weight=class_weight,
                               )
class Evaluator(Callback):
    """A base class for logging evaluation results.

    Attributes:
        val_index (pd.Index): File names of the validation examples.
            Assigned externally after construction (see ``train``);
            used to merge chunk-level predictions into clip-level ones.
    """

    def predict(self):
        """Predict target values of the validation data.

        The main utility of this function is to merge the predictions of
        chunks belonging to the same audio clip. The same is done for
        the ground truth target values so that dimensions match.

        Returns:
            tuple: Tuple containing:

                y_true (np.ndarray): Ground truth target values.
                y_pred (np.ndarray): Predicted target values.
        """
        # Keras supplies (x, y[, ...]); only the first two are needed
        x, y_true = self.validation_data[:2]

        y_pred = self.model.predict(x)
        # 'first' suffices for the ground truth since all chunks of a
        # clip share the same target values
        y_true = inference.merge_predictions(y_true, self.val_index, 'first')
        y_pred = inference.merge_predictions(y_pred, self.val_index)
        return y_true.values, y_pred.values


class MAPLogger(Evaluator):
    """A callback for computing the mean average precision at k (MAP@k).

    At the end of each epoch, the MAP is computed and logged for the
    predictions of the validation dataset. It is assumed that the ground
    truths are single-label.

    Args:
        k (int): The maximum number of predicted elements.

    Attributes:
        k (int): The maximum number of predicted elements.
    """

    def __init__(self, k=3):
        super(MAPLogger, self).__init__()

        self.k = k

    def on_epoch_end(self, epoch, logs=None):
        """Compute the MAP of the validation set predictions."""
        y_true, y_pred = self.predict()
        map_k = evaluation.compute_map(y_true, y_pred, self.k)
        # NOTE(review): presumably the final argument requests class-
        # wise MAP@k values, so the worst class score is logged here --
        # confirm against evaluation.compute_map
        map_k_min = min(evaluation.compute_map(y_true, y_pred, self.k, True))

        # Log the computed values so that other callbacks (e.g. the
        # CSVLogger) can record them
        logs = logs or {}
        logs['val_map'] = map_k
        logs['val_map_min'] = map_k_min
class F1ScoreLogger(Evaluator):
    """A callback for computing the F1 score.

    At the end of each epoch, the F1 score is computed and logged for
    the predictions of the validation dataset.

    Args:
        threshold (float): Threshold used to binarize predictions.

    Attributes:
        threshold (float): Threshold used to binarize predictions.
    """

    def __init__(self, threshold=-1):
        super(F1ScoreLogger, self).__init__()

        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Compute the F1 score of the validation set predictions."""
        y_true, y_pred = self.predict()
        y_pred_b = inference.binarize_predictions(y_pred, self.threshold)
        # Micro-averaged F1 over all classes; `labels` ensures every
        # class column is taken into account
        f1_score = metrics.f1_score(y_true, y_pred_b, average='micro',
                                    labels=range(y_true.shape[1]))

        # Log the computed value so that other callbacks (e.g. the
        # CSVLogger) can record it
        logs = logs or {}
        logs['val_f1_score'] = f1_score


def _print_model_summary(model):
    """Print a summary of the model and also write the summary to disk.

    The summary is written to 'summary.txt' in the model directory.

    Args:
        model: The Keras model to summarize.
    """
    keras.utils.print_summary(model)
    with open(os.path.join(cfg.model_path, 'summary.txt'), 'w') as f:
        keras.utils.print_summary(model, print_fn=lambda s: f.write(s + '\n'))
207 | """ 208 | fold_dir = str(fold) if fold >= 0 else 'all' 209 | 210 | # Create callbacks for computing various metrics and logging them 211 | callbacks = [] 212 | if fold >= 0: 213 | callbacks += [MAPLogger(), F1ScoreLogger(cfg.threshold), 214 | CSVLogger(cfg.history_path.format(fold_dir))] 215 | 216 | # Create callback to save model after every epoch 217 | path = os.path.join(cfg.model_path, fold_dir, 218 | 'model.{epoch:02d}-{acc:.4f}.h5') 219 | callbacks.append(ModelCheckpoint(filepath=path, monitor='acc')) 220 | 221 | # Create callback for TensorBoard logs 222 | callbacks.append(TensorBoard(cfg.log_path.format(fold_dir), 223 | batch_size=cfg.batch_size)) 224 | 225 | lr_decay = cfg.learning_rate['decay'] 226 | if lr_decay < 1.: 227 | # Create callback to decay learning rate 228 | def _lr_schedule(epoch, lr): 229 | decay = epoch % cfg.learning_rate['decay_rate'] == 0 230 | return lr * lr_decay if decay else lr 231 | callbacks.append(LearningRateScheduler(schedule=_lr_schedule)) 232 | 233 | if cfg.n_epochs == -1: 234 | # Create callback to use an early stopping condition 235 | callbacks.append(EarlyStopping(monitor='val_loss', 236 | min_delta=0, 237 | patience=5, 238 | )) 239 | 240 | return callbacks 241 | -------------------------------------------------------------------------------- /task2/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import types 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | LABELS = [ 10 | 'Acoustic_guitar', 11 | 'Applause', 12 | 'Bark', 13 | 'Bass_drum', 14 | 'Burping_or_eructation', 15 | 'Bus', 16 | 'Cello', 17 | 'Chime', 18 | 'Clarinet', 19 | 'Computer_keyboard', 20 | 'Cough', 21 | 'Cowbell', 22 | 'Double_bass', 23 | 'Drawer_open_or_close', 24 | 'Electric_piano', 25 | 'Fart', 26 | 'Finger_snapping', 27 | 'Fireworks', 28 | 'Flute', 29 | 'Glockenspiel', 30 | 'Gong', 31 | 'Gunshot_or_gunfire', 32 | 'Harmonica', 33 | 'Hi-hat', 34 | 
def to_categorical(y):
    """Encode labels as one-hot vectors.

    Args:
        y (pd.Series): Labels to be converted into categorical format.

    Returns:
        np.ndarray: Matrix of encoded labels.
    """
    # One indicator column per distinct label, in sorted label order
    return pd.get_dummies(y).values


def pad_truncate(x, length, pad_value=0):
    """Pad or truncate an array to a specified length.

    Args:
        x (array_like): Input array.
        length (int): Target length.
        pad_value (number): Padding value.

    Returns:
        array_like: The array padded/truncated to the specified length.
    """
    n = len(x)
    if n > length:
        return x[:length]
    if n < length:
        # Pad along the first axis only; trailing axes keep their shape
        filler = np.full((length - n,) + x.shape[1:], pad_value)
        return np.concatenate((x, filler))
    return x


def fit_scaler(x):
    """Fit an ImageDataGenerator to the given data.

    Args:
        x (np.ndarray): 4D array of data.

    Returns:
        keras.ImageDataGenerator: The fitted generator.
    """
    from keras.preprocessing.image import ImageDataGenerator

    # Standardization: subtract the feature-wise mean and divide by the
    # feature-wise standard deviation
    scaler = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
    )
    scaler.fit(x)
    return scaler


def group_by_name(data):
    """Group metadata entries based on original file names.

    Segment names are derived from the original eight-character clip
    names, so truncating to eight characters recovers the source clip.

    Args:
        data (pd.Series or pd.DataFrame): The metadata to group.

    Returns:
        The relevant GroupBy object.
    """
    return data.groupby(lambda name: name[:8] + '.wav')
def timeit(callback, message):
    """Measure the time taken to execute the given callback.

    This function measures the amount of time it takes to execute the
    specified callback and prints a message afterwards regarding the
    time taken. The `message` parameter provides part of the message,
    e.g. if `message` is 'Executed', the printed message is 'Executed in
    1.234567 seconds'.

    Args:
        callback: Function to execute and time.
        message (str): Message to print after executing the callback.

    Returns:
        The return value of the callback.
    """
    start = time.time()
    result = callback()
    elapsed = time.time() - start

    print('%s in %f seconds' % (message, elapsed))

    return result


def log_parameters(params, output_path):
    """Write the given parameters to a file in JSON format.

    Args:
        params (dict or module): Parameters to serialize. If `params` is
            a module, the relevant variables are serialized.
        output_path (str): Output file path.

    Raises:
        ValueError: If `params` is neither a dict nor a module.
    """
    if isinstance(params, types.ModuleType):
        # Serialize the module's public (non-underscore) attributes
        params = {name: value for name, value in vars(params).items()
                  if not name.startswith('_')}
    elif not isinstance(params, dict):
        raise ValueError("'params' must be a dict or a module")

    with open(output_path, 'w') as f:
        json.dump(params, f, indent=2)