├── .flake8 ├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.rst ├── docs ├── .gitignore ├── Makefile ├── conf.py ├── index.rst └── source │ ├── config.rst │ ├── convnet.rst │ ├── evaluation.rst │ ├── features.rst │ ├── file_io.rst │ ├── gated_conv.rst │ ├── inference.rst │ ├── main.rst │ ├── mixup.rst │ ├── modules.rst │ ├── silence.rst │ ├── training.rst │ └── utils.rst ├── metadata ├── test.csv └── training.csv ├── requirements.txt ├── scripts ├── meta_features.py ├── predict_stack.py ├── relabel.py └── select_folds.py └── task2 ├── config ├── __init__.py ├── dcase2018_task2.py ├── logmel.py ├── paths.py ├── prediction.py ├── silence.py └── training.py ├── convnet.py ├── evaluation.py ├── features.py ├── file_io.py ├── gated_conv.py ├── inference.py ├── main.py ├── mixup.py ├── silence.py ├── training.py └── utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = D100, D107, D413 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_*/ 2 | 3 | *.py[cod] 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Turab Iqbal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [dev-packages] 7 | "flake8" = "*" 8 | "flake8-docstrings" = "*" 9 | sphinx = "*" 10 | sphinx-rtd-theme = "*" 11 | 12 | [packages] 13 | "h5py" = "*" 14 | keras = "*" 15 | librosa = "*" 16 | numpy = "*" 17 | pandas = "*" 18 | pydub = "*" 19 | scipy = "*" 20 | sklearn = "*" 21 | tensorflow-gpu = "*" 22 | tqdm = "*" 23 | 24 | [requires] 25 | python_version = "3" 26 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "27667c5cc8fc32b181926c8bd1c8bd8889241b458c314e833947226712a472fc" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "absl-py": { 20 | "hashes": [ 21 | "sha256:87519e3b91a3d573664c6e2ee33df582bb68dca6642ae3cf3a4361b1c0a4e9d6" 22 | ], 23 | "version": "==0.6.1" 24 | }, 25 | "astor": { 26 | "hashes": [ 27 | "sha256:95c30d87a6c2cf89aa628b87398466840f0ad8652f88eb173125a6df8533fb8d", 28 | 
"sha256:fb503b9e2fdd05609fbf557b916b4a7824171203701660f0c55bbf5a7a68713e" 29 | ], 30 | "version": "==0.7.1" 31 | }, 32 | "audioread": { 33 | "hashes": [ 34 | "sha256:b0b9270c20833a75ce0d167fb2fdad52ddcd8e8f300be8afad3ac9715850bc50" 35 | ], 36 | "version": "==2.1.6" 37 | }, 38 | "decorator": { 39 | "hashes": [ 40 | "sha256:2c51dff8ef3c447388fe5e4453d24a2bf128d3a4c32af3fabef1f01c6851ab82", 41 | "sha256:c39efa13fbdeb4506c476c9b3babf6a718da943dab7811c206005a4a956c080c" 42 | ], 43 | "version": "==4.3.0" 44 | }, 45 | "gast": { 46 | "hashes": [ 47 | "sha256:7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930" 48 | ], 49 | "version": "==0.2.0" 50 | }, 51 | "grpcio": { 52 | "hashes": [ 53 | "sha256:0b09e82027f27cb540999404acf1be19cb50073d76ab257d7369aa3730bec3c0", 54 | "sha256:0cc5f2d3ee21c642d8982f197c83053fd3a8cbcd6a60240d8c87c6c256b10d57", 55 | "sha256:13b498b0415715a4214574c67ac6d0d7b565a861eb4490238a828fac17a51506", 56 | "sha256:314c557efecec7f901cf394beb184b31414f906785e4811d2392859576d4d7b5", 57 | "sha256:32d2859b68e185d05d6b5f5814121e786088f5e3483da0a7359f5d7fc0401ee3", 58 | "sha256:3bf1b9d72a05a855762c36bd458d3750bedb5fd7b957a44443a62facf80afba4", 59 | "sha256:41614ec2df4776a7d1b46183543d5c508bfc4972f092ec1ea83e98f808e5fa4d", 60 | "sha256:4a7fab9f8ed8352d63585d221ee9c1fc58fb9b3d12535e777e36e855b0cab3db", 61 | "sha256:4b4a2faa53e0f8d2b1479173dbce1523a7daaf2644fb835fb9fff04beb29ed8d", 62 | "sha256:5526bf9f6615e22d0290aa83324f87fcc1fee51c3a9580ebeb2a52271c21a563", 63 | "sha256:5bf2c9ec1d55c28ca1221f7b2d1914f20b2819c44579da89f447789baaba1386", 64 | "sha256:62b24446d447ebe3a7002a6e3bd2c7372159e094868eb61ea2426327fe9f1992", 65 | "sha256:63afda9d946fff727107ebbef25f6b45497f29486e462725dc9942391f3714a8", 66 | "sha256:6dd039527b7333c947b9757ad40adf93b917f3734aed1da4fdeb28fd17ec63f0", 67 | "sha256:6e719d17ca8fa06260a427cd1fab58abfd0672e8e625fcad81595bd125e0e367", 68 | "sha256:76b3dbff4c775f5f8667c405b909ab2f80440c7579ad56f823476b011124a8a5", 69 | 
"sha256:7be774ca3c8faa0e126d1e41e11fd82c9c114efb5437b36f651fe25add7f8c2e", 70 | "sha256:7d74c3c6d8c7aadd505c8cef2b4b5324588bee645e6d20a6493940b24d394603", 71 | "sha256:84afdfbf88c0ed2426a4f029fae3e677e8f1b2f3370feeae939d64670926c981", 72 | "sha256:84d62107eb5bc9fe4e3682b038434c709ca7a2ae19e621e08ed7e8d908046cfb", 73 | "sha256:8a1f4bee826b0edb123157f19843f46ca9ef29f12ed0b54eeffde5ff65101340", 74 | "sha256:93edd492a1c6865e15db1ff7d98228b7351221bf815286a41834e10934c0cde0", 75 | "sha256:9907fcb03a9fd327b114919dbb7a4577d5d5aeed2d6d000e6e6d002ad5cb959d", 76 | "sha256:9dd008cd45a646b0e3761f0963c95b0dcd07d880d278a3c1ce23dd4ecb9cd174", 77 | "sha256:a440935203be2581f68de7a4c5ca7ca22e948a21af70d7279ba9a2e32f73ae40", 78 | "sha256:a9144b8a0f73be76aff348e4d558a5c3f43a8378a17c6327d56dbea8efda4aeb", 79 | "sha256:b14629835e796f7905db2f7d10035958f995bae67bf9e652b13be156ed4a8457", 80 | "sha256:b4fe851428b630bdf6f3a99c3761ce3d304b194162812fc1312bfe7bd138e620", 81 | "sha256:c4318cea2d85f13811655e5d1c30fe97074aeb8105b16cc6da2d1d5d64a9f4f7", 82 | "sha256:e46d3d702198d164474078140e008e8961e95dfb5a100f2890eb201c94c48c6e", 83 | "sha256:e986100947cdafa2817701ffe616f2dc0221cc27eb301d654b9462b98ee62912", 84 | "sha256:f94ae68c43b4bba0272e565882db2709d8827910ccc427f0a89d8cf070180f61" 85 | ], 86 | "version": "==1.16.0" 87 | }, 88 | "h5py": { 89 | "hashes": [ 90 | "sha256:0f8cd2acbacf3177b4427ed42639c911667b1f24d923388ab1f8ad466a12be5e", 91 | "sha256:11277e3879098f921ee9e29105b20591e1dfdd44963357399f2abaa1a280c560", 92 | "sha256:1241dec0c94ac32f3285cac1d6f44beabf80423e422ab03bd2686d731a8a9294", 93 | "sha256:17b8187de0b3a945d8e8d031e7eb6ece2fce90791f9c5fde36f4396bf38fdde1", 94 | "sha256:2f30007d0796788a454c1293262f19f25e6428317d3d386f78138fba2a44e37d", 95 | "sha256:308e0758587ee16d4e73e7f2f8aae8351091e343bf0a43d2f697f9535465c816", 96 | "sha256:37cacddf0e8209905f52537a8cf71da0dd9a4de62bd79247274c97b24a408997", 97 | "sha256:38a23bb599748adf23d77f74885c0de6f4a7d9baa42f74e476bbf90fba2b47dd", 98 | 
"sha256:47ab18b7b7bbc36fd2b606289b703b6f0ee915b923d6ad94dd17ac80ebffc280", 99 | "sha256:486c78330af0bf33f5077b51d1888c0739c3cd1a03d5aade0d48572b3b5690ca", 100 | "sha256:4e2183458d6ef1ae87dfb5d6acd0786359336cd9ac0ece6396c09b59fdaa3bd6", 101 | "sha256:51d0595c3e58814c831f6cd2b664a5bf9590e26262c1d541b380d041e4fcb3c0", 102 | "sha256:56d259d56822b70881760b243957f04a0cf133f0ec65eae6a33f562826aee899", 103 | "sha256:5e6e777653169a3cc24ea56bb3d8c845ea391f8914c35bb6f350b0753a52891c", 104 | "sha256:62bfb0ebb0f59e5dccc0b0dbbc0fc40dd1d1e09d04c0dc71f89790231531d4a2", 105 | "sha256:67d89b64debfa021b54aa6f24bbf008403bd144748a0148596b518bce80d2fc4", 106 | "sha256:6bf38571f555fa214493ec6349d29024cc5f313bf1715b09f236c553fd22ae4d", 107 | "sha256:9214ca445c18a37bfe9c165982c0e317e2f21f035c8d635d1c6d9fcbaf35b7a8", 108 | "sha256:ab0c52850428d2e86029935389379c2c97f752e76b616da851deec8a4484f8ec", 109 | "sha256:b2eff336697d8dfd712c5d93fef9f4e4d3e97d9d8c258801836b8664a239e07a", 110 | "sha256:bb33fabc0b8f3fe3bb0f8d6821b2fad5b2a64c27a0808e8d1c5c1e3362062064", 111 | "sha256:bd5353ab342bae1262b04745934cc1565df4cbc8d6a979a0c98f42209bd5c265", 112 | "sha256:bd73444efd1ac06dac27b8405bbe8791a02fd1bc8a2fa0e575257f90b7b57467", 113 | "sha256:bd932236a2ef91a75fee5d7f4ace80ab494c5a59cd092a67c9785ddb7fdc218c", 114 | "sha256:c45650de228ace7731e4280e14fb687f6d5c29cd666c5b22b42492b035e994d6", 115 | "sha256:d5c0c01da45f901a3d429e7ef9e7e22baa869e1affb8715f1bf94e6a30020740", 116 | "sha256:d75035db5bde802a29f4f29f18bb7548863d29ac90ccbf2c04c11799bbbba2c3", 117 | "sha256:dda88206dc9464923f27f601000bc5b152ac0bd6d0122f098d4f239150a70076", 118 | "sha256:e1c2ac5d0aa232c0f60fecc6bd1122346885086a176f939b91058c4c980cc226", 119 | "sha256:e626c65a8587921ebc7fb8d31a49addfdd0b9a9aa96315ea484c09803337b955" 120 | ], 121 | "index": "pypi", 122 | "version": "==2.8.0" 123 | }, 124 | "joblib": { 125 | "hashes": [ 126 | "sha256:11cdfd38cdb71768149e1373f2509e9b4fc1ec6bc92f874cb515b25f2d69f8f4", 127 | 
"sha256:142e74bee7b9dfad75c7dad24c4295b9084a6788116dd149e2fc7daf0db25675" 128 | ], 129 | "version": "==0.12.5" 130 | }, 131 | "keras": { 132 | "hashes": [ 133 | "sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6", 134 | "sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9" 135 | ], 136 | "index": "pypi", 137 | "version": "==2.2.4" 138 | }, 139 | "keras-applications": { 140 | "hashes": [ 141 | "sha256:721dda4fa4e043e5bbd6f52a2996885c4639a7130ae478059b3798d0706f5ae7", 142 | "sha256:a03af60ddc9c5afdae4d5c9a8dd4ca857550e0b793733a5072e0725829b87017" 143 | ], 144 | "version": "==1.0.6" 145 | }, 146 | "keras-preprocessing": { 147 | "hashes": [ 148 | "sha256:90d04c1750bccceef88ac09475c291b4b5f6aa1eaf0603167061b1aa8b043c61", 149 | "sha256:ef2e482c4336fcf7180244d06f4374939099daa3183816e82aee7755af35b754" 150 | ], 151 | "version": "==1.0.5" 152 | }, 153 | "librosa": { 154 | "hashes": [ 155 | "sha256:2aa868b8aade749b9904eeb7034fcf44115601c367969b6d01f5e1b4b9b6031d" 156 | ], 157 | "index": "pypi", 158 | "version": "==0.6.2" 159 | }, 160 | "llvmlite": { 161 | "hashes": [ 162 | "sha256:05b817a961fa0aacfccc5ecaa2e6fbc826c8988759a6d25243916e74c2082d3e", 163 | "sha256:0ea68d4ab6a1fa73d919b17ab2f4909f4aa9a6730ba32a91c61db8c08626f408", 164 | "sha256:152e1ac25c57d3d686128a8ee899762be95d2bb1418d7b776c55b83c9591cc44", 165 | "sha256:276f6585a7a962d86f0751cc3016afa45c0ba86190e1ebd7b43071507ff18967", 166 | "sha256:2bb68dbe572b021b238677729ed77dc2bda396acf6ede00373bb24124e282383", 167 | "sha256:3a0e5a5c9aa56ca506b576ef6ca6e6c37306a483baf951034dc357cb34beb7c7", 168 | "sha256:696ce74ac7060eba4f8b3cb6122bb8aff5d30a6ddc872b519dcb3ed201c42af2", 169 | "sha256:7a249ec0a3731e789fdffe9772f2ae62303b47554b7e5109360d7fa77be0cb76", 170 | "sha256:950f44f2b33f04c91417b9846271decc66adfaa16849ca3522a4c51e0037a022", 171 | "sha256:964d76903e8298ae3446319d12183dcf64722bdc9deb66159fa97ef68986594b", 172 | 
"sha256:9b1c41413bf5d709c8bd3621e075e19dd9170cb08cbf95319d6cbd28ca2d84ed", 173 | "sha256:b19f69376799848e411fad5c079dedc6ac4aedb032f922b80dcf3ec368a59dc3", 174 | "sha256:b36854919a9b13408d7523e4327a04afc62ce880564466f9f79f506d560b70ff", 175 | "sha256:b445c88e9677e427cc6dc49aa621691f04dbee1c888040413fba26711d0edc68", 176 | "sha256:c27fedc3cd6869c222a36cd7b1db8906bb209a8b7b1cd6e84174e8e53e51c23d", 177 | "sha256:c7f6e7078b1830c8fb15f340af4da5692d53a6590eff03c0afa7e69857ac37c0", 178 | "sha256:d8702b8825349b090f51c358cd1e5f9631865bdab4e9e13bfce1ac7c82e44cfe", 179 | "sha256:e866935bc1236478fe09c741313b1ed322ab34343ef226aa34f75ce80e5188ef", 180 | "sha256:f35c1b9d3efdcf54a1b0599c72995e66e1e4033bf08d13ab450011f0f3ca78a2", 181 | "sha256:f54bbd9e73fabd13b208b71a14a8e4d83400fc59cc5eb3bceeff1a7352a23f2f", 182 | "sha256:fd64def9a51dd7dc61913a7a08eeba5b9785522740bec5a7c5995b2a90525025" 183 | ], 184 | "version": "==0.25.0" 185 | }, 186 | "markdown": { 187 | "hashes": [ 188 | "sha256:c00429bd503a47ec88d5e30a751e147dcb4c6889663cd3e2ba0afe858e009baa", 189 | "sha256:d02e0f9b04c500cde6637c11ad7c72671f359b87b9fe924b2383649d8841db7c" 190 | ], 191 | "version": "==3.0.1" 192 | }, 193 | "numba": { 194 | "hashes": [ 195 | "sha256:1f6ebba0caea938178e6220af12c01db987d0cb8f08436b492f0bd05d0f82018", 196 | "sha256:1fe794735fc26dfa4fda68402cc5d6a3c2457706146db7a36b99ec29a1cdac6d", 197 | "sha256:277412c10d58c946e9204fe6a3caf3c5d4dd9088f4b595f78b6a8cd5fb357cde", 198 | "sha256:31a4f073f40aff7cdd18a2527c45da99e8b6f87930ad22d02d82b2564d5e7907", 199 | "sha256:52d046c13bcf0de79dbfb936874b7228f141b9b8e3447cc35855e9ad3e12aa33", 200 | "sha256:57d54d48b4001c1feb57b03d7ae87c952874f730d36f058d0ac9f94c4c4a42c8", 201 | "sha256:6ba5cf183883c21c3acd2f8a4b67dfe9987c194d40a5f5bef75d956391cb9b74", 202 | "sha256:6d11a6c19f68e5d731354fa2690b3ad1cc39e1c3cff6424ce76bbaa7f146a084", 203 | "sha256:6faf3795d4676948b3bd2d562bc694d01411428ce2e2b76408840f4f2289fbd3", 204 | 
"sha256:710747f81c8ca390a38220ca1c800e25c839ab4632ce6e37cda526d91fffc384", 205 | "sha256:762defd58acc2e5f5a099b1c9865294ce7bebd0b7f877e0c504fe69beca7d757", 206 | "sha256:773b7b064f4b06b0a8a84c8600df12c3db337193159ac04d678c86202165e1db", 207 | "sha256:77989a936dc28ace69f039d47017faba756bae9c8d708a5a0c56a0fdd4241f6e", 208 | "sha256:85108757264e73b02de8b959c4c5913cfbe9b9b697d843158535bcaaeb6541f5", 209 | "sha256:8fc3ed08e0d713cc2a99ad7f26782cf13ef2cd3a8f82b397eaa4f3e114458cea", 210 | "sha256:bd0bee047a5e84c657c1273f479ccd45a30d74b48ae5355ac3e5a90dd67b216b", 211 | "sha256:beb59759b876dc854855d9e963a2b9308693f3ce73064842c64ceb863a4cd92b", 212 | "sha256:c6c254fc8fd32e08dd25fae679e0f39f8a33cb72e4934287d7d1c591df6637e8", 213 | "sha256:c87c842af32c0abdf948a18ec4d9e8d88f8ea34533794a4f83dd3cced46ba842", 214 | "sha256:d64c6dec35914ad53b67272a0505aee1a1d81a75b3e3ef25a0c081719ffba713", 215 | "sha256:f763ff00febb898a67bb1164747e150c5cf60e606ee67b3cd48be0b71ee0ca25" 216 | ], 217 | "version": "==0.40.1" 218 | }, 219 | "numpy": { 220 | "hashes": [ 221 | "sha256:0df89ca13c25eaa1621a3f09af4c8ba20da849692dcae184cb55e80952c453fb", 222 | "sha256:154c35f195fd3e1fad2569930ca51907057ae35e03938f89a8aedae91dd1b7c7", 223 | "sha256:18e84323cdb8de3325e741a7a8dd4a82db74fde363dce32b625324c7b32aa6d7", 224 | "sha256:1e8956c37fc138d65ded2d96ab3949bd49038cc6e8a4494b1515b0ba88c91565", 225 | "sha256:23557bdbca3ccbde3abaa12a6e82299bc92d2b9139011f8c16ca1bb8c75d1e95", 226 | "sha256:24fd645a5e5d224aa6e39d93e4a722fafa9160154f296fd5ef9580191c755053", 227 | "sha256:36e36b6868e4440760d4b9b44587ea1dc1f06532858d10abba98e851e154ca70", 228 | "sha256:3d734559db35aa3697dadcea492a423118c5c55d176da2f3be9c98d4803fc2a7", 229 | "sha256:416a2070acf3a2b5d586f9a6507bb97e33574df5bd7508ea970bbf4fc563fa52", 230 | "sha256:4a22dc3f5221a644dfe4a63bf990052cc674ef12a157b1056969079985c92816", 231 | "sha256:4d8d3e5aa6087490912c14a3c10fbdd380b40b421c13920ff468163bc50e016f", 232 | 
"sha256:4f41fd159fba1245e1958a99d349df49c616b133636e0cf668f169bce2aeac2d", 233 | "sha256:561ef098c50f91fbac2cc9305b68c915e9eb915a74d9038ecf8af274d748f76f", 234 | "sha256:56994e14b386b5c0a9b875a76d22d707b315fa037affc7819cda08b6d0489756", 235 | "sha256:73a1f2a529604c50c262179fcca59c87a05ff4614fe8a15c186934d84d09d9a5", 236 | "sha256:7da99445fd890206bfcc7419f79871ba8e73d9d9e6b82fe09980bc5bb4efc35f", 237 | "sha256:99d59e0bcadac4aa3280616591fb7bcd560e2218f5e31d5223a2e12a1425d495", 238 | "sha256:a4cc09489843c70b22e8373ca3dfa52b3fab778b57cf81462f1203b0852e95e3", 239 | "sha256:a61dc29cfca9831a03442a21d4b5fd77e3067beca4b5f81f1a89a04a71cf93fa", 240 | "sha256:b1853df739b32fa913cc59ad9137caa9cc3d97ff871e2bbd89c2a2a1d4a69451", 241 | "sha256:b1f44c335532c0581b77491b7715a871d0dd72e97487ac0f57337ccf3ab3469b", 242 | "sha256:b261e0cb0d6faa8fd6863af26d30351fd2ffdb15b82e51e81e96b9e9e2e7ba16", 243 | "sha256:c857ae5dba375ea26a6228f98c195fec0898a0fd91bcf0e8a0cae6d9faf3eca7", 244 | "sha256:cf5bb4a7d53a71bb6a0144d31df784a973b36d8687d615ef6a7e9b1809917a9b", 245 | "sha256:db9814ff0457b46f2e1d494c1efa4111ca089e08c8b983635ebffb9c1573361f", 246 | "sha256:df04f4bad8a359daa2ff74f8108ea051670cafbca533bb2636c58b16e962989e", 247 | "sha256:ecf81720934a0e18526177e645cbd6a8a21bb0ddc887ff9738de07a1df5c6b61", 248 | "sha256:edfa6fba9157e0e3be0f40168eb142511012683ac3dc82420bee4a3f3981b30e" 249 | ], 250 | "index": "pypi", 251 | "version": "==1.15.4" 252 | }, 253 | "pandas": { 254 | "hashes": [ 255 | "sha256:11975fad9edbdb55f1a560d96f91830e83e29bed6ad5ebf506abda09818eaf60", 256 | "sha256:12e13d127ca1b585dd6f6840d3fe3fa6e46c36a6afe2dbc5cb0b57032c902e31", 257 | "sha256:1c87fcb201e1e06f66e23a61a5fea9eeebfe7204a66d99df24600e3f05168051", 258 | "sha256:242e9900de758e137304ad4b5663c2eff0d798c2c3b891250bd0bd97144579da", 259 | "sha256:26c903d0ae1542890cb9abadb4adcb18f356b14c2df46e4ff657ae640e3ac9e7", 260 | "sha256:2e1e88f9d3e5f107b65b59cd29f141995597b035d17cc5537e58142038942e1a", 261 | 
"sha256:31b7a48b344c14691a8e92765d4023f88902ba3e96e2e4d0364d3453cdfd50db", 262 | "sha256:4fd07a932b4352f8a8973761ab4e84f965bf81cc750fb38e04f01088ab901cb8", 263 | "sha256:5b24ca47acf69222e82530e89111dd9d14f9b970ab2cd3a1c2c78f0c4fbba4f4", 264 | "sha256:647b3b916cc8f6aeba240c8171be3ab799c3c1b2ea179a3be0bd2712c4237553", 265 | "sha256:66b060946046ca27c0e03e9bec9bba3e0b918bafff84c425ca2cc2e157ce121e", 266 | "sha256:6efa9fa6e1434141df8872d0fa4226fc301b17aacf37429193f9d70b426ea28f", 267 | "sha256:be4715c9d8367e51dbe6bc6d05e205b1ae234f0dc5465931014aa1c4af44c1ba", 268 | "sha256:bea90da782d8e945fccfc958585210d23de374fa9294a9481ed2abcef637ebfc", 269 | "sha256:d318d77ab96f66a59e792a481e2701fba879e1a453aefeebdb17444fe204d1ed", 270 | "sha256:d785fc08d6f4207437e900ffead930a61e634c5e4f980ba6d3dc03c9581748c7", 271 | "sha256:de9559287c4fe8da56e8c3878d2374abc19d1ba2b807bfa7553e912a8e5ba87c", 272 | "sha256:f4f98b190bb918ac0bc0e3dd2ab74ff3573da9f43106f6dba6385406912ec00f", 273 | "sha256:f71f1a7e2d03758f6e957896ed696254e2bc83110ddbc6942018f1a232dd9dad", 274 | "sha256:fb944c8f0b0ab5c1f7846c686bc4cdf8cde7224655c12edcd59d5212cd57bec0" 275 | ], 276 | "index": "pypi", 277 | "version": "==0.23.4" 278 | }, 279 | "protobuf": { 280 | "hashes": [ 281 | "sha256:10394a4d03af7060fa8a6e1cbf38cea44be1467053b0aea5bbfcb4b13c4b88c4", 282 | "sha256:1489b376b0f364bcc6f89519718c057eb191d7ad6f1b395ffd93d1aa45587811", 283 | "sha256:1931d8efce896981fe410c802fd66df14f9f429c32a72dd9cfeeac9815ec6444", 284 | "sha256:196d3a80f93c537f27d2a19a4fafb826fb4c331b0b99110f985119391d170f96", 285 | "sha256:46e34fdcc2b1f2620172d3a4885128705a4e658b9b62355ae5e98f9ea19f42c2", 286 | "sha256:4b92e235a3afd42e7493b281c8b80c0c65cbef45de30f43d571d1ee40a1f77ef", 287 | "sha256:574085a33ca0d2c67433e5f3e9a0965c487410d6cb3406c83bdaf549bfc2992e", 288 | "sha256:59cd75ded98094d3cf2d79e84cdb38a46e33e7441b2826f3838dcc7c07f82995", 289 | "sha256:5ee0522eed6680bb5bac5b6d738f7b0923b3cafce8c4b1a039a6107f0841d7ed", 290 | 
"sha256:65917cfd5da9dfc993d5684643063318a2e875f798047911a9dd71ca066641c9", 291 | "sha256:685bc4ec61a50f7360c9fd18e277b65db90105adbf9c79938bd315435e526b90", 292 | "sha256:92e8418976e52201364a3174e40dc31f5fd8c147186d72380cbda54e0464ee19", 293 | "sha256:9335f79d1940dfb9bcaf8ec881fb8ab47d7a2c721fb8b02949aab8bbf8b68625", 294 | "sha256:a7ee3bb6de78185e5411487bef8bc1c59ebd97e47713cba3c460ef44e99b3db9", 295 | "sha256:ceec283da2323e2431c49de58f80e1718986b79be59c266bb0509cbf90ca5b9e", 296 | "sha256:fcfc907746ec22716f05ea96b7f41597dfe1a1c088f861efb8a0d4f4196a6f10" 297 | ], 298 | "version": "==3.6.1" 299 | }, 300 | "pydub": { 301 | "hashes": [ 302 | "sha256:07d5eec305110cf1dfe1a0c45eed84bc31f889b1b19feab3859a05dc45b70e7b", 303 | "sha256:c703e220917be686fc83d24231849a11c95ba540963ffb3ae5bb322788974d79" 304 | ], 305 | "index": "pypi", 306 | "version": "==0.23.0" 307 | }, 308 | "python-dateutil": { 309 | "hashes": [ 310 | "sha256:063df5763652e21de43de7d9e00ccf239f953a832941e37be541614732cdfc93", 311 | "sha256:88f9287c0174266bb0d8cedd395cfba9c58e87e5ad86b2ce58859bc11be3cf02" 312 | ], 313 | "version": "==2.7.5" 314 | }, 315 | "pytz": { 316 | "hashes": [ 317 | "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", 318 | "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" 319 | ], 320 | "version": "==2018.7" 321 | }, 322 | "pyyaml": { 323 | "hashes": [ 324 | "sha256:3d7da3009c0f3e783b2c873687652d83b1bbfd5c88e9813fb7e5b03c0dd3108b", 325 | "sha256:3ef3092145e9b70e3ddd2c7ad59bdd0252a94dfe3949721633e41344de00a6bf", 326 | "sha256:40c71b8e076d0550b2e6380bada1f1cd1017b882f7e16f09a65be98e017f211a", 327 | "sha256:558dd60b890ba8fd982e05941927a3911dc409a63dcb8b634feaa0cda69330d3", 328 | "sha256:a7c28b45d9f99102fa092bb213aa12e0aaf9a6a1f5e395d36166639c1f96c3a1", 329 | "sha256:aa7dd4a6a427aed7df6fb7f08a580d68d9b118d90310374716ae90b710280af1", 330 | "sha256:bc558586e6045763782014934bfaf39d48b8ae85a2713117d16c39864085c613", 331 | 
"sha256:d46d7982b62e0729ad0175a9bc7e10a566fc07b224d2c79fafb5e032727eaa04", 332 | "sha256:d5eef459e30b09f5a098b9cea68bebfeb268697f78d647bd255a085371ac7f3f", 333 | "sha256:e01d3203230e1786cd91ccfdc8f8454c8069c91bee3962ad93b87a4b2860f537", 334 | "sha256:e170a9e6fcfd19021dd29845af83bb79236068bf5fd4df3327c1be18182b2531" 335 | ], 336 | "version": "==3.13" 337 | }, 338 | "resampy": { 339 | "hashes": [ 340 | "sha256:7f6912ca2b746eb9bcdc05c52fcef088f0b7ba1ca6ee0b2d0a359d18fc57f8f8" 341 | ], 342 | "version": "==0.2.1" 343 | }, 344 | "scikit-learn": { 345 | "hashes": [ 346 | "sha256:1ca280bbdeb0f9950f9427c71e29d9f14e63b2ffa3e8fdf95f25e13773e6d898", 347 | "sha256:33ad23aa0928c64567a24aac771aea4e179fab2a20f9f786ab00ca9fe0a13c82", 348 | "sha256:344bc433ccbfbadcac8c16b4cec9d7c4722bcea9ce19f6da42e2c2f805571941", 349 | "sha256:35ee532b5e992a6e8d8a71d325fd9e0b58716894657e7d3da3e7a1d888c2e7d4", 350 | "sha256:37cbbba2d2a3895bba834d50488d22268a511279e053135bb291f637fe30512b", 351 | "sha256:40cf1908ee712545f4286cc21f3ee21f3466c81438320204725ab37c96849f27", 352 | "sha256:4130760ac54f5946523c1a1fb32a6c0925e5245f77285270a8f6fb5901b7b733", 353 | "sha256:46cc8c32496f02affde7abe507af99cd752de0e41aec951a0bc40c693c2a1e07", 354 | "sha256:4a364cf22be381a17c05ada9f9ce102733a0f75893c51b83718cd9358444921e", 355 | "sha256:56aff3fa3417cd69807c1c74db69aee34ce08d7161cbdfebbff9b4023d9d224b", 356 | "sha256:58debb34a15cfc03f4876e450068dbd711d9ec36ae5503ed2868f2c1f88522f7", 357 | "sha256:7bcf7ade62ef3443470af32afb82646640d653f42502cf31a13cc17d3ff85d57", 358 | "sha256:7d4eab203ed260075f47e2bf6a2bd656367e4e8683b3ad46d4651070c5d1e9aa", 359 | "sha256:86697c6e4c2d74fbbf110c6d5979d34196a55108fa9896bf424f9795a8d935ad", 360 | "sha256:911115db6669c9b11efd502dcc5483cd0c53e4e3c4bcdfe2e73bbb27eb5e81da", 361 | "sha256:97d1d971f8ec257011e64b7d655df68081dd3097322690afa1a71a1d755f8c18", 362 | "sha256:99f22c3228ec9ab3933597825dc7d595b6c8c7b9ae725cfa557f16353fac8314", 363 | 
"sha256:a2e18e5a4095b3ca4852eb087d28335f3bb8515df4ccf906d380ee627613837f", 364 | "sha256:a3070f71a4479a9827148609f24f2978f10acffa3b8012fe9606720d271066bd", 365 | "sha256:a6a197499429d2eaa2ae922760aa3966ef353545422d5f47ea2ca9369cbf7d26", 366 | "sha256:a7f6f5b3bc7b8e2066076098788579af12bd507ccea8ca6859e52761aa61eaca", 367 | "sha256:a82b90b6037fcc6b311431395c11b02555a3fbf96921a0667c8f8b0c495991cb", 368 | "sha256:ab2c4266b8cd159a266eb03c709ad5400756dca9c45aa48fb523263344475093", 369 | "sha256:b983a2dfdb9d707c78790608bcfd63692e5c2d996865a9689f3db768d0a2978d", 370 | "sha256:bb33d447f4c6fb164d426467d7bf8a4901c303333c5809b85319b2e0626763cd", 371 | "sha256:bc2a0116a67081167f1fbfed731d361671e5925db291b70e65fa66170045c53f", 372 | "sha256:bd189f6d0c2fdccb7c0d3fd1227c6626dc17d00257edbb63dd7c88f31928db61", 373 | "sha256:d393f810da9cd4746cad7350fb89f0509c3ae702c79d2ba8bd875201be4102d1" 374 | ], 375 | "version": "==0.20.0" 376 | }, 377 | "scipy": { 378 | "hashes": [ 379 | "sha256:0611ee97296265af4a21164a5323f8c1b4e8e15c582d3dfa7610825900136bb7", 380 | "sha256:08237eda23fd8e4e54838258b124f1cd141379a5f281b0a234ca99b38918c07a", 381 | "sha256:0e645dbfc03f279e1946cf07c9c754c2a1859cb4a41c5f70b25f6b3a586b6dbd", 382 | "sha256:0e9bb7efe5f051ea7212555b290e784b82f21ffd0f655405ac4f87e288b730b3", 383 | "sha256:108c16640849e5827e7d51023efb3bd79244098c3f21e4897a1007720cb7ce37", 384 | "sha256:340ef70f5b0f4e2b4b43c8c8061165911bc6b2ad16f8de85d9774545e2c47463", 385 | "sha256:3ad73dfc6f82e494195144bd3a129c7241e761179b7cb5c07b9a0ede99c686f3", 386 | "sha256:3b243c77a822cd034dad53058d7c2abf80062aa6f4a32e9799c95d6391558631", 387 | "sha256:404a00314e85eca9d46b80929571b938e97a143b4f2ddc2b2b3c91a4c4ead9c5", 388 | "sha256:423b3ff76957d29d1cce1bc0d62ebaf9a3fdfaf62344e3fdec14619bb7b5ad3a", 389 | "sha256:42d9149a2fff7affdd352d157fa5717033767857c11bd55aa4a519a44343dfef", 390 | "sha256:625f25a6b7d795e8830cb70439453c9f163e6870e710ec99eba5722775b318f3", 391 | 
"sha256:698c6409da58686f2df3d6f815491fd5b4c2de6817a45379517c92366eea208f", 392 | "sha256:729f8f8363d32cebcb946de278324ab43d28096f36593be6281ca1ee86ce6559", 393 | "sha256:8190770146a4c8ed5d330d5b5ad1c76251c63349d25c96b3094875b930c44692", 394 | "sha256:878352408424dffaa695ffedf2f9f92844e116686923ed9aa8626fc30d32cfd1", 395 | "sha256:8b984f0821577d889f3c7ca8445564175fb4ac7c7f9659b7c60bef95b2b70e76", 396 | "sha256:8f841bbc21d3dad2111a94c490fb0a591b8612ffea86b8e5571746ae76a3deac", 397 | "sha256:c22b27371b3866c92796e5d7907e914f0e58a36d3222c5d436ddd3f0e354227a", 398 | "sha256:d0cdd5658b49a722783b8b4f61a6f1f9c75042d0e29a30ccb6cacc9b25f6d9e2", 399 | "sha256:d40dc7f494b06dcee0d303e51a00451b2da6119acbeaccf8369f2d29e28917ac", 400 | "sha256:d8491d4784aceb1f100ddb8e31239c54e4afab8d607928a9f7ef2469ec35ae01", 401 | "sha256:dfc5080c38dde3f43d8fbb9c0539a7839683475226cf83e4b24363b227dfe552", 402 | "sha256:e24e22c8d98d3c704bb3410bce9b69e122a8de487ad3dbfe9985d154e5c03a40", 403 | "sha256:e7a01e53163818d56eabddcafdc2090e9daba178aad05516b20c6591c4811020", 404 | "sha256:ee677635393414930541a096fc8e61634304bb0153e4e02b75685b11eba14cae", 405 | "sha256:f0521af1b722265d824d6ad055acfe9bd3341765735c44b5a4d0069e189a0f40", 406 | "sha256:f25c281f12c0da726c6ed00535ca5d1622ec755c30a3f8eafef26cf43fede694" 407 | ], 408 | "index": "pypi", 409 | "version": "==1.1.0" 410 | }, 411 | "six": { 412 | "hashes": [ 413 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", 414 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" 415 | ], 416 | "version": "==1.11.0" 417 | }, 418 | "sklearn": { 419 | "hashes": [ 420 | "sha256:e23001573aa194b834122d2b9562459bf5ae494a2d59ca6b8aa22c85a44c0e31" 421 | ], 422 | "index": "pypi", 423 | "version": "==0.0" 424 | }, 425 | "tensorboard": { 426 | "hashes": [ 427 | "sha256:537603db949e10d2f5f201d88b073f3f8fb4e4c311d5541e1d4518aa59aa8daa", 428 | "sha256:ca275a7e39797946930d7d4460999369b73968e8191f2256e23bfb7924004d59" 429 | ], 430 | 
"version": "==1.12.0" 431 | }, 432 | "tensorflow-gpu": { 433 | "hashes": [ 434 | "sha256:12902549817d2f093f3045f7861df84a5936e8f14469d11c5a5622c85455b96c", 435 | "sha256:435a9a4a37c1a92f9bc80f577f0328775539c593b9bc9e943712a204ada11db5", 436 | "sha256:6e9e6b73cc6dc6b82a8e09f9688a8806f44dbe02c4e92cb9c36efea30a7cd47e", 437 | "sha256:bf2c1e660c533102db2a81fad21a26213f4e4ff5ce6b841c0d9adc4ac3c5c6bc", 438 | "sha256:ce47aaa4ddf8446c9c9a83d968c2beba93feefaf796f1255ec6e361e4dd0e13a", 439 | "sha256:d02f018e46ee0d45a86bd27c5635b936330ab7e180c43029d1b3c4cebc7c2c45", 440 | "sha256:da799ad89780c21380fdbb99f3ecf73488dbfdca0715493c6931c2710c710e62" 441 | ], 442 | "index": "pypi", 443 | "version": "==1.12.0" 444 | }, 445 | "termcolor": { 446 | "hashes": [ 447 | "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b" 448 | ], 449 | "version": "==1.1.0" 450 | }, 451 | "tqdm": { 452 | "hashes": [ 453 | "sha256:3c4d4a5a41ef162dd61f1edb86b0e1c7859054ab656b2e7c7b77e7fbf6d9f392", 454 | "sha256:5b4d5549984503050883bc126280b386f5f4ca87e6c023c5d015655ad75bdebb" 455 | ], 456 | "index": "pypi", 457 | "version": "==4.28.1" 458 | }, 459 | "werkzeug": { 460 | "hashes": [ 461 | "sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c", 462 | "sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b" 463 | ], 464 | "version": "==0.14.1" 465 | }, 466 | "wheel": { 467 | "hashes": [ 468 | "sha256:196c9842d79262bb66fcf59faa4bd0deb27da911dbc7c6cdca931080eb1f0783", 469 | "sha256:c93e2d711f5f9841e17f53b0e6c0ff85593f3b416b6eec7a9452041a59a42688" 470 | ], 471 | "markers": "python_version >= '3'", 472 | "version": "==0.32.2" 473 | } 474 | }, 475 | "develop": { 476 | "alabaster": { 477 | "hashes": [ 478 | "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", 479 | "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" 480 | ], 481 | "version": "==0.7.12" 482 | }, 483 | "babel": { 484 | "hashes": [ 485 | 
"sha256:6778d85147d5d85345c14a26aada5e478ab04e39b078b0745ee6870c2b5cf669", 486 | "sha256:8cba50f48c529ca3fa18cf81fa9403be176d374ac4d60738b839122dfaaa3d23" 487 | ], 488 | "version": "==2.6.0" 489 | }, 490 | "certifi": { 491 | "hashes": [ 492 | "sha256:339dc09518b07e2fa7eda5450740925974815557727d6bd35d319c1524a04a4c", 493 | "sha256:6d58c986d22b038c8c0df30d639f23a3e6d172a05c3583e766f4c0b785c0986a" 494 | ], 495 | "version": "==2018.10.15" 496 | }, 497 | "chardet": { 498 | "hashes": [ 499 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 500 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 501 | ], 502 | "version": "==3.0.4" 503 | }, 504 | "docutils": { 505 | "hashes": [ 506 | "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", 507 | "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", 508 | "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" 509 | ], 510 | "version": "==0.14" 511 | }, 512 | "flake8": { 513 | "hashes": [ 514 | "sha256:6a35f5b8761f45c5513e3405f110a86bea57982c3b75b766ce7b65217abe1670", 515 | "sha256:c01f8a3963b3571a8e6bd7a4063359aff90749e160778e03817cd9b71c9e07d2" 516 | ], 517 | "index": "pypi", 518 | "version": "==3.6.0" 519 | }, 520 | "flake8-docstrings": { 521 | "hashes": [ 522 | "sha256:4e0ce1476b64e6291520e5570cf12b05016dd4e8ae454b8a8a9a48bc5f84e1cd", 523 | "sha256:8436396b5ecad51a122a2c99ba26e5b4e623bf6e913b0fea0cb6c2c4050f91eb" 524 | ], 525 | "index": "pypi", 526 | "version": "==1.3.0" 527 | }, 528 | "flake8-polyfill": { 529 | "hashes": [ 530 | "sha256:12be6a34ee3ab795b19ca73505e7b55826d5f6ad7230d31b18e106400169b9e9", 531 | "sha256:e44b087597f6da52ec6393a709e7108b2905317d0c0b744cdca6208e670d8eda" 532 | ], 533 | "version": "==1.0.2" 534 | }, 535 | "idna": { 536 | "hashes": [ 537 | "sha256:156a6814fb5ac1fc6850fb002e0852d56c0c8d2531923a51032d1b70760e186e", 538 | 
"sha256:684a38a6f903c1d71d6d5fac066b58d7768af4de2b832e426ec79c30daa94a16" 539 | ], 540 | "version": "==2.7" 541 | }, 542 | "imagesize": { 543 | "hashes": [ 544 | "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8", 545 | "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5" 546 | ], 547 | "version": "==1.1.0" 548 | }, 549 | "jinja2": { 550 | "hashes": [ 551 | "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", 552 | "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" 553 | ], 554 | "version": "==2.10" 555 | }, 556 | "markupsafe": { 557 | "hashes": [ 558 | "sha256:048ef924c1623740e70204aa7143ec592504045ae4429b59c30054cb31e3c432", 559 | "sha256:130f844e7f5bdd8e9f3f42e7102ef1d49b2e6fdf0d7526df3f87281a532d8c8b", 560 | "sha256:19f637c2ac5ae9da8bfd98cef74d64b7e1bb8a63038a3505cd182c3fac5eb4d9", 561 | "sha256:1b8a7a87ad1b92bd887568ce54b23565f3fd7018c4180136e1cf412b405a47af", 562 | "sha256:1c25694ca680b6919de53a4bb3bdd0602beafc63ff001fea2f2fc16ec3a11834", 563 | "sha256:1f19ef5d3908110e1e891deefb5586aae1b49a7440db952454b4e281b41620cd", 564 | "sha256:1fa6058938190ebe8290e5cae6c351e14e7bb44505c4a7624555ce57fbbeba0d", 565 | "sha256:31cbb1359e8c25f9f48e156e59e2eaad51cd5242c05ed18a8de6dbe85184e4b7", 566 | "sha256:3e835d8841ae7863f64e40e19477f7eb398674da6a47f09871673742531e6f4b", 567 | "sha256:4e97332c9ce444b0c2c38dd22ddc61c743eb208d916e4265a2a3b575bdccb1d3", 568 | "sha256:525396ee324ee2da82919f2ee9c9e73b012f23e7640131dd1b53a90206a0f09c", 569 | "sha256:52b07fbc32032c21ad4ab060fec137b76eb804c4b9a1c7c7dc562549306afad2", 570 | "sha256:52ccb45e77a1085ec5461cde794e1aa037df79f473cbc69b974e73940655c8d7", 571 | "sha256:5c3fbebd7de20ce93103cb3183b47671f2885307df4a17a0ad56a1dd51273d36", 572 | "sha256:5e5851969aea17660e55f6a3be00037a25b96a9b44d2083651812c99d53b14d1", 573 | "sha256:5edfa27b2d3eefa2210fb2f5d539fbed81722b49f083b2c6566455eb7422fd7e", 574 | 
"sha256:7d263e5770efddf465a9e31b78362d84d015cc894ca2c131901a4445eaa61ee1", 575 | "sha256:83381342bfc22b3c8c06f2dd93a505413888694302de25add756254beee8449c", 576 | "sha256:857eebb2c1dc60e4219ec8e98dfa19553dae33608237e107db9c6078b1167856", 577 | "sha256:98e439297f78fca3a6169fd330fbe88d78b3bb72f967ad9961bcac0d7fdd1550", 578 | "sha256:bf54103892a83c64db58125b3f2a43df6d2cb2d28889f14c78519394feb41492", 579 | "sha256:d9ac82be533394d341b41d78aca7ed0e0f4ba5a2231602e2f05aa87f25c51672", 580 | "sha256:e982fe07ede9fada6ff6705af70514a52beb1b2c3d25d4e873e82114cf3c5401", 581 | "sha256:edce2ea7f3dfc981c4ddc97add8a61381d9642dc3273737e756517cc03e84dd6", 582 | "sha256:efdc45ef1afc238db84cb4963aa689c0408912a0239b0721cb172b4016eb31d6", 583 | "sha256:f137c02498f8b935892d5c0172560d7ab54bc45039de8805075e19079c639a9c", 584 | "sha256:f82e347a72f955b7017a39708a3667f106e6ad4d10b25f237396a7115d8ed5fd", 585 | "sha256:fb7c206e01ad85ce57feeaaa0bf784b97fa3cad0d4a5737bc5295785f5c613a1" 586 | ], 587 | "version": "==1.1.0" 588 | }, 589 | "mccabe": { 590 | "hashes": [ 591 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 592 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 593 | ], 594 | "version": "==0.6.1" 595 | }, 596 | "packaging": { 597 | "hashes": [ 598 | "sha256:0886227f54515e592aaa2e5a553332c73962917f2831f1b0f9b9f4380a4b9807", 599 | "sha256:f95a1e147590f204328170981833854229bb2912ac3d5f89e2a8ccd2834800c9" 600 | ], 601 | "version": "==18.0" 602 | }, 603 | "pycodestyle": { 604 | "hashes": [ 605 | "sha256:cbc619d09254895b0d12c2c691e237b2e91e9b2ecf5e84c26b35400f93dcfb83", 606 | "sha256:cbfca99bd594a10f674d0cd97a3d802a1fdef635d4361e1a2658de47ed261e3a" 607 | ], 608 | "version": "==2.4.0" 609 | }, 610 | "pydocstyle": { 611 | "hashes": [ 612 | "sha256:2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8", 613 | "sha256:5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4", 614 | 
"sha256:ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039" 615 | ], 616 | "version": "==3.0.0" 617 | }, 618 | "pyflakes": { 619 | "hashes": [ 620 | "sha256:9a7662ec724d0120012f6e29d6248ae3727d821bba522a0e6b356eff19126a49", 621 | "sha256:f661252913bc1dbe7fcfcbf0af0db3f42ab65aabd1a6ca68fe5d466bace94dae" 622 | ], 623 | "version": "==2.0.0" 624 | }, 625 | "pygments": { 626 | "hashes": [ 627 | "sha256:78f3f434bcc5d6ee09020f92ba487f95ba50f1e3ef83ae96b9d5ffa1bab25c5d", 628 | "sha256:dbae1046def0efb574852fab9e90209b23f556367b5a320c0bcb871c77c3e8cc" 629 | ], 630 | "version": "==2.2.0" 631 | }, 632 | "pyparsing": { 633 | "hashes": [ 634 | "sha256:40856e74d4987de5d01761a22d1621ae1c7f8774585acae358aa5c5936c6c90b", 635 | "sha256:f353aab21fd474459d97b709e527b5571314ee5f067441dc9f88e33eecd96592" 636 | ], 637 | "version": "==2.3.0" 638 | }, 639 | "pytz": { 640 | "hashes": [ 641 | "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", 642 | "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" 643 | ], 644 | "version": "==2018.7" 645 | }, 646 | "requests": { 647 | "hashes": [ 648 | "sha256:99dcfdaaeb17caf6e526f32b6a7b780461512ab3f1d992187801694cba42770c", 649 | "sha256:a84b8c9ab6239b578f22d1c21d51b696dcfe004032bb80ea832398d6909d7279" 650 | ], 651 | "version": "==2.20.0" 652 | }, 653 | "six": { 654 | "hashes": [ 655 | "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", 656 | "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" 657 | ], 658 | "version": "==1.11.0" 659 | }, 660 | "snowballstemmer": { 661 | "hashes": [ 662 | "sha256:919f26a68b2c17a7634da993d91339e288964f93c274f1343e3bbbe2096e1128", 663 | "sha256:9f3bcd3c401c3e862ec0ebe6d2c069ebc012ce142cce209c098ccb5b09136e89" 664 | ], 665 | "version": "==1.2.1" 666 | }, 667 | "sphinx": { 668 | "hashes": [ 669 | "sha256:652eb8c566f18823a022bb4b6dbc868d366df332a11a0226b5bc3a798a479f17", 670 | 
"sha256:d222626d8356de702431e813a05c68a35967e3d66c6cd1c2c89539bb179a7464" 671 | ], 672 | "index": "pypi", 673 | "version": "==1.8.1" 674 | }, 675 | "sphinx-rtd-theme": { 676 | "hashes": [ 677 | "sha256:02f02a676d6baabb758a20c7a479d58648e0f64f13e07d1b388e9bb2afe86a09", 678 | "sha256:d0f6bc70f98961145c5b0e26a992829363a197321ba571b31b24ea91879e0c96" 679 | ], 680 | "index": "pypi", 681 | "version": "==0.4.2" 682 | }, 683 | "sphinxcontrib-websupport": { 684 | "hashes": [ 685 | "sha256:68ca7ff70785cbe1e7bccc71a48b5b6d965d79ca50629606c7861a21b206d9dd", 686 | "sha256:9de47f375baf1ea07cdb3436ff39d7a9c76042c10a769c52353ec46e4e8fc3b9" 687 | ], 688 | "version": "==1.1.0" 689 | }, 690 | "urllib3": { 691 | "hashes": [ 692 | "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39", 693 | "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22" 694 | ], 695 | "version": "==1.24.1" 696 | } 697 | } 698 | } 699 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Surrey CVSSP DCASE 2018 Task 2 System 2 | ===================================== 3 | 4 | This is the source code for CVSSP's system used in `DCASE 2018 Task 2`__. 5 | 6 | For more details about the system, consider reading the `technical 7 | report`__ or the `workshop paper`__ [1]_. 8 | 9 | __ http://dcase.community/challenge2018/task-general-purpose-audio-tagging 10 | __ http://dcase.community/documents/challenge2018/technical_reports/DCASE2018_Iqbal_89.pdf 11 | __ http://dcase.community/documents/workshop2018/proceedings/DCASE2018Workshop_Iqbal_151.pdf 12 | 13 | 14 | Requirements 15 | ------------ 16 | 17 | This software requires Python 3. 
To install the dependencies, run:: 18 | 19 | pipenv install 20 | 21 | or:: 22 | 23 | pip install -r requirements.txt 24 | 25 | The main functionality of this software also requires the DCASE 2018 Task 2 26 | datasets, which may be downloaded `here`__. After acquiring the datasets, 27 | modify ``task2/config/dcase2018_task2.py`` accordingly. 28 | 29 | For example:: 30 | 31 | _root_dataset_path = ('/path/to/datasets') 32 | """str: Path to root directory containing input audio clips.""" 33 | 34 | training_set = Dataset( 35 | name='training', 36 | path=os.path.join(_root_dataset_path, 'audio_train'), 37 | metadata_path='metadata/training.csv', 38 | ) 39 | """Dataset instance for the training dataset.""" 40 | 41 | You will also want to change the work path in ``task2/config/paths.py``:: 42 | 43 | work_path = '/path/to/workspace' 44 | """str: Path to parent directory containing program output.""" 45 | 46 | __ https://www.kaggle.com/c/freesound-audio-tagging/data 47 | 48 | 49 | Usage 50 | ----- 51 | 52 | In this section, the various commands are described. Using this software, the 53 | user is able to apply preprocessing (silence removal), extract feature vectors, 54 | train the network, generate predictions, and evaluate the predictions. 55 | 56 | Preprocessing 57 | ^^^^^^^^^^^^^ 58 | 59 | Our implementation of preprocessing involves extracting the non-silent sections 60 | of audio clips and saving these to disk separately. A new metadata file is then 61 | created with entries corresponding to the new files. 62 | 63 | To apply preprocessing, run:: 64 | 65 | python task2/main.py preprocess 66 | 67 | Refer to ``task2/silence.py`` for the relevant code. 68 | 69 | Feature Extraction 70 | ^^^^^^^^^^^^^^^^^^ 71 | 72 | To extract feature vectors, run:: 73 | 74 | python task2/main.py extract [--recompute] 75 | 76 | If ``--recompute`` is enabled, the program will recompute existing feature 77 | vectors. This implementaion extracts log-mel spectrogram features. 
See 78 | ``task2/config/logmel.py`` for tweaking the parameters. 79 | 80 | Training 81 | ^^^^^^^^ 82 | 83 | To train a model, run:: 84 | 85 | python task2/main.py train [--model MODEL] [--fold n] [--sample_weight x] [--class_weight] 86 | 87 | The ``--model`` option can be one of the following: 88 | 89 | * ``vgg13`` 90 | * ``gcnn`` 91 | * ``crnn`` 92 | * ``gcrnn`` 93 | 94 | The training set is assumed to be split into several folds, so the ``--fold`` 95 | option specifies which one to use as the validation set. If set to ``-1``, the 96 | program trains on the entire dataset. The ``--sample_weight`` option allows 97 | setting a sample weight to be used for unverified (noisy) examples. Finally, 98 | setting the ``--class_weight`` flag indicates that examples should be weighted 99 | based on the class that they belong to. 100 | 101 | See ``task2/config/training.py`` for tweaking the parameters or 102 | ``task2/training.py`` for further modifications. 103 | 104 | Prediction 105 | ^^^^^^^^^^ 106 | 107 | To generate predictions, run:: 108 | 109 | python task2/main.py predict [--fold n] 110 | 111 | The ``--fold`` option specifies which fold-specific model to use. 112 | 113 | See ``task2/config/predictions.py`` to modify which epochs are selected for 114 | generating the predictions. By default, the top four models based on their MAP 115 | score on the validation set are chosen. 116 | 117 | Evaluation 118 | ^^^^^^^^^^ 119 | 120 | To evaluate the predictions, run:: 121 | 122 | python task2/main.py evaluate [--fold n] 123 | 124 | Stacking 125 | ^^^^^^^^ 126 | Stacking is an ensembling technique that involves creating meta-features based 127 | on the predictions of a number of base classifiers. These meta-features are 128 | then used to train a second-level classifier and generate new predictions. We 129 | provide scripts to do this. 
130 | 131 | To generate meta-features, run:: 132 | 133 | python scripts/meta_features.py 134 | 135 | The argument ``pred_path`` refers to the parent directory in which the 136 | predictions of the base classifiers are stored. ``pred_type`` must be either 137 | ``training`` or ``test``, depending on which dataset the meta-features are for. 138 | ``output_path`` specifies the path of the output HDF5 file. 139 | 140 | To give an example, assume that the directory structure looks like this:: 141 | 142 | workspace 143 | ├── predictions 144 | │   ├── classifier1 145 | │   ├── classifier2 146 | │   ├── classifier3 147 | 148 | In this case, you might run:: 149 | 150 | python scripts/meta_features.py workspace/predictions training training.h5 151 | python scripts/meta_features.py workspace/predictions test test.h5 152 | 153 | For the time being, the script must be edited to select the classifiers. 154 | 155 | To then generate predictions using a second-level classifier, run:: 156 | 157 | python scripts/predict_stack.py --test_path test.h5 training.h5 158 | 159 | The argument ``metadata_path`` is the path to the training set metadata file. 160 | See the script itself for more details. 161 | 162 | Pseudo-labeling 163 | ^^^^^^^^^^^^^^^ 164 | To relabel or promote training examples, run:: 165 | 166 | python scripts/relabel.py [--relabel_threshold t1] [--promote_threshold t2] 167 | 168 | The argument ``metadata_path`` is the path to the training set metadata file 169 | containing the original labels. ``pred_path`` is the path to the predictions 170 | file used for pseudo-labeling. ``output_path`` is the path of the new metadata 171 | file to be written. The threshold options allow constraining which examples are 172 | relabeled or promoted. 173 | 174 | 175 | Citing 176 | ------ 177 | If you wish to cite this work, please cite the following paper: 178 | 179 | .. [1] \T. Iqbal, Q. Kong, M. D. Plumbley, and W. 
Wang, "General-Purpose 180 | Audio Tagging from Noisy Labels using Convolutional Neural 181 | Networks," in Detection and Classification of Acoustic Scenes and 182 | Events 2018 Workshop, Woking, UK, 2018, pp. 212–216. 183 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = dcase2018_task2 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. 
If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../task2')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'dcase2018_task2' 23 | copyright = '2018, Turab Iqbal' 24 | author = 'Turab Iqbal' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.doctest', 44 | 'sphinx.ext.mathjax', 45 | 'sphinx.ext.napoleon', 46 | ] 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # 54 | # source_suffix = ['.rst', '.md'] 55 | source_suffix = '.rst' 56 | 57 | # The master toctree document. 58 | master_doc = 'index' 59 | 60 | # The language for content autogenerated by Sphinx. Refer to documentation 61 | # for a list of supported languages. 62 | # 63 | # This is also used if you do content translation via gettext catalogs. 64 | # Usually you set "language" from the command line for these cases. 65 | language = None 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 69 | # This pattern also affects html_static_path and html_extra_path . 
70 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 71 | 72 | # The name of the Pygments (syntax highlighting) style to use. 73 | pygments_style = 'sphinx' 74 | 75 | 76 | # -- Options for HTML output ------------------------------------------------- 77 | 78 | # The theme to use for HTML and HTML Help pages. See the documentation for 79 | # a list of builtin themes. 80 | # 81 | html_theme = 'sphinx_rtd_theme' 82 | 83 | # Theme options are theme-specific and customize the look and feel of a theme 84 | # further. For a list of options available for each theme, see the 85 | # documentation. 86 | # 87 | # html_theme_options = {} 88 | 89 | # Add any paths that contain custom static files (such as style sheets) here, 90 | # relative to this directory. They are copied after the builtin static files, 91 | # so a file named "default.css" will overwrite the builtin "default.css". 92 | html_static_path = ['_static'] 93 | 94 | # Custom sidebar templates, must be a dictionary that maps document names 95 | # to template names. 96 | # 97 | # The default sidebars (for documents that don't match any pattern) are 98 | # defined by theme itself. Builtin themes are using these templates by 99 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 100 | # 'searchbox.html']``. 101 | # 102 | # html_sidebars = {} 103 | 104 | 105 | # -- Options for HTMLHelp output --------------------------------------------- 106 | 107 | # Output file base name for HTML help builder. 108 | htmlhelp_basename = 'dcase2018_task2doc' 109 | 110 | 111 | # -- Options for LaTeX output ------------------------------------------------ 112 | 113 | latex_elements = { 114 | # The paper size ('letterpaper' or 'a4paper'). 115 | # 116 | # 'papersize': 'letterpaper', 117 | 118 | # The font size ('10pt', '11pt' or '12pt'). 119 | # 120 | # 'pointsize': '10pt', 121 | 122 | # Additional stuff for the LaTeX preamble. 
123 | # 124 | # 'preamble': '', 125 | 126 | # Latex figure (float) alignment 127 | # 128 | # 'figure_align': 'htbp', 129 | } 130 | 131 | # Grouping the document tree into LaTeX files. List of tuples 132 | # (source start file, target name, title, 133 | # author, documentclass [howto, manual, or own class]). 134 | latex_documents = [ 135 | (master_doc, 'dcase2018_task2.tex', 'dcase2018\\_task2 Documentation', 136 | 'Turab Iqbal', 'manual'), 137 | ] 138 | 139 | 140 | # -- Options for manual page output ------------------------------------------ 141 | 142 | # One entry per manual page. List of tuples 143 | # (source start file, name, description, authors, manual section). 144 | man_pages = [ 145 | (master_doc, 'dcase2018_task2', 'dcase2018_task2 Documentation', 146 | [author], 1) 147 | ] 148 | 149 | 150 | # -- Options for Texinfo output ---------------------------------------------- 151 | 152 | # Grouping the document tree into Texinfo files. List of tuples 153 | # (source start file, target name, title, author, 154 | # dir menu entry, description, category) 155 | texinfo_documents = [ 156 | (master_doc, 'dcase2018_task2', 'dcase2018_task2 Documentation', 157 | author, 'dcase2018_task2', 'Surrey CVSSP DCASE 2018 Task 2 system.', 158 | 'Miscellaneous'), 159 | ] 160 | 161 | 162 | # -- Extension configuration ------------------------------------------------- 163 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. dcase2018_task2 documentation master file, created by 2 | sphinx-quickstart on Wed Aug 8 16:30:23 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to dcase2018_task2's documentation! 7 | =========================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/source/config.rst: -------------------------------------------------------------------------------- 1 | config package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | config.dcase2018\_task2 module 8 | ------------------------------ 9 | 10 | .. automodule:: config.dcase2018_task2 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | config.logmel module 16 | -------------------- 17 | 18 | .. automodule:: config.logmel 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | config.paths module 24 | ------------------- 25 | 26 | .. automodule:: config.paths 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | config.prediction module 32 | ------------------------ 33 | 34 | .. automodule:: config.prediction 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | config.silence module 40 | --------------------- 41 | 42 | .. automodule:: config.silence 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | config.training module 48 | ---------------------- 49 | 50 | .. automodule:: config.training 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | 56 | Module contents 57 | --------------- 58 | 59 | .. automodule:: config 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | -------------------------------------------------------------------------------- /docs/source/convnet.rst: -------------------------------------------------------------------------------- 1 | convnet module 2 | ============== 3 | 4 | .. 
automodule:: convnet 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/evaluation.rst: -------------------------------------------------------------------------------- 1 | evaluation module 2 | ================= 3 | 4 | .. automodule:: evaluation 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/features.rst: -------------------------------------------------------------------------------- 1 | features module 2 | =============== 3 | 4 | .. automodule:: features 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/file_io.rst: -------------------------------------------------------------------------------- 1 | file\_io module 2 | =============== 3 | 4 | .. automodule:: file_io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/gated_conv.rst: -------------------------------------------------------------------------------- 1 | gated\_conv module 2 | ================== 3 | 4 | .. automodule:: gated_conv 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/inference.rst: -------------------------------------------------------------------------------- 1 | inference module 2 | ================ 3 | 4 | .. automodule:: inference 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/main.rst: -------------------------------------------------------------------------------- 1 | main module 2 | =========== 3 | 4 | .. 
automodule:: main 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/mixup.rst: -------------------------------------------------------------------------------- 1 | mixup module 2 | ============ 3 | 4 | .. automodule:: mixup 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | task2 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | config 8 | convnet 9 | evaluation 10 | features 11 | file_io 12 | gated_conv 13 | inference 14 | main 15 | mixup 16 | silence 17 | training 18 | utils 19 | -------------------------------------------------------------------------------- /docs/source/silence.rst: -------------------------------------------------------------------------------- 1 | silence module 2 | ============== 3 | 4 | .. automodule:: silence 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/training.rst: -------------------------------------------------------------------------------- 1 | training module 2 | =============== 3 | 4 | .. automodule:: training 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | utils module 2 | ============ 3 | 4 | .. 
automodule:: utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -i https://pypi.org/simple 2 | absl-py==0.6.1 3 | astor==0.7.1 4 | audioread==2.1.6 5 | decorator==4.3.0 6 | gast==0.2.0 7 | grpcio==1.16.0 8 | h5py==2.8.0 9 | joblib==0.12.5 10 | keras-applications==1.0.6 11 | keras-preprocessing==1.0.5 12 | keras==2.2.4 13 | librosa==0.6.2 14 | llvmlite==0.25.0 15 | markdown==3.0.1 16 | numba==0.40.1 17 | numpy==1.15.4 18 | pandas==0.23.4 19 | protobuf==3.6.1 20 | pydub==0.23.0 21 | python-dateutil==2.7.5 22 | pytz==2018.7 23 | pyyaml==3.13 24 | resampy==0.2.1 25 | scikit-learn==0.20.0 26 | scipy==1.1.0 27 | six==1.11.0 28 | sklearn==0.0 29 | tensorboard==1.12.0 30 | tensorflow-gpu==1.12.0 31 | termcolor==1.1.0 32 | tqdm==4.28.1 33 | werkzeug==0.14.1 34 | wheel==0.32.2 ; python_version >= '3' 35 | -------------------------------------------------------------------------------- /scripts/meta_features.py: -------------------------------------------------------------------------------- 1 | """Generate meta-features for stacking. 2 | 3 | After training a model on the cross-validation folds, the user can 4 | generate predictions for the validation sets -- which constitute the 5 | training set -- and the test set. This script generates features based 6 | on these predictions. For example, if we have five models, and each 7 | model outputs an N x K matrix of predictions, where N is the number of 8 | predicted audio clips and K=41 is the number of classes, this script 9 | concatenates these to produce an N x 5K matrix, i.e. N feature vectors. 10 | 11 | This script requires three command-line arguments: 12 | 13 | * pred_path: Path to predictions directory. 14 | * pred_type: Either ``'training'`` or ``'test'``. 15 | * output_path: Output file path of meta-features. 
16 | 17 | It is assumed that the relevant predictions have already been generated 18 | for each fold. This script merges the fold predictions into one. 19 | """ 20 | 21 | import argparse 22 | import os.path 23 | 24 | import h5py 25 | import numpy as np 26 | import pandas as pd 27 | 28 | 29 | MODELS = [ 30 | 'jul28_pydub_gcnn', 31 | 'jul28_pydub_gcnn_1s', 32 | 'jul31_pydub_vgg13', 33 | 'jul31_pydub_vgg13_1s', 34 | 'jul28_pydub_crnn', 35 | 'jul28_pydub_crnn_1s', 36 | 'jul25_pydub_gcrnn', 37 | 'jul30_pydub_gcrnn_1s', 38 | ] 39 | """The training IDs of the models to use.""" 40 | 41 | 42 | def merge_predictions(base_path, pred_type, n_folds=5): 43 | """Merge the predictions of the training folds. 44 | 45 | If the predictions are for the training set, they are collated. If 46 | they are for the test set, they are averaged. 47 | 48 | Args: 49 | base_path (str): Path to predictions directory. 50 | pred_type (str): Either ``'training'`` or ``'test'``. 51 | n_folds (int): Number of training folds. 52 | 53 | Returns: 54 | pd.DataFrame: The merged predictions. 
55 | """ 56 | dfs = [] 57 | for i in range(n_folds): 58 | name = 'fold' if pred_type == 'training' else pred_type 59 | path = os.path.join(base_path, 'predictions_%s%d.csv' % (name, i)) 60 | dfs.append(pd.read_csv(path, index_col=0)) 61 | 62 | df = pd.concat(dfs) 63 | if pred_type == 'training': 64 | metadata_path = '/vol/vssp/msos/ti/dcase2018/task2/metadata/train.csv' 65 | df_train = pd.read_csv(metadata_path, index_col=0) 66 | return df.loc[df_train.index] 67 | if pred_type == 'test': 68 | return df.groupby(level=0).mean() 69 | 70 | 71 | # Parse command line arguments 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument('pred_path', help='path to predictions directory') 74 | parser.add_argument('pred_type', help='either "training" or "test"') 75 | parser.add_argument('output_path', help='output file path') 76 | args = parser.parse_args() 77 | 78 | # Collect predictions for each model 79 | feats = [] 80 | top_preds = [] 81 | for model in MODELS: 82 | path = os.path.join(args.pred_path, model) 83 | df = merge_predictions(path, args.pred_type) 84 | feats.append(df.values) 85 | 86 | top_preds.append(df.idxmax(axis=1).astype('category').cat.codes) 87 | 88 | # Print correlation matrix 89 | print(pd.concat(top_preds, axis=1).corr()) 90 | 91 | # Save meta-features to disk 92 | feats = np.stack(feats, axis=1) 93 | feats = np.reshape(feats, (feats.shape[0], -1)) 94 | with h5py.File(args.output_path, 'w') as f: 95 | f.create_dataset('F', data=feats) 96 | f.create_dataset('names', data=top_preds[0].index.values, 97 | dtype=h5py.special_dtype(vlen=str)) 98 | -------------------------------------------------------------------------------- /scripts/predict_stack.py: -------------------------------------------------------------------------------- 1 | """Predict labels for the test set using a second-level classifier. 2 | 3 | This script trains a logistic regression classifier on the training set 4 | meta-features created using the ``meta_features.py`` script. 
It then 5 | generates predictions for either the training set or the test set. The 6 | former refers to training and predicting each fold. 7 | 8 | This script requires three command-line arguments: 9 | 10 | * train_path: Path to training features. 11 | * metadata_path: Path to training metadata. 12 | * output_path: Output file path. 13 | 14 | It also takes an optional argument: 15 | 16 | * --test_path: Path to test features. If this is specified, the script 17 | will generate predictions for the test set and write them to a 18 | submission file. Otherwise, it will generate predictions for the 19 | training set on a fold-by-fold basis and write them to a csv file. 20 | """ 21 | 22 | import argparse 23 | import sys 24 | 25 | import h5py 26 | import numpy as np 27 | import pandas as pd 28 | 29 | from sklearn.linear_model import LogisticRegression 30 | 31 | sys.path.append('task2') 32 | 33 | import file_io as io 34 | import utils as utils 35 | 36 | 37 | def train(x, df): 38 | """Train a logistic regression classifier. 39 | 40 | Args: 41 | x (np.ndarray): Training data. 42 | df (pd.DataFrame): Training metadata. 43 | 44 | Returns: 45 | The trained classifier. 
46 | """ 47 | y = df.label.astype('category').cat.codes.values 48 | sample_weight = np.ones(len(x)) 49 | sample_weight[df.manually_verified == 0] = 0.65 50 | 51 | clf = LogisticRegression( 52 | penalty='l2', 53 | tol=0.0001, 54 | C=1.0, 55 | random_state=1000, 56 | class_weight='balanced', 57 | ) 58 | clf.fit(x, y, sample_weight=sample_weight) 59 | 60 | return clf 61 | 62 | 63 | # Parse command line arguments 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('train_path', help='path to training features') 66 | parser.add_argument('metadata_path', help='path to training metadata') 67 | parser.add_argument('output_path', help='output file path') 68 | parser.add_argument('--test_path', help='path to test features') 69 | args = parser.parse_args() 70 | 71 | # Load training data 72 | with h5py.File(args.train_path, 'r') as f: 73 | x_train = np.array(f['F']) 74 | 75 | df_train = pd.read_csv(args.metadata_path, index_col=0) 76 | y_train = df_train.label.astype('category').cat.codes.values 77 | 78 | if args.test_path: 79 | # Load test data 80 | with h5py.File(args.test_path, 'r') as f: 81 | x_test = np.array(f['F']) 82 | 83 | index = pd.Index(f['names'], name='fname') 84 | 85 | # Train and predict the test data 86 | clf = train(x_train, df_train) 87 | y_pred = clf.predict_proba(x_test) 88 | 89 | # Write to a submission file. 
90 | df_pred = pd.DataFrame(y_pred, index=index, columns=utils.LABELS) 91 | io.write_predictions(df_pred, args.output_path) 92 | else: 93 | index = pd.Index([], name='fname') 94 | 95 | # Train and predict for each fold and concatenate the predictions 96 | y_preds = [] 97 | for fold in range(5): 98 | mask = df_train.fold == fold 99 | index = index.append(df_train[mask].index) 100 | clf = train(x_train[~mask], df_train[~mask]) 101 | y_preds.append(clf.predict_proba(x_train[mask])) 102 | y_pred = np.concatenate(y_preds) 103 | 104 | # Write to a CSV file 105 | df_pred = pd.DataFrame(y_pred, index=index, columns=utils.LABELS) 106 | df_pred = df_pred.loc[df_train.index] 107 | df_pred.to_csv(args.output_path) 108 | -------------------------------------------------------------------------------- /scripts/relabel.py: -------------------------------------------------------------------------------- 1 | """Relabel/promote training examples based on predicted labels. 2 | 3 | This script is for pseudo-labeling non-verified examples. It can also 4 | promote non-verified examples to verified if the predicted labels match 5 | the ground truth labels. In both cases, the confidence of the prediction 6 | must exceed a certain threshold. 7 | 8 | This script requires three command-line arguments: 9 | 10 | * metadata_path: Path to metadata file containing ground truth. 11 | * pred_path: Path to training predictions. 12 | * output_path: Output file path. 13 | 14 | It also takes optional arguments: 15 | 16 | * relabel_threshold: Confidence threshold for relabeling. 17 | * promote_threshold: Confidence threshold for promotion. 
18 | """ 19 | 20 | import argparse 21 | 22 | import h5py 23 | import numpy as np 24 | import pandas as pd 25 | 26 | 27 | # Parse command line arguments 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('metadata_path', help='path to metadata') 30 | parser.add_argument('pred_path', help='path to predictions') 31 | parser.add_argument('output_path', help='output file path') 32 | parser.add_argument('--relabel_threshold', type=float, default=0, 33 | help='confidence threshold for relabeling') 34 | parser.add_argument('--promote_threshold', type=float, default=1.0, 35 | help='confidence threshold for promotion') 36 | args = parser.parse_args() 37 | 38 | df_true = pd.read_csv(args.metadata_path, index_col=0) 39 | df_pred = pd.read_csv(args.pred_path, index_col=0) 40 | top_label = df_pred.idxmax(axis=1) 41 | confidence = df_pred.max(axis=1) 42 | 43 | # Determine which examples should be relabeled or promoted 44 | relabel_mask = (df_true.manually_verified == 0) \ 45 | & (top_label != df_true.label) \ 46 | & (confidence > args.relabel_threshold) 47 | promote_mask = (df_true.manually_verified == 0) \ 48 | & (top_label == df_true.label) \ 49 | & (confidence > args.promote_threshold) 50 | 51 | df_true.loc[relabel_mask, 'label'] = top_label[relabel_mask] 52 | print('%d examples relabeled' % sum(relabel_mask)) 53 | 54 | df_true.loc[promote_mask, 'manually_verified'] = 2 55 | print('%d examples promoted' % sum(promote_mask)) 56 | 57 | # Save as a new metadata file 58 | df_true.to_csv(args.output_path) 59 | -------------------------------------------------------------------------------- /scripts/select_folds.py: -------------------------------------------------------------------------------- 1 | """Split the training set into K folds. 2 | 3 | This script requires three command-line arguments: 4 | 5 | * metadata_path: Path to training set metadata. 6 | * output_path: Output file path. 7 | * n_folds: Number of folds to use. 
import argparse

import pandas as pd

from sklearn.model_selection import StratifiedKFold


# Parse command line arguments. Note: n_folds is optional and
# defaults to 5.
parser = argparse.ArgumentParser()
parser.add_argument('metadata_path', help='path to training set metadata')
parser.add_argument('output_path', help='output metadata file path')
parser.add_argument('--n_folds', type=int, default=5,
                    help='number of folds to use')
args = parser.parse_args()

# Create dummy labels to ensure each fold has a similar number of
# manually verified examples.
df = pd.read_csv(args.metadata_path, index_col=0)
labels = df.label + df.manually_verified.astype(str)

# Assign a fold number to each example
df['fold'] = -1
skf = StratifiedKFold(args.n_folds)
for i, (_, te) in enumerate(skf.split(df.index, labels)):
    # Look the 'fold' column up by name rather than hard-coding its
    # position (previously iloc column 2), which silently corrupted
    # data if the metadata gained or lost a column.
    df.iloc[te, df.columns.get_loc('fold')] = i

print('Number of verified examples per fold:')
print([sum((df.fold == i) & (df.manually_verified == 1))
       for i in range(args.n_folds)])

# Save new metadata file to disk
df.to_csv(args.output_path)
def to_dataset(name, preprocessed=True):
    """Return the Dataset instance corresponding to the given name.

    Args:
        name (str): Name of dataset.
        preprocessed (bool): Whether to return the preprocessed instance.

    Returns:
        The Dataset instance corresponding to the given name, or
        ``None`` if the name is not recognized.
    """
    # Map each dataset name to its (preprocessed, raw) instance pair
    lookup = {
        'training': (preprocessed_training_set, training_set),
        'test': (preprocessed_test_set, test_set),
    }
    if name not in lookup:
        return None
    preprocessed_instance, raw_instance = lookup[name]
    return preprocessed_instance if preprocessed else raw_instance
'{}_results.csv') 30 | """str: Path to the file containing results.""" 31 | -------------------------------------------------------------------------------- /task2/config/prediction.py: -------------------------------------------------------------------------------- 1 | prediction_epochs = 'val_map' 2 | """Specification for which models (epochs) to select for prediction. 3 | 4 | Either a list of epoch numbers or a string specifying the metric to be 5 | used to select the top epochs. 6 | """ 7 | 8 | threshold = -1 9 | """number: Number for thresholding audio tagging predictions. 10 | 11 | A value of -1 indicates that the most probable label should be selected 12 | instead of selecting labels that surpass a certain threshold. 13 | """ 14 | -------------------------------------------------------------------------------- /task2/config/silence.py: -------------------------------------------------------------------------------- 1 | n_window = 1024 2 | """int: Length of a frame used for silence detection.""" 3 | 4 | default_threshold = -48 5 | """int: Default threshold for silence.""" 6 | 7 | transients_threshold = -56 8 | """int: Threshold for transient audio signals.""" 9 | 10 | min_silence = 500 11 | """int: Minimum length of silence between two non-silent segments.""" 12 | 13 | keep_silence = 400 14 | """int: Amount of start/end silence to keep for each audio segment.""" 15 | -------------------------------------------------------------------------------- /task2/config/training.py: -------------------------------------------------------------------------------- 1 | training_id = 'jul31_vgg13' 2 | """str: A string identifying this particular training instance.""" 3 | 4 | initial_seed = 1000 5 | """int: Fixed seed used prior to training.""" 6 | 7 | batch_size = 128 8 | """int: The number of samples in a mini batch.""" 9 | 10 | n_epochs = 40 11 | """int: The number of epochs to train the network for. 
12 | 13 | A value of -1 indicates an early stopping condition should be used. 14 | """ 15 | 16 | learning_rate = {'initial': 0.0005, 17 | 'decay': 0.90, 18 | 'decay_rate': 2., 19 | } 20 | """dict: Learning rate hyperparameters for SGD. 21 | 22 | Keyword Args: 23 | initial (float): Initial learning rate. 24 | decay (float): Multiplicative factor for learning rate decay. A 25 | value of 1 indicates the learning rate should not be decayed. 26 | decay_rate (float): Number of epochs until learning rate is decayed. 27 | """ 28 | -------------------------------------------------------------------------------- /task2/convnet.py: -------------------------------------------------------------------------------- 1 | import keras.backend as K 2 | from keras.layers import BatchNormalization 3 | from keras.layers import Bidirectional 4 | from keras.layers import Conv2D 5 | from keras.layers import Dense 6 | from keras.layers import GRU 7 | from keras.layers import Input 8 | from keras.layers import Lambda 9 | from keras.layers import MaxPooling2D 10 | from keras.layers import GlobalAveragePooling1D 11 | from keras.layers import GlobalAveragePooling2D 12 | from keras.models import Model 13 | 14 | import gated_conv 15 | 16 | 17 | def vgg13(input_shape, n_classes): 18 | """Create a VGG13-style model. 19 | 20 | Args: 21 | input_shape (tuple): Shape of the input tensor. 22 | n_classes (int): Number of classes for classification. 23 | 24 | Returns: 25 | A Keras model of the VGG13 architecture. 
26 | """ 27 | input_tensor = Input(shape=input_shape, name='input_tensor') 28 | 29 | x = _conv_block(input_tensor, n_filters=64) 30 | x = _conv_block(x, n_filters=128) 31 | x = _conv_block(x, n_filters=256) 32 | x = _conv_block(x, n_filters=512) 33 | x = _conv_block(x, n_filters=512) 34 | 35 | x = GlobalAveragePooling2D()(x) 36 | 37 | x = Dense(n_classes, activation='softmax')(x) 38 | return Model(input_tensor, x, name='vgg13') 39 | 40 | 41 | def gcnn(input_shape, n_classes): 42 | """Create a VGG13 model based on gated convolutions. 43 | 44 | Args: 45 | input_shape (tuple): Shape of the input tensor. 46 | n_classes (int): Number of classes for classification. 47 | 48 | Returns: 49 | A Keras model of the GCNN architecture. 50 | """ 51 | input_tensor = Input(shape=input_shape, name='input_tensor') 52 | 53 | x = gated_conv.block(input_tensor, n_filters=64) 54 | x = gated_conv.block(x, n_filters=128) 55 | x = gated_conv.block(x, n_filters=256) 56 | x = gated_conv.block(x, n_filters=512) 57 | x = gated_conv.block(x, n_filters=512) 58 | 59 | x = GlobalAveragePooling2D()(x) 60 | 61 | x = Dense(n_classes, activation='softmax')(x) 62 | return Model(input_tensor, x, name='gcnn') 63 | 64 | 65 | def crnn(input_shape, n_classes): 66 | """Create a convolutional recurrent neural network (CRNN) model. 67 | 68 | Args: 69 | input_shape (tuple): Shape of the input tensor. 70 | n_classes (int): Number of classes for classification. 71 | 72 | Returns: 73 | A Keras model of the CRNN architecture. 
74 | """ 75 | input_tensor = Input(shape=input_shape, name='input_tensor') 76 | 77 | x = _conv_block(input_tensor, n_filters=64) 78 | x = _conv_block(x, n_filters=128) 79 | x = _conv_block(x, n_filters=256) 80 | x = _conv_block(x, n_filters=512) 81 | x = _conv_block(x, n_filters=512) 82 | 83 | x = Lambda(lambda x: K.mean(x, axis=2))(x) 84 | x = Bidirectional(GRU(512, activation='relu', 85 | return_sequences=True))(x) 86 | x = GlobalAveragePooling1D()(x) 87 | 88 | x = Dense(n_classes, activation='softmax')(x) 89 | return Model(input_tensor, x, name='crnn') 90 | 91 | 92 | def gcrnn(input_shape, n_classes): 93 | """Create a CRNN model based on gated convolutions. 94 | 95 | Args: 96 | input_shape (tuple): Shape of the input tensor. 97 | n_classes (int): Number of classes for classification. 98 | 99 | Returns: 100 | A Keras model of the GCRNN architecture. 101 | """ 102 | input_tensor = Input(shape=input_shape, name='input_tensor') 103 | 104 | x = gated_conv.block(input_tensor, n_filters=64) 105 | x = gated_conv.block(x, n_filters=128) 106 | x = gated_conv.block(x, n_filters=256) 107 | x = gated_conv.block(x, n_filters=512) 108 | x = gated_conv.block(x, n_filters=512) 109 | 110 | x = Lambda(lambda x: K.mean(x, axis=2))(x) 111 | x = Bidirectional(GRU(512, activation='relu', 112 | return_sequences=True))(x) 113 | x = GlobalAveragePooling1D()(x) 114 | 115 | x = Dense(n_classes, activation='softmax')(x) 116 | return Model(input_tensor, x, name='crnn') 117 | 118 | 119 | def _conv_block(x, n_filters, kernel_size=(3, 3), pool_size=(2, 2), **kwargs): 120 | """Apply two batch-normalized convolutions followed by max pooling. 121 | 122 | Args: 123 | x (tensor): Input tensor. 124 | n_filters (int): Number of convolution filters. 125 | kernel_size (int or tuple): Convolution kernel size. 126 | pool_size (int or tuple): Max pooling parameter. 127 | kwargs: Other keyword arguments. 128 | 129 | Returns: 130 | tensor: The output tensor. 
131 | """ 132 | x = _conv_bn(x, n_filters, kernel_size, **kwargs) 133 | x = _conv_bn(x, n_filters, kernel_size, **kwargs) 134 | return MaxPooling2D(pool_size=pool_size)(x) 135 | 136 | 137 | def _conv_bn(x, n_filters, kernel_size=(3, 3), **kwargs): 138 | """Apply a convolution operation followed by batch normalization. 139 | 140 | Args: 141 | x (tensor): Input tensor. 142 | n_filters (int): Number of convolution filters. 143 | kernel_size (int or tuple): Convolution kernel size. 144 | kwargs: Other keyword arguments. 145 | 146 | Returns: 147 | tensor: The output tensor. 148 | """ 149 | x = Conv2D(n_filters, 150 | kernel_size=kernel_size, 151 | padding='same', 152 | activation='relu', 153 | **kwargs)(x) 154 | return BatchNormalization(axis=-1)(x) 155 | -------------------------------------------------------------------------------- /task2/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import sklearn.metrics as metrics 4 | 5 | import inference 6 | import utils 7 | 8 | 9 | def evaluate_audio_tagging(y_true, y_pred, threshold=-1): 10 | """Evaluate audio tagging performance. 11 | 12 | Three types of scores are returned: 13 | 14 | * Class-wise 15 | * Macro-averaged 16 | * Micro-averaged 17 | 18 | The ground truth values and predictions should both be passed in a 19 | 2D array in which the first dimension is the sample axis and the 20 | second is the class axis. 21 | 22 | Args: 23 | y_true (np.ndarray): 2D array of ground truth values. 24 | y_pred (np.ndarray): 2D array of predictions. 25 | threshold (number): Threshold used to binarize predictions. 26 | 27 | Returns: 28 | pd.DataFrame: Table of evaluation results. 
29 | """ 30 | y_pred_b = inference.binarize_predictions(y_pred, threshold) 31 | 32 | class_scores = compute_audio_tagging_scores(y_true, y_pred, y_pred_b).T 33 | macro_scores = np.mean(class_scores, axis=0, keepdims=True) 34 | micro_scores = compute_audio_tagging_scores( 35 | y_true, y_pred, y_pred_b, average='micro') 36 | 37 | # Create DataFrame of evaluation results 38 | data = np.concatenate((class_scores, macro_scores, micro_scores[None, :])) 39 | index = utils.LABELS + ['Macro Average', 'Micro Average'] 40 | columns = ['MAP@3', 'F-score', 'Precision', 'Recall'] 41 | return pd.DataFrame(data, pd.Index(index, name='Class'), columns) 42 | 43 | 44 | def compute_audio_tagging_scores(y_true, y_pred, y_pred_b, average=None): 45 | """Compute prediction scores using several performance metrics. 46 | 47 | The following metrics are used: 48 | 49 | * MAP@3 50 | * F1 Score 51 | * Precision 52 | * Recall 53 | 54 | Args: 55 | y_true (np.ndarray): 2D array of ground truth values. 56 | y_pred (np.ndarray): 2D array of prediction probabilities. 57 | y_pred_b (np.ndarray): 2D array of binary predictions. 58 | average (str): The averaging method. Either ``'macro'``, 59 | ``'micro'``, or ``None``, where the latter is used to 60 | disable averaging. 61 | 62 | Returns: 63 | np.ndarray: Scores corresponding to the metrics used. 64 | """ 65 | # Compute MAP@3 66 | map_3 = compute_map(y_true, y_pred, k=3, class_wise=average is None) 67 | 68 | # Compute precision and recall scores 69 | precision, recall, f1_score, _ = metrics.precision_recall_fscore_support( 70 | y_true, y_pred_b, average=average) 71 | 72 | return np.array([map_3, f1_score, precision, recall]) 73 | 74 | 75 | def compute_map(y_true, y_pred, k=3, class_wise=False): 76 | """Compute the mean average precision at k (MAP@k). 77 | 78 | Args: 79 | y_true (np.ndarray): 2D array of ground truth values. 80 | y_pred (np.ndarray): 2D array of predictions. 81 | k (int): The maximum number of predicted elements. 
def extract_dataset(dataset_path,
                    file_names,
                    extractor,
                    output_path,
                    recompute=False,
                    ):
    """Extract features from the audio clips in a dataset.

    Features are written incrementally to an HDF5 file, so an
    interrupted run can be resumed without recomputing entries that
    were already extracted (unless ``recompute`` is set).

    Args:
        dataset_path (str): Path of directory containing dataset.
        file_names (array_like): List of file names for the audio clips.
        extractor: Class instance for feature extraction.
        output_path (str): File path of output HDF5 file.
        recompute (bool): Whether to extract features that already exist
            in the HDF5 file.
    """
    # Create/load the HDF5 file to store the feature vectors
    with h5py.File(output_path, 'a') as f:
        size = len(file_names)  # Size of dataset

        # Create/load feature vector dataset and timestamp dataset.
        # Variable-length dtypes are used because clips differ in
        # duration, so the flattened feature vectors differ in length.
        feats = f.require_dataset('F', (size,),
                                  dtype=h5py.special_dtype(vlen=float))
        timestamps = f.require_dataset('timestamps', (size,),
                                       dtype=h5py.special_dtype(vlen=bytes))

        # Record shape of reference feature vector. Used to infer the
        # original shape of a vector prior to flattening.
        feats.attrs['shape'] = extractor.output_shape(1)[1:]

        for i, name in enumerate(tqdm(file_names)):
            # Skip if existing feature vector should not be recomputed.
            # An empty timestamp means the entry was never written.
            if timestamps[i] and not recompute:
                continue

            path = os.path.join(dataset_path, name)
            x, sample_rate = librosa.load(path, sr=None)
            if sample_rate is None:
                print('Warning: Skipping {}'.format(name))
                continue

            # Extract and save to dataset as flattened array
            feats[i] = extractor.extract(x, sample_rate).flatten()
            # Record timestamp in ISO format
            timestamps[i] = dt.datetime.now().isoformat()
class LogmelExtractor(object):
    """Feature extractor for logmel representations.

    A logmel feature vector is a spectrogram representation that has
    been scaled using a Mel filterbank and a log nonlinearity.

    Args:
        sample_rate (number): Target resampling rate.
        n_window (int): Number of bins in each spectrogram frame.
        hop_length (int): Number of samples between frames.
        n_mels (int): Number of Mel bands.

    Attributes:
        sample_rate (number): Target resampling rate.
        n_window (int): Number of bins in each spectrogram frame.
        hop_length (int): Number of samples between frames.
        mel_fb (np.ndarray): Mel filterbank matrix.
    """

    def __init__(self,
                 sample_rate=16000,
                 n_window=1024,
                 hop_length=512,
                 n_mels=64,
                 ):
        self.sample_rate = sample_rate
        self.n_window = n_window
        self.hop_length = hop_length

        # Precompute the Mel filterbank so each extract() call only
        # needs a matrix multiplication
        self.mel_fb = librosa.filters.mel(sr=sample_rate,
                                          n_fft=n_window,
                                          n_mels=n_mels,
                                          )

    def output_shape(self, clip_duration):
        """Determine the shape of a logmel feature vector.

        Args:
            clip_duration (number): Duration of the input time-series
                signal given in seconds.

        Returns:
            tuple: The shape of a logmel feature vector.
        """
        n_samples = clip_duration * self.sample_rate
        n_frames = n_samples // self.hop_length + 1
        n_bands = self.mel_fb.shape[0]
        return (n_frames, n_bands)

    def extract(self, x, sample_rate):
        """Transform the given signal into a logmel feature vector.

        Args:
            x (np.ndarray): Input time-series signal.
            sample_rate (number): Sampling rate of signal.

        Returns:
            np.ndarray: The logmel feature vector.
        """
        # Bring the signal to the extractor's target sampling rate
        x = librosa.resample(x, sample_rate, self.sample_rate)

        # Power spectrogram via the short-time Fourier transform
        spectrogram = np.abs(
            librosa.stft(x, n_fft=self.n_window,
                         hop_length=self.hop_length)) ** 2
        # Map onto the Mel frequency scale (transposed to time-major)
        mel_spectrogram = np.dot(self.mel_fb, spectrogram).T
        # Apply log nonlinearity, referenced to the spectrogram peak
        return librosa.power_to_db(mel_spectrogram, ref=np.max, top_db=None)
12 | """ 13 | return pd.read_csv(path, index_col=0) 14 | 15 | 16 | def read_training_history(path, ordering=None): 17 | """Read training history from the specified CSV file. 18 | 19 | Args: 20 | path (str): Path of CSV file. 21 | ordering (str): Column name to order the entries with respect to 22 | or ``None`` if the entries should remain unordered. 23 | 24 | Returns: 25 | pd.DataFrame: The training history. 26 | """ 27 | df = pd.read_csv(path, index_col=0) 28 | ascending = ordering not in ['val_acc', 'val_map'] 29 | if ordering: 30 | df.sort_values(by=ordering, ascending=ascending, inplace=True) 31 | return df 32 | 33 | 34 | def write_predictions(y_pred, output_path): 35 | """Write classification predictions to a CSV file. 36 | 37 | Args: 38 | y_pred (pd.DataFrame): Table of predictions. 39 | output_path (str): Output file path. 40 | """ 41 | top_3 = y_pred.apply(lambda x: ' '.join(x.nlargest(3).index), axis=1) 42 | pd.Series(top_3, name='label').to_csv(output_path, header=True) 43 | -------------------------------------------------------------------------------- /task2/gated_conv.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Activation 2 | from keras.layers import BatchNormalization 3 | from keras.layers import Conv2D 4 | from keras.layers import MaxPooling2D 5 | from keras.layers import Multiply 6 | 7 | 8 | def block(x, n_filters=64, pool_size=(2, 2)): 9 | """Apply two gated convolutions followed by a max-pooling operation. 10 | 11 | Batch normalization is applied for regularization. 12 | 13 | Args: 14 | x (tensor): Input tensor to transform. 15 | n_filters (int): Number of filters for each gated convolution. 16 | pool_size (int or tuple): Pool size of max-pooling operation. 17 | 18 | Returns: 19 | A Keras tensor of the resulting output. 
20 | """ 21 | x = GatedConv(n_filters, padding='same')(x) 22 | x = BatchNormalization(axis=-1)(x) 23 | 24 | x = GatedConv(n_filters, padding='same')(x) 25 | x = BatchNormalization(axis=-1)(x) 26 | 27 | return MaxPooling2D(pool_size=pool_size)(x) 28 | 29 | 30 | class GatedConv(Conv2D): 31 | """A Keras layer implementing gated convolutions [1]_. 32 | 33 | Args: 34 | n_filters (int): Number of output filters. 35 | kernel_size (int or tuple): Size of convolution kernel. 36 | strides (int or tuple): Strides of the convolution. 37 | padding (str): One of ``'valid'`` or ``'same'``. 38 | kwargs: Other layer keyword arguments. 39 | 40 | References: 41 | .. [1] Y. N. Dauphin, A. Fan, M. Auli, and D. Grangier, 42 | “Language modeling with gated convolutional networks,” in 43 | Proc. 34th Int. Conf. Mach. Learn. (ICML), vol. 70, 44 | Sydney, Australia, 2017, pp. 933–941. 45 | """ 46 | 47 | def __init__(self, n_filters=64, kernel_size=(3, 3), **kwargs): 48 | super(GatedConv, self).__init__(filters=n_filters * 2, 49 | kernel_size=kernel_size, 50 | **kwargs) 51 | 52 | self.n_filters = n_filters 53 | 54 | def call(self, inputs): 55 | """Apply gated convolution.""" 56 | output = super(GatedConv, self).call(inputs) 57 | 58 | n_filters = self.n_filters 59 | linear = Activation('linear')(output[:, :, :, :n_filters]) 60 | sigmoid = Activation('sigmoid')(output[:, :, :, n_filters:]) 61 | 62 | return Multiply()([linear, sigmoid]) 63 | 64 | def compute_output_shape(self, input_shape): 65 | """Compute shape of layer output.""" 66 | output_shape = super(GatedConv, self).compute_output_shape(input_shape) 67 | return tuple(output_shape[:3]) + (self.n_filters,) 68 | 69 | def get_config(self): 70 | """Return the config of the layer.""" 71 | config = super(GatedConv, self).get_config() 72 | config['n_filters'] = self.n_filters 73 | del config['filters'] 74 | return config 75 | -------------------------------------------------------------------------------- /task2/inference.py: 
def binarize_predictions(y_pred, threshold=-1):
    """Convert prediction probabilities to binary values.

    This function is intended for audio tagging predictions. The
    predictions should be passed in a 2D array in which the first
    dimension is the sample axis and the second is the class axis.

    Args:
        y_pred (np.ndarray): 2D array of predictions.
        threshold (float or list): Threshold used to determine the
            binary values. If a list is given, it must specify a
            threshold for each class. If the value is -1, the label
            with the highest probability is selected.

    Returns:
        np.ndarray: Binarized prediction values.
    """
    # Bug fix: comparing a per-class threshold list against 0 raised
    # a TypeError in Python 3, so list thresholds never worked as
    # documented. A non-scalar threshold is now broadcast across the
    # class axis.
    if np.ndim(threshold) > 0:
        return (y_pred > np.asarray(threshold)).astype(int)
    if threshold > 0:
        return (y_pred > threshold).astype(int)

    # Otherwise select the single most probable label per sample and
    # one-hot encode it
    dtype = pd.api.types.CategoricalDtype(categories=range(y_pred.shape[1]))
    return utils.to_categorical(pd.Series(
        np.argmax(y_pred, axis=1), dtype=dtype))
44 | """ 45 | pred = pd.DataFrame(y_pred, index=index, columns=utils.LABELS) 46 | group = utils.group_by_name(pred) 47 | if op == 'first': 48 | pred = group.first() 49 | elif op == 'mean': 50 | pred = group.mean() 51 | elif op == 'gmean': 52 | # TODO: Improve performance as this operation is slow 53 | pred = group.agg(lambda x: stats.gmean(x + 1e-8)) 54 | 55 | pred.index.name = index.name 56 | 57 | return pred 58 | -------------------------------------------------------------------------------- /task2/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import sys 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from tqdm import tqdm 9 | 10 | import config as cfg 11 | import file_io as io 12 | import utils 13 | 14 | 15 | def main(): 16 | """Execute a task based on the given command-line arguments. 17 | 18 | This function is the main entry-point of the program. It allows the 19 | user to extract features, train a model, generate predictions, or 20 | evaluate predictions using the command-line interface. 
21 | """ 22 | parser = argparse.ArgumentParser() 23 | subparsers = parser.add_subparsers(dest='mode') 24 | 25 | parser_preprocess = subparsers.add_parser('preprocess') 26 | parser_preprocess.add_argument('dataset', choices=['training', 'test']) 27 | 28 | # Add sub-parser for feature extraction 29 | parser_extract = subparsers.add_parser('extract') 30 | parser_extract.add_argument('dataset', choices=['training', 'test']) 31 | parser_extract.add_argument('--recompute', action='store_true') 32 | 33 | # Add sub-parser for training 34 | parser_train = subparsers.add_parser('train') 35 | parser_train.add_argument('--model', 36 | choices=['vgg13', 37 | 'gcnn', 38 | 'crnn', 39 | 'gcrnn', 40 | ], 41 | default='gcnn', 42 | ) 43 | parser_train.add_argument('--fold', type=int, default=-1) 44 | parser_train.add_argument('--class_weight', action='store_true') 45 | parser_train.add_argument('--sample_weight', type=float) 46 | 47 | # Add sub-parser for inference 48 | parser_predict = subparsers.add_parser('predict') 49 | parser_predict.add_argument('dataset', choices=['training', 'test']) 50 | parser_predict.add_argument('--fold', type=int, default=-1) 51 | 52 | # Add sub-parser for evaluation 53 | parser_evaluate = subparsers.add_parser('evaluate') 54 | parser_evaluate.add_argument('dataset', choices=['training', 'test']) 55 | parser_evaluate.add_argument('--fold', type=int, default=-1) 56 | 57 | args = parser.parse_args() 58 | if args.mode == 'preprocess': 59 | preprocess(cfg.to_dataset(args.dataset, preprocessed=False)) 60 | elif args.mode == 'extract': 61 | extract(cfg.to_dataset(args.dataset), args.recompute) 62 | elif args.mode == 'train': 63 | train(args.model, args.fold, args.class_weight, args.sample_weight) 64 | elif args.mode == 'predict': 65 | predict(cfg.to_dataset(args.dataset), args.fold) 66 | elif args.mode == 'evaluate': 67 | dataset = cfg.to_dataset(args.dataset, preprocessed=False) 68 | evaluate_audio_tagging(dataset, args.fold) 69 | 70 | 71 | def 
def preprocess(dataset):
    """Apply preprocessing to the audio clips.

    Each clip is split into non-silent segments, which are saved as
    separate WAV files in the preprocessing directory, and a new
    metadata file is written in which every segment inherits the
    metadata of its source clip.

    Args:
        dataset: Dataset to apply preprocessing to.
    """
    import silence

    # Ensure output directory exists
    output_path = os.path.join(cfg.preprocessing_path, dataset.name)
    os.makedirs(output_path, exist_ok=True)

    # Split each audio clip based on silence
    file_names = []
    df = io.read_metadata(dataset.metadata_path)
    for name in tqdm(df.index):
        file_names += silence.split_audio(
            dataset_path=dataset.path,
            file_name=name,
            output_path=output_path,
            n_window=cfg.n_window,
            default_threshold=cfg.default_threshold,
            transients_threshold=cfg.transients_threshold,
            min_silence=cfg.min_silence,
            keep_silence=cfg.keep_silence,
        )

    # Create new metadata DataFrame by looking up each segment's source
    # clip. NOTE(review): assumes original clip names are exactly eight
    # characters plus '.wav' (the same convention used by
    # utils.group_by_name) -- confirm before using other datasets.
    df = df.loc[[s[:8] + '.wav' for s in file_names]]
    df.index = pd.Index(file_names, name=df.index.name)

    # Save metadata to disk
    df.to_csv(os.path.join(cfg.preprocessing_path, '%s.csv' % dataset.name))
112 | """ 113 | import features 114 | 115 | # Use a logmel representation for feature extraction 116 | extractor = features.LogmelExtractor(cfg.sample_rate, 117 | cfg.n_window, 118 | cfg.hop_length, 119 | cfg.n_mels, 120 | ) 121 | 122 | # Ensure output directory exists and set file path 123 | os.makedirs(cfg.extraction_path, exist_ok=True) 124 | output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5') 125 | 126 | # Save free parameters to disk 127 | utils.log_parameters(cfg.logmel, os.path.join(cfg.extraction_path, 128 | 'parameters.json')) 129 | 130 | # Extract features for each audio clip in the dataset 131 | df = io.read_metadata(dataset.metadata_path) 132 | features.extract_dataset(dataset_path=dataset.path, 133 | file_names=df.index.tolist(), 134 | extractor=extractor, 135 | output_path=output_path, 136 | recompute=recompute, 137 | ) 138 | 139 | 140 | def train(model, fold, use_class_weight, noisy_sample_weight): 141 | """Train the neural network model. 142 | 143 | Args: 144 | model (str): The neural network architecture. 145 | fold (int): The fold to use for validation. 146 | use_class_weight (bool): Whether to use class-wise weights. 147 | noisy_sample_weight (float): Examples that are not verified are 148 | weighted according to this value. 149 | 150 | Note: 151 | For reproducibility, the random seed is set to a fixed value. 
152 | """ 153 | import training 154 | 155 | # Try to create reproducible results 156 | np.random.seed(cfg.initial_seed) 157 | 158 | # Load training data and associated metadata 159 | x, df = _load_data(cfg.to_dataset('training')) 160 | # Get one-hot representation of target values 161 | y = utils.to_categorical(df.label) 162 | 163 | # Split training data into training and validation 164 | if fold >= 0: 165 | mask = df.fold == fold 166 | else: 167 | mask = np.zeros(len(df), dtype=bool) 168 | val_mask = mask & (df.manually_verified == 1) 169 | 170 | tr_x = x[~mask] 171 | tr_y = y[~mask] 172 | val_x = x[val_mask] 173 | val_y = y[val_mask] 174 | val_index = df.index[val_mask] 175 | 176 | # Compute class weights based on number of class examples 177 | if use_class_weight: 178 | group = utils.group_by_name(df) 179 | n_examples = group.first().groupby('label').size().values 180 | class_weight = len(group) / (len(n_examples) * n_examples) 181 | else: 182 | class_weight = None 183 | 184 | # Assign a specific sample weight to unverified examples 185 | if noisy_sample_weight: 186 | sample_weight = df[~mask].manually_verified.values.astype(float) 187 | sample_weight[sample_weight == 0] = noisy_sample_weight 188 | else: 189 | sample_weight = None 190 | 191 | # Ensure output directories exist 192 | fold_dir = str(fold) if fold >= 0 else 'all' 193 | os.makedirs(os.path.join(cfg.model_path, fold_dir), exist_ok=True) 194 | os.makedirs(cfg.log_path.format(fold_dir), exist_ok=True) 195 | 196 | # Save free parameters to disk 197 | utils.log_parameters(cfg.training, os.path.join(cfg.model_path, 198 | 'parameters.json')) 199 | 200 | training.train(tr_x, tr_y, val_x, val_y, val_index, model, fold, 201 | class_weight=class_weight, sample_weight=sample_weight) 202 | 203 | 204 | def predict(dataset, fold): 205 | """Generate predictions for audio tagging. 
def predict(dataset, fold):
    """Generate predictions for audio tagging.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
        fold (int): The specific fold to generate predictions for. Only
            applicable for the training dataset.
    """
    import inference

    # Load input data and associated metadata
    x, df = _load_data(dataset)
    dataset_name = dataset.name
    if dataset.name == 'training':
        # Training-set predictions are only meaningful for a held-out
        # fold, so a concrete fold number is required here
        if fold == -1:
            raise ValueError('Invalid fold: %d' % fold)

        dataset_name += str(fold)
        mask = df.fold == fold
        tr_x = x[~mask]
        x = x[mask]
        df = df[mask]
    else:
        tr_x, tr_df = _load_data(cfg.to_dataset('training'))
        if fold >= 0:
            dataset_name += str(fold)
            tr_x = tr_x[tr_df.fold != fold]

    # Standardize the input using statistics of the data the models
    # were trained on
    generator = utils.fit_scaler(tr_x)
    x = generator.standardize(x)

    # Predict class probabilities for each model (epoch)
    preds = []
    for epoch in _determine_epochs(cfg.prediction_epochs, fold, n=4):
        pred = utils.timeit(
            lambda: _load_model(fold, epoch).predict(x),
            '[Epoch %d] Predicted class probabilities' % epoch)

        preds.append(inference.merge_predictions(pred, df.index))

    # Average the clip-level predictions across the selected epochs
    pred_mean = pd.concat(preds).groupby(level=0).mean()

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset_name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    pred_mean.to_csv(predictions_path % 'predictions')
    io.write_predictions(pred_mean, predictions_path % 'submission')
def evaluate_audio_tagging(dataset, fold):
    """Evaluate the audio tagging predictions and write results.

    Args:
        dataset: Dataset for which to evaluate the predictions.
        fold (int): The fold (validation set) to evaluate.
    """
    import evaluation

    # Load ground truth data
    df = io.read_metadata(dataset.metadata_path)
    if dataset.name == 'training':
        df = df[(df.fold == fold) & (df.manually_verified == 1)]
    elif dataset.name == 'test':
        df = df[df.usage != 'Ignored']
    # Binarize the ground truth and align the columns with the class
    # vocabulary. Predictions are saved with utils.LABELS column order
    # (see inference.merge_predictions), so without the reindex a class
    # that happens to be absent from this subset would shift the
    # columns relative to the predictions and corrupt the evaluation.
    y_true = pd.get_dummies(df.label).reindex(columns=utils.LABELS,
                                              fill_value=0)

    name = dataset.name
    if fold >= 0:
        name += str(fold)

    # Load predictions
    y_pred = io.read_metadata(cfg.predictions_path.format('predictions', name))

    # Ensure only elements common to both y_true and y_pred are selected
    index = y_true.index.intersection(y_pred.index)
    y_true = y_true.loc[index].values
    y_pred = y_pred.loc[index].values

    # Evaluate audio tagging performance
    scores = evaluation.evaluate_audio_tagging(
        y_true, y_pred, threshold=cfg.threshold)

    # Ensure output directory exists and write results
    os.makedirs(os.path.dirname(cfg.results_path), exist_ok=True)
    output_path = cfg.results_path.format(name)
    scores.to_csv(output_path)

    # Print scores to 3 decimal places
    pd.options.display.float_format = '{:,.3f}'.format
    print('\n' + str(scores))
317 | """ 318 | import features 319 | 320 | # Load feature vectors and reshape to 4D tensor 321 | features_path = os.path.join(cfg.extraction_path, dataset.name + '.h5') 322 | x, n_chunks = utils.timeit(lambda: features.load_features(features_path), 323 | 'Loaded features of %s dataset' % dataset.name) 324 | x = np.expand_dims(x, -1) 325 | assert x.ndim == 4 326 | 327 | # Load metadata and duplicate entries based on number of chunks 328 | df = io.read_metadata(dataset.metadata_path) 329 | df = df.loc[np.repeat(df.index, n_chunks)] 330 | 331 | return x, df 332 | 333 | 334 | def _determine_epochs(spec, fold, n=5): 335 | """Return a list of epoch numbers based on the given argument. 336 | 337 | If `spec` is a list, this function simply returns the list. 338 | Otherwise, `spec` should be a string, in which case this function 339 | returns the top `n` epochs based on the training history file 340 | and the contents of `spec`. For example, if `spec` is ``'val_acc'``, 341 | the epochs that achieved the highest accuracy are returned. 342 | 343 | Args: 344 | spec (list or str): A list of epoch numbers or a string 345 | specifying how to select the epoch numbers. 346 | fold (int): Fold number, since determining the top epochs 347 | depends on the fold in question. 348 | n (int): Number of epochs to return (if applicable). 349 | 350 | Returns: 351 | list: The relevant epoch numbers. 352 | """ 353 | if type(spec) is list: 354 | return spec 355 | 356 | fold_dir = str(fold) if fold >= 0 else 'all' 357 | path = cfg.history_path.format(fold_dir) 358 | history = io.read_training_history(path, ordering=spec) 359 | return (history.index.values + 1)[:n] 360 | 361 | 362 | def _load_model(fold, epoch): 363 | """Load model based on specified fold and epoch number. 364 | 365 | Args: 366 | fold (int): Fold used to train the model. 367 | epoch (int): Epoch number of the model to load. 368 | 369 | Returns: 370 | An instance of a Keras model. 
371 | """ 372 | import keras.models 373 | 374 | from gated_conv import GatedConv 375 | 376 | fold_dir = str(fold) if fold >= 0 else 'all' 377 | model_path = glob.glob(os.path.join(cfg.model_path, fold_dir, 378 | '*.%.02d*.h5' % epoch))[0] 379 | 380 | custom_objects = { 381 | 'GatedConv': GatedConv, 382 | } 383 | 384 | return keras.models.load_model(model_path, custom_objects) 385 | 386 | 387 | if __name__ == '__main__': 388 | sys.exit(main()) 389 | -------------------------------------------------------------------------------- /task2/mixup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class MixupGenerator(): 5 | """Implementation of mixup [1]_ data augmentation. 6 | 7 | Args: 8 | x_train (np.ndarray): Array of training data. 9 | y_train (np.ndarray): Target values of the training data. 10 | sample_weight (np.ndarray): Weights for the training data. 11 | batch_size (int): Number of examples in a mini-batch. 12 | alpha (float): Parameter for sampling mixing weights. 13 | generator (ImageDataGenerator): Generator for preprocessing. 14 | 15 | Attributes: 16 | x_train (np.ndarray): Array of training data. 17 | y_train (np.ndarray): Target values of the training data. 18 | sample_weight (np.ndarray): Weights for the training data. 19 | batch_size (int): Number of examples in a mini-batch. 20 | alpha (float): Parameter for sampling mixing weights. 21 | generator (ImageDataGenerator): Generator for preprocessing. 22 | 23 | References: 24 | .. [1] Zhang, H. and Cisse, M. and Dauphin, Y.~N. 
and Lopez-Paz, 25 | “mixup: Beyond Empirical Risk Minimization,” 26 | """ 27 | 28 | def __init__(self, x_train, y_train, sample_weight=None, 29 | batch_size=32, alpha=1.0, generator=None): 30 | self.x_train = x_train 31 | self.y_train = y_train 32 | self.sample_weight = sample_weight 33 | self.batch_size = batch_size 34 | self.alpha = alpha 35 | self.generator = generator 36 | 37 | def __call__(self): 38 | batch_size = self.batch_size 39 | n_classes = self.y_train.shape[1] 40 | n_examples = np.sum(self.y_train, axis=0).astype(int) 41 | indexes = [np.where(self.y_train[:, label] == 1)[0] 42 | for label in range(n_classes)] 43 | offsets = [0] * n_classes 44 | 45 | while True: 46 | # Choose which class each mini-batch example will belong to 47 | labels = np.random.choice(n_classes, size=(batch_size * 2,)) 48 | batch_indexes = np.empty(batch_size * 2, dtype=int) 49 | 50 | for i, label in enumerate(labels): 51 | batch_indexes[i] = indexes[label][offsets[label]] 52 | 53 | offsets[label] += 1 54 | if offsets[label] >= n_examples[label]: 55 | np.random.shuffle(indexes[label]) 56 | offsets[label] = 0 57 | 58 | x, y, sample_weight = self._generate(batch_indexes) 59 | 60 | yield x, y, sample_weight 61 | 62 | def _generate(self, indexes): 63 | # Generate mixing weights using beta distribution 64 | mixup_weights = np.random.beta(a=self.alpha, b=self.alpha, 65 | size=self.batch_size) 66 | 67 | # Mix training data and labels 68 | x = self._mixup(self.x_train, indexes, 69 | mixup_weights[:, None, None, None]) 70 | y = self._mixup(self.y_train, indexes, mixup_weights[:, None]) 71 | 72 | # Mix sample weights if applicable 73 | sample_weight = self.sample_weight 74 | if sample_weight is not None: 75 | sample_weight = self._mixup(sample_weight, indexes, mixup_weights) 76 | 77 | # Apply preprocessing to training data 78 | if self.generator: 79 | for i in range(self.batch_size): 80 | x[i] = self.generator.random_transform(x[i]) 81 | x[i] = self.generator.standardize(x[i]) 82 | 83 | 
def split_audio(dataset_path,
                file_name,
                output_path,
                n_window=1024,
                default_threshold=-56,
                transients_threshold=-56,
                min_silence=500,
                keep_silence=500,
                ):
    """Split an audio clip into non-silent segments.

    This function detects the non-silent segments of an audio clip and
    saves them separately as WAV files in the specified directory.
    Silence is detected on a frame-by-frame basis by thresholding the
    RMS energy of each frame. A non-silent segment is defined to be the
    span of non-silent frames such that two such adjacent frames are
    less than `min_silence` ms apart. `keep_silence` ms of silence is
    also kept at the beginning and end of each segment.

    Args:
        dataset_path (str): Path of directory containing dataset.
        file_name (str): File name of audio clip to be split.
        output_path (str): Path of output directory.
        n_window (int): Number of samples in a frame.
        default_threshold (int): Default silence threshold (in dBFS).
        transients_threshold (int): Silence threshold for transient
            audio signals (in dBFS).
        min_silence (int): Minimum length of silence between segments.
        keep_silence (int): Amount of start/end silence to keep (in ms).

    Returns:
        list: The output file names.
    """
    def _export_segments(segments):
        # Write each segment as '<original name>_<i>.wav'
        fnames = []
        for i, seg in enumerate(segments):
            fname = '{}_{}.wav'.format(os.path.splitext(file_name)[0], i)
            seg.export(os.path.join(output_path, fname), format='wav')
            fnames.append(fname)
        return fnames

    try:
        x = AudioSegment.from_wav(os.path.join(dataset_path, file_name))
    except CouldntDecodeError:
        # Unreadable clips are exported as empty audio
        x = AudioSegment.empty()

    # Skip audio clips that are not longer than the padding
    # Padding refers to the silence that is kept for each segment
    padding = keep_silence * 2
    if x.duration_seconds <= padding / 1000:
        return _export_segments([x])

    # Determine silence threshold based on whether the audio signal
    # consists entirely of transients.
    if _is_transients(x.get_array_of_samples(), x.frame_rate, n_window):
        threshold = transients_threshold
    else:
        threshold = default_threshold

    segments = silence.split_on_silence(
        audio_segment=x,
        min_silence_len=min_silence,
        silence_thresh=threshold,
        keep_silence=keep_silence,
    )

    # Export the original clip if no non-silent segments were found
    if len(segments) == 0:
        return _export_segments([x])

    # Discard segments that are too short. Durations are compared in
    # milliseconds (``len()`` of an AudioSegment is its length in ms);
    # the previous code compared ``duration_seconds`` (in seconds)
    # against the millisecond threshold, which disabled the filter.
    durations = [len(seg) for seg in segments]
    discard_threshold = 100 + padding
    if np.mean(durations) > discard_threshold + 500:
        segments = [seg for seg, duration in zip(segments, durations)
                    if duration > discard_threshold]

    return _export_segments(segments)
101 | """ 102 | envelope = _moving_average(np.abs(x), n=n_window) 103 | envelope = librosa.amplitude_to_db(envelope, ref=np.max) 104 | mask = (envelope > -30).astype(int) 105 | diff = np.diff(mask) 106 | start = np.where(diff == 1)[0] 107 | end = np.where(diff == -1)[0] 108 | 109 | if len(end) == 0: 110 | return True 111 | 112 | if mask[0] == 1: 113 | start = np.concatenate(([0], start)) 114 | if len(start) > len(end): 115 | start = start[:-1] 116 | 117 | return max(end - start) / sample_rate < 0.5 118 | 119 | 120 | def _moving_average(x, n=3): 121 | """Compute the moving average of a 1D array. 122 | 123 | Args: 124 | x (array_like): Input 1D array. 125 | n (int): Window size of moving average. 126 | 127 | Returns: 128 | np.ndarray: The averaged version of the array. 129 | """ 130 | ret = np.cumsum(x, dtype=float) 131 | ret[n:] = ret[n:] - ret[:-n] 132 | return ret[n - 1:] / n 133 | -------------------------------------------------------------------------------- /task2/training.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import sklearn.metrics as metrics 4 | 5 | from keras.callbacks import Callback 6 | from keras.callbacks import CSVLogger 7 | from keras.callbacks import EarlyStopping 8 | from keras.callbacks import LearningRateScheduler 9 | from keras.callbacks import ModelCheckpoint 10 | from keras.callbacks import TensorBoard 11 | from keras.optimizers import Adam 12 | import keras.utils 13 | 14 | from mixup import MixupGenerator 15 | import config as cfg 16 | import convnet 17 | import evaluation 18 | import inference 19 | import utils 20 | 21 | 22 | def train(tr_x, tr_y, val_x, val_y, val_index, model_id='gcnn', 23 | fold=-1, sample_weight=None, class_weight=None): 24 | """Train a neural network using the given training set. 25 | 26 | Args: 27 | tr_x (np.ndarray): Array of training data. 28 | tr_y (np.ndarray): Target values of the training data. 29 | val_x (np.ndarray): Array of validation data. 
def train(tr_x, tr_y, val_x, val_y, val_index, model_id='gcnn',
          fold=-1, sample_weight=None, class_weight=None):
    """Train a neural network using the given training set.

    Args:
        tr_x (np.ndarray): Array of training data.
        tr_y (np.ndarray): Target values of the training data.
        val_x (np.ndarray): Array of validation data.
        val_y (np.ndarray): Target values of the validation data.
        val_index (pd.Index): File names of validation data. Used to
            group chunks in order to compute clip-level predictions.
        model_id (str): The neural network architecture.
        fold (int): Fold number identifying validation set.
        sample_weight (float): Weights for the training examples.
        class_weight (float): Class-wise weights.
    """
    # Select the model-building function for the given architecture
    if model_id == 'gcnn':
        create_model = convnet.gcnn
    elif model_id == 'vgg13':
        create_model = convnet.vgg13
    elif model_id == 'crnn':
        create_model = convnet.crnn
    elif model_id == 'gcrnn':
        create_model = convnet.gcrnn

    # Create model and print summary
    model = create_model(input_shape=tr_x.shape[1:],
                         n_classes=tr_y.shape[1])
    _print_model_summary(model)

    # Use Adam SGD optimizer
    optimizer = Adam(lr=cfg.learning_rate['initial'])
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'],
                  )

    # Create the appropriate callbacks to use during training. The
    # evaluation callbacks need the clip-level index to merge the
    # predictions of chunks belonging to the same audio clip.
    callbacks = _create_callbacks(fold)
    for callback in callbacks:
        callback.val_index = val_index

    # Set a large value for `n_epochs` if early stopping is used
    n_epochs = cfg.n_epochs
    if n_epochs < 0:
        n_epochs = 10000

    # Standardize validation data using training-set statistics
    generator = utils.fit_scaler(tr_x)
    if len(val_x):
        validation_data = (generator.standardize(val_x), val_y)
    else:
        validation_data = None

    # Redefine generator for mixup data augmentation; the fitted scaler
    # is passed on so that training batches are standardized too
    batch_size = cfg.batch_size
    generator = MixupGenerator(tr_x,
                               tr_y,
                               sample_weight=sample_weight,
                               batch_size=batch_size,
                               alpha=1.0,
                               generator=generator,
                               )

    return model.fit_generator(generator(),
                               steps_per_epoch=len(tr_x) // batch_size,
                               epochs=n_epochs,
                               callbacks=callbacks,
                               validation_data=validation_data,
                               class_weight=class_weight,
                               )
class Evaluator(Callback):
    """A base class for logging evaluation results.

    Attributes:
        val_index (pd.Index): File names of the validation examples.
            Assigned externally after construction (see ``train``);
            used to merge chunk-level predictions into clip-level ones.
    """

    def predict(self):
        """Predict target values of the validation data.

        The main utility of this function is to merge the predictions of
        chunks belonging to the same audio clip. The same is done for
        the ground truth target values so that dimensions match.

        Returns:
            tuple: Tuple containing:

                y_true (np.ndarray): Ground truth target values.
                y_pred (np.ndarray): Predicted target values.
        """
        # Keras supplies (x, y[, ...]); only the first two are needed
        x, y_true = self.validation_data[:2]

        y_pred = self.model.predict(x)
        # 'first' suffices for the ground truth since all chunks of a
        # clip share the same target values
        y_true = inference.merge_predictions(y_true, self.val_index, 'first')
        y_pred = inference.merge_predictions(y_pred, self.val_index)
        return y_true.values, y_pred.values


class MAPLogger(Evaluator):
    """A callback for computing the mean average precision at k (MAP@k).

    At the end of each epoch, the MAP is computed and logged for the
    predictions of the validation dataset. It is assumed that the ground
    truths are single-label.

    Args:
        k (int): The maximum number of predicted elements.

    Attributes:
        k (int): The maximum number of predicted elements.
    """

    def __init__(self, k=3):
        super(MAPLogger, self).__init__()

        self.k = k

    def on_epoch_end(self, epoch, logs=None):
        """Compute the MAP of the validation set predictions."""
        y_true, y_pred = self.predict()
        map_k = evaluation.compute_map(y_true, y_pred, self.k)
        # NOTE(review): presumably the final argument requests class-
        # wise MAP@k values, so the worst class score is logged here --
        # confirm against evaluation.compute_map
        map_k_min = min(evaluation.compute_map(y_true, y_pred, self.k, True))

        # Log the computed values so that other callbacks (e.g. the
        # CSVLogger) can record them
        logs = logs or {}
        logs['val_map'] = map_k
        logs['val_map_min'] = map_k_min
class F1ScoreLogger(Evaluator):
    """A callback for computing the F1 score.

    At the end of each epoch, the F1 score is computed and logged for
    the predictions of the validation dataset.

    Args:
        threshold (float): Threshold used to binarize predictions.

    Attributes:
        threshold (float): Threshold used to binarize predictions.
    """

    def __init__(self, threshold=-1):
        super(F1ScoreLogger, self).__init__()

        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Compute the F1 score of the validation set predictions."""
        y_true, y_pred = self.predict()
        y_pred_b = inference.binarize_predictions(y_pred, self.threshold)
        # Micro-averaged F1 over all classes; `labels` ensures every
        # class column is taken into account
        f1_score = metrics.f1_score(y_true, y_pred_b, average='micro',
                                    labels=range(y_true.shape[1]))

        # Log the computed value so that other callbacks (e.g. the
        # CSVLogger) can record it
        logs = logs or {}
        logs['val_f1_score'] = f1_score


def _print_model_summary(model):
    """Print a summary of the model and also write the summary to disk.

    The summary is written to 'summary.txt' in the model directory.

    Args:
        model: The Keras model to summarize.
    """
    keras.utils.print_summary(model)
    with open(os.path.join(cfg.model_path, 'summary.txt'), 'w') as f:
        keras.utils.print_summary(model, print_fn=lambda s: f.write(s + '\n'))
207 | """ 208 | fold_dir = str(fold) if fold >= 0 else 'all' 209 | 210 | # Create callbacks for computing various metrics and logging them 211 | callbacks = [] 212 | if fold >= 0: 213 | callbacks += [MAPLogger(), F1ScoreLogger(cfg.threshold), 214 | CSVLogger(cfg.history_path.format(fold_dir))] 215 | 216 | # Create callback to save model after every epoch 217 | path = os.path.join(cfg.model_path, fold_dir, 218 | 'model.{epoch:02d}-{acc:.4f}.h5') 219 | callbacks.append(ModelCheckpoint(filepath=path, monitor='acc')) 220 | 221 | # Create callback for TensorBoard logs 222 | callbacks.append(TensorBoard(cfg.log_path.format(fold_dir), 223 | batch_size=cfg.batch_size)) 224 | 225 | lr_decay = cfg.learning_rate['decay'] 226 | if lr_decay < 1.: 227 | # Create callback to decay learning rate 228 | def _lr_schedule(epoch, lr): 229 | decay = epoch % cfg.learning_rate['decay_rate'] == 0 230 | return lr * lr_decay if decay else lr 231 | callbacks.append(LearningRateScheduler(schedule=_lr_schedule)) 232 | 233 | if cfg.n_epochs == -1: 234 | # Create callback to use an early stopping condition 235 | callbacks.append(EarlyStopping(monitor='val_loss', 236 | min_delta=0, 237 | patience=5, 238 | )) 239 | 240 | return callbacks 241 | -------------------------------------------------------------------------------- /task2/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import types 4 | 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | LABELS = [ 10 | 'Acoustic_guitar', 11 | 'Applause', 12 | 'Bark', 13 | 'Bass_drum', 14 | 'Burping_or_eructation', 15 | 'Bus', 16 | 'Cello', 17 | 'Chime', 18 | 'Clarinet', 19 | 'Computer_keyboard', 20 | 'Cough', 21 | 'Cowbell', 22 | 'Double_bass', 23 | 'Drawer_open_or_close', 24 | 'Electric_piano', 25 | 'Fart', 26 | 'Finger_snapping', 27 | 'Fireworks', 28 | 'Flute', 29 | 'Glockenspiel', 30 | 'Gong', 31 | 'Gunshot_or_gunfire', 32 | 'Harmonica', 33 | 'Hi-hat', 34 | 
def to_categorical(y):
    """Encode labels as one-hot vectors.

    Args:
        y (pd.Series): Labels to be converted into categorical format.

    Returns:
        np.ndarray: Matrix of encoded labels.
    """
    # One indicator column per distinct label, in sorted label order
    return pd.get_dummies(y).values


def pad_truncate(x, length, pad_value=0):
    """Pad or truncate an array to a specified length.

    Args:
        x (array_like): Input array.
        length (int): Target length.
        pad_value (number): Padding value.

    Returns:
        array_like: The array padded/truncated to the specified length.
    """
    n = len(x)
    if n > length:
        return x[:length]
    if n < length:
        # Pad along the first axis only; trailing axes keep their shape
        filler = np.full((length - n,) + x.shape[1:], pad_value)
        return np.concatenate((x, filler))
    return x


def fit_scaler(x):
    """Fit an ImageDataGenerator to the given data.

    Args:
        x (np.ndarray): 4D array of data.

    Returns:
        keras.ImageDataGenerator: The fitted generator.
    """
    from keras.preprocessing.image import ImageDataGenerator

    # Standardization: subtract the feature-wise mean and divide by the
    # feature-wise standard deviation
    scaler = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True,
    )
    scaler.fit(x)
    return scaler


def group_by_name(data):
    """Group metadata entries based on original file names.

    Segment names are derived from the original eight-character clip
    names, so truncating to eight characters recovers the source clip.

    Args:
        data (pd.Series or pd.DataFrame): The metadata to group.

    Returns:
        The relevant GroupBy object.
    """
    return data.groupby(lambda name: name[:8] + '.wav')
def timeit(callback, message):
    """Measure the time taken to execute the given callback.

    This function measures the amount of time it takes to execute the
    specified callback and prints a message afterwards regarding the
    time taken. The `message` parameter provides part of the message,
    e.g. if `message` is 'Executed', the printed message is 'Executed in
    1.234567 seconds'.

    Args:
        callback: Function to execute and time.
        message (str): Message to print after executing the callback.

    Returns:
        The return value of the callback.
    """
    start = time.time()
    result = callback()
    elapsed = time.time() - start

    print('%s in %f seconds' % (message, elapsed))

    return result


def log_parameters(params, output_path):
    """Write the given parameters to a file in JSON format.

    Args:
        params (dict or module): Parameters to serialize. If `params` is
            a module, the relevant variables are serialized.
        output_path (str): Output file path.

    Raises:
        ValueError: If `params` is neither a dict nor a module.
    """
    if isinstance(params, types.ModuleType):
        # Serialize the module's public (non-underscore) attributes
        params = {name: value for name, value in vars(params).items()
                  if not name.startswith('_')}
    elif not isinstance(params, dict):
        raise ValueError("'params' must be a dict or a module")

    with open(output_path, 'w') as f:
        json.dump(params, f, indent=2)