├── AudioRecorder.ipynb ├── Emotion Detection through Speech.docx ├── LICENSE ├── Predictions.csv ├── README.md ├── final_results_gender_test.ipynb ├── images ├── cnn.png ├── feature.png ├── feature2.png ├── joomla_speech_prosody.png ├── livevoice.PNG ├── livevoice2.PNG ├── predict.png ├── spec.png └── wave.png ├── model.json ├── output10.wav └── saved_models └── Emotion_Voice_Detection_Model.h5 /AudioRecorder.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Collecting pyaudio\n", 13 | " Downloading PyAudio-0.2.11-cp36-cp36m-win_amd64.whl (52kB)\n", 14 | "Installing collected packages: pyaudio\n", 15 | "Successfully installed pyaudio-0.2.11\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "!pip install pyaudio" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Collecting wave\n", 33 | " Downloading Wave-0.0.2.tar.gz\n", 34 | "Building wheels for collected packages: wave\n", 35 | " Running setup.py bdist_wheel for wave: started\n", 36 | " Running setup.py bdist_wheel for wave: finished with status 'done'\n", 37 | " Stored in directory: C:\\Users\\mites\\AppData\\Local\\pip\\Cache\\wheels\\49\\db\\0f\\18c4bbc88cf4af661a52e4854f4798f62fece2c74671896570\n", 38 | "Successfully built wave\n", 39 | "Installing collected packages: wave\n", 40 | "Successfully installed wave-0.0.2\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "!pip install wave" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 20, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "* recording\n", 58 | "* done recording\n" 59 | ] 60 | } 61 | ], 62 | 
"source": [ 63 | "import pyaudio\n", 64 | "import wave\n", 65 | "\n", 66 | "CHUNK = 1024 \n", 67 | "FORMAT = pyaudio.paInt16 #paInt8\n", 68 | "CHANNELS = 2 \n", 69 | "RATE = 44100 #sample rate\n", 70 | "RECORD_SECONDS = 4\n", 71 | "WAVE_OUTPUT_FILENAME = \"output10.wav\"\n", 72 | "\n", 73 | "p = pyaudio.PyAudio()\n", 74 | "\n", 75 | "stream = p.open(format=FORMAT,\n", 76 | " channels=CHANNELS,\n", 77 | " rate=RATE,\n", 78 | " input=True,\n", 79 | " frames_per_buffer=CHUNK) #buffer\n", 80 | "\n", 81 | "print(\"* recording\")\n", 82 | "\n", 83 | "frames = []\n", 84 | "\n", 85 | "for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):\n", 86 | " data = stream.read(CHUNK)\n", 87 | " frames.append(data) # 2 bytes(16 bits) per channel\n", 88 | "\n", 89 | "print(\"* done recording\")\n", 90 | "\n", 91 | "stream.stop_stream()\n", 92 | "stream.close()\n", 93 | "p.terminate()\n", 94 | "\n", 95 | "wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')\n", 96 | "wf.setnchannels(CHANNELS)\n", 97 | "wf.setsampwidth(p.get_sample_size(FORMAT))\n", 98 | "wf.setframerate(RATE)\n", 99 | "wf.writeframes(b''.join(frames))\n", 100 | "wf.close()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 3", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.6.2" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 2 134 | } 135 | -------------------------------------------------------------------------------- /Emotion Detection through Speech.docx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/Emotion Detection through Speech.docx -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Mitesh Puthran 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Predictions.csv: -------------------------------------------------------------------------------- 1 | actualvalues,predictedvalues 2 | male_calm,male_calm 3 | male_sad,male_sad 4 | male_fearful,male_fearful 5 | female_fearful,female_calm 6 | male_fearful,male_fearful 7 | male_happy,male_happy 8 | female_calm,female_calm 9 | female_angry,female_fearful 10 | male_angry,male_angry 11 | male_happy,male_happy 12 | male_fearful,male_fearful 13 | female_sad,female_calm 14 | male_sad,male_sad 15 | male_sad,male_sad 16 | female_calm,female_sad 17 | female_fearful,female_angry 18 | male_angry,male_angry 19 | female_angry,female_angry 20 | male_angry,male_angry 21 | male_happy,male_happy 22 | male_sad,female_sad 23 | female_calm,female_calm 24 | male_angry,male_angry 25 | male_sad,male_sad 26 | male_fearful,male_fearful 27 | male_angry,male_angry 28 | male_fearful,male_fearful 29 | female_sad,female_calm 30 | male_fearful,male_fearful 31 | male_angry,male_angry 32 | male_happy,male_happy 33 | female_calm,female_calm 34 | male_happy,male_happy 35 | male_fearful,male_sad 36 | male_angry,female_fearful 37 | female_fearful,female_fearful 38 | female_calm,female_calm 39 | male_happy,male_happy 40 | female_angry,male_calm 41 | female_happy,female_angry 42 | male_angry,male_angry 43 | male_sad,male_sad 44 | female_fearful,female_calm 45 | female_happy,female_angry 46 | male_fearful,male_fearful 47 | male_angry,male_angry 48 | male_angry,male_angry 49 | male_fearful,male_fearful 50 | male_fearful,male_fearful 51 | female_angry,female_angry 52 | female_angry,female_angry 53 | male_fearful,male_fearful 54 | female_angry,female_angry 55 | male_sad,female_angry 56 | male_calm,male_calm 57 | male_angry,male_angry 58 | female_happy,female_happy 59 | male_fearful,male_fearful 60 | male_angry,male_angry 61 | male_happy,male_happy 62 | male_angry,male_angry 63 | male_sad,female_sad 64 | 
male_happy,male_happy 65 | male_happy,male_happy 66 | male_sad,male_sad 67 | male_fearful,male_fearful 68 | female_fearful,female_fearful 69 | male_fearful,male_happy 70 | male_sad,male_calm 71 | female_happy,female_calm 72 | male_angry,male_angry 73 | male_calm,male_calm 74 | male_happy,male_happy 75 | male_calm,male_calm 76 | male_angry,male_angry 77 | female_happy,female_happy 78 | female_sad,female_sad 79 | male_happy,male_happy 80 | male_angry,male_happy 81 | male_sad,male_sad 82 | male_fearful,male_sad 83 | female_sad,female_angry 84 | male_happy,male_happy 85 | male_sad,male_sad 86 | female_sad,female_sad 87 | male_angry,male_angry 88 | female_sad,female_angry 89 | female_happy,female_angry 90 | male_angry,male_angry 91 | male_angry,male_angry 92 | male_fearful,male_fearful 93 | male_happy,male_sad 94 | female_calm,female_sad 95 | male_fearful,male_fearful 96 | male_calm,male_happy 97 | male_calm,male_calm 98 | male_sad,male_sad 99 | male_angry,male_angry 100 | male_calm,male_calm 101 | male_angry,male_happy 102 | female_calm,female_calm 103 | male_fearful,male_fearful 104 | female_angry,female_angry 105 | male_angry,male_happy 106 | male_happy,male_happy 107 | male_calm,male_fearful 108 | female_calm,female_happy 109 | female_happy,female_fearful 110 | male_fearful,male_fearful 111 | male_happy,male_happy 112 | male_fearful,male_fearful 113 | male_sad,male_sad 114 | male_fearful,male_fearful 115 | male_sad,male_sad 116 | male_angry,male_angry 117 | male_fearful,male_fearful 118 | male_happy,male_happy 119 | male_calm,male_calm 120 | male_happy,female_angry 121 | female_calm,female_calm 122 | male_calm,male_calm 123 | male_sad,male_sad 124 | female_sad,female_calm 125 | male_angry,male_angry 126 | male_happy,male_happy 127 | male_fearful,male_fearful 128 | female_calm,female_calm 129 | male_happy,male_happy 130 | male_sad,male_calm 131 | male_sad,male_sad 132 | female_happy,female_happy 133 | male_fearful,male_fearful 134 | male_fearful,male_fearful 135 | 
female_sad,female_sad 136 | female_calm,male_calm 137 | male_angry,male_angry 138 | male_happy,male_happy 139 | male_happy,male_happy 140 | female_calm,female_calm 141 | female_angry,female_angry 142 | male_angry,male_angry 143 | female_happy,female_angry 144 | male_fearful,male_fearful 145 | male_sad,male_calm 146 | male_fearful,male_angry 147 | male_fearful,male_fearful 148 | female_fearful,female_sad 149 | male_sad,male_sad 150 | female_angry,female_fearful 151 | male_fearful,male_angry 152 | female_sad,female_sad 153 | male_fearful,male_fearful 154 | male_angry,male_angry 155 | male_calm,male_calm 156 | male_happy,male_happy 157 | male_angry,male_angry 158 | male_angry,male_angry 159 | female_sad,female_sad 160 | male_happy,male_happy 161 | male_happy,male_happy 162 | female_fearful,female_fearful 163 | male_fearful,male_fearful 164 | male_angry,male_angry 165 | male_fearful,male_fearful 166 | male_calm,male_calm 167 | male_sad,male_sad 168 | male_happy,male_happy 169 | male_sad,male_sad 170 | male_calm,male_sad 171 | male_happy,female_angry 172 | female_fearful,female_fearful 173 | male_angry,male_angry 174 | male_fearful,male_fearful 175 | male_happy,male_happy 176 | female_happy,female_happy 177 | female_angry,female_angry 178 | female_angry,female_sad 179 | male_sad,male_calm 180 | male_angry,male_calm 181 | male_sad,male_sad 182 | male_sad,male_sad 183 | male_angry,male_sad 184 | female_happy,female_happy 185 | male_happy,male_happy 186 | female_sad,female_calm 187 | male_fearful,male_fearful 188 | female_angry,female_fearful 189 | male_angry,male_angry 190 | male_happy,male_happy 191 | female_calm,female_calm 192 | male_happy,male_happy 193 | female_sad,female_sad 194 | female_sad,female_sad 195 | female_angry,male_happy 196 | male_angry,male_angry 197 | female_fearful,female_fearful 198 | male_angry,male_sad 199 | male_sad,male_sad 200 | male_angry,male_angry 201 | male_calm,male_calm 202 | female_fearful,female_angry 203 | male_calm,male_angry 204 | 
male_happy,male_sad 205 | female_sad,female_angry 206 | male_calm,male_happy 207 | male_fearful,male_fearful 208 | female_fearful,female_sad 209 | male_calm,male_calm 210 | male_calm,male_calm 211 | female_happy,female_happy 212 | male_sad,male_sad 213 | male_angry,male_angry 214 | male_sad,male_sad 215 | male_sad,male_calm 216 | female_calm,female_calm 217 | male_fearful,male_fearful 218 | male_angry,male_angry 219 | male_fearful,male_fearful 220 | male_happy,male_happy 221 | female_sad,female_sad 222 | male_sad,male_sad 223 | female_happy,female_fearful 224 | male_angry,male_angry 225 | male_calm,male_calm 226 | male_calm,male_calm 227 | female_fearful,female_sad 228 | female_sad,female_fearful 229 | female_angry,female_calm 230 | male_happy,male_happy 231 | female_angry,female_fearful 232 | male_fearful,male_fearful 233 | male_angry,male_angry 234 | male_fearful,male_fearful 235 | female_happy,female_happy 236 | male_angry,male_angry 237 | male_calm,male_calm 238 | male_happy,male_happy 239 | female_angry,male_fearful 240 | female_calm,female_calm 241 | male_sad,male_calm 242 | female_angry,female_angry 243 | male_happy,female_angry 244 | female_fearful,female_sad 245 | male_fearful,male_fearful 246 | female_sad,female_sad 247 | female_sad,female_sad 248 | female_sad,female_sad 249 | female_calm,female_sad 250 | female_calm,female_sad 251 | female_angry,female_angry 252 | male_happy,male_happy 253 | female_calm,female_calm 254 | male_sad,male_calm 255 | male_fearful,male_fearful 256 | female_calm,female_sad 257 | male_happy,male_happy 258 | male_fearful,male_fearful 259 | male_fearful,male_happy 260 | female_fearful,female_fearful 261 | male_sad,male_sad 262 | male_angry,male_calm 263 | female_fearful,female_fearful 264 | male_fearful,male_fearful 265 | male_angry,male_happy 266 | female_happy,female_happy 267 | male_sad,male_sad 268 | male_sad,male_sad 269 | male_fearful,male_fearful 270 | female_fearful,female_fearful 271 | male_angry,male_angry 272 | 
male_fearful,male_fearful 273 | female_fearful,female_fearful 274 | male_sad,male_sad 275 | male_fearful,male_angry 276 | male_angry,male_angry 277 | male_sad,male_fearful 278 | male_happy,male_happy 279 | male_angry,male_angry 280 | male_calm,male_calm 281 | male_happy,male_happy 282 | female_calm,female_calm 283 | female_fearful,female_fearful 284 | male_angry,male_angry 285 | male_calm,male_sad 286 | female_angry,female_sad 287 | male_angry,male_angry 288 | male_angry,male_angry 289 | male_calm,male_calm 290 | male_fearful,male_fearful 291 | male_happy,male_happy 292 | male_fearful,male_fearful 293 | male_fearful,female_angry 294 | male_fearful,male_sad 295 | male_happy,male_angry 296 | male_sad,male_sad 297 | male_angry,male_angry 298 | male_calm,male_sad 299 | male_fearful,male_fearful 300 | male_happy,male_happy 301 | female_sad,female_calm 302 | female_fearful,female_fearful 303 | male_happy,male_happy 304 | male_fearful,male_fearful 305 | male_fearful,male_fearful 306 | male_angry,male_angry 307 | male_happy,male_fearful 308 | male_happy,male_happy 309 | male_happy,male_fearful 310 | male_fearful,male_fearful 311 | female_happy,female_sad 312 | male_angry,male_angry 313 | male_happy,male_happy 314 | male_angry,male_angry 315 | female_angry,female_angry 316 | female_angry,female_angry 317 | male_angry,male_angry 318 | female_happy,male_sad 319 | male_happy,male_happy 320 | male_happy,male_happy 321 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Speech Emotion Analyzer 2 | 3 | * The idea behind creating this project was to build a machine learning model that could detect emotions from the speech we have with each other all the time. Nowadays personalization is something that is needed in all the things we experience everyday. 
4 | 5 | * So why not have an emotion detector that will gauge your emotions and in the future recommend you different things based on your mood. 6 | This can be used by multiple industries to offer different services, like a marketing company suggesting you buy products based on your emotions, or the automotive industry detecting a person's emotions and adjusting the speed of autonomous cars as required to avoid any collisions, etc. 7 | 8 | ## Analyzing audio signals 9 | ![](images/joomla_speech_prosody.png?raw=true) 10 | 11 | [©Fabien_Ringeval_PhD_Thesis](https://drive.google.com/file/d/0B2V_I9XKBODhcEtZV1lRWW1fYTg/view). 12 |
13 | 14 | ### Datasets: 15 | Made use of two different datasets: 16 | 1. [RAVDESS](https://zenodo.org/record/1188976). 17 | This dataset includes around 1500 audio file inputs from 24 different actors (12 male and 12 female), where these actors record short audio clips in 8 different emotions, i.e. 1 = neutral, 2 = calm, 3 = happy, 4 = sad, 5 = angry, 6 = fearful, 7 = disgust, 8 = surprised.
18 | Each audio file is named in such a way that the 7th character is consistent with the different emotions that they represent. 19 | 20 | 2. [SAVEE](http://kahlan.eps.surrey.ac.uk/savee/Download.html). 21 | This dataset contains around 500 audio files recorded by 4 different male actors. The first two characters of the file name correspond to the different emotions that they portray. 22 | 23 | ## Audio files: 24 | Tested out the audio files by plotting out the waveform and a spectrogram to see the sample audio files.
25 | **Waveform** 26 | ![](images/wave.png?raw=true) 27 |
28 |
29 | **Spectrogram**
30 | ![](images/spec.png?raw=true) 31 |
32 | 33 | ## Feature Extraction 34 | The next step involves extracting the features from the audio files which will help our model learn between these audio files. 35 | For feature extraction we make use of the [**LibROSA**](https://librosa.github.io/librosa/) library in python which is one of the libraries used for audio analysis. 36 |
37 | ![](images/feature.png?raw=true) 38 |
39 | * Here there are some things to note. While extracting the features, all the audio files have been timed for 3 seconds to get equal number of features. 40 | * The sampling rate of each file is doubled keeping sampling frequency constant to get more features which will help classify the audio file when the size of dataset is small. 41 |
42 | 43 | **The extracted features looks as follows** 44 | 45 |
46 | 47 | ![](images/feature2.png?raw=true) 48 | 49 |
50 | 51 | These are arrays of values with labels appended to them. 52 | 53 | ## Building Models 54 | 55 | Since the project is a classification problem, **Convolutional Neural Network** seems the obvious choice. We also built **Multilayer perceptrons** and **Long Short Term Memory** models but they under-performed with very low accuracies which couldn't pass the test while predicting the right emotions. 56 | 57 | Building and tuning a model is a very time-consuming process. The idea is to always start small without adding too many layers just for the sake of making it complex. After testing out with layers, the model which gave the max validation accuracy against test data was a little more than 70%. 58 |
59 |
60 | ![](images/cnn.png?raw=true) 61 |
62 | 63 | ## Predictions 64 | 65 | After tuning the model, tested it out by predicting the emotions for the test data. For a model with the given accuracy these are a sample of the actual vs predicted values. 66 |
67 |
68 | ![](images/predict.png?raw=true) 69 |
70 | 71 | ## Testing out with live voices. 72 | In order to test out our model on voices that were completely different from what we have in our training and test data, we recorded our own voices with different emotions and predicted the outcomes. You can see the results below: 73 | The audio contained a male voice which said **"This coffee sucks"** in an angry tone. 74 |
75 | ![](images/livevoice.PNG?raw=true) 76 |
77 |
78 | ![](images/livevoice2.PNG?raw=true) 79 |
80 | 81 | ### As you can see that the model has predicted the male voice and emotion very accurately in the image above. 82 | 83 | ## NOTE: If you are using the model directly and want to decode the output ranging from 0 to 9 then the following list will help you. 84 | 85 | 0 - female_angry
86 | 1 - female_calm
87 | 2 - female_fearful
88 | 3 - female_happy
89 | 4 - female_sad
90 | 5 - male_angry
91 | 6 - male_calm
92 | 7 - male_fearful
93 | 8 - male_happy
94 | 9 - male_sad
95 | 96 | ## Conclusion 97 | Building the model was a challenging task as it involved a lot of trial and error methods, tuning etc. The model is very well trained to distinguish between male and female voices and it distinguishes with 100% accuracy. The model was tuned to detect emotions with more than 70% accuracy. Accuracy can be increased by including more audio files for training. 98 | -------------------------------------------------------------------------------- /images/cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/cnn.png -------------------------------------------------------------------------------- /images/feature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/feature.png -------------------------------------------------------------------------------- /images/feature2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/feature2.png -------------------------------------------------------------------------------- /images/joomla_speech_prosody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/joomla_speech_prosody.png -------------------------------------------------------------------------------- /images/livevoice.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/livevoice.PNG
-------------------------------------------------------------------------------- /images/livevoice2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/livevoice2.PNG -------------------------------------------------------------------------------- /images/predict.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/predict.png -------------------------------------------------------------------------------- /images/spec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/spec.png -------------------------------------------------------------------------------- /images/wave.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/images/wave.png -------------------------------------------------------------------------------- /model.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "config": [{"class_name": "Conv1D", "config": {"name": "conv1d_7", "trainable": true, "batch_input_shape": [null, 216, 1], "dtype": "float32", "filters": 128, "kernel_size": [5], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, 
"activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_8", "trainable": true, "activation": "relu"}}, {"class_name": "Conv1D", "config": {"name": "conv1d_8", "trainable": true, "filters": 128, "kernel_size": [5], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_9", "trainable": true, "activation": "relu"}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "rate": 0.1}}, {"class_name": "MaxPooling1D", "config": {"name": "max_pooling1d_2", "trainable": true, "strides": [8], "pool_size": [8], "padding": "valid"}}, {"class_name": "Conv1D", "config": {"name": "conv1d_9", "trainable": true, "filters": 128, "kernel_size": [5], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_10", "trainable": true, "activation": "relu"}}, {"class_name": "Conv1D", "config": {"name": "conv1d_10", "trainable": true, "filters": 128, "kernel_size": [5], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": 
"VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_11", "trainable": true, "activation": "relu"}}, {"class_name": "Conv1D", "config": {"name": "conv1d_11", "trainable": true, "filters": 128, "kernel_size": [5], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_12", "trainable": true, "activation": "relu"}}, {"class_name": "Dropout", "config": {"name": "dropout_4", "trainable": true, "rate": 0.2}}, {"class_name": "Conv1D", "config": {"name": "conv1d_12", "trainable": true, "filters": 128, "kernel_size": [5], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_13", "trainable": true, "activation": "relu"}}, {"class_name": "Flatten", "config": {"name": "flatten_2", "trainable": true}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 10, "activation": 
"linear", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Activation", "config": {"name": "activation_14", "trainable": true, "activation": "softmax"}}], "keras_version": "2.0.6", "backend": "tensorflow"} -------------------------------------------------------------------------------- /output10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/output10.wav -------------------------------------------------------------------------------- /saved_models/Emotion_Voice_Detection_Model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MiteshPuthran/Speech-Emotion-Analyzer/30333cf1809ff990eac18f669dcb07fbb77d9521/saved_models/Emotion_Voice_Detection_Model.h5 --------------------------------------------------------------------------------