├── src ├── .vscode │ ├── settings.json │ ├── launch.json │ └── tasks.json ├── SpectrogramData.cs ├── SpectrogramPredictionEx.cs ├── SpectrogramPrediction.cs ├── mlnet-sound-classifier.csproj └── Program.cs ├── .gitattributes ├── images ├── acoustic_guitar_23-spectro.jpg └── acoustic_guitar_26-spectro.jpg ├── .gitignore └── README.md /src/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "git.ignoreLimitWarning": true 3 | } -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /images/acoustic_guitar_23-spectro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aslotte/mlnet-sound-classifier/HEAD/images/acoustic_guitar_23-spectro.jpg -------------------------------------------------------------------------------- /images/acoustic_guitar_26-spectro.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aslotte/mlnet-sound-classifier/HEAD/images/acoustic_guitar_26-spectro.jpg -------------------------------------------------------------------------------- /src/SpectrogramData.cs: -------------------------------------------------------------------------------- 1 | namespace SoundClassifier 2 | { 3 | public class SpectrogramData 4 | { 5 | public string ImagePath { get; set; } 6 | 7 | public string Label { get; set; } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/SpectrogramPredictionEx.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace SoundClassifier 4 | { 5 | public 
class SpectrogramPredictionEx 6 | { 7 | public string ImagePath; 8 | public string Label; 9 | public UInt32 PredictedLabel; 10 | public float[] Score; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/SpectrogramPrediction.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Microsoft.ML.Data; 3 | 4 | namespace SoundClassifier 5 | { 6 | public class ImagePrediction 7 | { 8 | [ColumnName("Score")] 9 | public float[] Score; 10 | 11 | [ColumnName("PredictedLabel")] 12 | public UInt32 PredictedLabel; 13 | } 14 | } -------------------------------------------------------------------------------- /src/mlnet-sound-classifier.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | netcoreapp2.2 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.AssemblyInfo.cs 3 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.AssemblyInfoInputs.cache 4 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.csproj.CoreCompileInputs.cache 5 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.deps.json 6 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.dll 7 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.pdb 8 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.runtimeconfig.dev.json 9 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.runtimeconfig.json 10 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.assets.cache 11 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.csproj.FileListAbsolute.txt 12 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.csprojAssemblyReference.cache 13 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.dll 14 | 
src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.pdb 15 | src/obj/mlnet-sound-classifier.csproj.nuget.cache 16 | src/obj/mlnet-sound-classifier.csproj.nuget.dgspec.json 17 | src/obj/mlnet-sound-classifier.csproj.nuget.g.props 18 | src/obj/mlnet-sound-classifier.csproj.nuget.g.targets 19 | src/obj/project.assets.json 20 | -------------------------------------------------------------------------------- /src/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to find out which attributes exist for C# debugging 3 | // Use hover for the description of the existing attributes 4 | // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": ".NET Core Launch (console)", 9 | "type": "coreclr", 10 | "request": "launch", 11 | "preLaunchTask": "build", 12 | // If you have changed target frameworks, make sure to update the program path. 
13 | "program": "${workspaceFolder}/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.dll", 14 | "args": [], 15 | "cwd": "${workspaceFolder}", 16 | // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console 17 | "console": "internalConsole", 18 | "stopAtEntry": false 19 | }, 20 | { 21 | "name": ".NET Core Attach", 22 | "type": "coreclr", 23 | "request": "attach", 24 | "processId": "${command:pickProcess}" 25 | } 26 | ] 27 | } -------------------------------------------------------------------------------- /src/.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "build", 6 | "command": "dotnet", 7 | "type": "process", 8 | "args": [ 9 | "build", 10 | "${workspaceFolder}/mlnet-sound-classifier.csproj", 11 | "/property:GenerateFullPaths=true", 12 | "/consoleloggerparameters:NoSummary" 13 | ], 14 | "problemMatcher": "$msCompile" 15 | }, 16 | { 17 | "label": "publish", 18 | "command": "dotnet", 19 | "type": "process", 20 | "args": [ 21 | "publish", 22 | "${workspaceFolder}/mlnet-sound-classifier.csproj", 23 | "/property:GenerateFullPaths=true", 24 | "/consoleloggerparameters:NoSummary" 25 | ], 26 | "problemMatcher": "$msCompile" 27 | }, 28 | { 29 | "label": "watch", 30 | "command": "dotnet", 31 | "type": "process", 32 | "args": [ 33 | "watch", 34 | "run", 35 | "${workspaceFolder}/mlnet-sound-classifier.csproj", 36 | "/property:GenerateFullPaths=true", 37 | "/consoleloggerparameters:NoSummary" 38 | ], 39 | "problemMatcher": "$msCompile" 40 | } 41 | ] 42 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Sound Classification using Deep Convolutional Neural Networks in ML.NET 2 | 3 | #### Background 4 | As of September 2019, it's possible to use transfer learning to natively re-train an InceptionV3 or ResNet CNN in ML.NET. 
This enables .NET developers to create their own custom image classification models for their specific use cases. 5 | 6 | However, Convolutional Neural Networks (CNNs) can be used in many applications beyond image classification. In this repo we'll demonstrate how to build a rudimentary sound classifier using ML.NET. 7 | 8 | #### Disclaimer 9 | The data used for the training was retrieved from an online research paper. 10 | Since starting on this repo I've lost track of the research paper, but I am determined to provide credit to the original creators of the data used during the training, once I'm able to locate the research paper again. 11 | 12 | #### Approach 13 | To be able to classify sounds using a CNN, we first need to create an image of the audio. 14 | To do this, we can create something called an audio spectrogram, which is a visual representation of the energy levels of a sound clip. 15 | 16 | We can do this by using an open-source library called Spectrogram.NET: 17 | 18 | ``` 19 | private static void CreateSpectrogram(string fileName) 20 | { 21 | var spectrogramName = fileName.Substring(0, fileName.Length-4) + "-spectro.jpg"; 22 | if (File.Exists(spectrogramName)) return; 23 | 24 | var spec = new Spectrogram.Spectrogram(sampleRate: 8000, fftSize: 2048, step: 700); 25 | float[] values = Spectrogram.Tools.ReadWav(fileName); 26 | spec.AddExtend(values); 27 | 28 | var bitmap = spec.GetBitmap(intensity: 2, freqHigh: 2500); 29 | spec.SaveBitmap(bitmap, spectrogramName); 30 | } 31 | ``` 32 | 33 | Below is an example in which we've transformed an audio file of a guitar playing into a spectrogram. 
34 | 35 | ![guitar](https://github.com/aslotte/mlnet-sound-classifier/blob/master/images/acoustic_guitar_23-spectro.jpg) 36 | 37 | Once we've generated the images needed to train our model, we can load them from disk and create our training pipeline as follows: 38 | ``` 39 | var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey", 40 | inputColumnName: "Label", 41 | keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue) 42 | .Append(mlContext.Model.ImageClassification("ImagePath", "LabelAsKey", 43 | arch: ImageClassificationEstimator.Architecture.InceptionV3, 44 | epoch: 200, 45 | metricsCallback: (metrics) => Console.WriteLine(metrics), 46 | validationSet: transformedValidationDataView)); 47 | ``` 48 | 49 | #### Result 50 | The model currently yields only 75% accuracy on the validation dataset, which under the circumstances is pretty good. The accuracy can most likely be improved by increasing the size of the dataset used for training, or augmenting the spectrograms further by e.g. transforming them to mel-spectrograms, which will provide even more detail. 
// File: /src/Program.cs
using System;
using System.IO;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Transforms;
using Microsoft.ML.Data;
using System.Linq;

namespace SoundClassifier
{
    /// <summary>
    /// Console entry point that converts WAV files into spectrogram images,
    /// trains an InceptionV3-based image classifier on those images with ML.NET,
    /// prints the evaluation results and saves the trained model to disk.
    /// </summary>
    class Program
    {
        /// <summary>Suffix appended to an audio file's base name to form its spectrogram image name.</summary>
        private const string SpectrogramSuffix = "-spectro.jpg";

        /// <summary>
        /// Root folder of the dataset; expected to contain "train" and "test" sub-folders.
        /// The value below is only a default and can be overridden by the first command-line argument.
        /// </summary>
        private static string DataPath = @"C:\Users\Admin\Desktop\sound-classifier\ae_dataset\AudioEventDataset";

        /// <summary>
        /// Runs the full pipeline: pre-process audio, load data, train, evaluate, save.
        /// </summary>
        /// <param name="args">Optional; <c>args[0]</c> overrides the default dataset root folder.</param>
        static void Main(string[] args)
        {
            // Allow the dataset location to be supplied at run time instead of relying
            // on a path that only exists on the original author's machine.
            if (args != null && args.Length > 0 && !string.IsNullOrWhiteSpace(args[0]))
            {
                DataPath = args[0];
            }

            // Path.Combine picks the right directory separator for the current platform.
            var trainDataPath = Path.Combine(DataPath, "train");
            var testDataPath = Path.Combine(DataPath, "test");

            // The "*.wav*" pattern can also match names such as "clip.wave" or "clip.wav.bak",
            // so keep only files whose extension really is ".wav".
            IEnumerable<string> allAudioFiles = Directory
                .GetFiles(DataPath, "*.wav*", SearchOption.AllDirectories)
                .Where(path => string.Equals(Path.GetExtension(path), ".wav", StringComparison.OrdinalIgnoreCase));

            //Data pre-processing
            foreach (var fileName in allAudioFiles)
            {
                CreateSpectrogram(fileName);
            }

            MLContext mlContext = new MLContext(seed: 1);

            //Read and shuffle
            IEnumerable<SpectrogramData> images = LoadImagesFromDirectory(folder: trainDataPath, useFolderNameasLabel: false).ToList();
            IEnumerable<SpectrogramData> testImages = LoadImagesFromDirectory(folder: testDataPath, useFolderNameasLabel: false).ToList();

            IDataView trainDataView = mlContext.Data.LoadFromEnumerable(images);
            trainDataView = mlContext.Data.ShuffleRows(trainDataView);

            IDataView testDataView = mlContext.Data.LoadFromEnumerable(testImages);

            // NOTE(review): the test split is also handed to the trainer as its validation set,
            // so the evaluation further down is not performed on data unseen during training.
            // NOTE(review): this label-to-key mapping is fitted on the test split while the
            // pipeline below fits its own mapping on the training split; the two only agree
            // when both splits contain the same set of labels.
            IDataView transformedValidationDataView = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
                                                                            inputColumnName: "Label",
                                                                            keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
                                                                    .Fit(testDataView)
                                                                    .Transform(testDataView);

            //Define training pipeline
            var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
                                                                            inputColumnName: "Label",
                                                                            keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
                    .Append(mlContext.Model.ImageClassification("ImagePath", "LabelAsKey",
                                    arch: ImageClassificationEstimator.Architecture.InceptionV3,
                                    epoch: 200,
                                    metricsCallback: (metrics) => Console.WriteLine(metrics),
                                    validationSet: transformedValidationDataView));

            //Train model
            ITransformer trainedModel = pipeline.Fit(trainDataView);

            //Evaluate
            EvaluateModel(mlContext, testDataView, trainedModel);

            // Save
            mlContext.Model.Save(trainedModel, trainDataView.Schema, "sound-classifier.zip");
        }

        /// <summary>
        /// Scores <paramref name="testDataset"/> with <paramref name="trainedModel"/>, prints the
        /// aggregate multiclass metrics and then one console line per prediction.
        /// </summary>
        /// <param name="mlContext">The ML.NET context used for evaluation and enumeration.</param>
        /// <param name="testDataset">Rows to score; must expose the columns the model was trained on.</param>
        /// <param name="trainedModel">The fitted transformer chain.</param>
        private static void EvaluateModel(MLContext mlContext, IDataView testDataset, ITransformer trainedModel)
        {
            Console.WriteLine("Making predictions in bulk for evaluating model's quality...");

            IDataView predictionsDataView = trainedModel.Transform(testDataset);

            var metrics = mlContext.MulticlassClassification.Evaluate(predictionsDataView, labelColumnName: "LabelAsKey", predictedLabelColumnName: "PredictedLabel");

            // The metrics were previously computed and then discarded; report them.
            Console.WriteLine($"Micro accuracy: {metrics.MicroAccuracy:0.###}");
            Console.WriteLine($"Macro accuracy: {metrics.MacroAccuracy:0.###}");
            Console.WriteLine($"Log loss: {metrics.LogLoss:0.###}");

            Console.WriteLine("*** Showing all the predictions ***");
            VBuffer<ReadOnlyMemory<char>> keys = default;
            predictionsDataView.Schema["LabelAsKey"].GetKeyValues(ref keys);
            var originalLabels = keys.DenseValues().ToArray();

            // NOTE(review): the label array is indexed directly with PredictedLabel, which assumes
            // the value is a zero-based class index. Confirm this against the ML.NET version in use.
            List<SpectrogramPredictionEx> predictions = mlContext.Data.CreateEnumerable<SpectrogramPredictionEx>(predictionsDataView, false, true).ToList();
            predictions.ForEach(pred => ConsoleWriteImagePrediction(pred.ImagePath, pred.Label, (originalLabels[pred.PredictedLabel]).ToString(), pred.Score.Max()));
        }

        /// <summary>
        /// Renders the WAV file at <paramref name="fileName"/> as a spectrogram JPEG stored next to it.
        /// Does nothing when the image already exists.
        /// </summary>
        /// <param name="fileName">Path of the WAV file to convert.</param>
        private static void CreateSpectrogram(string fileName)
        {
            var spectrogramName = GetSpectrogramPath(fileName);
            if (File.Exists(spectrogramName))
            {
                return;
            }

            var spec = new Spectrogram.Spectrogram(sampleRate: 8000, fftSize: 2048, step: 700);
            float[] values = Spectrogram.Tools.ReadWav(fileName);
            spec.AddExtend(values);

            var bitmap = spec.GetBitmap(intensity: 2, freqHigh: 2500);
            spec.SaveBitmap(bitmap, spectrogramName);
        }

        /// <summary>
        /// Builds the spectrogram image path for an audio file: same folder, same base name,
        /// with the extension replaced by <see cref="SpectrogramSuffix"/>.
        /// </summary>
        private static string GetSpectrogramPath(string audioFileName)
        {
            // Strip the real extension rather than assuming it is exactly four characters long.
            var directory = Path.GetDirectoryName(audioFileName) ?? string.Empty;
            var baseName = Path.GetFileNameWithoutExtension(audioFileName);
            return Path.Combine(directory, baseName + SpectrogramSuffix);
        }

        /// <summary>
        /// Lazily enumerates the spectrogram images below <paramref name="folder"/> together with their labels.
        /// </summary>
        /// <param name="folder">Folder that is searched recursively for files matching "*spectro.jpg".</param>
        /// <param name="useFolderNameasLabel">
        /// When <c>true</c>, the label is the name of the directory containing the image.
        /// When <c>false</c>, the label is the part of the file name before its last underscore
        /// (for example "acoustic_guitar" for "acoustic_guitar_23-spectro.jpg").
        /// </param>
        /// <returns>One <see cref="SpectrogramData"/> per usable image file.</returns>
        public static IEnumerable<SpectrogramData> LoadImagesFromDirectory(string folder, bool useFolderNameasLabel = true)
        {
            var files = Directory.GetFiles(folder, "*spectro.jpg",
                searchOption: SearchOption.AllDirectories);

            foreach (var file in files)
            {
                var extension = Path.GetExtension(file);
                var isImage = string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase)
                           || string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase);
                if (!isImage)
                {
                    continue;
                }

                string label;
                if (useFolderNameasLabel)
                {
                    label = Directory.GetParent(file).Name;
                }
                else
                {
                    var fileName = Path.GetFileName(file);
                    var separatorIndex = fileName.LastIndexOf('_');

                    // Without an underscore (or with a leading one) no label can be derived.
                    // Skip the file instead of failing inside Substring with a negative length.
                    if (separatorIndex <= 0)
                    {
                        continue;
                    }

                    label = fileName.Substring(0, separatorIndex);
                }

                yield return new SpectrogramData()
                {
                    ImagePath = file,
                    Label = label
                };
            }
        }

        /// <summary>
        /// Deletes every generated spectrogram image below <see cref="DataPath"/>.
        /// </summary>
        private static void DeleteCurrentSpectrograms()
        {
            // Only match files produced by CreateSpectrogram; a bare "*.jpg*" pattern would
            // also remove unrelated images that happen to live in the dataset folder.
            string[] allSpectrograms = Directory.GetFiles(DataPath, "*" + SpectrogramSuffix, SearchOption.AllDirectories);

            foreach (var spectroGram in allSpectrograms)
            {
                File.Delete(spectroGram);
            }
        }

        /// <summary>
        /// Writes a single colourised console line describing one prediction.
        /// </summary>
        /// <param name="ImagePath">Path of the scored image; only its file name is printed.</param>
        /// <param name="Label">The label the image was originally given.</param>
        /// <param name="PredictedLabel">The label chosen by the model.</param>
        /// <param name="Probability">The score printed for the prediction.</param>
        public static void ConsoleWriteImagePrediction(string ImagePath, string Label, string PredictedLabel, float Probability)
        {
            var labelColor = ConsoleColor.Magenta;
            var probColor = ConsoleColor.Blue;

            Console.Write("Image File: ");
            WriteColored($"{Path.GetFileName(ImagePath)}", labelColor);
            Console.Write(" original labeled as ");
            WriteColored(Label, labelColor);
            Console.Write(" predicted as ");
            WriteColored(PredictedLabel, labelColor);
            Console.Write(" with score ");
            WriteColored(Probability.ToString(), probColor);
            Console.WriteLine();
        }

        /// <summary>
        /// Writes <paramref name="text"/> in <paramref name="color"/> and then restores the
        /// foreground colour that was active before the call, even if the write throws.
        /// </summary>
        private static void WriteColored(string text, ConsoleColor color)
        {
            var previous = Console.ForegroundColor;
            Console.ForegroundColor = color;
            try
            {
                Console.Write(text);
            }
            finally
            {
                Console.ForegroundColor = previous;
            }
        }
    }
}
--------------------------------------------------------------------------------