├── src
├── .vscode
│ ├── settings.json
│ ├── launch.json
│ └── tasks.json
├── SpectrogramData.cs
├── SpectrogramPredictionEx.cs
├── SpectrogramPrediction.cs
├── mlnet-sound-classifier.csproj
└── Program.cs
├── .gitattributes
├── images
├── acoustic_guitar_23-spectro.jpg
└── acoustic_guitar_26-spectro.jpg
├── .gitignore
└── README.md
/src/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "git.ignoreLimitWarning": true
3 | }
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/images/acoustic_guitar_23-spectro.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aslotte/mlnet-sound-classifier/HEAD/images/acoustic_guitar_23-spectro.jpg
--------------------------------------------------------------------------------
/images/acoustic_guitar_26-spectro.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aslotte/mlnet-sound-classifier/HEAD/images/acoustic_guitar_26-spectro.jpg
--------------------------------------------------------------------------------
/src/SpectrogramData.cs:
--------------------------------------------------------------------------------
1 | namespace SoundClassifier
2 | {
3 | public class SpectrogramData // Input row for the pipeline: one spectrogram image plus its class label.
4 | {
5 | public string ImagePath { get; set; } // Path of the spectrogram image; Program.cs hands the "ImagePath" column to ImageClassification.
6 |
7 | public string Label { get; set; } // Label text; Program.cs maps the "Label" column to the key column "LabelAsKey".
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/src/SpectrogramPredictionEx.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace SoundClassifier
4 | {
5 | public class SpectrogramPredictionEx // Scored row: input columns plus model outputs. Presumably the row type EvaluateModel enumerates - its lambda reads exactly these four members.
6 | {
7 | public string ImagePath; // Path of the image that was scored.
8 | public string Label; // Original label text carried over from the input row.
9 | public UInt32 PredictedLabel; // Predicted class; EvaluateModel uses it as an index into the label key values.
10 | public float[] Score; // Scores produced by the model; EvaluateModel prints the largest one.
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/src/SpectrogramPrediction.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using Microsoft.ML.Data;
3 |
4 | namespace SoundClassifier
5 | {
6 | public class ImagePrediction // Output-only prediction row. NOTE(review): the class name differs from the file name (SpectrogramPrediction.cs) and no code visible in this dump references it - confirm it is still needed.
7 | {
8 | [ColumnName("Score")] // binds the field below to the column named "Score"
9 | public float[] Score;
10 |
11 | [ColumnName("PredictedLabel")] // binds the field below to the column named "PredictedLabel"
12 | public UInt32 PredictedLabel;
13 | }
14 | }
--------------------------------------------------------------------------------
/src/mlnet-sound-classifier.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | netcoreapp2.2
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.AssemblyInfo.cs
3 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.AssemblyInfoInputs.cache
4 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.csproj.CoreCompileInputs.cache
5 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.deps.json
6 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.dll
7 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.pdb
8 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.runtimeconfig.dev.json
9 | src/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.runtimeconfig.json
10 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.assets.cache
11 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.csproj.FileListAbsolute.txt
12 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.csprojAssemblyReference.cache
13 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.dll
14 | src/obj/Debug/netcoreapp2.2/mlnet-sound-classifier.pdb
15 | src/obj/mlnet-sound-classifier.csproj.nuget.cache
16 | src/obj/mlnet-sound-classifier.csproj.nuget.dgspec.json
17 | src/obj/mlnet-sound-classifier.csproj.nuget.g.props
18 | src/obj/mlnet-sound-classifier.csproj.nuget.g.targets
19 | src/obj/project.assets.json
20 |
--------------------------------------------------------------------------------
/src/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 |     // Use IntelliSense to find out which attributes exist for C# debugging
3 |     // Use hover for the description of the existing attributes
4 |     // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md
5 |     "version": "0.2.0",
6 |     "configurations": [
7 |         {
8 |             "name": ".NET Core Launch (console)",
9 |             "type": "coreclr",
10 |             "request": "launch",
11 |             "preLaunchTask": "build",
12 |             // If you have changed target frameworks, make sure to update the program path. The DLL is named after mlnet-sound-classifier.csproj.
13 |             "program": "${workspaceFolder}/bin/Debug/netcoreapp2.2/mlnet-sound-classifier.dll",
14 |             "args": [],
15 |             "cwd": "${workspaceFolder}",
16 |             // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console
17 |             "console": "internalConsole",
18 |             "stopAtEntry": false
19 |         },
20 |         {
21 |             "name": ".NET Core Attach",
22 |             "type": "coreclr",
23 |             "request": "attach",
24 |             "processId": "${command:pickProcess}"
25 |         }
26 |     ]
27 | }
--------------------------------------------------------------------------------
/src/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 |     "version": "2.0.0",
3 |     "tasks": [
4 |         {
5 |             "label": "build",
6 |             "command": "dotnet",
7 |             "type": "process",
8 |             "args": [
9 |                 "build",
10 |                 "${workspaceFolder}/mlnet-sound-classifier.csproj",
11 |                 "/property:GenerateFullPaths=true",
12 |                 "/consoleloggerparameters:NoSummary"
13 |             ],
14 |             "problemMatcher": "$msCompile"
15 |         },
16 |         {
17 |             "label": "publish",
18 |             "command": "dotnet",
19 |             "type": "process",
20 |             "args": [
21 |                 "publish",
22 |                 "${workspaceFolder}/mlnet-sound-classifier.csproj",
23 |                 "/property:GenerateFullPaths=true",
24 |                 "/consoleloggerparameters:NoSummary"
25 |             ],
26 |             "problemMatcher": "$msCompile"
27 |         },
28 |         {
29 |             "label": "watch",
30 |             "command": "dotnet",
31 |             "type": "process",
32 |             "args": [
33 |                 "watch",
34 |                 "run",
35 |                 "${workspaceFolder}/mlnet-sound-classifier.csproj",
36 |                 "/property:GenerateFullPaths=true",
37 |                 "/consoleloggerparameters:NoSummary"
38 |             ],
39 |             "problemMatcher": "$msCompile"
40 |         }
41 |     ]
42 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ### Sound Classification using Deep Convolutional Neural Networks in ML.NET
2 |
3 | #### Background
4 | As of September 2019, it's possible to use transfer learning to natively re-train an InceptionV3 or ResNet CNN in ML.NET. This enables .NET developers to create their own custom image classification models for their specific use cases.
5 |
6 | However, Convolutional Neural Networks (CNNs) can be used in many applications beyond image classification. In this repo we'll demonstrate how to build a rudimentary sound classifier using ML.NET.
7 |
8 | #### Disclaimer
9 | The data used for the training was retrieved from an online research paper.
10 | Since starting this repo I have lost track of the research paper, but I am determined to credit the original creators of the training data as soon as I am able to locate the paper again.
11 |
12 | #### Approach
13 | To be able to classify sounds using a CNN, we first need to create an image of the audio.
14 | To do this, we can create something called an audio spectrogram, which is a visual representation of the energy levels of a sound clip.
15 |
16 | We can do this by using an open-source library called Spectrogram.NET:
17 |
18 | ```
19 | private static void CreateSpectrogram(string fileName)
20 | {
21 | var spectrogramName = fileName.Substring(0, fileName.Length-4) + "-spectro.jpg";
22 | if (File.Exists(spectrogramName)) return;
23 |
24 | var spec = new Spectrogram.Spectrogram(sampleRate: 8000, fftSize: 2048, step: 700);
25 | float[] values = Spectrogram.Tools.ReadWav(fileName);
26 | spec.AddExtend(values);
27 |
28 | var bitmap = spec.GetBitmap(intensity: 2, freqHigh: 2500);
29 | spec.SaveBitmap(bitmap, spectrogramName);
30 | }
31 | ```
32 |
33 | Below is an example in which we've transformed an audio file of a guitar playing to a spectrogram.
34 |
35 | ![Spectrogram of an acoustic guitar recording](images/acoustic_guitar_23-spectro.jpg)
36 |
37 | Once we've generated the images needed to train our model, we can load them from disk and create our training pipeline as follows:
38 | ```
39 | var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
40 | inputColumnName: "Label",
41 | keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
42 | .Append(mlContext.Model.ImageClassification("ImagePath", "LabelAsKey",
43 | arch: ImageClassificationEstimator.Architecture.InceptionV3,
44 | epoch: 200,
45 | metricsCallback: (metrics) => Console.WriteLine(metrics),
46 | validationSet: transformedValidationDataView));
47 | ```
48 |
49 | #### Result
50 | The model currently yields only 75% accuracy on the validation dataset, which under the circumstances is pretty good. The accuracy can most likely be improved by increasing the size of the dataset used for training, or by augmenting the spectrograms further, e.g. by transforming them to mel-spectrograms, which will provide even more detail.
51 |
--------------------------------------------------------------------------------
/src/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Collections.Generic;
4 | using Microsoft.ML;
5 | using Microsoft.ML.Transforms;
6 | using Microsoft.ML.Data;
7 | using System.Linq;
8 |
9 | namespace SoundClassifier
10 | {
11 | class Program
12 | {
13 | private static string DataPath = @"C:\Users\Admin\Desktop\sound-classifier\ae_dataset\AudioEventDataset";
14 |
15 | /// <summary>Entry point: creates spectrograms for the audio files, trains an InceptionV3 image classifier on them, evaluates it and saves the model.</summary>
16 | /// <param name="args">Optional; when present, args[0] replaces the built-in dataset root folder.</param>
17 | static void Main(string[] args)
18 | {
19 |     if (args.Length > 0) DataPath = args[0]; // allows running against a dataset outside the hard-coded user folder
20 |     var trainDataPath = Path.Combine(DataPath, "train"); // Path.Combine instead of concatenating a hard-coded '\' separator
21 |     var testDataPath = Path.Combine(DataPath, "test");
22 |
23 |     //Data pre-processing
24 |     foreach (var fileName in Directory.GetFiles(DataPath, "*.wav*", SearchOption.AllDirectories))
25 |     {
26 |         CreateSpectrogram(fileName);
27 |     }
28 |
29 |     MLContext mlContext = new MLContext(seed: 1);
30 |
31 |     //Read and shuffle
32 |     IEnumerable<SpectrogramData> images = LoadImagesFromDirectory(folder: trainDataPath, useFolderNameasLabel: false).ToList();
33 |     IEnumerable<SpectrogramData> testImages = LoadImagesFromDirectory(folder: testDataPath, useFolderNameasLabel: false).ToList();
34 |
35 |     IDataView trainDataView = mlContext.Data.ShuffleRows(mlContext.Data.LoadFromEnumerable(images));
36 |     IDataView testDataView = mlContext.Data.LoadFromEnumerable(testImages);
37 |
38 |     // NOTE(review): the test rows also serve as the validation set, so the evaluation further down is not on data unseen during training.
39 |     IDataView transformedValidationDataView = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
40 |             inputColumnName: "Label",
41 |             keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
42 |         .Fit(testDataView)
43 |         .Transform(testDataView);
44 |
45 |     //Define training pipeline
46 |     var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
47 |             inputColumnName: "Label",
48 |             keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
49 |         .Append(mlContext.Model.ImageClassification("ImagePath", "LabelAsKey",
50 |             arch: ImageClassificationEstimator.Architecture.InceptionV3,
51 |             epoch: 200,
52 |             metricsCallback: (metrics) => Console.WriteLine(metrics),
53 |             validationSet: transformedValidationDataView));
54 |
55 |     //Train model
56 |     ITransformer trainedModel = pipeline.Fit(trainDataView);
57 |
58 |     //Evaluate
59 |     EvaluateModel(mlContext, testDataView, trainedModel);
60 |
61 |     // Save
62 |     mlContext.Model.Save(trainedModel, trainDataView.Schema, "sound-classifier.zip");
63 | }
64 |
65 | /// <summary>Scores <paramref name="testDataset"/> with <paramref name="trainedModel"/>, prints overall accuracy metrics, then prints every individual prediction.</summary>
66 | private static void EvaluateModel(MLContext mlContext, IDataView testDataset, ITransformer trainedModel)
67 | {
68 |     Console.WriteLine("Making predictions in bulk for evaluating model's quality...");
69 |     IDataView predictionsDataView = trainedModel.Transform(testDataset);
70 |
71 |     var metrics = mlContext.MulticlassClassification.Evaluate(predictionsDataView, labelColumnName: "LabelAsKey", predictedLabelColumnName: "PredictedLabel");
72 |     Console.WriteLine($"Micro accuracy: {metrics.MicroAccuracy:0.###} | Macro accuracy: {metrics.MacroAccuracy:0.###} | Log-loss: {metrics.LogLoss:0.###}"); // previously computed but never reported
73 |
74 |     Console.WriteLine("*** Showing all the predictions ***");
75 |     VBuffer<ReadOnlyMemory<char>> keys = default; // filled with the label texts stored as key values on "LabelAsKey"
76 |     predictionsDataView.Schema["LabelAsKey"].GetKeyValues(ref keys);
77 |     var originalLabels = keys.DenseValues().ToArray();
78 |     List<SpectrogramPredictionEx> predictions = mlContext.Data.CreateEnumerable<SpectrogramPredictionEx>(predictionsDataView, false, true).ToList();
79 |     predictions.ForEach(pred => ConsoleWriteImagePrediction(pred.ImagePath, pred.Label, (originalLabels[pred.PredictedLabel]).ToString(), pred.Score.Max())); // NOTE(review): assumes PredictedLabel is a zero-based class index - confirm for the ML.NET version in use
80 | }
81 |
82 | /// <summary>Renders the WAV file <paramref name="fileName"/> to a spectrogram JPEG whose path is the input path with its extension replaced by "-spectro.jpg"; does nothing if that image already exists.</summary>
83 | private static void CreateSpectrogram(string fileName)
84 | {
85 |     // Strip whatever extension the file has rather than assuming exactly four characters; the caller's "*.wav*" filter can match longer ones.
86 |     var spectrogramName = Path.ChangeExtension(fileName, null) + "-spectro.jpg";
87 |     if (File.Exists(spectrogramName)) return;
88 |
89 |     var spec = new Spectrogram.Spectrogram(sampleRate: 8000, fftSize: 2048, step: 700);
90 |     spec.AddExtend(Spectrogram.Tools.ReadWav(fileName));
91 |     using (var bitmap = spec.GetBitmap(intensity: 2, freqHigh: 2500)) // release the bitmap once saved; this method runs once per audio file
92 |         spec.SaveBitmap(bitmap, spectrogramName);
93 | }
94 |
95 | /// <summary>Lazily enumerates the "*spectro.jpg" images found under <paramref name="folder"/> (searched recursively) as labeled rows.</summary>
96 | /// <param name="useFolderNameasLabel">When true the label is the name of the image's parent folder; when false it is the file-name text before the last '_'.</param>
97 | public static IEnumerable<SpectrogramData> LoadImagesFromDirectory(string folder, bool useFolderNameasLabel = true)
98 | {
99 |     foreach (var file in Directory.GetFiles(folder, "*spectro.jpg", SearchOption.AllDirectories))
100 |     {
101 |         // The search pattern may match case-insensitively on some platforms, so compare the extension the same way.
102 |         var extension = Path.GetExtension(file);
103 |         if (!string.Equals(extension, ".jpg", StringComparison.OrdinalIgnoreCase) && !string.Equals(extension, ".png", StringComparison.OrdinalIgnoreCase))
104 |             continue;
105 |
106 |         var fileName = Path.GetFileName(file);
107 |         var labelLength = fileName.LastIndexOf('_'); // "acoustic_guitar_23-spectro.jpg" -> 15 -> label "acoustic_guitar"
108 |         if (!useFolderNameasLabel && labelLength <= 0)
109 |             continue; // no label prefix in the file name; Substring(0, -1) would throw, and index 0 would give an empty label
110 |
111 |         var label = useFolderNameasLabel ? Directory.GetParent(file).Name : fileName.Substring(0, labelLength);
112 |         yield return new SpectrogramData { ImagePath = file, Label = label };
113 |     }
114 | }
115 |
116 | /// <summary>Deletes every file under <c>DataPath</c> (searched recursively) whose name matches "*.jpg*".</summary>
117 | private static void DeleteCurrentSpectrograms()
118 | {
119 |     var jpgPaths = Directory.GetFiles(DataPath, "*.jpg*", SearchOption.AllDirectories); // the full list is collected before anything is deleted
120 |     for (var i = 0; i < jpgPaths.Length; i++)
121 |     {
122 |         File.Delete(jpgPaths[i]);
123 |     }
124 | }
125 |
126 | /// <summary>Writes one prediction to the console on a single line: image file name, original label, predicted label and score, with the values colorized.</summary>
127 | public static void ConsoleWriteImagePrediction(string ImagePath, string Label, string PredictedLabel, float Probability)
128 | {
129 |     var restoreColor = Console.ForegroundColor;
130 |     // Writes text in the given color, then puts the original foreground color back.
131 |     void WriteHighlighted(ConsoleColor color, string text)
132 |     {
133 |         Console.ForegroundColor = color;
134 |         Console.Write(text);
135 |         Console.ForegroundColor = restoreColor;
136 |     }
137 |
138 |     Console.Write("Image File: ");
139 |     WriteHighlighted(ConsoleColor.Magenta, $"{Path.GetFileName(ImagePath)}");
140 |     Console.Write(" original labeled as ");
141 |     WriteHighlighted(ConsoleColor.Magenta, Label);
142 |     Console.Write(" predicted as ");
143 |     WriteHighlighted(ConsoleColor.Magenta, PredictedLabel);
144 |     Console.Write(" with score ");
145 |     Console.ForegroundColor = ConsoleColor.Blue; // the score is a float, so it is written directly instead of through the string helper
146 |     Console.Write(Probability);
147 |     Console.ForegroundColor = restoreColor;
148 |     Console.WriteLine("");
149 | }
150 | }
151 | }
152 |
--------------------------------------------------------------------------------