├── VadSharp ├── .gitignore ├── VadSpeechSegment.cs ├── VadSharp.csproj ├── VadSharp.sln ├── VadOnnxModel.cs └── VadDetector.cs ├── VadSharp.Example ├── VadSharp.Example.csproj └── Program.cs ├── LICENSE ├── README.md └── .gitignore /VadSharp/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /obj 3 | /packages -------------------------------------------------------------------------------- /VadSharp.Example/VadSharp.Example.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net9.0 6 | enable 7 | enable 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /VadSharp/VadSpeechSegment.cs: -------------------------------------------------------------------------------- 1 | namespace VadSharp; 2 | 3 | public class VadSpeechSegment 4 | { 5 | public int? StartOffset { get; set; } 6 | public int? EndOffset { get; set; } 7 | public float? StartSecond { get; set; } 8 | public float? EndSecond { get; set; } 9 | 10 | public VadSpeechSegment() 11 | { 12 | } 13 | 14 | public VadSpeechSegment(int startOffset, int? endOffset, float? startSecond, float? endSecond) 15 | { 16 | StartOffset = startOffset; 17 | EndOffset = endOffset; 18 | StartSecond = startSecond; 19 | EndSecond = endSecond; 20 | } 21 | } -------------------------------------------------------------------------------- /VadSharp/VadSharp.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net6.0 5 | enable 6 | enable 7 | AnyCPU;x64 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Gabriele Bologna 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /VadSharp/VadSharp.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.12.35707.178 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VadSharp", "VadSharp.csproj", "{D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VadSharp.Example", "..\VadSharp.Example\VadSharp.Example.csproj", "{9720E5DF-3AEC-4506-855D-9D20D8EED77F}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Debug|x64 = Debug|x64 14 | Release|Any CPU = Release|Any CPU 15 | Release|x64 = Release|x64 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 19 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Debug|Any CPU.Build.0 = Debug|Any CPU 20 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Debug|x64.ActiveCfg = Debug|x64 21 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Debug|x64.Build.0 = Debug|x64 22 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Release|Any CPU.ActiveCfg = Release|Any CPU 23 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Release|Any CPU.Build.0 = Release|Any CPU 24 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Release|x64.ActiveCfg = Release|x64 25 | {D9C9F6E2-E55A-4A5D-9F72-60EF8D921DC4}.Release|x64.Build.0 = Release|x64 26 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 27 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Debug|Any CPU.Build.0 = Debug|Any CPU 28 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Debug|x64.ActiveCfg = Debug|Any CPU 29 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Debug|x64.Build.0 = Debug|Any CPU 30 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Release|Any CPU.ActiveCfg = Release|Any CPU 31 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Release|Any CPU.Build.0 = Release|Any CPU 32 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Release|x64.ActiveCfg = Release|Any CPU 33 | {9720E5DF-3AEC-4506-855D-9D20D8EED77F}.Release|x64.Build.0 = Release|Any CPU 34 | EndGlobalSection 35 | GlobalSection(SolutionProperties) = preSolution 36 | HideSolutionNode = FALSE 37 | EndGlobalSection 38 | EndGlobal 39 | -------------------------------------------------------------------------------- /VadSharp.Example/Program.cs: -------------------------------------------------------------------------------- 1 | using System.Security.Cryptography; 2 | using System.Text; 3 | using NAudio.Wave; 4 | using VadSharp; 5 | 6 | public class Program 7 | { 8 | private const int SAMPLE_RATE = 16000; 9 | private const float THRESHOLD = 0.5f; 10 | private const int MIN_SPEECH_DURATION_MS = 250; 11 | private const float MAX_SPEECH_DURATION_SECONDS = float.PositiveInfinity; 12 | private const int MIN_SILENCE_DURATION_MS = 100; 13 | private const int SPEECH_PAD_MS = 30; 14 | private const bool USE_DIRECT_ML = true; 15 | 16 | public static void Main() 17 | { 18 | Console.Title = "VadSharpExample | Made by https://github.com/GabryB03/"; 19 | 20 | string modelPath = Path.Combine(AppContext.BaseDirectory, "resources", "silero_vad.onnx"); 21 | string audioPath = Path.Combine(AppContext.BaseDirectory, "resources", "test.wav"); 22 | 23 | if (!File.Exists(modelPath)) 24 | { 25 | Console.WriteLine($"Model file not found: {modelPath}"); 26 | return; 27 | } 28 | 29 | if (!File.Exists(audioPath)) 30 | { 31 | Console.WriteLine($"Audio file not found: {audioPath}"); 32 | return; 33 | } 34 | 35 | VadDetector vadDetector = new VadDetector(modelPath, THRESHOLD, SAMPLE_RATE, MIN_SPEECH_DURATION_MS, MAX_SPEECH_DURATION_SECONDS, MIN_SILENCE_DURATION_MS, SPEECH_PAD_MS, USE_DIRECT_ML); 36 | 37 | { 38 | List speechTimeList = vadDetector.GetSpeechSegmentList(LoadAudioFile(audioPath), 8000); 39 | StringBuilder sb = new StringBuilder(); 40 | 41 | foreach (VadSpeechSegment speechSegment in speechTimeList) 42 | { 43 | sb.AppendLine($"[-] Start second: {speechSegment.StartSecond.ToString().Replace(",", ".")}s, end second: {speechSegment.EndSecond.ToString().Replace(",", ".")}s"); 44 | } 45 | 46 | Console.WriteLine(sb.ToString()); 47 | } 48 | 49 | { 50 | List speechTimeList = vadDetector.GetSpeechSegmentList(audioPath); 51 | StringBuilder sb = new StringBuilder(); 52 | 53 | foreach (VadSpeechSegment speechSegment in speechTimeList) 54 | { 55 | sb.AppendLine($"[-] Start second: {speechSegment.StartSecond.ToString().Replace(",", ".")}s, end second: {speechSegment.EndSecond.ToString().Replace(",", ".")}s"); 56 | } 57 | 58 | Console.WriteLine(sb.ToString()); 59 | } 60 | 61 | Console.ReadLine(); 62 | } 63 | 64 | public static float[] LoadAudioFile(string filePath) 65 | { 66 | using var reader = new AudioFileReader(filePath); 67 | int sampleCount = (int)(reader.Length / (reader.WaveFormat.BitsPerSample / 8)); 68 | List samples = new List(); 69 | float[] buffer = new float[reader.WaveFormat.SampleRate]; 70 | int readSamples; 71 | 72 | while ((readSamples = reader.Read(buffer, 0, buffer.Length)) > 0) 73 | { 74 | for (int i = 0; i < readSamples; i++) 75 | { 76 | samples.Add(buffer[i]); 77 | } 78 | } 79 | 80 | return samples.ToArray(); 81 | } 82 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VadSharp - Voice Activity Detection in C#.NET 2 | 3 | ![VadSharp](https://img.shields.io/badge/.NET-9.0-blue.svg) ![ML.NET](https://img.shields.io/badge/ML.NET-Supported-brightgreen.svg) ![ONNXRuntime](https://img.shields.io/badge/ONNXRuntime-Supported-blue.svg) ![DirectML](https://img.shields.io/badge/DirectML-Supported-orange.svg) ![PRs Welcome](https://img.shields.io/badge/PRs-Welcome-brightgreen.svg) 4 | 5 | ## 🚀 The Best VAD Implementation in C# 6 | 7 | VadSharp is the **first and most efficient** implementation of **[Silero VAD](https://github.com/snakers4/silero-vad/) in C#**, supporting the latest **V5 model** with all its advanced features. It is faster than the original Python version and runs on **any GPU (NVIDIA, AMD, Intel) and CPU** with **ONNXRuntime and DirectML**. 8 | 9 | This project represents **my first significant contribution to the world of artificial intelligence in terms of development**, and I sincerely hope that users will appreciate this effort. Your support and feedback are highly valued! 🙌 10 | 11 | --- 12 | 13 | ## 🛠 Features & Benefits 14 | 15 | ✅ **Stellar Accuracy** - Excellent results in speech detection tasks. 16 | 17 | ⚡ **Fast** - Processes 30ms+ audio chunks in under **1ms** on a single CPU thread, even faster with batching or GPU acceleration. 18 | 19 | 📦 **Lightweight** - Model size is only **~2MB**. 20 | 21 | 🌎 **General** - Trained on **6,000+ languages**, handling various background noises and recording conditions. 22 | 23 | 🎚 **Flexible Sampling Rate** - Supports **8000 Hz and 16000 Hz**. 24 | 25 | 🌍 **Highly Portable** - Runs anywhere **ONNX and ML.NET** are available. 26 | 27 | 🔓 **No Strings Attached** - **MIT License**, no telemetry, no registration, no vendor lock-in. 28 | 29 | --- 30 | 31 | ## 📌 Installation 32 | 33 | ```sh 34 | # Install ONNXRuntime and ML.NET 35 | Install-Package Microsoft.ML.OnnxRuntime 36 | Install-Package Microsoft.ML 37 | Install-Package NAudio 38 | ``` 39 | 40 | --- 41 | 42 | ## 🧑‍💻 Example Usage 43 | 44 | ```csharp 45 | using System.Text; 46 | using VadSharp; 47 | 48 | public class Program 49 | { 50 | private const int SAMPLE_RATE = 16000; 51 | private const float THRESHOLD = 0.5f; 52 | private const int MIN_SPEECH_DURATION_MS = 250; 53 | private const float MAX_SPEECH_DURATION_SECONDS = float.PositiveInfinity; 54 | private const int MIN_SILENCE_DURATION_MS = 100; 55 | private const int SPEECH_PAD_MS = 30; 56 | 57 | public static void Main() 58 | { 59 | Console.Title = "VadSharpExample | Made by https://github.com/GabryB03/"; 60 | 61 | string modelPath = Path.Combine(AppContext.BaseDirectory, "resources", "silero_vad.onnx"); 62 | string audioPath = Path.Combine(AppContext.BaseDirectory, "resources", "test.wav"); 63 | 64 | if (!File.Exists(modelPath)) 65 | { 66 | Console.WriteLine($"Model file not found: {modelPath}"); 67 | return; 68 | } 69 | 70 | if (!File.Exists(audioPath)) 71 | { 72 | Console.WriteLine($"Audio file not found: {audioPath}"); 73 | return; 74 | } 75 | 76 | VadDetector vadDetector = new VadDetector(modelPath, THRESHOLD, SAMPLE_RATE, MIN_SPEECH_DURATION_MS, MAX_SPEECH_DURATION_SECONDS, MIN_SILENCE_DURATION_MS, SPEECH_PAD_MS); 77 | List speechTimeList = vadDetector.GetSpeechSegmentList(audioPath); 78 | StringBuilder sb = new StringBuilder(); 79 | 80 | foreach (VadSpeechSegment speechSegment in speechTimeList) 81 | { 82 | sb.AppendLine($"[-] Start second: {speechSegment.StartSecond.ToString().Replace(",", ".")}s, end second: {speechSegment.EndSecond.ToString().Replace(",", ".")}s"); 83 | } 84 | 85 | Console.WriteLine(sb.ToString()); 86 | Console.ReadLine(); 87 | } 88 | } 89 | ``` 90 | 91 | --- 92 | 93 | ## 🌟 Contributing 94 | 95 | Contributions are welcome there! 🚀 Follow these steps to create a **Pull Request (PR):** 96 | 97 | 1. **Fork the repository** 98 | 2. **Clone your fork**: 99 | ```sh 100 | git clone https://github.com/your-username/VadSharp.git 101 | ``` 102 | 3. **Create a new branch**: 103 | ```sh 104 | git checkout -b feature-branch 105 | ``` 106 | 4. **Make your changes & commit**: 107 | ```sh 108 | git add . 109 | git commit -m "Your awesome feature!" 110 | ``` 111 | 5. **Push the branch & create a PR**: 112 | ```sh 113 | git push origin feature-branch 114 | ``` 115 | 6. **Open a PR on GitHub** 116 | 117 | --- 118 | 119 | ## 🐛 Issues & Bug Reports 120 | 121 | If you find a bug or have a feature request, please **open an issue**: 122 | 123 | 1. Go to the [Issues Tab](https://github.com/GabryB03/VadSharp/issues). 124 | 2. Click on **"New Issue"**. 125 | 3. Provide a **clear and concise** description of the problem. 126 | 4. If possible, include **screenshots and logs**. 127 | 128 | I will review and respond ASAP! 🚀 129 | 130 | --- 131 | 132 | ## ✨ Credits 133 | 134 | All of my credits go to the original inventor of the [Silero VAD](https://github.com/snakers4/silero-vad/) project, 135 | which has worked hard to the architecture of the algorithm and trained the models! 136 | 137 | ## 📜 License 138 | 139 | VadSharp is licensed under the **MIT License**. Feel free to use, modify, and distribute it as you like! 140 | 141 | 📌 **Made with ❤️ by [GabryB03](https://github.com/GabryB03/)** 142 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | # but not Directory.Build.rsp, as it configures directory-level build defaults 86 | !Directory.Build.rsp 87 | *.sbr 88 | *.tlb 89 | *.tli 90 | *.tlh 91 | *.tmp 92 | *.tmp_proj 93 | *_wpftmp.csproj 94 | *.log 95 | *.tlog 96 | *.vspscc 97 | *.vssscc 98 | .builds 99 | *.pidb 100 | *.svclog 101 | *.scc 102 | 103 | # Chutzpah Test files 104 | _Chutzpah* 105 | 106 | # Visual C++ cache files 107 | ipch/ 108 | *.aps 109 | *.ncb 110 | *.opendb 111 | *.opensdf 112 | *.sdf 113 | *.cachefile 114 | *.VC.db 115 | *.VC.VC.opendb 116 | 117 | # Visual Studio profiler 118 | *.psess 119 | *.vsp 120 | *.vspx 121 | *.sap 122 | 123 | # Visual Studio Trace Files 124 | *.e2e 125 | 126 | # TFS 2012 Local Workspace 127 | $tf/ 128 | 129 | # Guidance Automation Toolkit 130 | *.gpState 131 | 132 | # ReSharper is a .NET coding add-in 133 | _ReSharper*/ 134 | *.[Rr]e[Ss]harper 135 | *.DotSettings.user 136 | 137 | # TeamCity is a build add-in 138 | _TeamCity* 139 | 140 | # DotCover is a Code Coverage Tool 141 | *.dotCover 142 | 143 | # AxoCover is a Code Coverage Tool 144 | .axoCover/* 145 | !.axoCover/settings.json 146 | 147 | # Coverlet is a free, cross platform Code Coverage Tool 148 | coverage*.json 149 | coverage*.xml 150 | coverage*.info 151 | 152 | # Visual Studio code coverage results 153 | *.coverage 154 | *.coveragexml 155 | 156 | # NCrunch 157 | _NCrunch_* 158 | .*crunch*.local.xml 159 | nCrunchTemp_* 160 | 161 | # MightyMoose 162 | *.mm.* 163 | AutoTest.Net/ 164 | 165 | # Web workbench (sass) 166 | .sass-cache/ 167 | 168 | # Installshield output folder 169 | [Ee]xpress/ 170 | 171 | # DocProject is a documentation generator add-in 172 | DocProject/buildhelp/ 173 | DocProject/Help/*.HxT 174 | DocProject/Help/*.HxC 175 | DocProject/Help/*.hhc 176 | DocProject/Help/*.hhk 177 | DocProject/Help/*.hhp 178 | DocProject/Help/Html2 179 | DocProject/Help/html 180 | 181 | # Click-Once directory 182 | publish/ 183 | 184 | # Publish Web Output 185 | *.[Pp]ublish.xml 186 | *.azurePubxml 187 | # Note: Comment the next line if you want to checkin your web deploy settings, 188 | # but database connection strings (with potential passwords) will be unencrypted 189 | *.pubxml 190 | *.publishproj 191 | 192 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 193 | # checkin your Azure Web App publish settings, but sensitive information contained 194 | # in these scripts will be unencrypted 195 | PublishScripts/ 196 | 197 | # NuGet Packages 198 | *.nupkg 199 | # NuGet Symbol Packages 200 | *.snupkg 201 | # The packages folder can be ignored because of Package Restore 202 | **/[Pp]ackages/* 203 | # except build/, which is used as an MSBuild target. 204 | !**/[Pp]ackages/build/ 205 | # Uncomment if necessary however generally it will be regenerated when needed 206 | #!**/[Pp]ackages/repositories.config 207 | # NuGet v3's project.json files produces more ignorable files 208 | *.nuget.props 209 | *.nuget.targets 210 | 211 | # Microsoft Azure Build Output 212 | csx/ 213 | *.build.csdef 214 | 215 | # Microsoft Azure Emulator 216 | ecf/ 217 | rcf/ 218 | 219 | # Windows Store app package directories and files 220 | AppPackages/ 221 | BundleArtifacts/ 222 | Package.StoreAssociation.xml 223 | _pkginfo.txt 224 | *.appx 225 | *.appxbundle 226 | *.appxupload 227 | 228 | # Visual Studio cache files 229 | # files ending in .cache can be ignored 230 | *.[Cc]ache 231 | # but keep track of directories ending in .cache 232 | !?*.[Cc]ache/ 233 | 234 | # Others 235 | ClientBin/ 236 | ~$* 237 | *~ 238 | *.dbmdl 239 | *.dbproj.schemaview 240 | *.jfm 241 | *.pfx 242 | *.publishsettings 243 | orleans.codegen.cs 244 | 245 | # Including strong name files can present a security risk 246 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 247 | #*.snk 248 | 249 | # Since there are multiple workflows, uncomment next line to ignore bower_components 250 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 251 | #bower_components/ 252 | 253 | # RIA/Silverlight projects 254 | Generated_Code/ 255 | 256 | # Backup & report files from converting an old project file 257 | # to a newer Visual Studio version. Backup files are not needed, 258 | # because we have git ;-) 259 | _UpgradeReport_Files/ 260 | Backup*/ 261 | UpgradeLog*.XML 262 | UpgradeLog*.htm 263 | ServiceFabricBackup/ 264 | *.rptproj.bak 265 | 266 | # SQL Server files 267 | *.mdf 268 | *.ldf 269 | *.ndf 270 | 271 | # Business Intelligence projects 272 | *.rdl.data 273 | *.bim.layout 274 | *.bim_*.settings 275 | *.rptproj.rsuser 276 | *- [Bb]ackup.rdl 277 | *- [Bb]ackup ([0-9]).rdl 278 | *- [Bb]ackup ([0-9][0-9]).rdl 279 | 280 | # Microsoft Fakes 281 | FakesAssemblies/ 282 | 283 | # GhostDoc plugin setting file 284 | *.GhostDoc.xml 285 | 286 | # Node.js Tools for Visual Studio 287 | .ntvs_analysis.dat 288 | node_modules/ 289 | 290 | # Visual Studio 6 build log 291 | *.plg 292 | 293 | # Visual Studio 6 workspace options file 294 | *.opt 295 | 296 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 297 | *.vbw 298 | 299 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 300 | *.vbp 301 | 302 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 303 | *.dsw 304 | *.dsp 305 | 306 | # Visual Studio 6 technical files 307 | *.ncb 308 | *.aps 309 | 310 | # Visual Studio LightSwitch build output 311 | **/*.HTMLClient/GeneratedArtifacts 312 | **/*.DesktopClient/GeneratedArtifacts 313 | **/*.DesktopClient/ModelManifest.xml 314 | **/*.Server/GeneratedArtifacts 315 | **/*.Server/ModelManifest.xml 316 | _Pvt_Extensions 317 | 318 | # Paket dependency manager 319 | .paket/paket.exe 320 | paket-files/ 321 | 322 | # FAKE - F# Make 323 | .fake/ 324 | 325 | # CodeRush personal settings 326 | .cr/personal 327 | 328 | # Python Tools for Visual Studio (PTVS) 329 | __pycache__/ 330 | *.pyc 331 | 332 | # Cake - Uncomment if you are using it 333 | # tools/** 334 | # !tools/packages.config 335 | 336 | # Tabs Studio 337 | *.tss 338 | 339 | # Telerik's JustMock configuration file 340 | *.jmconfig 341 | 342 | # BizTalk build output 343 | *.btp.cs 344 | *.btm.cs 345 | *.odx.cs 346 | *.xsd.cs 347 | 348 | # OpenCover UI analysis results 349 | OpenCover/ 350 | 351 | # Azure Stream Analytics local run output 352 | ASALocalRun/ 353 | 354 | # MSBuild Binary and Structured Log 355 | *.binlog 356 | 357 | # NVidia Nsight GPU debugger configuration file 358 | *.nvuser 359 | 360 | # MFractors (Xamarin productivity tool) working folder 361 | .mfractor/ 362 | 363 | # Local History for Visual Studio 364 | .localhistory/ 365 | 366 | # Visual Studio History (VSHistory) files 367 | .vshistory/ 368 | 369 | # BeatPulse healthcheck temp database 370 | healthchecksdb 371 | 372 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 373 | MigrationBackup/ 374 | 375 | # Ionide (cross platform F# VS Code tools) working folder 376 | .ionide/ 377 | 378 | # Fody - auto-generated XML schema 379 | FodyWeavers.xsd 380 | 381 | # VS Code files for those working on multiple tools 382 | .vscode/* 383 | !.vscode/settings.json 384 | !.vscode/tasks.json 385 | !.vscode/launch.json 386 | !.vscode/extensions.json 387 | *.code-workspace 388 | 389 | # Local History for Visual Studio Code 390 | .history/ 391 | 392 | # Windows Installer files from build outputs 393 | *.cab 394 | *.msi 395 | *.msix 396 | *.msm 397 | *.msp 398 | 399 | # JetBrains Rider 400 | *.sln.iml 401 | -------------------------------------------------------------------------------- /VadSharp/VadOnnxModel.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.ML.OnnxRuntime; 2 | using Microsoft.ML.OnnxRuntime.Tensors; 3 | 4 | namespace VadSharp 5 | { 6 | public class VadOnnxModel : IDisposable 7 | { 8 | private readonly InferenceSession session; 9 | private float[][][] state; 10 | private float[][] context; 11 | private int lastSr, lastBatchSize; 12 | private static readonly int[] SupportedRates = { 8000, 16000 }; 13 | 14 | public VadOnnxModel(string modelPath, bool useDirectML = true) 15 | { 16 | var sessionOptions = new SessionOptions(); 17 | 18 | if (useDirectML) 19 | { 20 | try 21 | { 22 | sessionOptions.AppendExecutionProvider_DML(); 23 | } 24 | catch 25 | { 26 | 27 | } 28 | } 29 | 30 | sessionOptions.InterOpNumThreads = 1; 31 | sessionOptions.IntraOpNumThreads = 1; 32 | sessionOptions.EnableCpuMemArena = true; 33 | 34 | session = new InferenceSession(modelPath, sessionOptions); 35 | ResetStates(); 36 | } 37 | 38 | public void ResetStates() 39 | { 40 | state = new float[2][][] 41 | { 42 | new float[1][] { new float[128] }, 43 | new float[1][] { new float[128] } 44 | }; 45 | context = Array.Empty(); 46 | lastSr = 0; 47 | lastBatchSize = 0; 48 | } 49 | 50 | public void Dispose() => session?.Dispose(); 51 | 52 | public class ValidationResult 53 | { 54 | public float[][] X { get; } 55 | public int Sr { get; } 56 | public ValidationResult(float[][] x, int sr) 57 | { 58 | X = x; 59 | Sr = sr; 60 | } 61 | } 62 | 63 | private ValidationResult ValidateInput(float[][] x, int sr) 64 | { 65 | if (x.Length == 1) 66 | { 67 | x = new float[][] { x[0] }; 68 | } 69 | 70 | if (x.Length > 2) 71 | { 72 | throw new ArgumentException($"Incorrect audio data dimension: {x[0].Length}"); 73 | } 74 | 75 | if (sr != 16000 && sr % 16000 == 0) 76 | { 77 | int step = sr / 16000; 78 | 79 | for (int i = 0; i < x.Length; i++) 80 | { 81 | float[] current = x[i]; 82 | int newLength = (current.Length + step - 1) / step; 83 | float[] newArr = new float[newLength]; 84 | 85 | for (int j = 0, index = 0; j < current.Length; j += step, index++) 86 | { 87 | newArr[index] = current[j]; 88 | } 89 | 90 | x[i] = newArr; 91 | } 92 | 93 | sr = 16000; 94 | } 95 | 96 | if (Array.IndexOf(SupportedRates, sr) < 0) 97 | { 98 | throw new ArgumentException($"Only supports sample rates {string.Join(", ", SupportedRates)} (or multiples of 16000)"); 99 | } 100 | 101 | if (((float)sr) / x[0].Length > 31.25) 102 | { 103 | throw new ArgumentException("Input audio is too short"); 104 | } 105 | 106 | return new ValidationResult(x, sr); 107 | } 108 | 109 | private static float[][] Concatenate(float[][] a, float[][] b) 110 | { 111 | if (a.Length != b.Length) 112 | { 113 | throw new ArgumentException("The number of rows in both arrays must be the same."); 114 | } 115 | 116 | int rows = a.Length; 117 | float[][] result = new float[rows][]; 118 | 119 | for (int i = 0; i < rows; i++) 120 | { 121 | int lenA = a[i].Length; 122 | int lenB = b[i].Length; 123 | float[] row = new float[lenA + lenB]; 124 | Buffer.BlockCopy(a[i], 0, row, 0, lenA * sizeof(float)); 125 | Buffer.BlockCopy(b[i], 0, row, lenA * sizeof(float), lenB * sizeof(float)); 126 | result[i] = row; 127 | } 128 | 129 | return result; 130 | } 131 | 132 | private static float[][] GetLastColumns(float[][] array, int contextSize) 133 | { 134 | int rows = array.Length; 135 | int cols = array[0].Length; 136 | 137 | if (contextSize > cols) 138 | { 139 | throw new ArgumentException("contextSize cannot be greater than the number of columns in the array."); 140 | } 141 | 142 | float[][] result = new float[rows][]; 143 | 144 | for (int i = 0; i < rows; i++) 145 | { 146 | float[] row = new float[contextSize]; 147 | Buffer.BlockCopy(array[i], (cols - contextSize) * sizeof(float), row, 0, contextSize * sizeof(float)); 148 | result[i] = row; 149 | } 150 | 151 | return result; 152 | } 153 | 154 | public float[] Call(float[][] x, int sr) 155 | { 156 | var validation = ValidateInput(x, sr); 157 | x = validation.X; 158 | sr = validation.Sr; 159 | 160 | int numberSamples = (sr == 16000) ? 512 : 256; 161 | 162 | if (x[0].Length != numberSamples) 163 | { 164 | throw new ArgumentException($"Provided number of samples is {x[0].Length} (Supported values: 256 for 8000 sample rate, 512 for 16000)"); 165 | } 166 | 167 | int batchSize = x.Length; 168 | int contextSize = (sr == 16000) ? 64 : 32; 169 | 170 | if (lastBatchSize == 0 || lastSr != sr || lastBatchSize != batchSize) 171 | { 172 | ResetStates(); 173 | } 174 | 175 | if (context.Length != batchSize) 176 | { 177 | context = new float[batchSize][]; 178 | 179 | for (int i = 0; i < batchSize; i++) 180 | { 181 | context[i] = new float[contextSize]; 182 | } 183 | } 184 | 185 | x = Concatenate(context, x); 186 | int rows = x.Length; 187 | int cols = x[0].Length; 188 | 189 | float[] inputData = new float[rows * cols]; 190 | 191 | for (int i = 0; i < rows; i++) 192 | { 193 | Array.Copy(x[i], 0, inputData, i * cols, cols); 194 | } 195 | 196 | var inputTensor = new DenseTensor(inputData, new[] { rows, cols }); 197 | var srTensor = new DenseTensor(new long[] { sr }, new[] { 1 }); 198 | 199 | int stateDim0 = state.Length; 200 | int stateDim1 = state[0].Length; 201 | int stateDim2 = state[0][0].Length; 202 | 203 | float[] stateData = new float[stateDim0 * stateDim1 * stateDim2]; 204 | int index = 0; 205 | 206 | for (int i = 0; i < stateDim0; i++) 207 | { 208 | for (int j = 0; j < stateDim1; j++) 209 | { 210 | Array.Copy(state[i][j], 0, stateData, index, stateDim2); 211 | index += stateDim2; 212 | } 213 | } 214 | 215 | var stateTensor = new DenseTensor(stateData, new[] { stateDim0, stateDim1, stateDim2 }); 216 | 217 | var inputs = new List 218 | { 219 | NamedOnnxValue.CreateFromTensor("input", inputTensor), 220 | NamedOnnxValue.CreateFromTensor("sr", srTensor), 221 | NamedOnnxValue.CreateFromTensor("state", stateTensor) 222 | }; 223 | 224 | using var outputs = session.Run(inputs); 225 | var outputTensor = outputs.First(o => o.Name == "output").AsTensor(); 226 | var newStateTensor = outputs.First(o => o.Name == "stateN").AsTensor(); 227 | 228 | context = GetLastColumns(x, contextSize); 229 | lastSr = sr; 230 | lastBatchSize = batchSize; 231 | 232 | var dims = newStateTensor.Dimensions; 233 | int d0 = dims[0], d1 = dims[1], d2 = dims[2]; 234 | state = new float[d0][][]; 235 | float[] newStateFlat = newStateTensor.ToArray(); 236 | index = 0; 237 | 238 | for (int i = 0; i < d0; i++) 239 | { 240 | state[i] = new float[d1][]; 241 | for (int j = 0; j < d1; j++) 242 | { 243 | float[] row = new float[d2]; 244 | Array.Copy(newStateFlat, index, row, 0, d2); 245 | state[i][j] = row; 246 | index += d2; 247 | } 248 | } 249 | 250 | return outputTensor.ToArray(); 251 | } 252 | } 253 | } -------------------------------------------------------------------------------- /VadSharp/VadDetector.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection.PortableExecutable; 2 | using System.Security.Cryptography; 3 | using NAudio.Wave; 4 | using NAudio.Wave.SampleProviders; 5 | 6 | namespace VadSharp 7 | { 8 | public class VadDetector 9 | { 10 | private readonly VadOnnxModel _model; 11 | private readonly float _threshold; 12 | private readonly float _negThreshold; 13 | private readonly int _samplingRate; 14 | private readonly int _windowSizeSample; 15 | private readonly float _minSpeechSamples; 16 | private readonly float _speechPadSamples; 17 | private readonly float _maxSpeechSamples; 18 | private readonly float _minSilenceSamples; 19 | private readonly float _minSilenceSamplesAtMaxSpeech; 20 | private int _audioLengthSamples; 21 | private const float THRESHOLD_GAP = 0.15f; 22 | private const int SAMPLING_RATE_8K = 8000; 23 | private const int SAMPLING_RATE_16K = 16000; 24 | private const string _modelHash = "47d6ceb95435caf8049e0ea17a4dd95580e8a5950976ec770962c5534e64f2ea71ce196957102104fe014a2bdfa766f323ab5e46b6beb1fc46db129622298913"; 25 | 26 | public VadDetector(string onnxModelPath, float threshold, int samplingRate, int minSpeechDurationMs, float maxSpeechDurationSeconds, int minSilenceDurationMs, int speechPadMs, bool useDirectML = true) 27 | { 28 | if (samplingRate != SAMPLING_RATE_8K && samplingRate != SAMPLING_RATE_16K) 29 | { 30 | throw new ArgumentException("Sampling rate not supported, only available for [8000, 16000]"); 31 | } 32 | 33 | SHA512 sha2_512 = SHA512.Create(); 34 | byte[] loadedModelBytes = File.ReadAllBytes(onnxModelPath); 35 | byte[] hashedModel = sha2_512.ComputeHash(loadedModelBytes); 36 | string hashString = BitConverter.ToString(hashedModel).Replace("-", "").ToLower(); 37 | 38 | if (hashString != _modelHash) 39 | { 40 | throw new ArgumentException("Model not supported"); 41 | } 42 | 43 | _model = new VadOnnxModel(onnxModelPath, useDirectML); 44 | _samplingRate = samplingRate; 45 | _threshold = threshold; 46 | _negThreshold = threshold - THRESHOLD_GAP; 47 | _windowSizeSample = samplingRate == SAMPLING_RATE_16K ? 512 : 256; 48 | _minSpeechSamples = samplingRate * minSpeechDurationMs / 1000f; 49 | _speechPadSamples = samplingRate * speechPadMs / 1000f; 50 | _maxSpeechSamples = samplingRate * maxSpeechDurationSeconds - _windowSizeSample - 2 * _speechPadSamples; 51 | _minSilenceSamples = samplingRate * minSilenceDurationMs / 1000f; 52 | _minSilenceSamplesAtMaxSpeech = samplingRate * 98 / 1000f; 53 | Reset(); 54 | } 55 | 56 | public void Reset() => _model.ResetStates(); 57 | 58 | public List GetSpeechSegmentList(string wavFile) => GetSpeechSegmentList(new FileInfo(wavFile)); 59 | 60 | public List GetSpeechSegmentList(FileInfo wavFile) 61 | { 62 | Reset(); 63 | using var reader = new AudioFileReader(wavFile.FullName); 64 | var resampler = new WdlResamplingSampleProvider(reader, _samplingRate); 65 | var speechProbList = new List(); 66 | _audioLengthSamples = (int)(reader.Length / 2); 67 | int window = _windowSizeSample; 68 | float[] buffer = new float[window]; 69 | 70 | while (resampler.Read(buffer, 0, window) > 0) 71 | { 72 | float speechProb = _model.Call(new[] { buffer }, _samplingRate)[0]; 73 | speechProbList.Add(speechProb); 74 | } 75 | 76 | return CalculateProb(speechProbList); 77 | } 78 | 79 | public List GetSpeechSegmentList(float[] audioBuffer, int originalSampleRate) 80 | { 81 | audioBuffer = ResampleAudioBuffer(audioBuffer, originalSampleRate, 16000); 82 | Reset(); 83 | 84 | var speechProbList = new List(); 85 | int totalSamples = audioBuffer.Length; 86 | _audioLengthSamples = totalSamples; 87 | int window = _windowSizeSample; 88 | 89 | for (int i = 0; i < totalSamples; i += window) 90 | { 91 | float[] buffer = new float[window]; 92 | int remaining = totalSamples - i; 93 | 94 | if (remaining >= window) 95 | { 96 | Array.Copy(audioBuffer, i, buffer, 0, window); 97 | } 98 | else 99 | { 100 | Array.Copy(audioBuffer, i, buffer, 0, remaining); 101 | for (int j = remaining; j < window; j++) 102 | { 103 | buffer[j] = 0f; 104 | } 105 | } 106 | 107 | float speechProb = _model.Call(new[] { buffer }, _samplingRate)[0]; 108 | speechProbList.Add(speechProb); 109 | } 110 | 111 | return CalculateProb(speechProbList); 112 | } 113 | 114 | private float[] ResampleAudioBuffer(float[] audioBuffer, int originalRate, int targetRate) 115 | { 116 | if (originalRate == targetRate) 117 | { 118 | return audioBuffer; 119 | } 120 | 121 | double ratio = (double)targetRate / originalRate; 122 | int newLength = (int)(audioBuffer.Length * ratio); 123 | float[] resampledBuffer = new float[newLength]; 124 | 125 | for (int i = 0; i < newLength; i++) 126 | { 127 | double origPos = i / ratio; 128 | int index = (int)Math.Floor(origPos); 129 | double frac = origPos - index; 130 | 131 | if (index + 1 < audioBuffer.Length) 132 | { 133 | resampledBuffer[i] = (float)(audioBuffer[index] * (1 - frac) + audioBuffer[index + 1] * frac); 134 | } 135 | else 136 | { 137 | resampledBuffer[i] = audioBuffer[index]; 138 | } 139 | } 140 | 141 | return resampledBuffer; 142 | } 143 | 144 | private List CalculateProb(List speechProbList) 145 | { 146 | var result = new List(); 147 | bool triggered = false; 148 | int tempEnd = 0, prevEnd = 0, nextStart = 0; 149 | var segment = new VadSpeechSegment(); 150 | int window = _windowSizeSample; 151 | 152 | for (int i = 0, count = speechProbList.Count; i < count; i++) 153 | { 154 | float prob = speechProbList[i]; 155 | int currentOffset = window * i; 156 | 157 | if (prob >= _threshold && tempEnd != 0) 158 | { 159 | tempEnd = 0; 160 | 161 | if (nextStart < prevEnd) 162 | { 163 | nextStart = currentOffset; 164 | } 165 | } 166 | 167 | if (prob >= _threshold && !triggered) 168 | { 169 | triggered = true; 170 | segment.StartOffset = currentOffset; 171 | continue; 172 | } 173 | 174 | if (triggered && currentOffset - segment.StartOffset > _maxSpeechSamples) 175 | { 176 | if (prevEnd != 0) 177 | { 178 | segment.EndOffset = prevEnd; 179 | result.Add(segment); 180 | segment = new VadSpeechSegment(); 181 | 182 | if (nextStart < prevEnd) 183 | { 184 | triggered = false; 185 | } 186 | else 187 | { 188 | segment.StartOffset = nextStart; 189 | } 190 | 191 | prevEnd = nextStart = tempEnd = 0; 192 | } 193 | else 194 | { 195 | segment.EndOffset = currentOffset; 196 | result.Add(segment); 197 | segment = new VadSpeechSegment(); 198 | prevEnd = nextStart = tempEnd = 0; 199 | triggered = false; 200 | continue; 201 | } 202 | } 203 | 204 | if (prob < _negThreshold && triggered) 205 | { 206 | if (tempEnd == 0) 207 | { 208 | tempEnd = currentOffset; 209 | } 210 | 211 | if (currentOffset - tempEnd > _minSilenceSamplesAtMaxSpeech) 212 | { 213 | prevEnd = tempEnd; 214 | } 215 | 216 | if (currentOffset - tempEnd < _minSilenceSamples) 217 | { 218 | continue; 219 | } 220 | else 221 | { 222 | segment.EndOffset = tempEnd; 223 | 224 | if ((segment.EndOffset - segment.StartOffset) > _minSpeechSamples) 225 | { 226 | result.Add(segment); 227 | } 228 | 229 | segment = new VadSpeechSegment(); 230 | prevEnd = nextStart = tempEnd = 0; 231 | triggered = false; 232 | continue; 233 | } 234 | } 235 | } 236 | 237 | if (segment.StartOffset != null && (_audioLengthSamples - segment.StartOffset) > _minSpeechSamples) 238 | { 239 | segment.EndOffset = _audioLengthSamples; 240 | result.Add(segment); 241 | } 242 | 243 | for (int i = 0, count = result.Count; i < count; i++) 244 | { 245 | var item = result[i]; 246 | 247 | if (i == 0) 248 | { 249 | item.StartOffset = Math.Max(0, item.StartOffset.Value - (int)_speechPadSamples); 250 | } 251 | 252 | if (i != count - 1) 253 | { 254 | var nextItem = result[i + 1]; 255 | int silenceDuration = nextItem.StartOffset.Value - item.EndOffset.Value; 256 | 257 | if (silenceDuration < 2 * _speechPadSamples) 258 | { 259 | int halfSilence = silenceDuration / 2; 260 | item.EndOffset += halfSilence; 261 | nextItem.StartOffset = Math.Max(0, nextItem.StartOffset.Value - halfSilence); 262 | } 263 | else 264 | { 265 | item.EndOffset = Math.Min(_audioLengthSamples, item.EndOffset.Value + (int)_speechPadSamples); 266 | nextItem.StartOffset = Math.Max(0, nextItem.StartOffset.Value - (int)_speechPadSamples); 267 | } 268 | } 269 | else 270 | { 271 | item.EndOffset = Math.Min(_audioLengthSamples, item.EndOffset.Value + (int)_speechPadSamples); 272 | } 273 | } 274 | 275 | return MergeListAndCalculateSecond(result, _samplingRate); 276 | } 277 | 278 | private List MergeListAndCalculateSecond(List segments, int samplingRate) 279 | { 280 | var merged = new List(); 281 | 282 | if (segments == null || segments.Count == 0) 283 | { 284 | return merged; 285 | } 286 | 287 | segments.Sort((a, b) => a.StartOffset.Value.CompareTo(b.StartOffset.Value)); 288 | int left = segments[0].StartOffset.Value; 289 | int right = segments[0].EndOffset.Value; 290 | 291 | for (int i = 1, count = segments.Count; i < count; i++) 292 | { 293 | var seg = segments[i]; 294 | 295 | if (seg.StartOffset > right) 296 | { 297 | merged.Add(new VadSpeechSegment(left, right, CalculateSecondByOffset(left, samplingRate), CalculateSecondByOffset(right, samplingRate))); 298 | left = seg.StartOffset.Value; 299 | right = seg.EndOffset.Value; 300 | } 301 | else 302 | { 303 | right = Math.Max(right, seg.EndOffset.Value); 304 | } 305 | } 306 | 307 | merged.Add(new VadSpeechSegment(left, right, CalculateSecondByOffset(left, samplingRate), CalculateSecondByOffset(right, samplingRate))); 308 | return merged; 309 | } 310 | 311 | private float CalculateSecondByOffset(int offset, int samplingRate) 312 | { 313 | float seconds = offset / (float)samplingRate; 314 | return (float)(Math.Floor(seconds * 1000.0f) / 1000.0f); 315 | } 316 | } 317 | } --------------------------------------------------------------------------------