├── AliParaformerAsr.Tests
├── TestResources
│ └── README.md
├── OfflineRecognizerTests .cs
├── README.md
├── AliParaformerAsr.Tests.csproj
└── Utils
│ └── TextHelper.cs
├── AliParaformerAsr
├── data
│ └── embed.onnx
├── Model
│ ├── PreEncoderConfEntity.cs
│ ├── PostEncoderConfEntity.cs
│ ├── OfflineInputEntity.cs
│ ├── OnlineInputEntity.cs
│ ├── CmvnEntity.cs
│ ├── DecoderOutputEntity.cs
│ ├── PredictorOutputEntity.cs
│ ├── EncoderOutputEntity.cs
│ ├── ModelOutputEntity.cs
│ ├── OfflineOutputEntity.cs
│ ├── OnlineOutputEntity.cs
│ ├── OnlineRecognizerResultEntity.cs
│ ├── OfflineRecognizerResultEntity.cs
│ ├── PredictorConfEntity.cs
│ ├── FrontendConfEntity.cs
│ ├── ModelConfEntity.cs
│ ├── DecoderConfEntity.cs
│ ├── EncoderConfEntity.cs
│ ├── ConfEntity.cs
│ └── OfflineYamlEntity.cs
├── IOfflineProj.cs
├── Utils
│ ├── PadHelper.cs
│ └── PreloadHelper.cs
├── EmbedSVModel.cs
├── AliParaformerAsr.csproj
├── OfflineStream.cs
├── OfflineModel.cs
├── OfflineProjOfParaformer.cs
├── EmbedSeacoModel.cs
├── WavFrontend.cs
├── OfflineProjOfSeacoParaformer.cs
├── OnlineWavFrontend.cs
├── OfflineProjOfSenseVoiceSmall.cs
└── OnlineModel.cs
├── AliParaformerAsr.Examples.MauiApp
├── Resources
│ ├── Fonts
│ │ ├── OpenSans-Regular.ttf
│ │ └── OpenSans-Semibold.ttf
│ ├── AppIcon
│ │ └── appicon.svg
│ ├── Raw
│ │ └── AboutAssets.txt
│ ├── Splash
│ │ └── splash.svg
│ ├── Styles
│ │ └── Colors.xaml
│ └── Images
│ │ └── dotnet_bot.svg
├── AppShell.xaml.cs
├── Properties
│ └── launchSettings.json
├── Platforms
│ ├── Android
│ │ ├── Resources
│ │ │ └── values
│ │ │ │ └── colors.xml
│ │ ├── MainActivity.cs
│ │ ├── AndroidManifest.xml
│ │ └── MainApplication.cs
│ ├── iOS
│ │ ├── AppDelegate.cs
│ │ ├── Program.cs
│ │ └── Info.plist
│ ├── MacCatalyst
│ │ ├── AppDelegate.cs
│ │ ├── Program.cs
│ │ └── Info.plist
│ ├── Windows
│ │ ├── App.xaml
│ │ ├── app.manifest
│ │ ├── App.xaml.cs
│ │ └── Package.appxmanifest
│ └── Tizen
│ │ ├── Main.cs
│ │ └── tizen-manifest.xml
├── App.xaml.cs
├── MauiProgram.cs
├── MainPage.xaml.cs
├── MySplashPage.xaml.cs
├── Utils
│ ├── SysConf.cs
│ └── AEDEmojiHelper.cs
├── App.xaml
├── AppShell.xaml
├── MainPage.xaml
├── MySplashPage.xaml
├── RecognitionForFiles.xaml
└── MauiApp1.csproj
├── AliParaformerAsr.Examples
├── BaseAsr.cs
├── app.manifest
├── AliParaformerAsr.Examples.csproj
├── Config
│ └── TrimmerRoots.xml
├── Utils
│ └── TextHelper.cs
└── OfflineAliParaformerAsrRecognizer.cs
├── .gitattributes
├── AliParaformerAsr.sln
├── .gitignore
├── README.md
└── LICENSE
/AliParaformerAsr.Tests/TestResources/README.md:
--------------------------------------------------------------------------------
1 | 1.将模型放置在当前目录,并设置文件属性为“如果较新则复制”
2 |
--------------------------------------------------------------------------------
/AliParaformerAsr/data/embed.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manyeyes/AliParaformerAsr/HEAD/AliParaformerAsr/data/embed.onnx
--------------------------------------------------------------------------------
/AliParaformerAsr.Tests/OfflineRecognizerTests .cs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manyeyes/AliParaformerAsr/HEAD/AliParaformerAsr.Tests/OfflineRecognizerTests .cs
--------------------------------------------------------------------------------
/AliParaformerAsr.Tests/README.md:
--------------------------------------------------------------------------------
1 | 1.下载模型:
2 | https://modelscope.cn/models/manyeyes/sensevoice-small-int8-onnx
3 |
4 | 2.将模型放入 TestResources 目录,并设置模型文件属性为“如果较新则复制”
5 |
6 | 3.运行测试
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/Fonts/OpenSans-Regular.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manyeyes/AliParaformerAsr/HEAD/AliParaformerAsr.Examples.MauiApp/Resources/Fonts/OpenSans-Regular.ttf
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/Fonts/OpenSans-Semibold.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/manyeyes/AliParaformerAsr/HEAD/AliParaformerAsr.Examples.MauiApp/Resources/Fonts/OpenSans-Semibold.ttf
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/AppShell.xaml.cs:
--------------------------------------------------------------------------------
1 | namespace MauiApp1;
2 |
3 | public partial class AppShell : Shell
4 | {
5 | public AppShell()
6 | {
7 | InitializeComponent();
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Properties/launchSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "profiles": {
3 | "Windows Machine": {
4 | "commandName": "MsixPackage",
5 | "nativeDebugging": false
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/AppIcon/appicon.svg:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Android/Resources/values/colors.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | #512BD4
4 | #2B0B98
5 | #2B0B98
6 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/iOS/AppDelegate.cs:
--------------------------------------------------------------------------------
1 | using Foundation;
2 |
3 | namespace MauiApp1;
4 |
5 | [Register("AppDelegate")]
6 | public class AppDelegate : MauiUIApplicationDelegate
7 | {
8 | protected override MauiApp CreateMauiApp() => MauiProgram.CreateMauiApp();
9 | }
10 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/MacCatalyst/AppDelegate.cs:
--------------------------------------------------------------------------------
1 | using Foundation;
2 |
3 | namespace MauiApp1;
4 |
5 | [Register("AppDelegate")]
6 | public class AppDelegate : MauiUIApplicationDelegate
7 | {
8 | protected override MauiApp CreateMauiApp() => MauiProgram.CreateMauiApp();
9 | }
10 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples/BaseAsr.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace AliParaformerAsr.Examples
8 | {
9 | internal class BaseAsr // Holds one static value: the base directory of the current AppDomain.
10 | {
11 | public static string applicationBase = AppDomain.CurrentDomain.BaseDirectory; // public mutable static field, initialized once from AppDomain.CurrentDomain.BaseDirectory
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Windows/App.xaml:
--------------------------------------------------------------------------------
1 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/PreEncoderConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
9 | namespace AliParaformerAsr.Model
10 | {
11 | public class PreEncoderConfEntity
12 | {
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/PostEncoderConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
9 | namespace AliParaformerAsr.Model
10 | {
11 | public class PostEncoderConfEntity
12 | {
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Tizen/Main.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using Microsoft.Maui;
3 | using Microsoft.Maui.Hosting;
4 |
5 | namespace MauiApp1;
6 |
7 | class Program : MauiApplication
8 | {
9 | protected override MauiApp CreateMauiApp() => MauiProgram.CreateMauiApp();
10 |
11 | static void Main(string[] args)
12 | {
13 | var app = new Program();
14 | app.Run(args);
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OfflineInputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | namespace AliParaformerAsr.Model
4 | {
5 | public class OfflineInputEntity
6 | {
7 | public float[]? Speech { get; set; }
8 | public int SpeechLength { get; set; }
9 | public List? Hotwords { get; set; } = new List();
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/iOS/Program.cs:
--------------------------------------------------------------------------------
1 | using ObjCRuntime;
2 | using UIKit;
3 |
4 | namespace MauiApp1;
5 |
6 | public class Program
7 | {
8 | // This is the main entry point of the application.
9 | static void Main(string[] args)
10 | {
11 | // if you want to use a different Application Delegate class from "AppDelegate"
12 | // you can specify it here.
13 | UIApplication.Main(args, null, typeof(AppDelegate));
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/MacCatalyst/Program.cs:
--------------------------------------------------------------------------------
1 | using ObjCRuntime;
2 | using UIKit;
3 |
4 | namespace MauiApp1;
5 |
6 | public class Program
7 | {
8 | // This is the main entry point of the application.
9 | static void Main(string[] args)
10 | {
11 | // if you want to use a different Application Delegate class from "AppDelegate"
12 | // you can specify it here.
13 | UIApplication.Main(args, null, typeof(AppDelegate));
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/App.xaml.cs:
--------------------------------------------------------------------------------
1 | namespace MauiApp1;
2 |
3 | public partial class App : Application // Application root: starts on MySplashPage and later replaces it with AppShell.
4 | {
5 | public App()
6 | {
7 | InitializeComponent();
8 | MainPage = new MySplashPage(); // show the splash page first
9 | _ = EndSplash(); // fire-and-forget: the returned Task is discarded, so nothing awaits or observes it here
10 | }
11 | async Task EndSplash() // Waits, then swaps the root page from the splash page to the shell.
12 | {
13 | await Task.Delay(1000); // keep the splash page up for 1000 ms
14 | MainThread.BeginInvokeOnMainThread(() => // the page swap is dispatched to the main thread
15 | {
16 | MainPage = new AppShell();
17 | });
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/MauiProgram.cs:
--------------------------------------------------------------------------------
1 | namespace MauiApp1;
2 |
3 | public static class MauiProgram
4 | {
5 | public static MauiApp CreateMauiApp()
6 | {
7 | var builder = MauiApp.CreateBuilder();
8 | builder
9 | .UseMauiApp()
10 | .ConfigureFonts(fonts =>
11 | {
12 | fonts.AddFont("OpenSans-Regular.ttf", "OpenSansRegular");
13 | fonts.AddFont("OpenSans-Semibold.ttf", "OpenSansSemibold");
14 | });
15 |
16 | return builder.Build();
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Android/MainActivity.cs:
--------------------------------------------------------------------------------
1 | using Android.App;
2 | using Android.Content.PM;
3 | using Android.OS;
4 |
5 | namespace MauiApp1;
6 |
7 | [Activity(Theme = "@style/Maui.SplashTheme", MainLauncher = true, ConfigurationChanges = ConfigChanges.ScreenSize | ConfigChanges.Orientation | ConfigChanges.UiMode | ConfigChanges.ScreenLayout | ConfigChanges.SmallestScreenSize | ConfigChanges.Density)]
8 | public class MainActivity : MauiAppCompatActivity
9 | {
10 | }
11 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OnlineInputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | namespace AliParaformerAsr.Model
4 | {
5 | public class OnlineInputEntity
6 | {
7 | private float[]? _speech;
8 | private int _speech_length;
9 | public float[]? Speech { get => _speech; set => _speech = value; }
10 | public int SpeechLength { get => _speech_length; set => _speech_length = value; }
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/CmvnEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | namespace AliParaformerAsr.Model
4 | {
5 | internal class CmvnEntity
6 | {
7 | private List _means = new List();
8 | private List _vars = new List();
9 |
10 | public List Means { get => _means; set => _means = value; }
11 | public List Vars { get => _vars; set => _vars = value; }
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples/app.manifest:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/MainPage.xaml.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.Maui.Storage;
2 | using NAudio.Wave;
3 | using System.ComponentModel;
4 | using System.Net;
5 | using System.Reflection;
6 | using System.Text;
7 | using System.Windows.Input;
8 | using static System.Net.Mime.MediaTypeNames;
9 | using MauiApp1.Utils;
10 | using System;
11 | using Microsoft.Maui.Graphics;
12 |
13 | namespace MauiApp1;
14 |
15 | public partial class MainPage : ContentPage
16 | {
17 |
18 | public MainPage()
19 | {
20 | InitializeComponent();
21 | }
22 |
23 |
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/MySplashPage.xaml.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.Maui.Storage;
2 | using NAudio.Wave;
3 | using System.ComponentModel;
4 | using System.Net;
5 | using System.Reflection;
6 | using System.Text;
7 | using System.Windows.Input;
8 | using static System.Net.Mime.MediaTypeNames;
9 | using MauiApp1.Utils;
10 | using System;
11 | using Microsoft.Maui.Graphics;
12 |
13 | namespace MauiApp1;
14 |
15 | public partial class MySplashPage : ContentPage
16 | {
17 |
18 | public MySplashPage()
19 | {
20 | InitializeComponent();
21 | }
22 |
23 |
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/DecoderOutputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 |
4 | namespace AliParaformerAsr.Model
5 | {
6 | public class DecoderOutputEntity
7 | {
8 | private float[]? _logits;
9 | private List? _sample_ids;
10 | private List statesList;
11 |
12 | public float[]? Logits { get => _logits; set => _logits = value; }
13 | public List? Sample_ids { get => _sample_ids; set => _sample_ids = value; }
14 | public List StatesList { get => statesList; set => statesList = value; }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Utils/SysConf.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace MauiApp1.Utils
8 | {
9 | internal class SysConf // Holds the application base directory, chosen at compile time per platform.
10 | {
11 | #if WINDOWS
12 | private static string _applicationBase = Microsoft.Maui.Storage.FileSystem.AppDataDirectory; // WINDOWS builds: use the MAUI app-data directory
13 | #else
14 | private static string _applicationBase = AppDomain.CurrentDomain.BaseDirectory; // all other builds: use the AppDomain base directory
15 | #endif
16 | public SysConf() { }
17 |
18 | public static string ApplicationBase { get => _applicationBase; set => _applicationBase = value; } // static read/write accessor over _applicationBase
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/PredictorOutputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 |
4 | // See https://github.com/manyeyes for more information
5 | // Copyright (c) 2023 by manyeyes
6 | using System.Collections;
7 |
8 | namespace AliParaformerAsr.Model
9 | {
10 | public class PredictorOutputEntity
11 | {
12 |
13 | private float[] _acoustic_embeds;
14 | private int[] _acoustic_embeds_len;
15 |
16 | public float[] Acoustic_embeds { get => _acoustic_embeds; set => _acoustic_embeds = value; }
17 | public int[] Acoustic_embeds_len { get => _acoustic_embeds_len; set => _acoustic_embeds_len = value; }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/App.xaml:
--------------------------------------------------------------------------------
1 |
2 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/AppShell.xaml:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
13 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/Raw/AboutAssets.txt:
--------------------------------------------------------------------------------
1 | Any raw assets you want to be deployed with your application can be placed in
2 | this directory (and child directories). Deployment of the asset to your application
3 | is automatically handled by the following `MauiAsset` Build Action within your `.csproj`.
4 |
5 |
6 |
7 | These files will be deployed with your package and will be accessible using Essentials:
8 |
9 | async Task LoadMauiAsset()
10 | {
11 | using var stream = await FileSystem.OpenAppPackageFileAsync("AboutAssets.txt");
12 | using var reader = new StreamReader(stream);
13 |
14 | var contents = reader.ReadToEnd();
15 | }
16 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/EncoderOutputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 |
4 | // See https://github.com/manyeyes for more information
5 | // Copyright (c) 2023 by manyeyes
6 | using System.Collections;
7 |
8 | namespace AliParaformerAsr.Model
9 | {
10 | public class EncoderOutputEntity
11 | {
12 |
13 | private List>? _enc;
14 | private int[]? _enc_len;
15 | private List>? _alphas;
16 |
17 | public List>? Enc { get => _enc; set => _enc = value; }
18 | public int[]? Enc_len { get => _enc_len; set => _enc_len = value; }
19 | public List>? Alphas { get => _alphas; set => _alphas = value; }
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/MainPage.xaml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/ModelOutputEntity.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.ML.OnnxRuntime.Tensors;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Linq;
5 | using System.Text;
6 | using System.Threading.Tasks;
7 |
8 | namespace AliParaformerAsr.Model
9 | {
10 | internal class ModelOutputEntity
11 | {
12 | private Tensor? _model_out;
13 | private int[]? _model_out_lens;
14 | private Tensor? _cif_peak_tensor;
15 |
16 | public Tensor? model_out { get => _model_out; set => _model_out = value; }
17 | public int[]? model_out_lens { get => _model_out_lens; set => _model_out_lens = value; }
18 | public Tensor? cif_peak_tensor { get => _cif_peak_tensor; set => _cif_peak_tensor = value; }
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/MySplashPage.xaml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
11 |
12 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OfflineOutputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | namespace AliParaformerAsr.Model
4 | {
5 | public class OfflineOutputEntity
6 | {
7 |
8 | private float[]? logits;
9 | private long[]? _token_num;
10 | private List? _token_nums=new List() { new int[4]};
11 | private int[] _token_nums_length;
12 |
13 | public float[]? Logits { get => logits; set => logits = value; }
14 | public long[]? Token_num { get => _token_num; set => _token_num = value; }
15 | public List? Token_nums { get => _token_nums; set => _token_nums = value; }
16 | public int[] Token_nums_length { get => _token_nums_length; set => _token_nums_length = value; }
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OnlineOutputEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 |
4 | namespace AliParaformerAsr.Model
5 | {
6 | public class OnlineOutputEntity
7 | {
8 |
9 | private float[]? logits;
10 | private long[]? _token_num;
11 | private List? _token_nums=new List() { new int[4]};
12 | private int[] _token_nums_length;
13 |
14 | public float[]? Logits { get => logits; set => logits = value; }
15 | public long[]? Token_num { get => _token_num; set => _token_num = value; }
16 | public List? Token_nums { get => _token_nums; set => _token_nums = value; }
17 | public int[] Token_nums_length { get => _token_nums_length; set => _token_nums_length = value; }
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Windows/app.manifest:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
11 | true/PM
12 | PerMonitorV2, PerMonitor
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Tizen/tizen-manifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | maui-appicon-placeholder
7 |
8 |
9 |
10 |
11 | http://tizen.org/privilege/internet
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Windows/App.xaml.cs:
--------------------------------------------------------------------------------
1 | using Microsoft.UI.Xaml;
2 |
3 | // To learn more about WinUI, the WinUI project structure,
4 | // and more about our project templates, see: http://aka.ms/winui-project-info.
5 |
6 | namespace MauiApp1.WinUI;
7 |
8 | ///
9 | /// Provides application-specific behavior to supplement the default Application class.
10 | ///
11 | public partial class App : MauiWinUIApplication
12 | {
13 | ///
14 | /// Initializes the singleton application object. This is the first line of authored code
15 | /// executed, and as such is the logical equivalent of main() or WinMain().
16 | ///
17 | public App()
18 | {
19 | this.InitializeComponent();
20 | }
21 |
22 | protected override MauiApp CreateMauiApp() => MauiProgram.CreateMauiApp();
23 | }
24 |
25 |
--------------------------------------------------------------------------------
/AliParaformerAsr/IOfflineProj.cs:
--------------------------------------------------------------------------------
1 | using AliParaformerAsr.Model;
2 | using Microsoft.ML.OnnxRuntime;
3 |
4 | namespace AliParaformerAsr
5 | {
6 | internal interface IOfflineProj // Internal contract: an InferenceSession, five int settings, a ModelProj call and a Dispose call.
7 | {
8 | InferenceSession ModelSession // ONNX Runtime inference session, readable and writable
9 | {
10 | get;
11 | set;
12 | }
13 | int Blank_id // presumably the blank token id — TODO confirm against the implementations
14 | {
15 | get;
16 | set;
17 | }
18 | int Sos_eos_id // presumably the start/end-of-sentence token id — TODO confirm
19 | {
20 | get;
21 | set;
22 | }
23 | int Unk_id // presumably the unknown-token id — TODO confirm
24 | {
25 | get;
26 | set;
27 | }
28 | int SampleRate // presumably in Hz — TODO confirm
29 | {
30 | get;
31 | set;
32 | }
33 | int FeatureDim // NOTE(review): looks like the per-frame feature dimension — confirm against the implementations
34 | {
35 | get;
36 | set;
37 | }
38 | internal ModelOutputEntity ModelProj(List modelInputs); // takes a list of model inputs and returns a ModelOutputEntity
39 | internal void Dispose(); // NOTE(review): Dispose is declared directly; as shown, this interface does not extend IDisposable
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples/AliParaformerAsr.Examples.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net9.0
6 | enable
7 | 10
8 | enable
9 | false
10 | app.manifest
11 | 1.1.6
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OnlineRecognizerResultEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | namespace AliParaformerAsr.Model
4 | {
5 | /// <summary>
6 | /// Result entity for the online recognizer.
7 | /// Copyright (c) 2023 by manyeyes
8 | /// </summary>
9 | public class OnlineRecognizerResultEntity
10 | {
11 | /// <summary>
12 | /// Recognition result text; may be null.
13 | /// </summary>
14 | public string? Text { get; set; }
15 | /// <summary>
16 | /// Length of the recognition result (presumably the length of Text; TODO confirm).
17 | /// </summary>
18 | public int TextLen { get; set; }
19 | /// <summary>
20 | /// Decoded tokens; initialized to an empty list.
21 | /// </summary>
22 | public List? Tokens { get; set; } = new List();
23 |
24 | /// <summary>
25 | /// Timestamps; initialized to an empty list.
26 | /// </summary>
27 | public List? Timestamps { get; set; } = new List();
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OfflineRecognizerResultEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | namespace AliParaformerAsr.Model
4 | {
5 | /// <summary>
6 | /// Result entity for the offline recognizer.
7 | /// Copyright (c) 2023 by manyeyes
8 | /// </summary>
9 | public class OfflineRecognizerResultEntity
10 | {
11 | /// <summary>
12 | /// Recognition result text; may be null.
13 | /// </summary>
14 | public string? Text { get; set; }
15 | /// <summary>
16 | /// Length of the recognition result (presumably the length of Text; TODO confirm).
17 | /// </summary>
18 | public int TextLen { get; set; }
19 | /// <summary>
20 | /// Decoded tokens; initialized to an empty list.
21 | /// </summary>
22 | public List? Tokens { get; set; } = new List();
23 |
24 | /// <summary>
25 | /// Timestamps; initialized to an empty list.
26 | /// </summary>
27 | public List? Timestamps { get; set; } = new List();
28 |
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/PredictorConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
9 | namespace AliParaformerAsr.Model
10 | {
11 | public class PredictorConfEntity
12 | {
13 | private int _idim = 512;
14 | private float _threshold = 1.0F;
15 | private int _l_order = 1;
16 | private int _r_order = 1;
17 | private float _tail_threshold = 0.45F;
18 |
19 | public int idim { get => _idim; set => _idim = value; }
20 | public float threshold { get => _threshold; set => _threshold = value; }
21 | public int l_order { get => _l_order; set => _l_order = value; }
22 | public int r_order { get => _r_order; set => _r_order = value; }
23 | public float tail_threshold { get => _tail_threshold; set => _tail_threshold = value; }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/MacCatalyst/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | UIDeviceFamily
6 |
7 | 1
8 | 2
9 |
10 | UIRequiredDeviceCapabilities
11 |
12 | arm64
13 |
14 | UISupportedInterfaceOrientations
15 |
16 | UIInterfaceOrientationPortrait
17 | UIInterfaceOrientationLandscapeLeft
18 | UIInterfaceOrientationLandscapeRight
19 |
20 | UISupportedInterfaceOrientations~ipad
21 |
22 | UIInterfaceOrientationPortrait
23 | UIInterfaceOrientationPortraitUpsideDown
24 | UIInterfaceOrientationLandscapeLeft
25 | UIInterfaceOrientationLandscapeRight
26 |
27 | XSAppIconAssets
28 | Assets.xcassets/appicon.appiconset
29 |
30 |
31 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/iOS/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | LSRequiresIPhoneOS
6 |
7 | UIDeviceFamily
8 |
9 | 1
10 | 2
11 |
12 | UIRequiredDeviceCapabilities
13 |
14 | arm64
15 |
16 | UISupportedInterfaceOrientations
17 |
18 | UIInterfaceOrientationPortrait
19 | UIInterfaceOrientationLandscapeLeft
20 | UIInterfaceOrientationLandscapeRight
21 |
22 | UISupportedInterfaceOrientations~ipad
23 |
24 | UIInterfaceOrientationPortrait
25 | UIInterfaceOrientationPortraitUpsideDown
26 | UIInterfaceOrientationLandscapeLeft
27 | UIInterfaceOrientationLandscapeRight
28 |
29 | XSAppIconAssets
30 | Assets.xcassets/appicon.appiconset
31 |
32 |
33 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/FrontendConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Plain settings object for the audio front end. Every member is a
    /// read/write property that starts at the default shown next to it.
    /// </summary>
    /// <remarks>
    /// Member names are lower_snake_case; presumably they mirror the keys of the
    /// model's configuration file (confirm against the code that loads it).
    /// </remarks>
    public class FrontendConfEntity
    {
        /// <summary>Defaults to 16000.</summary>
        public int fs { get; set; } = 16000;

        /// <summary>Defaults to "hamming".</summary>
        public string window { get; set; } = "hamming";

        /// <summary>Defaults to 80.</summary>
        public int n_mels { get; set; } = 80;

        /// <summary>Defaults to 25.</summary>
        public int frame_length { get; set; } = 25;

        /// <summary>Defaults to 10.</summary>
        public int frame_shift { get; set; } = 10;

        /// <summary>Defaults to 1.0.</summary>
        public float dither { get; set; } = 1.0F;

        /// <summary>Defaults to 7.</summary>
        public int lfr_m { get; set; } = 7;

        /// <summary>Defaults to 6.</summary>
        public int lfr_n { get; set; } = 6;

        /// <summary>Defaults to false.</summary>
        public bool snip_edges { get; set; } = false;
    }
}
28 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Android/AndroidManifest.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Tests/AliParaformerAsr.Tests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net9.0
5 | enable
6 | enable
7 |
8 | false
9 | true
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/ModelConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Plain settings object for model-level options. Every member is a
    /// read/write property that starts at the default shown next to it.
    /// </summary>
    /// <remarks>
    /// Member names are lower_snake_case; presumably they mirror the keys of the
    /// model's configuration file (confirm against the code that loads it).
    /// </remarks>
    public class ModelConfEntity
    {
        /// <summary>Defaults to 0.0.</summary>
        public float ctc_weight { get; set; } = 0.0F;

        /// <summary>Defaults to 0.1.</summary>
        public float lsm_weight { get; set; } = 0.1F;

        /// <summary>Defaults to true.</summary>
        public bool length_normalized_loss { get; set; } = true;

        /// <summary>Defaults to 1.0.</summary>
        public float predictor_weight { get; set; } = 1.0F;

        /// <summary>Defaults to 1.</summary>
        public int predictor_bias { get; set; } = 1;

        /// <summary>Defaults to 0.75.</summary>
        public float sampling_ratio { get; set; } = 0.75F;

        /// <summary>Defaults to 1.</summary>
        public int sos { get; set; } = 1;

        /// <summary>Defaults to 2.</summary>
        public int eos { get; set; } = 2;

        /// <summary>Defaults to -1.</summary>
        public int ignore_id { get; set; } = -1;
    }
}
28 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Utils/AEDEmojiHelper.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 |
namespace MauiApp1.Utils
{
    /// <summary>
    /// Rewrites inline markers of the form <c>&lt;|NAME|&gt;</c> found in a text:
    /// either into an emoji looked up by NAME, or into nothing.
    /// </summary>
    internal class AEDEmojiHelper
    {
        // Marker name (the text between "<|" and "|>") -> emoji.
        // Built once instead of on every call; lookups are case-sensitive.
        private static readonly System.Collections.Generic.Dictionary<string, string> EmojiMap =
            new System.Collections.Generic.Dictionary<string, string>
            {
                { "Laughter", "😆" },
                { "Applause", "👏" },
                { "HAPPY", "😀" },
                { "SAD", "😢" },
                { "ANGRY", "😡" },
                { "NEUTRAL", "😐" },
                { "FEARFUL", "😨" },
                { "DISGUSTED", "🤢" },
                { "SURPRISED", "😲" },
                { "Cry", "😭" },
                { "Sneeze", "👃🤧" },
                { "Cough", "🤒" },
                { "Sing", "🎤" }
            };

        // Markers whose name consists of word characters only; group 1 is the name.
        private static readonly Regex WordTagRegex = new Regex(@"<\|(\w+)\|>");

        // Any marker, matched lazily so adjacent markers are treated separately.
        private static readonly Regex AnyTagRegex = new Regex(@"<\|.*?\|>");

        /// <summary>
        /// Replaces every <c>&lt;|NAME|&gt;</c> marker (NAME made of word characters)
        /// with its mapped emoji. Markers whose name is not in the map are removed.
        /// </summary>
        /// <param name="input">Text to process; must not be null.</param>
        /// <returns>The text with markers substituted.</returns>
        public static string ReplaceTagsWithEmojis(string input)
        {
            return WordTagRegex.Replace(
                input,
                match => EmojiMap.TryGetValue(match.Groups[1].Value, out var emoji) ? emoji : "");
        }

        /// <summary>
        /// Removes every <c>&lt;|...|&gt;</c> marker from the text, whatever it contains.
        /// </summary>
        /// <param name="input">Text to process; must not be null.</param>
        /// <returns>The text without markers.</returns>
        public static string ReplaceTagsWithEmpty(string input)
        {
            return AnyTagRegex.Replace(input, "");
        }
    }
}
49 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples/Config/TrimmerRoots.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/DecoderConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Plain settings object for the decoder. Every member is a read/write
    /// property that starts at the default shown next to it.
    /// </summary>
    /// <remarks>
    /// Member names are lower_snake_case; presumably they mirror the keys of the
    /// model's configuration file (confirm against the code that loads it).
    /// The spelling <c>sanm_shfit</c> is part of the public surface and is kept.
    /// </remarks>
    public class DecoderConfEntity
    {
        /// <summary>Defaults to 4.</summary>
        public int attention_heads { get; set; } = 4;

        /// <summary>Defaults to 2048.</summary>
        public int linear_units { get; set; } = 2048;

        /// <summary>Defaults to 16.</summary>
        public int num_blocks { get; set; } = 16;

        /// <summary>Defaults to 0.1.</summary>
        public float dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to 0.1.</summary>
        public float positional_dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to 0.1.</summary>
        public float self_attention_dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to 0.1.</summary>
        public float src_attention_dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to 16.</summary>
        public int att_layer_num { get; set; } = 16;

        /// <summary>Defaults to 11.</summary>
        public int kernel_size { get; set; } = 11;

        /// <summary>Defaults to 0.</summary>
        public int sanm_shfit { get; set; } = 0;
    }
}
31 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/Splash/splash.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Tests/Utils/TextHelper.cs:
--------------------------------------------------------------------------------
namespace AliParaformerAsr.Tests.Utils
{
    /// <summary>
    /// Turns hotword strings into lists of token ids, using a token file in which
    /// the zero-based line number of a token is its id.
    /// </summary>
    internal class TextHelper
    {
        /// <summary>
        /// Reads hotwords from a file (one per line) and converts each to token ids.
        /// </summary>
        /// <param name="tokensFilePath">Path of the token file (one token per line).</param>
        /// <param name="hotwordFilePath">Path of the hotword file (one hotword per line).</param>
        /// <returns>
        /// One id array per hotword followed by a trailing <c>{ 1 }</c> entry, or an
        /// empty list when either file does not exist.
        /// </returns>
        public static List<int[]>? GetHotwords(string tokensFilePath, string hotwordFilePath)
        {
            if (!File.Exists(tokensFilePath) || !File.Exists(hotwordFilePath))
            {
                return new List<int[]>();
            }

            string[] tokens = File.ReadAllLines(tokensFilePath);
            // TODO: word segmentation. For now every line is taken as one hotword.
            string[] sentences = File.ReadAllLines(hotwordFilePath);
            return EncodeHotwords(tokens, sentences);
        }

        /// <summary>
        /// Converts the given hotwords to token ids.
        /// </summary>
        /// <param name="tokensFilePath">Path of the token file (one token per line).</param>
        /// <param name="wordList">Hotwords to convert; null is treated like an empty array.</param>
        /// <returns>
        /// One id array per hotword followed by a trailing <c>{ 1 }</c> entry, or an
        /// empty list when the token file does not exist or no hotwords were given.
        /// </returns>
        public static List<int[]>? GetHotwords(string tokensFilePath, string[] wordList)
        {
            if (wordList == null || wordList.Length == 0 || !File.Exists(tokensFilePath))
            {
                return new List<int[]>();
            }

            return EncodeHotwords(File.ReadAllLines(tokensFilePath), wordList);
        }

        // Maps every character of every word to the line index of the identical token;
        // characters that have no token are dropped.
        private static List<int[]> EncodeHotwords(string[] tokens, string[] words)
        {
            // Keep the FIRST line index for duplicated tokens, as a linear search would.
            var idByToken = new Dictionary<string, int>(tokens.Length);
            for (int i = 0; i < tokens.Length; i++)
            {
                if (!idByToken.ContainsKey(tokens[i]))
                {
                    idByToken.Add(tokens[i], i);
                }
            }

            var hotwords = new List<int[]>(words.Length + 1);
            foreach (string word in words)
            {
                var ids = new List<int>(word.Length);
                foreach (char c in word)
                {
                    if (idByToken.TryGetValue(c.ToString(), out int id))
                    {
                        ids.Add(id);
                    }
                }
                hotwords.Add(ids.ToArray());
            }

            // A fixed { 1 } entry always closes the list.
            // NOTE(review): presumably a placeholder entry the model expects; confirm with the consumer.
            hotwords.Add(new int[] { 1 });
            return hotwords;
        }
    }
}
42 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Android/MainApplication.cs:
--------------------------------------------------------------------------------
1 | using Android.App;
2 | using Android.Runtime;
3 |
namespace MauiApp1;

[Application]

// NOTE(review): the attribute lists below use the `assembly:` target but appear after the
// namespace declaration, directly in front of the class. The compiler may ignore or reject
// assembly-targeted attributes in this position (see CS0657 / CS1730). Confirm whether these
// permissions and features actually reach the generated manifest before relying on them.

//[assembly: UsesPermission(Android.Manifest.Permission.ReadExternalStorage, MaxSDKVersion = 32)]
[assembly: UsesPermission(Android.Manifest.Permission.ReadMediaAudio)]
[assembly: UsesPermission(Android.Manifest.Permission.ReadMediaImages)]
[assembly: UsesPermission(Android.Manifest.Permission.ReadMediaVideo)]
[assembly: UsesPermission(Android.Manifest.Permission.ManageExternalStorage)]
// Needed for picking a photo/video
[assembly: UsesPermission(Android.Manifest.Permission.ReadExternalStorage)]

// Needed for taking a photo/video
[assembly: UsesPermission(Android.Manifest.Permission.WriteExternalStorage)]
[assembly: UsesPermission(Android.Manifest.Permission.Camera)]
[assembly: UsesPermission(Android.Manifest.Permission.RecordAudio)]
[assembly: UsesPermission(Android.Manifest.Permission.CaptureVideoOutput)]

// Declared hardware features; Required = true marks each one as mandatory.
[assembly: UsesFeature("android.hardware.camera", Required = true)]
[assembly: UsesFeature("android.hardware.camera.autofocus", Required = true)]
[assembly: UsesFeature("android.hardware.recordaudio", Required = true)]
[assembly: UsesFeature("android.hardware.recordaudio.autofocus", Required = true)]
[assembly: UsesFeature("android.hardware.capturevideooutput", Required = true)]
[assembly: UsesFeature("android.hardware.capturevideooutput.autofocus", Required = true)]


/// <summary>
/// Android application class; hands the MAUI app built by
/// <c>MauiProgram.CreateMauiApp</c> to the platform host.
/// </summary>
public class MainApplication : MauiApplication
{
    /// <summary>
    /// Forwards the handle and ownership flag to the base class unchanged.
    /// </summary>
    public MainApplication(IntPtr handle, JniHandleOwnership ownership)
        : base(handle, ownership)
    {
    }

    /// <summary>Returns the app instance built by <c>MauiProgram</c>.</summary>
    protected override MauiApp CreateMauiApp()
    {
        return MauiProgram.CreateMauiApp();
    }
}
40 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples/Utils/TextHelper.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
namespace AliParaformerAsr.Examples.Utils
{
    /// <summary>
    /// Turns hotword strings into lists of token ids, using a token file in which
    /// the zero-based line number of a token is its id.
    /// </summary>
    internal class TextHelper
    {
        /// <summary>
        /// Reads hotwords from a file (one per line) and converts each to token ids.
        /// </summary>
        /// <param name="tokensFilePath">Path of the token file (one token per line).</param>
        /// <param name="hotwordFilePath">Path of the hotword file (one hotword per line).</param>
        /// <returns>
        /// One id array per hotword followed by a trailing <c>{ 1 }</c> entry, or an
        /// empty list when either file does not exist.
        /// </returns>
        public static List<int[]>? GetHotwords(string tokensFilePath, string hotwordFilePath)
        {
            if (!File.Exists(tokensFilePath) || !File.Exists(hotwordFilePath))
            {
                return new List<int[]>();
            }

            string[] tokens = File.ReadAllLines(tokensFilePath);
            // TODO: word segmentation. For now every line is taken as one hotword.
            string[] sentences = File.ReadAllLines(hotwordFilePath);
            return EncodeHotwords(tokens, sentences);
        }

        /// <summary>
        /// Converts the given hotwords to token ids.
        /// </summary>
        /// <param name="tokensFilePath">Path of the token file (one token per line).</param>
        /// <param name="wordList">Hotwords to convert; null is treated like an empty array.</param>
        /// <returns>
        /// One id array per hotword followed by a trailing <c>{ 1 }</c> entry, or an
        /// empty list when the token file does not exist or no hotwords were given.
        /// </returns>
        public static List<int[]>? GetHotwords(string tokensFilePath, string[] wordList)
        {
            if (wordList == null || wordList.Length == 0 || !File.Exists(tokensFilePath))
            {
                return new List<int[]>();
            }

            return EncodeHotwords(File.ReadAllLines(tokensFilePath), wordList);
        }

        // Maps every character of every word to the line index of the identical token;
        // characters that have no token are dropped.
        private static List<int[]> EncodeHotwords(string[] tokens, string[] words)
        {
            // Keep the FIRST line index for duplicated tokens, as a linear search would.
            var idByToken = new Dictionary<string, int>(tokens.Length);
            for (int i = 0; i < tokens.Length; i++)
            {
                if (!idByToken.ContainsKey(tokens[i]))
                {
                    idByToken.Add(tokens[i], i);
                }
            }

            var hotwords = new List<int[]>(words.Length + 1);
            foreach (string word in words)
            {
                var ids = new List<int>(word.Length);
                foreach (char c in word)
                {
                    if (idByToken.TryGetValue(c.ToString(), out int id))
                    {
                        ids.Add(id);
                    }
                }
                hotwords.Add(ids.ToArray());
            }

            // A fixed { 1 } entry always closes the list.
            // NOTE(review): presumably a placeholder entry the model expects; confirm with the consumer.
            hotwords.Add(new int[] { 1 });
            return hotwords;
        }
    }
}
48 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Platforms/Windows/Package.appxmanifest:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | $placeholder$
15 | User Name
16 | $placeholder$.png
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/EncoderConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Plain settings object for the encoder. Every member is a read/write
    /// property that starts at the default shown next to it.
    /// </summary>
    /// <remarks>
    /// Member names are lower_snake_case; presumably they mirror the keys of the
    /// model's configuration file (confirm against the code that loads it).
    /// The spelling <c>sanm_shfit</c> is part of the public surface and is kept.
    /// </remarks>
    public class EncoderConfEntity
    {
        /// <summary>Defaults to 512.</summary>
        public int output_size { get; set; } = 512;

        /// <summary>Defaults to 4.</summary>
        public int attention_heads { get; set; } = 4;

        /// <summary>Defaults to 2048.</summary>
        public int linear_units { get; set; } = 2048;

        /// <summary>Defaults to 50.</summary>
        public int num_blocks { get; set; } = 50;

        /// <summary>Defaults to 0.1.</summary>
        public float dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to 0.1.</summary>
        public float positional_dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to 0.1.</summary>
        public float attention_dropout_rate { get; set; } = 0.1F;

        /// <summary>Defaults to "pe".</summary>
        public string input_layer { get; set; } = "pe";

        /// <summary>Defaults to "SinusoidalPositionEncoder".</summary>
        public string pos_enc_class { get; set; } = "SinusoidalPositionEncoder";

        /// <summary>Defaults to true.</summary>
        public bool normalize_before { get; set; } = true;

        /// <summary>Defaults to 11.</summary>
        public int kernel_size { get; set; } = 11;

        /// <summary>Defaults to 0.</summary>
        public int sanm_shfit { get; set; } = 0;

        /// <summary>Defaults to "sanm".</summary>
        public string selfattention_layer_type { get; set; } = "sanm";
    }
}
42 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/Styles/Colors.xaml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 | #512BD4
8 | #DFD8F7
9 | #2B0B98
10 | White
11 | Black
12 | #E1E1E1
13 | #C8C8C8
14 | #ACACAC
15 | #919191
16 | #6E6E6E
17 | #404040
18 | #212121
19 | #141414
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | #F7B548
35 | #FFD590
36 | #FFE5B9
37 | #28C2D1
38 | #7BDDEF
39 | #C3F2F4
40 | #3E8EED
41 | #72ACF1
42 | #A7CBF6
43 |
44 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Set default behavior to automatically normalize line endings.
3 | ###############################################################################
4 | * text=auto
5 |
6 | ###############################################################################
7 | # Set default behavior for command prompt diff.
8 | #
9 | # This is needed for earlier builds of msysgit that do not have it on by
10 | # default for csharp files.
11 | # Note: This is only used by command line
12 | ###############################################################################
13 | #*.cs diff=csharp
14 |
15 | ###############################################################################
16 | # Set the merge driver for project and solution files
17 | #
18 | # Merging from the command prompt will add diff markers to the files if there
19 | # are conflicts (Merging from VS is not affected by the settings below, in VS
20 | # the diff markers are never inserted). Diff markers may cause the following
21 | # file extensions to fail to load in VS. An alternative would be to treat
22 | # these files as binary and thus will always conflict and require user
23 | # intervention with every merge. To do so, just uncomment the entries below
24 | ###############################################################################
25 | #*.sln merge=binary
26 | #*.csproj merge=binary
27 | #*.vbproj merge=binary
28 | #*.vcxproj merge=binary
29 | #*.vcproj merge=binary
30 | #*.dbproj merge=binary
31 | #*.fsproj merge=binary
32 | #*.lsproj merge=binary
33 | #*.wixproj merge=binary
34 | #*.modelproj merge=binary
35 | #*.sqlproj merge=binary
36 | #*.wwaproj merge=binary
37 |
38 | ###############################################################################
39 | # behavior for image files
40 | #
41 | # image files are treated as binary by default.
42 | ###############################################################################
43 | #*.jpg binary
44 | #*.png binary
45 | #*.gif binary
46 |
47 | ###############################################################################
48 | # diff behavior for common document formats
49 | #
50 | # Convert binary document formats to text before diffing them. This feature
51 | # is only available from the command line. Turn it on by uncommenting the
52 | # entries below.
53 | ###############################################################################
54 | #*.doc diff=astextplain
55 | #*.DOC diff=astextplain
56 | #*.docx diff=astextplain
57 | #*.DOCX diff=astextplain
58 | #*.dot diff=astextplain
59 | #*.DOT diff=astextplain
60 | #*.pdf diff=astextplain
61 | #*.PDF diff=astextplain
62 | #*.rtf diff=astextplain
63 | #*.RTF diff=astextplain
64 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/ConfEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Top-level configuration object: component names as strings plus one nested
    /// settings object per component. Every member is a read/write property;
    /// nested objects start as fresh default instances.
    /// </summary>
    /// <remarks>
    /// Member names are lower_snake_case; presumably they mirror the keys of the
    /// model's configuration file (confirm against the code that loads it).
    /// </remarks>
    public class ConfEntity
    {
        /// <summary>No explicit default (starts at 0).</summary>
        public int input_size { get; set; }

        /// <summary>Defaults to "wav_frontend".</summary>
        public string frontend { get; set; } = "wav_frontend";

        /// <summary>Front-end settings; starts as a default instance.</summary>
        public FrontendConfEntity frontend_conf { get; set; } = new FrontendConfEntity();

        /// <summary>Defaults to "paraformer".</summary>
        public string model { get; set; } = "paraformer";

        /// <summary>Model-level settings; starts as a default instance.</summary>
        public ModelConfEntity model_conf { get; set; } = new ModelConfEntity();

        /// <summary>Defaults to the empty string.</summary>
        public string preencoder { get; set; } = string.Empty;

        /// <summary>Pre-encoder settings; starts as a default instance.</summary>
        // NOTE(review): typed as PostEncoderConfEntity although a PreEncoderConfEntity file
        // exists in the Model folder; confirm this is intentional.
        public PostEncoderConfEntity preencoder_conf { get; set; } = new PostEncoderConfEntity();

        /// <summary>Defaults to "sanm".</summary>
        public string encoder { get; set; } = "sanm";

        /// <summary>Encoder settings; starts as a default instance.</summary>
        public EncoderConfEntity encoder_conf { get; set; } = new EncoderConfEntity();

        /// <summary>Defaults to the empty string.</summary>
        public string postencoder { get; set; } = string.Empty;

        /// <summary>Post-encoder settings; starts as a default instance.</summary>
        public PostEncoderConfEntity postencoder_conf { get; set; } = new PostEncoderConfEntity();

        /// <summary>Defaults to "paraformer_decoder_sanm".</summary>
        public string decoder { get; set; } = "paraformer_decoder_sanm";

        /// <summary>Decoder settings; starts as a default instance.</summary>
        public DecoderConfEntity decoder_conf { get; set; } = new DecoderConfEntity();

        /// <summary>Defaults to "cif_predictor_v2".</summary>
        public string predictor { get; set; } = "cif_predictor_v2";

        /// <summary>Defaults to the empty string.</summary>
        public string version { get; set; } = string.Empty;

        /// <summary>Predictor settings; starts as a default instance.</summary>
        public PredictorConfEntity predictor_conf { get; set; } = new PredictorConfEntity();

        /// <summary>Defaults to false.</summary>
        public bool use_itn { get; set; } = false;
    }
}
45 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Model/OfflineYamlEntity.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using System;
4 | using System.Collections.Generic;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
namespace AliParaformerAsr.Model
{
    /// <summary>
    /// Configuration object with component names as strings plus one nested
    /// settings object per component. Every member is a read/write property;
    /// nested objects start as fresh default instances.
    /// </summary>
    /// <remarks>
    /// Member names are lower_snake_case; presumably they mirror the keys of the
    /// model's YAML configuration (confirm against the code that loads it).
    /// </remarks>
    internal class OfflineYamlEntity
    {
        /// <summary>No explicit default (starts at 0).</summary>
        public int input_size { get; set; }

        /// <summary>Defaults to "wav_frontend".</summary>
        public string frontend { get; set; } = "wav_frontend";

        /// <summary>Front-end settings; starts as a default instance.</summary>
        public FrontendConfEntity frontend_conf { get; set; } = new FrontendConfEntity();

        /// <summary>Defaults to "paraformer".</summary>
        public string model { get; set; } = "paraformer";

        /// <summary>Model-level settings; starts as a default instance.</summary>
        public ModelConfEntity model_conf { get; set; } = new ModelConfEntity();

        /// <summary>Defaults to the empty string.</summary>
        public string preencoder { get; set; } = string.Empty;

        /// <summary>Pre-encoder settings; starts as a default instance.</summary>
        // NOTE(review): typed as PostEncoderConfEntity although a PreEncoderConfEntity file
        // exists in the Model folder; confirm this is intentional.
        public PostEncoderConfEntity preencoder_conf { get; set; } = new PostEncoderConfEntity();

        /// <summary>Defaults to "sanm".</summary>
        public string encoder { get; set; } = "sanm";

        /// <summary>Encoder settings; starts as a default instance.</summary>
        public EncoderConfEntity encoder_conf { get; set; } = new EncoderConfEntity();

        /// <summary>Defaults to the empty string.</summary>
        public string postencoder { get; set; } = string.Empty;

        /// <summary>Post-encoder settings; starts as a default instance.</summary>
        public PostEncoderConfEntity postencoder_conf { get; set; } = new PostEncoderConfEntity();

        /// <summary>Defaults to "paraformer_decoder_sanm".</summary>
        public string decoder { get; set; } = "paraformer_decoder_sanm";

        /// <summary>Decoder settings; starts as a default instance.</summary>
        public DecoderConfEntity decoder_conf { get; set; } = new DecoderConfEntity();

        /// <summary>Defaults to "cif_predictor_v2".</summary>
        public string predictor { get; set; } = "cif_predictor_v2";

        /// <summary>Defaults to the empty string.</summary>
        public string version { get; set; } = string.Empty;

        /// <summary>Predictor settings; starts as a default instance.</summary>
        public PredictorConfEntity predictor_conf { get; set; } = new PredictorConfEntity();
    }
}
49 |
--------------------------------------------------------------------------------
/AliParaformerAsr.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.1.32210.238
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AliParaformerAsr.Examples", "AliParaformerAsr.Examples\AliParaformerAsr.Examples.csproj", "{0CC20DAF-D6F4-481B-AE5F-09521DAC3CA2}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AliParaformerAsr", "AliParaformerAsr\AliParaformerAsr.csproj", "{763DE8F4-D05C-4317-B627-3CE1B09431A3}"
9 | EndProject
10 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{FC70D2F3-89D8-40D2-A59A-47D4960C508F}"
11 | ProjectSection(SolutionItems) = preProject
12 | LICENSE = LICENSE
13 | README.EN.md = README.EN.md
14 | README.md = README.md
15 | EndProjectSection
16 | EndProject
17 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MauiApp1", "AliParaformerAsr.Examples.MauiApp\MauiApp1.csproj", "{3190BB8F-83E1-42D8-B3CF-6C43BB419768}"
18 | EndProject
19 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AliParaformerAsr.Tests", "AliParaformerAsr.Tests\AliParaformerAsr.Tests.csproj", "{B9D49696-DEAF-48C0-AE47-3C0285BC7D78}"
20 | EndProject
21 | Global
22 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
23 | Debug|Any CPU = Debug|Any CPU
24 | Release|Any CPU = Release|Any CPU
25 | EndGlobalSection
26 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
27 | {0CC20DAF-D6F4-481B-AE5F-09521DAC3CA2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
28 | {0CC20DAF-D6F4-481B-AE5F-09521DAC3CA2}.Debug|Any CPU.Build.0 = Debug|Any CPU
29 | {0CC20DAF-D6F4-481B-AE5F-09521DAC3CA2}.Release|Any CPU.ActiveCfg = Release|Any CPU
30 | {0CC20DAF-D6F4-481B-AE5F-09521DAC3CA2}.Release|Any CPU.Build.0 = Release|Any CPU
31 | {763DE8F4-D05C-4317-B627-3CE1B09431A3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
32 | {763DE8F4-D05C-4317-B627-3CE1B09431A3}.Debug|Any CPU.Build.0 = Debug|Any CPU
33 | {763DE8F4-D05C-4317-B627-3CE1B09431A3}.Release|Any CPU.ActiveCfg = Release|Any CPU
34 | {763DE8F4-D05C-4317-B627-3CE1B09431A3}.Release|Any CPU.Build.0 = Release|Any CPU
35 | {3190BB8F-83E1-42D8-B3CF-6C43BB419768}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
36 | {3190BB8F-83E1-42D8-B3CF-6C43BB419768}.Debug|Any CPU.Build.0 = Debug|Any CPU
37 | {3190BB8F-83E1-42D8-B3CF-6C43BB419768}.Debug|Any CPU.Deploy.0 = Debug|Any CPU
38 | {3190BB8F-83E1-42D8-B3CF-6C43BB419768}.Release|Any CPU.ActiveCfg = Release|Any CPU
39 | {3190BB8F-83E1-42D8-B3CF-6C43BB419768}.Release|Any CPU.Build.0 = Release|Any CPU
40 | {3190BB8F-83E1-42D8-B3CF-6C43BB419768}.Release|Any CPU.Deploy.0 = Release|Any CPU
41 | {B9D49696-DEAF-48C0-AE47-3C0285BC7D78}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
42 | {B9D49696-DEAF-48C0-AE47-3C0285BC7D78}.Debug|Any CPU.Build.0 = Debug|Any CPU
43 | {B9D49696-DEAF-48C0-AE47-3C0285BC7D78}.Release|Any CPU.ActiveCfg = Release|Any CPU
44 | {B9D49696-DEAF-48C0-AE47-3C0285BC7D78}.Release|Any CPU.Build.0 = Release|Any CPU
45 | EndGlobalSection
46 | GlobalSection(SolutionProperties) = preSolution
47 | HideSolutionNode = FALSE
48 | EndGlobalSection
49 | GlobalSection(ExtensibilityGlobals) = postSolution
50 | SolutionGuid = {FADC677C-FC04-47A3-B4DE-704D30A42AF8}
51 | EndGlobalSection
52 | EndGlobal
53 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Utils/PadHelper.cs:
--------------------------------------------------------------------------------
1 | using AliParaformerAsr.Model;
2 | using System;
3 | using System.Collections.Generic;
4 | using System.Linq;
5 | using System.Text;
6 | using System.Threading.Tasks;
7 |
namespace AliParaformerAsr.Utils
{
    /// <summary>
    /// Packs a batch of per-item sample arrays into one flat buffer in which every
    /// item occupies the same number of slots (row after row).
    /// </summary>
    internal static class PadHelper
    {
        // Written into every slot that still holds 0 after packing. This covers the
        // padding slots and also any input sample that was exactly 0.
        private const float PadValue = -23.025850929940457F * 32768;

        // Number of extra slots appended to every row per unit of tailLen.
        private const int TailUnit = 560;

        /// <summary>
        /// Packs the <c>Speech</c> arrays of the given inputs; null inputs are skipped.
        /// </summary>
        /// <param name="modelInputs">Batch to pack.</param>
        /// <returns>The flat, padded buffer (empty for an empty batch).</returns>
        // NOTE(review): the element type was reconstructed from the Model folder listing
        // (OfflineInputEntity / OnlineInputEntity); confirm against the original file.
        public static float[] PadSequence(List<OfflineInputEntity> modelInputs)
        {
            List<float[]?> rows = modelInputs.Where(x => x != null).Select(x => x.Speech).ToList();
            return PadSequence(rows);
        }

        /// <summary>
        /// Packs the <c>Speech</c> arrays of the given inputs; null inputs are skipped.
        /// </summary>
        /// <param name="modelInputs">Batch to pack.</param>
        /// <returns>The flat, padded buffer (empty for an empty batch).</returns>
        public static float[] PadSequence(List<OnlineInputEntity> modelInputs)
        {
            List<float[]?> rows = modelInputs.Where(x => x != null).Select(x => x.Speech).ToList();
            return PadSequence(rows, tailLen: 0);
        }

        /// <summary>
        /// Lays the rows out back to back, each widened to the longest row plus
        /// <c>560 * tailLen</c> slots, then replaces every 0 with the pad value.
        /// </summary>
        /// <param name="floats">Rows to pack; a null row contributes only padding.</param>
        /// <param name="tailLen">Multiplier for the extra tail appended to every row.</param>
        /// <returns>The flat buffer of length <c>rows * rowLength</c>.</returns>
        private static float[] PadSequence(List<float[]?> floats, int tailLen = 0)
        {
            if (floats.Count == 0)
            {
                return Array.Empty<float>();
            }

            int longest = 0;
            foreach (float[]? row in floats)
            {
                if (row != null && row.Length > longest)
                {
                    longest = row.Length;
                }
            }

            int rowLength = longest + TailUnit * tailLen;
            float[] speech = new float[rowLength * floats.Count];
            for (int i = 0; i < floats.Count; i++)
            {
                float[]? row = floats[i];
                if (row == null)
                {
                    // Nothing to copy: the row stays all zero and becomes pure padding below.
                    continue;
                }
                Array.Copy(row, 0, speech, i * rowLength, row.Length);
            }

            ReplaceZeros(speech);
            return speech;
        }

        /// <summary>
        /// Packs the <c>Speech</c> arrays using the largest <c>SpeechLength</c> as row width.
        /// </summary>
        /// <param name="modelInputs">Batch to pack.</param>
        /// <returns>The flat, padded buffer (empty for an empty batch).</returns>
        // NOTE(review): element type reconstructed as OfflineInputEntity; confirm.
        public static float[] PadSequence_unittest(List<OfflineInputEntity> modelInputs)
        {
            if (modelInputs.Count == 0)
            {
                return Array.Empty<float>();
            }

            int rowLength = modelInputs.Max(x => x.SpeechLength);
            float[] speech = new float[rowLength * modelInputs.Count];
            for (int i = 0; i < modelInputs.Count; i++)
            {
                float[]? row = modelInputs[i].Speech;
                if (row == null)
                {
                    continue;
                }
                // Rows start at multiples of the row width (not of the current row's own
                // length), and never spill into the next row.
                Array.Copy(row, 0, speech, i * rowLength, Math.Min(row.Length, rowLength));
            }

            ReplaceZeros(speech);
            return speech;
        }

        // Overwrites every element equal to 0 with the pad value, in place.
        private static void ReplaceZeros(float[] buffer)
        {
            for (int k = 0; k < buffer.Length; k++)
            {
                if (buffer[k] == 0)
                {
                    buffer[k] = PadValue;
                }
            }
        }
    }
}
82 |
--------------------------------------------------------------------------------
/AliParaformerAsr/EmbedSVModel.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2024 by manyeyes
3 | using Microsoft.ML.OnnxRuntime;
4 | using Microsoft.ML.OnnxRuntime.Tensors;
5 | //using System.Reflection;
6 |
namespace AliParaformerAsr
{
    /// <summary>
    /// Wraps an ONNX Runtime session created from the model that is embedded in
    /// this assembly under the resource name <c>AliParaformerAsr.data.embed.onnx</c>.
    /// </summary>
    public class EmbedSVModel
    {
        private InferenceSession _modelSession;

        /// <summary>Creates the session right away.</summary>
        /// <param name="threadsNum">Value assigned to the session's <c>InterOpNumThreads</c> option.</param>
        public EmbedSVModel(int threadsNum = 2)
        {
            _modelSession = initModel(threadsNum);
        }

        /// <summary>The underlying session.</summary>
        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }

        /// <summary>
        /// Builds a CPU inference session from the embedded model bytes.
        /// </summary>
        /// <param name="threadsNum">Value assigned to <c>InterOpNumThreads</c>.</param>
        /// <returns>The new session; the caller owns it.</returns>
        /// <exception cref="FileNotFoundException">The embedded model resource is missing.</exception>
        public InferenceSession initModel(int threadsNum = 2)
        {
            byte[] model = ReadEmbeddedResourceAsBytes("AliParaformerAsr.data.embed.onnx");
            // The options object is only needed while the session is being constructed.
            using var options = new Microsoft.ML.OnnxRuntime.SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_FATAL;
            //options.AppendExecutionProvider_DML(0);
            options.AppendExecutionProvider_CPU(0);
            //options.AppendExecutionProvider_CUDA(0);
            options.InterOpNumThreads = threadsNum;
            return new InferenceSession(model, options);
        }

        // Returns the complete content of an embedded resource of this assembly.
        private static byte[] ReadEmbeddedResourceAsBytes(string resourceName)
        {
            var assembly = typeof(EmbedSVModel).Assembly;

            using Stream stream = assembly.GetManifestResourceStream(resourceName) ??
                throw new FileNotFoundException($"Embedded resource '{resourceName}' not found.");
            // A single Stream.Read call may deliver fewer bytes than requested, so copy
            // until the end of the stream instead.
            using var buffer = new MemoryStream();
            stream.CopyTo(buffer);
            return buffer.ToArray();
        }

        /// <summary>
        /// Runs the model on <paramref name="x"/>, passed as an input named "x" with
        /// dimensions <c>[1, x.Length]</c>, and returns the first output as a flat float array.
        /// </summary>
        /// <param name="x">Input values.</param>
        /// <param name="speechSize">Not used by this method.</param>
        /// <returns>The flattened first output, or an empty array if the run yields no result collection.</returns>
        /// <exception cref="Exception">The run failed; the original error is the inner exception.</exception>
        public float[] Forward(Int64[] x, int speechSize = 0)
        {
            var container = new List<NamedOnnxValue>();
            foreach (var name in _modelSession.InputMetadata.Keys)
            {
                if (name == "x")
                {
                    int[] dim = new int[] { 1, x.Length };
                    var tensor = new DenseTensor<Int64>(x, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
                }
            }

            try
            {
                // Dispose the outputs once their data has been copied into a managed array.
                using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results == null)
                {
                    return Array.Empty<float>();
                }

                Tensor<float> logitsTensor = results.First().AsTensor<float>();
                return logitsTensor.ToArray();
            }
            catch (Exception ex)
            {
                // Wrap the exception itself: its InnerException is frequently null, which
                // would hide the actual cause.
                throw new Exception("Embed SV Forward failed", ex);
            }
        }

        /// <summary>Disposes the session when <paramref name="disposing"/> is true.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _modelSession?.Dispose();
            }
        }

        /// <summary>Releases the session.</summary>
        internal void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
96 |
--------------------------------------------------------------------------------
/AliParaformerAsr/AliParaformerAsr.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net461;net472;net48;netstandard2.0;netstandard2.1;netcoreapp3.1;net6.0;net8.0;net8.0-android;net8.0-ios;net8.0-maccatalyst
5 | $(TargetFrameworks);net8.0-windows10.0.19041.0
6 |
7 | 10.0.19041.0
8 | 21.0
9 | 14.2
10 | 15.0
11 |
12 | $(DefineConstants);MODERN_DOTNET
13 | 10
14 | enable
15 | enable
16 |
17 | ManySpeech.AliParaformerAsr
18 | 1.1.8
19 | manyeyes contributors
20 | manyeyes contributors
21 | Copyright © manyeyes contributors
22 | c# library for decoding paraformer, sensevoice Models,used in speech recognition (ASR)
23 | c# library for decoding paraformer, sensevoice Models,used in speech recognition (ASR).Paraformer is an efficient non autoregressive end-to-end speech recognition framework proposed by the speech team at Damo Institute. This project is a Paraformer Chinese universal speech recognition model, which uses tens of thousands of hours of industrial grade annotated audio for model training to ensure the universal recognition effect of the model. The model can be applied to scenarios such as voice input methods, voice navigation, and intelligent meeting minutes. Accuracy: High.
24 | speech recognition asr paraformer sensevoice ai local privacy
25 | https://github.com/manyeyes/AliParaformerAsr
26 | https://github.com/manyeyes/AliParaformerAsr
27 | true
28 | snupkg
29 | enable
30 | latest
31 | App_Readme/README.EN.md
32 | App_Readme/LICENSE
33 | true
34 |
35 | App_Readme/README.md
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/AliParaformerAsr/OfflineStream.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using AliParaformerAsr.Model;
4 |
namespace AliParaformerAsr
{
    /// <summary>
    /// Accumulates front-end features for one utterance together with the
    /// decoding state (hypothesis, tokens, timestamps, hotwords) attached to it.
    /// </summary>
    public class OfflineStream : IDisposable
    {
        private bool _disposed;

        private WavFrontend _wavFrontend;
        private OfflineInputEntity _offlineInputEntity;
        private int _blank_id = 0;
        private int _unk_id = 2;
        private Int64[] _hyp;
        private List<int[]>? _hotwords = new List<int[]>();
        List<Int64> _tokens = new List<Int64>();
        List<int[]> _timestamps = new List<int[]>();
        // Shared by all streams: feature extraction and buffer updates are
        // serialized across instances, exactly as before.
        private static readonly object obj = new object();

        /// <summary>Creates a stream with its own front end and an empty input buffer.</summary>
        /// <param name="mvnFilePath">Path of the CMVN file handed to <see cref="WavFrontend"/>.</param>
        /// <param name="confEntity">Configuration whose <c>frontend_conf</c> configures the front end.</param>
        public OfflineStream(string mvnFilePath, ConfEntity confEntity)
        {
            _offlineInputEntity = new OfflineInputEntity();

            _wavFrontend = new WavFrontend(mvnFilePath, confEntity.frontend_conf);
            _hyp = new Int64[] { _blank_id, _blank_id };
            _tokens = new List<Int64> { _blank_id, _blank_id };
            _timestamps = new List<int[]> { };
        }

        public OfflineInputEntity OfflineInputEntity { get => _offlineInputEntity; set => _offlineInputEntity = value; }
        public Int64[] Hyp { get => _hyp; set => _hyp = value; }
        public List<Int64> Tokens { get => _tokens; set => _tokens = value; }
        public List<int[]> Timestamps { get => _timestamps; set => _timestamps = value; }
        public List<int[]>? Hotwords { get => _hotwords; set => _hotwords = value; }

        /// <summary>
        /// Extracts features from <paramref name="samples"/> and appends them to
        /// the features already stored in <see cref="OfflineInputEntity"/>.
        /// </summary>
        public void AddSamples(float[] samples)
        {
            lock (obj)
            {
                float[] fbanks = _wavFrontend.GetFbank(samples);
                float[] features = _wavFrontend.LfrCmvn(fbanks);

                // A non-positive SpeechLength means "nothing buffered yet".
                int existingLength = Math.Max(OfflineInputEntity.SpeechLength, 0);
                float[] merged = new float[existingLength + features.Length];
                if (existingLength > 0)
                {
                    Array.Copy(_offlineInputEntity.Speech, 0, merged, 0, existingLength);
                }
                // Use the clamped length as the offset so a negative SpeechLength
                // cannot become a negative destination index.
                Array.Copy(features, 0, merged, existingLength, features.Length);

                OfflineInputEntity.Speech = merged;
                OfflineInputEntity.SpeechLength = merged.Length;
                OfflineInputEntity.Hotwords = Hotwords;
            }
        }

        /// <summary>
        /// Returns the buffered input; when it holds features, its hotwords are
        /// refreshed from <see cref="Hotwords"/> first.
        /// </summary>
        public OfflineInputEntity GetDecodeChunk()
        {
            lock (obj)
            {
                if (OfflineInputEntity.Speech != null && OfflineInputEntity.SpeechLength > 0)
                {
                    OfflineInputEntity.Hotwords = Hotwords;
                }
                return OfflineInputEntity;
            }
        }

        /// <summary>
        /// Clears the buffered features once more than the two initial tokens are present.
        /// </summary>
        public void RemoveChunk()
        {
            lock (obj)
            {
                if (_tokens.Count > 2)
                {
                    OfflineInputEntity.Speech = null;
                    OfflineInputEntity.SpeechLength = 0;
                }
            }
        }

        /// <summary>Releases the front end and drops references to buffered state.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    if (_wavFrontend != null)
                    {
                        _wavFrontend.Dispose();
                    }
                    _offlineInputEntity = null!;
                    _hyp = null!;
                    _tokens = null!;
                    _timestamps = null!;
                }
                _disposed = true;
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }

        ~OfflineStream()
        {
            // A finalizer must not touch managed members, so it always passes false
            // (the original passed the _disposed flag, which only happened to be false).
            Dispose(disposing: false);
        }
    }
}
122 |
--------------------------------------------------------------------------------
/AliParaformerAsr/OfflineModel.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using Microsoft.ML.OnnxRuntime;
4 | //using System.Reflection;
5 |
namespace AliParaformerAsr
{
    /// <summary>
    /// Holds the ONNX inference session of an offline recognition model plus the
    /// settings (ids, feature dimension, sample rate, hotwords) that accompany it.
    /// </summary>
    public class OfflineModel
    {
        private InferenceSession _modelSession;
        private string _modelebFilePath;
        private List<int[]>? _hotwords = null;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;
        private int _featureDim = 80;
        private int _sampleRate = 16000;
        private bool _use_itn = false;

        /// <summary>Loads the model and remembers the path of the companion "eb" model.</summary>
        /// <param name="modelFilePath">Model file path, or the name of an embedded resource.</param>
        /// <param name="modelebFilePath">Path stored in <see cref="ModelebFilePath"/>; not loaded here.</param>
        /// <param name="threadsNum">Inter-op thread count; values &lt;= 0 select the processor count.</param>
        public OfflineModel(string modelFilePath, string modelebFilePath = "", int threadsNum = 2)
        {
            _modelSession = initModel(modelFilePath, threadsNum);
            _modelebFilePath = modelebFilePath;
        }

        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
        public bool Use_itn { get => _use_itn; set => _use_itn = value; }
        public string ModelebFilePath { get => _modelebFilePath; set => _modelebFilePath = value; }
        public List<int[]>? Hotwords { get => _hotwords; set => _hotwords = value; }

        /// <summary>
        /// Creates a CPU inference session for <paramref name="modelFilePath"/>.
        /// A name without path separators is looked up among this assembly's
        /// embedded resources first; otherwise (or if no such resource exists)
        /// it is treated as a file path.
        /// </summary>
        /// <returns>The session, or null when the path is empty or nothing is found.</returns>
        public InferenceSession initModel(string modelFilePath, int threadsNum = 2)
        {
            if (string.IsNullOrEmpty(modelFilePath))
            {
                return null!;
            }

            // Previously a File.Exists check ran before the resource branch, so a
            // resource-only model always yielded null, and a bare file name on disk
            // was forced down the resource path and threw.
            bool isBareName = modelFilePath.IndexOf('/') < 0 && modelFilePath.IndexOf('\\') < 0;
            bool isEmbedded = isBareName
                && typeof(OfflineModel).Assembly.GetManifestResourceInfo(modelFilePath) != null;
            if (!isEmbedded && !File.Exists(modelFilePath))
            {
                return null!;
            }

            Microsoft.ML.OnnxRuntime.SessionOptions options = new Microsoft.ML.OnnxRuntime.SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_FATAL;
            // Enable all graph optimizations.
            options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;
            // Only the CPU provider is enabled; other providers would be appended here.
            options.AppendExecutionProvider_CPU(0);
            options.InterOpNumThreads = threadsNum > 0 ? threadsNum : System.Environment.ProcessorCount;
            // Enable the memory pattern optimization.
            options.EnableMemoryPattern = true;

            if (isEmbedded)
            {
                byte[] model = ReadEmbeddedResourceAsBytes(modelFilePath);
                return new InferenceSession(model, options);
            }
            return new InferenceSession(modelFilePath, options);
        }

        /// <summary>Reads an embedded resource of this assembly fully into memory.</summary>
        /// <exception cref="FileNotFoundException">No resource with that name exists.</exception>
        private static byte[] ReadEmbeddedResourceAsBytes(string resourceName)
        {
            var assembly = typeof(OfflineModel).Assembly;
            using (var stream = assembly.GetManifestResourceStream(resourceName) ??
                throw new FileNotFoundException($"Embedded resource '{resourceName}' not found."))
            using (var buffer = new MemoryStream())
            {
                // CopyTo loops until end of stream; a single Stream.Read call is
                // allowed to return fewer bytes than requested.
                stream.CopyTo(buffer);
                return buffer.ToArray();
            }
        }

        /// <summary>Disposes the inference session when <paramref name="disposing"/> is true.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (_modelSession != null)
                {
                    _modelSession.Dispose();
                }
            }
        }

        internal void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
106 |
--------------------------------------------------------------------------------
/AliParaformerAsr/OfflineProjOfParaformer.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using AliParaformerAsr.Model;
4 | using AliParaformerAsr.Utils;
5 | using Microsoft.ML.OnnxRuntime;
6 | using Microsoft.ML.OnnxRuntime.Tensors;
7 |
namespace AliParaformerAsr
{
    /// <summary>
    /// Runs the Paraformer ONNX session of an <see cref="OfflineModel"/> on a
    /// batch of padded feature sequences.
    /// </summary>
    internal class OfflineProjOfParaformer : IOfflineProj, IDisposable
    {
        // Number of floats per input frame fed to the "speech" tensor.
        private const int FrameWidth = 560;

        // To detect redundant calls
        private bool _disposed;

        private InferenceSession _modelSession;
        private int _blank_id = 0;
        private int _sos_eos_id = 1;
        private int _unk_id = 2;

        private int _featureDim = 80;
        private int _sampleRate = 16000;

        /// <summary>Copies the session and settings from <paramref name="offlineModel"/>.</summary>
        public OfflineProjOfParaformer(OfflineModel offlineModel)
        {
            _modelSession = offlineModel.ModelSession;
            _blank_id = offlineModel.Blank_id;
            _sos_eos_id = offlineModel.Sos_eos_id;
            _unk_id = offlineModel.Unk_id;
            _featureDim = offlineModel.FeatureDim;
            _sampleRate = offlineModel.SampleRate;
        }

        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
        public int Blank_id { get => _blank_id; set => _blank_id = value; }
        public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
        public int Unk_id { get => _unk_id; set => _unk_id = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }

        /// <summary>
        /// Pads the inputs to a common length, feeds them to the model inputs
        /// "speech" and "speech_lengths", and collects the outputs.
        /// </summary>
        /// <param name="modelInputs">One entry per utterance in the batch.</param>
        /// <returns>
        /// Output 0 as <c>model_out</c>, output 1 as <c>model_out_lens</c>, and, when at
        /// least four outputs exist, output 3 as <c>cif_peak_tensor</c>.
        /// </returns>
        /// <exception cref="Exception">Inference failed; the original error is the inner exception.</exception>
        public ModelOutputEntity ModelProj(List<OfflineInputEntity> modelInputs)
        {
            int batchSize = modelInputs.Count;
            float[] padSequence = PadHelper.PadSequence(modelInputs);
            var container = new List<NamedOnnxValue>();
            foreach (var name in _modelSession.InputMetadata.Keys)
            {
                if (name == "speech")
                {
                    int[] dim = new int[] { batchSize, padSequence.Length / FrameWidth / batchSize, FrameWidth };
                    var tensor = new DenseTensor<float>(padSequence, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
                }
                if (name == "speech_lengths")
                {
                    // Every item reports the padded frame count, not its own length.
                    int paddedFrames = padSequence.Length / FrameWidth / batchSize;
                    int[] speech_lengths = Enumerable.Repeat(paddedFrames, batchSize).ToArray();
                    var tensor = new DenseTensor<int>(speech_lengths, new int[] { batchSize }, false);
                    container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
                }
            }

            ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
            try
            {
                // NOTE(review): the result collection is not disposed here because the
                // returned tensors presumably still reference its buffers — confirm
                // before adding a using block.
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);

                if (results != null)
                {
                    var resultsArray = results.ToArray();
                    modelOutputEntity.model_out = resultsArray[0].AsTensor<float>();
                    modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable<int>().ToArray();
                    if (resultsArray.Length >= 4)
                    {
                        modelOutputEntity.cif_peak_tensor = resultsArray[3].AsTensor<float>();
                    }
                }
            }
            catch (Exception ex)
            {
                throw new Exception("ModelProj failed", ex);
            }
            return modelOutputEntity;
        }

        /// <summary>Disposes the inference session once, when <paramref name="disposing"/> is true.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (!_disposed)
            {
                if (disposing)
                {
                    if (_modelSession != null)
                    {
                        _modelSession.Dispose();
                    }
                }
                _disposed = true;
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }

        ~OfflineProjOfParaformer()
        {
            // A finalizer must not touch managed members, so it always passes false
            // (the original passed the _disposed flag, which only happened to be false).
            Dispose(disposing: false);
        }
    }
}
114 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/RecognitionForFiles.xaml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
27 |
33 |
39 |
45 |
50 |
57 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
74 |
80 |
81 |
82 |
93 |
94 |
95 |
96 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/MauiApp1.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | net9.0;net9.0-android;net9.0-ios;net9.0-maccatalyst
4 | $(TargetFrameworks);net9.0-windows10.0.19041.0
5 |
6 |
7 | Exe
8 | MauiApp1
9 | MauiApp1
10 | true
11 | true
12 | enable
13 |
14 |
15 |
16 | false
17 |
18 | link
19 |
20 |
21 |
22 | true
23 |
24 |
25 | true
26 |
27 |
28 | --aot=full
29 |
30 |
31 |
32 | llvm
33 |
34 | false
35 |
36 |
37 |
38 | false
39 |
40 | MSIX
41 |
42 |
43 |
44 | ASR demo
45 |
46 | com.manyeyes.MauiApp1
47 | be632abf-f31d-4458-8964-b4e8787dee11
48 |
49 | 1.0
50 | 1
51 |
52 | 14.2
53 | 14.0
54 | 21.0
55 | 10.0.17763.0
56 | 10.0.17763.0
57 | 6.5
58 | False
59 | True
60 | SHA256
61 | True
62 | False
63 | True
64 | c:/user/aliparaformerasr-mauiapp1
65 | 0
66 | 082E6518132EFF224F9B4AE94EAA5A68FFC93789
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 | RecognitionForFiles.xaml
97 |
98 |
99 | MainPage.xaml
100 |
101 |
102 |
103 |
104 |
105 | MSBuild:Compile
106 |
107 |
108 | MSBuild:Compile
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
--------------------------------------------------------------------------------
/AliParaformerAsr/EmbedSeacoModel.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2024 by manyeyes
3 | using Microsoft.ML.OnnxRuntime;
4 | using Microsoft.ML.OnnxRuntime.Tensors;
5 | //using System.Reflection;
6 |
namespace AliParaformerAsr
{
    /// <summary>
    /// Wraps the ONNX model that turns hotword token-id lists into the embedding
    /// tensor consumed by the SeACo-Paraformer projection.
    /// </summary>
    public class EmbedSeacoModel
    {
        // Every hotword is truncated or padded to this many ids before inference.
        private const int HotwordLength = 10;

        private InferenceSession _modelSession;

        /// <summary>Loads the model from a file path or embedded resource name.</summary>
        /// <param name="modelFilePath">Model file path, or the name of an embedded resource.</param>
        /// <param name="threadsNum">Inter-op thread count; values &lt;= 0 select the processor count.</param>
        public EmbedSeacoModel(string modelFilePath, int threadsNum = 2)
        {
            _modelSession = initModel(modelFilePath, threadsNum);
        }

        public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }

        /// <summary>
        /// Creates a CPU inference session for <paramref name="modelFilePath"/>.
        /// A name without path separators is looked up among this assembly's
        /// embedded resources first; otherwise (or if no such resource exists)
        /// it is treated as a file path.
        /// </summary>
        /// <returns>The session, or null when the path is empty or nothing is found.</returns>
        public InferenceSession initModel(string modelFilePath, int threadsNum = 2)
        {
            if (string.IsNullOrEmpty(modelFilePath))
            {
                return null!;
            }

            // Previously a File.Exists check ran before the resource branch, so a
            // resource-only model always yielded null, and a bare file name on disk
            // was forced down the resource path and threw.
            bool isBareName = modelFilePath.IndexOf('/') < 0 && modelFilePath.IndexOf('\\') < 0;
            bool isEmbedded = isBareName
                && typeof(EmbedSeacoModel).Assembly.GetManifestResourceInfo(modelFilePath) != null;
            if (!isEmbedded && !File.Exists(modelFilePath))
            {
                return null!;
            }

            Microsoft.ML.OnnxRuntime.SessionOptions options = new Microsoft.ML.OnnxRuntime.SessionOptions();
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_FATAL;
            // Only the CPU provider is enabled; other providers would be appended here.
            options.AppendExecutionProvider_CPU(0);
            options.InterOpNumThreads = threadsNum > 0 ? threadsNum : System.Environment.ProcessorCount;
            // Enable the memory pattern optimization.
            options.EnableMemoryPattern = true;
            // Enable all graph optimizations.
            options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

            if (isEmbedded)
            {
                byte[] model = ReadEmbeddedResourceAsBytes(modelFilePath);
                return new InferenceSession(model, options);
            }
            return new InferenceSession(modelFilePath, options);
        }

        /// <summary>Reads an embedded resource of this assembly fully into memory.</summary>
        /// <exception cref="FileNotFoundException">No resource with that name exists.</exception>
        private static byte[] ReadEmbeddedResourceAsBytes(string resourceName)
        {
            var assembly = typeof(EmbedSeacoModel).Assembly;
            using (var stream = assembly.GetManifestResourceStream(resourceName) ??
                throw new FileNotFoundException($"Embedded resource '{resourceName}' not found."))
            using (var buffer = new MemoryStream())
            {
                // CopyTo loops until end of stream; a single Stream.Read call is
                // allowed to return fewer bytes than requested.
                stream.CopyTo(buffer);
                return buffer.ToArray();
            }
        }

        /// <summary>
        /// Pads the hotwords to <c>HotwordLength</c> ids each and feeds them, as a
        /// [hotwords.Count, HotwordLength] tensor, to the model input named "hotword".
        /// </summary>
        /// <param name="hotwords">Token ids of each hotword; null or empty yields null.</param>
        /// <returns>The first model output, or null when there are no hotwords.</returns>
        /// <exception cref="InvalidOperationException">The model session was never created.</exception>
        /// <exception cref="Exception">Inference failed; the original error is the inner exception.</exception>
        public Tensor<float>? Forward(List<int[]>? hotwords)
        {
            if (hotwords == null || hotwords.Count == 0)
            {
                return null;
            }
            if (_modelSession == null)
            {
                // initModel returns null for a missing model; fail with a clear
                // message instead of a NullReferenceException further down.
                throw new InvalidOperationException("Embed SeACo model is not loaded.");
            }

            Tensor<float>? hwEmbed = null;
            int numHotwords = hotwords.Count;
            int[] hotwords_pad = PadList(hotwords, 0, HotwordLength);
            var container = new List<NamedOnnxValue>();
            foreach (var name in _modelSession.InputMetadata.Keys)
            {
                if (name == "hotword")
                {
                    // Shape uses the same constant as the padding so the two cannot drift apart.
                    int[] dim = new int[] { numHotwords, HotwordLength };
                    var tensor = new DenseTensor<int>(hotwords_pad, dim, false);
                    container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
                }
            }
            try
            {
                // NOTE(review): the result collection is not disposed here because the
                // returned tensor presumably still references its buffers — confirm
                // before adding a using block.
                IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = _modelSession.Run(container);
                if (results != null)
                {
                    hwEmbed = results.ToArray()[0].AsTensor<float>();
                }
            }
            catch (Exception ex)
            {
                // Pass the caught exception itself: ex.InnerException is usually
                // null, which would discard the actual failure.
                throw new Exception("Embed SeACo Forward failed", ex);
            }
            return hwEmbed;
        }

        /// <summary>
        /// Flattens the hotwords row by row, truncating or right-padding each one
        /// with <paramref name="paddingValue"/> to <paramref name="maxLength"/> entries.
        /// </summary>
        /// <param name="maxLength">Row width; 0 selects the longest hotword's length.</param>
        private int[] PadList(List<int[]> hotwords, int paddingValue, int maxLength = 0)
        {
            if (maxLength == 0)
            {
                maxLength = hotwords.Select(x => x.Length).Max();
            }
            int[] hotwordsPad = new int[hotwords.Count * maxLength];
            for (int row = 0; row < hotwords.Count; row++)
            {
                int[] ids = hotwords[row];
                int kept = Math.Min(ids.Length, maxLength);
                Array.Copy(ids, 0, hotwordsPad, row * maxLength, kept);
                for (int col = kept; col < maxLength; col++)
                {
                    hotwordsPad[row * maxLength + col] = paddingValue;
                }
            }
            return hotwordsPad;
        }

        /// <summary>Disposes the inference session when <paramref name="disposing"/> is true.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (_modelSession != null)
                {
                    _modelSession.Dispose();
                }
            }
        }

        internal void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
142 |
--------------------------------------------------------------------------------
/AliParaformerAsr/Utils/PreloadHelper.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2024 by manyeyes
3 | using System.Reflection;
4 | using System.Text.Json;
5 | using System.Text.Json.Serialization;
6 | using YamlDotNet.Serialization;
7 | using YamlDotNet.Serialization.NamingConventions;
8 |
namespace AliParaformerAsr.Utils
{
    // System.Text.Json source-generation context; the generator fills in the implementation.
    [JsonSourceGenerationOptions(WriteIndented = true)]
    [JsonSerializable(typeof(Model.ConfEntity))]
    public partial class AppJsonContext : JsonSerializerContext
    {
    }

    // YamlDotNet static (source-generated) context listing the types to deserialize.
    // NOTE(review): PostEncoderConfEntity is listed twice below; one of the two was
    // possibly meant to be PreEncoderConfEntity — confirm before changing.
    [YamlStaticContext]
    [YamlSerializable(typeof(Model.ConfEntity))]
    [YamlSerializable(typeof(Model.FrontendConfEntity))]
    [YamlSerializable(typeof(Model.ModelConfEntity))]
    [YamlSerializable(typeof(Model.PostEncoderConfEntity))]
    [YamlSerializable(typeof(Model.EncoderConfEntity))]
    [YamlSerializable(typeof(Model.PostEncoderConfEntity))]
    [YamlSerializable(typeof(Model.DecoderConfEntity))]
    [YamlSerializable(typeof(Model.PredictorConfEntity))]
    public partial class YamlStaticContext : YamlDotNet.Serialization.StaticContext
    {
    }

    /// <summary>
    /// PreloadHelper: loads configuration and token files either from disk or from
    /// resources embedded in this assembly.
    /// Copyright (c) 2024 by manyeyes
    /// </summary>
    internal class PreloadHelper
    {
        /// <summary>Deserializes a YAML file or embedded resource into <typeparamref name="T"/>.</summary>
        /// <returns>The deserialized value, or default when the path is empty or the file does not exist.</returns>
        /// <exception cref="FileNotFoundException">A name without path separators matches neither a resource nor a file.</exception>
        public static T? ReadYaml<T>(string yamlFilePath)
        {
            IDeserializer yamlDeserializer = new StaticDeserializerBuilder(new YamlStaticContext()).WithNamingConvention(UnderscoredNamingConvention.Instance).Build();
            using (StreamReader? yamlReader = OpenReader(yamlFilePath))
            {
                return yamlReader == null ? default(T) : yamlDeserializer.Deserialize<T>(yamlReader);
            }
        }

        /// <summary>Deserializes a JSON file or embedded resource into <typeparamref name="T"/>.</summary>
        /// <returns>The deserialized value, or default when the path is empty or the file does not exist.</returns>
        /// <exception cref="FileNotFoundException">A name without path separators matches neither a resource nor a file.</exception>
        public static T? ReadJson<T>(string jsonFilePath)
        {
            using (StreamReader? jsonReader = OpenReader(jsonFilePath))
            {
                return jsonReader == null ? default(T) : JsonSerializer.Deserialize<T>(jsonReader.ReadToEnd());
            }
        }

        /// <summary>
        /// ReadJson for ConfEntity (To compile for AOT): uses the source-generated
        /// <see cref="AppJsonContext"/> instead of reflection.
        /// </summary>
        /// <returns>The deserialized entity, or a new empty one when the path is empty or the file does not exist.</returns>
        /// <exception cref="FileNotFoundException">A name without path separators matches neither a resource nor a file.</exception>
        public static Model.ConfEntity? ReadJson(string jsonFilePath)
        {
            using (StreamReader? jsonReader = OpenReader(jsonFilePath))
            {
                if (jsonReader == null)
                {
                    return new Model.ConfEntity();
                }
                return JsonSerializer.Deserialize(jsonReader.ReadToEnd(), AppJsonContext.Default.ConfEntity);
            }
        }

        /// <summary>Reads a token list, one token per line, from a file or embedded resource.</summary>
        /// <returns>The lines, or null when <paramref name="tokensFilePath"/> is null or empty.</returns>
        public static string[] ReadTokens(string tokensFilePath)
        {
            if (string.IsNullOrEmpty(tokensFilePath))
            {
                return null!;
            }
            if (IsBareName(tokensFilePath))
            {
                var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(tokensFilePath);
                if (stream != null)
                {
                    // Read line by line so that "\r\n" endings and the trailing newline
                    // are handled exactly like File.ReadAllLines does; splitting on '\n'
                    // left '\r' on every token and added an empty last entry.
                    var lines = new List<string>();
                    using (var reader = new StreamReader(stream))
                    {
                        string? line;
                        while ((line = reader.ReadLine()) != null)
                        {
                            lines.Add(line);
                        }
                    }
                    return lines.ToArray();
                }
            }
            // Not an embedded resource: treat it as a file path (throws if missing).
            return File.ReadAllLines(tokensFilePath);
        }

        /// <summary>True when the path contains neither '/' nor '\'.</summary>
        private static bool IsBareName(string path)
        {
            return path.IndexOf('/') < 0 && path.IndexOf('\\') < 0;
        }

        /// <summary>
        /// Opens a reader on an embedded resource (names without path separators
        /// are tried as resources first) or on an existing file.
        /// </summary>
        /// <returns>The reader, or null when the path is empty or the file does not exist.</returns>
        /// <exception cref="FileNotFoundException">A name without path separators matches neither a resource nor a file.</exception>
        private static StreamReader? OpenReader(string path)
        {
            if (!string.IsNullOrEmpty(path) && IsBareName(path))
            {
                var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(path);
                if (stream != null)
                {
                    return new StreamReader(stream);
                }
                // A bare name can also be a file in the working directory.
                if (!File.Exists(path))
                {
                    throw new FileNotFoundException($"Embedded resource '{path}' not found.");
                }
            }
            return File.Exists(path) ? File.OpenText(path) : null;
        }
    }
}
144 |
--------------------------------------------------------------------------------
/AliParaformerAsr/WavFrontend.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using AliParaformerAsr.Model;
4 | using SpeechFeatures;
5 |
namespace AliParaformerAsr
{
    /// <summary>
    /// WavFrontend: fbank extraction, low-frame-rate (LFR) frame stacking and CMVN.
    /// Copyright (c) 2023 by manyeyes
    /// </summary>
    internal class WavFrontend
    {
        // Number of floats per fbank frame assumed by ApplyLfr.
        private const int FbankDim = 80;

        private FrontendConfEntity _frontendConfEntity;
        OnlineFbank _onlineFbank;
        private CmvnEntity _cmvnEntity;

        /// <summary>Creates the fbank extractor from the configuration and loads the CMVN statistics.</summary>
        /// <param name="mvnFilePath">Path of the CMVN statistics file.</param>
        /// <param name="frontendConfEntity">Front-end settings (dither, window, sample rate, mel bins, LFR).</param>
        public WavFrontend(string mvnFilePath, FrontendConfEntity frontendConfEntity)
        {
            _frontendConfEntity = frontendConfEntity;
            _onlineFbank = new OnlineFbank(
                dither: _frontendConfEntity.dither,
                snip_edges: _frontendConfEntity.snip_edges,
                window_type: _frontendConfEntity.window,
                sample_rate: _frontendConfEntity.fs,
                num_bins: _frontendConfEntity.n_mels
                );
            _cmvnEntity = LoadCmvn(mvnFilePath);
        }

        /// <summary>Scales the samples by 32768 and returns their fbank features.</summary>
        public float[] GetFbank(float[] samples)
        {
            samples = samples.Select((float x) => x * 32768f).ToArray();
            return _onlineFbank.GetFbank(samples);
        }

        /// <summary>
        /// Applies LFR stacking (unless lfr_m and lfr_n are both 1) followed by CMVN.
        /// </summary>
        public float[] LfrCmvn(float[] fbanks)
        {
            float[] features = fbanks;
            if (_frontendConfEntity.lfr_m != 1 || _frontendConfEntity.lfr_n != 1)
            {
                features = ApplyLfr(fbanks, _frontendConfEntity.lfr_m, _frontendConfEntity.lfr_n);
            }
            if (_cmvnEntity != null)
            {
                features = ApplyCmvn(features);
            }
            return features;
        }

        /// <summary>
        /// Normalizes <paramref name="inputs"/> in place:
        /// <c>(value + Means[k]) * Vars[k]</c> for each element k of every frame.
        /// </summary>
        /// <returns>The same array instance, normalized.</returns>
        /// <exception cref="InvalidOperationException">No usable CMVN statistics were loaded.</exception>
        public float[] ApplyCmvn(float[] inputs)
        {
            float[] neg_mean = _cmvnEntity.Means.Select(x => (float)Convert.ToDouble(x)).ToArray();
            float[] inv_stddev = _cmvnEntity.Vars.Select(x => (float)Convert.ToDouble(x)).ToArray();

            int dim = neg_mean.Length;
            if (dim == 0 || inv_stddev.Length < dim)
            {
                // Previously surfaced as DivideByZeroException / IndexOutOfRangeException.
                throw new InvalidOperationException("CMVN statistics are missing or incomplete.");
            }
            int num_frames = inputs.Length / dim;

            for (int i = 0; i < num_frames; i++)
            {
                for (int k = 0; k != dim; ++k)
                {
                    inputs[dim * i + k] = (inputs[dim * i + k] + neg_mean[k]) * inv_stddev[k];
                }
            }
            return inputs;
        }

        /// <summary>
        /// Low-frame-rate stacking: prepends (lfr_m - 1) / 2 copies of the first
        /// frame, then emits one output frame of <paramref name="lfr_m"/> stacked
        /// input frames every <paramref name="lfr_n"/> input frames.
        /// </summary>
        /// <param name="inputs">Flattened fbank frames of 80 floats each.</param>
        /// <returns>Flattened stacked frames; empty when <paramref name="inputs"/> holds no complete frame.</returns>
        public float[] ApplyLfr(float[] inputs, int lfr_m, int lfr_n)
        {
            int frameCount = inputs.Length / FbankDim;
            if (frameCount == 0)
            {
                return new float[0];
            }
            int t_lfr = frameCount / lfr_n;
            int tile_x = (lfr_m - 1) / 2;
            int t = frameCount + tile_x;

            float[] padded = new float[t * FbankDim];
            for (int i = 0; i < tile_x; i++)
            {
                // Each pad frame goes to its own slot. The original wrote every copy
                // to offset tile_x * 80, which the next copy overwrote, so the pad
                // frames stayed zero instead of repeating the first frame.
                Array.Copy(inputs, 0, padded, i * FbankDim, FbankDim);
            }
            // Copy whole frames only, so a trailing partial frame cannot overflow the buffer.
            Array.Copy(inputs, 0, padded, tile_x * FbankDim, frameCount * FbankDim);

            float[] LFR_outputs = new float[t_lfr * lfr_m * FbankDim];
            for (int i = 0; i < t_lfr; i++)
            {
                int remaining = t - i * lfr_n;
                if (lfr_m <= remaining)
                {
                    Array.Copy(padded, i * lfr_n * FbankDim, LFR_outputs, i * lfr_m * FbankDim, lfr_m * FbankDim);
                }
                else
                {
                    // Last LFR frame: take what is left and repeat the final input frame.
                    Array.Copy(padded, i * lfr_n * FbankDim, LFR_outputs, i * lfr_m * FbankDim, remaining * FbankDim);
                    for (int j = remaining; j < lfr_m; j++)
                    {
                        Array.Copy(padded, (t - 1) * FbankDim, LFR_outputs, (i * lfr_m + j) * FbankDim, FbankDim);
                    }
                }
            }
            return LFR_outputs;
        }

        /// <summary>
        /// Parses the CMVN statistics file: the bracketed vector on the
        /// "&lt;LearnRateCoef&gt;" line after "&lt;AddShift&gt;" becomes Means, the one
        /// after "&lt;Rescale&gt;" becomes Vars.
        /// </summary>
        private CmvnEntity LoadCmvn(string mvnFilePath)
        {
            List<float> means_list = new List<float>();
            List<float> vars_list = new List<float>();
            // 0 = no section seen yet, 1 = inside <AddShift>, 2 = inside <Rescale>.
            int section = 0;
            using (StreamReader reader = new StreamReader(mvnFilePath))
            {
                string? strLine;
                while ((strLine = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(strLine))
                    {
                        continue;
                    }
                    if (strLine.StartsWith("<AddShift>", StringComparison.Ordinal))
                    {
                        section = 1;
                    }
                    else if (strLine.StartsWith("<Rescale>", StringComparison.Ordinal))
                    {
                        section = 2;
                    }
                    else if (strLine.StartsWith("<LearnRateCoef>", StringComparison.Ordinal))
                    {
                        if (section == 1)
                        {
                            means_list = ParseBracketedFloats(strLine);
                        }
                        else if (section == 2)
                        {
                            vars_list = ParseBracketedFloats(strLine);
                        }
                    }
                }
            }
            CmvnEntity cmvnEntity = new CmvnEntity();
            cmvnEntity.Means = means_list;
            cmvnEntity.Vars = vars_list;
            return cmvnEntity;
        }

        /// <summary>Parses the space-separated floats between the first '[' and the last ']'.</summary>
        /// <exception cref="FormatException">The line has no bracketed section.</exception>
        private static List<float> ParseBracketedFloats(string line)
        {
            int open = line.IndexOf('[');
            int close = line.LastIndexOf(']');
            if (open < 0 || close <= open)
            {
                throw new FormatException("CMVN line has no bracketed value list.");
            }
            // Statistics files use '.' as decimal separator regardless of the machine's culture.
            return line.Substring(open + 1, close - open - 1)
                .Split(' ')
                .Where(x => !string.IsNullOrEmpty(x))
                .Select(x => float.Parse(x.Trim(), System.Globalization.CultureInfo.InvariantCulture))
                .ToList();
        }

        /// <summary>Disposes the fbank extractor and drops the loaded configuration.</summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (_onlineFbank != null)
                {
                    _onlineFbank.Dispose();
                }
                _cmvnEntity = null!;
                _frontendConfEntity = null!;
            }
        }

        public void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
180 |
--------------------------------------------------------------------------------
/AliParaformerAsr/OfflineProjOfSeacoParaformer.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using AliParaformerAsr.Model;
4 | using AliParaformerAsr.Utils;
5 | using Microsoft.ML.OnnxRuntime;
6 | using Microsoft.ML.OnnxRuntime.Tensors;
7 |
8 | namespace AliParaformerAsr
9 | {
10 | internal class OfflineProjOfSeacoParaformer : IOfflineProj, IDisposable // Runs the offline ONNX session and, when the model declares a "bias_embed" input, feeds it hotword embeddings.
11 | {
12 | // To detect redundant calls
13 | private bool _disposed;
14 |
15 | private InferenceSession _modelSession;
16 | private EmbedSeacoModel _seacohwModel; // Created only when the model declares a "bias_embed" input; stays null otherwise.
17 | private Tensor? _hwEmbed = null; // Embedding of the model-level hotwords; used when a request carries no hotwords of its own.
18 | private int _blank_id = 0;
19 | private int _sos_eos_id = 1;
20 | private int _unk_id = 2;
21 |
22 | private int _featureDim = 80;
23 | private int _sampleRate = 16000;
24 |
25 | public OfflineProjOfSeacoParaformer(OfflineModel offlineModel) // Copies session, ids and feature settings from offlineModel; pre-computes the default hotword embedding when "bias_embed" exists.
26 | {
27 | _modelSession = offlineModel.ModelSession;
28 | var inputMeta = _modelSession.InputMetadata;
29 | if (inputMeta.ContainsKey("bias_embed"))
30 | {
31 | _seacohwModel = new EmbedSeacoModel(offlineModel.ModelebFilePath);
32 | List? hotwords = offlineModel.Hotwords;
33 | _hwEmbed = _seacohwModel.Forward(hotwords);
34 | }
35 | _blank_id = offlineModel.Blank_id;
36 | _sos_eos_id = offlineModel.Sos_eos_id;
37 | _unk_id = offlineModel.Unk_id;
38 | _featureDim = offlineModel.FeatureDim;
39 | _sampleRate = offlineModel.SampleRate;
40 | }
41 | public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
42 | public int Blank_id { get => _blank_id; set => _blank_id = value; }
43 | public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
44 | public int Unk_id { get => _unk_id; set => _unk_id = value; }
45 | public int FeatureDim { get => _featureDim; set => _featureDim = value; }
46 | public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
47 |
48 | public ModelOutputEntity ModelProj(List modelInputs) // Builds the session inputs for the batch, runs the session and wraps the outputs; throws Exception("ModelProj failed") if the run fails.
49 | {
50 | int batchSize = modelInputs.Count;
51 | Tensor? hwEmbed = null;
52 | List? hotwords = modelInputs.SelectMany(x => x.Hotwords).ToList(); // Hotwords of all batch items, concatenated.
53 | if (hotwords != null && hotwords.Count > 0 && _seacohwModel != null) // Guard: the embed model is null when the model has no "bias_embed" input, so fall back instead of dereferencing it.
54 | {
55 | hwEmbed = _seacohwModel.Forward(hotwords);
56 | }
57 | else
58 | {
59 | hwEmbed = _hwEmbed;
60 | }
61 | float[] padSequence = PadHelper.PadSequence(modelInputs);
62 | var inputMeta = _modelSession.InputMetadata;
63 | var container = new List();
64 | foreach (var name in inputMeta.Keys) // Only inputs the model actually declares are populated.
65 | {
66 | if (name == "speech")
67 | {
68 | int[] dim = new int[] { batchSize, padSequence.Length / 560 / batchSize, 560 }; // Shape [batch, frames, 560]; frames is derived from the padded buffer length.
69 | var tensor = new DenseTensor(padSequence, dim, false);
70 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
71 | }
72 | if (name == "speech_lengths")
73 | {
74 | int[] dim = new int[] { batchSize };
75 | int[] speech_lengths = new int[batchSize];
76 | for (int i = 0; i < batchSize; i++)
77 | {
78 | speech_lengths[i] = padSequence.Length / 560 / batchSize; // Every item reports the padded frame count.
79 | }
80 | var tensor = new DenseTensor(speech_lengths, dim, false);
81 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
82 | }
83 | if (name == "bias_embed")
84 | {
85 | int[] dim = new int[] { batchSize, 0, 512 }; // Empty [batch, 0, 512] embedding when no hotword embedding is available.
86 | float[] biasEmbed = new float[0];
87 | if (hwEmbed != null)
88 | {
89 | float[] hwEmbedData = hwEmbed.ToArray(); // Flatten once; previously the whole tensor was re-materialized on every inner-loop iteration.
90 | biasEmbed = new float[hwEmbed.Length * batchSize];
91 | List ebList = new List();
92 | for (int n = 0; n < hwEmbed.Dimensions[1]; n++) // Regroup the flattened [d0, d1, d2] data so each d1 entry gets its own contiguous buffer.
93 | {
94 | float[] eb = new float[10 * 512]; // NOTE(review): fixed 10 x 512 slot; assumes Dimensions[0] <= 10 and Dimensions[2] == 512 — confirm against EmbedSeacoModel.Forward.
95 | for (int j = 0; j < hwEmbed.Dimensions[0]; j++)
96 | {
97 | int k = hwEmbed.Dimensions[2];
98 | Array.Copy(hwEmbedData, j * hwEmbed.Dimensions[1] * k + n * k, eb, j * k, k);
99 | }
100 | ebList.Add(eb);
101 | }
102 | float[] biasEmbedTemp = ebList.SelectMany(x => x).ToArray(); // hwEmbed.ToArray();//
103 | for (int i = 0; i < batchSize; i++) // The same regrouped embedding is repeated for every batch item.
104 | {
105 | Array.Copy(biasEmbedTemp, 0, biasEmbed, i * biasEmbedTemp.Length, biasEmbedTemp.Length);
106 | }
107 | dim = new int[] { batchSize, biasEmbed.Length / 512 / batchSize, 512 };
108 | }
109 | var tensor = new DenseTensor(biasEmbed, dim, false);
110 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
111 | }
112 | }
113 | ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
114 | try
115 | {
116 | IDisposableReadOnlyCollection results = _modelSession.Run(container);
117 |
118 | if (results != null)
119 | {
120 | var resultsArray = results.ToArray();
121 | modelOutputEntity.model_out = resultsArray[0].AsTensor(); // Output 0 is stored as model_out.
122 | modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable().ToArray(); // Output 1 is stored as model_out_lens.
123 | if (resultsArray.Length >= 4) // Output 3, when present, is stored as cif_peak_tensor.
124 | {
125 | Tensor cif_peak_tensor = resultsArray[3].AsTensor();
126 | modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
127 | }
128 | }
129 | }
130 | catch (Exception ex)
131 | {
132 | throw new Exception("ModelProj failed", ex); // Original failure is preserved as InnerException.
133 | }
134 | return modelOutputEntity;
135 | }
136 | protected virtual void Dispose(bool disposing) // Idempotent cleanup; managed members are released only when disposing is true.
137 | {
138 | if (!_disposed)
139 | {
140 | if (disposing)
141 | {
142 | if (_modelSession != null)
143 | {
144 | _modelSession.Dispose();
145 | }
146 | if (_seacohwModel != null)
147 | {
148 | _seacohwModel.Dispose();
149 | }
150 | if (_hwEmbed != null)
151 | {
152 | _hwEmbed = null;
153 | }
154 | }
155 | _disposed = true;
156 | }
157 | }
158 |
159 | public void Dispose() // Public cleanup entry point; also suppresses the finalizer.
160 | {
161 | Dispose(disposing: true);
162 | GC.SuppressFinalize(this);
163 | }
164 | ~OfflineProjOfSeacoParaformer()
165 | {
166 | Dispose(disposing: false); // Finalizer path: never touch other managed objects from here.
167 | }
168 | }
169 | }
170 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Oo]ut/
33 | [Ll]og/
34 | [Ll]ogs/
35 |
36 | # Visual Studio 2015/2017 cache/options directory
37 | .vs/
38 | # Uncomment if you have tasks that create the project's static files in wwwroot
39 | #wwwroot/
40 |
41 | # Visual Studio 2017 auto generated files
42 | Generated\ Files/
43 |
44 | # MSTest test Results
45 | [Tt]est[Rr]esult*/
46 | [Bb]uild[Ll]og.*
47 |
48 | # NUnit
49 | *.VisualState.xml
50 | TestResult.xml
51 | nunit-*.xml
52 |
53 | # Build Results of an ATL Project
54 | [Dd]ebugPS/
55 | [Rr]eleasePS/
56 | dlldata.c
57 |
58 | # Benchmark Results
59 | BenchmarkDotNet.Artifacts/
60 |
61 | # .NET Core
62 | project.lock.json
63 | project.fragment.lock.json
64 | artifacts/
65 |
66 | # ASP.NET Scaffolding
67 | ScaffoldingReadMe.txt
68 |
69 | # StyleCop
70 | StyleCopReport.xml
71 |
72 | # Files built by Visual Studio
73 | *_i.c
74 | *_p.c
75 | *_h.h
76 | *.ilk
77 | *.meta
78 | *.obj
79 | *.iobj
80 | *.pch
81 | *.pdb
82 | *.ipdb
83 | *.pgc
84 | *.pgd
85 | *.rsp
86 | *.sbr
87 | *.tlb
88 | *.tli
89 | *.tlh
90 | *.tmp
91 | *.tmp_proj
92 | *_wpftmp.csproj
93 | *.log
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio LightSwitch build output
298 | **/*.HTMLClient/GeneratedArtifacts
299 | **/*.DesktopClient/GeneratedArtifacts
300 | **/*.DesktopClient/ModelManifest.xml
301 | **/*.Server/GeneratedArtifacts
302 | **/*.Server/ModelManifest.xml
303 | _Pvt_Extensions
304 |
305 | # Paket dependency manager
306 | .paket/paket.exe
307 | paket-files/
308 |
309 | # FAKE - F# Make
310 | .fake/
311 |
312 | # CodeRush personal settings
313 | .cr/personal
314 |
315 | # Python Tools for Visual Studio (PTVS)
316 | __pycache__/
317 | *.pyc
318 |
319 | # Cake - Uncomment if you are using it
320 | # tools/**
321 | # !tools/packages.config
322 |
323 | # Tabs Studio
324 | *.tss
325 |
326 | # Telerik's JustMock configuration file
327 | *.jmconfig
328 |
329 | # BizTalk build output
330 | *.btp.cs
331 | *.btm.cs
332 | *.odx.cs
333 | *.xsd.cs
334 |
335 | # OpenCover UI analysis results
336 | OpenCover/
337 |
338 | # Azure Stream Analytics local run output
339 | ASALocalRun/
340 |
341 | # MSBuild Binary and Structured Log
342 | *.binlog
343 |
344 | # NVidia Nsight GPU debugger configuration file
345 | *.nvuser
346 |
347 | # MFractors (Xamarin productivity tool) working folder
348 | .mfractor/
349 |
350 | # Local History for Visual Studio
351 | .localhistory/
352 |
353 | # BeatPulse healthcheck temp database
354 | healthchecksdb
355 |
356 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
357 | MigrationBackup/
358 |
359 | # Ionide (cross platform F# VS Code tools) working folder
360 | .ionide/
361 |
362 | # Fody - auto-generated XML schema
363 | FodyWeavers.xsd
--------------------------------------------------------------------------------
/AliParaformerAsr/OnlineWavFrontend.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using AliParaformerAsr.Model;
4 | using SpeechFeatures;
5 |
6 | namespace AliParaformerAsr
7 | {
8 | ///
9 | /// OnlineWavFrontend
10 | /// Copyright (c) 2023 by manyeyes
11 | ///
12 | internal class OnlineWavFrontend : IDisposable // Feature frontend: fbank extraction, LFR frame stacking, CMVN and sinusoidal positional encoding. Declares IDisposable to match its Dispose() pair.
13 | {
14 | private FrontendConfEntity _frontendConfEntity;
15 | OnlineFbank _onlineFbank;
16 | private CmvnEntity _cmvnEntity;
17 |
18 | public OnlineWavFrontend(string mvnFilePath, FrontendConfEntity frontendConfEntity) // Builds the fbank extractor from frontendConfEntity and loads CMVN statistics from mvnFilePath.
19 | {
20 | _frontendConfEntity = frontendConfEntity;
21 | _onlineFbank = new OnlineFbank(
22 | dither: _frontendConfEntity.dither,
23 | snip_edges: _frontendConfEntity.snip_edges,
24 | window_type: _frontendConfEntity.window,
25 | sample_rate: _frontendConfEntity.fs,
26 | num_bins: _frontendConfEntity.n_mels
27 | );
28 | _cmvnEntity = LoadCmvn(mvnFilePath);
29 | }
30 |
31 | public float[] GetFbank(float[] samples) // Returns whatever OnlineFbank.GetFbank produces for the given samples.
32 | {
33 | float sample_rate = _frontendConfEntity.fs; // Read but not used below.
34 | float[] fbanks = _onlineFbank.GetFbank(samples);//GetFbankIndoor
35 | return fbanks;
36 | }
37 |
38 |
39 | public float[] LfrCmvn(float[] fbanks) // Applies LFR (unless lfr_m and lfr_n are both 1) and then CMVN when statistics are loaded.
40 | {
41 | float[] features = fbanks;
42 | if (_frontendConfEntity.lfr_m != 1 || _frontendConfEntity.lfr_n != 1)
43 | {
44 | features = ApplyLfr(fbanks, _frontendConfEntity.lfr_m, _frontendConfEntity.lfr_n);
45 | }
46 | if (_cmvnEntity != null)
47 | {
48 | features = ApplyCmvn(features);
49 | }
50 | return features;
51 | }
52 |
53 | public float[] ApplyCmvn(float[] inputs) // Normalizes inputs in place, frame by frame: (value + Means[k]) * Vars[k]; returns the same array.
54 | {
55 | var arr_neg_mean = _cmvnEntity.Means;
56 | float[] neg_mean = arr_neg_mean.Select(x => (float)Convert.ToDouble(x)).ToArray();
57 | var arr_inv_stddev = _cmvnEntity.Vars;
58 | float[] inv_stddev = arr_inv_stddev.Select(x => (float)Convert.ToDouble(x)).ToArray();
59 |
60 | int dim = neg_mean.Length; // Frame width is taken from the number of mean values.
61 | int num_frames = inputs.Length / dim;
62 |
63 | for (int i = 0; i < num_frames; i++)
64 | {
65 | for (int k = 0; k != dim; ++k)
66 | {
67 | inputs[dim * i + k] = (inputs[dim * i + k] + neg_mean[k]) * inv_stddev[k];
68 | }
69 | }
70 | return inputs;
71 | }
72 |
73 | public float[] ApplyLfr(float[] inputs, int lfr_m, int lfr_n) // Stacks lfr_m consecutive 80-value frames, advancing lfr_n frames per output; returns an empty array when there are too few frames.
74 | {
75 | int t = inputs.Length / 80; // Number of 80-value input frames.
76 | int t_lfr = 0;
77 | if (t % lfr_n < lfr_m - lfr_n)
78 | {
79 | t_lfr = Math.Max(0, (int)Math.Floor((double)(t / lfr_n)) - 1); // Clamp: the unclamped value is -1 for empty/short input, which made the allocation below throw.
80 | }
81 | if (t % lfr_n >= lfr_m - lfr_n)
82 | {
83 | t_lfr = (int)Math.Floor((double)(t / lfr_n));
84 | }
85 | float[] LFR_outputs = new float[t_lfr * lfr_m * 80];
86 | for (int i = 0; i < t_lfr; i++)
87 | {
88 | Array.Copy(inputs, i * lfr_n * 80, LFR_outputs, i * lfr_m * 80, lfr_m * 80);
89 | }
90 | return LFR_outputs;
91 | }
92 |
93 | public float[] ApplyLfr2(float[] inputs, int lfr_m, int lfr_n) // Same stacking as ApplyLfr, but always produces t / lfr_n output frames.
94 | {
95 | int t = inputs.Length / 80;
96 | int t_lfr = (int)Math.Floor((double)(t / lfr_n)); // NOTE(review): without ApplyLfr's "- 1" case the last copy can read past the input when lfr_m > lfr_n — confirm callers guarantee enough frames.
97 | float[] LFR_outputs = new float[t_lfr * lfr_m * 80];
98 | for (int i = 0; i < t_lfr; i++)
99 | {
100 | Array.Copy(inputs, i * lfr_n * 80, LFR_outputs, i * lfr_m * 80, lfr_m * 80);
101 | }
102 | return LFR_outputs;
103 | }
104 |
105 | private CmvnEntity LoadCmvn(string mvnFilePath) // Reads the mvn file line by line and fills Means/Vars from the bracketed number lists.
106 | {
107 | List means_list = new List(); // NOTE(review): generic type arguments appear to be missing in this copy of the file — confirm against the real source.
108 | List vars_list = new List();
109 | using StreamReader srtReader = new StreamReader(mvnFilePath); // Disposed when the method returns; the reader was previously never closed.
110 | int i = 0; // Section marker: 1 and 2 select which list the next bracketed line fills.
111 | while (!srtReader.EndOfStream)
112 | {
113 | string? strLine = srtReader.ReadLine();
114 | if (!string.IsNullOrEmpty(strLine))
115 | {
116 | if (strLine.StartsWith("")) // NOTE(review): tag literal is empty in this copy (likely stripped during extraction); as written this branch always matches — restore the real tag.
117 | {
118 | i = 1;
119 | continue;
120 | }
121 | if (strLine.StartsWith("")) // NOTE(review): same as above — literal looks stripped.
122 | {
123 | i = 2;
124 | continue;
125 | }
126 | if (strLine.StartsWith("") && i == 1) // NOTE(review): same as above — literal looks stripped.
127 | {
128 | string[] add_shift_line = strLine.Substring(strLine.IndexOf("[") + 1, strLine.LastIndexOf("]") - strLine.IndexOf("[") - 1).Split(' ');
129 | means_list = add_shift_line.Where(x => !string.IsNullOrEmpty(x)).Select(x => float.Parse(x.Trim(), System.Globalization.CultureInfo.InvariantCulture)).ToList(); // Invariant culture: parsing must not depend on the machine locale.
130 | //i++;
131 | continue;
132 | }
133 | if (strLine.StartsWith("") && i == 2) // NOTE(review): same as above — literal looks stripped.
134 | {
135 | string[] rescale_line = strLine.Substring(strLine.IndexOf("[") + 1, strLine.LastIndexOf("]") - strLine.IndexOf("[") - 1).Split(' ');
136 | vars_list = rescale_line.Where(x => !string.IsNullOrEmpty(x)).Select(x => float.Parse(x.Trim(), System.Globalization.CultureInfo.InvariantCulture)).ToList(); // Invariant culture: parsing must not depend on the machine locale.
137 | //i++;
138 | continue;
139 | }
140 | }
141 | }
142 | CmvnEntity cmvnEntity = new CmvnEntity();
143 | cmvnEntity.Means = means_list;
144 | cmvnEntity.Vars = vars_list;
145 | return cmvnEntity;
146 | }
147 |
148 | /// <summary>
149 | /// Streaming positional encoding: adds sin/cos position values to <paramref name="inputs"/> in place, starting at position <paramref name="startIdx"/>.
150 | /// </summary>
151 | /// <returns>The same <paramref name="inputs"/> array after the in-place addition.</returns>
152 | public float[] SinusoidalPositionEncoder(float[] inputs, int timesteps, int inputsDim, int startIdx)
153 | {
154 | //forward
155 | float[] positions = new float[timesteps + startIdx];
156 | for (int i = 1; i < positions.Length + 1; i++) // Positions are 1-based: 1 .. timesteps + startIdx.
157 | {
158 | positions[i - 1] = (float)i;
159 | }
160 | //forward
161 | //encode
162 | int batch_size = 1; // Declared but not used below.
163 | float log_timescale_increment = (float)Math.Log(10000F) / (inputsDim / 2 - 1);
164 | float[] inv_timescales = new float[inputsDim / 2];
165 | for (int i = 0; i < inv_timescales.Length; i++)
166 | {
167 | inv_timescales[i] = (float)(i + 1);
168 | }
169 | inv_timescales = inv_timescales.Select(x => x * (-log_timescale_increment)).ToArray();
170 | inv_timescales = inv_timescales.Select(x => (float)Math.Exp(x)).ToArray();
171 | float[] scaled_time = new float[inv_timescales.Length * positions.Length * 2];
172 | foreach (float p in positions) // Each position row is laid out as [sin values..., cos values...].
173 | {
174 | float[] scaled_time_item_sin = inv_timescales.Select(x => (float)Math.Sin(x * p)).ToArray();
175 | float[] scaled_time_item_cos = inv_timescales.Select(x => (float)Math.Cos(x * p)).ToArray();
176 | Array.Copy(scaled_time_item_sin, 0, scaled_time, ((int)p - 1) * (scaled_time_item_sin.Length + scaled_time_item_cos.Length), scaled_time_item_sin.Length);
177 | Array.Copy(scaled_time_item_cos, 0, scaled_time, ((int)p - 1) * (scaled_time_item_sin.Length + scaled_time_item_cos.Length) + scaled_time_item_sin.Length, scaled_time_item_cos.Length);
178 | }
179 | float[] encoding = scaled_time;
180 | float[] position_encoding = new float[inputs.Length];
181 | Array.Copy(encoding, inputsDim * startIdx, position_encoding, 0, position_encoding.Length); // Skip the rows of the first startIdx positions.
182 | for (int i = 0; i < inputs.Length; i++)
183 | {
184 | inputs[i] += position_encoding[i];
185 | }
186 | return inputs;
187 | //encode
188 | }
189 | protected virtual void Dispose(bool disposing) // Disposes the fbank extractor and drops the other references; runs only when disposing is true.
190 | {
191 | if (disposing)
192 | {
193 | if (_onlineFbank != null)
194 | {
195 | _onlineFbank.Dispose();
196 | }
197 | if (_cmvnEntity != null)
198 | {
199 | _cmvnEntity = null;
200 | }
201 | if (_frontendConfEntity != null)
202 | {
203 | _frontendConfEntity = null;
204 | }
205 | }
206 | }
207 |
208 | public void Dispose() // Public cleanup entry point.
209 | {
210 | Dispose(disposing: true);
211 | GC.SuppressFinalize(this);
212 | }
213 | }
214 | }
215 |
--------------------------------------------------------------------------------
/AliParaformerAsr/OfflineProjOfSenseVoiceSmall.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2024 by manyeyes
3 | using AliParaformerAsr.Model;
4 | using AliParaformerAsr.Utils;
5 | using Microsoft.ML.OnnxRuntime;
6 | using Microsoft.ML.OnnxRuntime.Tensors;
7 |
8 | namespace AliParaformerAsr
9 | {
10 | internal class OfflineProjOfSenseVoiceSmall : IOfflineProj, IDisposable // Runs the offline ONNX session; supplies language/textnorm either as model inputs or as embedded query frames prepended to the speech features.
11 | {
12 | // To detect redundant calls
13 | private bool _disposed;
14 |
15 | private InferenceSession _modelSession;
16 | private EmbedSVModel _embedSVModel; // Created only when the model declares neither a "language" nor a "textnorm" input.
17 | private int _blank_id = 0;
18 | private int _sos_eos_id = 1;
19 | private int _unk_id = 2;
20 |
21 | private int _featureDim = 80;
22 | private int _sampleRate = 16000;
23 |
24 | private bool _use_itn = false;
25 | private string _textnorm = "woitn";
26 | private Dictionary _lidDict = new Dictionary() { { "auto", 0 }, { "zh", 3 }, { "en", 4 }, { "yue", 7 }, { "ja", 11 }, { "ko", 12 }, { "nospeech", 13 } };
27 | private Dictionary _lidIntDict = new Dictionary() { { 24884, 3 }, { 24885, 4 }, { 24888, 7 }, { 24892, 11 }, { 24896, 12 }, { 24992, 13 } };
28 | private Dictionary _textnormDict = new Dictionary() { { "withitn", 14 }, { "woitn", 15 } };
29 | private Dictionary _textnormIntDict = new Dictionary() { { 25016, 14 }, { 25017, 15 } };
30 |
31 | public OfflineProjOfSenseVoiceSmall(OfflineModel offlineModel) // Copies session, ids, feature settings and the use_itn flag from offlineModel.
32 | {
33 | _modelSession = offlineModel.ModelSession;
34 | var inputMeta = _modelSession.InputMetadata;
35 | if (!inputMeta.ContainsKey("language") && !inputMeta.ContainsKey("textnorm"))
36 | {
37 | _embedSVModel = new EmbedSVModel();
38 | }
39 | _blank_id = offlineModel.Blank_id;
40 | _sos_eos_id = offlineModel.Sos_eos_id;
41 | _unk_id = offlineModel.Unk_id;
42 | _featureDim = offlineModel.FeatureDim;
43 | _sampleRate = offlineModel.SampleRate;
44 | _use_itn = offlineModel.Use_itn;
45 | }
46 | public InferenceSession ModelSession { get => _modelSession; set => _modelSession = value; }
47 | public int Blank_id { get => _blank_id; set => _blank_id = value; }
48 | public int Sos_eos_id { get => _sos_eos_id; set => _sos_eos_id = value; }
49 | public int Unk_id { get => _unk_id; set => _unk_id = value; }
50 | public int FeatureDim { get => _featureDim; set => _featureDim = value; }
51 | public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
52 |
53 | public ModelOutputEntity ModelProj(List modelInputs) // Builds the session inputs for the batch, runs the session and wraps the outputs; throws Exception("ModelProj failed") if the run fails.
54 | {
55 | int batchSize = modelInputs.Count;
56 | //
57 | string languageValue = "ja"; // NOTE(review): hard-coded language; it was previously masked because the textnorm lookup overwrote languageId — confirm the intended value (e.g. "auto").
58 | int languageId = 0;
59 | if (_lidDict.ContainsKey(languageValue))
60 | {
61 | //languageId = _lidDict.GetValueOrDefault(languageValue);
62 | _lidDict.TryGetValue(languageValue,out languageId);
63 | }
64 | string textnormValue = "withitn";
65 | if (!_use_itn)
66 | {
67 | textnormValue = "woitn";
68 | }
69 | int textnormId = 15;
70 | if (_textnormDict.ContainsKey(textnormValue))
71 | {
72 | //textnormId = _textnormDict.GetValueOrDefault(textnormValue);
73 | _textnormDict.TryGetValue(textnormValue, out textnormId); // Fix: the result used to be written into languageId, clobbering the language and leaving textnormId at its default.
74 | }
75 | var inputMeta = _modelSession.InputMetadata;
76 | if (!inputMeta.ContainsKey("language") && !inputMeta.ContainsKey("textnorm")) // Model takes no id inputs: prepend embedded query frames to each item's features instead.
77 | {
78 | List offlineInputEntities = new List();
79 | foreach (OfflineInputEntity offlineInputEntity in modelInputs)
80 | {
81 | float[]? speech = offlineInputEntity.Speech;
82 | if (speech != null)
83 | {
84 | float[] language_query = _embedSVModel.Forward(new Int64[] { languageId });
85 | float[] textnorm_query = _embedSVModel.Forward(new long[] { textnormId });
86 | // Step 1: textnorm query, then the original features.
87 | float[] tempSpeech = new float[speech.Length + 560]; // NOTE(review): assumes textnorm_query.Length == 560 — confirm against EmbedSVModel.Forward.
88 | Array.Copy(textnorm_query, 0, tempSpeech, 0, textnorm_query.Length);
89 | Array.Copy(speech, 0, tempSpeech, textnorm_query.Length, speech.Length);
90 | speech = tempSpeech;
91 | // Step 2: language query followed by the embedding of ids { 1, 2 }.
92 | float[] event_emo_query = _embedSVModel.Forward(new Int64[] { 1, 2 });
93 | float[] input_query = new float[language_query.Length + event_emo_query.Length];
94 | Array.Copy(language_query, 0, input_query, 0, language_query.Length);
95 | Array.Copy(event_emo_query, 0, input_query, language_query.Length, event_emo_query.Length);
96 | // Step 3: final layout is [language, ids {1,2}, textnorm, features].
97 | float[] tempSpeech2 = new float[speech.Length + input_query.Length];
98 | Array.Copy(input_query, 0, tempSpeech2, 0, input_query.Length);
99 | Array.Copy(speech, 0, tempSpeech2, input_query.Length, speech.Length);
100 | speech = tempSpeech2;
101 | }
102 | offlineInputEntity.Speech = speech;
103 | offlineInputEntity.SpeechLength = speech?.Length ?? 0; // speech is still null when the item had no features; avoid dereferencing it.
104 | offlineInputEntities.Add(offlineInputEntity);
105 | }
106 | modelInputs = offlineInputEntities;
107 | }
108 | float[] padSequence = PadHelper.PadSequence(modelInputs);
109 | var container = new List();
110 | foreach (var name in inputMeta.Keys) // Only inputs the model actually declares are populated.
111 | {
112 | if (name == "speech")
113 | {
114 | int[] dim = new int[] { batchSize, padSequence.Length / 560 / batchSize, 560 }; // Shape [batch, frames, 560]; frames is derived from the padded buffer length.
115 | var tensor = new DenseTensor(padSequence, dim, false);
116 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
117 | }
118 | if (name == "speech_lengths")
119 | {
120 |
121 | int[] dim = new int[] { batchSize };
122 | int[] speech_lengths = new int[batchSize];
123 | for (int i = 0; i < batchSize; i++)
124 | {
125 | speech_lengths[i] = padSequence.Length / 560 / batchSize; // Every item reports the padded frame count.
126 | }
127 | var tensor = new DenseTensor(speech_lengths, dim, false);
128 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
129 | }
130 | if (name == "language")
131 | {
132 | int[] language = new int[batchSize];
133 | for (int i = 0; i < batchSize; i++)
134 | {
135 | language[i] = languageId; // Same language id for every batch item.
136 | }
137 | int[] dim = new int[] { batchSize };
138 | var tensor = new DenseTensor(language, dim, false);
139 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
140 | }
141 | if (name == "textnorm")
142 | {
143 | int[] textnorm = new int[batchSize];
144 | for (int i = 0; i < batchSize; i++)
145 | {
146 | textnorm[i] = textnormId; // Same textnorm id for every batch item.
147 | }
148 | int[] dim = new int[] { batchSize };
149 | var tensor = new DenseTensor(textnorm, dim, false);
150 | container.Add(NamedOnnxValue.CreateFromTensor(name, tensor));
151 | }
152 | }
153 | ModelOutputEntity modelOutputEntity = new ModelOutputEntity();
154 | try
155 | {
156 | IDisposableReadOnlyCollection results = _modelSession.Run(container);
157 |
158 | if (results != null)
159 | {
160 | var resultsArray = results.ToArray();
161 | modelOutputEntity.model_out = resultsArray[0].AsTensor(); // Output 0 is stored as model_out.
162 | modelOutputEntity.model_out_lens = resultsArray[1].AsEnumerable().ToArray(); // Output 1 is stored as model_out_lens.
163 | if (resultsArray.Length >= 4) // Output 3, when present, is stored as cif_peak_tensor.
164 | {
165 | Tensor cif_peak_tensor = resultsArray[3].AsTensor();
166 | modelOutputEntity.cif_peak_tensor = cif_peak_tensor;
167 | }
168 | }
169 | }
170 | catch (Exception ex)
171 | {
172 | throw new Exception("ModelProj failed", ex); // Original failure is preserved as InnerException.
173 | }
174 | return modelOutputEntity;
175 | }
176 | protected virtual void Dispose(bool disposing) // Idempotent cleanup; the session is disposed only when disposing is true.
177 | {
178 | if (!_disposed)
179 | {
180 | if (disposing)
181 | {
182 | if (_modelSession != null)
183 | {
184 | _modelSession.Dispose();
185 | }
186 | }
187 | _disposed = true;
188 | }
189 | }
190 |
191 | public void Dispose() // Public cleanup entry point; also suppresses the finalizer.
192 | {
193 | Dispose(disposing: true);
194 | GC.SuppressFinalize(this);
195 | }
196 | ~OfflineProjOfSenseVoiceSmall()
197 | {
198 | Dispose(disposing: false); // Finalizer path: never touch other managed objects from here.
199 | }
200 | }
201 | }
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 📢 项目迁移重要通知
2 |
3 | 为便于统一管理和维护,**AliParaformerAsr 项目已完整迁移至新仓库**:
4 |
5 | 🔗 **新仓库地址**
6 | [ManySpeech](https://github.com/manyeyes/ManySpeech "ManySpeech 主仓库")
7 |
8 | 此后,所有代码更新、问题反馈及功能维护将统一在 ManySpeech 仓库进行。请各位用户和开发者前往新仓库获取最新资源。
9 |
10 |
11 | ### ⚠️ 迁移后关键变更
12 | - **命名空间调整**
13 | 由原 `AliParaformerAsr` 变更为 `ManySpeech.AliParaformerAsr`,请在集成时注意更新引用。
14 |
15 | - **资源升级**
16 | 新仓库提供更丰富的使用示例、更详细的 API 文档及持续的功能迭代。
17 |
18 | 感谢您的理解与支持!
19 |
20 | ----------------------------------------------------------------
21 |
22 | # AliParaformerAsr
23 |
24 | ## 简介:
25 |
26 | AliParaformerAsr是一个使用C#编写的“语音识别”库,底层调用Microsoft.ML.OnnxRuntime对onnx模型进行解码,支持 net461+、net6.0+、netcoreapp3.1 及 netstandard2.0+ 等多种环境,支持跨平台编译,支持AOT编译。使用简单方便。
27 |
28 | ## 如何运行示例项目
29 |
30 | #### 1.克隆项目到本地
31 | ```bash
32 | cd /path/to
33 | git clone https://github.com/manyeyes/AliParaformerAsr.git
34 | ```
35 | #### 2.下载上述列表中的模型到本地目录备用(运行示例将自动下载模型,此步骤可跳过)
36 | ```bash
37 | cd /path/to/AliParaformerAsr/AliParaformerAsr.Examples
38 | git clone https://www.modelscope.cn/manyeyes/[模型名称].git
39 | ```
40 | #### 3.使用vs2022(或其他IDE)加载工程
41 | #### 4.运行 AliParaformerAsr.Examples 项目
42 | AliParaformerAsr.Examples 是控制台 / 桌面端示例项目,用于演示语音识别的基础功能(如离线转写、实时识别)。
43 | #### 5.运行 MauiApp1 项目
44 | MauiApp1 是基于 .NET MAUI 开发的跨平台项目,支持在 Android、iOS、Windows 等设备上运行语音识别功能。
45 | #### 6.配置说明(参考:asr.yaml文件):
46 | 用于解码的asr.yaml配置参数,大部分不需要修改。
47 | 可修改的参数:
48 | use_itn: true(在sensevoicesmall的配置中开启之后,可实现逆文本正则化。)
49 |
50 | ## 如何在代码中调用
51 |
52 | ### 离线(非流式)模型调用方法:
53 |
54 | #### 1.添加项目引用
55 | using AliParaformerAsr;
56 |
57 | #### 2.模型初始化和配置
58 | paraformer模型调用方式:
59 |
60 | ```csharp
61 | string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
62 | string modelName = "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx";
63 | string modelFilePath = applicationBase + "./"+ modelName + "/model_quant.onnx";
64 | string configFilePath = applicationBase + "./" + modelName + "/asr.yaml";
65 | string mvnFilePath = applicationBase + "./" + modelName + "/am.mvn";
66 | string tokensFilePath = applicationBase + "./" + modelName + "/tokens.txt";
67 | AliParaformerAsr.OfflineRecognizer offlineRecognizer = new OfflineRecognizer(modelFilePath, configFilePath, mvnFilePath, tokensFilePath);
68 | ```
69 | SeACo-paraformer模型调用方式:
70 | 1.在模型目录中修改hotword.txt文件,添加自定义热词(目前支持“每一行一个中文词汇”的格式)
71 | 2.在代码中新增参数:modelebFilePath, hotwordFilePath
72 | ```csharp
73 | string applicationBase = AppDomain.CurrentDomain.BaseDirectory;
74 | string modelName = "paraformer-seaco-large-zh-timestamp-onnx-offline";
75 | string modelFilePath = applicationBase + "./" + modelName + "/model.int8.onnx";
76 | string modelebFilePath = applicationBase + "./" + modelName + "/model_eb.int8.onnx";
77 | string configFilePath = applicationBase + "./" + modelName + "/asr.yaml";
78 | string mvnFilePath = applicationBase + "./" + modelName + "/am.mvn";
79 | string hotwordFilePath = applicationBase + "./" + modelName + "/hotword.txt";
80 | string tokensFilePath = applicationBase + "./" + modelName + "/tokens.txt";
81 | OfflineRecognizer offlineRecognizer = new OfflineRecognizer(modelFilePath: modelFilePath, configFilePath: configFilePath, mvnFilePath, tokensFilePath: tokensFilePath, modelebFilePath: modelebFilePath, hotwordFilePath: hotwordFilePath);
82 | ```
83 | #### 3.调用
84 | ```csharp
85 | List<float[]> samples = new List<float[]>();
86 | //这里省略wav文件转samples...
87 | //具体参考示例(AliParaformerAsr.Examples)代码
88 | List<AliParaformerAsr.OfflineStream> streams = new List<AliParaformerAsr.OfflineStream>();
89 | foreach (var sample in samples)
90 | {
91 | AliParaformerAsr.OfflineStream stream = offlineRecognizer.CreateOfflineStream();
92 | stream.AddSamples(sample);
93 | streams.Add(stream);
94 | }
95 | List<AliParaformerAsr.OfflineRecognizerResultEntity> results = offlineRecognizer.GetResults(streams);
96 | ```
97 | #### 4.输出结果:
98 | ```
99 | 欢迎大家来体验达摩院推出的语音识别模型
100 |
101 | 非常的方便但是现在不同啊英国脱欧欧盟内部完善的产业链的红利人
102 |
103 | he must be home now for the light is on他一定在家因为灯亮着就是有一种推理或者解释的那种感觉
104 |
105 | elapsed_milliseconds:1502.8828125
106 | total_duration:40525.6875
107 | rtf:0.037084696280599808
108 | end!
109 | ```
110 |
111 | ## 实时(流式)模型调用方法:
112 |
113 | #### 1.添加项目引用
114 | using AliParaformerAsr;
115 |
116 | #### 2.模型初始化和配置
117 | ```csharp
118 | string encoderFilePath = applicationBase + "./" + modelName + "/encoder.int8.onnx";
119 | string decoderFilePath = applicationBase + "./" + modelName + "/decoder.int8.onnx";
120 | string configFilePath = applicationBase + "./" + modelName + "/asr.yaml";
121 | string mvnFilePath = applicationBase + "./" + modelName + "/am.mvn";
122 | string tokensFilePath = applicationBase + "./" + modelName + "/tokens.txt";
123 | OnlineRecognizer onlineRecognizer = new OnlineRecognizer(encoderFilePath, decoderFilePath, configFilePath, mvnFilePath, tokensFilePath);
124 | ```
125 | #### 3.调用
126 | ```csharp
127 | List<float[]> samples = new List<float[]>();
128 | //这里省略wav文件转samples...
129 | //这里省略细节,以下是批处理示意代码:
130 | List<AliParaformerAsr.OnlineStream> streams = new List<AliParaformerAsr.OnlineStream>();
131 | //批处理:为每个sample构建一个stream
132 | foreach (var sample in samples)
133 | {
134 | AliParaformerAsr.OnlineStream stream = onlineRecognizer.CreateOnlineStream();
135 | stream.AddSamples(sample);
136 | streams.Add(stream);
137 | }
138 | List<AliParaformerAsr.OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams);
139 | //单处理,只需构建一个stream:
140 | AliParaformerAsr.OnlineStream stream = onlineRecognizer.CreateOnlineStream();
141 | stream.AddSamples(sample);
142 | AliParaformerAsr.OnlineRecognizerResultEntity result = onlineRecognizer.GetResult(stream);
143 | //具体参考示例(AliParaformerAsr.Examples)代码
144 | ```
145 |
146 | #### 4.输出结果
147 | ```
148 |
149 | 正是因为存在绝对正义所以我我接受现实式相对生但是不要因因现实的相对对正义们就就认为这个世界有有证因为如果当你认为这这个界界
150 |
151 | elapsed_milliseconds:1389.3125
152 | total_duration:13052
153 | rtf:0.10644441464909593
154 | Hello, World!
155 | ```
156 |
157 |
158 | #### 相关工程:
159 | * 语音端点检测,解决长音频合理切分的问题,项目地址:[AliFsmnVad](https://github.com/manyeyes/AliFsmnVad "AliFsmnVad")
160 | * 文本标点预测,解决识别结果没有标点的问题,项目地址:[AliCTTransformerPunc](https://github.com/manyeyes/AliCTTransformerPunc "AliCTTransformerPunc")
161 |
162 | #### 其他说明:
163 |
164 | 测试用例:AliParaformerAsr.Examples。
165 | 测试CPU:Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz 2.59 GHz
166 | 支持平台:
167 | Windows 7 SP1或更高版本,
168 | macOS 10.13 (High Sierra) 或更高版本,iOS 等,
169 | Linux 发行版(需要特定的依赖关系,详见.NET 6支持的Linux发行版列表),
170 | Android(Android 5.0 (API 21) 或更高版本)。
171 |
172 | #### 模型下载(ONNX 模型)
173 |
174 | | 模型名称 | 类型 | 支持语言 | 标点 | 时间戳 | 下载地址 |
175 | | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ |
176 | | paraformer-large-zh-en-onnx-offline | 非流式 | 中文、英文 | 否 | 否 | [huggingface](https://huggingface.co/manyeyes/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-onnx "huggingface"), [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-zh-en-onnx-offline "modelscope") |
177 | | paraformer-large-zh-en-timestamp-onnx-offline | 非流式 | 中文、英文 | 否 | 是 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-zh-en-timestamp-onnx-offline "modelscope") |
178 | | paraformer-large-en-onnx-offline | 非流式 | 英文 | 否 | 否 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-en-onnx-offline "modelscope") |
179 | | paraformer-large-zh-en-onnx-online | 流式 | 中文、英文 | 否 | 否 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-zh-en-onnx-online "modelscope") |
180 | | paraformer-large-zh-yue-en-timestamp-onnx-offline-dengcunqin-20240805 | 非流式 | 中文、粤语、英文 | 否 | 是 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-zh-yue-en-timestamp-onnx-offline-dengcunqin-20240805 "modelscope") |
181 | | paraformer-large-zh-yue-en-onnx-offline-dengcunqin-20240805 | 非流式 | 中文、粤语、英文 | 否 | 否 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-zh-yue-en-onnx-offline-dengcunqin-20240805 "modelscope") |
182 | | paraformer-large-zh-yue-en-onnx-online-dengcunqin-20240208 | 流式 | 中文、粤语、英文 | 否 | 否 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-large-zh-yue-en-onnx-online-dengcunqin-20240208 "modelscope") |
183 | | paraformer-seaco-large-zh-timestamp-onnx-offline | 非流式 | 中文、热词 | 否 | 是 | [modelscope](https://www.modelscope.cn/models/manyeyes/paraformer-seaco-large-zh-timestamp-onnx-offline "modelscope") |
184 | | SenseVoiceSmall | 非流式 | 中文、粤语、英文、日语、韩语 | 是 | 否 | [modelscope](https://www.modelscope.cn/models/manyeyes/sensevoice-small-onnx "modelscope"), [modelscope-split-embed](https://www.modelscope.cn/models/manyeyes/sensevoice-small-split-embed-onnx "modelscope-split-embed") |
185 | | sensevoice-small-wenetspeech-yue-int8-onnx | 非流式 | 粤语、中文、英文、日语、韩语 | 是 | 否 | [modelscope](https://www.modelscope.cn/models/manyeyes/sensevoice-small-wenetspeech-yue-int8-onnx "modelscope") |
186 |
187 | ## 模型介绍:
188 |
189 | #### 模型用途:
190 | Paraformer是达摩院语音团队提出的一种高效的非自回归端到端语音识别框架。本项目为Paraformer中文通用语音识别模型,采用工业级数万小时的标注音频进行模型训练,保证了模型的通用识别效果。模型可以被应用于语音输入法、语音导航、智能会议纪要等场景。准确率:高。
191 |
192 | #### 模型结构:
193 | 
194 |
195 | Paraformer模型结构如上图所示,由 Encoder、Predictor、Sampler、Decoder 与 Loss function 五部分组成。Encoder可以采用不同的网络结构,例如self-attention,conformer,SAN-M等。Predictor 为两层FFN,预测目标文字个数以及抽取目标文字对应的声学向量。Sampler 为无可学习参数模块,依据输入的声学向量和目标向量,生产含有语义的特征向量。Decoder 结构与自回归模型类似,为双向建模(自回归为单向建模)。Loss function 部分,除了交叉熵(CE)与 MWER 区分性优化目标,还包括了 Predictor 优化目标 MAE。
196 |
197 | #### 主要核心点:
198 | Predictor 模块:基于 Continuous integrate-and-fire (CIF) 的 预测器 (Predictor) 来抽取目标文字对应的声学特征向量,可以更加准确的预测语音中目标文字个数。
199 | Sampler:通过采样,将声学特征向量与目标文字向量变换成含有语义信息的特征向量,配合双向的 Decoder 来增强模型对于上下文的建模能力。
200 | 基于负样本采样的 MWER 训练准则。
201 |
202 | #### 更详细的资料:
203 | * [paraformer-large-offline(非流式)](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch "paraformer-large-offline(非流式)")
204 | * [paraformer-large-online(流式)](https://www.modelscope.cn/models/iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online "paraformer-large-online(流式)")
205 | * [SenseVoiceSmall(非流式)](https://www.modelscope.cn/models/iic/SenseVoiceSmall "SenseVoiceSmall(非流式)")
206 | * 论文: [Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition](https://arxiv.org/abs/2206.08317 "Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition")
207 | * 论文解读:[Paraformer: 高识别率、高计算效率的单轮非自回归端到端语音识别模型](https://mp.weixin.qq.com/s/xQ87isj5_wxWiQs4qUXtVw "Paraformer: 高识别率、高计算效率的单轮非自回归端到端语音识别模型")
208 |
209 | 引用参考
210 | ----------
211 | [1] https://github.com/alibaba-damo-academy/FunASR
212 |
213 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [2023] [manyeyes]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples.MauiApp/Resources/Images/dotnet_bot.svg:
--------------------------------------------------------------------------------
1 |
94 |
--------------------------------------------------------------------------------
/AliParaformerAsr/OnlineModel.cs:
--------------------------------------------------------------------------------
1 | // See https://github.com/manyeyes for more information
2 | // Copyright (c) 2023 by manyeyes
3 | using AliParaformerAsr.Model;
4 | using Microsoft.ML.OnnxRuntime;
5 | using System.Diagnostics;
6 | //using System.Reflection;
7 |
namespace AliParaformerAsr
{
    /// <summary>
    /// Owns the encoder and decoder ONNX Runtime sessions of a streaming (online) model,
    /// exposes its chunking parameters, and provides helpers that pack per-stream buffers
    /// into batch-shaped flat arrays and unpack them again.
    /// </summary>
    public class OnlineModel
    {
        /// <summary>
        /// Number of floats one stream occupies inside a single stacked decoder state array.
        /// NOTE(review): presumably 512 channels x 10 cached frames - confirm against the decoder model.
        /// </summary>
        private const int StateBlockSize = 512 * 10;

        private InferenceSession _encoderSession;
        private InferenceSession _decoderSession;

        private readonly int _chunkSize = 5;
        private readonly int _lfr = 10;
        private int _chunkLength;
        private int _shiftLength;
        private int _featureDim = 80;
        private int _sampleRate = 16000;
        private string _mvnFilePath;
        private ConfEntity? _confEntity;

        /// <summary>
        /// Creates both inference sessions and loads the model configuration.
        /// </summary>
        /// <param name="encoderFilePath">Encoder model: a file path, or the bare name of a resource embedded in this assembly.</param>
        /// <param name="decoderFilePath">Decoder model: a file path, or the bare name of a resource embedded in this assembly.</param>
        /// <param name="mvnFilePath">Path of the mvn file; only stored here and exposed through <see cref="MvnFilePath"/>.</param>
        /// <param name="configFilePath">Path of a <c>.json</c> or <c>.yaml</c> configuration file.</param>
        /// <param name="threadsNum">Thread count handed to the session options; values &lt;= 0 select the processor count.</param>
        public OnlineModel(string encoderFilePath, string decoderFilePath, string mvnFilePath, string configFilePath, int threadsNum = 2)
        {
            _encoderSession = initModel(encoderFilePath, threadsNum);
            _decoderSession = initModel(decoderFilePath, threadsNum);
            _confEntity = LoadConf(configFilePath);
            _mvnFilePath = mvnFilePath;
            _chunkLength = _lfr * _chunkSize + 10;
            _shiftLength = _chunkLength;
        }

        public InferenceSession EncoderSession { get => _encoderSession; set => _encoderSession = value; }
        public InferenceSession DecoderSession { get => _decoderSession; set => _decoderSession = value; }
        public int ChunkLength { get => _chunkLength; set => _chunkLength = value; }
        public int ShiftLength { get => _shiftLength; set => _shiftLength = value; }
        public int FeatureDim { get => _featureDim; set => _featureDim = value; }
        public int SampleRate { get => _sampleRate; set => _sampleRate = value; }
        public string MvnFilePath { get => _mvnFilePath; set => _mvnFilePath = value; }
        public ConfEntity? ConfEntity { get => _confEntity; set => _confEntity = value; }

        /// <summary>
        /// Loads the configuration from a <c>.json</c> or <c>.yaml</c> file, chosen by extension
        /// (case-insensitive). An empty path or any other extension yields a default
        /// <see cref="Model.ConfEntity"/>.
        /// </summary>
        private ConfEntity? LoadConf(string configFilePath)
        {
            if (string.IsNullOrEmpty(configFilePath))
            {
                return new ConfEntity();
            }
            if (configFilePath.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
            {
                return Utils.PreloadHelper.ReadJson<ConfEntity>(configFilePath);
            }
            if (configFilePath.EndsWith(".yaml", StringComparison.OrdinalIgnoreCase))
            {
                return Utils.PreloadHelper.ReadYaml<ConfEntity>(configFilePath);
            }
            return new ConfEntity();
        }

        /// <summary>
        /// Creates a CPU inference session for the given model.
        /// </summary>
        /// <param name="modelFilePath">
        /// A file path, or a bare name (no <c>/</c> or <c>\</c>). A bare name is looked up among the
        /// resources embedded in this assembly first and falls back to a file of that name.
        /// </param>
        /// <param name="threadsNum">Thread count; values &lt;= 0 select <see cref="Environment.ProcessorCount"/>.</param>
        /// <returns>The session, or <c>null</c> when the path is empty or the model cannot be found.</returns>
        public InferenceSession initModel(string modelFilePath, int threadsNum = 2)
        {
            if (string.IsNullOrEmpty(modelFilePath))
            {
                return null!;
            }

            // Resolve the source before touching the file system: an embedded resource has no
            // file on disk, so an unconditional File.Exists check would always reject it.
            bool isBareName = modelFilePath.IndexOf('/') < 0 && modelFilePath.IndexOf('\\') < 0;
            bool isEmbedded = isBareName
                && typeof(OnlineModel).Assembly.GetManifestResourceInfo(modelFilePath) != null;
            if (!isEmbedded && !File.Exists(modelFilePath))
            {
                return null!;
            }

            Microsoft.ML.OnnxRuntime.SessionOptions options = new Microsoft.ML.OnnxRuntime.SessionOptions();
            //options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
            options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_FATAL;
            //options.AppendExecutionProvider_DML(0);
            options.AppendExecutionProvider_CPU(0);
            //options.AppendExecutionProvider_CUDA(0);
            //options.AppendExecutionProvider_MKLDNN();
            //options.AppendExecutionProvider_ROCm(0);
            // NOTE(review): only the inter-op thread count is configured here; confirm that
            // IntraOpNumThreads was not the intended setting.
            options.InterOpNumThreads = threadsNum > 0 ? threadsNum : System.Environment.ProcessorCount;
            // Enable the memory-pattern optimisation.
            options.EnableMemoryPattern = true;
            // Apply every available graph optimisation.
            options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL;

            if (isEmbedded)
            {
                byte[] model = ReadEmbeddedResourceAsBytes(modelFilePath);
                return new InferenceSession(model, options);
            }
            return new InferenceSession(modelFilePath, options);
        }

        /// <summary>
        /// Reads a resource embedded in this assembly into a byte array.
        /// </summary>
        /// <exception cref="FileNotFoundException">No embedded resource has that name.</exception>
        private static byte[] ReadEmbeddedResourceAsBytes(string resourceName)
        {
            //var assembly = Assembly.GetExecutingAssembly();
            using Stream resource = typeof(OnlineModel).Assembly.GetManifestResourceStream(resourceName) ??
                throw new FileNotFoundException($"Embedded resource '{resourceName}' not found.");
            // CopyTo loops until end of stream; a single Stream.Read call may return fewer
            // bytes than requested and would leave the tail of the buffer zeroed.
            using MemoryStream buffer = new MemoryStream();
            resource.CopyTo(buffer);
            return buffer.ToArray();
        }

        /// <summary>
        /// Flattens the per-stream lists into one list, keeping stream order and item order.
        /// </summary>
        public List<float[]> StackCifHiddens(List<List<float[]>> cifHiddens)
        {
            List<float[]> cifHidden = new List<float[]>();
            foreach (List<float[]> streamHiddens in cifHiddens)
            {
                cifHidden.AddRange(streamHiddens);
            }
            return cifHidden;
        }

        /// <summary>
        /// Splits a flat list into <paramref name="batchSize"/> consecutive groups of
        /// <c>cifHidden.Count / batchSize</c> items each (integer division; a remainder is dropped).
        /// </summary>
        public List<List<float[]>> UnStackCifHiddens(List<float[]> cifHidden, int batchSize)
        {
            List<List<float[]>> cifHiddens = new List<List<float[]>>();
            if (batchSize <= 0)
            {
                return cifHiddens;
            }
            int perStream = cifHidden.Count / batchSize;
            for (int b = 0; b < batchSize; b++)
            {
                cifHiddens.Add(cifHidden.GetRange(b * perStream, perStream));
            }
            return cifHiddens;
        }

        /// <summary>
        /// Returns masked copies of the given alpha sequences (see <see cref="MaskChunkEdges"/>);
        /// the input lists are not modified.
        /// </summary>
        public List<List<float>> DynamicMask(List<List<float>> alphas)
        {
            List<List<float>> newAlphas = new List<List<float>>();
            foreach (List<float> item in alphas)
            {
                float[] masked = item.ToArray();
                MaskChunkEdges(masked);
                newAlphas.Add(masked.ToList());
            }
            return newAlphas;
        }

        /// <summary>
        /// Concatenates the per-stream alpha arrays into one flat array. Every stream gets a slot
        /// as long as the first array. An empty list yields an empty array.
        /// </summary>
        public float[] StackCifAlphas(List<float[]> cifAlphaList)
        {
            int batchSize = cifAlphaList.Count;
            if (batchSize == 0)
            {
                return Array.Empty<float>();
            }
            int slotLength = cifAlphaList[0].Length;
            float[] cifAlphas = new float[slotLength * batchSize];
            for (int b = 0; b < batchSize; b++)
            {
                Array.Copy(cifAlphaList[b], 0, cifAlphas, b * slotLength, cifAlphaList[b].Length);
            }
            return cifAlphas;
        }

        /// <summary>
        /// Splits a flat alpha array into <paramref name="batchSize"/> equal slices and masks each
        /// slice in the same way as <see cref="DynamicMask"/>.
        /// </summary>
        public List<float[]> UnStackCifAlphas(float[] cifAlphas, int batchSize)
        {
            List<float[]> cifAlphaList = new List<float[]>();
            if (batchSize <= 0)
            {
                return cifAlphaList;
            }
            int slotLength = cifAlphas.Length / batchSize;
            for (int b = 0; b < batchSize; b++)
            {
                float[] cifAlphasItem = new float[slotLength];
                Array.Copy(cifAlphas, b * slotLength, cifAlphasItem, 0, slotLength);
                MaskChunkEdges(cifAlphasItem);
                cifAlphaList.Add(cifAlphasItem);
            }
            return cifAlphaList;
        }

        /// <summary>
        /// Zeroes, in place, the first <c>_chunkSize</c> values and every value from index
        /// <c>_chunkSize + _lfr</c> onwards, leaving only the middle window untouched.
        /// </summary>
        private void MaskChunkEdges(float[] alphas)
        {
            Array.Clear(alphas, 0, Math.Min(_chunkSize, alphas.Length));
            int tailStart = _chunkSize + _lfr;
            if (alphas.Length > tailStart)
            {
                Array.Clear(alphas, tailStart, alphas.Length - tailStart);
            }
        }

        /// <summary>
        /// Stacks per-stream decoder states into one array per layer.
        /// </summary>
        /// <param name="statesList">Outer index: stream; inner index: layer.</param>
        /// <returns>
        /// One array per layer, in which the blocks of <c>StateBlockSize</c> floats of all streams
        /// are interleaved stream by stream. An empty input yields an empty list.
        /// </returns>
        public List<float[]> stack_states(List<List<float[]>> statesList)
        {
            List<float[]> states = new List<float[]>();
            int batchSize = statesList.Count;
            if (batchSize == 0)
            {
                return states;
            }
            Debug.Assert(statesList[0].Count % 16 == 0, "when stack_states, state_list[0] is 16x");
            int fsmnLayer = statesList[0].Count;
            for (int i = 0; i < fsmnLayer; i++)
            {
                int itemLength = statesList[0][i].Length;
                float[] stacked = new float[itemLength * batchSize];
                int blockCount = itemLength / StateBlockSize;
                for (int x = 0; x < blockCount; x++)
                {
                    for (int n = 0; n < batchSize; n++)
                    {
                        // Read layer i of stream n. Indexing layer 0 here would fill every
                        // stacked layer with the first layer's state.
                        Array.Copy(statesList[n][i], x * StateBlockSize, stacked, (x * batchSize + n) * StateBlockSize, StateBlockSize);
                    }
                }
                states.Add(stacked);
            }
            return states;
        }

        /// <summary>
        /// Inverse of <see cref="stack_states"/> for states holding one block per stream: the batch
        /// size is derived from the length of the first array, and each stream receives its own
        /// <c>StateBlockSize</c>-float copy of every layer. An empty input yields an empty list.
        /// </summary>
        public List<List<float[]>> unstack_states(List<float[]> states)
        {
            List<List<float[]>> statesList = new List<List<float[]>>();
            if (states.Count == 0)
            {
                return statesList;
            }
            Debug.Assert(states.Count % 16 == 0, "when stack_states, state_list[0] is 16x");
            int fsmnLayer = states.Count;
            int batchSize = states[0].Length / 512 / 10;
            for (int b = 0; b < batchSize; b++)
            {
                List<float[]> streamStates = new List<float[]>(fsmnLayer);
                for (int j = 0; j < fsmnLayer; j++)
                {
                    float[] streamLayer = new float[StateBlockSize];
                    Array.Copy(states[j], b * StateBlockSize, streamLayer, 0, StateBlockSize);
                    streamStates.Add(streamLayer);
                }
                statesList.Add(streamStates);
            }
            return statesList;
        }

        /// <summary>
        /// Disposes both inference sessions when <paramref name="disposing"/> is true.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (disposing)
            {
                _encoderSession?.Dispose();
                _decoderSession?.Dispose();
            }
        }

        /// <summary>
        /// Releases the inference sessions held by this model.
        /// </summary>
        internal void Dispose()
        {
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
277 |
--------------------------------------------------------------------------------
/AliParaformerAsr.Examples/OfflineAliParaformerAsrRecognizer.cs:
--------------------------------------------------------------------------------
1 | using AliParaformerAsr.Examples.Utils;
2 | using System.Text;
3 |
4 | namespace AliParaformerAsr.Examples
5 | {
6 | internal partial class AliParaformerAsrRecognizer : BaseAsr
7 | {
8 | private static AliParaformerAsr.OfflineRecognizer? _offlineRecognizer;
/// <summary>
/// Lazily creates the shared offline recognizer from the files found in
/// <c>modelBasePath/modelName</c>; once created, the same instance is returned on every call.
/// </summary>
/// <param name="modelName">Name of the model folder below <paramref name="modelBasePath"/>.</param>
/// <param name="modelBasePath">Directory that contains the model folder.</param>
/// <param name="modelAccuracy">Preferred model variant; a file whose name contains <c>.{modelAccuracy}.</c> wins over other matches.</param>
/// <param name="threadsNum">Thread count forwarded to the recognizer.</param>
/// <returns>
/// The recognizer, or <c>null</c> when an argument is empty, the folder does not exist,
/// the model or tokens file is missing, or initialization throws.
/// </returns>
public static AliParaformerAsr.OfflineRecognizer InitOfflineRecognizer(string modelName, string modelBasePath, string modelAccuracy = "int8", int threadsNum = 2)
{
    if (_offlineRecognizer != null)
    {
        return _offlineRecognizer;
    }
    if (string.IsNullOrEmpty(modelBasePath) || string.IsNullOrEmpty(modelName))
    {
        return null;
    }
    try
    {
        string folderPath = Path.Combine(modelBasePath, modelName);
        if (!Directory.Exists(folderPath))
        {
            Console.WriteLine($"Error: folder does not exist - {folderPath}");
            return null;
        }

        // Directory.GetFiles does not guarantee any order, so sort the names to make
        // "take the last match" pick the same file on every platform and run.
        List<string> fileNames = Directory.GetFiles(folderPath)
            .Select(filePath => Path.GetFileName(filePath))
            .OrderBy(name => name, StringComparer.Ordinal)
            .ToList();
        string accuracyTag = $".{modelAccuracy}.";

        // Full path of the last file name accepted by the predicate, or "" when none matches.
        string PickLast(Func<string, bool> matches)
        {
            string? name = fileNames.LastOrDefault(matches);
            return name == null ? "" : Path.Combine(folderPath, name);
        }

        // Like PickLast, but prefers a match that carries the requested accuracy tag.
        string PickModel(Func<string, bool> matches)
        {
            string preferred = PickLast(name => matches(name) && name.Contains(accuracyTag));
            return preferred.Length > 0 ? preferred : PickLast(matches);
        }

        string modelFilePath = PickModel(name => name.StartsWith("model", StringComparison.Ordinal) && !name.Contains("_eb"));
        string modelebFilePath = PickModel(name => name.StartsWith("model_eb", StringComparison.Ordinal));
        if (modelebFilePath.Length == 0)
        {
            // No model_eb file present: keep handing over the historical default path so the
            // value the recognizer receives in that case is unchanged.
            modelebFilePath = modelBasePath + "./" + modelName + "/model_eb.int8.onnx";
        }
        string configFilePath = PickLast(name => name.StartsWith("asr", StringComparison.Ordinal)
            && (name.EndsWith(".yaml", StringComparison.Ordinal) || name.EndsWith(".json", StringComparison.Ordinal)));
        string mvnFilePath = PickLast(name => name.StartsWith("am", StringComparison.Ordinal) && name.EndsWith(".mvn", StringComparison.Ordinal));
        string tokensFilePath = PickLast(name => name.StartsWith("tokens", StringComparison.Ordinal) && name.EndsWith(".txt", StringComparison.Ordinal));
        string hotwordFilePath = PickLast(name => name.StartsWith("hotword", StringComparison.Ordinal) && name.EndsWith(".txt", StringComparison.Ordinal));

        // The model path starts out empty, so a folder without any model file is rejected
        // here instead of passing a made-up path on to the recognizer.
        if (string.IsNullOrEmpty(modelFilePath) || string.IsNullOrEmpty(tokensFilePath))
        {
            Console.WriteLine($"Error: model or tokens file not found - {folderPath}");
            return null;
        }

        System.Diagnostics.Stopwatch initTimer = System.Diagnostics.Stopwatch.StartNew();
        _offlineRecognizer = new OfflineRecognizer(modelFilePath: modelFilePath, configFilePath: configFilePath, mvnFilePath: mvnFilePath, tokensFilePath: tokensFilePath, modelebFilePath: modelebFilePath, hotwordFilePath: hotwordFilePath, threadsNum: threadsNum);
        initTimer.Stop();
        double elapsed_milliseconds_init = initTimer.Elapsed.TotalMilliseconds;
        Console.WriteLine("init_models_elapsed_milliseconds:{0}", elapsed_milliseconds_init.ToString());
    }
    catch (UnauthorizedAccessException)
    {
        Console.WriteLine($"Error: No permission to access this folder");
    }
    catch (PathTooLongException)
    {
        Console.WriteLine($"Error: File path too long");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error occurred: {ex}");
    }
    return _offlineRecognizer;
}
/// <summary>
/// Runs offline speech recognition over a set of media files and prints the
/// results and timing statistics to the console.
/// </summary>
/// <param name="streamDecodeMethod">
/// "one" decodes each file through its own <c>GetResult</c> call; "batch" decodes
/// all files through a single <c>GetResults</c> call. Null or empty selects "batch".
/// </param>
/// <param name="modelName">Name of the model folder located under <paramref name="modelBasePath"/>.</param>
/// <param name="modelAccuracy">Model accuracy tag forwarded to <c>InitOfflineRecognizer</c> (for example "int8").</param>
/// <param name="threadsNum">Thread count forwarded to <c>InitOfflineRecognizer</c>.</param>
/// <param name="mediaFilePaths">
/// Files to recognize. When null or empty, every "*.wav" file found recursively
/// under the model folder is used instead.
/// </param>
/// <param name="modelBasePath">Base folder containing the model folder; defaults to <c>applicationBase</c> when null or empty.</param>
public static void OfflineRecognizer(string streamDecodeMethod = "one", string modelName = "paraformer-seaco-large-zh-timestamp-onnx-offline", string modelAccuracy = "int8", int threadsNum = 2, string[]? mediaFilePaths = null, string? modelBasePath = null)
{
    if (string.IsNullOrEmpty(modelBasePath))
    {
        modelBasePath = applicationBase;
    }

    OfflineRecognizer? offlineRecognizer = InitOfflineRecognizer(modelName, modelBasePath, modelAccuracy, threadsNum);
    if (offlineRecognizer == null)
    {
        Console.WriteLine("Init models failure!");
        return;
    }

    TimeSpan total_duration = TimeSpan.Zero;
    List<float[]> samples = new List<float[]>();
    List<string> paths = new List<string>();

    if (mediaFilePaths == null || mediaFilePaths.Length == 0)
    {
        // No explicit input: fall back to every wav file below the model folder,
        // or to an empty array when that folder does not exist.
        string fullPath = Path.Combine(modelBasePath, modelName);
        mediaFilePaths = Directory.Exists(fullPath)
            ? Directory.GetFiles(fullPath, "*.wav", SearchOption.AllDirectories)
            : Array.Empty<string>();
    }

    foreach (string mediaFilePath in mediaFilePaths)
    {
        if (!File.Exists(mediaFilePath) || !AudioHelper.IsAudioByHeader(mediaFilePath))
        {
            continue;
        }

        TimeSpan duration = TimeSpan.Zero;
        float[]? sample = AudioHelper.GetFileSample(wavFilePath: mediaFilePath, duration: ref duration);
        if (sample != null)
        {
            // paths and samples are kept index-aligned so results can be labelled.
            paths.Add(mediaFilePath);
            samples.Add(sample);
            total_duration += duration;
        }
    }

    if (samples.Count == 0)
    {
        Console.WriteLine("No media file is read!");
        return;
    }

    Console.WriteLine("Automatic speech recognition in progress!");
    // Stopwatch is monotonic, unlike differences of DateTime.Now ticks.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    streamDecodeMethod = string.IsNullOrEmpty(streamDecodeMethod) ? "batch" : streamDecodeMethod; // one, batch

    if (streamDecodeMethod == "one")
    {
        // Non-batch method: one stream and one GetResult call per file.
        Console.WriteLine("Recognition results:\r\n");
        try
        {
            int n = 0;
            foreach (float[] sample in samples)
            {
                OfflineStream stream = offlineRecognizer.CreateOfflineStream();
                // Modify the logic here to dynamically modify hot words
                //stream.Hotwords = Utils.TextHelper.GetHotwords(Path.Combine(modelBasePath, modelName, "tokens.txt"), new string[] {"魔搭" });
                stream.AddSamples(sample);
                AliParaformerAsr.Model.OfflineRecognizerResultEntity result = offlineRecognizer.GetResult(stream);
                PrintResult(paths[n], result);
                n++;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            Console.WriteLine(ex.InnerException?.InnerException);
        }
    }
    else if (streamDecodeMethod == "batch")
    {
        // Batch method: all streams are decoded by a single GetResults call.
        Console.WriteLine("Recognition results:\r\n");
        try
        {
            List<AliParaformerAsr.OfflineStream> streams = new List<AliParaformerAsr.OfflineStream>();
            foreach (float[] sample in samples)
            {
                AliParaformerAsr.OfflineStream stream = offlineRecognizer.CreateOfflineStream();
                stream.AddSamples(sample);
                streams.Add(stream);
            }

            int n = 0;
            var results = offlineRecognizer.GetResults(streams);
            foreach (AliParaformerAsr.Model.OfflineRecognizerResultEntity result in results)
            {
                PrintResult(paths[n], result);
                n++;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            // Both links need '?.': with only one level of inner exception the
            // second InnerException is null and '.Message' would throw here.
            Console.WriteLine(ex.InnerException?.InnerException?.Message);
        }
    }
    else
    {
        // Previously an unknown value decoded nothing without any diagnostic.
        Console.WriteLine($"Unsupported streamDecodeMethod: {streamDecodeMethod} (expected \"one\" or \"batch\")");
    }

    // Stop timing before teardown so disposal is not counted as recognition time.
    stopwatch.Stop();

    if (_offlineRecognizer != null)
    {
        _offlineRecognizer.Dispose();
        _offlineRecognizer = null;
    }

    double elapsed_milliseconds = stopwatch.Elapsed.TotalMilliseconds;
    double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds;
    Console.WriteLine("recognition_elapsed_milliseconds:{0}", elapsed_milliseconds.ToString());
    Console.WriteLine("total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString());
    Console.WriteLine("rtf:{0}", rtf.ToString());
    Console.WriteLine("end!");

    // Prints the media path, then the result as a JSON-like line, then a blank line.
    // NOTE(review): Text and tokens are not JSON-escaped (unchanged from the original).
    static void PrintResult(string mediaPath, AliParaformerAsr.Model.OfflineRecognizerResultEntity result)
    {
        Console.WriteLine($"{mediaPath}");
        StringBuilder r = new StringBuilder();
        r.Append("{");
        r.Append($"\"text\": \"{result.Text}\",");
        r.Append($"\"tokens\":[{string.Join(",", result.Tokens.Select(token => $"\"{token}\""))}],");
        r.Append($"\"timestamps\":[{string.Join(",", result.Timestamps.Select(span => $"[{span.First()},{span.Last()}]"))}]");
        r.Append("}");
        Console.WriteLine(r.ToString());
        Console.WriteLine("");
    }
}
252 | }
253 | }
254 |
--------------------------------------------------------------------------------