├── Connectors.OnnxRuntimeGenAI.csproj
├── OnnxRuntimeGenAIKernelBuilderExtensions.cs
├── OnnxRuntimeGenAIPromptExecutionSettings.cs
├── OnnxRuntimeGenAIServiceCollectionExtensions.cs
├── README.md
├── SemanticKernel.Connectors.OnnxRuntimeGenAI.sln
├── Services
│   └── OnnxRuntimeGenAIChatCompletionService.cs
└── demo.png

/Connectors.OnnxRuntimeGenAI.csproj:
--------------------------------------------------------------------------------
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <AssemblyName>feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI</AssemblyName>
    <RootNamespace>$(AssemblyName)</RootNamespace>
    <TargetFramework>netstandard2.1</TargetFramework>
    <Nullable>enable</Nullable>
    <LangVersion>10</LangVersion>
    <Configurations>Debug;Release;Debug_Cuda;Release_Cuda;Debug_DirectML;Release_DirectML;</Configurations>
  </PropertyGroup>

  <PropertyGroup>
    <Title>Semantic Kernel - Microsoft.ML.OnnxRuntimeGenAI connectors</Title>
    <Description>Semantic Kernel connector for Microsoft.ML.OnnxRuntimeGenAI.</Description>
  </PropertyGroup>

  <ItemGroup>
    <!-- PackageReference entries are not recoverable from this dump; the code
         depends on Microsoft.SemanticKernel.Abstractions and Microsoft.ML.OnnxRuntimeGenAI. -->
  </ItemGroup>

</Project>
--------------------------------------------------------------------------------
/OnnxRuntimeGenAIKernelBuilderExtensions.cs:
--------------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel.ChatCompletion;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

namespace Microsoft.SemanticKernel;

/// <summary>
/// Extension methods for adding the OnnxRuntimeGenAI chat completion service to the kernel builder.
/// </summary>
public static class OnnxRuntimeGenAIKernelBuilderExtensions
{
    /// <summary>
    /// Add OnnxRuntimeGenAI chat completion services to the kernel builder.
    /// </summary>
    /// <param name="builder">The kernel builder.</param>
    /// <param name="modelPath">The generative AI ONNX model path.</param>
    /// <param name="serviceId">The optional service ID.</param>
    /// <returns>The updated kernel builder.</returns>
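    /// <example>
    /// A minimal usage sketch; the model path below is illustrative:
    /// <code>
    /// Kernel kernel = Kernel.CreateBuilder()
    ///     .AddOnnxRuntimeGenAIChatCompletion(modelPath: @"d:\models\phi-3-mini")
    ///     .Build();
    /// </code>
    /// </example>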
    public static IKernelBuilder AddOnnxRuntimeGenAIChatCompletion(
        this IKernelBuilder builder,
        string modelPath,
        string? serviceId = null)
    {
        builder.Services.AddKeyedSingleton<IChatCompletionService>(serviceId, (serviceProvider, _) =>
            new OnnxRuntimeGenAIChatCompletionService(
                modelPath: modelPath,
                loggerFactory: serviceProvider.GetService<ILoggerFactory>()));

        return builder;
    }
}
--------------------------------------------------------------------------------
/OnnxRuntimeGenAIPromptExecutionSettings.cs:
--------------------------------------------------------------------------------
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.SemanticKernel;

namespace feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

/// <summary>
/// OnnxRuntimeGenAI prompt execution settings.
/// </summary>
public sealed class OnnxRuntimeGenAIPromptExecutionSettings : PromptExecutionSettings
{
    /// <summary>
    /// Converts a generic <see cref="PromptExecutionSettings"/> instance to this connector's
    /// settings type, falling back to the defaults when none are supplied.
    /// </summary>
    public static OnnxRuntimeGenAIPromptExecutionSettings FromExecutionSettings(PromptExecutionSettings? executionSettings)
    {
        switch (executionSettings)
        {
            case OnnxRuntimeGenAIPromptExecutionSettings settings:
                return settings;
            default:
                return new OnnxRuntimeGenAIPromptExecutionSettings();
        }
    }
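
    // The [JsonPropertyName] values mirror the ONNX Runtime GenAI search option
    // names, so settings can also be loaded from JSON. A sketch (values are
    // illustrative):
    //
    //   var settings = JsonSerializer.Deserialize<OnnxRuntimeGenAIPromptExecutionSettings>(
    //       "{\"max_length\": 1024, \"temperature\": 0.7, \"do_sample\": true}");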

    [JsonPropertyName("top_k")]
    public int TopK { get; set; } = 50;

    [JsonPropertyName("top_p")]
    public float TopP { get; set; } = 0.9f;

    [JsonPropertyName("temperature")]
    public float Temperature { get; set; } = 1;

    [JsonPropertyName("repetition_penalty")]
    public float RepetitionPenalty { get; set; } = 1;

    [JsonPropertyName("past_present_share_buffer")]
    public bool PastPresentShareBuffer { get; set; } = false;

    [JsonPropertyName("num_return_sequences")]
    public int NumReturnSequences { get; set; } = 1;

    [JsonPropertyName("num_beams")]
    public int NumBeams { get; set; } = 1;

    [JsonPropertyName("no_repeat_ngram_size")]
    public int NoRepeatNgramSize { get; set; } = 0;

    [JsonPropertyName("min_length")]
    public int MinLength { get; set; } = 0;

    [JsonPropertyName("max_length")]
    public int MaxLength { get; set; } = 200;

    [JsonPropertyName("length_penalty")]
    public float LengthPenalty { get; set; } = 1;

    [JsonPropertyName("diversity_penalty")]
    public float DiversityPenalty { get; set; } = 0;

    [JsonPropertyName("early_stopping")]
    public bool EarlyStopping { get; set; } = true;

    [JsonPropertyName("do_sample")]
    public bool DoSample { get; set; } = false;
}
--------------------------------------------------------------------------------
/OnnxRuntimeGenAIServiceCollectionExtensions.cs:
--------------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel.ChatCompletion;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

namespace Microsoft.SemanticKernel;

/// <summary>
/// Extension methods for adding the OnnxRuntimeGenAI chat completion service to a service collection.
/// </summary>
public static class OnnxRuntimeGenAIServiceCollectionExtensions
{
    /// <summary>
    /// Add OnnxRuntimeGenAI chat completion services to the specified service collection.
    /// </summary>
    /// <param name="services">The service collection to add the OnnxRuntimeGenAI chat completion service to.</param>
    /// <param name="modelPath">The generative AI ONNX model path.</param>
    /// <param name="serviceId">Optional service ID.</param>
    /// <returns>The updated service collection.</returns>
    public static IServiceCollection AddOnnxRuntimeGenAIChatCompletion(
        this IServiceCollection services,
        string modelPath,
        string? serviceId = null)
    {
        services.AddKeyedSingleton<IChatCompletionService>(serviceId, (serviceProvider, _) =>
            new OnnxRuntimeGenAIChatCompletionService(
                modelPath,
                loggerFactory: serviceProvider.GetService<ILoggerFactory>()));

        return services;
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SemanticKernel.Connectors.OnnxRuntimeGenAI
Semantic Kernel connector for ONNX models.

## How to use

### Prerequisites

Download an ONNX model, for example `Phi-3 Mini-4K-Instruct`:

```
git lfs install
git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx
```

If huggingface.co is slow or unreachable, the same model can be cloned from the mirror at https://hf-mirror.com/microsoft/Phi-3-mini-4k-instruct-onnx.

### Code

Create a new console app and add the NuGet package that matches your hardware:

```
-- for CPU
feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI.CPU

-- for CUDA
feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI.CUDA
```

Then change Program.cs to:

```csharp
using Microsoft.SemanticKernel;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

Kernel kernel = Kernel.CreateBuilder()
    .AddOnnxRuntimeGenAIChatCompletion(
        modelPath: @"d:\Phi-3-mini-4k-instruct-onnx\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4")
    .Build();

string prompt = "Write a joke";

await foreach (string text in kernel.InvokePromptStreamingAsync<string>(prompt,
    new KernelArguments(new OnnxRuntimeGenAIPromptExecutionSettings() { MaxLength = 2048 })))
{
    Console.Write(text);
}
```

![](demo.png)
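
### Using the chat completion service directly

A minimal sketch using `IChatCompletionService` with a `ChatHistory`, reusing the kernel built above:

```csharp
using Microsoft.SemanticKernel.ChatCompletion;

var chat = kernel.GetRequiredService<IChatCompletionService>();

var history = new ChatHistory();
history.AddUserMessage("Write a joke");

var replies = await chat.GetChatMessageContentsAsync(history,
    new OnnxRuntimeGenAIPromptExecutionSettings { MaxLength = 2048 });
Console.WriteLine(replies[0].Content);
```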

--------------------------------------------------------------------------------
/SemanticKernel.Connectors.OnnxRuntimeGenAI.sln:
--------------------------------------------------------------------------------
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.002.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.OnnxRuntimeGenAI", "Connectors.OnnxRuntimeGenAI.csproj", "{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {7F327FC4-2646-48A6-8EA8-4D8F26AC60D0}
	EndGlobalSection
EndGlobal
--------------------------------------------------------------------------------
/Services/OnnxRuntimeGenAIChatCompletionService.cs:
--------------------------------------------------------------------------------
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.ML.OnnxRuntimeGenAI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Services;

namespace feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

/// <summary>
/// Represents a chat completion service using OnnxRuntimeGenAI.
/// </summary>
public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionService
{
    private readonly Model _model;
    private readonly Tokenizer _tokenizer;

    private Dictionary<string, object?> AttributesInternal { get; } = new();

    /// <summary>
    /// Initializes a new instance of the OnnxRuntimeGenAIChatCompletionService class.
    /// </summary>
    /// <param name="modelPath">The generative AI ONNX model path for the chat completion service.</param>
    /// <param name="loggerFactory">Optional logger factory to be used for logging.</param>
    public OnnxRuntimeGenAIChatCompletionService(
        string modelPath,
        ILoggerFactory? loggerFactory = null)
    {
        _model = new Model(modelPath);
        _tokenizer = new Tokenizer(_model);

        // Expose the model path as the service's model identifier.
        this.AttributesInternal.Add(AIServiceExtensions.ModelIdKey, modelPath);
    }
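
    // Model and Tokenizer wrap native ONNX Runtime GenAI resources and are
    // IDisposable. The service itself does not implement IDisposable; a sketch
    // of the cleanup a host could add if it retires the service:
    //
    //   public void Dispose()
    //   {
    //       _tokenizer.Dispose();
    //       _model.Dispose();
    //   }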

    /// <inheritdoc/>
    public IReadOnlyDictionary<string, object?> Attributes => this.AttributesInternal;

    /// <inheritdoc/>
    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
    {
        var result = new StringBuilder();

        await foreach (var content in RunInferenceAsync(chatHistory, executionSettings, cancellationToken))
        {
            result.Append(content);
        }

        return new List<ChatMessageContent>
        {
            new(
                role: AuthorRole.Assistant,
                content: result.ToString())
        };
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        await foreach (var content in RunInferenceAsync(chatHistory, executionSettings, cancellationToken))
        {
            yield return new StreamingChatMessageContent(AuthorRole.Assistant, content);
        }
    }

    private async IAsyncEnumerable<string> RunInferenceAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings, [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        var settings = OnnxRuntimeGenAIPromptExecutionSettings.FromExecutionSettings(executionSettings);

        var prompt = GetPrompt(chatHistory, settings);
        var tokens = _tokenizer.Encode(prompt);

        // GeneratorParams and Generator hold native resources, so dispose them
        // when the iteration completes.
        using var generatorParams = new GeneratorParams(_model);
        ApplyPromptExecutionSettings(generatorParams, settings);
        generatorParams.SetInputSequences(tokens);

        using var generator = new Generator(_model, generatorParams);

        while (!generator.IsDone())
        {
            cancellationToken.ThrowIfCancellationRequested();

            yield return await Task.Run(() =>
            {
                generator.ComputeLogits();
                generator.GenerateNextToken();

                // Decode only the most recent token so each yielded chunk is the
                // newly generated piece of text.
                var outputTokens = generator.GetSequence(0);
                var newToken = outputTokens.Slice(outputTokens.Length - 1, 1);
                return _tokenizer.Decode(newToken);
            }, cancellationToken);
        }
    }

    private string GetPrompt(ChatHistory chatHistory, OnnxRuntimeGenAIPromptExecutionSettings settings)
    {
        // Build a Phi-3 style chat prompt: wrap each message in role tags,
        // terminate it with <|end|>, then open the assistant turn.
        var promptBuilder = new StringBuilder();
        foreach (var message in chatHistory)
        {
            promptBuilder.Append($"<|{message.Role}|>\n{message.Content}<|end|>\n");
        }
        promptBuilder.Append("<|assistant|>");

        return promptBuilder.ToString();
    }
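
    // For a history containing a single user message "Write a joke", GetPrompt
    // yields a Phi-3 style prompt like:
    //
    //   <|user|>
    //   Write a joke<|end|>
    //   <|assistant|>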

    private void ApplyPromptExecutionSettings(GeneratorParams generatorParams, OnnxRuntimeGenAIPromptExecutionSettings settings)
    {
        generatorParams.SetSearchOption("top_p", settings.TopP);
        generatorParams.SetSearchOption("top_k", settings.TopK);
        generatorParams.SetSearchOption("temperature", settings.Temperature);
        generatorParams.SetSearchOption("repetition_penalty", settings.RepetitionPenalty);
        generatorParams.SetSearchOption("past_present_share_buffer", settings.PastPresentShareBuffer);
        generatorParams.SetSearchOption("num_return_sequences", settings.NumReturnSequences);
        generatorParams.SetSearchOption("num_beams", settings.NumBeams);
        generatorParams.SetSearchOption("no_repeat_ngram_size", settings.NoRepeatNgramSize);
        generatorParams.SetSearchOption("min_length", settings.MinLength);
        generatorParams.SetSearchOption("max_length", settings.MaxLength);
        generatorParams.SetSearchOption("length_penalty", settings.LengthPenalty);
        generatorParams.SetSearchOption("early_stopping", settings.EarlyStopping);
        generatorParams.SetSearchOption("do_sample", settings.DoSample);
        generatorParams.SetSearchOption("diversity_penalty", settings.DiversityPenalty);
    }
}
--------------------------------------------------------------------------------
/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feiyun0112/SemanticKernel.Connectors.OnnxRuntimeGenAI/6efcbcfbb84c5ecdb5cc31215903b6df13f3f841/demo.png
--------------------------------------------------------------------------------
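
Usage sketch: streaming directly against the chat completion service, without going through a kernel (the model path and message are illustrative):

```csharp
using System;
using Microsoft.SemanticKernel.ChatCompletion;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

var service = new OnnxRuntimeGenAIChatCompletionService(
    @"d:\Phi-3-mini-4k-instruct-onnx\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4");

var history = new ChatHistory();
history.AddUserMessage("Write a joke");

await foreach (var chunk in service.GetStreamingChatMessageContentsAsync(
    history, new OnnxRuntimeGenAIPromptExecutionSettings { MaxLength = 2048 }))
{
    Console.Write(chunk.Content);
}
```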