├── Connectors.OnnxRuntimeGenAI.csproj
├── OnnxRuntimeGenAIKernelBuilderExtensions.cs
├── OnnxRuntimeGenAIPromptExecutionSettings.cs
├── OnnxRuntimeGenAIServiceCollectionExtensions.cs
├── README.md
├── SemanticKernel.Connectors.OnnxRuntimeGenAI.sln
├── Services
│   └── OnnxRuntimeGenAIChatCompletionService.cs
└── demo.png
/Connectors.OnnxRuntimeGenAI.csproj:
--------------------------------------------------------------------------------
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <AssemblyName>feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI</AssemblyName>
    <RootNamespace>$(AssemblyName)</RootNamespace>
    <TargetFramework>netstandard2.1</TargetFramework>
    <Nullable>enable</Nullable>
    <LangVersion>10</LangVersion>
    <Configurations>Debug;Release;Debug_Cuda;Release_Cuda;Debug_DirectML;Release_DirectML;</Configurations>
  </PropertyGroup>

  <PropertyGroup>
    <Title>Semantic Kernel-Microsoft.ML.OnnxRuntimeGenAI connectors</Title>
    <Description>Semantic Kernel connector for Microsoft.ML.OnnxRuntimeGenAI.</Description>
  </PropertyGroup>

</Project>
--------------------------------------------------------------------------------
/OnnxRuntimeGenAIKernelBuilderExtensions.cs:
--------------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel.ChatCompletion;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

namespace Microsoft.SemanticKernel;

/// <summary>
/// Extension methods for adding the OnnxRuntimeGenAI Chat Completion service to the kernel builder.
/// </summary>
public static class OnnxRuntimeGenAIKernelBuilderExtensions
{
    /// <summary>
    /// Add OnnxRuntimeGenAI Chat Completion services to the kernel builder.
    /// </summary>
    /// <param name="builder">The kernel builder.</param>
    /// <param name="modelPath">The generative AI ONNX model path.</param>
    /// <param name="serviceId">The optional service ID.</param>
    /// <returns>The updated kernel builder.</returns>
    public static IKernelBuilder AddOnnxRuntimeGenAIChatCompletion(
        this IKernelBuilder builder,
        string modelPath,
        string? serviceId = null)
    {
        builder.Services.AddKeyedSingleton<IChatCompletionService>(serviceId, (serviceProvider, _) =>
            new OnnxRuntimeGenAIChatCompletionService(
                modelPath: modelPath,
                loggerFactory: serviceProvider.GetService<ILoggerFactory>()));

        return builder;
    }
}
--------------------------------------------------------------------------------
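
For reference, a minimal usage sketch (not part of the repository): register the connector with an explicit service ID, then resolve it from the built kernel. The model path and the "onnx" key are placeholders.

```csharp
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

// Register the connector under a service ID (both arguments are placeholders).
Kernel kernel = Kernel.CreateBuilder()
    .AddOnnxRuntimeGenAIChatCompletion(
        modelPath: @"d:\Phi-3-mini-4k-instruct-onnx\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4",
        serviceId: "onnx")
    .Build();

// The service is registered as a keyed IChatCompletionService,
// so it can be resolved by key from the kernel.
IChatCompletionService chat = kernel.GetRequiredService<IChatCompletionService>("onnx");
```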
/OnnxRuntimeGenAIPromptExecutionSettings.cs:
--------------------------------------------------------------------------------
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.SemanticKernel;

namespace feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

/// <summary>
/// OnnxRuntimeGenAI Execution Settings.
/// </summary>
public sealed class OnnxRuntimeGenAIPromptExecutionSettings : PromptExecutionSettings
{
    /// <summary>
    /// Convert a <see cref="PromptExecutionSettings"/> instance to an <see cref="OnnxRuntimeGenAIPromptExecutionSettings"/> instance.
    /// </summary>
    public static OnnxRuntimeGenAIPromptExecutionSettings FromExecutionSettings(PromptExecutionSettings? executionSettings)
    {
        switch (executionSettings)
        {
            case null:
                return new OnnxRuntimeGenAIPromptExecutionSettings();
            case OnnxRuntimeGenAIPromptExecutionSettings settings:
                return settings;
            default:
                // Preserve values supplied through another settings type
                // (e.g. the base PromptExecutionSettings with extension data)
                // via a JSON round-trip instead of silently discarding them.
                return JsonSerializer.Deserialize<OnnxRuntimeGenAIPromptExecutionSettings>(
                    JsonSerializer.Serialize(executionSettings)) ?? new OnnxRuntimeGenAIPromptExecutionSettings();
        }
    }

    private int _topK = 50;
    private float _topP = 0.9f;
    private float _temperature = 1;
    private float _repetitionPenalty = 1;
    private bool _pastPresentShareBuffer = false;
    private int _numReturnSequences = 1;
    private int _numBeams = 1;
    private int _noRepeatNgramSize = 0;
    private int _minLength = 0;
    private int _maxLength = 200;
    private float _lengthPenalty = 1;
    private bool _earlyStopping = true;
    private bool _doSample = false;
    private float _diversityPenalty = 0;

    [JsonPropertyName("top_k")]
    public int TopK
    {
        get { return _topK; }
        set { _topK = value; }
    }

    [JsonPropertyName("top_p")]
    public float TopP
    {
        get { return _topP; }
        set { _topP = value; }
    }

    [JsonPropertyName("temperature")]
    public float Temperature
    {
        get { return _temperature; }
        set { _temperature = value; }
    }

    [JsonPropertyName("repetition_penalty")]
    public float RepetitionPenalty
    {
        get { return _repetitionPenalty; }
        set { _repetitionPenalty = value; }
    }

    [JsonPropertyName("past_present_share_buffer")]
    public bool PastPresentShareBuffer
    {
        get { return _pastPresentShareBuffer; }
        set { _pastPresentShareBuffer = value; }
    }

    [JsonPropertyName("num_return_sequences")]
    public int NumReturnSequences
    {
        get { return _numReturnSequences; }
        set { _numReturnSequences = value; }
    }

    [JsonPropertyName("num_beams")]
    public int NumBeams
    {
        get { return _numBeams; }
        set { _numBeams = value; }
    }

    [JsonPropertyName("no_repeat_ngram_size")]
    public int NoRepeatNgramSize
    {
        get { return _noRepeatNgramSize; }
        set { _noRepeatNgramSize = value; }
    }

    [JsonPropertyName("min_length")]
    public int MinLength
    {
        get { return _minLength; }
        set { _minLength = value; }
    }

    [JsonPropertyName("max_length")]
    public int MaxLength
    {
        get { return _maxLength; }
        set { _maxLength = value; }
    }

    [JsonPropertyName("length_penalty")]
    public float LengthPenalty
    {
        get { return _lengthPenalty; }
        set { _lengthPenalty = value; }
    }

    [JsonPropertyName("diversity_penalty")]
    public float DiversityPenalty
    {
        get { return _diversityPenalty; }
        set { _diversityPenalty = value; }
    }

    [JsonPropertyName("early_stopping")]
    public bool EarlyStopping
    {
        get { return _earlyStopping; }
        set { _earlyStopping = value; }
    }

    [JsonPropertyName("do_sample")]
    public bool DoSample
    {
        get { return _doSample; }
        set { _doSample = value; }
    }
}
--------------------------------------------------------------------------------
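
Because the properties above are annotated with `JsonPropertyName`, the settings can be populated from JSON, and `FromExecutionSettings` normalizes whatever the caller passed in. A small sketch with hypothetical values:

```csharp
using System;
using System.Text.Json;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

// Deserialize settings from their snake_case JSON form (values are hypothetical).
string json = "{\"max_length\": 2048, \"temperature\": 0.7, \"do_sample\": true}";
var settings = JsonSerializer.Deserialize<OnnxRuntimeGenAIPromptExecutionSettings>(json)!;

// FromExecutionSettings returns the instance unchanged when it is already
// the ONNX settings type, and converts other PromptExecutionSettings otherwise.
var normalized = OnnxRuntimeGenAIPromptExecutionSettings.FromExecutionSettings(settings);
Console.WriteLine($"{normalized.MaxLength} {normalized.Temperature} {normalized.DoSample}");
// -> 2048 0.7 True
```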
/OnnxRuntimeGenAIServiceCollectionExtensions.cs:
--------------------------------------------------------------------------------
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel.ChatCompletion;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

namespace Microsoft.SemanticKernel;

/// <summary>
/// Extension methods for adding the OnnxRuntimeGenAI Chat Completion service to a service collection.
/// </summary>
public static class OnnxRuntimeGenAIServiceCollectionExtensions
{
    /// <summary>
    /// Add OnnxRuntimeGenAI Chat Completion services to the specified service collection.
    /// </summary>
    /// <param name="services">The service collection to add the OnnxRuntimeGenAI Chat Completion service to.</param>
    /// <param name="modelPath">The generative AI ONNX model path.</param>
    /// <param name="serviceId">Optional service ID.</param>
    /// <returns>The updated service collection.</returns>
    public static IServiceCollection AddOnnxRuntimeGenAIChatCompletion(
        this IServiceCollection services,
        string modelPath,
        string? serviceId = null)
    {
        services.AddKeyedSingleton<IChatCompletionService>(serviceId, (serviceProvider, _) =>
            new OnnxRuntimeGenAIChatCompletionService(
                modelPath,
                loggerFactory: serviceProvider.GetService<ILoggerFactory>()));

        return services;
    }
}
--------------------------------------------------------------------------------
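
The same extension method also works with a standalone DI container outside of the kernel builder. A sketch, assuming Microsoft.Extensions.DependencyInjection 8.0+ for the keyed-service APIs; the model path and key are placeholders:

```csharp
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;

var services = new ServiceCollection();
services.AddOnnxRuntimeGenAIChatCompletion(
    modelPath: @"d:\Phi-3-mini-4k-instruct-onnx\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4",
    serviceId: "onnx");

// Resolve the keyed IChatCompletionService registered above.
using var provider = services.BuildServiceProvider();
var chat = provider.GetRequiredKeyedService<IChatCompletionService>("onnx");
```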
/README.md:
--------------------------------------------------------------------------------
# SemanticKernel.Connectors.OnnxRuntimeGenAI
Semantic Kernel connector for ONNX models.

## How to use

### Prerequisites

Download an ONNX model, for example `Phi-3 Mini-4K-Instruct`:

````
git lfs install
git clone https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-onnx
````


### Code

Create a new console app and add the NuGet package that matches your hardware:

```
-- for CPU
feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI.CPU

-- for CUDA
feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI.CUDA
```

Then change Program.cs to:

````csharp
using Microsoft.SemanticKernel;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

Kernel kernel = Kernel.CreateBuilder()
    .AddOnnxRuntimeGenAIChatCompletion(
        modelPath: @"d:\Phi-3-mini-4k-instruct-onnx\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4")
    .Build();

string prompt = @"Write a joke";

await foreach (string text in kernel.InvokePromptStreamingAsync<string>(prompt,
    new KernelArguments(new OnnxRuntimeGenAIPromptExecutionSettings() { MaxLength = 2048 })))
{
    Console.Write(text);
}
````
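
If you don't need streaming, you can also resolve the registered `IChatCompletionService` from the kernel and request the whole reply at once. A minimal sketch (the service and settings types come from the packages above):

````csharp
using Microsoft.SemanticKernel.ChatCompletion;

var chat = kernel.GetRequiredService<IChatCompletionService>();

var history = new ChatHistory();
history.AddUserMessage("Write a joke");

var reply = await chat.GetChatMessageContentsAsync(
    history,
    new OnnxRuntimeGenAIPromptExecutionSettings { MaxLength = 2048 });

Console.WriteLine(reply[0].Content);
````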

![demo](demo.png)

## Usage instructions

### Prerequisites

You need to download the required ONNX model, for example `Phi-3 Mini-4K-Instruct`:

````
git lfs install
git clone https://hf-mirror.com/microsoft/Phi-3-mini-4k-instruct-onnx
````


### Sample code

Create a new console app and choose the appropriate NuGet package for your hardware:

```
-- for CPU
feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI.CPU

-- for CUDA
feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI.CUDA
```

Then, with just a few lines of code, you can build the Kernel and start generating chat content:

````csharp
using Microsoft.SemanticKernel;
using feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

Kernel kernel = Kernel.CreateBuilder()
    .AddOnnxRuntimeGenAIChatCompletion(
        modelPath: @"d:\Phi-3-mini-4k-instruct-onnx\cpu_and_mobile\cpu-int4-rtn-block-32-acc-level-4")
    .Build();

string prompt = @"Write a joke";

await foreach (string text in kernel.InvokePromptStreamingAsync<string>(prompt,
    new KernelArguments(new OnnxRuntimeGenAIPromptExecutionSettings() { MaxLength = 2048 })))
{
    Console.Write(text);
}
````

![demo](demo.png)
--------------------------------------------------------------------------------
/SemanticKernel.Connectors.OnnxRuntimeGenAI.sln:
--------------------------------------------------------------------------------

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.5.002.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.OnnxRuntimeGenAI", "Connectors.OnnxRuntimeGenAI.csproj", "{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{02B3CF8F-49B4-4FB1-9B56-012BA93FED11}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {7F327FC4-2646-48A6-8EA8-4D8F26AC60D0}
	EndGlobalSection
EndGlobal
--------------------------------------------------------------------------------
/Services/OnnxRuntimeGenAIChatCompletionService.cs:
--------------------------------------------------------------------------------
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.ML.OnnxRuntimeGenAI;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Services;

namespace feiyun0112.SemanticKernel.Connectors.OnnxRuntimeGenAI;

/// <summary>
/// Represents a chat completion service using OnnxRuntimeGenAI.
/// </summary>
public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionService
{
    private readonly Model _model;
    private readonly Tokenizer _tokenizer;

    private Dictionary<string, object?> AttributesInternal { get; } = new();

    /// <summary>
    /// Initializes a new instance of the OnnxRuntimeGenAIChatCompletionService class.
    /// </summary>
    /// <param name="modelPath">The generative AI ONNX model path for the chat completion service.</param>
    /// <param name="loggerFactory">Optional logger factory to be used for logging.</param>
    public OnnxRuntimeGenAIChatCompletionService(
        string modelPath,
        ILoggerFactory? loggerFactory = null)
    {
        _model = new Model(modelPath);
        _tokenizer = new Tokenizer(_model);

        // Expose the model path as the service's model identifier.
        this.AttributesInternal.Add(AIServiceExtensions.ModelIdKey, modelPath);
    }

    /// <inheritdoc/>
    public IReadOnlyDictionary<string, object?> Attributes => this.AttributesInternal;

    /// <inheritdoc/>
    public async Task<IReadOnlyList<ChatMessageContent>> GetChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, CancellationToken cancellationToken = default)
    {
        var result = new StringBuilder();

        await foreach (var content in RunInferenceAsync(chatHistory, executionSettings, cancellationToken))
        {
            result.Append(content);
        }

        return new List<ChatMessageContent>
        {
            new(
                role: AuthorRole.Assistant,
                content: result.ToString())
        };
    }

    /// <inheritdoc/>
    public async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMessageContentsAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings = null, Kernel? kernel = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        await foreach (var content in RunInferenceAsync(chatHistory, executionSettings, cancellationToken))
        {
            yield return new StreamingChatMessageContent(AuthorRole.Assistant, content);
        }
    }

    private async IAsyncEnumerable<string> RunInferenceAsync(ChatHistory chatHistory, PromptExecutionSettings? executionSettings, [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        OnnxRuntimeGenAIPromptExecutionSettings onnxRuntimeGenAIPromptExecutionSettings = OnnxRuntimeGenAIPromptExecutionSettings.FromExecutionSettings(executionSettings);

        var prompt = GetPrompt(chatHistory, onnxRuntimeGenAIPromptExecutionSettings);
        var tokens = _tokenizer.Encode(prompt);

        using var generatorParams = new GeneratorParams(_model);
        ApplyPromptExecutionSettings(generatorParams, onnxRuntimeGenAIPromptExecutionSettings);
        generatorParams.SetInputSequences(tokens);

        using var generator = new Generator(_model, generatorParams);

        while (!generator.IsDone())
        {
            cancellationToken.ThrowIfCancellationRequested();

            yield return await Task.Run(() =>
            {
                generator.ComputeLogits();
                generator.GenerateNextToken();

                // Decode only the newly generated token at the end of the sequence.
                var outputTokens = generator.GetSequence(0);
                var newToken = outputTokens.Slice(outputTokens.Length - 1, 1);
                var output = _tokenizer.Decode(newToken);
                return output;
            }, cancellationToken);
        }
    }

    private string GetPrompt(ChatHistory chatHistory, OnnxRuntimeGenAIPromptExecutionSettings onnxRuntimeGenAIPromptExecutionSettings)
    {
        // Render the chat history with the Phi-3 chat template: each message is
        // delimited by <|role|> ... <|end|>, and the prompt ends with <|assistant|>
        // so the model generates the assistant's reply.
        var promptBuilder = new StringBuilder();
        foreach (var message in chatHistory)
        {
            promptBuilder.Append($"<|{message.Role}|>\n{message.Content}<|end|>\n");
        }
        promptBuilder.Append("<|assistant|>");

        return promptBuilder.ToString();
    }

    private void ApplyPromptExecutionSettings(GeneratorParams generatorParams, OnnxRuntimeGenAIPromptExecutionSettings onnxRuntimeGenAIPromptExecutionSettings)
    {
        generatorParams.SetSearchOption("top_p", onnxRuntimeGenAIPromptExecutionSettings.TopP);
        generatorParams.SetSearchOption("top_k", onnxRuntimeGenAIPromptExecutionSettings.TopK);
        generatorParams.SetSearchOption("temperature", onnxRuntimeGenAIPromptExecutionSettings.Temperature);
        generatorParams.SetSearchOption("repetition_penalty", onnxRuntimeGenAIPromptExecutionSettings.RepetitionPenalty);
        generatorParams.SetSearchOption("past_present_share_buffer", onnxRuntimeGenAIPromptExecutionSettings.PastPresentShareBuffer);
        generatorParams.SetSearchOption("num_return_sequences", onnxRuntimeGenAIPromptExecutionSettings.NumReturnSequences);
        generatorParams.SetSearchOption("num_beams", onnxRuntimeGenAIPromptExecutionSettings.NumBeams);
        generatorParams.SetSearchOption("no_repeat_ngram_size", onnxRuntimeGenAIPromptExecutionSettings.NoRepeatNgramSize);
        generatorParams.SetSearchOption("min_length", onnxRuntimeGenAIPromptExecutionSettings.MinLength);
        generatorParams.SetSearchOption("max_length", onnxRuntimeGenAIPromptExecutionSettings.MaxLength);
        generatorParams.SetSearchOption("length_penalty", onnxRuntimeGenAIPromptExecutionSettings.LengthPenalty);
        generatorParams.SetSearchOption("early_stopping", onnxRuntimeGenAIPromptExecutionSettings.EarlyStopping);
        generatorParams.SetSearchOption("do_sample", onnxRuntimeGenAIPromptExecutionSettings.DoSample);
        generatorParams.SetSearchOption("diversity_penalty", onnxRuntimeGenAIPromptExecutionSettings.DiversityPenalty);
    }
}
--------------------------------------------------------------------------------
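
To make the prompt template concrete, here is an illustration (not part of the source) of what `GetPrompt` renders for a short history; the messages are hypothetical:

```csharp
using Microsoft.SemanticKernel.ChatCompletion;

var history = new ChatHistory();
history.AddSystemMessage("You are a comedian.");
history.AddUserMessage("Write a joke");

// GetPrompt renders the Phi-3 chat template, ending with <|assistant|>
// so the model generates the assistant's reply:
//
// <|system|>
// You are a comedian.<|end|>
// <|user|>
// Write a joke<|end|>
// <|assistant|>
```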
/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feiyun0112/SemanticKernel.Connectors.OnnxRuntimeGenAI/6efcbcfbb84c5ecdb5cc31215903b6df13f3f841/demo.png
--------------------------------------------------------------------------------