├── .gitignore ├── LICENSE ├── README.md ├── README.zh-cn.md ├── config.py ├── demos ├── dotnet-demo │ ├── Program.cs │ └── dotnet-demo.csproj └── python_demo.ipynb ├── img └── python-demo-01.png ├── requirements.txt └── startup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | Local-LLM-Server.sln 162 | demos/dotnet-demo/obj/ 163 | demos/dotnet-demo/bin/ 164 | logs/ 165 | .vs/ 166 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **[中文说明](README.zh-cn.md)** 2 | 3 | Whether it's due to network access restrictions or data security reasons, we may need to deploy large language models (LLMs) privately in order to run access locally. 
4 | 5 | This project provides a quick way to build a private large language model server: with a single command you can stand up a private LLM server locally that exposes an OpenAI-compatible interface. 6 | 7 | *Note: This project can also be used in a CPU-only environment, but it will be slower.* 8 | 9 | # How to use 10 | 11 | ## 1. Install dependencies 12 | 13 | - First, make sure you have Python installed on your machine (I'm using 3.10) 14 | 15 | - Then, install the dependencies 16 | 17 | ```bash 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | ## 2. Download the model 22 | 23 | This project is based on [FastChat](https://github.com/lm-sys/FastChat), which supports [multiple large language models](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md). 24 | 25 | Personally, I have only tested the LLM model [THUDM/ChatGLM3-6B](https://huggingface.co/THUDM/chatglm3-6b) and the Embedding model [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh); other models should work as well in theory. 26 | 27 | ```bash 28 | git lfs install 29 | git clone https://huggingface.co/THUDM/chatglm3-6b 30 | git clone https://huggingface.co/BAAI/bge-large-zh 31 | ``` 32 | 33 | ## 3. Configuration 34 | 35 | This project can deploy multiple models at the same time; just configure the model name and path key-value pairs in `config.py`. 36 | 37 | ```python 38 | 39 | WORK_CONFIG = { 40 | "host": HOST, 41 | "port": 21002, 42 | # Model name and path key-value pairs 43 | "models": { 44 | # The name can be customized, and the path can be relative or absolute 45 | "ChatModel":"d:/chatglm3-6b", 46 | "EmbeddingsModel":"./models/bge-large-zh", 47 | }, 48 | } 49 | ``` 50 | 51 | ## 4. Start the service 52 | 53 | ```bash 54 | python startup.py 55 | ``` 56 | 57 | When you see the following output, the service has started successfully: 58 | 59 | ```bash 60 | 61 | Local-LLM-Server is successfully started, please use http://127.0.0.1:21000 to access the OpenAI-compatible interfaces 62 | ``` 63 | 64 | # Usage examples 65 | 66 | The sample code is stored in the `demos` directory. 67 | 68 | ## 1. Python 69 | 70 | ```python 71 | import openai 72 | 73 | openai.api_key = "Empty" 74 | openai.base_url = "http://localhost:21000/v1/" 75 | 76 | # Use the LLM model 77 | completion = openai.chat.completions.create( 78 | model="ChatModel", 79 | messages=[{"role": "user", "content": "Tell us about yourself?"}] 80 | ) 81 | print(completion.choices[0].message.content) 82 | 83 | # Use the Embeddings model 84 | embedding = openai.embeddings.create( 85 | model="EmbeddingsModel", 86 | input = "Please star⭐️ this project on GitHub!", 87 | encoding_format="float") 88 | print(embedding.data[0].embedding) 89 | ``` 90 | 91 | ![](img/python-demo-01.png) 92 | 93 |
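The `openai` client above is just a thin wrapper over HTTP, so any HTTP client can talk to the server directly. The following sketch is not part of the shipped demos; it assumes the default host, port, and model name from `config.py` and uses the `requests` package (install it separately if needed) to post straight to the OpenAI-compatible `/v1/chat/completions` route:

```python
import requests

# Raw HTTP call against the OpenAI-compatible endpoint exposed by Local-LLM-Server.
# Host, port, and model name are the defaults from config.py; adjust them if you
# changed the configuration.
response = requests.post(
    "http://127.0.0.1:21000/v1/chat/completions",
    json={
        "model": "ChatModel",
        "messages": [{"role": "user", "content": "Tell us about yourself?"}],
    },
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```

A `GET` request to `http://127.0.0.1:21000/v1/models` should likewise list every model name configured in `WORK_CONFIG["models"]`, which is a quick way to verify that the worker registered both models.

## 2.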
C# 93 | 94 | *Requires reference to Nuget Microsoft.SemanticKernel 1.0.1* 95 | 96 | ```csharp 97 | using Microsoft.SemanticKernel; 98 | 99 | var kernel = Kernel.CreateBuilder() 100 | .AddOpenAIChatCompletion( 101 | modelId: "ChatModel", 102 | apiKey: "NoKey", 103 | httpClient: new HttpClient(new MyHandler()) 104 | ).Build(); 105 | 106 | var prompt = "Tell us about yourself?"; 107 | var result = await kernel.InvokePromptAsync(prompt); 108 | var answer = result.GetValue<string>(); 109 | Console.WriteLine(answer); 110 | 111 | //Since Microsoft.SemanticKernel does not provide a direct way to set the address of the OpenAI server, 112 | //you need to customize a DelegatingHandler and change the OpenAI server address to the Local-LLM-Server address. 113 | class MyHandler : DelegatingHandler 114 | { 115 | public MyHandler() 116 | : base(new HttpClientHandler()) 117 | { 118 | } 119 | protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) 120 | { 121 | var newUriBuilder = new UriBuilder(request.RequestUri); 122 | newUriBuilder.Scheme = "http"; 123 | newUriBuilder.Host = "127.0.0.1"; 124 | newUriBuilder.Port = 21000; 125 | 126 | request.RequestUri = newUriBuilder.Uri; 127 | return base.SendAsync(request, cancellationToken); 128 | } 129 | } 130 | ``` 131 | -------------------------------------------------------------------------------- /README.zh-cn.md: -------------------------------------------------------------------------------- 1 | 2 | 无论是由于网络访问限制,还是数据安全原因,我们可能需要私有化部署大语言模型(LLM),以便在本地运行访问。 3 | 4 | 本项目提供了一个快速搭建私有大语言模型服务器的方法,只需要一行命令,就可以在本地搭建一个私有的大语言模型服务器,并提供OpenAI兼容接口。 5 | 6 | *注意:本项目在CPU 环境下也可以使用,但是速度会比较慢。* 7 | 8 | # 使用方法 9 | 10 | ## 1. 安装依赖 11 | 12 | - 首先,确保你的机器安装了 Python(我使用的是3.10) 13 | 14 | - 然后,安装依赖 15 | 16 | ```bash 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ## 2. 下载模型 21 | 22 | 本项目基于[FastChat](https://github.com/lm-sys/FastChat)实现,FastChat支持[多种大语言模型](https://github.com/lm-sys/FastChat/blob/main/docs/model_support.md)。 23 | 24 | 我个人只测试了 LLM 模型 [THUDM/ChatGLM3-6B](https://huggingface.co/THUDM/chatglm3-6b) 与 Embedding 模型 [BAAI/bge-large-zh](https://huggingface.co/BAAI/bge-large-zh),其他模型理论上也可以使用。 25 | 26 | ```bash 27 | git lfs install 28 | git clone https://huggingface.co/THUDM/chatglm3-6b 29 | git clone https://huggingface.co/BAAI/bge-large-zh 30 | ``` 31 | 32 | ## 3. 配置 33 | 34 | 本项目可以同时部署多个模型,只需要在`config.py`中配置模型名称和路径键值对即可。 35 | 36 | ```python 37 | WORK_CONFIG = { 38 | "host": HOST, 39 | "port": 21002, 40 | # 模型名称和路径键值对 41 | "models": { 42 | "ChatModel":"d:/chatglm3-6b", # 名称可以自定义,路径可以用相对路径或绝对路径 43 | "EmbeddingsModel":"./models/bge-large-zh", 44 | }, 45 | } 46 | ``` 47 | 48 | ## 4. 启动服务 49 | 50 | ```bash 51 | python startup.py 52 | ``` 53 | 54 | 当看到如下输出时,说明服务已经启动成功: 55 | 56 | ```bash 57 | Local-LLM-Server 启动成功,请使用 http://127.0.0.1:21000 访问 OpenAI 接口 58 | ``` 59 | 60 | # 使用示例 61 | 62 | 示例代码都存放在`demos`目录下。 63 | 64 | ## 1. python 65 | 66 | ```python 67 | 68 | import openai 69 | 70 | openai.api_key = "Empty" 71 | openai.base_url = "http://localhost:21000/v1/" 72 | 73 | # 使用 LLM 模型 74 | completion = openai.chat.completions.create( 75 | model="ChatModel", 76 | messages=[{"role": "user", "content": "请自我介绍一下?"}] 77 | ) 78 | print(completion.choices[0].message.content) 79 | 80 | # 使用 Embeddings 模型 81 | embedding = openai.embeddings.create( 82 | model="EmbeddingsModel", 83 | input = "欢迎关注我的个人公众号MyIO!", 84 | encoding_format="float") 85 | print(embedding.data[0].embedding) 86 | 87 | ``` 88 | 89 | ![](img/python-demo-01.png) 90 | 91 | ## 2.
C# 91 | 92 | *需引用 Nuget Microsoft.SemanticKernel 1.0.1* 93 | 94 | ```csharp 95 | 96 | using Microsoft.SemanticKernel; 97 | 98 | var kernel = Kernel.CreateBuilder() 99 | .AddOpenAIChatCompletion( 100 | modelId: "ChatModel", 101 | apiKey: "NoKey", 102 | httpClient: new HttpClient(new MyHandler()) 103 | ).Build(); 104 | 105 | var prompt = "请自我介绍一下?"; 106 | var result = await kernel.InvokePromptAsync(prompt); 107 | var answer = result.GetValue<string>(); 108 | Console.WriteLine(answer); 109 | 110 | //由于 Microsoft.SemanticKernel 没提供直接设置 OpenAI 服务器地址的方法, 111 | //所以需要自定义一个 DelegatingHandler,将 OpenAI 服务器地址修改为 Local-LLM-Server 地址。 112 | class MyHandler : DelegatingHandler 113 | { 114 | public MyHandler() 115 | : base(new HttpClientHandler()) 116 | { 117 | } 118 | protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) 119 | { 120 | var newUriBuilder = new UriBuilder(request.RequestUri); 121 | newUriBuilder.Scheme = "http"; 122 | newUriBuilder.Host = "127.0.0.1"; 123 | newUriBuilder.Port = 21000; 124 | 125 | request.RequestUri = newUriBuilder.Uri; 126 | return base.SendAsync(request, cancellationToken); 127 | } 128 | } 129 | ``` 130 | 131 | 132 | # 欢迎关注我的个人公众号MyIO 133 | 134 | ![MyIO](https://raw.githubusercontent.com/feiyun0112/feiyun0112/master/qrcode.jpg) -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | # 日志存储路径 5 | LOG_PATH = os.path.join(os.path.abspath(os.curdir), "logs") 6 | if not os.path.exists(LOG_PATH): 7 | os.mkdir(LOG_PATH) 8 | 9 | HOST = "127.0.0.1" 10 | 11 | 12 | API_SERVER_CONFIG = { 13 | "host": HOST, 14 | "port": 21000, 15 | "api_keys": [], 16 | } 17 | 18 | CONTROLLER_CONFIG = { 19 | "host": HOST, 20 | "port": 21001, 21 | } 22 | 23 | WORK_CONFIG = { 24 | "host": HOST, 25 | "port": 21002, 26 | "models": { 27 | "ChatModel":"d:/chatglm3-6b", 28 | "EmbeddingsModel":"./models/bge-large-zh", 29 | }, 30 | } 31 | -------------------------------------------------------------------------------- /demos/dotnet-demo/Program.cs: -------------------------------------------------------------------------------- 1 | using Microsoft.SemanticKernel; 2 | using System.Text; 3 | 4 | Console.OutputEncoding = Encoding.Unicode; 5 | 6 | var kernel = Kernel.CreateBuilder() 7 | .AddOpenAIChatCompletion( 8 | modelId: "ChatModel", 9 | apiKey: "NoKey", 10 | httpClient: new HttpClient(new MyHandler()) 11 | ).Build(); 12 | 13 | var prompt = "请自我介绍一下?"; 14 | var result = await kernel.InvokePromptAsync(prompt); 15 | var answer = result.GetValue<string>(); 16 | Console.WriteLine(answer); 17 | 18 | //由于 Microsoft.SemanticKernel 没提供直接设置 OpenAI 服务器地址的方法, 19 | //所以需要自定义一个 DelegatingHandler,将 OpenAI 服务器地址修改为 Local-LLM-Server 地址。 20 | class MyHandler : DelegatingHandler 21 | { 22 | public MyHandler() 23 | : base(new HttpClientHandler()) 24 | { 25 | } 26 | protected override Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) 27 | { 28 | var newUriBuilder = new UriBuilder(request.RequestUri); 29 | newUriBuilder.Scheme = "http"; 30 | newUriBuilder.Host = "127.0.0.1"; 31 | newUriBuilder.Port = 21000; 32 | 33 | request.RequestUri = newUriBuilder.Uri; 34 | return base.SendAsync(request, cancellationToken); 35 | } 36 | } -------------------------------------------------------------------------------- /demos/dotnet-demo/dotnet-demo.csproj: -------------------------------------------------------------------------------- 1 |
2 | 3 | 4 | Exe 5 | net6.0 6 | dotnet_demo 7 | enable 8 | enable 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /demos/python_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import openai\n", 10 | "\n", 11 | "openai.api_key = \"Empty\"\n", 12 | "openai.base_url = \"http://localhost:21000/v1/\"" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "你好,我是人工智能助手 ChatGLM3-6B,是清华大学 KEG 实验室和智谱 AI 公司于 2023 年共同训练的语言模型。我的任务是针对用户的问题和要求提供适当的答复和支持。由于我是一个计算机程序,所以我没有自我意识,也不能像人类一样感知世界。我只能通过分析我所学到的信息来回答问题。\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "completion = openai.chat.completions.create(\n", 30 | " model=\"ChatModel\",\n", 31 | " messages=[{\"role\": \"user\", \"content\": \"请自我介绍一下?\"}]\n", 32 | ")\n", 33 | "print(completion.choices[0].message.content)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "[0.020001007243990898, -0.0442027822136879, -0.033131688833236694, 0.013764282688498497, 0.00972701609134674, 0.021945543587207794, -0.011738743633031845, -0.0041503640823066235, -3.509926318656653e-05, -0.03213749825954437, 0.03297596052289009, 0.013545355759561062, 0.025494789704680443, -0.010174773633480072, 0.01379332598298788, 0.0035503492690622807, 0.02959994040429592, -0.011255824007093906, -0.009516729973256588, 0.011792069301009178, -0.01874622143805027, -0.00038986437721177936, 0.00998582411557436, -0.014824786223471165, -0.0030938899144530296, -0.005475894082337618, 0.010993916541337967, 0.026248913258314133, 0.04671968147158623, -0.016299152746796608, -0.0023726671934127808, 0.030592456459999084, 0.03198375180363655, 0.015597534365952015, 0.008258900605142117, 0.03816499933600426, -0.000542675144970417, 0.010835811495780945, 0.023684488609433174, 0.010235585272312164, 0.02172890491783619, -0.015379900112748146, -0.021361486986279488, -0.045914873480796814, 0.05587274208664894, -0.04707293212413788, -0.014524220488965511, -0.05082609876990318, 0.03726547211408615, -0.0135353310033679, 0.0014320514164865017, -0.056248053908348083, -0.01227588951587677, -0.03260047733783722, -0.005830700509250164, 0.031398121267557144, 0.02443055994808674, 0.036980949342250824, -0.016515254974365234, -0.018871314823627472, -0.007420555222779512, 0.18673130869865417, -0.02442183904349804, -0.012112200260162354, -0.02712257206439972, 0.03330905735492706, -0.023890554904937744, -0.04732093587517738, -0.044115033000707626, -0.026748323813080788, 0.012123740278184414, 0.0627632662653923, -0.012124604545533657, -0.053752798587083817, -0.008461135439574718, -0.007033915258944035, -0.000545509101357311, 0.026250649243593216, -0.024806184694170952, -0.002602272666990757, -0.010951517149806023, 0.02263605035841465, -0.0270728450268507, 0.04311321675777435, -0.045729562640190125, -0.002302570967003703, -0.0027149473316967487, 0.5347057580947876, 0.045195575803518295, -0.014135001227259636, -0.007605633698403835, -0.015056378208100796, -0.03340907394886017, 7.613039997522719e-06, -0.04835667833685875, -0.06002616137266159, 0.013785745948553085, -0.0027178325690329075, 
-0.02117340825498104, 0.05510110408067703, 0.026163924485445023, -0.03812815621495247, 0.004826873540878296, -0.014477388933300972, -0.0074423872865736485, 0.017352545633912086, -0.005865196231752634, -0.03562851995229721, -0.01977813057601452, 0.0029120121616870165, 0.002772571286186576, 0.05346853658556938, 0.01724884659051895, -0.003343477612361312, -0.007017055992037058, -0.021773017942905426, 0.007760168518871069, -0.00816310103982687, 0.015591797418892384, -0.007730061654001474, -0.0026882963720709085, -0.03549821674823761, -0.04182176664471626, -0.003704201430082321, 0.007298294920474291, -0.013359819538891315, -0.012235299684107304, -0.046554166823625565, -0.02945680357515812, -0.020484497770667076, 0.021171810105443, -0.023917306214571, -0.010021910071372986, 0.01447206363081932, -0.040510065853595734, -0.014012563973665237, 0.011894168332219124, -0.012519348412752151, -0.027374785393476486, 0.0014188186032697558, 0.0041066184639930725, -0.025460299104452133, 0.017588740214705467, 0.003924464341253042, 0.02705082669854164, 0.003223160980269313, -0.02529502473771572, 0.012184319086372852, 0.04023315757513046, -0.04387526959180832, 0.03662187606096268, -0.009153282269835472, -0.02243252471089363, 0.030571362003684044, -0.003928778693079948, -0.010656207799911499, 0.02257312275469303, 0.011668799445033073, 0.0028970108833163977, -0.028404224663972855, 0.034943364560604095, 0.026358172297477722, 0.010891278274357319, 0.016250144690275192, -0.04836418479681015, -0.028361614793539047, 0.011779719032347202, -0.004994242452085018, -0.031126245856285095, 0.05733921378850937, -0.06690364331007004, -0.01449220534414053, -0.021838461980223656, -0.002177231013774872, 0.004150779452174902, -0.011747721582651138, 0.012127612717449665, 0.013026006519794464, -0.024649377912282944, -0.007247726898640394, 0.06229230388998985, -0.022333553060889244, 0.01683584786951542, -0.011222894303500652, -0.002235706429928541, 0.011676901951432228, 0.0003536298463586718, -0.04929174482822418, 0.004463118501007557, -0.00545347249135375, 0.049037497490644455, 0.016994990408420563, -0.015024051070213318, -0.01711334101855755, -0.02592661790549755, -0.03419267013669014, 0.04286252707242966, 0.03216831386089325, -0.015948519110679626, 0.0040275282226502895, 0.029883742332458496, -0.03598619997501373, -0.00492846081033349, -0.010192113928496838, -0.022022053599357605, 0.001411158824339509, 0.03304007276892662, -0.0026946424040943384, 0.023553725332021713, -0.00618368573486805, -0.041152168065309525, 0.013159051537513733, 0.015910597518086433, 0.0032099129166454077, -0.020065391436219215, -0.005148706957697868, -0.0031627179123461246, 0.0026287317741662264, -0.008419627323746681, 0.003658820176497102, 0.037246983498334885, 0.019751740619540215, 0.003816164331510663, -0.02105158194899559, 0.018933922052383423, 0.0059179202653467655, 0.010726899839937687, 0.012874792329967022, -0.038966257125139236, 0.0031150237191468477, -0.01616891287267208, -0.0009734820341691375, -0.01253665890544653, 0.020090876147150993, -0.009161640889942646, -0.0033184385392814875, 0.02335243672132492, -0.007539459969848394, -0.024326276034116745, 0.03734502196311951, -0.027288636192679405, 0.001014051609672606, -0.0052028256468474865, -0.04159809276461601, 0.004179151263087988, 0.0031412376556545496, 0.0075650629587471485, -0.04108109325170517, 0.01169951818883419, -0.01720462180674076, -0.038709282875061035, -0.05741741135716438, 0.011479726992547512, 0.01494546514004469, 0.011113466694951057, -0.020591016858816147, 0.03170155733823776, 
-0.01550615206360817, 0.001418992644175887, 0.035646118223667145, -0.006700663827359676, -0.008127332665026188, 0.01126075815409422, -0.04412493482232094, -0.046238936483860016, 0.004673240706324577, 0.011448398232460022, 0.027312273159623146, -0.006737116724252701, 0.022394658997654915, 0.00017320826009381562, -0.029672199860215187, -0.05742914229631424, -0.053267404437065125, -0.01963234506547451, 0.02522571012377739, 0.03346961736679077, 0.014108911156654358, -0.0017370169516652822, 0.005583512596786022, 0.01171093713492155, 0.010971389710903168, -0.015144032426178455, 0.014816565439105034, 0.010842833667993546, -0.016060758382081985, -0.006425104103982449, -0.01007948536425829, -0.012035112828016281, -0.013321432285010815, 0.010644374415278435, 0.008276454173028469, -0.020423464477062225, -0.01783127151429653, 0.009605851024389267, 0.036991506814956665, 0.0009926187340170145, -0.05440428480505943, 0.007977539673447609, 0.018839411437511444, 0.014911354519426823, 0.02121690660715103, 0.019528454169631004, 0.03321927785873413, -0.04357155039906502, 0.017274146899580956, -0.04036582261323929, -0.015130233019590378, 0.002851218916475773, 0.0354406014084816, 0.011975194327533245, -0.06397759169340134, 0.02942928671836853, 0.0014831105945631862, 0.036879077553749084, -0.00044624507427215576, -0.015840085223317146, -0.026183072477579117, -0.024958990514278412, 0.006051696836948395, 0.06614407896995544, -0.004754426423460245, 0.024061961099505424, -0.023504042997956276, 0.03294656053185463, -0.003748592920601368, 0.0016755196265876293, 0.012567821890115738, -0.02574000135064125, 0.016208969056606293, -0.012199919670820236, 0.016285410150885582, 0.003607110120356083, 0.01781158708035946, -0.0026043588295578957, -0.024121122434735298, 0.019340457394719124, 0.08597279340028763, 0.002208904828876257, 0.021441016346216202, 0.004277623258531094, 0.026220742613077164, 0.022257765755057335, -0.012665938585996628, -0.003982894122600555, 0.024201782420277596, 0.044899359345436096, 0.014726176857948303, -0.04512668028473854, -0.022273875772953033, 0.06357122957706451, 0.026028601452708244, 0.01573924534022808, 0.0019096018513664603, 0.004601762164384127, -0.0062743681482970715, -0.0003861560544464737, 0.0019030107650905848, 0.022584566846489906, 0.052837781608104706, -0.05198885127902031, 0.0191293153911829, -0.009636654518544674, 0.052381351590156555, 0.00425631832331419, 0.00893368385732174, -0.05483214184641838, -0.029197396710515022, -0.028678713366389275, -0.024182207882404327, -0.009657839313149452, 0.06116338074207306, 0.009442850016057491, -0.05185196176171303, -0.029587607830762863, -0.014568041078746319, -0.00267402408644557, 0.0018864193698391318, -0.011382418684661388, -0.046251628547906876, -0.04699905216693878, -0.01639449968934059, 0.019981449469923973, -0.023511555045843124, 0.005589310545474291, 0.024544456973671913, 0.005429025739431381, -0.0037986645475029945, 0.02328088879585266, -0.02805466763675213, -0.009009097702801228, -0.043104950338602066, -0.008562205359339714, -0.03339327871799469, -0.0018698724452406168, 0.008572563529014587, 0.005402342416346073, 0.0015060151927173138, -0.022293182089924812, 0.02012220025062561, 0.030598903074860573, 0.011293685995042324, 0.008260808885097504, -0.07622276246547699, 0.05272941291332245, 0.022526884451508522, -0.018526915460824966, -0.0016052358550950885, 0.010340511798858643, -0.01718391664326191, -0.033066511154174805, -0.01835961453616619, -0.06919721513986588, -0.02615448273718357, -0.03271479159593582, -0.00762286689132452, 
-0.026438763365149498, -0.0055865454487502575, 0.013267449103295803, 0.0007127958233468235, 0.029828520491719246, 0.010881170630455017, -0.01831837370991707, 0.0040083895437419415, 0.03262763470411301, -0.023696156218647957, -0.02651921845972538, -0.010897308588027954, -0.015462905168533325, 0.009061074815690517, 0.005221456289291382, 0.038963381201028824, 0.034887365996837616, 0.042735930532217026, 0.03615311160683632, -0.003369431709870696, -0.015469835139811039, -0.014939745888113976, -0.023092124611139297, -0.026230592280626297, -0.01663745380938053, 0.02176523394882679, -0.018830012530088425, -0.0003952958795707673, -0.008136645890772343, 0.0019171210005879402, -0.0033519251737743616, -0.042567960917949677, -0.02190178632736206, -0.0106584457680583, 0.0019782520830631256, 0.016734063625335693, 0.005262005142867565, 0.0002034037752309814, -0.0009940887102857232, -0.008212915621697903, 0.01530522108078003, 0.0007832530536688864, -0.008317980915307999, -0.00024841955746524036, -7.535325130447745e-05, 0.013552476651966572, 0.02383657917380333, 0.012591022066771984, 0.04426303878426552, -0.0461556613445282, 0.06675086170434952, 0.032082848250865936, 0.00648985942825675, -0.025466319173574448, 0.0076857665553689, -0.02448378875851631, -0.021245170384645462, 0.03328603133559227, 0.005281397141516209, 0.028355786576867104, -0.0003457563288975507, -0.035848185420036316, 0.03970808908343315, -0.01560165360569954, 0.03247058019042015, -0.01367348711937666, 0.010164056904613972, 0.028016092255711555, 0.03555922955274582, 0.015597549267113209, 0.03171071410179138, -0.051688823848962784, -0.012701129540801048, -0.03086954355239868, -0.004632989410310984, -0.0034950252156704664, -0.005207206588238478, -0.021219154819846153, 0.02041519060730934, 0.03128369525074959, 0.0030269939452409744, 0.00867279153317213, -0.017291005700826645, 0.010606084018945694, -0.02991081215441227, -0.044945698231458664, 0.0038273578975349665, 0.03051895648241043, -0.009020712226629257, -0.07919638603925705, -0.026324676349759102, 0.0036171781830489635, 0.04257412999868393, 0.0157537292689085, -0.00902596302330494, 0.018363352864980698, 0.029264673590660095, -0.0010654910001903772, -0.02928384579718113, -0.007442264351993799, -0.03275100886821747, 0.026227910071611404, 0.004678407683968544, -0.039294760674238205, 0.010639957152307034, 0.041898008435964584, 0.015819009393453598, -0.029068011790513992, 0.004002207424491644, -0.011489294469356537, -0.031552188098430634, 0.006081367377191782, 0.01763794757425785, 0.004366056527942419, -0.02624519355595112, -0.004232102073729038, -0.015224738046526909, -0.04107457399368286, -0.007405281066894531, -0.006449701264500618, 0.024742942303419113, -0.019234996289014816, 0.02355264127254486, -8.343134686583653e-05, 0.001475093187764287, 0.03089805506169796, 0.002664014231413603, 0.0008504695142619312, 0.018977927044034004, -0.009296861477196217, 0.019367936998605728, 0.022087693214416504, -0.020368855446577072, 0.038930121809244156, -0.04602121189236641, 0.03346877917647362, -0.013546299189329147, -0.030422886833548546, 0.020057372748851776, -0.01497042365372181, -0.0003371451748535037, -0.020643142983317375, 0.027497898787260056, -0.027868760749697685, -0.04203358292579651, -0.011517084203660488, -0.003947350662201643, 0.04284251853823662, -0.023349296301603317, 0.02978144772350788, -0.04410410299897194, 0.02327381633222103, -0.016563523560762405, -0.05728921666741371, -0.009897569194436073, -0.022480977699160576, -0.05501750856637955, -0.007659743539988995, -0.028656134381890297, 
-0.052364904433488846, 0.00019998732022941113, 0.05831142142415047, -0.011432903818786144, 0.0023597057443112135, 0.023249391466379166, -0.020294297486543655, 0.027155784890055656, 0.03238262981176376, 0.027048388496041298, -0.01791999861598015, -0.0028737199027091265, -0.0015473109669983387, -0.03321313485503197, -0.031750768423080444, 0.004106572363525629, -0.001575309899635613, 0.02053125388920307, 0.01439767424017191, 0.052959900349378586, -0.014298170804977417, -0.020853202790021896, -0.035142816603183746, 0.00786732416599989, 0.009951980784535408, -0.026550861075520515, -0.0175373125821352, 0.0011967085301876068, -0.05157969519495964, -0.0012400613632053137, 0.0008923564455471933, 0.0496135950088501, 0.046559154987335205, -0.011453547514975071, 0.002559467451646924, -0.007002422586083412, 0.0035799280740320683, 0.01131544727832079, 0.024739567190408707, -0.028776800259947777, 0.016829367727041245, 0.015493164770305157, 0.038684867322444916, -0.041733551770448685, -0.013252661563456059, -0.00322142755612731, 0.01791069284081459, 0.0006691296002827585, 0.003155771642923355, -0.04107200354337692, -0.029876619577407837, 0.008570026606321335, -0.007479260675609112, -0.020340338349342346, -0.035897765308618546, -0.01382722519338131, 0.010103744454681873, -0.02619529515504837, -0.011151652783155441, 0.0050480118952691555, -0.018650181591510773, -0.006945318076759577, 0.023841701447963715, 0.009507444687187672, -0.03277431055903435, -0.008703413419425488, -0.018367579206824303, 0.008307290263473988, 0.015011942014098167, 0.034485332667827606, -0.02486906386911869, -0.003280284348875284, -0.03833423927426338, -0.02830924466252327, -0.029027951881289482, -0.02818424627184868, 0.021868007257580757, -0.021580655127763748, -0.013895020820200443, 0.0017320315819233656, -0.0011161379516124725, 0.022786784917116165, 0.011655738577246666, -0.005674448329955339, -0.03036077693104744, 0.01086543407291174, -0.02510499581694603, 0.01951681263744831, 0.008920079097151756, -0.009012598544359207, -0.020405763760209084, -0.042004842311143875, 0.016396334394812584, 0.020619207993149757, -0.0019749400671571493, -0.01322832889854908, -0.012537742964923382, -0.021986158564686775, -0.0025291386991739273, -0.01502747181802988, 0.01619986817240715, 0.007172655314207077, 0.015548010356724262, -0.010088208131492138, 0.0180360097438097, -0.005212884396314621, -0.010060940869152546, 0.020088355988264084, 0.001181482570245862, 0.0017748051322996616, 0.010778678581118584, 0.0033934260718524456, 0.04048611968755722, -0.02141088992357254, 0.029624121263623238, -0.031532011926174164, -0.02898958884179592, 0.05739036947488785, 0.010222434997558594, -0.045239631086587906, 0.001995587721467018, 0.008040405809879303, 0.046729959547519684, -0.006714667193591595, 0.006596360355615616, -0.005011015105992556, 0.0007446961244568229, -0.019272135570645332, 0.0379062220454216, 0.031150110065937042, -0.04078143462538719, 0.0250657107681036, 0.019702160730957985, -0.013932203873991966, -0.017320863902568817, -0.012382362969219685, -0.025960367172956467, 0.01386191975325346, 0.02330840192735195, -0.025490229949355125, -0.0033742552623152733, -0.06211302801966667, -0.04883194342255592, -0.024026088416576385, 0.0416676290333271, -0.04818591848015785, -0.041078269481658936, -0.0056378585286438465, -0.05623286962509155, 0.020054219290614128, 0.012895273044705391, -0.0015185135416686535, -0.025633350014686584, 0.018183482810854912, -0.013033810071647167, 0.04313382878899574, 0.011932718567550182, -0.0131653668358922, -0.004795960616320372, 
-0.010920829139649868, -0.01188751682639122, -0.011342650279402733, -0.030113274231553078, 0.023847991600632668, -0.009693015366792679, 0.03217869997024536, 0.022410375997424126, 0.024206796661019325, 0.01213702280074358, 0.014869938604533672, -0.009521524421870708, -0.005009609740227461, 0.010666410438716412, -0.04227038845419884, 0.012081127613782883, -0.01634574681520462, -0.011908761225640774, 0.004194624722003937, 0.009422870352864265, 0.01810871623456478, -0.0237268079072237, 0.03524871915578842, -0.021763628348708153, 0.02384003810584545, -0.020441574975848198, -0.006724957842379808, -0.019132835790514946, 0.04045703634619713, -0.006584800314158201, 0.03799502179026604, -0.05107470601797104, 0.0006973576964810491, 0.018824758008122444, -0.005972025915980339, 0.005543128587305546, -0.031201239675283432, 0.01833704113960266, -0.017553502693772316, -0.0122994938865304, -0.04079567268490791, -0.01606905646622181, -0.04629509150981903, -0.0027690832503139973, 0.01207620371133089, 0.022379383444786072, -0.01709035225212574, -0.01818470098078251, -0.044583823531866074, -0.020656736567616463, -0.022160260006785393, -0.03309956192970276, 0.0006713366019539535, 0.03256480395793915, -0.0004640074330382049, -0.011826000176370144, -0.028403516858816147, -0.01217450387775898, 0.0181566271930933, -0.04364944249391556, -0.024299943819642067, 0.0027080599684268236, -0.022365102544426918, 0.006907045841217041, -0.051622841507196426, 0.001578616094775498, 0.0362018458545208, 0.0207380261272192, 0.003695729887112975, -0.013627128675580025, -0.01702232100069523, -0.013752504251897335, -0.019180085510015488, -0.023912392556667328, -0.013180822134017944, 0.04463634267449379, -0.01841823011636734, -0.05833030119538307, -0.009458262473344803, -0.04076126217842102, 0.012051938101649284, -0.02161426842212677, 0.006020229309797287, -0.032587260007858276, 0.0343342162668705, -0.016036447137594223, 0.011144370771944523, 0.04717737063765526, 0.0012077351566404104, -0.013771986588835716, -0.004375609569251537, 0.002241011941805482, 0.0003651517617981881, -0.00737809156998992, 0.03750573843717575, 0.018216572701931, 0.03761963173747063, 0.011169340461492538, -0.0033840390387922525, -0.021067658439278603, -0.0005980199784971774, -0.04141063988208771, 0.012593674473464489, 0.005325901322066784, 0.01827964186668396, -0.0039162347093224525, 0.003993044141680002, 0.01975294016301632, -0.012865251861512661, -0.03746480122208595, 0.03072202578186989, -0.006486162543296814, -0.06002158671617508, 0.03968328982591629, 0.01606253907084465, -0.012060868553817272, 0.008098483085632324, -0.016676543280482292, -0.006791871041059494, 0.025187579914927483, 0.005932848434895277, 0.06780831515789032, 0.02512943744659424, 0.013186799362301826, -0.0071891010738909245, 0.016358546912670135, 0.00805663038045168, -0.004412852227687836, 0.006874742452055216, 0.01338461134582758, 0.010870520025491714, -0.036986447870731354, -0.010826701298356056, -0.026179596781730652, 0.0027779408264905214, -0.03326651081442833, -0.04089110717177391, -0.007735519669950008, 0.017944054678082466, 0.0008270207326859236, 0.04064418748021126, 0.029526447877287865, 0.003397280815988779, 0.013320093974471092, -0.01131423655897379, -0.025863749906420708, -0.000457091664429754, 0.03334098681807518, 0.00634501688182354, 0.025931499898433685, 0.01001043152064085, -0.04597301036119461, -0.04174704849720001, -0.010928723029792309, -0.023534690961241722, 0.006503402255475521, 0.017678720876574516, 0.0021137928124517202, 0.00859261117875576, 0.0067702392116189, 
0.030171360820531845, 0.00029231185908429325, -0.012084499932825565, -0.012321053072810173, 0.010437722317874432, -0.02719072997570038, -0.0034020929597318172, -0.011261247098445892, 0.02160763181746006, -0.025033069774508476, 0.028638657182455063, 0.015185441821813583, -0.028430387377738953, 0.014031656086444855, 0.03902174532413483, -0.008502320386469364, -0.021435527130961418, -0.017561841756105423, 0.01760084554553032, 0.021127531304955482, 0.030078614130616188, 0.059031762182712555, -0.04566989466547966, -0.0033410207834094763, 0.01911276951432228, -0.05033565312623978, -0.010874971747398376, 0.07050088047981262, 0.008257245644927025, -0.016262192279100418, 0.04437314346432686, 0.02806428261101246, 0.015617690980434418, -0.061437781900167465, 0.02348782867193222, 0.024378828704357147, -0.04806479811668396, -0.0033334079198539257, -0.0018254752503708005, -0.03426646441221237, 0.04258372262120247, 0.029332272708415985, -0.023668071255087852, -0.014020550064742565, 0.005272792652249336, -0.0035028401762247086, -0.0010656078811734915, -0.01097388006746769, -0.03534890338778496, -0.03213472291827202, -0.02671121060848236, -0.03221968188881874, 0.017511896789073944, 0.022219274193048477, 0.013052426278591156, 0.013322251848876476, 0.008210408501327038, -4.866184463026002e-05, -0.007459267973899841, -0.03486419469118118, -0.007537954952567816, -0.020701680332422256, -0.011929461732506752, -0.050288598984479904, 0.024277878925204277, -0.015822965651750565, 0.004668110515922308, 0.01679813489317894, -0.009365295991301537, 0.017238754779100418, 0.005037819501012564, -0.0357181653380394, -0.054615285247564316, 0.031664177775382996, 0.06251539289951324, -0.013888430781662464, -0.002419922035187483, -0.003737426595762372, -0.010187298059463501, -0.03906850144267082, -0.029951024800539017, -0.013140355236828327, -0.020413238555192947, 0.030347490683197975, 0.0435536727309227, 0.02213655225932598, 0.009119918569922447, 0.020738570019602776, -0.01084447093307972, 0.006243112031370401, 0.00011970131163252518, -0.013529829680919647, 0.007413389626890421, 0.04761672765016556, -0.01792791485786438, 0.013273675926029682, 0.019858907908201218, -0.14050084352493286, 0.0009663726086728275, -0.019640304148197174, 0.04054851084947586, -0.03385576233267784, -0.015199627727270126, 0.026559771969914436, 0.03427044302225113, 0.0029472671449184418, 0.0011902262922376394, 0.03052917867898941, -0.00315757910721004, 0.0003995813603978604, 0.03184090554714203, -0.015513826161623001, -0.04067821055650711, -0.03240610286593437, 0.038966886699199677, -0.047985248267650604, 0.000674610841087997, 0.00013537560880649835, 0.009235748089849949, -0.05234069377183914, -0.01595897227525711, 0.0182657428085804, -0.017997227609157562, 0.027410468086600304, -0.0037915862631052732, -0.0029944833368062973, 0.014233095571398735, 0.016882143914699554, 0.004907773341983557, -0.03205011412501335]\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "embedding = openai.embeddings.create(\n", 51 | " model=\"EmbeddingsModel\",\n", 52 | " input = \"欢迎关注我的个人公众号MyIO!\", \n", 53 | " encoding_format=\"float\")\n", 54 | "print(embedding.data[0].embedding)" 55 | ] 56 | } 57 | ], 58 | "metadata": { 59 | "kernelspec": { 60 | "display_name": "Python 3", 61 | "language": "python", 62 | "name": "python3" 63 | }, 64 | "language_info": { 65 | "codemirror_mode": { 66 | "name": "ipython", 67 | "version": 3 68 | }, 69 | "file_extension": ".py", 70 | "mimetype": "text/x-python", 71 | "name": "python", 72 | "nbconvert_exporter": "python", 73 | "pygments_lexer": 
"ipython3", 74 | "version": "3.10.11" 75 | } 76 | }, 77 | "nbformat": 4, 78 | "nbformat_minor": 2 79 | } 80 | -------------------------------------------------------------------------------- /img/python-demo-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feiyun0112/Local-LLM-Server/8f3d9dd977ca01b32b7adfb24b370cd4cd537bdf/img/python-demo-01.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | uvicorn 2 | fschat 3 | transformers 4 | torch 5 | accelerate 6 | sentencepiece 7 | openai 8 | langchain -------------------------------------------------------------------------------- /startup.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from multiprocessing import Process, Manager 3 | import uvicorn 4 | import sys 5 | import os 6 | from config import API_SERVER_CONFIG,WORK_CONFIG,CONTROLLER_CONFIG,LOG_PATH 7 | 8 | def run_controller(started_event, stoped_event): 9 | import fastchat.constants 10 | fastchat.constants.LOGDIR = LOG_PATH 11 | from fastchat.serve.controller import app, Controller, logger 12 | 13 | controller = Controller("shortest_queue") 14 | sys.modules["fastchat.serve.controller"].controller = controller 15 | 16 | @app.on_event("startup") 17 | async def app_startup(): 18 | if started_event: 19 | started_event.set() 20 | 21 | @app.on_event("shutdown") 22 | async def app_shutdown(): 23 | if stoped_event: 24 | stoped_event.set() 25 | 26 | uvicorn.run(app, host=CONTROLLER_CONFIG['host'], port=CONTROLLER_CONFIG['port'], log_level="debug") 27 | 28 | 29 | def run_openai_api_server(): 30 | import fastchat.constants 31 | fastchat.constants.LOGDIR = LOG_PATH 32 | from fastchat.serve.openai_api_server import app, CORSMiddleware, app_settings,logger 33 | 34 | app.add_middleware( 35 | CORSMiddleware, 36 | allow_credentials=True, 37 | allow_origins=["*"], 38 | allow_methods=["*"], 39 | allow_headers=["*"], 40 | ) 41 | 42 | app_settings.controller_address = f"http://{CONTROLLER_CONFIG['host']}:{CONTROLLER_CONFIG['port']}" 43 | app_settings.api_keys = API_SERVER_CONFIG['api_keys'] 44 | 45 | uvicorn.run(app, host=API_SERVER_CONFIG['host'], port=API_SERVER_CONFIG['port'], log_level="debug") 46 | 47 | def detect_device(): 48 | try: 49 | import torch 50 | if torch.cuda.is_available(): 51 | return "cuda" 52 | if torch.backends.mps.is_available(): 53 | return "mps" 54 | except: 55 | pass 56 | return "cpu" 57 | 58 | def load_embeddings_model(self): 59 | model_path=WORK_CONFIG['models'][self.model_names[0]] 60 | if 'bge-' in model_path: 61 | from langchain.embeddings import HuggingFaceBgeEmbeddings 62 | if 'zh' in model_path: 63 | # for chinese model 64 | query_instruction = "为这个句子生成表示以用于检索相关文章:" 65 | elif 'en' in model_path: 66 | # for english model 67 | query_instruction = "Represent this sentence for searching relevant passages:" 68 | else: 69 | # maybe ReRanker or else, just use empty string instead 70 | query_instruction = "" 71 | embeddings = HuggingFaceBgeEmbeddings(model_name=model_path, 72 | model_kwargs={'device': detect_device()}, 73 | query_instruction=query_instruction) 74 | if "bge-large-zh-noinstruct" in model_path: # bge large -noinstruct embedding 75 | embeddings.query_instruction = "" 76 | else: 77 | from langchain.embeddings.huggingface import HuggingFaceEmbeddings 78 | embeddings = 
HuggingFaceEmbeddings(model_name=model_path, 79 | model_kwargs={'device': detect_device()}) 80 | 81 | return embeddings 82 | 83 | def get_embeddings(self, params): 84 | if not hasattr(self, "_embeddings_model"): 85 | self._embeddings_model = load_embeddings_model(self) 86 | ret = {"embedding": [], "token_num": 0} 87 | 88 | normalized_embeddings = self._embeddings_model.embed_query(params["input"][0]) 89 | ret["token_num"] = len(normalized_embeddings) 90 | ret["embedding"] = [normalized_embeddings] 91 | 92 | return ret 93 | 94 | def run_model_worker(started_event): 95 | import fastchat.constants 96 | fastchat.constants.LOGDIR = LOG_PATH 97 | from fastchat.serve.multi_model_worker import app, worker_id,workers ,worker_map,ModelWorker,GptqConfig,ExllamaConfig,XftConfig,logger 98 | from fastchat.model.model_adapter import add_model_args 99 | import argparse 100 | 101 | ModelWorker.get_embeddings=get_embeddings 102 | 103 | 104 | 105 | parser = argparse.ArgumentParser(conflict_handler="resolve") 106 | parser.add_argument("--host", type=str, default=WORK_CONFIG['host']) 107 | parser.add_argument("--port", type=int, default=WORK_CONFIG['port']) 108 | parser.add_argument("--worker-address", type=str, default=f"http://{WORK_CONFIG['host']}:{WORK_CONFIG['port']}") 109 | parser.add_argument( 110 | "--controller-address", type=str, default= f"http://{CONTROLLER_CONFIG['host']}:{CONTROLLER_CONFIG['port']}" 111 | ) 112 | add_model_args(parser) 113 | # Override the model path to be repeated and align it with model names. 114 | parser.add_argument( 115 | "--model-path", 116 | type=str, 117 | action="append", 118 | help="One or more paths to model weights to load. This can be a local folder or a Hugging Face repo ID.", 119 | ) 120 | parser.add_argument( 121 | "--model-names", 122 | type=str, 123 | action="append", 124 | help="One or more model names. Values must be aligned with `--model-path` values.", 125 | ) 126 | parser.add_argument( 127 | "--conv-template", 128 | type=str, 129 | default=None, 130 | action="append", 131 | help="Conversation prompt template. Values must be aligned with `--model-path` values. If only one value is provided, it will be repeated for all models.", 132 | ) 133 | parser.add_argument("--limit-worker-concurrency", type=int, default=5) 134 | parser.add_argument("--stream-interval", type=int, default=2) 135 | parser.add_argument("--no-register", action="store_true") 136 | parser.add_argument( 137 | "--ssl", 138 | action="store_true", 139 | required=False, 140 | default=False, 141 | help="Enable SSL. Requires OS Environment variables 'SSL_KEYFILE' and 'SSL_CERTFILE'.", 142 | ) 143 | args = parser.parse_args() 144 | 145 | 146 | if args.gpus: 147 | if len(args.gpus.split(",")) < args.num_gpus: 148 | raise ValueError( 149 | f"Larger --num-gpus ({args.num_gpus}) than --gpus {args.gpus}!" 
150 | ) 151 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 152 | 153 | gptq_config = GptqConfig( 154 | ckpt=args.gptq_ckpt or args.model_path, 155 | wbits=args.gptq_wbits, 156 | groupsize=args.gptq_groupsize, 157 | act_order=args.gptq_act_order, 158 | ) 159 | if args.enable_exllama: 160 | exllama_config = ExllamaConfig( 161 | max_seq_len=args.exllama_max_seq_len, 162 | gpu_split=args.exllama_gpu_split, 163 | cache_8bit=args.exllama_cache_8bit, 164 | ) 165 | else: 166 | exllama_config = None 167 | if args.enable_xft: 168 | xft_config = XftConfig( 169 | max_seq_len=args.xft_max_seq_len, 170 | data_type=args.xft_dtype, 171 | ) 172 | if args.device != "cpu": 173 | print("xFasterTransformer now is only support CPUs. Reset device to CPU") 174 | args.device = "cpu" 175 | else: 176 | xft_config = None 177 | 178 | # Override 179 | args.device = detect_device() 180 | 181 | # Launch all workers 182 | workers = [] 183 | for key in WORK_CONFIG['models'].keys(): 184 | model_names=[key] 185 | model_path=WORK_CONFIG['models'][key] 186 | w = ModelWorker( 187 | args.controller_address, 188 | args.worker_address, 189 | worker_id, 190 | model_path, 191 | model_names, 192 | args.limit_worker_concurrency, 193 | args.no_register, 194 | device=args.device, 195 | num_gpus=args.num_gpus, 196 | max_gpu_memory=args.max_gpu_memory, 197 | load_8bit=args.load_8bit, 198 | cpu_offloading=args.cpu_offloading, 199 | gptq_config=gptq_config, 200 | exllama_config=exllama_config, 201 | xft_config=xft_config, 202 | stream_interval=args.stream_interval, 203 | conv_template=args.conv_template, 204 | ) 205 | workers.append(w) 206 | for model_name in model_names: 207 | worker_map[model_name] = w 208 | 209 | # Register all models 210 | model_names=[] 211 | for key in WORK_CONFIG['models'].keys(): 212 | model_names.append(key) 213 | url = args.controller_address + "/register_worker" 214 | data = { 215 | "worker_name": workers[0].worker_addr, 216 | "check_heart_beat": not args.no_register, 217 | "worker_status": { 218 | "model_names": model_names, 219 | "speed": 1, 220 | "queue_length": sum([w.get_queue_length() for w in workers]), 221 | }, 222 | } 223 | import requests 224 | r = requests.post(url, json=data) 225 | assert r.status_code == 200 226 | 227 | sys.modules["fastchat.serve.multi_model_worker"].workers = workers 228 | sys.modules["fastchat.serve.multi_model_worker"].worker_map = worker_map 229 | 230 | @app.on_event("startup") 231 | async def app_startup(): 232 | if started_event: 233 | started_event.set() 234 | uvicorn.run(app, host=args.host, port=args.port, log_level="debug") 235 | 236 | def start_main_server(): 237 | import sys 238 | import signal 239 | 240 | def handler(signalname): 241 | """ 242 | Python 3.9 has `signal.strsignal(signalnum)` so this closure would not be needed. 243 | Also, 3.8 includes `signal.valid_signals()` that can be used to create a mapping for the same purpose. 
244 | """ 245 | def f(signal_received, frame): 246 | raise KeyboardInterrupt(f"{signalname} received") 247 | return f 248 | 249 | # This will be inherited by the child process if it is forked (not spawned) 250 | signal.signal(signal.SIGINT, handler("SIGINT")) 251 | signal.signal(signal.SIGTERM, handler("SIGTERM")) 252 | manager = Manager() 253 | controller_started = manager.Event() 254 | controller_stoped = manager.Event() 255 | worker_started = manager.Event() 256 | process_list=[] 257 | process_list.append(Process( 258 | target=run_controller, 259 | kwargs=dict(started_event=controller_started,stoped_event = controller_stoped), 260 | name=f"controller", 261 | daemon=True, 262 | )) 263 | process_list.append(Process( 264 | target=run_openai_api_server, 265 | name=f"openai_api_server", 266 | daemon=True, 267 | )) 268 | process_list.append(Process( 269 | target=run_model_worker, 270 | kwargs=dict(started_event=worker_started), 271 | name=f"model_worker", 272 | daemon=True, 273 | )) 274 | for p in process_list: 275 | p.start() 276 | if(p.name=="controller"): 277 | controller_started.wait() # 等待controller启动完成 278 | 279 | worker_started.wait() 280 | 281 | print(f"Local-LLM-Server is successfully started, please use http://{API_SERVER_CONFIG['host']}:{API_SERVER_CONFIG['port']} to access the OpenAI-compatible interfaces") 282 | print(f"Local-LLM-Server 启动成功,请使用 http://{API_SERVER_CONFIG['host']}:{API_SERVER_CONFIG['port']} 访问 OpenAI 接口") 283 | 284 | controller_stoped.wait() 285 | for p in process_list: 286 | p.kill() 287 | 288 | 289 | if __name__ == '__main__': 290 | start_main_server() 291 | 292 | 293 | --------------------------------------------------------------------------------