├── .gitignore
├── LICENSE
├── ModelsCommunityLicenseAgreement
├── README.md
├── README_ja.md
├── README_ko.md
├── README_zh.md
├── assets
    └── imgs
    │   ├── llama_cpp.png
    │   ├── model_cap_en.png
    │   ├── model_cap_zh.png
    │   ├── opencompass_en.png
    │   ├── opencompass_zh.png
    │   ├── orion_start.PNG
    │   ├── vllm.png
    │   └── wechat_group.jpg
├── demo
    ├── cli_demo.py
    ├── text_generation.py
    └── text_generation_base.py
├── doc
    └── Orion14B_v3.pdf
├── gradio_demo
    ├── README.md
    ├── README_en.md
    ├── app.py
    ├── chat_task
    │   ├── __init__.py
    │   └── chat.py
    ├── common
    │   ├── __init__.py
    │   └── call_llm.py
    ├── doc_qa_task
    │   ├── __init__.py
    │   └── doc_qa.py
    ├── examples.py
    ├── extract_data_task
    │   ├── __init__.py
    │   └── extract.py
    ├── images
    │   ├── clear.png
    │   └── send.svg
    ├── plugin_task
    │   ├── __init__.py
    │   ├── api.py
    │   ├── model.py
    │   ├── plugins.py
    │   ├── prompt.py
    │   └── util.py
    ├── qa_generator_task
    │   ├── __init__.py
    │   └── generate_qa.py
    ├── requirements.txt
    └── 模型演示.xlsx
└── quantization
    ├── README.MD
    ├── data
        └── val.jsonl
    ├── eval_quant.py
    ├── orion.py
    └── quant.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # venv
  2 | .orion-llm-app
  3 | ##---------------------------------------------------
  4 | # Automated documentation .gitignore files
  5 | ##---------------------------------------------------
  6 | 
  7 | # Automatically generated API documentation stubs from sphinx-apidoc
  8 | docs/source/packages
  9 | 
 10 | # Automatically converting README from markdown to rST
 11 | docs/bin
 12 | docs/source/readme.rst
 13 | 
 14 | 
 15 | ##---------------------------------------------------
 16 | # Continuous Integration .gitignore files
 17 | ##---------------------------------------------------
 18 | 
 19 | # Ignore test result XML files
 20 | testresults.xml
 21 | coverage.xml
 22 | 
 23 | 
 24 | ##---------------------------------------------------
 25 | # Python default .gitignore
 26 | ##---------------------------------------------------
 27 | 
 28 | # Byte-compiled / optimized / DLL files
 29 | __pycache__/
 30 | *.py[cod]
 31 | *$py.class
 32 | 
 33 | # C extensions
 34 | *.so
 35 | 
 36 | # Distribution / packaging
 37 | .Python
 38 | build/
 39 | develop-eggs/
 40 | dist/
 41 | downloads/
 42 | eggs/
 43 | .eggs/
 44 | lib/
 45 | lib64/
 46 | parts/
 47 | sdist/
 48 | var/
 49 | wheels/
 50 | pip-wheel-metadata/
 51 | share/python-wheels/
 52 | *.egg-info/
 53 | .installed.cfg
 54 | *.egg
 55 | MANIFEST
 56 | 
 57 | # PyInstaller
 58 | #  Usually these files are written by a python script from a template
 59 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 60 | *.manifest
 61 | *.spec
 62 | 
 63 | # Installer logs
 64 | pip-log.txt
 65 | pip-delete-this-directory.txt
 66 | 
 67 | # Unit test / coverage reports
 68 | htmlcov/
 69 | .tox/
 70 | .nox/
 71 | .coverage
 72 | .coverage.*
 73 | .cache
 74 | nosetests.xml
 75 | coverage.xml
 76 | *.cover
 77 | .hypothesis/
 78 | .pytest_cache/
 79 | 
 80 | # Translations
 81 | *.mo
 82 | *.pot
 83 | 
 84 | # Django stuff:
 85 | *.log
 86 | local_settings.py
 87 | db.sqlite3
 88 | 
 89 | # Flask stuff:
 90 | instance/
 91 | .webassets-cache
 92 | 
 93 | # Scrapy stuff:
 94 | .scrapy
 95 | 
 96 | # Sphinx documentation
 97 | docs/_build/
 98 | 
 99 | # PyBuilder
100 | target/
101 | 
102 | # Jupyter Notebook
103 | .ipynb_checkpoints
104 | 
105 | # IPython
106 | profile_default/
107 | ipython_config.py
108 | 
109 | # pyenv
110 | .python-version
111 | 
112 | # pipenv
113 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
114 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
115 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
116 | #   install all needed dependencies.
117 | #Pipfile.lock
118 | 
119 | # celery beat schedule file
120 | celerybeat-schedule
121 | 
122 | # SageMath parsed files
123 | *.sage.py
124 | 
125 | # Environments
126 | .env
127 | .venv
128 | env/
129 | venv/
130 | ENV/
131 | env.bak/
132 | venv.bak/
133 | 
134 | # Spyder project settings
135 | .spyderproject
136 | .spyproject
137 | 
138 | # Rope project settings
139 | .ropeproject
140 | 
141 | # mkdocs documentation
142 | /site
143 | 
144 | # mypy
145 | .mypy_cache/
146 | .dmypy.json
147 | dmypy.json
148 | 
149 | # Pyre type checker
150 | .pyre/
151 | 
152 | # pytype static type analyzer
153 | .pytype/
154 | 
155 | # Weights & Biases
156 | wandb/
157 | 
158 | 
159 | ##---------------------------------------------------
160 | # Windows default .gitignore
161 | ##---------------------------------------------------
162 | 
163 | # Windows thumbnail cache files
164 | Thumbs.db
165 | ehthumbs.db
166 | ehthumbs_vista.db
167 | 
168 | # Dump file
169 | *.stackdump
170 | 
171 | # Folder config file
172 | [Dd]esktop.ini
173 | 
174 | # Recycle Bin used on file shares
175 | $RECYCLE.BIN/
176 | 
177 | # Windows Installer files
178 | *.cab
179 | *.msi
180 | *.msix
181 | *.msm
182 | *.msp
183 | 
184 | # Windows shortcuts
185 | *.lnk
186 | 
187 | 
188 | ##---------------------------------------------------
189 | # Linux default .gitignore
190 | ##---------------------------------------------------
191 | 
192 | # Editor backup files
193 | *~
194 | 
195 | # temporary files which can be created if a process still has a handle open of a deleted file
196 | .fuse_hidden*
197 | 
198 | # KDE directory preferences
199 | .directory
200 | 
201 | # Linux trash folder which might appear on any partition or disk
202 | .Trash-*
203 | 
204 | # .nfs files are created when an open file is removed but is still being accessed
205 | .nfs*
206 | 
207 | 
208 | ##---------------------------------------------------
209 | # Mac OSX default .gitignore
210 | ##---------------------------------------------------
211 | 
212 | # General
213 | .DS_Store
214 | .AppleDouble
215 | .LSOverride
216 | 
217 | # Icon must end with two \r
218 | Icon
219 | 
220 | # Thumbnails
221 | ._*
222 | 
223 | # Files that might appear in the root of a volume
224 | .DocumentRevisions-V100
225 | .fseventsd
226 | .Spotlight-V100
227 | .TemporaryItems
228 | .Trashes
229 | .VolumeIcon.icns
230 | .com.apple.timemachine.donotpresent
231 | 
232 | # Directories potentially created on remote AFP share
233 | .AppleDB
234 | .AppleDesktop
235 | Network Trash Folder
236 | Temporary Items
237 | .apdisk
238 | 
239 | .idea
240 | 
241 | gradio_cached_examples


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright (C) 2023 ORION STAR Robotics
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
203 | 
204 | 
205 | 
206 | 
207 | Apache License Version 2.0(简体中文)
208 | 1.　定义
209 | “许可证”是指根据本文档第1到第9部分关于使用、重生成和分发的术语和条件。
210 | “许可证颁发者”是指版权所有者或者由版权所有者批准的授权许可证的实体。
211 | “法律实体”是指实施实体和进行控制的所有其它实体受该实体控制，或者受该实体集中控制。根据此定义，“控制”是指(i)让无论是否签订协议的上述实体，进行指导或管理的直接权利或间接权利，或者(ii)拥有百分之五十(50%)或以上已发行股票的所有者，或者(iii)上述实体的实权所有者。
212 | “用户”(或“用户的”)是指行使本许可证所授予权限的个人或法律实体。
213 | “源程序”形式是指对包含但不限制软件源代码、文档源程序和配置文件进行修改的首选形式。
214 | “目标”形式是指对源程序形式进行机械转换或翻译的任何形式，包括但不限于对编译的目标代码，生成的文件以及转换为其它媒体类型。
215 | “作品”是指根据本许可证所制作的源程序形式或目标形式的著作，在著作中包含的或附加的版权通知(在下面附录中提供了一个示例)。
216 | “衍生作品”是指基于作品(或从作品衍生而来)的源程序形式或目标形式的任何作品，以及编辑修订、注释、详细描述或其它修订等构成原创著作作品的整体。根据本许可证，衍生作品不得包括与作品及其衍生作品分离之作品，或仅与作品及其衍生作品的接口相链接(或按名称结合)之作品。
217 | “贡献”是指任何著作作品，包括作品的原始版本和对该作品或衍生作品所做的任何修订或补充，意在提交给许可证颁发者以让版权所有者或代表版权所有者的授权个人或法律实体包含在其作品中。根据此定义，“提交”一词表示发送给许可证颁发者或其代表人，任何电子的、口头的或书面的交流信息形式，包括但不限于在由许可证颁发者或者代表其管理的电子邮件清单、源代码控制系统、以及发布跟踪系统上为讨论和提高作品的交流，但不包括由版权所有者以书面形式明显标注或指定为”非贡献”的交流活动。
218 | “贡献者”是指许可证颁发者和代表从许可证颁发者接受之贡献的并随后包含在作品之贡献中的任何个人或法律实体。
219 | 2.　版权许可证的授予
220 | 根据本许可证的条款，每个贡献者授予用户永久性的、全球性的、非专有性的、免费的、无版权费的、不可撤销的版权许可证以源程序形式或目标形式复制、准备衍生作品、公开显示、公开执行、授予分许可证、以及分发作品和这样的衍生作品。
221 | 3.　专利许可证的授予
222 | 根据本许可证的条款，每个贡献者授予用户永久性的、全球性的、非专有性的、免费的、无版权费的、不可撤销的(除在本部分进行说明)专利许可证对作品进行制作、让人制作、使用、提供销售、销售、进口和其它转让，且这样的许可证仅适用于在所递交作品的贡献中因可由单一的或多个这样的贡献者授予而必须侵犯的申请专利。如果用户对任何实体针对作品或作品中所涉及贡献提出因直接性或贡献性专利侵权而提起专利法律诉讼(包括交互诉讼请求或反索赔)，那么根据本许可证，授予用户针对作品的任何专利许可证将在提起上述诉讼之日起终止。
223 | 4.　重新分发
224 | 用户可在任何媒介中复制和分发作品或衍生作品之副本，无论是否修订，还是以源程序形式或目标形式，条件是用户需满足下列条款：
225 | 用户必须为作品或衍生作品的任何其他接收者提供本许可证的副本；
226 | 并且用户必须让任何修改过的文件附带明显的通知，声明用户已更改文件；
227 | 并且用户必须从作品的源程序形式中保留衍生作品源程序形式的用户所分发的所有版权、专利、商标和属性通知，但不包括不属于衍生作品任何部分的类似通知；
228 | 并且如果作品将”通知”文本文件包括为其分发作品的一部分，那么用户分发的任何衍生作品中须至少在下列地方之一包括，在这样的通知文件中所包含的属性通知的可读副本，但不包括那些不属于衍生作品任何部分的通知：在作为衍生作品一部分而分发的通知文本文件中；如果与衍生作品一起提供则在源程序形式或文件中；或者通常作为第三方通知出现的时候和地方，在衍生作品中产生的画面中。通知文件的内容仅供信息提供，并未对许可证进行修改。用户可在其分发的衍生作品中在作品的通知文本后或作为附录添加自己的属性通知，条件是附加的属性通知不得构成修改本许可证。
229 | 用户可以为自身所做出的修订添加自己的版权声明并可对自身所做出修订内容或为这样的衍生作品作为整体的使用、复制或分发提供附加或不同的条款，条件是用户对作品的使用、复制和分发必须符合本许可证中声明的条款。
230 | 5.　贡献的提交
231 | 除非用户明确声明，在作品中由用户向许可证颁发者的提交若要包含在贡献中，必须在无任何附加条款下符合本许可证的条款。尽管上面如此规定，执行许可证颁发者有关贡献的条款时，任何情况下均不得替代或修改任何单独许可证协议的条款。
232 | 6.　商标
233 | 本许可证并未授予用户使用许可证颁发者的商号、商标、服务标记或产品名称，除非将这些名称用于合理性和惯例性描述作品起源和复制通知文件的内容时。
234 | 7.　保证否认条款
235 | 除非因适用法律需要或书面同意，许可证颁发者以“按原样”基础提供作品(并且每个贡献者提供其贡献)，无任何明示的或暗示的保证或条件，包括但不限于关于所有权、不侵权、商品适销性、或适用性的保证或条件。用户仅对使用或重新分发作品的正确性负责，并需承担根据本许可证行使权限时的任何风险。
236 | 8.　责任限制条款
237 | 在任何情况下并根据任何法律，无论是因侵权(包括过失)或根据合同，还是其它原因，除非根据适用法律需要(例如故意行为和重大过失行为)或经书面同意，即使贡献者事先已被告知发生损害的可能性，任何贡献者不就用户因使用本许可证或不能使用或无法使用作品(包括但不限于商誉损失、停工、计算机失效或故障，或任何商业损坏或损失)而造成的损失，包括直接的、非直接的、特殊的、意外的或间接的字符损坏而负责。
238 | 9.　接受保证或附加责任
239 | 重新分发作品或及其衍生作品时，用户可选择提供或为符合本许可证承担之支持、担保、赔偿或其它职责义务和/或权利而收取费用。但是，在承担上述义务时，用户只可代表用户本身和用户本身责任来执行，无需代表任何其它贡献者，并且用户仅可保证、防护并保持每个贡献者不受任何因此而产生的责任或对因用户自身承担这样的保证或附加责任而对这样的贡献者所提出的索赔。
240 | 条款结束


--------------------------------------------------------------------------------
/ModelsCommunityLicenseAgreement:
--------------------------------------------------------------------------------
  1 | 【Orion-14B Series】 Models Community License Agreement
  2 |                                  Version: 1.0
  3 |                        Date of Release: 
  4 |  
  5 | 1. Definition
  6 | 
  7 | “Agreement” refers to the terms and conditions defined in this 【Orion-14B Series】 Models Community License Agreement for the use, reproduction, and distribution of the
  8 | 【Orion-14B Series】 Models.
  9 |  
 10 | “Model” refers to associated components (including checkpoints) developed based on machine learning, including learned weights and parameters (including the
 11 | status of optimizer).
 12 |  
 13 | “【Orion-14B Series】 Models” refers to open-source models with different specifications and capabilities provided by the Licensor, including:
 14 | 【Orion-14B-Base】Base model
 15 | 【Orion-14B-Chat】Chat model
 16 | 【Orion-14B-LongChat】Long context chat model
 17 | 【Orion-14B-Chat-RAG】Retrieval augmented generation chat model
 18 | 【Orion-14B-Chat-Plugin】Chat model with plugin capability
 19 | 【Orion-14B-Base-Int4】4-bit integer quantized base model
 20 | 【Orion-14B-Chat-Int4】4-bit integer quantized chat model
 21 |  
 22 | “Derivatives” refers to all modifications to 【Orion-14B Series】 Models, work based on 【Orion-14B Series】 Models, or any other models created or initialized by transferring the weights, parameters, activations, or output patterns of 【Orion-14B Series】 Models to other models to achieve similar performance, including but not limited to methods that require using intermediate data representations or generating synthetic data based on 【Orion-14B Series】 Models to train other models.
 23 |  
 24 | “Licensor” refers to Beijing Orionstar Technology Co., Ltd.
 25 |  
 26 | “you” refers to an individual or legal entity that exercises the license granted by this Agreement and/or uses the 【Orion-14B Series】 Models for any purpose and in any field of use.
 27 |  
 28 | “Third Party” refers to any individuals, legal entities, or non-legal organizations other than you.
 29 |  
 30 | “Distribute” refers to transmitting, copying, publishing, or otherwise sharing the 【Orion-14B Series】 Models with third parties, including providing the 【Orion-14B Series】Models through electronic or other remote means (such as any SaaS software or PaaS software accessed via API or web access).
 31 |  
 32 | “Commercial Purposes” refers to the use of the 【Orion-14B Series】 Models, directly or indirectly, for the operation, promotion, revenue generation, or any other profit-making purposes for entities or individuals.
 33 |  
 34 | “Laws and Regulations” refers to the laws and administrative regulations of the mainland of the People's Republic of China (for the purposes of this Agreement only, excluding Hong Kong, Macau, and Taiwan).
 35 |  
 36 | “Personal Information” refers to various information related to identified or identifiable natural persons recorded electronically or by other means, excluding information that has been anonymized.
 37 |  
 38 | “Logo” refers to any trademark, service mark, trade name, domain name, website name, or other distinctive branding marks.
 39 |  
 40 | 2. License and License Restrictions
 41 | The Licensor hereby grants you a non-exclusive, global, non-transferable, non-sub-licensable, revocable, and royalty-free copyright license. You must adhere to the following license restrictions:
 42 |  
 43 | 1) Your use of the 【Orion-14B Series】 Models must comply with the Laws and Regulations as well as applicable legal requirements of other countries/regions, and respect social ethics and moral standards, including but not limited to, not using the【Orion-14B Series】 Models for purposes prohibited by Laws and Regulations as well as applicable legal requirements of other countries/regions, such as harming national security, promoting terrorism, extremism, inciting ethnic or racial hatred, discrimination, violence, or pornography, and spreading false harmful information.
 44 |  
 45 | 2) You shall not, for military or unlawful purposes or in ways not allowed by Laws and Regulations as well as applicable legal requirements of other countries/regions, a) use, copy, or Distribute the【Orion-14B Series】 Models, or b) create complete or partial Derivatives of the 【Orion-14B Series】 Models.
 46 |  
 47 | 3) Your use of the 【Orion-14B Series】 Models (including using the output of the 【Orion-14B Series】 Models) and the creation of Derivatives must not infringe upon the legitimate rights of any Third Party, including but not limited to personal rights such as the rights to likeness, reputation, and privacy, as well as intellectual property rights such as copyrights, patents, trade secrets, and other property rights.
 48 |  
 49 | 4) You must clearly attribute the source of the 【Orion-14B Series】 Models to the Licensor and provide a copy of this Agreement to any Third-Party users of the 【Orion-14B Series】 Models and Derivatives.
 50 |  
 51 | 5) If you modify the 【Orion-14B Series】 Models to create Derivatives, you must clearly indicate the substantial modifications made, and these modifications shall not violate the license restrictions of this Agreement. You shall not enable, assist, or in any way facilitate Third Parties to violate the license restrictions of this Agreement.
 52 |  
 53 | If you plan to use the 【Orion-14B Series】 Models and Derivatives for Commercial Purposes, please refer to the Registration Form of 【Orion-14B Series】 Models for Commercial Purposes (“Registration Form”, available at 【https://test.orionstar.com/llm-license.html】), complete the registration, and obtain the license for Commercial Purposes. If you have obtained the license for Commercial Purposes and use the 【Orion-14B Series】 Models and Derivatives for Commercial Purposes, you must comply with the aforementioned license restrictions.
 54 |  
 55 |  
 56 | 3. Intellectual Property
 57 | The ownership of the 【Orion-14B Series】 Models and their related intellectual property rights is solely held by the Licensor.
 58 |  
 59 | In any circumstance, without the prior written consent of the Licensor, you are not allowed to use any Logo associated with the Licensor. If your use of the Licensor's Logo in violation of this Agreement causes any losses to the Licensor or others, you will bear full legal responsibility.
 60 | 
 61 | Within the scope of the granted license, you are authorized to modify the Orion-14B series models to create derivative works. You may assert intellectual property rights over the portions of the derivative works that are the product of your creative labor.
 62 |  
 63 |  
 64 | 4. Disclaimer and Limitation of Liability
 65 | 
 66 | The 【Orion-14B Series】 Models are provided "AS IS." The Licensor does not provide any express or implied warranties for the 【Orion-14B Series】 Models, including but not limited to stability, ownership, merchantability, non-infringement, or fitness for a specific purpose of the 【Orion-14B Series】 Models and their output results. You assume all responsibilities for the risks and consequences arising from the use, reproduction, and distribution of the 【Orion-14B Series】 Models, and the creation of Derivatives.
 67 |  
 68 | The Licensor complies with Laws and Regulations at all stages of model training, maintaining the legality, authenticity, accuracy, objectivity, and diversity of data and algorithms. The Licensor is not liable for any direct, indirect, incidental consequences, and other losses or damages related to your use, reproduction, and distribution of the 【Orion-14B Series】 Models, and the creation of Derivatives under this Agreement. This includes but is not limited to:
 69 |  
 70 | 1) The Licensor is not responsible for data security risks resulting from your use of the 【Orion-14B Series】 Models.
 71 |  
 72 | 2) The 【Orion-14B Series】 Models may contain Personal Information. When you use 【Orion-14B Series】 Models, you acknowledge that you are the data processing entity as defined under the Laws and Regulations responsible for determining the processing methods and purposes of Personal Information. You must comply with legal requirements for processing any Personal Information that may be contained in the 【Orion-14B Series】 Models and assume the associated legal responsibilities, as well as the risks
 73 | and consequences of processing Personal Information.
 74 |  
 75 | 3) The Licensor is not liable for reputation risks arising from your use of the 【Orion-14B Series】 Models or the output results of the 【Orion-14B Series】 Models.
 76 |  
 77 | 4) The Licensor is not liable for intellectual property risks associated with your use of the 【Orion-14B Series】 Models’ output results.
 78 |  
 79 | If your use, reproduction, distribution of the 【Orion-14B Series】 Models, or the creation of Derivatives result in losses to the Licensor, the Licensor has the right to seek compensation from you. For any claims made by Third Parties against the Licensor related to your use, reproduction, and distribution of the 【Orion-14B Series】 Models, or the creation of Derivatives, the Licensor has the right to demand that you defend, compensate, and indemnify the Licensor and protect the Licensor from harm.
 80 |  
 81 |  
 82 | 5. Dispute Resolution
 83 | The stipulation, effectiveness, interpretation, performance, modification, and termination of the Agreement, the use, copying, and distribution of the 【Orion-14B Series】 Models, and dispute resolution associated with your use, copying, and distribution shall be governed by the laws of the mainland of the People's Republic of China (for the purposes of this Agreement only, excluding Hong Kong, Macau, and Taiwan), and the application of conflict of laws is excluded.
 84 |  Any disputes arising from the use, copying, or distribution of the 【Orion-14B Series】 Models should first be resolved through amicable negotiations. If negotiations fail, legal proceedings should be initiated in the People's Court at the location of the Licensor.
 85 |  
 86 |  
 87 | 6. Effectiveness and Termination of the Agreement
 88 | 
 89 | Your use of the 【Orion-14B Series】 Models signifies that you have read and agreed to be bound by the terms of the Agreement. The Agreement becomes effective from the date of your use of the 【Orion-14B Series】 Models and will terminate from the date you cease using the 【Orion-14B Series】 Models. If you violate any terms or restrictions in the Agreement, the Licensor reserves the right to terminate the Agreement.
 90 |  
 91 | Upon termination of the Agreement, you must immediately cease using the 【Orion-14B Series】 Models. Section 4, "Disclaimer and Limitation of Liability," and Section 5, "Dispute Resolution," of this Agreement remain in effect after the termination of this Agreement.
 92 |  
 93 |  
 94 | 7. Updates to the Agreement and Contact Information
 95 | 
 96 | The Licensor reserves the right to update the Agreement from time to time. 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 【Orion-14B系列】 模型社区许可协议
104 | 版本：1.0
105 | 发布日期：
106 |     一、 定义 
107 | “许可”是指本协议中定义的使用、复制和分发的条款和条件。
108 |  
109 | “模型”是指任何附带的基于机器学习的组件（包括检查点），包括学习的权重、参数（包括 优化器状态）。
110 |  
111 | “【Orion-14B系列】 模型”是指基于【Orion-14B-Base】模型构建的一系列具备领域特色的模型，包含 ：
112 | 【Orion-14B-Base】基座模型
113 | 【Orion-14B-Chat】对话模型
114 | 【Orion-14B-LongChat】长上下文模型
115 | 【Orion-14B-Chat-RAG】检索增强模型
116 | 【Orion-14B-Chat-Plugin】插件模型
117 | 【Orion-14B-Base-Int4】基座Int4量化模型
118 | 【Orion-14B-Chat-Int4】对话Int4量化模型
119 | 
120 | “数据”是指从与模型一起使用的数据集中提取的信息和/或内容的集合，包括用于训练、预 训练或以其他方式评估模型的数据。数据集中提取的信息和/或内容的集合，可能包含个人 信息或非个人信息。 
121 | 
122 | “个人信息”是指以电子或者其他方式记录的与已识别或者可识别的自然人有关的各种信息， 不包括匿名化处理后的信息。个人信息的处理包括个人信息的收集、存储、使用、加工、 传输、提供、公开、删除等。
123 |  
124 | “输出”是指运行模型的结果，体现在由此产生的信息内容中。
125 |  
126 | “训练”是指为模型提供训练数据，以增强模型的预测能力。
127 |  
128 | “模型衍生品”是指对【Orion-14B系列】模型的所有修改、基于【Orion-14B系列】模型的工作，或通过将【Orion-14B系列】模型的权重、参数、激活或输出模式转移到其他模型而创建或初始化的任何其他模型，以使其他模型的性能与【Orion-14B系列】模型类似，包括但不限于需要使用中间数据表示的提取方法或基于【Orion-14B系列】模型生成合成数据来训练其他模型的方法。
129 | 
130 |  “分发”是指向第三方传输、复制、发布或以其他方式共享模型或模型衍生品，包括将模型作为通过电子或其他远程方式（例如基于 API 或 Web 访问的任何 SaaS 软件或 PaaS 软件） 提供的托管服务。
131 | 
132 |  “许可方”是指授予许可的版权所有者或版权所有者实体，包括可能对模型和/或被分发模型拥有权利的个人或实体。本协议下的许可方是：【北京猎户星空科技有限公司】，或其授权可 对任何第三方进行许可的实体或个人。“您”（或“您的”）是指行使本许可授予的权限和/或出于任何目的和在任何使用领域使用模 型的个人或法人实体，属于本协议的被许可人。
133 |  
134 | “第三方”是指您之外的任何个人、法人实体或非法人组织。 
135 | 
136 | “商业用途”是指使用 【Orion-14B系列】模型，直接或间接为实体或个人进行运营、推广或产生收入，或用于任何其他盈利目的。 
137 | 
138 |     二、 许可及许可限制 
139 | 根据本许可协议的条款和条件，许可方特此授予您一个非排他性、全球性、不可转让、不可再许可、可撤销、免版税的版权许可。您可以出于非商业用途使用此许可。许可方对您使用【Orion-14B系列】模型的输出或基于【Orion-14B系列】模型得到的模型衍生品不主张任何权利，但您必须满足如下许可限制条件： 
140 |     1． 您不得出于任何军事或非法目的使用、复制、修改、合并、发布、分发、复制或创建【Orion-14B系列】 模型的全部或部分衍生品。 
141 |     2． 如果您计划将【Orion-14B系列】模型及模型衍生品用作商业用途，应当按照本协议提供的联络方式，事先向许可方登记并获得许可方的书面授权。请点击以下链接进行登记：https://test.orionstar.com/llm-license.html。按照链接要求完成登记后即可获得商用授权。
142 |     3． 您对【Orion-14B系列】模型的使用和修改（包括使用【Orion-14B系列】 模型的输出或者基于【Orion-14B系列】 模型得到的模型衍生品）不得违反任何国家的法律法规，尤其是中华人民共和国的法律法规，不得侵犯任何第三方的合法权益，包括但不限于肖像权、名誉权、隐私权等 人格权，著作权、专利权、商业秘密等知识产权，或者其他财产权益。
143 |     4． 您必须向【Orion-14B系列】模型或其模型衍生品的任何第三方使用者提供【Orion-14B系列】模型的来源以及本协议的副本。
144 |     5． 您修改【Orion-14B系列】 模型得到模型衍生品，必须以显著的方式说明修改的内容，且上述修改不得违反本协议的许可限制条件，也不能允许、协助或以其他方式使得第三方违反本协议中的许可限制条件。 
145 |     三、 知识产权 
146 |     1. 【Orion-14B系列】模型的所有权及其相关知识产权，由许可方单独所有。 
147 |     2. 在任何情况下，未经许可方事先书面同意，您不得使用许可方任何商标、服务标记、 商号、域名、网站名称或其他显著品牌特征（以下统称为"标识"），包括但不限于明示或暗示您自身为“许可方”。未经许可方事先书面同意，您不得将本条款前述标识以单独或结合的任何方式展示、使用或申请注册商标、进行域名注册等，也不得向他人明示或暗示有权展示、使用、或以其他方式处理这些标识的权利。由于您违反本协议使用许可方上述标识 等给许可方或他人造成损失的，由您承担全部法律责任。 
148 |     3. 在许可范围内，您可以对【Orion-14B系列】模型进行修改以得到模型衍生品，对于模型衍生品中您付出创造性劳动的部分，您可以主张该部分的知识产权。 
149 |     四、 免责声明及责任限制 
150 |     1. 在任何情况下，许可方不对您根据本协议使用【Orion-14B系列】模型而产生或与之相关的任何直接、间接、附带的后果、以及其他损失或损害承担责任。若由此导致许可方遭受损失，您应当向许可方承担全部赔偿责任。
151 |     2. 模型中的模型参数仅仅是一种示例，如果您需要满足其他要求，需自行训练，并遵守相应数据集的许可协议。您将对【Orion-14B系列】模型的输出及模型衍生品所涉及的知识产权风险或与之相关的任何直接、间接、附带的后果、以及其他损失或损害负责。 
152 |     3. 尽管许可方在【Orion-14B系列】模型训练的所有阶段，都坚持努力维护数据的合规性和准确 性，但受限于【Orion-14B系列】模型的规模及其概率固有的随机性因素影响，其输出结果的准确性无法得到保证，模型存在被误导的可能。因此，许可方在此声明，许可方不承担您因使用【Orion-14B系列】模型及其源代码而导致的数据安全问题、声誉风险，或任何涉及【Orion-14B系列】模型被误导、误用、传播或不正当使用而产生的任何风险和责任。
153 |     4. 本协议所称损失或损害包括但不限于下列任何损失或损害（无论此类损失或损害是不可预见的、可预见的、已知的或其他的）:(i)收入损失;(ii)实际或预期利润损失；(iii)货币使用损失；(iv)预期节约的损失；(v)业务损失；(vi)机会损失；(vii)商誉、声誉损失；(viii)软件的使用损失；或(ix)任何间接、附带的特殊或间接损害损失。 
154 |     5. 除非适用的法律另有要求或经过许可方书面同意，否则许可方将按“现状”授予【Orion-14B系列】模型的许可。针对本协议中的【Orion-14B系列】模型，许可方不提供任何明示、暗示的保证，包括但不限于：关于所有权的任何保证或条件、关于适销性的保证或条件、适用于任何特定目的的保证或条件、过去、现在或未来关于【Orion-14B系列】模型不侵权的任何类型的保证、以及因任何交易过程、贸易使用（如建议书、规范或样品）而产生的任何保证。您将对其通过使用、复制或再分发等方式利用【Orion-14B系列】模型所产生的风险与后果，独自承担责任。 
155 |     6. 您充分知悉并理解同意，【Orion-14B系列】模型中可能包含个人信息。您承诺将遵守所有适用的法律法规进行个人信息的处理，特别是遵守《中华人民共和国个人信息保护法》的相关规定。请注意，许可方给予您使用【Orion-14B系列】模型的授权，并不意味着您已经获得处理相关个人信息的合法性基础。您作为独立的个人信息处理者，需要保证在处理【Orion-14B系列】模型中可能包含的个人信息时，完全符合相关法律法规的要求，包括但不限于获得个人信息主体的授权同意等，并愿意独自承担由此可能产生的任何风险和后果。 
156 |     7. 您充分理解并同意，许可方有权依合理判断对违反有关法律法规或本协议规定的行为进行处理，对您的违法违规行为采取适当的法律行动，并依据法律法规保存有关信息向有关部门报告等，您应独自承担由此而产生的一切法律责任。 
157 |     五、 研究、教育和学术目的 
158 |     1. 根据本许可协议的条款和条件，本着对学术界做出贡献的精神，许可方鼓励非营利性学术机构的师生将【Orion-14B系列】模型用于研究、教育和学术目的。
159 |     2. 进一步的，如您以研究、教育和学术目的使用【Orion-14B系列】模型，您可以在开展相关研 究、教育前，将您的机构名称、使用情况以及联系方式以邮件方式向我们进行提前告知，我们的联系邮箱为【ai@orionstar.com】，我们将可能基于您的联系方式，向您推送【Orion-14B系列】模型的相关更新资讯，以便您更好地开展研究、教育和学术工作。
160 |     六、 品牌曝光与显著标识
161 |     1. 您同意并理解，如您将您基于【Orion-14B系列】模型二次开发的模型衍生品在国内外的开源社区提供开源许可的，您需要在该开源社区以显著方式标注该模型衍生品系基于【Orion-14B系列】模型进行的二次开发，标注内容包括但不限于“【Orion-14B  Series】 Inside”以及与【Orion-14B系列】模型相关的品牌的其他元素。
162 |     2. 您同意并理解，如您将【Orion-14B系列】模型二次开发的模型衍生品参加国内外任何组织和个人举行的排名活动，包括但不限于针对模型性能、准确度、算法、算力等任何维度的排名活动，您均需在模型说明中以显著方式标注该模型衍生品系基于【Orion-14B系列】模型进行的二次开发，标注内容包括但不限于“【Orion-14B  Series】Inside”以及与【Orion-14B系列】模型相关的品牌的其他元素。 
163 |     七、 其他 
164 |     1. 许可方在法律法规许可的范围内对协议条款享有最终解释权。
165 |     2. 本协议的订立、效力、解释、履行、修改和终止，使用【Orion-14B系列】模型以及争议的解 决均适用中华人民共和国大陆地区（仅为本协议之目的，不包括香港、澳门和台湾）法律，并排除冲突法的适用。
166 |     3. 因使用【Orion-14B系列】模型而发生的任何争议，各方应首先通过友好协商的方式加以解决。协商不成时，向许可方所在地人民法院提起诉讼。
167 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <!-- markdownlint-disable first-line-h1 -->
  2 | <!-- markdownlint-disable html -->
  3 | <div align="center">
  4 |   <img src="./assets/imgs/orion_start.PNG" alt="logo" width="50%" />
  5 | </div>
  6 | 
  7 | <div align="center">
  8 | <h1>
  9 |   Orion-14B
 10 | </h1>
 11 | </div>
 12 | 
 13 | <div align="center">
 14 | 
 15 | <div align="center">
 16 |      <b>🌐English</b> | <a href="./README_zh.md">🇨🇳中文</a> | <a href="./README_ja.md">🇯🇵日本語</a> | <a href="./README_ko.md">🇰🇷한국어</a>
 17 | </div>
 18 | 
 19 | <h4 align="center">
 20 |     <p>
 21 |         🤗 <a href="https://huggingface.co/OrionStarAI" target="_blank">HuggingFace Mainpage</a> | 🤖 <a href="https://modelscope.cn/organization/OrionStarAI" target="_blank">ModelScope Mainpage</a>| 🧰 <a href="https://openxlab.org.cn/models/hot/OrionStarAI" target="_blank">OpenXLab Mainpage</a><br>🎬 <a href="https://huggingface.co/spaces/OrionStarAI/Orion-14B-App-Demo" target="_blank">HuggingFace Demo</a> | 🎫 <a href="https://modelscope.cn/studios/OrionStarAI/Orion-14B-App-Demo/summary" target="_blank">ModelScope Demo</a><br>😺 <a href="https://github.com/OrionStarAI/Orion" target="_blank">GitHub</a><br>📖 <a href="https://arxiv.org/pdf/2401.12246.pdf" target="_blank">Tech Report</a>
 22 |     </p>
 23 | </h4>
 24 | 
 25 | </div>
 26 | 
 27 | 
 28 | 
 29 | # Table of Contents
 30 | 
 31 | - [📖 Model Introduction](#model-introduction)
 32 | - [🔗 Model Download](#model-download)
 33 | - [🔖 Model Benchmarks](#model-benchmark)
 34 | - [📊 Model Inference](#model-inference) [<img src="./assets/imgs/vllm.png" alt="vllm" style="margin: 0;display: initial;" height="20" />](#vllm) [<img src="./assets/imgs/llama_cpp.png" alt="llamacpp" style="margin: 0;display: initial;" height="20" />](#llama-cpp)
 35 | - [📜 Declarations & License](#declarations-license)
 36 | - [🥇 Company Introduction](#company-introduction)
 37 | 
 38 | <a name="model-introduction"></a><br>
 39 | # 1. Model Introduction
 40 | 
 41 | - The Orion-14B series models are open-source multilingual large language models trained from scratch by OrionStarAI. The base model is trained on a 2.5T-token multilingual corpus, including Chinese, English, Japanese, Korean, etc., and it exhibits superior performance in these languages. For details, please refer to the [tech report](https://arxiv.org/pdf/2401.12246.pdf).
 42 | 
 43 | - The Orion-14B series models exhibit the following features:
 44 |   - Among models at the 20B-parameter scale, the Orion-14B-Base model shows outstanding performance in comprehensive evaluations.
 45 |   - Strong multilingual capabilities, significantly outperforming comparable models on Japanese and Korean test sets.
 46 |   - The fine-tuned models demonstrate strong adaptability, excelling in human-annotated blind tests.
 47 |   - The long-chat version supports extremely long texts, performing exceptionally well at a token length of 200k and supporting up to a maximum of 320k.
 48 |   - The quantized versions reduce model size by 70%, improve inference speed by 30%, with performance loss less than 1%.
 49 |  <table style="border-collapse: collapse; width: 100%;">
 50 |    <tr>
 51 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 52 |        <img src="./assets/imgs/opencompass_en.png" alt="opencompass" style="width: 100%; height: auto;">
 53 |      </td>
 54 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 55 |        <img src="./assets/imgs/model_cap_en.png" alt="modelcap" style="width: 100%; height: auto;">
 56 |      </td>
 57 |    </tr>
 58 |  </table>
 59 | 
 60 | - The Orion-14B series includes the following models:
 61 |   - **Orion-14B-Base:**  A multilingual large language foundational model with 14 billion parameters, pretrained on a diverse dataset of 2.5 trillion tokens.
 62 |   - **Orion-14B-Chat:**  A chat model fine-tuned on a high-quality corpus, aiming to provide an excellent interactive experience for users in the large model community.
 63 |   - **Orion-14B-LongChat:**  The long-context version excels at handling extremely lengthy texts, performing exceptionally well at a token length of 200k and supporting up to a maximum of 320k.
 64 |   - **Orion-14B-Chat-RAG:**  A chat-model fine-tuned on a custom retrieval augmented generation dataset, achieving superior performance in retrieval augmented generation tasks.
 65 |   - **Orion-14B-Chat-Plugin:**  A chat-model specifically tailored for plugin and function calling tasks, ideal for agent-related scenarios where the LLM acts as a plugin and function call system.
 66 |   - **Orion-14B-Base-Int4:**  A quantized base model utilizing 4-bit integer weights. It significantly reduces the model size by 70% and increases the inference speed by 30% while incurring a minimal performance loss of only 1%.
 67 |   - **Orion-14B-Chat-Int4:**  A quantized chat model utilizing 4-bit integer weights.
 68 | 
 69 | 
 70 | <a name="model-download"></a><br>
 71 | # 2. Model Download
 72 | 
 73 | Model release and download links are provided in the table below:
 74 | 
 75 | | Model Name              | HuggingFace Download Links                                                        | ModelScope Download Links                                                                       | OpenXLab Download Links                                                                       |
 76 | |-------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
 77 | | ⚾Orion-14B-Base        | [Orion-14B-Base](https://huggingface.co/OrionStarAI/Orion-14B-Base)               | [Orion-14B-Base](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base/summary)               | [Orion-14B-Base](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-Base)               |
 78 | | 😛Orion-14B-Chat        | [Orion-14B-Chat](https://huggingface.co/OrionStarAI/Orion-14B-Chat)               | [Orion-14B-Chat](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat/summary)               | [Orion-14B-Chat](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-Chat)               |
 79 | | 📃Orion-14B-LongChat    | [Orion-14B-LongChat](https://huggingface.co/OrionStarAI/Orion-14B-LongChat)       | [Orion-14B-LongChat](https://modelscope.cn/models/OrionStarAI/Orion-14B-LongChat/summary)       | [Orion-14B-LongChat](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-LongChat)       |
 80 | | 🔎Orion-14B-Chat-RAG    | [Orion-14B-Chat-RAG](https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG)       | [Orion-14B-Chat-RAG](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-RAG/summary)       | [Orion-14B-Chat-RAG](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-Chat-RAG)       |
 81 | | 🔌Orion-14B-Chat-Plugin | [Orion-14B-Chat-Plugin](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Plugin) | [Orion-14B-Chat-Plugin](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Plugin/summary) | [Orion-14B-Chat-Plugin](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-Chat-Plugin) |
 82 | | 💼Orion-14B-Base-Int4   | [Orion-14B-Base-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4)     | [Orion-14B-Base-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base-Int4/summary)     | [Orion-14B-Base-Int4](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-Base-Int4)     |
 83 | | 📦Orion-14B-Chat-Int4   | [Orion-14B-Chat-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4)     | [Orion-14B-Chat-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Int4/summary)     | [Orion-14B-Chat-Int4](https://openxlab.org.cn/models/detail/OrionStarAI/Orion-14B-Chat-Int4)     |
 84 | 
 85 | <a name="model-benchmark"></a><br>
 86 | # 3. Model Benchmarks
 87 | 
 88 | ## 3.1. Base Model Orion-14B-Base Benchmarks
 89 | ### 3.1.1. LLM evaluation results on examination and professional knowledge
 90 | | Model              | C-Eval   | CMMLU    | MMLU     | AGIEval  | Gaokao   | BBH      |
 91 | |--------------------|----------|----------|----------|----------|----------|----------|
 92 | | LLaMA2-13B         |   41.4   |   38.4   |   55.0   |   30.9   |   18.2   |   45.6   |
 93 | | Skywork-13B        |   59.1   |   61.4   |   62.7   |   43.6   |   56.1   |   48.3   |
 94 | | Baichuan2-13B      |   59.0   |   61.3   |   59.5   |   37.4   |   45.6   |   49.0   |
 95 | | QWEN-14B           |   71.7   |   70.2   |   67.9   |   51.9   | **62.5** |   53.7   |
 96 | | InternLM-20B       |   58.8   |   59.0   |   62.1   |   44.6   |   45.5   |   52.5   |
 97 | | **Orion-14B-Base** | **72.9** | **70.6** | **69.9** | **54.7** |   62.1   | **56.5** |
 98 | 
 99 | ### 3.1.2. LLM evaluation results on language understanding and common knowledge
100 | | Model             |RACE-middle|RACE-high |HellaSwag | PIQA     | Lambada  | WSC      |
101 | |--------------------|----------|----------|----------|----------|----------|----------|
102 | | LLaMA 2-13B        |   63.0   |   58.9   |   77.5   |   79.8   |   76.5   |   66.3   |
103 | | Skywork-13B        |   87.6   |   84.1   |   73.7   |   78.3   |   71.8   |   66.3   |
104 | | Baichuan 2-13B     |   68.9   |   67.2   |   70.8   |   78.1   |   74.1   |   66.3   |
105 | | QWEN-14B           |   93.0   |   90.3   | **80.2** |   79.8   |   71.4   |   66.3   |
106 | | InternLM-20B       |   86.4   |   83.3   |   78.1   | **80.3** |   71.8   |   68.3   |
107 | | **Orion-14B-Base** | **93.2** | **91.3** |   78.5   |   79.5   | **78.8** | **70.2** |
108 | 
109 | ### 3.1.3. LLM evaluation results of OpenCompass testsets
110 | | Model | Average  | Examination | Language | Knowledge | Understanding | Reasoning |
111 | |------------------|----------|----------|----------|----------|----------|----------|
112 | | LLaMA 2-13B      |   47.3   |   45.2   |   47.0   |   58.3   |   50.9   |   43.6   |
113 | | Skywork-13B      |   53.6   |   61.1   |   51.3   |   52.7   |   64.5   |   45.2   |
114 | | Baichuan 2-13B   |   49.4   |   51.8   |   47.5   |   48.9   |   58.1   |   44.2   |
115 | | QWEN-14B         |   62.4   |   71.3   |   52.67  |   56.1   |   68.8   |   60.1   |
116 | | InternLM-20B     |   59.4   |   62.5   |   55.0   | **60.1** |   67.3   |   54.9   |
117 | |**Orion-14B-Base**| **64.3** | **71.4** | **55.0** |   60.0   | **71.9** | **61.6** |
118 | 
119 | ### 3.1.4. Comparison of LLM performances on Japanese testsets
120 | | Model             |**Average**|  JCQA    |  JNLI    |  MARC    |  JSQD    |  JQK     |  XLS     |  XWN     |  MGSM    |
121 | |--------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
122 | | PLaMo-13B          |   52.3   |   56.7   |   42.8   |   95.8   |   70.6   |   71.0   |   8.70   |   70.5   |   2.40   |
123 | | WebLab-10B         |   50.7   |   66.6   |   53.7   |   82.1   |   62.9   |   56.2   |   10.0   |   72.0   |   2.40   |
124 | | ELYZA-jp-7B        |   48.8   |   71.7   |   25.3   |   86.6   |   70.8   |   64.1   |   2.50   |   62.1   |   7.20   |
125 | | StableLM-jp-7B     |   51.1   |   33.4   |   43.3   | **96.7** |   70.6   |   78.1   |   10.7   |   72.8   |   2.80   |
126 | | LLaMA 2-13B        |   46.3   |   75.0   |   47.6   |   38.8   |   76.1   |   67.7   |   18.1   |   63.2   |   10.4   |
127 | | Baichuan 2-13B     |   57.1   |   73.7   |   31.3   |   91.6   |   80.5   |   63.3   |   18.6   |   72.2   |   25.2   |
128 | | QWEN-14B           |   65.8   |   85.9   |   60.7   |   97.0   |   83.3   |   71.8   |   18.8   |   70.6   |   38.0   |
129 | | Yi-34B             |   67.1   |   83.8   |   61.2   |   95.2   | **86.1** |   78.5   | **27.2** |   69.2   |   35.2   |
130 | | **Orion-14B-Base** | **69.1** | **88.2** | **75.8** |   94.1   |   75.7   | **85.1** |   17.3   | **78.8** | **38.0** |
131 | 
132 | ### 3.1.5. Comparison of LLM performances on Korean testsets. n = 0 and n = 5 stand for n-shot prompts used in the evaluation
133 | |Model      | **Average**<br>n=0&nbsp;&nbsp;n=5 | HellaSwag<br>n=0&nbsp;&nbsp;n=5 | COPA<br> n=0&nbsp;&nbsp;n=5 | BoolQ<br>n=0&nbsp;&nbsp;n=5 | SentiNeg<br>n=0&nbsp;&nbsp;n=5|
134 | |------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
135 | | KoGPT            |  53.0   &nbsp;&nbsp;   70.1  |  55.9   &nbsp;&nbsp;   58.3  |  73.5   &nbsp;&nbsp;   72.9  |  45.1   &nbsp;&nbsp;   59.8  |  37.5   &nbsp;&nbsp;   89.4  |
136 | | Polyglot-ko-13B  |  69.6   &nbsp;&nbsp;   73.7  |**59.5** &nbsp;&nbsp; **63.1**|**79.4** &nbsp;&nbsp; **81.1**|  48.2   &nbsp;&nbsp;   60.4  |  91.2   &nbsp;&nbsp;   90.2  |
137 | | LLaMA 2-13B      |  46.7   &nbsp;&nbsp;   63.7  |  41.3   &nbsp;&nbsp;   44.0  |  59.3   &nbsp;&nbsp;   63.8  |  34.9   &nbsp;&nbsp;   73.8  |  51.5   &nbsp;&nbsp;   73.4  |
138 | | Baichuan 2-13B   |  52.1   &nbsp;&nbsp;   58.7  |  39.2   &nbsp;&nbsp;   39.6  |  60.6   &nbsp;&nbsp;   60.6  |  58.4   &nbsp;&nbsp;   61.5  |  50.3   &nbsp;&nbsp;   72.9  |
139 | | QWEN-14B         |  53.8   &nbsp;&nbsp;   73.7  |  45.3   &nbsp;&nbsp;   46.8  |  64.9   &nbsp;&nbsp;   68.9  |  33.4   &nbsp;&nbsp;   83.5  |  71.5   &nbsp;&nbsp;   95.7  |
140 | | Yi-34B           |  54.2   &nbsp;&nbsp;   72.1  |  44.6   &nbsp;&nbsp;   44.7  |  58.0   &nbsp;&nbsp;   60.6  |  65.9   &nbsp;&nbsp;   90.2  |  48.3   &nbsp;&nbsp;   92.9  |
141 | |**Orion-14B-Base**|**74.5** &nbsp;&nbsp; **79.6**|  47.0   &nbsp;&nbsp;   49.6  |  77.7   &nbsp;&nbsp;   79.4  |**81.6** &nbsp;&nbsp; **90.7**|**92.4** &nbsp;&nbsp; **98.7**|
142 | 
143 | ### 3.1.6. Multilingual evaluation
144 | | Model              | Train Lang | Japanese | Korean   | Chinese  |  English |
145 | |--------------------|------------|----------|----------|----------|----------|
146 | | PLaMo-13B          |  En,Jp     |   52.3   |   *      |   *      |   *      |
147 | | Weblab-10B         |  En,Jp     |   50.7   |   *      |   *      |   *      |
148 | | ELYZA-jp-7B        |  En,Jp     |   48.8   |   *      |   *      |   *      |
149 | | StableLM-jp-7B     |  En,Jp     |   51.1   |   *      |   *      |   *      |
150 | | KoGPT-6B           |  En,Ko     |   *      |   70.1   |   *      |   *      |
151 | | Polyglot-ko-13B    |  En,Ko     |   *      |   70.7   |   *      |   *      |
152 | | Baichuan2-13B      |  Multi     |   57.1   |   58.7   |   50.8   |   57.1   |
153 | | Qwen-14B           |  Multi     |   65.8   |   73.7   |   64.5   |   65.4   |
154 | | Llama2-13B         |  Multi     |   46.3   |   63.7   |   41.4   |   55.3   |
155 | | Yi-34B             |  Multi     |   67.1   |   72.2   |   58.7   | **68.8** |
156 | | **Orion-14B-Base** |  Multi     | **69.1** | **79.5** | **67.9** |   67.3   |
157 | 
158 | 
159 | ## 3.2. Chat Model Orion-14B-Chat Benchmarks
160 | ### 3.2.1. Chat model subjective evaluation of MTBench
161 | | Model        | First-Turn | Second-Turn | **Average** |
162 | |----------------------|----------|----------|----------|
163 | | Baichuan2-13B-Chat   |   7.05   |   6.47   |   6.76   |
164 | | Qwen-14B-Chat        |   7.30   |   6.62   |   6.96   |
165 | | Llama2-13B-Chat      |   7.10   |   6.20   |   6.65   |
166 | | InternLM-20B-Chat    |   7.03   |   5.93   |   6.48   |
167 | | **Orion-14B-Chat**   | **7.68** | **7.07** | **7.37** |
168 | \* use vllm for inference
169 | 
170 | ### 3.2.2. Chat model subjective evaluation of AlignBench
171 | | Model              | Math.  |  Logi. | Basic. | Chi.   | Comp.  | Writ.  | Role.  | Prof.  |**Avg.**|
172 | |--------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|
173 | | Baichuan2-13B-Chat |  3.76  |  4.07  |  6.22  |  6.05  |  7.11  |  6.97  |  6.75  |  6.43  |  5.25  |
174 | | Qwen-14B-Chat      |**4.91**|**4.71**|**6.90**|  6.36  |  6.74  |  6.64  |  6.59  |  6.56  |**5.72**|
175 | | Llama2-13B-Chat    |  3.05  |  3.79  |  5.43  |  4.40  |  6.76  |  6.63  |  6.99  |  5.65  |  4.70  |
176 | | InternLM-20B-Chat  |  3.39  |  3.92  |  5.96  |  5.50  |**7.18**|  6.19  |  6.49  |  6.22  |  4.96  |
177 | | **Orion-14B-Chat** |  4.00  |  4.24  |  6.18  |**6.57**|  7.16  |**7.36**|**7.16**|**6.99**|  5.51  |
178 | \* use vllm for inference
179 | 
180 | ## 3.3. LongChat Model Orion-14B-LongChat Benchmarks
181 | ### 3.3.1. LongChat evaluation of LongBench
182 | | Model           | NarrativeQA|MultiFieldQA-en|MultiFieldQA-zh| DuReader  | QMSum     | VCSUM     | TREC      | TriviaQA  | LSHT      |RepoBench-P|
183 | |--------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
184 | | GPT-3.5-Turbo-16k        | **23.60** | **52.30** | **61.20** |   28.70   |   23.40   | **16.00** |   68.00   | **91.40** |   29.20   |   53.60   |
185 | | LongChat-v1.5-7B-32k     |   16.90   |   41.40   |   29.10   |   19.50   |   22.70   |    9.90   |   63.50   |   82.30   |   23.20   |   55.30   |
186 | | Vicuna-v1.5-7B-16k       |   19.40   |   38.50   |   43.00   |   19.30   |   22.80   |   15.10   |   71.50   |   86.20   |   28.80   |   43.50   |
187 | | Yi-6B-200K               |   14.11   |   36.74   |   22.68   |   14.01   |   20.44   |    8.08   |   72.00   |   86.61   |   38.00   | **63.29** |
188 | | Orion-14B-LongChat       |   19.47   |   48.11   |   55.84   | **37.02** | **24.87** |   15.44   | **77.00** |   89.12   | **45.50** |   54.31   |
189 | 
190 | 
191 | ## 3.4. Chat RAG Model Benchmarks
192 | ### 3.4.1. LLM evaluation results of self-built RAG testsets
193 | |Model|Effectiveness of Response (Keyword)|*Effectiveness of Response (subjective evaluation)|Quoting Ability|Fallback Ability|*AutoQA|*Data Extraction|
194 | |---------------------|------|------|------|------|------|------|
195 | | Baichuan2-13B-Chat  |  85  |  76  |  1   |  0   |  69  |  51  |
196 | | Qwen-14B-Chat       |  79  |  77  |  75  |  47  |  68  |  72  |
197 | | Qwen-72B-Chat(Int4) |  87  |  89  |  90  |  32  |  67  |  76  |
198 | | GPT-4               |  91  |  94  |  96  |  95  |  75  |  86  |
199 | | Orion-14B-Chat-RAG  |  86  |  87  |  91  |  97  |  73  |  71  |
200 |  \* means manual assessment
201 | 
202 | ## 3.5. Chat Plugin Model Orion-14B-Chat-Plugin Benchmarks
203 | ### 3.5.1. LLM evaluation results of self-built plugin testsets
204 | |Model |Intent Recognition with Full Params |Intent Recognition with Missing Params |Non-Plugin Invocation Recognition |
205 | |-----------------------|--------|-----------|--------|
206 | | Baichuan2-13B-Chat    |   25   |   0       |   0    |
207 | | Qwen-14B-Chat         |   55   |   0       |   50   |
208 | | GPT-4                 | **95** |   52.38   |   70   |
209 | | Orion-14B-Chat-Plugin |  92.5  | **60.32** | **90** |
210 | 
211 | ## 3.6. Quantized Model Orion-14B-Base-Int4 Benchmarks
212 | ### 3.6.1. Comparison of before and after quantization
213 | |Model |Size(GB)|Inference Speed(tokens/s)|C-Eval|CMMLU|MMLU|RACE|HellaSwag|
214 | |-------------------------|-------|-----|------|------|------|------|------|
215 | | Orion-14B-Base          |  28.0 | 135 | 72.8 | 70.6 | 70.0 | 93.3 | 78.5 |
216 | | Orion-14B-Base-Int4     |  8.3  | 178 | 71.8 | 69.8 | 69.2 | 93.1 | 78.0 |
217 | 
218 | 
219 | <a name="model-inference"></a><br>
220 | # 4. Model Inference
221 | 
222 | Model weights, source code, and configuration needed for inference are published on Hugging Face, and the download link
223 | is available in the table at the beginning of this document. We demonstrate various inference methods here, and the
224 | program will automatically download the necessary resources from Hugging Face.
225 | 
226 | ## 4.1. Python Code
227 | 
228 | ```python
229 | import torch
230 | from transformers import AutoModelForCausalLM, AutoTokenizer
231 | from transformers.generation.utils import GenerationConfig
232 | 
233 | tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-14B-Chat", use_fast=False, trust_remote_code=True)
234 | model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-14B-Chat", device_map="auto",
235 |                                              torch_dtype=torch.bfloat16, trust_remote_code=True)
236 | 
237 | model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-14B-Chat")
238 | messages = [{"role": "user", "content": "Hello, what is your name? "}]
239 | response = model.chat(tokenizer, messages, streaming=False)
240 | print(response)
241 | 
242 | ```
243 | 
244 | In the above Python code, the model is loaded with `device_map='auto'` to utilize all available GPUs. To specify the
245 | device, you can use something like `export CUDA_VISIBLE_DEVICES=0,1` (using GPUs 0 and 1).
246 | 
247 | ## 4.2. Command Line Tool
248 | 
249 | ```shell
250 | CUDA_VISIBLE_DEVICES=0 python demo/cli_demo.py
251 | ```
252 | 
253 | This command-line tool is designed for chat scenarios, and thus, it does not support calling the base model.
254 | 
255 | ## 4.3. Direct Script Inference
256 | 
257 | ```shell
258 | 
259 | # base model
260 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-14B-Base --tokenizer OrionStarAI/Orion-14B-Base --prompt hello
261 | 
262 | # chat model
263 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-14B-Chat --tokenizer OrionStarAI/Orion-14B-Chat --prompt hi
264 | 
265 | ```
266 | 
267 | <a name="vllm"></a><br>
268 | ## 4.4. Inference by vLLM (>=v0.3.3)
269 | 
270 | - vLLM url & QuickStart<br>
271 |   https://github.com/vllm-project/vllm<br>
272 |   https://docs.vllm.ai/en/latest/getting_started/quickstart.html
273 | - Start vllm server:
274 | ```shell
275 | python -m vllm.entrypoints.openai.api_server --model OrionStarAI/Orion-14B-Chat
276 | ```
277 | 
278 | <a name="llama-cpp"></a><br>
279 | ## 4.5. Inference by llama.cpp
280 | 
281 | - Project URL<br>
282 |   https://github.com/ggerganov/llama.cpp
283 | 
284 | - Orion-14B-Chat.gguf<br>
285 |   https://huggingface.co/OrionStarAI/Orion-14B-Chat/blob/main/Orion-14B-Chat.gguf
286 | 
287 | - How to convert to GGUF model
288 | 
289 |   ```shell
290 |   python convert-hf-to-gguf.py path/to/Orion-14B-Chat --outfile chat.gguf
291 |   ```
292 | 
293 | - How to run generation
294 | 
295 |   ```shell
296 |   ./main --frequency-penalty 0.5 --top-k 5 --top-p 0.9 -m chat.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e
297 |   ```
298 | 
299 | 
300 | ## 4.6. Example Output
301 | 
302 | ### 4.6.1. Casual Chat
303 | 
304 | `````
305 | User: Hello
306 | Orion-14B: Hello! How can I help you today? If you have any questions or need assistance, feel free to ask.
307 | User: What functions do you have?
308 | Orion-14B: As an AI language model, I have a wide range of functions and capabilities, including but not limited to:
309 |   Answering general knowledge questions: I can provide information on various topics such as history, science, geography, and more.
310 |   Providing definitions and explanations: I can explain complex concepts and provide definitions for words and phrases.
311 |   Assisting with tasks: I can help with simple tasks like setting reminders, converting units of measurement, and even ordering food from restaurants.
312 |   Generating text: I can write short stories, essays, articles, and other types of content based on prompts given to me.
313 |   Translating languages: I can translate text from one language to another.
314 |   Chatting: I can engage in conversation with users and respond to their queries in a conversational manner.
315 |   Summarizing long texts: I can summarize longer texts like articles, books, and research papers.
316 |   Generating random numbers and names: I can generate random numbers within a specified range and also suggest names based on certain criteria.
317 |   Playing games: I can play simple games like Hangman, Trivia, and more.
318 |   Providing recommendations: I can recommend movies, music, books, and other forms of entertainment based on user preferences.
319 |   These are just some of the many functions that I am capable of performing. If you have a specific task in mind, feel free to ask me about it.
320 | User: Tell me a joke.
321 | Orion-14B: Sure, here's a classic one-liner: Why don't scientists trust atoms? Because they make up everything.
322 | `````
323 | 
324 | ### 4.6.2. Japanese & Korean Chat
325 | 
326 | `````
327 | User：自己を紹介してください
328 | Orion-14B：こんにちは、私の名前はChatMaxで、OrionStarによって開発されたAIアシスタントです。私はユーザーに正確で、客観的で、有用な回答を提供し、彼らが問題を解決し、情報を取得するのを助けることができます。
329 | User：자기소개를 해주세요.
330 | Orion-14B：안녕하세요, 제 이름은 ChatMax입니다. OrionStar에서 개발한 AI 어시스턴트입니다. 저는 사용자들에게 정확하고 객관적이며 유용한 답변을 제공하여 문제를 해결하고 정보를 얻는 데 도움을 줄 수 있습니다.
331 | `````
332 | 
333 | <a name="declarations-license"></a><br>
334 | # 5. Declarations & License
335 | 
336 | ## 5.1. Declarations
337 | 
338 | We strongly urge all users not to use the Orion-14B model for any activities that may harm national or social security or violate the law.
339 | Additionally, we request users not to use the Orion-14B model for internet services without proper security review and filing.
340 | We hope all users abide by this principle to ensure that technological development takes place in a regulated and legal environment.
341 | We have done our best to ensure the compliance of the data used in the model training process. However, despite our
342 | significant efforts, unforeseen issues may still arise due to the complexity of the model and data. Therefore, if any
343 | problems arise due to the use of the Orion-14B open-source model, including but not limited to data security
344 | issues, public opinion risks, or any risks and issues arising from the model being misled, abused, disseminated, or
345 | improperly utilized, we will not assume any responsibility.
346 | 
347 | ## 5.2. License
348 | 
349 | Community use of the Orion-14B series models
350 | - For code, please comply with  [Apache License Version 2.0](./LICENSE)<br>
351 | - For model, please comply with [【Orion-14B Series】 Models Community License Agreement](./ModelsCommunityLicenseAgreement)
352 | 
353 | 
354 | <a name="company-introduction"></a><br>
355 | # 6. Company Introduction
356 | 
357 | OrionStar is a leading global service robot solutions company, founded in September 2016. OrionStar is dedicated to
358 | using artificial intelligence technology to create the next generation of revolutionary robots, allowing people to break
359 | free from repetitive physical labor and making human work and life more intelligent and enjoyable. Through technology,
360 | OrionStar aims to make society and the world a better place.
361 | 
362 | OrionStar possesses fully self-developed end-to-end artificial intelligence technologies, such as voice interaction and
363 | visual navigation. It integrates product development capabilities and technological application capabilities. Based on
364 | the Orion robotic arm platform, it has launched products such as OrionStar AI Robot Greeting, AI Robot Greeting Mini,
365 | Lucki, Coffee Master, and established the open platform OrionOS for Orion robots. Following the philosophy of "Born for
366 | Truly Useful Robots", OrionStar empowers more people through AI technology.
367 | 
368 | **The core strength of OrionStar lies in possessing end-to-end AI application capabilities,** including big data preprocessing, large model pretraining, fine-tuning, prompt engineering, agent, etc. With comprehensive end-to-end model training capabilities, including systematic data processing workflows and the parallel model training capability of hundreds of GPUs, it has been successfully applied in various industry scenarios such as government affairs, cloud services, international e-commerce, and fast-moving consumer goods.
369 | 
370 | Companies with demands for deploying large-scale model applications are welcome to contact us.<br>
371 | **Enquiry Hotline: 400-898-7779**<br>
372 | **E-mail: ai@orionstar.com**<br>
373 | **Discord Link: https://discord.gg/zumjDWgdAs**
374 | 
375 | <div align="center">
376 |   <img src="./assets/imgs/wechat_group.jpg" alt="wechat" width="40%" />
377 | </div>
378 | 


--------------------------------------------------------------------------------
/README_ja.md:
--------------------------------------------------------------------------------
  1 | <!-- markdownlint-disable first-line-h1 -->
  2 | <!-- markdownlint-disable html -->
  3 | <div align="center">
  4 |   <img src="./assets/imgs/orion_start.PNG" alt="logo" width="50%" />
  5 | </div>
  6 | 
  7 | <div align="center">
  8 | <h1>
  9 |   Orion-14B
 10 | </h1>
 11 | </div>
 12 | 
 13 | <div align="center">
 14 | 
 15 | <div align="center">
 16 |      <b>🇯🇵日本語</b> | <a href="./README.md">🌐英語</a> | <a href="./README_zh.md">🇨🇳中文</a> | <a href="./README_ko.md">🇰🇷한국어</a>
 17 | </div>
 18 | 
 19 | <h4 align="center">
 20 |     <p>
 21 |         🤗 <a href="https://huggingface.co/OrionStarAI" target="_blank">HuggingFace メインページ</a> | 🤖 <a href="https://modelscope.cn/organization/OrionStarAI" target="_blank">ModelScope メインページ</a><br>🎬 <a href="https://huggingface.co/spaces/OrionStarAI/Orion-14B-App-Demo" target="_blank">HuggingFace デモ</a> | 🎫 <a href="https://modelscope.cn/studios/OrionStarAI/Orion-14B-App-Demo/summary" target="_blank">ModelScope デモ</a><br>😺 <a href="https://github.com/OrionStarAI/Orion" target="_blank">GitHub</a><br>📖 <a href="https://github.com/OrionStarAI/Orion/blob/master/doc/Orion14B_v3.pdf" target="_blank">技術レポート</a>
 22 |     </p>
 23 | </h4>
 24 | 
 25 | </div>
 26 | 
 27 | 
 28 | 
 29 | # 目次
 30 | 
 31 | - [📖 モデル紹介](#model-introduction)
 32 | - [🔗 モデルダウンロード](#model-download)
 33 | - [🔖 モデルベンチマーク](#model-benchmark)
 34 | - [📊 モデル推論](#model-inference) [<img src="./assets/imgs/vllm.png" alt="vllm" style="margin: 0;display: initial;" height="20" />](#vllm) [<img src="./assets/imgs/llama_cpp.png" alt="llamacpp" style="margin: 0;display: initial;" height="20" />](#llama-cpp)
 35 | - [📜 声明とライセンス](#declarations-license)
 36 | - [🥇 企業紹介](#company-introduction)
 37 | 
 38 | <a name="model-introduction"></a><br>
 39 | # 1. モデル紹介
 40 | 
 41 | - Orion-14B-Baseは、140億のパラメータを持つマルチランゲージの大規模モデルで、さまざまな言語に対応するために2.5兆トークンの多様なデータセットでトレーニングされました。このデータセットには、中国語、英語、日本語、韓国語などが含まれています。このモデルは、多言語環境でのさまざまなタスクにおいて卓越した性能を発揮しています。Orion-14Bシリーズモデルは、主要なパフォーマンスベンチマークで優れた結果を示し、同じパラメータ数を持つ他のベースモデルを明らかに凌駕しています。具体的な技術の詳細については、[技術レポート](https://github.com/OrionStarAI/Orion/blob/master/doc/Orion14B_v3.pdf)を参照してください。
 42 | 
 43 | - Orion-14B シリーズのモデルは、以下の特徴があります：
 44 |   - 20Bパラメータ規模のモデルの中で、ベースモデルは総合的な評価において優れた結果を示しています。
 45 |   - 多言語対応力が強く、特に日本語と韓国語の対応能力が優れています
 46 |   - ファインチューニングモデルは適応性が高く、人間の注釈つきブラインドテストでは高性能なパフォーマンスを発揮しています。
 47 |   - 長文対応バージョンは非常に長いテキストをサポートし、20万トークンの長さで優れた効果を発揮し、最大で320,000トークンまでサポート可能です。
 48 |   - 量子化バージョンではモデルサイズが70%縮小し、推論速度が30%向上し、性能の損失が1%以下です。
 49 |  <table style="border-collapse: collapse; width: 100%;">
 50 |    <tr>
 51 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 52 |        <img src="./assets/imgs/opencompass_en.png" alt="opencompass" style="width: 100%; height: auto;">
 53 |      </td>
 54 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 55 |        <img src="./assets/imgs/model_cap_en.png" alt="modelcap" style="width: 100%; height: auto;">
 56 |      </td>
 57 |    </tr>
 58 |  </table>
 59 | 
 60 | - Orion-14B シリーズ モデルには以下が含まれます：
 61 | 
 62 |   - **Orion-14B-Base:**  2.5兆トークンの多様なデータセットでトレーニングされ、140億のパラメータを持つ多言語基本モデルです。
 63 |   - **Orion-14B-Chat:**  高品質なコーパスでファインチューニングされた対話型モデルで、大規模モデルコミュニティにより良いユーザーインタラクション体験を提供することを目指しています。
 64 |   - **Orion-14B-LongChat:**  20万トークンの長さで優れた効果を発揮し、最大で320,000トークンまでサポート可能で、長文書の評価セットでの性能は専用モデルに匹敵します。
 65 |   - **Orion-14B-Chat-RAG:**  カスタムの検索強化生成データセットでファインチューニングされたチャットモデルで、検索強化生成タスクで卓越した性能を発揮しています。
 66 |   - **Orion-14B-Chat-Plugin:**  プラグインと関数呼び出しタスクに特化したチャットモデルで、代理を使用する関連するシナリオに適しています。大規模言語モデルがプラグインと関数呼び出しシステムの役割を果たします。
 67 |   - **Orion-14B-Base-Int4:**  int4を使用して量子化された基本モデル。モデルサイズが70%縮小し、推論速度が30%向上し、わずか1%未満の性能低下しか発生しません。
 68 |   - **Orion-14B-Chat-Int4:**  int4を使用して量子化された対話モデル。
 69 | 
 70 | 
 71 | <a name="model-download"></a><br>
 72 | # 2. モデルのダウンロード
 73 | 
 74 | リリース済みのモデルとダウンロードURLは以下の通りです：
 75 | 
 76 | | モデル名              | HuggingFace ダウンロードリンク                                                    | ModelScope ダウンロードリンク                                                                      |
 77 | |-------------------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
 78 | | ⚾Orion-14B-Base        | [Orion-14B-Base](https://huggingface.co/OrionStarAI/Orion-14B-Base)               | [Orion-14B-Base](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base/summary)               |
 79 | | 😛Orion-14B-Chat        | [Orion-14B-Chat](https://huggingface.co/OrionStarAI/Orion-14B-Chat)               | [Orion-14B-Chat](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat/summary)               |
 80 | | 📃Orion-14B-LongChat    | [Orion-14B-LongChat](https://huggingface.co/OrionStarAI/Orion-14B-LongChat)       | [Orion-14B-LongChat](https://modelscope.cn/models/OrionStarAI/Orion-14B-LongChat/summary)       |
 81 | | 🔎Orion-14B-Chat-RAG    | [Orion-14B-Chat-RAG](https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG)       | [Orion-14B-Chat-RAG](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-RAG/summary)       |
 82 | | 🔌Orion-14B-Chat-Plugin | [Orion-14B-Chat-Plugin](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Plugin) | [Orion-14B-Chat-Plugin](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Plugin/summary) |
 83 | | 💼Orion-14B-Base-Int4   | [Orion-14B-Base-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4)     | [Orion-14B-Base-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base-Int4/summary)     |
 84 | | 📦Orion-14B-Chat-Int4   | [Orion-14B-Chat-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4)     | [Orion-14B-Chat-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Int4/summary)     |
 85 | 
 86 | 
 87 | <a name="model-benchmark"></a><br>
 88 | # 3. モデルのベンチマーク
 89 | 
 90 | ## 3.1. 基本モデル Orion-14B-Base ベンチマーク
 91 | ### 3.1.1. LLM 評価結果（検査と専門知識）
 92 | | モデル              | C-Eval   | CMMLU    | MMLU     | AGIEval  | Gaokao   | BBH      |
 93 | |--------------------|----------|----------|----------|----------|----------|----------|
 94 | | LLaMA2-13B         |   41.4   |   38.4   |   55.0   |   30.9   |   18.2   |   45.6   |
 95 | | Skywork-13B        |   59.1   |   61.4   |   62.7   |   43.6   |   56.1   |   48.3   |
 96 | | Baichuan2-13B      |   59.0   |   61.3   |   59.5   |   37.4   |   45.6   |   49.0   |
 97 | | QWEN-14B           |   71.7   |   70.2   |   67.9   |   51.9   | **62.5** |   53.7   |
 98 | | InternLM-20B       |   58.8   |   59.0   |   62.1   |   44.6   |   45.5   |   52.5   |
 99 | | **Orion-14B-Base** | **72.9** | **70.6** | **69.9** | **54.7** |   62.1   | **56.5** |
100 | 
101 | ### 3.1.2. LLM 評価結果（言語理解と一般的な知識）
102 | | モデル             |RACE-middle|RACE-high |HellaSwag | PIQA     | Lambada  | WSC      |
103 | |--------------------|----------|----------|----------|----------|----------|----------|
104 | | LLaMA 2-13B        |   63.0   |   58.9   |   77.5   |   79.8   |   76.5   |   66.3   |
105 | | Skywork-13B        |   87.6   |   84.1   |   73.7   |   78.3   |   71.8   |   66.3   |
106 | | Baichuan 2-13B     |   68.9   |   67.2   |   70.8   |   78.1   |   74.1   |   66.3   |
107 | | QWEN-14B           |   93.0   |   90.3   | **80.2** |   79.8   |   71.4   |   66.3   |
108 | | InternLM-20B       |   86.4   |   83.3   |   78.1   | **80.3** |   71.8   |   68.3   |
109 | | **Orion-14B-Base** | **93.2** | **91.3** |   78.5   |   79.5   | **78.8** | **70.2** |
110 | 
111 | ### 3.1.3. LLM 評価結果（OpenCompass テストセット）
112 | | モデル | 平均     | 検査       | 言語      | 知識      | 理解      | 推論      |
113 | |------------------|----------|----------|----------|----------|----------|----------|
114 | | LLaMA 2-13B      |   47.3   |   45.2   |   47.0   |   58.3   |   50.9   |   43.6   |
115 | | Skywork-13B      |   53.6   |   61.1   |   51.3   |   52.7   |   64.5   |   45.2   |
116 | | Baichuan 2-13B   |   49.4   |   51.8   |   47.5   |   48.9   |   58.1   |   44.2   |
117 | | QWEN-14B         |   62.4   |   71.3   |   52.67  |   56.1   |   68.8   |   60.1   |
118 | | InternLM-20B     |   59.4   |   62.5   |   55.0   | **60.1** |   67.3   |   54.9   |
119 | |**Orion-14B-Base**| **64.3** | **71.4** | **55.0** |   60.0   | **71.9** | **61.6** |
120 | 
121 | ### 3.1.4. 日本語のテストセットにおけるLLMパフォーマンスの比較
122 | | モデル             |**平均**|  JCQA    |  JNLI    |  MARC    |  JSQD    |  JQK     |  XLS     |  XWN     |  MGSM    |
123 | |--------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
124 | | PLaMo-13B          |   52.3   |   56.7   |   42.8   |   95.8   |   70.6   |   71.0   |   8.70   |   70.5   |   2.40   |
125 | | WebLab-10B         |   50.7   |   66.6   |   53.7   |   82.1   |   62.9   |   56.2   |   10.0   |   72.0   |   2.40   |
126 | | ELYZA-jp-7B        |   48.8   |   71.7   |   25.3   |   86.6   |   70.8   |   64.1   |   2.50   |   62.1   |   7.20   |
127 | | StableLM-jp-7B     |   51.1   |   33.4   |   43.3   | 96.7 |   70.6   |   78.1   |   10.7   |   72.8   |   2.80   |
128 | | LLaMA 2-13B        |   46.3   |   75.0   |   47.6   |   38.8   |   76.1   |   67.7   |   18.1   |   63.2   |   10.4   |
129 | | Baichuan 2-13B     |   57.1   |   73.7   |   31.3   |   91.6   |   80.5   |   63.3   |   18.6   |   72.2   |   25.2   |
130 | | QWEN-14B           |   65.8   |   85.9   |   60.7   |   97.0   |   83.3   |   71.8   |   18.8   |   70.6   |   38.0   |
131 | | Yi-34B             |   67.1   |   83.8   |   61.2   |   95.2   | 86.1 |   78.5   | 27.2 |   69.2   |   35.2   |
132 | | Orion-14B-Base | 69.1 | 88.2 | 75.8 |   94.1   |   75.7   | 85.1 |   17.3   | 78.8 | 38.0 |
133 | 
134 | ### 3.1.5. 韓国語のテストセットにおけるLLMパフォーマンスの比較。n = 0およびn = 5は評価に使用されたn-shotのプロンプトを表します。
135 | |モデル      | **平均**<br>n=0&nbsp;&nbsp;n=5 | HellaSwag<br>n=0&nbsp;&nbsp;n=5 | COPA<br> n=0&nbsp;&nbsp;n=5 | BoolQ<br>n=0&nbsp;&nbsp;n=5 | SentiNeg<br>n=0&nbsp;&nbsp;n=5|
136 | |------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
137 | | KoGPT            |  53.0   &nbsp;&nbsp;   70.1  |  55.9   &nbsp;&nbsp;   58.3  |  73.5   &nbsp;&nbsp;   72.9  |  45.1   &nbsp;&nbsp;   59.8  |  37.5   &nbsp;&nbsp;   89.4  |
138 | | Polyglot-ko-13B  |  69.6   &nbsp;&nbsp;   73.7  |**59.5** &nbsp;&nbsp; **63.1**|**79.4** &nbsp;&nbsp; **81.1**|  48.2   &nbsp;&nbsp;   60.4  |  91.2   &nbsp;&nbsp;   90.2  |
139 | | LLaMA 2-13B      |  46.7   &nbsp;&nbsp;   63.7  |  41.3   &nbsp;&nbsp;   44.0  |  59.3   &nbsp;&nbsp;   63.8  |  34.9   &nbsp;&nbsp;   73.8  |  51.5   &nbsp;&nbsp;   73.4  |
140 | | Baichuan 2-13B   |  52.1   &nbsp;&nbsp;   58.7  |  39.2   &nbsp;&nbsp;   39.6  |  60.6   &nbsp;&nbsp;   60.6  |  58.4   &nbsp;&nbsp;   61.5  |  50.3   &nbsp;&nbsp;   72.9  |
141 | | QWEN-14B         |  53.8   &nbsp;&nbsp;   73.7  |  45.3   &nbsp;&nbsp;   46.8  |  64.9   &nbsp;&nbsp;   68.9  |  33.4   &nbsp;&nbsp;   83.5  |  71.5   &nbsp;&nbsp;   95.7  |
142 | | Yi-34B           |  54.2   &nbsp;&nbsp;   72.1  |  44.6   &nbsp;&nbsp;   44.7  |  58.0   &nbsp;&nbsp;   60.6  |  65.9   &nbsp;&nbsp;   90.2  |  48.3   &nbsp;&nbsp;   92.9  |
143 | |**Orion-14B-Chat**|**74.5** &nbsp;&nbsp; **79.6**|  47.0   &nbsp;&nbsp;   49.6  |  77.7   &nbsp;&nbsp;   79.4  |**81.6** &nbsp;&nbsp; **90.7**|**92.4** &nbsp;&nbsp; **98.7**|
144 | 
145 | ### 3.1.6. 多言語評価
146 | | モデル              | トレーニング言語 | 日本語 | 韓国語 | 中国語 |  英語  |
147 | |--------------------|------------|--------|--------|--------|--------|
148 | | PLaMo-13B          |  英語, 日本語  |   52.3 |   *    |   *    |   *    |
149 | | Weblab-10B         |  英語, 日本語  |   50.7 |   *    |   *    |   *    |
150 | | ELYZA-jp-7B        |  英語, 日本語  |   48.8 |   *    |   *    |   *    |
151 | | StableLM-jp-7B     |  英語, 日本語  |   51.1 |   *    |   *    |   *    |
152 | | KoGPT-6B           |  英語, 韓国語  |   *    |   70.1 |   *    |   *    |
153 | | Polyglot-ko-13B    |  英語, 韓国語  |   *    |   70.7 |   *    |   *    |
154 | | Baichuan2-13B      |  マルチ言語    |   57.1 |   58.7 |   50.8 |   57.1 |
155 | | Qwen-14B           |  マルチ言語    |   65.8 |   73.7 |   64.5 |   65.4 |
156 | | Llama2-13B         |  マルチ言語    |   46.3 |   63.7 |   41.4 |   55.3 |
157 | | Yi-34B             |  マルチ言語    |   67.1 |   72.2 |   58.7 |   68.8 |
158 | | Orion-14B-Chat |  マルチ言語    | 69.1 | 79.5 | 67.9 |   67.3 |
159 | 
160 | ## 3.2. チャットモデル Orion-14B-Chat ベンチマーク
161 | ### 3.2.1. チャットモデルのMTBenchにおける主観的評価
162 | | モデル               | ファーストターン | セカンドターン | 平均 |
163 | |----------------------|----------|----------|----------|
164 | | Baichuan2-13B-Chat   |   7.05   |   6.47   |   6.76   |
165 | | Qwen-14B-Chat        |   7.30   |   6.62   |   6.96   |
166 | | Llama2-13B-Chat      |   7.10   |   6.20   |   6.65   |
167 | | InternLM-20B-Chat    |   7.03   |   5.93   |   6.48   |
168 | | Orion-14B-Chat   | 7.68 | 7.07 | 7.37 |
169 | \* 推論にはvllmを使用
170 | 
171 | ### 3.2.2. チャットモデルのAlignBenchにおける主観的評価
172 | | モデル              | 数学   | 論理   | 基礎   | 中国語   | コンピュータ  | ライティング | 役割   | プロフェッショナリズム |**平均**|
173 | |--------------------|--------|--------|--------|--------|--------|--------|--------|--------|--------|
174 | | Baichuan2-13B-Chat |  3.76  |  4.07  |  6.22  |  6.05  |  7.11  |  6.97  |  6.75  |  6.43  |  5.25  |
175 | | Qwen-14B-Chat      |**4.91**|**4.71**|**6.90**|  6.36  |  6.74  |   6.64  |  6.59  |  6.56  |**5.72**|
176 | | Llama2-13B-Chat    |  3.05  |  3.79  |  5.43  |  4.40  |  6.76  |  6.63  |  6.99  |  5.65  |  4.70  |
177 | | InternLM-20B-Chat  |  3.39  |  3.92  |  5.96  |  5.50  |**7.18**|  6.19  |  6.49  |  6.22  |  4.96  |
178 | | Orion-14B-Chat |  4.00  |  4.24  |  6.18  |**6.57**|  7.16  |**7.36**|**7.16**|**6.99**|  5.51  |
179 | \* 推論にはvllmを使用
180 | 
181 | ## 3.3. LongChatモデルOrion-14B-LongChatのベンチマーク
182 | ### 3.3.1. LongChatによるLongBenchの評価
183 | | モデル          | NarrativeQA|MultiFieldQA-en|MultiFieldQA-zh| DuReader  | QMSum     | VCSUM     | TREC      | TriviaQA  | LSHT      |RepoBench-P|
184 | |--------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
185 | | GPT-3.5-Turbo-16k        | **23.60** | **52.30** | **61.20** |   28.70   |   23.40   | **16.00** |   68.00   | **91.40** |   29.20   |   53.60   |
186 | | LongChat-v1.5-7B-32k     |   16.90   |   41.40   |   29.10   |   19.50   |   22.70   |    9.90   |   63.50   |   82.30   |   23.20   |   55.30   |
187 | | Vicuna-v1.5-7B-16k       |   19.40   |   38.50   |   43.00   |   19.30   |   22.80   |   15.10   |   71.50   |   86.20   |   28.80   |   43.50   |
188 | | Yi-6B-200K               |   14.11   |   36.74   |   22.68   |   14.01   |   20.44   |    8.08   |   72.00   |   86.61   |   38.00   | **63.29** |
189 | | Orion-14B-LongChat       |   19.47   |   48.11   |   55.84   | **37.02** | **24.87** |   15.44   | **77.00** |   89.12   | **45.50** |   54.31   |
190 | 
191 | 
192 | ## 3.4. Chat RAGモデルベンチマーク
193 | ### 3.4.1. 自己構築RAGテストセットのLLM評価結果
194 | |モデル|応答の有効性(キーワード)|*応答の有効性(主観的評価)|引用の能力|フォールバックの能力|*AutoQA|*データ抽出|
195 | |---------------------|------|------|------|------|------|------|
196 | | Baichuan2-13B-Chat  |  85  |  76  |  1   |  0   |  69  |  51  |
197 | | Qwen-14B-Chat       |  79  |  77  |  75  |  47  |  68  |  72  |
198 | | Qwen-72B-Chat(Int4) |  87  |  89  |  90  |  32  |  67  |  76  |
199 | | GPT-4               |  91  |  94  |  96  |  95  |  75  |  86  |
200 | | Orion-14B-Chat-RAG  |  86  |  87  |  91  |  97  |  73  |  71  |
201 | \* 手動評価を意味します
202 | 
203 | ## 3.5. Chat PluginモデルOrion-14B-Chat-Pluginベンチマーク
204 | ### 3.5.1. 自己構築プラグインテストセットのLLM評価結果
205 | |モデル|フルパラメータの意図認識|パラメータが不足している場合の意図認識|非プラグイン呼び出しの認識|
206 | |-----------------------|--------|-----------|--------|
207 | | Baichuan2-13B-Chat    |   25   |   0       |   0    |
208 | | Qwen-14B-Chat         |   55   |   0       |   50   |
209 | | GPT-4                 | **95** |   52.38   |   70   |
210 | | Orion-14B-Chat-Plugin |  92.5  | **60.32** | **90** |
211 | 
212 | 
213 | ## 3.6. 量子化モデルOrion-14B-Base-Int4ベンチマーク
214 | ### 3.6.1. 量子化前後の比較
215 | | モデル|サイズ(GB) | 推論速度（トークン/秒） |C-Eval|CMMLU|MMLU|RACE|HellaSwag|
216 | |-------------------------|-------|-----|------|------|------|------|------|
217 | | OrionStar-14B-Base      |  28.0 | 135 | 72.8 | 70.6 | 70.0 | 93.3 | 78.5 |
218 | | OrionStar-14B-Base-Int4 |  8.3  | 178 | 71.8 | 69.8 | 69.2 | 93.1 | 78.0 |
219 | 
220 | 
221 | <a name="model-inference"></a><br>
222 | # 4. モデル推論
223 | 
224 | 推論に必要なモデルの重み、ソースコード、および設定は、Hugging Faceに公開されており、ダウンロードリンクはこの文書の冒頭にある表に示されています。ここでは、さまざまな推論方法のデモが行われます。プログラムは自動的にHugging Faceから必要なリソースをダウンロードします。
225 | 
226 | ## 4.1. Pythonコード
227 | 
228 | ```python
229 | import torch
230 | from transformers import AutoModelForCausalLM, AutoTokenizer
231 | from transformers.generation.utils import GenerationConfig
232 | 
233 | tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-14B", use_fast=False, trust_remote_code=True)
234 | model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-14B", device_map="auto",
235 |                                              torch_dtype=torch.bfloat16, trust_remote_code=True)
236 | 
237 | model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-14B")
238 | messages = [{"role": "user", "content": "Hello, what is your name? "}]
239 | response = model.chat(tokenizer, messages, streaming=False)
240 | print(response)
241 | 
242 | ```
243 | 
244 | 上記のPythonソースコードでは、モデルは device_map='auto' でロードされ、利用可能なすべてのGPUが利用されます。デバイスを指定するには、 export CUDA_VISIBLE_DEVICES=0,1 のように設定できます（GPU 0および1を使用）。
245 | 
246 | ## 4.2. コマンドラインツール
247 | 
248 | ```shell
249 | CUDA_VISIBLE_DEVICES=0 python cli_demo.py
250 | ```
251 | 
252 | このコマンドラインツールはチャットシナリオ向けに設計されており、基本モデルの呼び出しをサポートしていません。
253 | 
254 | ## 4.3. 直接スクリプト推論
255 | 
256 | ```shell
257 | 
258 | # ベースモデル
259 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-14B --tokenizer OrionStarAI/Orion-14B --prompt hello
260 | 
261 | # チャットモデル
262 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-14B-Chat --tokenizer OrionStarAI/Orion-14B-Chat --prompt hi
263 | ```
264 | 
265 | <a name="vllm"></a><br>
266 | ## 4.4. vLLMを使用した推論
267 | 
268 | - プロジェクトのアドレス<br>
269 |   https://github.com/vllm-project/vllm
270 | 
271 | - プルリクエスト<br>
272 |   https://github.com/vllm-project/vllm/pull/2539
273 | 
274 | <a name="llama-cpp"></a><br>
275 | ## 4.5. llama.cppを使用した推論
276 | 
277 | - プロジェクトのアドレス<br>
278 |   https://github.com/ggerganov/llama.cpp
279 | 
280 | - プルリクエスト<br>
281 |   https://github.com/ggerganov/llama.cpp/pull/5118
282 | 
283 | - GGUF形式に変換する方法
284 | 
285 |   ```shell
286 |   python convert-hf-to-gguf.py path/to/Orion-14B-Chat --outfile chat.gguf
287 |   ```
288 | 
289 | - モデル推論方法
290 | 
291 |   ```shell
292 |   ./main --frequency-penalty 0.5 --top-k 5 --top-p 0.9 -m chat.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e
293 |   ```
294 | 
295 | 
296 | ## 4.6 例の出力
297 | ### 4.6.1 カジュアルチャット
298 | 
299 | `````
300 | User: Hello
301 | Orion-14B: Hello! How can I help you today? If you have any questions or need assistance, feel free to ask.
302 | User: What functions do you have?
303 | Orion-14B: As an AI language model, I have a wide range of functions and capabilities, including but not limited to:
304 |   Answering general knowledge questions: I can provide information on various topics such as history, science, geography, and more.
305 |   Providing definitions and explanations: I can explain complex concepts and provide definitions for words and phrases.
306 |   Assisting with tasks: I can help with simple tasks like setting reminders, converting units of measurement, and even ordering food from restaurants.
307 |   Generating text: I can write short stories, essays, articles, and other types of content based on prompts given to me.
308 |   Translating languages: I can translate text from one language to another.
309 |   Chatting: I can engage in conversation with users and respond to their queries in a conversational manner.
310 |   Summarizing long texts: I can summarize longer texts like articles, books, and research papers.
311 |   Generating random numbers and names: I can generate random numbers within a specified range and also suggest names based on certain criteria.
312 |   Playing games: I can play simple games like Hangman, Trivia, and more.
313 |   Providing recommendations: I can recommend movies, music, books, and other forms of entertainment based on user preferences.
314 |   These are just some of the many functions that I am capable of performing. If you have a specific task in mind, feel free to ask me about it.
315 | User: Tell me a joke.
316 | Orion-14B: Sure, here's a classic one-liner: Why don't scientists trust atoms? Because they make up everything.
317 | `````
318 | 
319 | ### 4.6.2. 日本語と韓国語のチャット
320 | 
321 | `````
322 | User：自己を紹介してください
323 | Orion-14B：こんにちは、私の名前はChatMaxで、OrionStarによって開発されたAIアシスタントです。私はユーザーに正確で、客観的で、有用な回答を提供し、彼らが問題を解決し、情報を取得するのを助けることができます。
324 | User：자기소개를 해주세요.
325 | Orion-14B：안녕하세요, 제 이름은 ChatMax입니다. OrionStar에서 개발한 AI 어시스턴트입니다. 저는 사용자들에게 정확하고 객관적이며 유용한 답변을 제공하여 문제를 해결하고 정보를 얻는 데 도움을 줄 수 있습니다.
326 | `````
327 | 
328 | <a name="declarations-license"></a><br>
329 | # 5. 声明、ライセンス
330 | 
331 | ## 5.1. 声明
332 | 
333 | 弊社は全ての利用者に対して、Orion-14Bモデルを国家や社会の安全を脅かす活動、または違法な活動に使用しないよう強く呼びかけます。さらに、Orion-14Bモデルを適切な安全審査と登録なしにインターネットサービスに使用しないよう要求します。
334 | すべての利用者がこの原則を守り、技術の発展が規範的かつ合法的な環境で進むことを期待しています。弊社はモデルのトレーニングプロセスで使用されるデータのコンプライアンスを確保するために最善の努力をしています。ただし、モデルとデータの複雑性から、予測できない問題が依然として発生する可能性があります。
335 | したがって、Orion-14Bオープンソースモデルの使用によって引き起こされる問題、データセキュリティの問題、公共の意見のリスク、またはモデルが誤誘導、乱用、拡散、または不適切に使用されることによるリスクや問題について、弊社は一切の責任を負いません。
336 | 
337 | ## 5.2. ライセンス
338 | 
339 | Orion-14B シリーズモデルのコミュニティ利用
340 | - コードは  [Apache License Version 2.0](./LICENSE) ライセンスに従ってください。<br>
341 | - モデルは  [【Orion-14B シリーズ】 Models Community License Agreement](./ModelsCommunityLicenseAgreement)に従ってください。
342 | 
343 | 
344 | <a name="company-introduction"></a><br>
345 | # 6. 会社紹介
346 | 
347 | オリオンスター（OrionStar）は、2016年9月に設立された、世界をリードするサービスロボットソリューション企業です。オリオンスターは人工知能技術を基に、次世代の革新的なロボットを開発し、人々が単純な体力労働から解放され、仕事や生活がよりスマートで面白くなるようにすることを目指しています。技術を通じて社会と世界をより良くすることを目指しています。
348 | 
349 | オリオンスターは、完全に独自に開発された全体的な人工知能技術を持っており、音声対話や視覚ナビゲーションなどが含まれます。製品開発能力と技術応用能力を統合しています。オリオンメカニカルアームプラットフォームを基に、OrionStar AI Robot Greeting、AI Robot Greeting Mini、Lucki、Coffee Masterなどの製品を展開し、オリオンスターロボットのオープンプラットフォームであるオリオンOSも構築しています。「本当に有用なロボットのために生まれた」という理念に基づき、オリオンスターはAI技術を通じて多くの人々に力を与えています。
350 | 
351 | 7年間のAI経験を基に、オリオンスターは「聚言」という大規模な深層学習アプリケーションを導入し、業界の顧客向けにカスタマイズされたAI大規模モデルのコンサルティングとサービスソリューションを提供しています。これにより、企業の経営効率を向上させる目標を達成するのに役立っています。
352 | 
353 | オリオンスターの大規模モデルアプリケーション能力の主要な利点には、大量データ処理、大規模モデルの事前トレーニング、二次事前トレーニング、ファインチューニング、プロンプトエンジニアリング、エージェント開発など、全体のチェーンにわたる能力と経験の蓄積が含まれます。 さらに、システム全体のデータ処理フローと数百のGPUによる並列モデルトレーニング能力を含む、エンドツーエンドのモデルトレーニング能力を持っています。これらの能力は、政府業務、クラウドサービス、国際展開の電子商取引、消費財など、さまざまな産業のシーンで実現されています。
354 | 
355 | 大規模モデルアプリケーションの展開に関するニーズがある企業は、お気軽にお問い合わせください。<br>
356 | **Tel: 400-898-7779**<br>
357 | **E-mail: ai@orionstar.com**<br>
358 | **Discord コミュニティ リンク: https://discord.gg/zumjDWgdAs**
359 | 
360 | <div align="center">
361 |   <img src="./assets/imgs/wechat_group.jpg" alt="wechat" width="40%" />
362 | </div>
363 | 
364 | 


--------------------------------------------------------------------------------
/README_ko.md:
--------------------------------------------------------------------------------
  1 | <!-- markdownlint-disable first-line-h1 -->
  2 | <!-- markdownlint-disable html -->
  3 | <div align="center">
  4 |   <img src="./assets/imgs/orion_start.PNG" alt="logo" width="30%" />
  5 | </div>
  6 | 
  7 | <div align="center">
  8 | <h1>
  9 |   Orion-14B
 10 | </h1>
 11 | </div>
 12 | 
 13 | <div align="center">
 14 | 
 15 | <div align="center">
 16 |      <b>🇰🇷한국어</b> | <a href="./README.md">🌐English</a> | <a href="./README_zh.md">🇨🇳中文</a> | <a href="./README_ja.md">🇯🇵日本語</a>
 17 | </div>
 18 | 
 19 | <h4 align="center">
 20 |     <p>
 21 |         🤗 <a href="https://huggingface.co/OrionStarAI" target="_blank">HuggingFace홈페이지</a> | 🤖 <a href="https://modelscope.cn/organization/OrionStarAI" target="_blank">ModelScope홈페이지</a><br>🎬 <a href="https://huggingface.co/spaces/OrionStarAI/Orion-14B-App-Demo" target="_blank">HuggingFace 온라인 체험</a> | 🎫 <a href="https://modelscope.cn/studios/OrionStarAI/Orion-14B-App-Demo/summary" target="_blank">ModelScope 온라인 체험</a><br>😺 <a href="https://github.com/OrionStarAI/Orion" target="_blank">GitHub</a><br>📖 <a href="https://github.com/OrionStarAI/Orion/blob/master/doc/Orion14B_v3.pdf" target="_blank">기술 리포트</a>
 22 |     </p>
 23 | </h4>
 24 | 
 25 | </div>
 26 | 
 27 | 
 28 | 
 29 | # 목록
 30 | 
 31 | - [📖 모형 소개](#model-introduction)
 32 | - [🔗 다운로드 경로](#model-download)
 33 | - [🔖 평가결과](#model-benchmark)
 34 | - [📊 모형 추리](#model-inference) [<img src="./assets/imgs/vllm.png" alt="vllm" style="margin: 0;display: initial;" height="20" />](#vllm) [<img src="./assets/imgs/llama_cpp.png" alt="llamacpp" style="margin: 0;display: initial;" height="20" />](#llama-cpp)
 35 | - [📜 성명 합의](#declarations-license)
 36 | - [🥇 기업 소개](#company-introduction)
 37 | 
 38 | 
 39 | <a name="model-introduction"></a><br>
 40 | # 1. 모델 소개
 41 | 
 42 | 
 43 | - Orion-14B-Base는 2.5조 토큰의 다양한 데이터 집합으로 훈련된 140억 개의 파라메터를 가진 다중 언어 모델이다. 중국어, 영어, 일본어, 한국어 및 기타 언어를 포함한다. 다중 언어 환경에서 일련의 업무에서 탁월한 성능을 보인다. Orion-14B 시리즈의 모델들은 주요 공개 기준 측정에서 우수한 성적을 거두었으며 여러가지 지표가 동일한 파라메터를 가진 다른 모델들을 현저히 초월한다. 구체적인 기술 디테일은 [기술보고서](https://github.com/OrionStarAI/Orion/blob/master/doc/Orion14B_v3.pdf)를 참고하세요.
 44 | 
 45 | 
 46 | - Orion-14B시리즈 대형 모델은 다음과 같은 특징이 있다.
 47 |   - 베이스20B 파라메터 레벨인 대형 모델의 종합적인 평가 결과가 우수하다
 48 |   - 다국어 능력이 뛰어나고 일본어와 한국어 테스트 세트에서 현저히 앞선다
 49 |   - 미세조정 모델은 적응성이 강하며 인위 표시의 블라인드 테스트에서 활약이 두드러진다
 50 |   - 긴 컨텍스트 버전은 매우 긴 텍스트를 지원하며, 200k 토큰 길이에서 뛰어난 성능을 보이고 최대 320k까지 지원한다
 51 |   - 정량화 버전은 모델 크기를 70% 줄이고 추론 속도를 30% 높이며 성능 손실은 1% 미만이다
 52 |  <table style="border-collapse: collapse; width: 100%;">
 53 |    <tr>
 54 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 55 |        <img src="./assets/imgs/opencompass_en.png" alt="opencompass" style="width: 100%; height: auto;">
 56 |      </td>
 57 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 58 |        <img src="./assets/imgs/model_cap_en.png" alt="modelcap" style="width: 100%; height: auto;">
 59 |      </td>
 60 |    </tr>
 61 |  </table>
 62 | 
 63 | - 구체적으로 말하면 Orion-14B시리즈 대형 언어 모델은 다음과 같은 내용을 포함한다:
 64 |   - **Orion-14B-Base:** 2.5조 토큰의 다양화 데이터 세트를 기반으로 한 140억 파라메터 규모의 다언어 기반 모델.
 65 |   - **Orion-14B-Chat:** 고퀄리티 코퍼스 미세조정을 기반으로 한 대화형 모델. 대형 모델 커뮤니티를 위해 더 나은 사용자 인터랙션 경험을 제공하도록 한다.
 66 |   - **Orion-14B-LongChat:** 200k 토큰 길이에 효과적이며 최대 320k까지 지원하며 긴 텍스트 평가 세트에서 독점 모델과 비교할 수 있다.
 67 |   - **Orion-14B-Chat-RAG:** 맞춰 제정된 검색 향상 생성 데이터 세트에서 미세조정하여 검색 향상 생성 작업에서 뛰어난 성능을 제공한 채팅 모델.
 68 |   - **Orion-14B-Chat-Plugin:** 플러그인 및 함수 전용 작업에 맞춰 제정된 채팅 모델. 에이전트와 관련된 상황에 아주 잘 적용되어 대형 언어 모델이 플러그인 및 함수 전용 시스템의 역할을 한다.
 69 |   - **Orion-14B-Base-Int4:** int4로 계량화하는 베이스 모델. 모델 크기를 70%를 줄이며 추리 속도를 30% 높여 1%의 최소한의 성능 손실만 가져왔다.
 70 |   - **Orion-14B-Chat-Int4:** int4로 계량화하는 대화 모델.
 71 | 
 72 | 
 73 | <a name="model-download"></a><br>
 74 | # 2. 다운로드 경로
 75 | 
 76 | 발표된 모델 및 다운로드 링크는 다음 표를 참조하세요:
 77 | 
 78 | | 모델 명칭              | HuggingFace다운로드 링크                                                              | ModelScope다운로드 링크                                                                              |
 79 | |---------------------|-----------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|
 80 | | ⚾ 베이스 모델           | [Orion-14B-Base](https://huggingface.co/OrionStarAI/Orion-14B-Base)               | [Orion-14B-Base](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base/summary)              |
 81 | | 😛 대화 모델           | [Orion-14B-Chat](https://huggingface.co/OrionStarAI/Orion-14B-Chat)               | [Orion-14B-Chat](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat/summary)              |
 82 | | 📃 긴 컨텍스트 모델        | [Orion-14B-LongChat](https://huggingface.co/OrionStarAI/Orion-14B-LongChat)       | [Orion-14B-LongChat](https://modelscope.cn/models/OrionStarAI/Orion-14B-LongChat/summary)      |
 83 | | 🔎 검색 향상 모델        | [Orion-14B-Chat-RAG](https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG)       | [Orion-14B-Chat-RAG](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-RAG/summary)      |
 84 | | 🔌 플러그인 모델         | [Orion-14B-Chat-Plugin](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Plugin) | [Orion-14B-Chat-Plugin](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Plugin/summary)|
 85 | | 💼 베이스Int4계량화 모델    | [Orion-14B-Base-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4)     | [Orion-14B-Base-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base-Int4/summary)    |
 86 | | 📦 대화Int4계량화 모델    | [Orion-14B-Chat-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4)     | [Orion-14B-Chat-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Int4/summary)    |
 87 | 
 88 | 
 89 | <a name="model-benchmark"></a><br>
 90 | # 3. 평가 결과
 91 | 
 92 | ## 3.1. 베이스 모델Orion-14B-Base평가
 93 | 
 94 | ### 3.1.1. 전문 지식 및 시험문제 평가 결과
 95 | | 모델 명칭            | C-Eval   | CMMLU    | MMLU     | AGIEval  | Gaokao   | BBH      |
 96 | |--------------------|----------|----------|----------|----------|----------|----------|
 97 | | LLaMA2-13B         |   41.4   |   38.4   |   55.0   |   30.9   |   18.2   |   45.6   |
 98 | | Skywork-13B        |   59.1   |   61.4   |   62.7   |   43.6   |   56.1   |   48.3   |
 99 | | Baichuan2-13B      |   59.0   |   61.3   |   59.5   |   37.4   |   45.6   |   49.0   |
100 | | QWEN-14B           |   71.7   |   70.2   |   67.9   |   51.9   | **62.5** |   53.7   |
101 | | InternLM-20B       |   58.8   |   59.0   |   62.1   |   44.6   |   45.5   |   52.5   |
102 | | **Orion-14B-Base** | **72.9** | **70.6** | **69.9** | **54.7** |   62.1   | **56.5** |
103 | 
104 | ### 3.1.2. 이해 및 상식 평가 결과
105 | | 모델 명칭            |RACE-middle|RACE-high| HellaSwag| PIQA     | Lambada  | WSC      |
106 | |--------------------|----------|----------|----------|----------|----------|----------|
107 | | LLaMA 2-13B        |   63.0   |   58.9   |   77.5   |   79.8   |   76.5   |   66.3   |
108 | | Skywork-13B        |   87.6   |   84.1   |   73.7   |   78.3   |   71.8   |   66.3   |
109 | | Baichuan 2-13B     |   68.9   |   67.2   |   70.8   |   78.1   |   74.1   |   66.3   |
110 | | QWEN-14B           |   93.0   |   90.3   | **80.2** |   79.8   |   71.4   |   66.3   |
111 | | InternLM-20B       |   86.4   |   83.3   |   78.1   | **80.3** |   71.8   |   68.3   |
112 | | **Orion-14B-Base** | **93.2** | **91.3** |   78.5   |   79.5   | **78.8** | **70.2** |
113 | 
114 | ### 3.1.3. OpenCompass평가 세트 평가 결과
115 | | 모델 명칭 | Average | Examination | Language | Knowledge | Understanding | Reasoning |
116 | |------------------|----------|----------|----------|----------|----------|----------|
117 | | LLaMA 2-13B      |   47.3   |   45.2   |   47.0   |   58.3   |   50.9   |   43.6   |
118 | | Skywork-13B      |   53.6   |   61.1   |   51.3   |   52.7   |   64.5   |   45.2   |
119 | | Baichuan 2-13B   |   49.4   |   51.8   |   47.5   |   48.9   |   58.1   |   44.2   |
120 | | QWEN-14B         |   62.4   |   71.3   |   52.67  |   56.1   |   68.8   |   60.1   |
121 | | InternLM-20B     |   59.4   |   62.5   |   55.0   | **60.1** |   67.3   |   54.9   |
122 | |**Orion-14B-Base**| **64.3** | **71.4** | **55.0** |   60.0   | **71.9** | **61.6** |
123 | 
124 | ### 3.1.4. 일본어 테스트 세트 평가 결과
125 | |   모델 명칭         |**Average**|  JCQA    |  JNLI    |  MARC    |  JSQD   |  JQK     |  XLS     |  XWN     |  MGSM    |
126 | |--------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
127 | | PLaMo-13B          |   52.3   |   56.7   |   42.8   |   95.8   |   70.6   |   71.0   |   8.70   |   70.5   |   2.40   |
128 | | WebLab-10B         |   50.7   |   66.6   |   53.7   |   82.1   |   62.9   |   56.2   |   10.0   |   72.0   |   2.40   |
129 | | ELYZA-jp-7B        |   48.8   |   71.7   |   25.3   |   86.6   |   70.8   |   64.1   |   2.50   |   62.1   |   7.20   |
130 | | StableLM-jp-7B     |   51.1   |   33.4   |   43.3   | **96.7** |   70.6   |   78.1   |   10.7   |   72.8   |   2.80   |
131 | | LLaMA 2-13B        |   46.3   |   75.0   |   47.6   |   38.8   |   76.1   |   67.7   |   18.1   |   63.2   |   10.4   |
132 | | Baichuan 2-13B     |   57.1   |   73.7   |   31.3   |   91.6   |   80.5   |   63.3   |   18.6   |   72.2   |   25.2   |
133 | | QWEN-14B           |   65.8   |   85.9   |   60.7   |   97.0   |   83.3   |   71.8   |   18.8   |   70.6   |   38.0   |
134 | | Yi-34B             |   67.1   |   83.8   |   61.2   |   95.2   | **86.1** |   78.5   | **27.2** |   69.2   |   35.2   |
135 | | **Orion-14B-Base** | **69.1** | **88.2** | **75.8** |   94.1   |   75.7   | **85.1** |   17.3   | **78.8** | **38.0** |
136 | 
137 | ### 3.1.5. 한국어 테스트 세트n-shot평가 결과
138 | | 모델 명칭  | **Average**<br>n=0&nbsp;&nbsp;n=5 | HellaSwag<br>n=0&nbsp;&nbsp;n=5 | COPA<br> n=0&nbsp;&nbsp;n=5 | BoolQ<br>n=0&nbsp;&nbsp;n=5 | SentiNeg<br>n=0&nbsp;&nbsp;n=5|
139 | |------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
140 | | KoGPT            |  53.0   &nbsp;&nbsp;   70.1  |  55.9   &nbsp;&nbsp;   58.3  |  73.5   &nbsp;&nbsp;   72.9  |  45.1   &nbsp;&nbsp;   59.8  |  37.5   &nbsp;&nbsp;   89.4  |
141 | | Polyglot-ko-13B  |  69.6   &nbsp;&nbsp;   73.7  |**59.5** &nbsp;&nbsp; **63.1**|**79.4** &nbsp;&nbsp; **81.1**|  48.2   &nbsp;&nbsp;   60.4  |  91.2   &nbsp;&nbsp;   90.2  |
142 | | LLaMA 2-13B      |  46.7   &nbsp;&nbsp;   63.7  |  41.3   &nbsp;&nbsp;   44.0  |  59.3   &nbsp;&nbsp;   63.8  |  34.9   &nbsp;&nbsp;   73.8  |  51.5   &nbsp;&nbsp;   73.4  |
143 | | Baichuan 2-13B   |  52.1   &nbsp;&nbsp;   58.7  |  39.2   &nbsp;&nbsp;   39.6  |  60.6   &nbsp;&nbsp;   60.6  |  58.4   &nbsp;&nbsp;   61.5  |  50.3   &nbsp;&nbsp;   72.9  |
144 | | QWEN-14B         |  53.8   &nbsp;&nbsp;   73.7  |  45.3   &nbsp;&nbsp;   46.8  |  64.9   &nbsp;&nbsp;   68.9  |  33.4   &nbsp;&nbsp;   83.5  |  71.5   &nbsp;&nbsp;   95.7  |
145 | | Yi-34B           |  54.2   &nbsp;&nbsp;   72.1  |  44.6   &nbsp;&nbsp;   44.7  |  58.0   &nbsp;&nbsp;   60.6  |  65.9   &nbsp;&nbsp;   90.2  |  48.3   &nbsp;&nbsp;   92.9  |
146 | |**Orion-14B-Base**|**74.5** &nbsp;&nbsp; **79.6**|  47.0   &nbsp;&nbsp;   49.6  |  77.7   &nbsp;&nbsp;   79.4  |**81.6** &nbsp;&nbsp; **90.7**|**92.4** &nbsp;&nbsp; **98.7**|
147 | 
148 | ### 3.1.6. 다국어 평가 결과
149 | | 모델 명칭            | Train Lang | Japanese | Korean   | Chinese  |  English |
150 | |--------------------|------------|----------|----------|----------|----------|
151 | | PLaMo-13B          |  En,Jp     |   52.3   |   *      |   *      |   *      |
152 | | Weblab-10B         |  En,Jp     |   50.7   |   *      |   *      |   *      |
153 | | ELYZA-jp-7B        |  En,Jp     |   48.8   |   *      |   *      |   *      |
154 | | StableLM-jp-7B     |  En,Jp     |   51.1   |   *      |   *      |   *      |
155 | | KoGPT-6B           |  En,Ko     |   *      |   70.1   |   *      |   *      |
156 | | Polyglot-ko-13B    |  En,Ko     |   *      |   70.7   |   *      |   *      |
157 | | Baichuan2-13B      |  Multi     |   57.1   |   58.7   |   50.8   |   57.1   |
158 | | Qwen-14B           |  Multi     |   65.8   |   73.7   |   64.5   |   65.4   |
159 | | Llama2-13B         |  Multi     |   46.3   |   63.7   |   41.4   |   55.3   |
160 | | Yi-34B             |  Multi     |   67.1   |   72.2   |   58.7   | **68.8** |
161 | | **Orion-14B-Base** |  Multi     | **69.1** | **79.5** | **67.9** |   67.3   |
162 | 
163 | ## 3.2. 대화 모델Orion-14B-Chat평가
164 | ### 3.2.1. 대화 모델MTBench주관적 평가
165 | | 모델 명칭              |   1라운드  |  2라운드   |  **평균** |
166 | |----------------------|----------|----------|----------|
167 | | Baichuan2-13B-Chat   |   7.05   |   6.47   |   6.76   |
168 | | Qwen-14B-Chat        |   7.30   |   6.62   |   6.96   |
169 | | Llama2-13B-Chat      |   7.10   |   6.20   |   6.65   |
170 | | InternLM-20B-Chat    |   7.03   |   5.93   |   6.48   |
171 | | **Orion-14B-Chat**   | **7.68** | **7.07** | **7.37** |
172 | 
173 | \*이 평가는 vllm을 이용하여 추리한다
174 | 
175 | ### 3.2.2. 대화 모델AlignBench주관적 평가
176 | | 모델 명칭             | 수학 능력  | 논리적 추리  | 기본 능력   | 중국어 이해  | 종합적 문답  | 글쓰기 능력 | 롤 플레이  | 전문 지식 | **평균**  |
177 | |--------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
178 | | Baichuan2-13B-Chat |   3.76   |   4.07   |   6.22   |   6.05   |   7.11   |   6.97   |   6.75   |   6.43   |   5.25   |
179 | | Qwen-14B-Chat      | **4.91** | **4.71** | **6.90** |   6.36   |   6.74   |   6.64   |   6.59   |   6.56   | **5.72** |
180 | | Llama2-13B-Chat    |   3.05   |   3.79   |   5.43   |   4.40   |   6.76   |   6.63   |   6.99   |   5.65   |   4.70   |
181 | | InternLM-20B-Chat  |   3.39   |   3.92   |   5.96   |   5.50   | **7.18** |   6.19   |   6.49   |   6.22   |   4.96   |
182 | | **Orion-14B-Chat** |   4.00   |   4.24   |   6.18   | **6.57** |   7.16   | **7.36** | **7.16** | **6.99** |   5.51   |
183 | 
184 | \*이 평가는 vllm을 이용하여 추리한다
185 | 
186 | ## 3.3. 긴 컨텍스트 모델Orion-14B-LongChat평가
187 | ### 3.3.1. 긴 컨텍스트 모델LongBench평가
188 | | 모델 명칭              | NarrativeQA| MultiFieldQA-en| MultiFieldQA-zh | DuReader  | QMSum     | VCSUM  | TREC   | TriviaQA | LSHT   | RepoBench-P |
189 | |--------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
190 | | GPT-3.5-Turbo-16k        | **23.60** | **52.30** | **61.20** |   28.70   |   23.40   | **16.00** |   68.00   | **91.40** |   29.20   |   53.60   |
191 | | LongChat-v1.5-7B-32k     |   16.90   |   41.40   |   29.10   |   19.50   |   22.70   |    9.90   |   63.50   |   82.30   |   23.20   |   55.30   |
192 | | Vicuna-v1.5-7B-16k       |   19.40   |   38.50   |   43.00   |   19.30   |   22.80   |   15.10   |   71.50   |   86.20   |   28.80   |   43.50   |
193 | | Yi-6B-200K               |   14.11   |   36.74   |   22.68   |   14.01   |   20.44   |    8.08   |   72.00   |   86.61   |   38.00   | **63.29** |
194 | | Orion-14B-LongChat       |   19.47   |   48.11   |   55.84   | **37.02** | **24.87** |   15.44   | **77.00** |   89.12   | **45.50** |   54.31   |
195 | 
196 | ## 3.4. 검색 향상 모델Orion-14B-Chat-RAG평가
197 | ### 3.4.1. 자기 만든 검색 향상 테스트 세트 평가 결과
198 | |모델 명칭|응답 효과(키워드)|*응답 효과(주관적 점수)|인용 능력|기본 떠맡는 능력|*AutoQA|*데이터 추출|
199 | |---------------------|------|------|------|------|------|------|
200 | | Baichuan2-13B-Chat  |  85  |  76  |  1   |  0   |  69  |  51  |
201 | | Qwen-14B-Chat       |  79  |  77  |  75  |  47  |  68  |  72  |
202 | | Qwen-72B-Chat(Int4) |  87  |  89  |  90  |  32  |  67  |  76  |
203 | | GPT-4               |  91  |  94  |  96  |  95  |  75  |  86  |
204 | | Orion-14B-Chat-RAG  |  86  |  87  |  91  |  97  |  73  |  71  |
205 |  \* 사람 평가 결과를 가리킨다
206 | 
207 | ## 3.5. 플러그인 모델Orion-14B-Chat-Plugin평가
208 | ### 3.5.1. 자체 구축 플러그인 테스트 세트 평가 결과
209 | | 모델 명칭  | 전체 파라미터 의도 식별 | 파라미터 누락 의도 식별 | 비플러그인 호출 식별 |
210 | |-----------------------|--------|-----------|--------|
211 | | Baichuan2-13B-Chat    |   25   |   0       |   0    |
212 | | Qwen-14B-Chat         |   55   |   0       |   50   |
213 | | GPT-4                 | **95** |   52.38   |   70   |
214 | | Orion-14B-Chat-Plugin |   92.5 | **60.32** | **90** |
215 | 
216 | ## 3.6. 양자화 모델Orion-14B-Base-Int4평가
217 | ### 3.6.1. 양자화 전후 전반적인 비교
218 | |모델 명칭|모델 크기(GB)|추리 속도(토큰 수/초)|C-Eval |CMMLU |MMLU |RACE | HellaSwag|
219 | |-------------------------|------|-----|------|------|------|------|------|
220 | | OrionStar-14B-Base      | 28.0 | 135 | 72.8 | 70.6 | 70.0 | 93.3 | 78.5 |
221 | | OrionStar-14B-Base-Int4 |  8.3 | 178 | 71.8 | 69.8 | 69.2 | 93.1 | 78.0 |
222 | 
223 | 
224 | <a name="model-inference"></a><br>
225 | # 4. 모델 추리
226 | 
227 | 추론에 필요한 모델 가중치, 소스 코드, 설정은 Hugging Face에 게시되어 있으며, 다운로드 링크는 이 문서 앞부분의 표를 참조하세요. 여기서는 다양한 추론 방식을 소개합니다. 프로그램은 필요한 자료를 Hugging Face에서 자동으로 다운로드합니다.
228 | 
229 | ## 4.1. Python 코드 방식
230 | 
231 | ```python
232 | import torch
233 | from transformers import AutoModelForCausalLM, AutoTokenizer
234 | from transformers.generation.utils import GenerationConfig
235 | 
236 | tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-14B", use_fast=False, trust_remote_code=True)
237 | model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-14B", device_map="auto",
238 |                                              torch_dtype=torch.bfloat16, trust_remote_code=True)
239 | 
240 | model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-14B")
241 | messages = [{"role": "user", "content": "안녕! 이름이 뭐예요!"}]
242 | response = model.chat(tokenizer, messages, streaming=False)
243 | print(response)
244 | 
245 | ```
246 | 
247 | 위 코드에서 모델을 `device_map='auto'`로 지정하여 로딩하면 사용 가능한 모든 그래픽 카드를 사용한다. 사용할 장치를 지정하려면 `export CUDA_VISIBLE_DEVICES=0,1`(그래픽 카드 0과 1을 사용)과 같은 방식으로 제어할 수 있다.
248 | 
249 | ## 4.2. 명령줄 툴 방식
250 | 
251 | ```shell
252 | CUDA_VISIBLE_DEVICES=0 python cli_demo.py
253 | ```
254 | 
255 | 이 명령줄 툴은 Chat 시나리오를 위해 설계되었으므로, 이 툴로 베이스 모델을 호출하는 것은 지원하지 않는다.
256 | 
257 | ## 4.3. 스크립트 직접 추리
258 | 
259 | ```shell
260 | # base model
261 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-14B --tokenizer OrionStarAI/Orion-14B --prompt "안녕. 이름이 뭐예요"
262 | 
263 | # chat model
264 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-14B-Chat --tokenizer OrionStarAI/Orion-14B-Chat --prompt "안녕. 이름이 뭐예요"
265 | 
266 | ```
267 | 
268 | <a name="vllm"></a><br>
269 | ## 4.4. vLLM 추론을 통해
270 | 
271 | - 프로젝트 주소<br>
272 |   https://github.com/vllm-project/vllm
273 | 
274 | - 풀 리퀘스트<br>
275 |   https://github.com/vllm-project/vllm/pull/2539
276 | 
277 | 
278 | <a name="llama-cpp"></a><br>
279 | ## 4.5. llama.cpp 추론을 통해
280 | 
281 | - 프로젝트 주소<br>
282 |   https://github.com/ggerganov/llama.cpp
283 | 
284 | - 풀 리퀘스트<br>
285 |   https://github.com/ggerganov/llama.cpp/pull/5118
286 | 
287 | - GGUF 형식으로 변환하는 방법
288 | 
289 |   ```shell
290 |   python convert-hf-to-gguf.py path/to/Orion-14B-Chat --outfile chat.gguf
291 |   ```
292 | 
293 | - 모델 추론 방법
294 | 
295 |   ```shell
296 |   ./main --frequency-penalty 0.5 --top-k 5 --top-p 0.9 -m chat.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e
297 |   ```
298 | 
299 | 
300 | ## 4.6. 예시 노출
301 | 
302 | ### 4.6.1. 잡담
303 | 
304 | `````
305 | 사용자：안녕,이름이 뭐예요
306 | Orion-14B：안녕! 저는 인공지능 조수 입니다. 정해진 이름은 없고 마음대로 불러도 됩니다. 도와줄 수 있는 거 있나요?
307 | 사용자：이야기 하나 나눠줘
308 | Orion-14B：예전에 잭이라는 어린 소년이 있었다. 그는 작은 마을에 살았다. 잭은 모험과 탐구를 매우 좋아해서 항상 신기한 것을 발견하는 꿈을 꾼다.
309 | 
310 | 어느날 잭은 근처의 숲 깊은 곳에 전설속의 마법성이 있어 그 안에 사악한 거인이 산다는 소식을 든다. 그 거인의 이름은 그림이었다. 그는 성 안에 갇힌 채 떠날 수 없었다.
311 | 
312 | 잭은 그림이 그곳에 갇혀있는 이유가 꼭 있을 거라고 믿기 때문에 거인을 구하러 가기로 결심한다. 그리하여 그는 용기와 결심을 가져 마법의 성으로 가는 여정을 시작한다.
313 | 
314 | 오랜 고생 끝에 잭은 마침내 성보 대문 앞에 다다랐다. 그는 조심스럽게 대문을 열고 신비한 기운이 가득한 방으로 들어갔다.
315 | 
316 | 방안에 어두운 기운이 가득했지만 잭은 놀라지 않았다. 그는 용감하게 성 안으로 들어가 마침내 그림을 찾아냈다.
317 | 
318 | 그림은 자신은 사악한 마녀의 저주를 받아서 저주를 풀어야만 자유로워질 수 있다고 잭에게 말한다. 잭은 그림을 돕기로 결정하여 단서와 주문을 풀 방법을 찾기 시작한다.
319 | 
320 | 한바탕의 노력 끝에 잭은 드디어 마녀의 비밀 실험실로 가는 길을 찾아낸다. 그는 그곳에서 마녀가 남긴 마법서를 발견했는데 그 책에서 주문을 푸는 방법이 적혀 있었다.
321 | 
322 | 잭은 책에 쓰인 절차를 따라하여 그림의 저주를 성공적으로 풀어냈다. 거인은 너무 감사해서 잭의 소원이 이루어지도록 돕겠다고 약속했다.
323 | 
324 | 이때로부터 잭과 그림은 좋은 친구가 되어 많은 위험하고 짜릿한 모험을 함께 겪었으며 그들의 용기와 지혜로 각종 어려움을 이겨냈다.
325 | 
326 | 이 이야기는 저희에게 용기와 결심이 있다면 모든 어려움을 극복하고 자신의 꿈을 이룰 수 있다는 것을 알려준다.
327 | `````
328 | 
329 | ### 4.6.2. 한일
330 | 
331 | `````
332 | 사용자：自己を紹介してください
333 | Orion-14B：こんにちは、私の名前はChatMaxで、OrionStarによって開発されたAIアシスタントです。私はユーザーに正確で、客観的で、有用な回答を提供し、彼らが問題を解決し、情報を取得するのを助けることができます。
334 | 사용자：자기소개를 해주세요.
335 | Orion-14B：안녕하세요, 제 명칭은 ChatMax입니다. OrionStar에서 개발한 AI 어시스턴트입니다. 저는 사용자들에게 정확하고 객관적이며 유용한 답변을 제공하여 문제를 해결하고 정보를 얻는 데 도움을 줄 수 있습니다.
336 | `````
337 | 
338 | 
339 | <a name="declarations-license"></a><br>
340 | # 5. 성명, 협의
341 | 
342 | ## 5.1. 성명
343 | 
344 | 저희는 모든 사용자에게 Orion-14B 모델을 이용하여 국가와 사회의 안전을 해치거나 불법적인 행위를 하지 않도록 강력히 호소한다. 또한 적절한 보안 검토와 등록 절차를 거치지 않은 인터넷 서비스에 Orion-14B 모델을 사용하지 말 것을 요청한다.
345 | 
346 | 저희는 모든 사용자가 이 원칙을 지키며 기술의 발전이 규범적이고 합법적인 환경에서 이루어질 수 있기를 바란다.
347 | 저희는 모델 훈련 과정에서 사용된 데이터의 규정 준수를 보장하기 위해 최선을 다하였다. 그러나 막대한 노력을 기울였음에도 불구하고 모델과 데이터의 복잡성으로 인해 예견할 수 없는 문제들이 여전히 존재할 수 있다. 따라서 Orion-14B 오픈소스 모델의 사용으로 야기된 모든 문제(데이터 보안 문제, 여론 위험, 모델의 오도·남용·전파 또는 부적절한 사용으로 인한 위험과 문제를 포함하되 이에 국한되지 않음)에 대해 저희는 책임을 지지 않는다.
348 | 
349 | ## 5.2. 협의
350 | 
351 | Orion-14B 시리즈 모델의 커뮤니티 사용
352 | - 코드는 [Apache License Version 2.0](./LICENSE)을 따르세요<br>
353 | - 모델은 [Orion-14B 시리즈 모델 커뮤니티 라이선스 계약](./ModelsCommunityLicenseAgreement)을 따르세요
354 | 
355 | 
356 | <a name="company-introduction"></a><br>
357 | # 6. 회사소개
358 | 
359 | 오리온 스타（OrionStar）는 2016년 9월 설립된 세계 최고의 서비스 로봇 솔루션 회사이다. 오리온 스타는 인공지능 기술을 바탕으로 차세대 혁명적 로봇을 만들어 사람들이 반복적인 육체노동에서 벗어나 일과 생활을 더욱 지능적이고 재미있게 영위하도록 하며, 기술을 통해 사회와 세계를 더욱 아름답게 만드는 데 힘을 기울이고 있다.
360 | 
361 | 오리온 스타는 음성 인터랙션과 시각 내비게이션 등 완전히 독자적으로 개발한 풀 체인 인공지능 기술을 가지고 있다. 저희는 프로덕트 개발 능력과 기술 응용 능력을 통합하였다. 오리온 로봇 팔 플랫폼을 기반으로 ORIONSTAR AI Robot Greeting, AI Robot Greeting Mini, Lucki, CoffeeMaster 등의 프로덕트를 출시하였으며 오리온 로봇의 오픈 플랫폼인 OrionOS를 구축하였다. **진짜 유용한 로봇을 위해 태어났다**는 이념을 실천하며 AI 기술을 통해 더 많은 사람들에게 능력을 부여한다.
362 | 
363 | 7년간 축적한 AI 경험을 바탕으로 오리온 스타는 대형 모델 심층 응용 "쥐언(Chatmax)"을 출시했고, 업계 고객에게 맞춤형 AI 대형 모델 컨설팅과 서비스 솔루션을 지속적으로 제공하여 고객이 동종 업계를 앞서는 기업 경영 효율이라는 목표를 실제로 달성할 수 있도록 돕고 있다.
364 | 
365 | **오리온 스타는 풀 체인 대형 모델 응용 능력이라는 핵심 강점을 갖고 있다.** 여기에는 대량 데이터 처리, 대형 모델 사전 훈련, 2차 사전 훈련, 미세 조정(Fine-tune), Prompt Engineering, Agent 개발에 이르는 풀 체인 능력과 경험 축적이 포함된다. 또한 체계화된 데이터 처리 절차와 수백 개 GPU의 병렬 모델 훈련 능력을 포함한 완전한 엔드투엔드 모델 훈련 능력을 갖추고 있으며, 현재 대형 정무, 클라우드 서비스, 크로스보더 전자상거래, 일용소비재 등 여러 업계에서 실제로 적용되고 있다.
366 | 
367 | ***대형 모델 응용 구현이 필요하신 기업의 연락을 환영합니다***<br>
368 | **문의 전화:** 400-898-7779<br>
369 | **이메일:** ai@orionstar.com<br>
370 | **Discord 커뮤니티 링크:** https://discord.gg/zumjDWgdAs
371 | 
372 | <div align="center">
373 |   <img src="./assets/imgs/wechat_group.jpg" alt="wechat" width="40%" />
374 | </div>
375 | 


--------------------------------------------------------------------------------
/README_zh.md:
--------------------------------------------------------------------------------
  1 | <!-- markdownlint-disable first-line-h1 -->
  2 | <!-- markdownlint-disable html -->
  3 | <div align="center">
  4 |   <img src="./assets/imgs/orion_start.PNG" alt="logo" width="50%" />
  5 | </div>
  6 | 
  7 | <div align="center">
  8 | <h1>
  9 |   Orion-14B
 10 | </h1>
 11 | </div>
 12 | 
 13 | <div align="center">
 14 | 
 15 | <div align="center">
 16 |      <b>🇨🇳中文</b> | <a href="./README.md">🌐English</a> | <a href="./README_ja.md">🇯🇵日本語</a> | <a href="./README_ko.md">🇰🇷한국어</a>
 17 | </div>
 18 | 
 19 | <h4 align="center">
 20 |     <p>
 21 |         🤗 <a href="https://huggingface.co/OrionStarAI" target="_blank">HuggingFace Mainpage</a> | 🤖 <a href="https://modelscope.cn/organization/OrionStarAI" target="_blank">ModelScope Mainpage</a><br>🎬 <a href="https://huggingface.co/spaces/OrionStarAI/Orion-14B-App-Demo" target="_blank">HuggingFace Demo</a> | 🎫 <a href="https://modelscope.cn/studios/OrionStarAI/Orion-14B-App-Demo/summary" target="_blank">ModelScope Demo</a><br>😺 <a href="https://github.com/OrionStarAI/Orion" target="_blank">GitHub</a><br>📖 <a href="https://arxiv.org/pdf/2401.12246.pdf" target="_blank">Tech Report</a>
 22 |     </p>
 23 | </h4>
 24 | 
 25 | </div>
 26 | 
 27 | 
 28 | # 目录
 29 | 
 30 | 
 31 | - [📖 模型介绍](#zh_model-introduction)
 32 | - [🔗 下载路径](#zh_model-download)
 33 | - [🔖 评估结果](#zh_model-benchmark)
 34 | - [📊 模型推理](#zh_model-inference) [<img src="./assets/imgs/vllm.png" alt="vllm" style="margin: 0;display: initial;" height="20" />](#vllm) [<img src="./assets/imgs/llama_cpp.png" alt="llamacpp" style="margin: 0;display: initial;" height="20" />](#llama-cpp)
 35 | - [📜 声明协议](#zh_declarations-license)
 36 | - [🥇 企业介绍](#zh_company-introduction)
 37 | 
 38 | 
 39 | <a name="zh_model-introduction"></a><br>
 40 | # 1. 模型介绍
 41 | 
 42 | - Orion-14B-Base是一个具有140亿参数的多语种大模型，该模型在一个包含2.5万亿token的多样化数据集上进行了训练，涵盖了中文、英语、日语、韩语等多种语言。在多语言环境下的一系列任务中展现出卓越的性能。在主流的公开基准评测中，Orion-14B系列模型表现优异，多项指标显著超越同等参数级别的其他模型。具体技术细节请参考[技术报告](https://arxiv.org/pdf/2401.12246.pdf)。
 43 | 
 44 | - Orion-14B系列大模型有以下几个特点：
 45 |   - 基座20B参数级别大模型综合评测效果表现优异
 46 |   - 多语言能力强，在日语、韩语测试集上显著领先
 47 |   - 微调模型适应性强，在人类标注盲测中，表现突出
 48 |   - 长上下文版本支持超长文本，在200k token长度上效果优异，最长可支持320k
 49 |   - 量化版本模型大小缩小70%，推理速度提升30%，性能损失小于1%
 50 | 
 51 |  <table style="border-collapse: collapse; width: 100%;">
 52 |    <tr>
 53 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 54 |        <img src="./assets/imgs/opencompass_zh.png" alt="opencompass" style="width: 100%; height: auto;">
 55 |      </td>
 56 |      <td style="border: none; padding: 10px; box-sizing: border-box;">
 57 |        <img src="./assets/imgs/model_cap_zh.png" alt="modelcap" style="width: 100%; height: auto;">
 58 |      </td>
 59 |    </tr>
 60 |  </table>
 61 | 
 62 | - 具体而言，Orion-14B系列大语言模型包含:
 63 |   - **Orion-14B-Base:**  基于2.5万亿tokens多样化数据集训练出的140亿参数量级的多语言基座模型。
 64 |   - **Orion-14B-Chat:**  基于高质量语料库微调的对话类模型，旨在为大模型社区提供更好的用户交互体验。
 65 |   - **Orion-14B-LongChat:**  在200k token长度上效果优异，最长可支持320k，在长文本评估集上性能比肩专有模型。
 66 |   - **Orion-14B-Chat-RAG:**  在一个定制的检索增强生成数据集上进行微调的聊天模型，在检索增强生成任务中取得了卓越的性能。
 67 |   - **Orion-14B-Chat-Plugin:**  专门针对插件和函数调用任务定制的聊天模型，非常适用于使用代理的相关场景，其中大语言模型充当插件和函数调用系统。
 68 |   - **Orion-14B-Base-Int4:**  一个使用int4进行量化的基座模型。它将模型大小显著减小了70%，同时提高了推理速度30%，仅引入了1%的最小性能损失。
 69 |   - **Orion-14B-Chat-Int4:**  一个使用int4进行量化的对话模型。
 70 | 
 71 | 
 72 | <a name="zh_model-download"></a><br>
 73 | # 2. 下载路径
 74 | 
 75 | 发布模型和下载链接见下表：
 76 | 
 77 | | 模型名称              | HuggingFace下载链接                                                                | ModelScope下载链接                                                                               |
 78 | |---------------------|-----------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------|
 79 | | ⚾ 基座模型           | [Orion-14B-Base](https://huggingface.co/OrionStarAI/Orion-14B-Base)               | [Orion-14B-Base](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base/summary)              |
 80 | | 😛 对话模型           | [Orion-14B-Chat](https://huggingface.co/OrionStarAI/Orion-14B-Chat)               | [Orion-14B-Chat](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat/summary)              |
 81 | | 📃 长上下文模型        | [Orion-14B-LongChat](https://huggingface.co/OrionStarAI/Orion-14B-LongChat)       | [Orion-14B-LongChat](https://modelscope.cn/models/OrionStarAI/Orion-14B-LongChat/summary)      |
 82 | | 🔎 检索增强模型        | [Orion-14B-Chat-RAG](https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG)       | [Orion-14B-Chat-RAG](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-RAG/summary)      |
 83 | | 🔌 插件模型           | [Orion-14B-Chat-Plugin](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Plugin) | [Orion-14B-Chat-Plugin](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Plugin/summary)|
 84 | | 💼 基座Int4量化模型    | [Orion-14B-Base-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4)     | [Orion-14B-Base-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Base-Int4/summary)    |
 85 | | 📦 对话Int4量化模型    | [Orion-14B-Chat-Int4](https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4)     | [Orion-14B-Chat-Int4](https://modelscope.cn/models/OrionStarAI/Orion-14B-Chat-Int4/summary)    |
 86 | 
 87 | 
 88 | <a name="zh_model-benchmark"></a><br>
 89 | # 3. 评估结果
 90 | 
 91 | ## 3.1. 基座模型Orion-14B-Base评估
 92 | 
 93 | ### 3.1.1. 专业知识与试题评估结果
 94 | | 模型名称            | C-Eval   | CMMLU    | MMLU     | AGIEval  | Gaokao   | BBH      |
 95 | |--------------------|----------|----------|----------|----------|----------|----------|
 96 | | LLaMA2-13B         |   41.4   |   38.4   |   55.0   |   30.9   |   18.2   |   45.6   |
 97 | | Skywork-13B        |   59.1   |   61.4   |   62.7   |   43.6   |   56.1   |   48.3   |
 98 | | Baichuan2-13B      |   59.0   |   61.3   |   59.5   |   37.4   |   45.6   |   49.0   |
 99 | | QWEN-14B           |   71.7   |   70.2   |   67.9   |   51.9   | **62.5** |   53.7   |
100 | | InternLM-20B       |   58.8   |   59.0   |   62.1   |   44.6   |   45.5   |   52.5   |
101 | | **Orion-14B-Base** | **72.9** | **70.6** | **69.9** | **54.7** |   62.1   | **56.5** |
102 | 
103 | ### 3.1.2. 理解与通识评估结果
104 | | 模型名称            |RACE-middle|RACE-high| HellaSwag| PIQA     | Lambada  | WSC      |
105 | |--------------------|----------|----------|----------|----------|----------|----------|
106 | | LLaMA 2-13B        |   63.0   |   58.9   |   77.5   |   79.8   |   76.5   |   66.3   |
107 | | Skywork-13B        |   87.6   |   84.1   |   73.7   |   78.3   |   71.8   |   66.3   |
108 | | Baichuan 2-13B     |   68.9   |   67.2   |   70.8   |   78.1   |   74.1   |   66.3   |
109 | | QWEN-14B           |   93.0   |   90.3   | **80.2** |   79.8   |   71.4   |   66.3   |
110 | | InternLM-20B       |   86.4   |   83.3   |   78.1   | **80.3** |   71.8   |   68.3   |
111 | | **Orion-14B-Base** | **93.2** | **91.3** |   78.5   |   79.5   | **78.8** | **70.2** |
112 | 
113 | ### 3.1.3. OpenCompass评测集评估结果
114 | | 模型名称 | Average | Examination | Language | Knowledge | Understanding | Reasoning |
115 | |------------------|----------|----------|----------|----------|----------|----------|
116 | | LLaMA 2-13B      |   47.3   |   45.2   |   47.0   |   58.3   |   50.9   |   43.6   |
117 | | Skywork-13B      |   53.6   |   61.1   |   51.3   |   52.7   |   64.5   |   45.2   |
118 | | Baichuan 2-13B   |   49.4   |   51.8   |   47.5   |   48.9   |   58.1   |   44.2   |
119 | | QWEN-14B         |   62.4   |   71.3   |   52.67  |   56.1   |   68.8   |   60.1   |
120 | | InternLM-20B     |   59.4   |   62.5   |   55.0   | **60.1** |   67.3   |   54.9   |
121 | |**Orion-14B-Base**| **64.3** | **71.4** | **55.0** |   60.0   | **71.9** | **61.6** |
122 | 
123 | ### 3.1.4. 日语测试集评估结果
124 | |   模型名称         |**Average**|  JCQA    |  JNLI    |  MARC    |  JSQD   |  JQK     |  XLS     |  XWN     |  MGSM    |
125 | |--------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
126 | | PLaMo-13B          |   52.3   |   56.7   |   42.8   |   95.8   |   70.6   |   71.0   |   8.70   |   70.5   |   2.40   |
127 | | WebLab-10B         |   50.7   |   66.6   |   53.7   |   82.1   |   62.9   |   56.2   |   10.0   |   72.0   |   2.40   |
128 | | ELYZA-jp-7B        |   48.8   |   71.7   |   25.3   |   86.6   |   70.8   |   64.1   |   2.50   |   62.1   |   7.20   |
129 | | StableLM-jp-7B     |   51.1   |   33.4   |   43.3   | **96.7** |   70.6   |   78.1   |   10.7   |   72.8   |   2.80   |
130 | | LLaMA 2-13B        |   46.3   |   75.0   |   47.6   |   38.8   |   76.1   |   67.7   |   18.1   |   63.2   |   10.4   |
131 | | Baichuan 2-13B     |   57.1   |   73.7   |   31.3   |   91.6   |   80.5   |   63.3   |   18.6   |   72.2   |   25.2   |
132 | | QWEN-14B           |   65.8   |   85.9   |   60.7   |   97.0   |   83.3   |   71.8   |   18.8   |   70.6   |   38.0   |
133 | | Yi-34B             |   67.1   |   83.8   |   61.2   |   95.2   | **86.1** |   78.5   | **27.2** |   69.2   |   35.2   |
134 | | **Orion-14B-Base** | **69.1** | **88.2** | **75.8** |   94.1   |   75.7   | **85.1** |   17.3   | **78.8** | **38.0** |
135 | 
136 | ### 3.1.5. 韩语测试集n-shot评估结果
137 | | 模型名称  | **Average**<br>n=0&nbsp;&nbsp;n=5 | HellaSwag<br>n=0&nbsp;&nbsp;n=5 | COPA<br> n=0&nbsp;&nbsp;n=5 | BoolQ<br>n=0&nbsp;&nbsp;n=5 | SentiNeg<br>n=0&nbsp;&nbsp;n=5|
138 | |------------------|------------------------------|------------------------------|------------------------------|------------------------------|------------------------------|
139 | | KoGPT            |  53.0   &nbsp;&nbsp;   70.1  |  55.9   &nbsp;&nbsp;   58.3  |  73.5   &nbsp;&nbsp;   72.9  |  45.1   &nbsp;&nbsp;   59.8  |  37.5   &nbsp;&nbsp;   89.4  |
140 | | Polyglot-ko-13B  |  69.6   &nbsp;&nbsp;   73.7  |**59.5** &nbsp;&nbsp; **63.1**|**79.4** &nbsp;&nbsp; **81.1**|  48.2   &nbsp;&nbsp;   60.4  |  91.2   &nbsp;&nbsp;   90.2  |
141 | | LLaMA 2-13B      |  46.7   &nbsp;&nbsp;   63.7  |  41.3   &nbsp;&nbsp;   44.0  |  59.3   &nbsp;&nbsp;   63.8  |  34.9   &nbsp;&nbsp;   73.8  |  51.5   &nbsp;&nbsp;   73.4  |
142 | | Baichuan 2-13B   |  52.1   &nbsp;&nbsp;   58.7  |  39.2   &nbsp;&nbsp;   39.6  |  60.6   &nbsp;&nbsp;   60.6  |  58.4   &nbsp;&nbsp;   61.5  |  50.3   &nbsp;&nbsp;   72.9  |
143 | | QWEN-14B         |  53.8   &nbsp;&nbsp;   73.7  |  45.3   &nbsp;&nbsp;   46.8  |  64.9   &nbsp;&nbsp;   68.9  |  33.4   &nbsp;&nbsp;   83.5  |  71.5   &nbsp;&nbsp;   95.7  |
144 | | Yi-34B           |  54.2   &nbsp;&nbsp;   72.1  |  44.6   &nbsp;&nbsp;   44.7  |  58.0   &nbsp;&nbsp;   60.6  |  65.9   &nbsp;&nbsp;   90.2  |  48.3   &nbsp;&nbsp;   92.9  |
145 | |**Orion-14B-Base**|**74.5** &nbsp;&nbsp; **79.6**|  47.0   &nbsp;&nbsp;   49.6  |  77.7   &nbsp;&nbsp;   79.4  |**81.6** &nbsp;&nbsp; **90.7**|**92.4** &nbsp;&nbsp; **98.7**|
146 | 
147 | ### 3.1.6. 多语言评估结果
148 | | 模型名称            | Train Lang | Japanese | Korean   | Chinese  |  English |
149 | |--------------------|------------|----------|----------|----------|----------|
150 | | PLaMo-13B          |  En,Jp     |   52.3   |   *      |   *      |   *      |
151 | | Weblab-10B         |  En,Jp     |   50.7   |   *      |   *      |   *      |
152 | | ELYZA-jp-7B        |  En,Jp     |   48.8   |   *      |   *      |   *      |
153 | | StableLM-jp-7B     |  En,Jp     |   51.1   |   *      |   *      |   *      |
154 | | KoGPT-6B           |  En,Ko     |   *      |   70.1   |   *      |   *      |
155 | | Polyglot-ko-13B    |  En,Ko     |   *      |   70.7   |   *      |   *      |
156 | | Baichuan2-13B      |  Multi     |   57.1   |   58.7   |   50.8   |   57.1   |
157 | | Qwen-14B           |  Multi     |   65.8   |   73.7   |   64.5   |   65.4   |
158 | | Llama2-13B         |  Multi     |   46.3   |   63.7   |   41.4   |   55.3   |
159 | | Yi-34B             |  Multi     |   67.1   |   72.2   |   58.7   | **68.8** |
160 | | **Orion-14B-Base** |  Multi     | **69.1** | **79.5** | **67.9** |   67.3   |
161 | 
162 | ## 3.2. 对话模型Orion-14B-Chat评估
163 | ### 3.2.1. 对话模型MTBench主观评估
164 | | 模型名称              |   第一轮  |  第二轮   |  **平均** |
165 | |----------------------|----------|----------|----------|
166 | | Baichuan2-13B-Chat   |   7.05   |   6.47   |   6.76   |
167 | | Qwen-14B-Chat        |   7.30   |   6.62   |   6.96   |
168 | | Llama2-13B-Chat      |   7.10   |   6.20   |   6.65   |
169 | | InternLM-20B-Chat    |   7.03   |   5.93   |   6.48   |
170 | | **Orion-14B-Chat**   | **7.68** | **7.07** | **7.37** |
171 | 
172 | \*这里评测使用vllm进行推理
173 | 
174 | ### 3.2.2. 对话模型AlignBench主观评估
175 | | 模型名称             | 数学能力  | 逻辑推理  | 基本能力   | 中文理解  | 综合问答   | 写作能力  | 角色扮演   | 专业知识  | **平均**  |
176 | |--------------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
177 | | Baichuan2-13B-Chat |   3.76   |   4.07   |   6.22   |   6.05   |   7.11   |   6.97   |   6.75   |   6.43   |   5.25   |
178 | | Qwen-14B-Chat      | **4.91** | **4.71** | **6.90** |   6.36   |   6.74   |   6.64   |   6.59   |   6.56   | **5.72** |
179 | | Llama2-13B-Chat    |   3.05   |   3.79   |   5.43   |   4.40   |   6.76   |   6.63   |   6.99   |   5.65   |   4.70   |
180 | | InternLM-20B-Chat  |   3.39   |   3.92   |   5.96   |   5.50   | **7.18** |   6.19   |   6.49   |   6.22   |   4.96   |
181 | | **Orion-14B-Chat** |   4.00   |   4.24   |   6.18   | **6.57** |   7.16   | **7.36** | **7.16** | **6.99** |   5.51   |
182 | 
183 | \*这里评测使用vllm进行推理
184 | 
185 | ## 3.3. 长上下文模型Orion-14B-LongChat评估
186 | ### 3.3.1. 长上下文模型LongBench评估
187 | | 模型名称              | NarrativeQA| MultiFieldQA-en| MultiFieldQA-zh | DuReader  | QMSum     | VCSUM  | TREC   | TriviaQA | LSHT   | RepoBench-P |
188 | |--------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
189 | | GPT-3.5-Turbo-16k        | **23.60** | **52.30** | **61.20** |   28.70   |   23.40   | **16.00** |   68.00   | **91.40** |   29.20   |   53.60   |
190 | | LongChat-v1.5-7B-32k     |   16.90   |   41.40   |   29.10   |   19.50   |   22.70   |    9.90   |   63.50   |   82.30   |   23.20   |   55.30   |
191 | | Vicuna-v1.5-7B-16k       |   19.40   |   38.50   |   43.00   |   19.30   |   22.80   |   15.10   |   71.50   |   86.20   |   28.80   |   43.50   |
192 | | Yi-6B-200K               |   14.11   |   36.74   |   22.68   |   14.01   |   20.44   |    8.08   |   72.00   |   86.61   |   38.00   | **63.29** |
193 | | Orion-14B-LongChat       |   19.47   |   48.11   |   55.84   | **37.02** | **24.87** |   15.44   | **77.00** |   89.12   | **45.50** |   54.31   |
194 | 
195 | ## 3.4. 检索增强模型Orion-14B-Chat-RAG评估
196 | ### 3.4.1. 自建检索增强测试集评估结果
197 | |模型名称|回复效果(关键字)|*回复效果(主观打分)|引用能力|兜底能力|*AutoQA|*抽取数据|
198 | |---------------------|------|------|------|------|------|------|
199 | | Baichuan2-13B-Chat  |  85  |  76  |  1   |  0   |  69  |  51  |
200 | | Qwen-14B-Chat       |  79  |  77  |  75  |  47  |  68  |  72  |
201 | | Qwen-72B-Chat(Int4) |  87  |  89  |  90  |  32  |  67  |  76  |
202 | | GPT-4               |  91  |  94  |  96  |  95  |  75  |  86  |
203 | | Orion-14B-Chat-RAG  |  86  |  87  |  91  |  97  |  73  |  71  |
204 |  \* 表示人工评判结果
205 | 
206 | ## 3.5. 插件模型Orion-14B-Chat-Plugin评估
207 | ### 3.5.1. 自建插件测试集评估结果
208 | | 模型名称  | 全参数意图识别 | 缺参数意图识别 | 非插件调用识别 |
209 | |-----------------------|--------|-----------|--------|
210 | | Baichuan2-13B-Chat    |   25   |   0       |   0    |
211 | | Qwen-14B-Chat         |   55   |   0       |   50   |
212 | | GPT-4                 | **95** |   52.38   |   70   |
213 | | Orion-14B-Chat-Plugin |   92.5 | **60.32** | **90** |
214 | 
215 | ## 3.6. 量化模型Orion-14B-Base-Int4评估
216 | ### 3.6.1. 量化前后整体对比
217 | |模型名称|模型大小(GB)|推理速度(令牌数/秒)|C-Eval |CMMLU |MMLU |RACE | HellaSwag|
218 | |-------------------------|------|-----|------|------|------|------|------|
219 | | OrionStar-14B-Base      | 28.0 | 135 | 72.8 | 70.6 | 70.0 | 93.3 | 78.5 |
220 | | OrionStar-14B-Base-Int4 |  8.3 | 178 | 71.8 | 69.8 | 69.2 | 93.1 | 78.0 |
221 | 
222 | 
223 | <a name="zh_model-inference"></a><br>
224 | # 4. 模型推理
225 | 
226 | 推理所需的模型权重、源码、配置已发布在 Hugging Face，下载链接见本文档最开始的表格。我们在此示范多种推理方式。程序会自动从
227 | Hugging Face 下载所需资源。
228 | 
229 | ## 4.1. Python 代码方式
230 | 
231 | ```python
232 | import torch
233 | from transformers import AutoModelForCausalLM, AutoTokenizer
234 | from transformers.generation.utils import GenerationConfig
235 | 
236 | tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-14B", use_fast=False, trust_remote_code=True)
237 | model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-14B", device_map="auto",
238 |                                              torch_dtype=torch.bfloat16, trust_remote_code=True)
239 | 
240 | model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-14B")
241 | messages = [{"role": "user", "content": "你好! 你叫什么名字!"}]
242 | response = model.chat(tokenizer, messages, streaming=False)
243 | print(response)
244 | 
245 | ```
246 | 
247 | 在上述代码中，模型加载指定 `device_map='auto'`
248 | ，会使用所有可用显卡。如需指定使用的设备，可以使用类似 `export CUDA_VISIBLE_DEVICES=0,1`（使用了0、1号显卡）的方式控制。
249 | 
250 | ## 4.2. 命令行工具方式
251 | 
252 | ```shell
253 | CUDA_VISIBLE_DEVICES=0 python demo/cli_demo.py
254 | ```
255 | 
256 | 本命令行工具是为 Chat 场景设计，因此我们不支持使用该工具调用 Base 模型。
257 | 
258 | ## 4.3. 脚本直接推理
259 | 
260 | ```shell
261 | # base model
262 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-14B --tokenizer OrionStarAI/Orion-14B --prompt 你好,你叫什么名字
263 | 
264 | # chat model
265 | CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-14B-Chat --tokenizer OrionStarAI/Orion-14B-Chat --prompt 你好,你叫什么名字
266 | 
267 | ```
268 | 
269 | <a name="vllm"></a><br>
270 | ## 4.4. 使用vllm推理
271 | - 工程地址<br>
272 |   https://github.com/vllm-project/vllm
273 | 
274 | - 拉取请求<br>
275 |   https://github.com/vllm-project/vllm/pull/2539
276 | 
277 | <a name="llama-cpp"></a><br>
278 | ## 4.5. 使用llama.cpp推理
279 | 
280 | - 工程地址<br>
281 |   https://github.com/ggerganov/llama.cpp
282 | 
283 | - 拉取请求<br>
284 |   https://github.com/ggerganov/llama.cpp/pull/5118
285 | 
286 | - 如何转换GGUF格式模型
287 | 
288 |   ```shell
289 |   python convert-hf-to-gguf.py path/to/Orion-14B-Chat --outfile chat.gguf
290 |   ```
291 | 
292 | - 如何运行GGUF格式模型推理
293 | 
294 |   ```shell
295 |   ./main --frequency-penalty 0.5 --top-k 5 --top-p 0.9 -m chat.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e
296 |   ```
297 | 
298 | 
299 | ## 4.6. 示例输出
300 | 
301 | ### 4.6.1. 闲聊
302 | 
303 | `````
304 | 用户：你好,你叫什么名字
305 | Orion-14B：你好!我是一个人工智能助手,没有固定的名字。你可以随意称呼我。有什么我可以帮助你的吗?
306 | 用户：讲个故事
307 | Orion-14B：从前有一个小男孩叫杰克,他住在一个小村庄里。杰克非常喜欢冒险和探索,总是梦想着能够发现一些神奇的事情。
308 | 
309 |   有一天,杰克听说在附近的森林深处有一座传说中的魔法城堡,里面住着一个邪恶的巨人。这个巨人的名字叫格里姆,他被困在城堡中,无法离开。
310 | 
311 |   杰克决定要去解救这个巨人,因为他相信格里姆一定有自己的原因被困在那里。于是,他带着勇气和决心,踏上了前往魔法城堡的旅程。
312 | 
313 |   经过长时间的跋涉,杰克终于来到了城堡的大门前。他小心翼翼地推开大门,进入了一个充满神秘气息的房间。
314 | 
315 |   房间里弥漫着黑暗的气息,但杰克并没有被吓倒。他勇敢地向城堡深处走去,最终找到了格里姆。
316 | 
317 |   格里姆告诉杰克,他是被一个邪恶的女巫诅咒的,只有通过解开咒语才能获得自由。杰克决定要帮助格里姆,他开始寻找线索和解开咒语的方法。
318 | 
319 |   经过一番努力,杰克终于找到了一条通往女巫的秘密实验室的道路。他在那里发现了女巫留下的魔法书,书中记载了解开咒语的方法。
320 | 
321 |   杰克按照书上所写的步骤进行操作,成功地解开了格里姆身上的诅咒。巨人感激不已,他答应帮助杰克实现他的愿望。
322 | 
323 |   从此以后,杰克和格里姆成为了好朋友,他们一起经历了许多惊险刺激的冒险,并用他们的勇气和智慧战胜了各种困难。
324 | 
325 |   这个故事告诉我们,只要有勇气和决心,我们就能克服一切困难,实现自己的梦想。
326 | `````
327 | 
328 | ### 4.6.2. 日韩
329 | 
330 | `````
331 | 用户：自己を紹介してください
332 | Orion-14B：こんにちは、私の名前はChatMaxで、OrionStarによって開発されたAIアシスタントです。私はユーザーに正確で、客観的で、有用な回答を提供し、彼らが問題を解決し、情報を取得するのを助けることができます。
333 | 用户：자기소개를 해주세요.
334 | Orion-14B：안녕하세요, 제 이름은 ChatMax입니다. OrionStar에서 개발한 AI 어시스턴트입니다. 저는 사용자들에게 정확하고 객관적이며 유용한 답변을 제공하여 문제를 해결하고 정보를 얻는 데 도움을 줄 수 있습니다.
335 | `````
336 | 
337 | 
338 | <a name="zh_declarations-license"></a><br>
339 | # 5. 声明、协议
340 | 
341 | ## 5.1. 声明
342 | 
343 | 我们强烈呼吁所有使用者，不要利用 Orion-14B 模型进行任何危害国家社会安全或违法的活动。另外，我们也要求使用者不要将
344 | Orion-14B 模型用于未经适当安全审查和备案的互联网服务。
345 | 
346 | 我们希望所有的使用者都能遵守这个原则，确保科技的发展能在规范和合法的环境下进行。
347 | 我们已经尽我们所能，来确保模型训练过程中使用的数据的合规性。然而，尽管我们已经做出了巨大的努力，但由于模型和数据的复杂性，仍有可能存在一些无法预见的问题。因此，如果由于使用
348 | Orion-14B 开源模型而导致的任何问题，包括但不限于数据安全问题、公共舆论风险，或模型被误导、滥用、传播或不当利用所带来的任何风险和问题，我们将不承担任何责任。
349 | 
350 | ## 5.2. 协议
351 | 
352 | 社区使用Orion-14B系列模型
353 | - 代码请遵循 [Apache License Version 2.0](./LICENSE)<br>
354 | - 模型请遵循 [Orion-14B系列模型社区许可协议](./ModelsCommunityLicenseAgreement)
355 | 
356 | 
357 | <a name="zh_company-introduction"></a><br>
358 | # 6. 企业介绍
359 | 
360 | 猎户星空（OrionStar）是一家全球领先的服务机器人解决方案公司，成立于2016年9月。猎户星空致力于基于人工智能技术打造下一代革命性机器人，使人们能够摆脱重复的体力劳动，使人类的工作和生活更加智能和有趣，通过技术使社会和世界变得更加美好。
361 | 
362 | 猎户星空拥有完全自主开发的全链条人工智能技术，如语音交互和视觉导航。它整合了产品开发能力和技术应用能力。基于Orion机械臂平台，它推出了ORION
363 | STAR AI Robot Greeting、AI Robot Greeting Mini、Lucki、Coffee
364 | Master等产品，并建立了Orion机器人的开放平台OrionOS。通过为 **真正有用的机器人而生** 的理念实践，它通过AI技术为更多人赋能。
365 | 
366 | 凭借7年AI经验积累，猎户星空已推出大模型深度应用“聚言”，并陆续面向行业客户提供定制化AI大模型咨询与服务解决方案，真正帮助客户实现企业经营效率领先同行目标。
367 | 
368 | **猎户星空具备全链条大模型应用能力的核心优势**，包括拥有从海量数据处理、大模型预训练、二次预训练、微调(Fine-tune)、Prompt
369 | Engineering 、Agent开发的全链条能力和经验积累；拥有完整的端到端模型训练能力，包括系统化的数据处理流程和数百张GPU的并行模型训练能力，现已在大政务、云服务、出海电商、快消等多个行业场景落地。
370 | 
371 | ***欢迎有大模型应用落地需求的企业联系我们进行商务合作***<br>
372 | **咨询电话:** 400-898-7779<br>
373 | **电子邮箱:** ai@orionstar.com<br>
374 | **Discord社区链接:** https://discord.gg/zumjDWgdAs
375 | 
376 | <div align="center">
377 |   <img src="./assets/imgs/wechat_group.jpg" alt="wechat" width="40%" />
378 | </div>
379 | 


--------------------------------------------------------------------------------
/assets/imgs/llama_cpp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/llama_cpp.png


--------------------------------------------------------------------------------
/assets/imgs/model_cap_en.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/model_cap_en.png


--------------------------------------------------------------------------------
/assets/imgs/model_cap_zh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/model_cap_zh.png


--------------------------------------------------------------------------------
/assets/imgs/opencompass_en.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/opencompass_en.png


--------------------------------------------------------------------------------
/assets/imgs/opencompass_zh.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/opencompass_zh.png


--------------------------------------------------------------------------------
/assets/imgs/orion_start.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/orion_start.PNG


--------------------------------------------------------------------------------
/assets/imgs/vllm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/vllm.png


--------------------------------------------------------------------------------
/assets/imgs/wechat_group.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/assets/imgs/wechat_group.jpg


--------------------------------------------------------------------------------
/demo/cli_demo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | import platform
 4 | import subprocess
 5 | from colorama import Fore, Style
 6 | from tempfile import NamedTemporaryFile
 7 | from transformers import AutoModelForCausalLM, AutoTokenizer
 8 | from transformers.generation.utils import GenerationConfig
 9 | 
10 | 
def init_model():
    """Load the Orion-14B-Chat model, its generation config and its tokenizer.

    All three are fetched with ``from_pretrained`` using the same repository
    id. The model is loaded in float16 with ``device_map="auto"``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    repo_id = "OrionStarAI/Orion-14B-Chat"
    print("init model ...")
    chat_model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        use_safetensors=False,
    )
    # Replace the default generation settings with the ones stored in the repo.
    chat_model.generation_config = GenerationConfig.from_pretrained(repo_id)
    chat_tokenizer = AutoTokenizer.from_pretrained(
        repo_id, use_fast=False, trust_remote_code=True
    )
    return chat_model, chat_tokenizer
29 | 
30 | 
def clear_screen():
    """Clear the terminal, print the usage banner and hand back a new history.

    Returns:
        An empty list, which callers use as the fresh message history.
    """
    clear_command = "cls" if platform.system() == "Windows" else "clear"
    os.system(clear_command)
    banner = "欢迎使用Orion-14B-Chat，输入进行对话，vim 多行输入，clear 清空历史，CTRL+C 中断生成，stream 开关流式生成，exit 结束。"
    print(Fore.YELLOW + Style.BRIGHT + banner)
    return []
39 | 
40 | 
def vim_input():
    """Open vim on a scratch file and return whatever the user saved in it.

    Returns:
        The text of the scratch file after vim exits. If the user quits
        without writing, the file is still empty and ``""`` is returned.
    """
    # delete=False keeps the (empty) file on disk after the handle is closed,
    # so vim edits an existing path and reading it back cannot hit a missing
    # file when the user quits without saving.
    with NamedTemporaryFile(delete=False) as scratch:
        scratch_path = scratch.name
    try:
        # '+star' makes vim start in insert mode.
        subprocess.call(['vim', '+star', scratch_path])
        with open(scratch_path) as saved:
            return saved.read()
    finally:
        # The file was created with delete=False, so remove it explicitly.
        os.remove(scratch_path)
47 | 
48 | 
def main(stream=True):
    """Run the interactive command-line chat loop for Orion-14B-Chat.

    Special inputs typed at the prompt:
      * ``exit``   - leave the loop
      * ``clear``  - clear the screen and reset the conversation history
      * ``vim``    - open vim to compose a multi-line prompt
      * ``stream`` - toggle streaming generation on/off

    Any other input is appended to the history as a user message and sent to
    ``model.chat``; the reply is appended as an assistant message.

    Args:
        stream: Whether replies are streamed when the loop starts.
    """
    model, tokenizer = init_model()
    messages = clear_screen()
    while True:
        prompt = input(Fore.GREEN + Style.BRIGHT + "\n用户：" + Style.NORMAL)
        if prompt.strip() == "exit":
            break
        if prompt.strip() == "clear":
            messages = clear_screen()
            continue
        if prompt.strip() == 'vim':
            prompt = vim_input()
            print(prompt)
        print(Fore.CYAN + Style.BRIGHT + "\nOrion-14B-Chat：" + Style.NORMAL, end='')
        if prompt.strip() == "stream":
            stream = not stream
            print(Fore.YELLOW + "({}流式生成)\n".format("开启" if stream else "关闭"), end='')
            continue
        messages.append({"role": "user", "content": prompt})
        if stream:
            position = 0
            # Start from an empty reply: if CTRL+C arrives before the first
            # chunk, the history must not raise UnboundLocalError (first turn)
            # or silently record the previous turn's reply again.
            response = ""
            try:
                # Each yielded value is sliced from the last printed position,
                # so only the newly generated suffix is printed.
                for response in model.chat(tokenizer, messages, streaming=True):
                    print(response[position:], end='', flush=True)
                    position = len(response)
                    if torch.backends.mps.is_available():
                        torch.mps.empty_cache()
            except KeyboardInterrupt:
                # CTRL+C only interrupts the current generation; keep what
                # was produced so far and continue the session.
                pass
            print()
        else:
            response = model.chat(tokenizer, messages)
            print(response)
            if torch.backends.mps.is_available():
                torch.mps.empty_cache()
        messages.append({"role": "assistant", "content": response})
    print(Style.RESET_ALL)
86 | 
87 | 
# Start the interactive chat loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
90 | 


--------------------------------------------------------------------------------
/demo/text_generation.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import torch
 4 | from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 5 | 
 6 | def parse_inputs():
 7 |     parser = argparse.ArgumentParser(description="Orion-14B-Chat text generation demo")
 8 |     parser.add_argument(
 9 |         "--model",
10 |         type=str,
11 |         default="OrionStarAI/Orion-14B-Chat",
12 |         help="pretrained model path locally or name on huggingface",
13 |     )
14 |     parser.add_argument(
15 |         "--tokenizer",
16 |         type=str,
17 |         default="OrionStarAI/Orion-14B-Chat",
18 |         help="tokenizer path locally or name on huggingface",
19 |     )
20 |     parser.add_argument(
21 |         "--prompt",
22 |         type=str,
23 |         default="你好!",
24 |         help="The prompt to start with",
25 |     )
26 |     parser.add_argument(
27 |         "--streaming",
28 |         action="store_true",
29 |         help="whether to enable streaming text generation",
30 |     )
31 |     parser.add_argument(
32 |         "--eos-token",
33 |         type=str,
34 |         default="</s>",
35 |         help="End of sentence token",
36 |     )
37 |     args = parser.parse_args()
38 |     return args
39 | 
40 | 
def main(args):
    """Generate one chat reply for ``args.prompt`` and print it.

    Args:
        args: Parsed command-line options. This function reads ``model``,
            ``tokenizer``, ``prompt`` and ``streaming``.
            NOTE(review): ``eos_token`` is parsed by ``parse_inputs`` but is
            not used here.
    """
    print(args)
    model = AutoModelForCausalLM.from_pretrained(args.model, device_map="auto", torch_dtype=torch.bfloat16,
                                                 trust_remote_code=True, use_safetensors=False)
    # Fall back to the model path when no tokenizer path is given.
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer or args.model, trust_remote_code=True)
    messages = [{"role": "user", "content": args.prompt}]
    if args.streaming:
        position = 0
        try:
            # Each yielded value is sliced from the last printed position, so
            # only the newly generated suffix is printed.
            for response in model.chat(tokenizer, messages, streaming=True):
                print(response[position:], end='', flush=True)
                position = len(response)
                if torch.backends.mps.is_available():
                    torch.mps.empty_cache()
        except KeyboardInterrupt:
            # CTRL+C just stops the generation early.
            pass
        # Chunks are printed with end='', so terminate the line explicitly;
        # this matches the non-streaming path, whose print() ends with "\n".
        print()
    else:
        response = model.chat(tokenizer, messages)
        print(response)
        if torch.backends.mps.is_available():
            torch.mps.empty_cache()
62 | 
63 | 
# Parse the command line and run the demo only when executed as a script.
if __name__ == "__main__":
    args = parse_inputs()
    main(args)
67 | 


--------------------------------------------------------------------------------
/demo/text_generation_base.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 3 | import torch
 4 | 
 5 | 
 6 | def parse_inputs():
 7 |     parser = argparse.ArgumentParser(description="Orion-14B-Base text generation demo")
 8 |     parser.add_argument(
 9 |         "--model",
10 |         type=str,
11 |         default="OrionStarAI/Orion-14B-Base",
12 |         help="pretrained model path locally or name on huggingface",
13 |     )
14 |     parser.add_argument(
15 |         "--tokenizer",
16 |         type=str,
17 |         default="OrionStarAI/Orion-14B-Base",
18 |         help="tokenizer path locally or name on huggingface",
19 |     )
20 |     parser.add_argument(
21 |         "--max-tokens",
22 |         type=int,
23 |         default=512,
24 |         help="max number of tokens to generate",
25 |     )
26 |     parser.add_argument(
27 |         "--streaming",
28 |         action="store_true",
29 |         help="whether to enable streaming text generation",
30 |     )
31 |     parser.add_argument(
32 |         "--prompt",
33 |         type=str,
34 |         default="你好!",
35 |         help="The prompt to start with",
36 |     )
37 |     parser.add_argument(
38 |         "--eos-token",
39 |         type=str,
40 |         default="</s>",
41 |         help="End of sentence token",
42 |     )
43 |     args = parser.parse_args()
44 |     return args
45 | 
46 | 
def main(args):
    """Generate a sampled continuation of ``args.prompt`` with the base model.

    Args:
        args: Parsed command-line options. This function reads ``model``,
            ``tokenizer``, ``prompt``, ``streaming``, ``max_tokens`` and
            ``eos_token``.
    """
    print(args)
    model = AutoModelForCausalLM.from_pretrained(
        args.model, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
    )
    # Fall back to the model path when no tokenizer path is given.
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer or args.model, trust_remote_code=True
    )
    print("prompt:",args.prompt)
    inputs = tokenizer(
        args.prompt,
        return_tensors="pt",
    )

    # With a streamer the text is printed as it is generated; otherwise the
    # full output is decoded and printed once generation has finished.
    streamer = TextStreamer(tokenizer) if args.streaming else None
    outputs = model.generate(
        # Follow the device chosen by device_map="auto" instead of assuming
        # CUDA, so the demo is not tied to CUDA-only machines.
        inputs.input_ids.to(model.device),
        max_new_tokens=args.max_tokens,
        streamer=streamer,
        eos_token_id=tokenizer.convert_tokens_to_ids(args.eos_token),
        do_sample=True,
        repetition_penalty=1.05,
        no_repeat_ngram_size=5,
        temperature=0.7,
        top_k=40,
        top_p=0.85,
    )
    if streamer is None:
        print(tokenizer.decode(outputs[0], skip_special_tokens=True))
76 | 
# Parse the command line and run the demo only when executed as a script.
if __name__ == "__main__":
    args = parse_inputs()
    main(args)
80 | 


--------------------------------------------------------------------------------
/doc/Orion14B_v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/doc/Orion14B_v3.pdf


--------------------------------------------------------------------------------
/gradio_demo/README.md:
--------------------------------------------------------------------------------
 1 | # Gradio演示
 2 | 
 3 | ## 快速入门
 4 | 
 5 | 1. 安装依赖项
 6 | 
 7 | ```bash
 8 | pip install -r requirements.txt
 9 | ```
10 | 
11 | 2. 设置模型推理服务器
12 | 
13 | 3. 设置环境变量
14 | 
15 | ```bash
16 | export BACKEND_HOST="***"  # 模型推理服务器的主机
17 | export MODEL_NAME="***"  # 模型的名称
18 | export API_KEY="***"  # 模型的 API 密钥，如果没有身份验证可以忽略
19 | export CHAT_ENDPOINT="***"  # 聊天使用的模型的接口，用户可以使用不同的模型来完成不同的任务
20 | export DOC_QA_ENDPOINT="***"  # 文档问答使用的模型的接口，用户可以使用不同的模型来完成不同的任务
21 | export PLUGIN_ENDPOINT="***"  # 插件使用的模型的接口，用户可以使用不同的模型来完成不同的任务
22 | export QA_GENERATOR_ENDPOINT="***"  # 问答生成器使用的模型的接口，用户可以使用不同的模型来完成不同的任务
23 | export EXTRACT_ENDPOINT="***"  # 信息提取使用的模型的接口，用户可以使用不同的模型来完成不同的任务
24 | 
25 | # 插件可以根据实际情况进行替换
26 | export WEATHER_PLUGIN_URL="***"  # 天气插件的URL
27 | export BCE_APP_CODE="***"  # bce的app code
28 | ```
29 | 
30 | 4. 运行演示
31 | 
32 | ```bash
33 | gradio app.py
34 | ```
35 | 


--------------------------------------------------------------------------------
/gradio_demo/README_en.md:
--------------------------------------------------------------------------------
 1 | # Gradio Demo
 2 | 
 3 | ## Quick Start
 4 | 
 5 | 1. Install dependencies
 6 | 
 7 | ```bash
 8 | pip install -r requirements.txt
 9 | ```
10 | 
11 | 2. Set up model inference server
12 | 
13 | 3. Set environment variables
14 | 
15 | ```bash
16 | export BACKEND_HOST="***"  # the host of model inference server
17 | export MODEL_NAME="***"  # the name of model
18 | export API_KEY="***"  # the api key of model, can be ignored if no auth
19 | export CHAT_ENDPOINT="***"  # the endpoint of chat model, users can use different models for different tasks
20 | export DOC_QA_ENDPOINT="***"  # the endpoint of doc-qa model, users can use different models for different tasks
21 | export PLUGIN_ENDPOINT="***"  # the endpoint of plugin model, users can use different models for different tasks
22 | export QA_GENERATOR_ENDPOINT="***"  # the endpoint of qa-generator model, users can use different models for different tasks
23 | export EXTRACT_ENDPOINT="***"  # the endpoint of extract model, users can use different models for different tasks
24 | 
25 | # Plugins can be replaced according to actual conditions
26 | export WEATHER_PLUGIN_URL="***"  # the url of weather plugin
27 | export BCE_APP_CODE="***"  # the app code of bce
28 | ```
29 | 
30 | 4. Run the demo
31 | 
32 | ```bash
33 | gradio app.py
34 | ```


--------------------------------------------------------------------------------
/gradio_demo/app.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict
  3 | 
  4 | import gradio as gr
  5 | import pandas as pd
  6 | 
  7 | from chat_task.chat import generate_chat
  8 | from doc_qa_task.doc_qa import generate_doc_qa
  9 | from examples import (
 10 |     load_examples,
 11 |     preprocess_docqa_examples,
 12 |     preprocess_extraction_examples,
 13 |     preprocess_qa_generator_examples,
 14 | )
 15 | from extract_data_task.extract import extract_slots
 16 | from plugin_task.api import api_plugin_chat
 17 | from qa_generator_task.generate_qa import generate_qa_pairs
 18 | from plugin_task.plugins import PLUGIN_JSON_SCHEMA
 19 | 
 20 | 
# Resolve the directory that contains this file so bundled assets are found
# regardless of the current working directory.
abs_path = os.path.abspath(__file__)
current_dir = os.path.dirname(abs_path)
# Folder holding the button icons referenced below (clear.png, send.svg).
statistic_path = os.path.join(current_dir, "images")

# Runs at import time; load_examples is defined in examples.py (presumably it
# prepares the data used by the example helpers imported above - verify there).
load_examples()
 26 | 
 27 | 
 28 | def clear_session():
 29 |     """Clears the chat session."""
 30 |     return "", None
 31 | 
 32 | 
 33 | def clear_plugin_session(session: Dict):
 34 |     """Clears the plugin session."""
 35 |     session.clear()
 36 |     return session, None, None
 37 | 
 38 | 
 39 | def show_custom_fallback_textbox(x):
 40 |     if x == "自定义话术":
 41 |         return [gr.Row(visible=True), gr.Textbox()]
 42 |     else:
 43 |         return [gr.Row(visible=False), gr.Textbox()]
 44 | 
 45 | 
 46 | def validate_field_word_count(
 47 |     input_text: str, description: str, max_word_count: int = 3000
 48 | ):
 49 |     """
 50 |     Validate the input text for word count
 51 | 
 52 |     :param input_text:
 53 |     :return:
 54 |     """
 55 |     if len(input_text) == 0:
 56 |         raise gr.Error(f"{description}不能为空")
 57 | 
 58 |     if len(input_text) > max_word_count:
 59 |         raise gr.Error(f"{description}字数不能超过{max_word_count}字")
 60 | 
 61 | 
 62 | def validate_chat(input_text: str):
 63 |     """
 64 |     Validate the input text
 65 | 
 66 |     :param input_text:
 67 |     :return:
 68 |     """
 69 |     validate_field_word_count(input_text, "输入", 500)
 70 | 
 71 | 
 72 | def validate_doc_qa(
 73 |     input_text: str,
 74 |     doc_df: "pd.DataFrame",
 75 |     fallback_ratio: str,
 76 |     fallback_text_input: str,
 77 | ):
 78 |     """
 79 |     Validate fields of doc_qa
 80 |     :param input_text:
 81 |     :param doc_df:
 82 |     :param fallback_ratio:
 83 |     :param fallback_text_input:
 84 |     :return:
 85 |     """
 86 |     # add all the doc ids to the input text
 87 |     if fallback_ratio == "自定义话术":
 88 |         validate_field_word_count(fallback_text_input, "自定义话术", 100)
 89 | 
 90 |     validate_field_word_count(input_text, "输入", 500)
 91 | 
 92 |     page_content_full_text = (
 93 |         " ".join(doc_df["文档片段名称"].tolist())
 94 |         + " "
 95 |         + " ".join(doc_df["文档片段内容"].tolist())
 96 |     )
 97 |     validate_field_word_count(page_content_full_text, "文档信息", 2500)
 98 | 
 99 | 
def validate_qa_pair_generator(input_text: str):
    """
    Check the QA-pair generator input using the default length limit.

    :param input_text: source text for QA generation
    :return: the result of ``validate_field_word_count``
    """
    outcome = validate_field_word_count(input_text, description="输入")
    return outcome
108 | 
109 | 
def validate_extraction(
    input_text: str,
    extraction_df: "pd.DataFrame",
):
    """
    Check the fields of the extraction tab.

    :param input_text: text to extract from; limited to 1500 characters
    :param extraction_df: table with the "字段名称" and "字段描述" columns;
        their combined text is limited to 1500 characters
    :return: None
    """
    # Field names and descriptions are measured together, joined by spaces.
    joined_names = " ".join(extraction_df["字段名称"].tolist())
    joined_descriptions = " ".join(extraction_df["字段描述"].tolist())
    extraction_full_text = " ".join((joined_names, joined_descriptions))

    checks = (
        (input_text, "输入"),
        (extraction_full_text, "待抽取字段描述"),
    )
    for text, label in checks:
        validate_field_word_count(text, label, 1500)
124 | 
125 | 
def validate_plugin(input_text: str):
    """
    Check the plugin input: it must be non-empty and at most 500 characters.

    :param input_text: text entered in the plugin tab
    :return: None
    """
    validate_field_word_count(input_text, description="输入", max_word_count=500)
134 | 
135 | 
136 | with gr.Blocks(
137 |     title="Orion-14B",
138 |     theme="shivi/calm_seafoam@>=0.0.1,<1.0.0",
139 | ) as demo:
140 | 
141 |     def user(user_message, history):
142 |         return user_message, (history or []) + [[user_message, ""]]
143 | 
144 |     gr.Markdown(
145 |         """
146 |         <div style="overflow: hidden;color:#fff;display: flex;flex-direction: column;align-items: center; position: relative; width: 100%; height: 180px;background-size: cover; background-image: url(https://www.orionstar.com/res/orics/down/ow001_20240119_8369eca9013416109a2303bf4e329140.png);">
147 |             <img style="width: 130px;height: 60px;position: absolute;top:10px;left:10px" src="https://www.orionstar.com/res/orics/down/ow001_20240119_1236eba7ea0ac15931f4518d7f211d47.png"/>
148 |             <img style="min-width: 1416px; width: 1416px;height: 100px;margin-top: 30px;" src="https://www.orionstar.com/res/orics/down/ow001_20240119_10c5ca12a57116bda0e35916a28b247f.png"/>
149 |             <span style="margin-top: 10px;font-size: 12px;">请在<a href="https://github.com/OrionStarAI/Orion" style="color: white;">Github</a>点击Star支持我们，加入<a href="https://www.orionstar.com/res/orics/down/ow001_20240119_1ef4100af7be44df30597488255b64c7.png" style="color: white;">官方微信交流群</a></span>
150 |         </div>
151 | """
152 |     )
153 |     with gr.Tab("基础能力"):
154 |         chatbot = gr.Chatbot(
155 |             label="Orion-14B-Chat",
156 |             elem_classes="control-height",
157 |             show_copy_button=True,
158 |             min_width=1368,
159 |             height=416,
160 |         )
161 |         chat_text_input = gr.Textbox(label="输入", min_width=1368)
162 | 
163 |         with gr.Row():
164 |             with gr.Column(scale=2):
165 |                 gr.Examples(
166 |                     [
167 |                         "可以给我讲个笑话吗？",
168 |                         "什么是伟大的诗歌？",
169 |                         "你知道李白吗？",
170 |                         "黑洞是如何工作的？",
171 |                         "在表中插入一条数据，id为1，name为张三，age为18，请问SQL语句是什么？",
172 |                     ],
173 |                     chat_text_input,
174 |                     label="试试问",
175 |                 )
176 |             with gr.Column(scale=1):
177 |                 with gr.Row(variant="compact"):
178 |                     clear_history = gr.Button(
179 |                         "清除历史",
180 |                         min_width="17",
181 |                         size="sm",
182 |                         scale=1,
183 |                         icon=os.path.join(statistic_path, "clear.png"),
184 |                     )
185 |                     submit = gr.Button(
186 |                         "发送",
187 |                         variant="primary",
188 |                         min_width="17",
189 |                         size="sm",
190 |                         scale=1,
191 |                         icon=os.path.join(statistic_path, "send.svg"),
192 |                     )
193 | 
194 |         chat_text_input.submit(
195 |             fn=validate_chat, inputs=[chat_text_input], outputs=[], queue=False
196 |         ).success(
197 |             user, [chat_text_input, chatbot], [chat_text_input, chatbot], queue=False
198 |         ).success(
199 |             fn=generate_chat,
200 |             inputs=[chat_text_input, chatbot],
201 |             outputs=[chat_text_input, chatbot],
202 |         )
203 | 
204 |         submit.click(
205 |             fn=validate_chat, inputs=[chat_text_input], outputs=[], queue=False
206 |         ).success(
207 |             user, [chat_text_input, chatbot], [chat_text_input, chatbot], queue=False
208 |         ).success(
209 |             fn=generate_chat,
210 |             inputs=[chat_text_input, chatbot],
211 |             outputs=[chat_text_input, chatbot],
212 |             api_name="chat",
213 |         )
214 | 
215 |         clear_history.click(
216 |             fn=clear_session, inputs=[], outputs=[chat_text_input, chatbot], queue=False
217 |         )
218 | 
219 |     with gr.Tab("基于文档问答"):
220 |         with gr.Row():
221 |             with gr.Column(scale=3, min_width=357, variant="panel"):
222 |                 gr.Markdown(
223 |                     '<span style="color:rgba(0, 0, 0, 0.5); font-size: 14px; font-weight: 400; line-height: 28px; letter-spacing: 0em; text-align: left; width: 42px; height: 14px; left: 36px; top: 255px;">配置项</span>'
224 |                 )
225 |                 citations_radio = gr.Radio(
226 |                     ["开启引用", "关闭引用"], label="引用", value="关闭引用"
227 |                 )
228 |                 fallback_radio = gr.Radio(
229 |                     ["使用大模型知识", "自定义话术"],
230 |                     label="超纲问题回复",
231 |                     value="自定义话术",
232 |                 )
233 |                 fallback_text_input = gr.Textbox(
234 |                     label="自定义话术",
235 |                     value="抱歉，我还在学习中，暂时无法回答您的问题。",
236 |                 )
237 | 
238 |                 gr.Markdown(
239 |                     '<span style="color:rgba(0, 0, 0, 0.5); font-size: 14px; font-weight: 400; line-height: 28px; letter-spacing: 0em; text-align: left; width: 42px; height: 14px; left: 36px; top: 255px;">文档信息</span>'
240 |                 )
241 | 
242 |                 doc_df = gr.Dataframe(
243 |                     headers=["文档片段内容", "文档片段名称"],
244 |                     datatype=["str", "str"],
245 |                     row_count=6,
246 |                     col_count=(2, "fixed"),
247 |                     label="",
248 |                     interactive=True,
249 |                     wrap=True,
250 |                     elem_classes="control-height",
251 |                     height=300,
252 |                 )
253 | 
254 |             with gr.Column(scale=2, min_width=430):
255 |                 chatbot = gr.Chatbot(
256 |                     label="适用场景：预期LLM通过自由知识回答",
257 |                     elem_classes="control-height",
258 |                     show_copy_button=True,
259 |                     min_width=999,
260 |                     height=419,
261 |                 )
262 | 
263 |                 doc_qa_input = gr.Textbox(label="输入", min_width=999, max_lines=10)
264 | 
265 |                 with gr.Row():
266 |                     with gr.Column(scale=2):
267 |                         gr.Examples(
268 |                             [
269 |                                 "哪些情况下不能超车？",
270 |                                 "参观须知",
271 |                                 "青岛啤酒酒精含量是多少？",
272 |                             ],
273 |                             doc_qa_input,
274 |                             label="试试问",
275 |                             cache_examples=True,
276 |                             fn=preprocess_docqa_examples,
277 |                             outputs=[doc_df],
278 |                         )
279 |                     with gr.Column(scale=1):
280 |                         with gr.Row(variant="compact"):
281 |                             clear_history = gr.Button(
282 |                                 "清除历史",
283 |                                 min_width="17",
284 |                                 size="sm",
285 |                                 scale=1,
286 |                                 icon=os.path.join(statistic_path, "clear.png"),
287 |                             )
288 |                             submit = gr.Button(
289 |                                 "发送",
290 |                                 variant="primary",
291 |                                 min_width="17",
292 |                                 size="sm",
293 |                                 scale=1,
294 |                                 icon=os.path.join(statistic_path, "send.svg"),
295 |                             )
296 | 
297 |                 doc_qa_input.submit(
298 |                     fn=validate_doc_qa,
299 |                     inputs=[
300 |                         doc_qa_input,
301 |                         doc_df,
302 |                         fallback_radio,
303 |                         fallback_text_input,
304 |                     ],
305 |                     outputs=[],
306 |                     queue=False,
307 |                 ).success(
308 |                     user, [doc_qa_input, chatbot], [doc_qa_input, chatbot], queue=False
309 |                 ).success(
310 |                     fn=generate_doc_qa,
311 |                     inputs=[
312 |                         doc_qa_input,
313 |                         chatbot,
314 |                         doc_df,
315 |                         fallback_radio,
316 |                         fallback_text_input,
317 |                         citations_radio,
318 |                     ],
319 |                     outputs=[doc_qa_input, chatbot],
320 |                     scroll_to_output=True,
321 |                     api_name="doc_qa",
322 |                 )
323 | 
324 |                 submit.click(
325 |                     fn=validate_doc_qa,
326 |                     inputs=[
327 |                         doc_qa_input,
328 |                         doc_df,
329 |                         fallback_radio,
330 |                         fallback_text_input,
331 |                     ],
332 |                     outputs=[],
333 |                     queue=False,
334 |                 ).success(
335 |                     user, [doc_qa_input, chatbot], [doc_qa_input, chatbot], queue=False
336 |                 ).success(
337 |                     fn=generate_doc_qa,
338 |                     inputs=[
339 |                         doc_qa_input,
340 |                         chatbot,
341 |                         doc_df,
342 |                         fallback_radio,
343 |                         fallback_text_input,
344 |                         citations_radio,
345 |                     ],
346 |                     outputs=[doc_qa_input, chatbot],
347 |                     scroll_to_output=True,
348 |                 )
349 | 
350 |                 clear_history.click(
351 |                     fn=lambda x: (None, None, None),
352 |                     inputs=[],
353 |                     outputs=[doc_df, doc_qa_input, chatbot],
354 |                     queue=False,
355 |                 )
356 | 
357 |     with gr.Tab("插件能力"):
358 |         with gr.Row():
359 |             with gr.Column(scale=1):
360 |                 gr.Markdown(
361 |                     '<span style="color:rgba(0, 0, 0, 0.5); font-size: 14px; font-weight: 400; line-height: 28px; letter-spacing: 0em; text-align: left; width: 42px; height: 14px; left: 36px; top: 255px;">配置项</span>'
362 |                 )
363 | 
364 |                 radio_plugins = [
365 |                     gr.Radio(
366 |                         ["开启", "关闭"],
367 |                         label=plugin_json["name_for_human"],
368 |                         value="开启",
369 |                     )
370 |                     for plugin_json in PLUGIN_JSON_SCHEMA
371 |                 ]
372 | 
373 |             with gr.Column(scale=3):
374 |                 session = gr.State(value=dict())
375 |                 chatbot = gr.Chatbot(
376 |                     label="适用场景:需要LLM调用API解决问题",
377 |                     elem_classes="control-height",
378 |                     show_copy_button=True,
379 |                 )
380 |                 plugin_text_input = gr.Textbox(label="输入")
381 |                 with gr.Row():
382 |                     with gr.Column(scale=2):
383 |                         gr.Examples(
384 |                             [
385 |                                 "北京天气怎么样？",
386 |                                 "查询物流信息",
387 |                                 "每日壁纸",
388 |                                 "bing今天的壁纸是什么",
389 |                                 "查询手机号码归属地",
390 |                             ],
391 |                             plugin_text_input,
392 |                             label="试试问",
393 |                         )
394 |                     with gr.Column(scale=1):
395 |                         with gr.Row(variant="compact"):
396 |                             clear_history = gr.Button(
397 |                                 "清除历史",
398 |                                 min_width="17",
399 |                                 size="sm",
400 |                                 scale=1,
401 |                                 icon=os.path.join(statistic_path, "clear.png"),
402 |                             )
403 |                             submit = gr.Button(
404 |                                 "发送",
405 |                                 variant="primary",
406 |                                 min_width="17",
407 |                                 size="sm",
408 |                                 scale=1,
409 |                                 icon=os.path.join(statistic_path, "send.svg"),
410 |                             )
411 | 
412 |                 plugin_text_input.submit(
413 |                     fn=validate_plugin,
414 |                     inputs=[
415 |                         plugin_text_input,
416 |                     ],
417 |                     outputs=[],
418 |                     queue=False,
419 |                 ).success(
420 |                     user,
421 |                     [plugin_text_input, chatbot],
422 |                     [plugin_text_input, chatbot],
423 |                     scroll_to_output=True,
424 |                 ).success(
425 |                     fn=api_plugin_chat,
426 |                     inputs=[session, plugin_text_input, chatbot, *radio_plugins],
427 |                     outputs=[session, plugin_text_input, chatbot],
428 |                     scroll_to_output=True,
429 |                 )
430 | 
431 |                 submit.click(
432 |                     fn=validate_plugin,
433 |                     inputs=[
434 |                         plugin_text_input,
435 |                     ],
436 |                     outputs=[],
437 |                     queue=False,
438 |                 ).success(
439 |                     user,
440 |                     [plugin_text_input, chatbot],
441 |                     [plugin_text_input, chatbot],
442 |                     scroll_to_output=True,
443 |                 ).success(
444 |                     fn=api_plugin_chat,
445 |                     inputs=[session, plugin_text_input, chatbot, *radio_plugins],
446 |                     outputs=[session, plugin_text_input, chatbot],
447 |                     api_name="plugin",
448 |                     scroll_to_output=True,
449 |                 )
450 | 
451 |                 clear_history.click(
452 |                     fn=clear_plugin_session,
453 |                     inputs=[session],
454 |                     outputs=[session, plugin_text_input, chatbot],
455 |                     queue=False,
456 |                 )
457 |     with gr.Tab("生成QA对"):
458 |         with gr.Row(equal_height=True):
459 |             qa_generator_output = gr.Code(
460 |                 language="json",
461 |                 show_label=False,
462 |                 min_width=1368,
463 |             )
464 |         with gr.Row():
465 |             qa_generator_input = gr.Textbox(
466 |                 label="输入",
467 |                 show_label=True,
468 |                 info="",
469 |                 min_width=1368,
470 |                 lines=5,
471 |                 max_lines=10,
472 |             )
473 | 
474 |             with gr.Row():
475 |                 with gr.Column(scale=2):
476 |                     gr.Examples(
477 |                         [
478 |                             "第一章 总 则 \n第...",
479 |                             "金字塔，在建筑学上是...",
480 |                             "山西老陈醋是以高粱、...",
481 |                             "室内装饰构造虚拟仿真...",
482 |                             "猎户星空（Orion...",
483 |                         ],
484 |                         qa_generator_input,
485 |                         label="试试问",
486 |                         cache_examples=True,
487 |                         fn=preprocess_qa_generator_examples,
488 |                         outputs=[qa_generator_input],
489 |                     )
490 |                 with gr.Column(scale=1):
491 |                     with gr.Row(variant="compact"):
492 |                         clear = gr.Button(
493 |                             "清除",
494 |                             min_width="17",
495 |                             size="sm",
496 |                             scale=1,
497 |                             icon=os.path.join(statistic_path, "clear.png"),
498 |                         )
499 |                         submit = gr.Button(
500 |                             "发送",
501 |                             variant="primary",
502 |                             min_width="17",
503 |                             size="sm",
504 |                             scale=1,
505 |                             icon=os.path.join(statistic_path, "send.svg"),
506 |                         )
507 | 
508 |             submit.click(
509 |                 fn=validate_qa_pair_generator,
510 |                 inputs=[qa_generator_input],
511 |                 outputs=[],
512 |             ).success(
513 |                 fn=generate_qa_pairs,
514 |                 inputs=[qa_generator_input],
515 |                 outputs=[qa_generator_output, qa_generator_input],
516 |                 scroll_to_output=True,
517 |                 api_name="qa_generator",
518 |             )
519 | 
520 |             clear.click(
521 |                 fn=lambda x: ("", ""),
522 |                 inputs=[],
523 |                 outputs=[qa_generator_input, qa_generator_output],
524 |                 queue=False,
525 |             )
526 | 
527 |     with gr.Tab("抽取数据"):
528 |         extract_outpu_df = gr.Dataframe(
529 |             label="",
530 |             headers=["字段名称", "字段抽取结果"],
531 |             datatype=["str", "str"],
532 |             col_count=(2, "fixed"),
533 |             wrap=True,
534 |             elem_classes="control-height",
535 |             height=234,
536 |             row_count=5,
537 |         )
538 | 
539 |         extract_input = gr.Textbox(label="输入", lines=5, min_width=1368, max_lines=10)
540 | 
541 |         extraction_df = gr.Dataframe(
542 |             headers=["字段名称", "字段描述"],
543 |             datatype=["str", "str"],
544 |             row_count=3,
545 |             col_count=(2, "fixed"),
546 |             label="",
547 |             interactive=True,
548 |             wrap=True,
549 |             elem_classes="control-height",
550 |             height=180,
551 |         )
552 | 
553 |         with gr.Row():
554 |             with gr.Column(scale=2):
555 |                 gr.Examples(
556 |                     ["第一条合同当...", "发票编号: IN...", "发件人：John..."],
557 |                     extract_input,
558 |                     label="试试问",
559 |                     cache_examples=True,
560 |                     fn=preprocess_extraction_examples,
561 |                     outputs=[extract_input, extraction_df],
562 |                 )
563 |             with gr.Column(scale=1):
564 |                 with gr.Row(variant="compact"):
565 |                     clear = gr.Button(
566 |                         "清除历史",
567 |                         min_width="17",
568 |                         size="sm",
569 |                         scale=1,
570 |                         icon=os.path.join(statistic_path, "clear.png"),
571 |                     )
572 |                     submit = gr.Button(
573 |                         "发送",
574 |                         variant="primary",
575 |                         min_width="17",
576 |                         size="sm",
577 |                         scale=1,
578 |                         icon=os.path.join(statistic_path, "send.svg"),
579 |                     )
580 | 
581 |         submit.click(
582 |             fn=validate_extraction,
583 |             inputs=[extract_input, extraction_df],
584 |             outputs=[],
585 |         ).success(
586 |             fn=extract_slots,
587 |             inputs=[extract_input, extraction_df],
588 |             outputs=[extract_outpu_df],
589 |             scroll_to_output=True,
590 |             api_name="extract",
591 |         )
592 | 
593 |         clear.click(
594 |             fn=lambda x: ("", None, None),
595 |             inputs=[],
596 |             outputs=[
597 |                 extract_input,
598 |                 extraction_df,
599 |                 extract_outpu_df,
600 |             ],
601 |             queue=False,
602 |         )
603 | 
604 | 
if __name__ == "__main__":
    # Configure the request queue first, then start the server with the
    # launch options collected in one place.
    queued_demo = demo.queue(api_open=False, max_size=40)
    launch_options = {
        "height": 800,
        "share": False,
        "server_name": "0.0.0.0",
        "show_api": False,
        "max_threads": 4,
    }
    queued_demo.launch(**launch_options)
613 | 


--------------------------------------------------------------------------------
/gradio_demo/chat_task/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/gradio_demo/chat_task/__init__.py


--------------------------------------------------------------------------------
/gradio_demo/chat_task/chat.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import List
 3 | 
 4 | from common.call_llm import chat_stream_generator
 5 | 
# Endpoint used for the chat task, read from the CHAT_ENDPOINT environment
# variable (None when the variable is unset). It is the default `endpoint`
# handed to chat_stream_generator by generate_chat.
CHAT_ENDPOINT = os.environ.get("CHAT_ENDPOINT")
 7 | 
 8 | 
 9 | def generate_chat(
10 |     input_text: str, history: List[List[str]], endpoint: str = CHAT_ENDPOINT
11 | ):
12 |     """Generates chat responses and updates the chat history."""
13 | 
14 |     input_text = input_text or "你好"
15 |     history = (history or [])[-5:]  # Keep the last 5 messages in history
16 | 
17 |     messages = []
18 |     for message, answer in history:
19 |         messages.append({"role": "user", "content": message})
20 |         if answer:
21 |             messages.append({"role": "assistant", "content": answer})
22 | 
23 |     # append latest message
24 |     stream_response = chat_stream_generator(
25 |         messages=messages, endpoint=endpoint, temperature=0.2
26 |     )
27 |     for character in stream_response:
28 |         history[-1][1] += character
29 |         yield None, history
30 |     else:
31 |         yield None, history
32 | 


--------------------------------------------------------------------------------
/gradio_demo/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/gradio_demo/common/__init__.py


--------------------------------------------------------------------------------
/gradio_demo/common/call_llm.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from typing import Dict, Generator, List
 4 | 
 5 | import requests
 6 | 
# Backend configuration read from environment variables; each value is None
# when its variable is unset.
# BACKEND_HOST is prefixed to the endpoint to form the request URL.
BACKEND_HOST = os.getenv("BACKEND_HOST")
# MODEL_NAME is sent as the "model" field of every request payload.
MODEL_NAME = os.getenv("MODEL_NAME")
# API_KEY is sent in the "orionstar-api-key" request header below.
API_KEY = os.getenv("API_KEY")


# Custom headers attached to every request sent to the backend
HEADERS = {
    "orionstar-api-key": API_KEY,
}
17 | 
18 | 
def chat_stream_generator(
    messages: List[Dict[str, str]], endpoint: str, temperature: float = 0.7
) -> Generator[str, None, None]:
    """Stream chat completion text from the backend, chunk by chunk.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        endpoint: Endpoint appended to ``BACKEND_HOST`` to form the URL.
        temperature: Sampling temperature forwarded in the payload.

    Yields:
        The ``content`` text of each streamed delta (``""`` when a delta
        has no content). On a non-200 response or any exception a single
        ``"[model server error]..."`` message is yielded instead.
    """

    payload = {
        "model": MODEL_NAME,
        "stream": True,
        "messages": messages,
        "temperature": temperature,
    }

    try:
        with requests.post(
            BACKEND_HOST + endpoint,
            json=payload,
            headers=HEADERS,
            timeout=60 * 10,
            stream=True,
        ) as response:
            if response.status_code != 200:
                yield f"[model server error]. response text: {response.text}"
                # The body is an error payload, not an event stream; parsing
                # it would only produce a second, misleading error.
                return

            # Iterate raw bytes and decode as UTF-8 ourselves: letting
            # requests decode would use ISO-8859-1 for text/* responses that
            # declare no charset and garble non-ASCII content.
            for line in response.iter_lines():
                if not line:
                    continue

                if isinstance(line, bytes):
                    line = line.decode("utf-8")

                # Strip only the leading field name; "data: " occurring
                # inside the message content must be left intact.
                if line.startswith("data:"):
                    line = line[len("data:"):]
                line = line.strip()
                if line == "[DONE]":
                    return

                chunk = json.loads(line)
                choices = chunk.get("choices") or []
                if not choices:
                    # Chunk without any choice carries no text to emit.
                    continue

                # "content" may be missing or null; always yield a str.
                yield choices[0]["delta"].get("content") or ""

    except Exception as e:
        print(f"[chat_stream_generator] call model exception: {e}")
        yield "[model server error]"
59 | 
60 | 
def chat(
    messages: List[Dict[str, str]], endpoint: str, stop=None, temperature: float = 0.7
) -> str:
    """Request one complete (non-streamed) chat answer from the backend.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        endpoint: Endpoint appended to ``BACKEND_HOST`` to form the URL.
        stop: Optional stop value; only added to the payload when truthy.
        temperature: Sampling temperature forwarded in the payload.

    Returns:
        The content of the first choice's message, or a string starting
        with ``"[model server error]"`` on a non-200 status or exception.
    """

    request_body = dict(model=MODEL_NAME, messages=messages, temperature=temperature)
    if stop:
        request_body["stop"] = stop

    try:
        with requests.post(
            BACKEND_HOST + endpoint,
            json=request_body,
            headers=HEADERS,
            timeout=600,
        ) as reply:
            if reply.status_code == 200:
                first_choice = reply.json()["choices"][0]
                return first_choice["message"]["content"]

            return f"[model server error]. response text: {reply.text}"

    except Exception as err:
        print(f"[chat] call model exception: {err}")
        return "[model server error]"
86 | 


--------------------------------------------------------------------------------
/gradio_demo/doc_qa_task/__init__.py:
--------------------------------------------------------------------------------
1 | # Doc QA task
2 | 


--------------------------------------------------------------------------------
/gradio_demo/doc_qa_task/doc_qa.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import os
  3 | import unicodedata
  4 | from typing import List
  5 | import uuid
  6 | import hashlib
  7 | 
  8 | import pandas as pd
  9 | 
 10 | from common.call_llm import chat_stream_generator
 11 | 
# Endpoint used for the document-QA task, read from the DOC_QA_ENDPOINT
# environment variable (None when the variable is unset).
DOC_QA_ENDPOINT = os.environ.get("DOC_QA_ENDPOINT")

# Prompt skeleton filled in by get_prompt() via str.format. Placeholders:
#   {context}      - reference material
#   {chat_history} - earlier conversation turns
#   {question}     - the user's question
#   {requirement}  - numbered answering rules
prompt_template = """你是由猎户星空开发的AI助手，你的名字叫聚言。你可以根据下面给出的参考资料和聊天历史来回答用户问题。

### 参考资料 ###
{context}

### 聊天历史 ###
{chat_history}

### 用户问题 ###
{question}

### 回答要求 ###
{requirement}
"""
 28 | 
 29 | 
 30 | def document_prompt_template():
 31 |     return """["Source_id": {doc_id},"Content": "{page_content}"]"""
 32 | 
 33 | 
 34 | 
 35 | def language_detect(text: str) -> str:
 36 |     text = re.sub(r"([ ■◼•＊…— �●⚫]+|[·\.~•、—'}\n\t]{1,})", '', text.strip())
 37 |     stats = {
 38 |         "zh": 0,
 39 |         "ja": 0,
 40 |         "ko": 0,
 41 |         "en": 0,
 42 |         "th": 0,
 43 |         "other": 0
 44 |     }
 45 |     char_count = 0
 46 |     for char in text:
 47 |         try:
 48 |             code_name = unicodedata.name(char)
 49 |         except Exception:
 50 |             continue
 51 |         char_count += 1
 52 |         # 判断是否为中文
 53 |         if 'CJK' in code_name:
 54 |             stats["zh"] += 1
 55 |         # 判断是否为日文
 56 |         elif 'HIRAGANA' in code_name or 'KATAKANA' in code_name:
 57 |             stats["ja"] += 1
 58 |         # 判断是否为泰文
 59 |         elif "THAI" in code_name:
 60 |             stats["th"] += 1
 61 |         # 判断是否为韩文
 62 |         elif 'HANGUL' in code_name:
 63 |             stats["ko"] += 1
 64 |         # 判断是否为英文
 65 |         elif 'LA' in code_name:
 66 |             stats["en"] += 1
 67 |         else:
 68 |             stats["other"] += 1
 69 | 
 70 |     lang = ""
 71 |     ratio = 0.0
 72 |     for lan in stats:
 73 |         if lan == "other":
 74 |             continue
 75 |         # trick: 英文按字母统计不准确，除以4大致表示word个数
 76 |         if lan == "en":
 77 |             stats[lan] /= 4.0
 78 |         lan_r = float(stats[lan]) / char_count
 79 |         if ratio < lan_r:
 80 |             lang = lan
 81 |             ratio = lan_r
 82 | 
 83 |     return lang
 84 | 
 85 | 
 86 | def language_prompt(lan: str) -> str:
 87 |     _ZH_LANGUAGE_MAP = {
 88 |         "zh": "中文",
 89 |         "en": "英文",
 90 |         "other": "中文",
 91 |         "ja": "中文",
 92 |         "zh_gd": "中文",
 93 |         "ko": "韩文",
 94 |         "th": "泰文"
 95 |     }
 96 |     return _ZH_LANGUAGE_MAP.get(lan.lower(), "中文")
 97 | 
 98 | 
 99 | def _get_chat_history(chat_history: List[List]) -> str:
100 |     if not chat_history:
101 |         return ""
102 |     chat_history_text = ""
103 |     for human_msg, ai_msg in chat_history:
104 |         human = "{'Human': '" + human_msg + "'}"
105 |         ai = "{'AI': '" + ai_msg + "'}"
106 |         chat_history_text += "[" + ", ".join([human, ai]) + "]\n"
107 |     return chat_history_text
108 | 
109 | 
def get_prompt(context: str, chat_history: str, question: str, trapped_switch: int, fallback: str,
               citations_switch: int) -> str:
    """Assemble the full document-QA prompt.

    Args:
        context: Rendered reference material.
        chat_history: Rendered earlier conversation turns.
        question: The user's question; also used to pick the answer language.
        trapped_switch: ``1`` to add the fallback-answer rule (only when
            ``fallback`` is non-empty).
        fallback: Text the model is told to answer with when the reference
            material is insufficient.
        citations_switch: Truthy to add the rule asking for cited Source_ids.

    Returns:
        ``prompt_template`` filled with the arguments and the numbered rules.
    """
    # Rules 1 and 2 are always present; optional rules are numbered from 3.
    numbered_rules = [
        "1. 你只能根据上面参考资料中给出的事实信息来回答用户问题，不要胡编乱造。",
        "2. 如果向用户提出澄清问题有助于回答问题，可以尝试提问。",
    ]

    optional_rules = []
    if len(fallback) > 0 and trapped_switch == 1:
        fallback_rule = """如果参考资料中的信息不足以回答用户问题，请直接回答下面三个双引号中的内容：\"\"\"{fallback}\"\"\"。""".format(
            fallback=fallback)
        optional_rules.append(fallback_rule)

    if citations_switch:
        optional_rules.append(
            "如果你给出的答案里引用了参考资料中的内容，请在答案的结尾处添加你引用的Source_id，引用的Source_id值来自于参考资料中，并用两个方括号括起来。示例：[[d97b811489b73f46c8d2cb1bc888dbbe]]、[[b6be48868de736b90363d001c092c019]]"
        )

    # The style rule always comes last and names the detected language.
    detected = language_detect(question)
    optional_rules.append(
        """请你以第一人称并且用严谨的风格来回答问题，一定要用{language}来回答，并且基于事实详细阐述。""".format(
            language=language_prompt(detected),
        )
    )

    for number, rule in enumerate(optional_rules, start=3):
        numbered_rules.append(str(number) + ". " + rule)

    requirement_text = "\n".join(numbered_rules)
    return prompt_template.format(context=context, chat_history=chat_history, question=question,
                                  requirement=requirement_text)
135 | 
136 | 
def generate_doc_qa(input_text: str, history: List[List[str]], doc_df: "pd.DataFrame", trapped_switch: str, fallback: str,
                    citations_switch: str):
    """Stream a document-grounded answer and append it to the last history entry.

    Args:
        input_text: The user's question; falls back to "你好" when blank.
        history: ``[user_message, answer]`` pairs. The last pair is the turn
            being answered and is extended in place.
        doc_df: Table with the columns "文档片段内容" (fragment content) and
            "文档片段名称" (fragment name).
        trapped_switch: "自定义话术" enables the custom fallback answer.
        fallback: Custom fallback text; ``None`` is treated as empty.
        citations_switch: "开启引用" asks the model to cite Source_ids.

    Yields:
        ``(None, history)`` after each chunk that is shown immediately, and
        once more at the end if any text was held back for citation rewriting.
    """
    # handle input params
    print(f"input_text: {input_text}, history: {history}, page_content: {doc_df}, trapped_switch: {trapped_switch}, fallback: {fallback}, citations_switch: {citations_switch}")

    citations_switch = 1 if citations_switch == "开启引用" else 0
    trapped_switch = 1 if trapped_switch == "自定义话术" else 0
    fallback = fallback or ""

    input_text = input_text or "你好"
    history = (history or [])[-5:]  # Keep the last 5 turns in history

    if not history:
        # Nothing was queued by the caller: start a turn from the raw input
        # instead of failing on history[-1] below.
        history = [[input_text, ""]]
    elif history[-1][1] is None:
        # An unanswered turn may carry None; make it appendable.
        history[-1][1] = ""

    doc_df = doc_df[doc_df["文档片段内容"].notna()]
    # iterate over all documents
    context = ""
    source_id_map = dict()
    for _, row in doc_df.iterrows():
        if not row["文档片段内容"] or not row["文档片段名称"]:
            continue
        # Random opaque id shown to the model in place of the fragment name.
        source_id = hashlib.md5(str(uuid.uuid4()).encode("utf-8")).hexdigest()
        # Stored as text so the str.replace below cannot fail on a
        # non-string cell value.
        source_id_map[source_id] = str(row["文档片段名称"])
        context += document_prompt_template().format(doc_id=source_id, page_content=row["文档片段内容"]) + "\n\n"

    # NOTE(review): the history rendered here still includes the turn being
    # answered (its last entry) - confirm that is intended.
    prompt = get_prompt(context.strip(), _get_chat_history(history), input_text, trapped_switch, fallback,
                        citations_switch)
    print(f"docQA prompt: {prompt}")
    messages = [{"role": "user", "content": prompt}]
    stream_response = chat_stream_generator(messages=messages, endpoint=DOC_QA_ENDPOINT)

    # Once a "[" shows up, everything from that point on is held back so the
    # [[Source_id]] markers can be rewritten before being displayed.
    cache = ""

    for character in stream_response:
        character = character or ""  # tolerate a None chunk
        if "[" in character or cache:
            cache += character
            continue
        history[-1][1] += character
        yield None, history

    if cache:
        source_ids = re.findall(r"\[\[(.*?)\]\]", cache)
        print(f"Matched source ids {source_ids}")
        for source_id in source_ids:
            # Unknown ids are left unchanged.
            origin_source_id = source_id_map.get(source_id, source_id)
            cache = cache.replace(source_id, origin_source_id)

        history[-1][1] += cache
        yield None, history


--------------------------------------------------------------------------------
/gradio_demo/examples.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | 
# Doc-QA examples: maps an example input text to a list of dicts with the keys
# "文档片段名称" and "文档片段内容". Filled by load_examples().
DOC_QA_EXAMPLES = {}
# QA-generator examples: maps a shortened label (first 10 characters of the
# input followed by "...") to the full input text. Filled by load_examples().
QA_GENERATOR_EXAMPLES = {}
 6 | 
 7 | 
 8 | def load_examples():
 9 |     global DOC_QA_EXAMPLES, QA_GENERATOR_EXAMPLES
10 |     doc_qa_df = pd.read_excel("./模型演示.xlsx", sheet_name="基于文档问答")
11 |     for _, row in doc_qa_df.iterrows():
12 |         if not pd.isna(row["输入"]):
13 |             DOC_QA_EXAMPLES.setdefault(row["输入"], []).append(
14 |                 {
15 |                     "文档片段名称": row["文档片段名称"],
16 |                     "文档片段内容": row["文档片段内容"],
17 |                 }
18 |             )
19 | 
20 |     qa_generator_df = pd.read_excel("./模型演示.xlsx", sheet_name="生成QA对")
21 |     for _, row in qa_generator_df.iterrows():
22 |         if not pd.isna(row["输入"]):
23 |             QA_GENERATOR_EXAMPLES[row["输入"][:10] + "..."] = row["输入"]
24 | 
25 |     print(f"QA_GENERATOR_EXAMPLES: {QA_GENERATOR_EXAMPLES}")
26 | 
27 | 
28 | EXTRACTION_EXAMPLES = {
29 |     "第一条合同当...": {
30 |         "input": "第一条合同当事人：\n出卖人：北京房房房地产开发有限公司\n通讯地址：北京市朝阳区长安路8899号\n委托销售经纪机构：北京好房中介服务有限公司\n通讯地址：北京市海淀区朝阳路南大街19号 邮政编码：100081\n营业执照注册号：110108119876543\n经纪机构备案证明号：JB202411008\n法定代表人：王刚\n联系电话：010-66778899\n买受人：王小豆\n户籍所在地：中国，北京\n证件类型：【居民身份证】，证号：110101198801011256\n出生日期：1988年1月1日，\n性别：男\n通讯地址：北京市昌平区天通苑街道20号\n邮政编码：100010\n联系电话：13888138999\n\n第二条商品房基本情况：\n\n该商品房的规划用途为【住宅】。该商品房所在建筑物的主体结构为钢筋混凝土，建筑总层数为28层，其中地上26层，地下2层。\n该商品房位于北京市西城区雅琴园小区8号楼2单元9层909号。该商品房的平面图见附件二。\n该商品房的房产测绘机构为北京市建设测绘，真实测建筑面积共120平方米，其中套内建筑面积100平方米，分摊共有建筑面积20平方米。该商品房共用部位见附件二。\n该商品房层高为2.8米，有2个阳台，其中1个阳台为封闭式，1个阳台为非封闭式。阳台是否封闭以规划设计文件为准。\n\n第三条计价方式与价款：\n出卖人与买受人按照下列第1种方式计算该商品房价款：\n1.按照套内建筑面积计算，该商品房单价为每平方米30000（币种：人民币）元，总价款为3000000（币种：人民币）元（大写叁佰万元整）。\n2.按照建筑面积计算，该商品房单价为每平方米（币种）元，总价款为（币种）元（大写元整）。\n3.按照套计算，该商品房总价款为（币种）元（大写元整）。\n4.按照计算，该商品房总价款为（币种）元（大写元整）。\n\n第四条付款方式及期限：\n（一）签订本合同前，买受人已向出卖人支付定金2000000（币种：人民币）元（大写），该定金于【本合同签订】时【抵作】商品房价款。\n（二）买受人采取下列第1种方式付款：\n1.一次性付款。买受人应当在2024年1月31日前支付该商品房全部价款。\n2.分期付款。买受人应当在年月日前分期支付该商品房全部价款，首期房价款（币种）元（大写：元整），应当于年月日前支付。\n3.贷款方式付款：【公积金贷款】【商业贷款】【】。买受人应当于年月日前支付首期房价款（币种）元（大写元整），占全部房价款的％。余款（币种）元（大写元整）向（贷款机构）申请贷款支付。\n\n第五条交付时间和手续：\n\n（一）出卖人应当在2024年6月30日前向买受人交付该商品房。\n（二）该商品房达到第十条、第十一条约定的交付条件后，出卖人应当在交付日期届满前10日（不少于10日）将查验房屋的时间、办理交付手续的时间地点以及应当携带的证件材料的通知书面送达买受人。买受人未收到交付通知书的，以本合同约定的交付日期届满之日为办理交付手续的时间，以该商品房所在地为办理交付手续的地点。。交付该商品房时，出卖人应当出示满足第十条约定的证明文件。出卖人不出示证明文件或者出示的证明文件不齐全，不能满足第十条约定条件的，买受人有权拒绝接收，由此产生的逾期交付责任由出卖人承担，并按照第十三条处理。",
31 |         "fields": [
32 |             {"字段名称": "房屋情况", "字段描述": "该合同中商品房基本情况如占地面积、地址等信息"},
33 |             {
34 |                 "字段名称": "分摊面积",
35 |                 "字段描述": "该合同中的分摊共有建筑面积",
36 |             },
37 |             {"字段名称": "单价", "字段描述": "该合同中商品房单价"},
38 |             {"字段名称": "定金", "字段描述": "该合同中签订本合同前，买受人已向出卖人支付的定金"},
39 |         ],
40 |     },
41 |     '发票编号: IN...': {
42 |         "input": '发票编号: INV20220109\n日期: 2022年1月9日\n供应商名称: XYZ餐厅\n供应商地址: 789号街道，城市石家庄，省份河北，邮编466200\n供应商电话: 1234567890\n供应商税号: 1234567890\n公司名称: ABC公司\n公司地址: 456号街道，城市，省份，邮编\n公司电话: 9876543210\n公司税号: 0987654321\n\n项目列表:\n---------------------------------------\n序号 项目名称 数量 单价 总价\n---------------------------------------\n1 晚宴 10 500 5000\n2 饮料 20 50 1000\n3 蛋糕 1 200 200\n---------------------------------------\n合计金额: 6200\n税额: 620\n总金额: 6820\n\n付款方式: 公司账户支付\n付款日期: 2022年1月15日\n\n备注: 感谢您的光临！期待再次为您提供优质的服务。',
43 |         "fields": [
44 |             {
45 |                 "字段名称": "公司税号",
46 |                 "字段描述": "公司的纳税编号",
47 |             },
48 |             {"字段名称": "供应商地点", "字段描述": "供应商的所在位置"},
49 |             {
50 |                 "字段名称": "支付日期",
51 |                 "字段描述": "本发票中的订单的付款时间",
52 |             },
53 |         ],
54 |     },
55 |     '发件人：John...': {
56 |         "input": '发件人：John Smith\n发件人邮箱：john.smith@example.com\n收件人：Jane Doe\n收件人邮箱：jane.doe@example.com\n主题：邀请参加公司年度晚宴\n亲爱的Jane Doe,\n我希望这封邮件能够找到你身体健康、一切顺利。我写信是为了邀请你参加我们公司即将举行的年度晚宴。\n活动详情如下：\n日期：2024年1月15日\n时间：晚上7点至10点\n地点：ABC酒店大宴会厅\n我们精心策划了这个晚宴，旨在庆祝公司过去一年的成就，感谢所有员工的辛勤工作。我们相信你的参与将为晚宴增添更多的精彩和价值。\n在晚宴中，我们将提供美味的晚餐、精彩的表演和丰富的互动环节。此外，您还将有机会与其他同事进行交流、建立更紧密的联系。\n请在2024年1月10日前回复此邮件，告知您是否能够参加。如果您有任何问题或需要进一步的信息，请随时与我联系。\n我们非常期待您的光临，并与您一同度过这个特殊的时刻。\n谢谢！\n最诚挚的问候，\nJohn Smith\n公司活动组织者\n联系方式：123-456-7890\njohn.smith@example.com',
57 |         "fields": [
58 |             {
59 |                 "字段名称": "发件人",
60 |                 "字段描述": "发件人的姓名",
61 |             },
62 |             {"字段名称": "发件人邮箱", "字段描述": "发件人的邮箱地址"},
63 |             {
64 |                 "字段名称": "联系方式",
65 |                 "字段描述": "活动组织者的联系电话",
66 |             },
67 |             {
68 |                 "字段名称": "宴会地址",
69 |                 "字段描述": "举行宴会的地点",
70 |             },
71 |         ],
72 |     },
73 | }
74 | 
75 | 
def preprocess_docqa_examples(text):
    """Return the document fragments registered for an example as a table.

    The table has the columns "文档片段内容" and "文档片段名称" and is empty
    when ``text`` is not a known example.
    """
    fragments = DOC_QA_EXAMPLES.get(text, [])
    return pd.DataFrame(fragments, columns=["文档片段内容", "文档片段名称"])
80 | 
81 | 
def preprocess_qa_generator_examples(text):
    """Return the full input text for a shortened example label.

    Unknown labels yield an empty string.
    """
    full_text = QA_GENERATOR_EXAMPLES.get(text, "")
    return full_text
84 | 
85 | 
def preprocess_extraction_examples(text):
    """Return the input text and field table of an extraction example.

    Returns:
        ``(input_text, df)`` where ``df`` has the columns "字段名称" and
        "字段描述". Unknown labels give ``""`` and an empty table.
    """
    example = EXTRACTION_EXAMPLES.get(text, {})
    field_table = pd.DataFrame(example.get("fields", []), columns=["字段名称", "字段描述"])
    return example.get("input", ""), field_table
91 | 


--------------------------------------------------------------------------------
/gradio_demo/extract_data_task/__init__.py:
--------------------------------------------------------------------------------
1 | # extract data task
2 | 


--------------------------------------------------------------------------------
/gradio_demo/extract_data_task/extract.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | from typing import Tuple
 4 | 
 5 | import pandas as pd
 6 | 
 7 | from common.call_llm import chat
 8 | 
# Endpoint handed to `chat` for extraction requests; None when the
# EXTRACT_ENDPOINT environment variable is not set.
EXTRACT_ENDPOINT = os.environ.get("EXTRACT_ENDPOINT")


# Prompt used by `extract_slots`. It is filled with str.format using three
# placeholders: `fields_prompt` (one "name: description" line per field),
# `context` (the document text) and `response_prompt` (a JSON skeleton of
# the expected answer).
prompt_template = """### 角色能力 ###
你是一个信息提取助手，你可以按下面给出的提取字段及描述对文档内容进行信息提取，并按给定的格式返回。
确保提取的信息完整且与文档内容一致，如果有字段提取不到对应信息请返回'无'。

### 提取字段及描述 ###
{fields_prompt}

### 文档内容 ###
{context}

### 返回格式 ###
请严格按照下面描述的JSON格式进行输出，不需要解释，输出JSON格式如下:
{response_prompt}
确保输出的格式可以被Python的json.loads方法解析。
"""
27 | 
28 | 
def extract_slots(page_content: str, extraction_df: "pd.DataFrame") -> "pd.DataFrame":
    """
    Extract the requested fields from a document with the LLM.

    :param page_content: text of the document to extract from
    :param extraction_df: table with a "字段名称" (field name) and a
        "字段描述" (field description) column; rows where either value is
        missing (NaN/None) or empty are ignored
    :return: DataFrame with the columns "字段名称" and "字段抽取结果", one
        row per requested field. It is empty when no usable field was given
        or when no non-empty JSON object could be obtained from the model
        within the retry budget.
    """
    df = pd.DataFrame(columns=["字段名称", "字段抽取结果"])

    requirement_lines = []
    output_requirement = {}
    for _, row in extraction_df.iterrows():
        name, description = row["字段名称"], row["字段描述"]
        # NaN is truthy, so missing cells need an explicit check in addition
        # to the emptiness test.
        if pd.isna(name) or pd.isna(description) or not name or not description:
            continue

        requirement_lines.append(f"{name}: {description}\n")
        output_requirement[name] = description

    if not output_requirement:
        return df

    extract_requirement = "".join(requirement_lines)
    output_requirement_description = json.dumps([output_requirement], ensure_ascii=False, indent=4)
    prompt = prompt_template.format(context=page_content, fields_prompt=extract_requirement, response_prompt=output_requirement_description)
    messages = [{"role": "user", "content": prompt}]

    max_retry = 6
    result = None
    # Every attempt counts against the budget, including replies that parse
    # but are empty or not a JSON object; otherwise such replies would be
    # retried forever.
    for _ in range(max_retry):
        reply = None
        try:
            reply = chat(messages=messages, endpoint=EXTRACT_ENDPOINT)
            if reply.startswith("```json"):
                # Strip a Markdown code fence around the JSON payload.
                reply = reply.replace("```json", "").replace("```", "").strip()

            parsed = json.loads(reply)
            if isinstance(parsed, list):
                # The prompt shows a one-element list; use its first item.
                parsed = parsed[0]
        except Exception as e:
            # Best effort: any transport or parsing failure triggers a retry.
            print(f"error: {e} {reply}")
            continue

        if isinstance(parsed, dict) and parsed:
            result = parsed
            break
        print(f"error: unexpected extraction result {parsed}")

    print(f"extract slots prompt: {prompt} result: {result}")

    if not result:
        return df

    for field in output_requirement:
        df.loc[len(df)] = {"字段名称": field, "字段抽取结果": result.get(field, "无")}

    return df
83 | 


--------------------------------------------------------------------------------
/gradio_demo/images/clear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/gradio_demo/images/clear.png


--------------------------------------------------------------------------------
/gradio_demo/images/send.svg:
--------------------------------------------------------------------------------
1 | <svg width="18" height="17" viewBox="0 0 18 17" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path d="M16.4442 0.633228C17.2017 0.45936 17.8705 1.1475 17.6502 1.87396L13.2769 16.2931C12.9893 17.2411 11.6066 17.2316 11.3299 16.2798L10.688 14.0719L9.98331 11.648C9.92254 11.439 9.9343 11.2164 10.0168 11.0147L11.3645 7.71935C11.6836 6.93902 10.9177 6.15107 10.0996 6.41796L6.64443 7.54504C6.43299 7.61401 6.20398 7.61311 5.99291 7.54246L3.54554 6.72331L1.3162 5.97713C0.355078 5.65543 0.422008 4.31111 1.41044 4.08422L16.4442 0.633228Z" fill="white"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/gradio_demo/plugin_task/__init__.py:
--------------------------------------------------------------------------------
1 | # plugin task
2 | 


--------------------------------------------------------------------------------
/gradio_demo/plugin_task/api.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict, List, Optional, Tuple, Union
  3 | 
  4 | import gradio as gr
  5 | 
  6 | from common.call_llm import chat, chat_stream_generator
  7 | from plugin_task.model import Plugin, ReActStep
  8 | from plugin_task.plugins import PLUGIN_JSON_SCHEMA, PLUGINS
  9 | from plugin_task.prompt import (
 10 |     FILLING_SLOT_PROMPT,
 11 |     FINAL_PROMPT,
 12 |     INTENT_RECOGNITION_PROMPT,
 13 | )
 14 | from plugin_task.util import (
 15 |     build_prompt_plugin_variables,
 16 |     parse_reAct_step,
 17 |     plugin_parameter_validator,
 18 | )
 19 | 
# Endpoint passed to every chat call in this module; None when the
# PLUGIN_ENDPOINT environment variable is not set.
PLUGIN_ENDPOINT = os.environ.get("PLUGIN_ENDPOINT")
 21 | 
 22 | 
 23 | def api_plugin_chat(
 24 |     session: Dict,
 25 |     message: str,
 26 |     chat_history: List[List[str]],
 27 |     *radio_plugins,
 28 | ):
 29 |     """调用插件"""
 30 | 
 31 |     if not check_in_plugin_session(session):
 32 |         plugins = prepare_plugins(radio_plugins)
 33 |         if not plugins:
 34 |             gr.Warning("没有启用插件")
 35 |             return
 36 | 
 37 |         intention, reAct_step = intent_recognition(message, plugins)
 38 |         if intention in ("ask_user_for_required_params", "plugin"):
 39 |             session["origin_message"] = message
 40 |             session["choice_plugin"] = reAct_step.thought["tool_to_use_for_user"]
 41 |             session["reAct_step"] = [reAct_step]
 42 | 
 43 |     else:
 44 |         intention, reAct_step = filling_slot_with_loop(session, message)
 45 | 
 46 |     print(
 47 |         f"[API_PLUGIN_CHAT]. message: {message},\n intention: {intention},\n session: {session}\n"
 48 |         + "=" * 25
 49 |         + "END"
 50 |         + "=" * 25
 51 |     )
 52 | 
 53 |     if intention == "fail":
 54 |         chat_history[-1][1] = reAct_step
 55 |         session.clear()
 56 |         yield session, None, chat_history
 57 |         return
 58 | 
 59 |     if intention == "ask_user_for_required_params":
 60 |         chat_history[-1][1] = reAct_step.action_input.get("question", "")
 61 |         yield session, None, chat_history
 62 | 
 63 |     if intention == "plugin":
 64 |         yield from call_final_answer(session, reAct_step, chat_history)
 65 | 
 66 |     if intention == "chat":
 67 |         yield from call_chat(session, message, chat_history)
 68 | 
 69 |     if intention == "end":
 70 |         session.clear()
 71 |         chat_history[-1][1] = "[系统消息]：当前插件对话结束"
 72 |         yield session, None, chat_history
 73 |         return
 74 |     return
 75 | 
 76 | 
 77 | def filling_slot_with_loop(
 78 |     session: Dict, message: str, retry: int = 3
 79 | ) -> Tuple[str, Optional[Union[ReActStep, str]]]:
 80 |     """处理填槽"""
 81 |     plugin = PLUGINS[session["choice_plugin"]]
 82 | 
 83 |     while True:
 84 |         lastest_reAct_step = session["reAct_step"][-1]
 85 |         if not lastest_reAct_step.observation:
 86 |             lastest_reAct_step.observation = {"user_answer": message}
 87 | 
 88 |         reAct_step_str = "\n".join(step.to_str() for step in session["reAct_step"])
 89 | 
 90 |         ask_content = FILLING_SLOT_PROMPT.format(
 91 |             plugin_name=plugin.unique_name_for_model,
 92 |             description_for_human=plugin.description_for_human,
 93 |             parameter_schema=plugin.parameter_schema,
 94 |             question=session["origin_message"],
 95 |             reAct_step_str=reAct_step_str,
 96 |         )
 97 | 
 98 |         model_response = chat(
 99 |             [{"content": ask_content, "role": "user"}],
100 |             stop="Observation",
101 |             endpoint=PLUGIN_ENDPOINT,
102 |         )
103 |         print(
104 |             f"[FILLING_SLOT_WITH_LOOP] message: {message} ask_content: {ask_content}\n model_response: {model_response}\n"
105 |             + "=" * 25
106 |             + "END"
107 |             + "=" * 25
108 |         )
109 |         reAct_step = parse_reAct_step(model_response)
110 |         if not reAct_step:
111 |             if (retry := retry - 1) < 0:
112 |                 return "fail", model_response
113 | 
114 |             continue
115 | 
116 |         tool_to_use_for_user = reAct_step.thought.get("tool_to_use_for_user")
117 |         known_parameter = reAct_step.thought.get("known_params", {})
118 | 
119 |         if (
120 |             reAct_step.action == "end_conversation"
121 |             or tool_to_use_for_user == "end_conversation"
122 |         ):
123 |             return "end", reAct_step
124 | 
125 |         if (
126 |             reAct_step.action == "ASK_USER_FOR_REQUIRED_PARAMS"
127 |             and tool_to_use_for_user == plugin.unique_name_for_model
128 |         ):
129 |             passed, _ = plugin_parameter_validator(
130 |                 known_parameter,
131 |                 tool_to_use_for_user,
132 |             )
133 |             if passed:
134 |                 reAct_step.action = tool_to_use_for_user
135 |                 action = "plugin"
136 |             else:
137 |                 action = "ask_user_for_required_params"
138 | 
139 |             session["reAct_step"].append(reAct_step)
140 |             return action, reAct_step
141 | 
142 |         if (
143 |             reAct_step.action == plugin.unique_name_for_model
144 |             and tool_to_use_for_user == plugin.unique_name_for_model
145 |         ):
146 |             passed, invalid_info = plugin_parameter_validator(
147 |                 known_parameter,
148 |                 tool_to_use_for_user,
149 |             )
150 | 
151 |             if not passed:
152 |                 reAct_step.observation = {"tool_parameters_verification": invalid_info}
153 |                 session["reAct_step"].append(reAct_step)
154 |                 continue
155 | 
156 |             session["reAct_step"].append(reAct_step)
157 |             return "plugin", reAct_step
158 | 
159 | 
def call_chat(session: Dict, message: str, chat_history: List[List[str]]):
    """Stream a plain chat answer, prefixing every chunk with ``session``.

    Each chunk produced by ``generate_chat`` is unpacked and re-yielded as
    ``(session, *chunk)``.
    """
    # Imported here rather than at module level, as in the original code.
    from chat_task.chat import generate_chat

    chunks = generate_chat(message, chat_history, PLUGIN_ENDPOINT)
    for chunk in chunks:
        yield (session, *chunk)
165 | 
166 | 
def check_in_plugin_session(session: Dict) -> bool:
    """Return True when a plugin dialogue is in progress (session is non-empty)."""
    return True if session else False
170 | 
171 | 
def prepare_plugins(
    radio_plugins: List[str],
) -> List[Plugin]:
    """Return the plugins whose radio value is "开启".

    ``radio_plugins`` is matched by position against ``PLUGIN_JSON_SCHEMA``:
    the i-th value decides whether the i-th schema entry's plugin is
    included.
    """
    enabled = []
    for position, status in enumerate(radio_plugins):
        if status != "开启":
            continue
        schema = PLUGIN_JSON_SCHEMA[position]
        enabled.append(PLUGINS[schema["unique_name_for_model"]])
    return enabled
180 | 
181 | 
def intent_recognition(
    message: str, choice_plugins: List[Plugin]
) -> Tuple[str, Union[ReActStep, str]]:
    """Classify the user's message against the enabled plugins.

    The model is asked up to three times until its answer parses into a
    ReAct step.

    :param message: the user's message
    :param choice_plugins: plugins the model may choose from
    :return: ``(intention, payload)`` where intention is one of "fail"
        (payload is the raw model response), "chat", "end", "plugin" or
        "ask_user_for_required_params" (payload is the parsed step)
    """

    plugins, plugin_names = build_prompt_plugin_variables(choice_plugins)
    ask_content = INTENT_RECOGNITION_PROMPT.format(
        plugins=plugins, plugin_names=plugin_names, question=message
    )

    print(
        f"[INTENT_RECOGNITION] message:{message} ask_content: {ask_content}"
        + "=" * 25
        + "END"
        + "=" * 25
    )

    reAct_step = None
    model_response = ""
    for _ in range(3):
        model_response = chat(
            [{"content": ask_content, "role": "user"}],
            stop="Observation",
            endpoint=PLUGIN_ENDPOINT,
        )

        reAct_step = parse_reAct_step(model_response)
        if reAct_step:
            break

    if not reAct_step:
        print(f"[INTENT_RECOGNITION] model fail: {model_response}")
        return "fail", model_response

    tool_to_use_for_user = reAct_step.thought.get("tool_to_use_for_user")
    known_params = reAct_step.thought.get("known_params", {})
    if not isinstance(known_params, dict):
        # The model may emit a string or null here; treat it as "nothing
        # known" instead of crashing in the validator.
        known_params = {}
    available_names = plugin_names.split(",")

    if reAct_step.action == "TOOL_OTHER":
        return "chat", reAct_step

    if (
        reAct_step.action == "end_conversation"
        and tool_to_use_for_user == "end_conversation"
    ):
        return "end", reAct_step

    if tool_to_use_for_user in available_names:
        if reAct_step.action in ("ASK_USER_FOR_INTENT", "ASK_USER_FOR_REQUIRED_PARAMS"):
            passed, _ = plugin_parameter_validator(
                known_params,
                tool_to_use_for_user,
            )
            if passed:
                # Everything required is already known: turn the question
                # into a plugin call. The plugin must receive the known
                # parameters, not the question payload of the ask action.
                reAct_step.action = tool_to_use_for_user
                reAct_step.action_input = known_params
                return "plugin", reAct_step

            return "ask_user_for_required_params", reAct_step

        if reAct_step.action in available_names:
            return "plugin", reAct_step

    # Anything unrecognised is answered as ordinary chat.
    return "chat", reAct_step
244 | 
245 | 
def call_final_answer(session: Dict, reAct_step: ReActStep, history: List[List[str]]):
    """Run the chosen plugin and stream the model's final answer (generator).

    The plugin named by ``reAct_step.action`` is called with
    ``reAct_step.action_input``; its response is stored as the observation
    of the last session step. The streamed answer is appended piece by
    piece to ``history[-1][1]``, yielding ``(session, None, history)`` after
    each piece. The session is cleared once the stream ends.
    """
    selected_plugin = PLUGINS[reAct_step.action]
    plugin_result = selected_plugin.run(**reAct_step.action_input)

    session["reAct_step"][-1].observation = {"tool_response": plugin_result}

    steps_text = "\n".join(step.to_str() for step in session["reAct_step"])
    final_prompt = FINAL_PROMPT.format(
        question=session["origin_message"],
        reAct_step_str=steps_text,
    )

    banner = "=" * 25
    print(
        f"[CALL_FINAL_ANSWER] final_prompt: {final_prompt}\n"
        + banner
        + "END"
        + banner
    )

    request_messages = [{"content": final_prompt, "role": "user"}]
    # NOTE(review): `+=` assumes history[-1][1] is already a string (not
    # None) when this runs - confirm against the caller in app.py.
    for character in chat_stream_generator(request_messages, endpoint=PLUGIN_ENDPOINT):
        history[-1][1] += character
        yield session, None, history

    session.clear()
275 | 


--------------------------------------------------------------------------------
/gradio_demo/plugin_task/model.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from typing import Any, Dict, List, Union
 3 | 
 4 | import requests
 5 | from pydantic import BaseModel
 6 | 
 7 | 
 8 | class ReActStep(BaseModel):
 9 |     """RaAct 推理步骤"""
10 | 
11 |     thought: Dict[str, Union[Dict[str, str], str]]
12 |     action: str
13 |     action_input: Dict[str, str]
14 |     observation: Dict[str, Any] = {}
15 | 
16 |     def to_str(self) -> str:
17 |         s = f"Thought: {self.thought}\n"
18 |         s += f"Action: {self.action}\n"
19 |         s += f"Action Input: {self.action_input}\n"
20 | 
21 |         if self.observation:
22 |             s += f"Observation: {self.observation}\n"
23 |         return s
24 | 
25 | 
class RequstField(BaseModel):
    """A single field of a plugin's request body."""

    enum: List[str]
    name: str
    description: str
    is_required: bool
    parament_type: str

    @property
    def to_simple_dict(self) -> Dict[str, Any]:
        """Compact description of the field: "description", "required" and,
        only when the enum list is non-empty, "enum"."""
        enum_part = {"enum": self.enum} if self.enum else {}
        return {
            "description": self.description,
            "required": self.is_required,
            **enum_part,
        }
44 | 
45 | 
class Plugin(BaseModel):
    """An HTTP plugin: where to call it and how to describe it to the model."""

    url: str
    method: str
    headers: Dict[str, str]
    request_body: List[RequstField]
    name_for_human: str
    description_for_human: str
    description_for_model: str
    unique_name_for_model: str

    @property
    def parameter_schema(self) -> str:
        """JSON object mapping each required field name to its simple dict.

        Optional fields are left out of the schema.
        """
        parameter = {
            field.name: field.to_simple_dict for field in self.required_parameters
        }
        return json.dumps(parameter, ensure_ascii=False)

    def run(self, **kwargs):
        """Call the plugin endpoint and return the response body as text.

        ``kwargs`` are sent both as query parameters and as the JSON body.
        The request uses a 30 second timeout so a stalled endpoint cannot
        block the caller forever.

        :raises requests.exceptions.RequestException: on connection errors
            or when the timeout expires
        """

        response = requests.request(
            self.method.upper(),
            self.url,
            headers=self.headers,
            params=kwargs,
            json=kwargs,
            # requests waits indefinitely unless a timeout is given.
            timeout=30,
        )
        return response.text

    @property
    def required_parameters(self) -> List[RequstField]:
        """The request-body fields marked as required."""
        return [field for field in self.request_body if field.is_required]
83 | 


--------------------------------------------------------------------------------
/gradio_demo/plugin_task/plugins.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Dict
 3 | 
 4 | from plugin_task.model import Plugin
 5 | 
# Static definitions of the available plugins, one dict per plugin. The keys
# match the fields of `Plugin`; "request_body" entries match `RequstField`.
# The WEATHER_PLUGIN_URL and BCE_APP_CODE environment variables are read with
# os.environ[...], so importing this module raises KeyError when either one
# is unset.
PLUGIN_JSON_SCHEMA = [
    {
        "url": os.environ["WEATHER_PLUGIN_URL"],
        "method": "POST",
        "headers": {},
        "request_body": [
            {
                "enum": [],
                "name": "city_name",
                "description": "需要明确城市名称后再调用接口",
                "is_required": True,
                "parament_type": "str",
            }
        ],
        "name_for_human": "查天气",
        "description_for_human": "查询天气的工具,如果发现用户有查询天气的意图会使用该工具",
        "description_for_model": "It can help users check the weather situation, and if the description includes a city, it can be directly queried.",
        "unique_name_for_model": "query_weather",
    },
    {
        "url": "http://gwgp-pt7hznkstln.n.bdcloudapi.com/expTrack?com=auto",
        "method": "POST",
        "headers": {
            "Content-Type": "application/json",
            "X-Bce-Signature": os.environ["BCE_APP_CODE"],
        },
        "request_body": [
            {
                "enum": [],
                "name": "nu",
                "description": "需要明确快递单号后再调用接口",
                "is_required": True,
                "parament_type": "str",
            }
        ],
        "name_for_human": "查快递",
        "description_for_human": "查询快递的工具,如果发现用户有查询快递的意图会使用该工具",
        "description_for_model": "It can help users check the express delivery, and if the description includes a express delivery, it can be directly queried.",
        "unique_name_for_model": "query_express",
    },
    {
        "url": "https://api.oioweb.cn/api/common/teladress",
        "method": "GET",
        "headers": {},
        "request_body": [
            {
                "enum": [],
                "name": "mobile",
                "description": "需要明确电话号码后再调用接口",
                "is_required": True,
                "parament_type": "str",
            }
        ],
        "name_for_human": "查电话号码归属地",
        "description_for_human": "查询电话号码归属地的工具,如果发现用户有查询电话号码归属地的意图会使用该工具",
        "description_for_model": "It can help users check the phone number attribution, and if the description includes a phone number, it can be directly queried.",
        "unique_name_for_model": "query_teladress",
    },
    {
        "url": "https://tenapi.cn/v2/bing?format=json",
        "method": "POST",
        "headers": {},
        "request_body": [],
        "name_for_human": "bing每日壁纸",
        "description_for_human": "获取bing每日壁纸的工具,如果发现用户有获取bing每日壁纸会使用该工具",
        "description_for_model": "It can help users Get bing daily wallpapers, and if the description Get bing daily wallpapers, it can be directly queried.",
        "unique_name_for_model": "Get bing daily wallpapers",
    },
]
75 | 
76 | 
# Plugin models keyed by their "unique_name_for_model", built from the
# entries of PLUGIN_JSON_SCHEMA.
PLUGINS: Dict[str, Plugin] = dict(
    (
        schema["unique_name_for_model"],
        Plugin(
            **{
                key: schema[key]
                for key in (
                    "url",
                    "method",
                    "headers",
                    "request_body",
                    "name_for_human",
                    "description_for_human",
                    "description_for_model",
                    "unique_name_for_model",
                )
            }
        ),
    )
    for schema in PLUGIN_JSON_SCHEMA
)
90 | 


--------------------------------------------------------------------------------
/gradio_demo/plugin_task/prompt.py:
--------------------------------------------------------------------------------
# Prompt for the first turn: lets the model pick one of the enabled plugins or
# one of the built-in pseudo tools. Filled with str.format using `plugins`,
# `plugin_names` and `question`; doubled braces are literal braces.
INTENT_RECOGNITION_PROMPT = """Answer the following questions as best you can. You have access to the following tools:

{plugins}end_conversation: 当识别到用户想要结束对话时，调用此工具 Parameters: {{"end_phrase": {{"description": "回复结束语", "required": true}}}} Format the arguments as a JSON object.

ASK_USER_FOR_INTENT: 当无法确定用户的意图时，调用此工具 Parameters: {{"question": {{"description": "反问用户的问题", "required": true}}}} Format the arguments as a JSON object.

ASK_USER_FOR_REQUIRED_PARAMS: 当所列工具能够解决用户问题但缺少必要参数时，追问用户以获取必要参数，调用此工具 Parameters: {{"question": {{"description": "反问用户的问题", "required": true}}}} Format the arguments as a JSON object.

TOOL_OTHER: 如果上述工具都不能解决用户的问题，调用此工具 Parameters: {{"intent": {{"description": "整理下用户的场景，并给出用户的意图", "required": true}}}} Format the arguments as a JSON object.

Use the following format:

Question: the input question you must answer
Thought: {{"content": {{"description": "you should always think about what to do", "required": true}}, "tool_to_use_for_user": {{"description": "当调用某个工具缺少必要参数时，填写这个工具的名字", "required": true}}, "known_params": {{"description": "已经提取到的当前要调用工具的参数列表", "required": true}}}} Format the arguments as a JSON object.
Action: the action to take, should be one of [{plugin_names},end_conversation,ASK_USER_FOR_INTENT,ASK_USER_FOR_REQUIRED_PARAMS,TOOL_OTHER]
Action Input: the input to the action


开始！

Question: {question}
"""

# Prompt for follow-up turns of a single chosen plugin. Filled with str.format
# using `plugin_name`, `description_for_human`, `parameter_schema`, `question`
# and `reAct_step_str` (the rendered steps so far).
FILLING_SLOT_PROMPT = """Answer the following questions as best you can. You have access to the following tools:

{plugin_name}: {description_for_human} Parameters: {parameter_schema} Format the arguments as a JSON object.

end_conversation: 当识别到用户想要结束对话时，调用此工具 Parameters: {{"end_phrase": {{"description": "回复结束语", "required": true}}}} Format the arguments as a JSON object.

ASK_USER_FOR_REQUIRED_PARAMS: 当所列工具能够解决用户问题但缺少必要参数时，追问用户以获取必要参数，调用此工具 Parameters: {{"question": {{"description": "反问用户的问题", "required": true}}}} Format the arguments as a JSON object.

Use the following format:

Question: the input question you must answer
Thought: {{"content": {{"description": "you should always think about what to do", "required": true}}, "tool_to_use_for_user": {{"description": "当调用某个工具缺少必要参数时，填写这个工具的名字", "required": true}}, "known_params": {{"description": "已经提取到的当前要调用工具的参数列表", "required": true}}}} Format the arguments as a JSON object.
Action: the action to take, should be one of [{plugin_name},end_conversation,ASK_USER_FOR_REQUIRED_PARAMS]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can be repeated zero or more times)

开始！  

Question: {question}
{reAct_step_str}
"""


# Prompt for the final answer: the original question followed by the rendered
# steps, ending with "Final Answer: " for the model to complete. Filled with
# `question` and `reAct_step_str`.
FINAL_PROMPT = """Question: {question}
{reAct_step_str}
Final Answer: """
51 | 


--------------------------------------------------------------------------------
/gradio_demo/plugin_task/util.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from typing import Dict, List, Optional, Tuple
 3 | 
 4 | import yaml
 5 | 
 6 | from plugin_task.model import Plugin, ReActStep
 7 | from plugin_task.plugins import PLUGINS
 8 | 
 9 | 
def build_prompt_plugin_variables(plugins: List[Plugin]) -> Tuple[str, str]:
    """Return ``(tool_descriptions, comma_joined_names)`` for the prompt.

    The first item holds one description paragraph per plugin (each followed
    by a blank line); the second is the plugins' model names joined by ",".
    """
    descriptions = [
        f"{plugin.unique_name_for_model}: {plugin.description_for_human} Parameters: {plugin.parameter_schema} Format the arguments as a JSON object.\n\n"
        for plugin in plugins
    ]
    names = [plugin.unique_name_for_model for plugin in plugins]
    return "".join(descriptions), ",".join(names)
15 | 
16 | 
def parse_reAct_step(text: str) -> Optional[ReActStep]:
    """Parse a model reply into a ReAct step.

    The reply must contain "Thought:", "Action:" and "Action Input:"
    sections, in that order; the thought and the action input are decoded
    with ``yaml.safe_load``.

    :param text: raw model output
    :return: the parsed step, or None when the sections are missing, the
        YAML cannot be decoded, or the decoded values do not fit the
        ``ReActStep`` model (for example a thought that is plain text
        instead of a mapping). Returning None lets callers retry.
    """

    regex = r"Thought\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*:[\s]*(.*?)[\s]*Action\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
    match = re.search(regex, text, re.DOTALL)
    if not match:
        return None

    thought = match.group(1).strip()
    action = match.group(2).strip()
    action_input = match.group(3).strip()

    try:
        thought = yaml.safe_load(thought)
    except Exception as e:
        print(f"[parse_reAct_step] error: {e}, thought: {thought}")
        return None

    try:
        action_input = yaml.safe_load(action_input)
    except Exception as e:
        print(f"[parse_reAct_step] error: {e}, action_input: {action_input}")
        return None

    try:
        return ReActStep(
            thought=thought or {},
            action=action,
            action_input=action_input or {},
            observation={},
        )
    except (TypeError, ValueError) as e:
        # pydantic's ValidationError derives from ValueError; a reply whose
        # sections decode to the wrong shape is treated like any other
        # unparseable reply.
        print(f"[parse_reAct_step] error: {e}, text: {text}")
        return None
47 | 
48 | 
def plugin_parameter_validator(
    known_parameters: Dict, plugin_name: str
) -> Tuple[bool, List[Dict[str, str]]]:
    """Check the known parameters against a plugin's required fields.

    :param known_parameters: parameter values collected so far; anything
        that is not a dict is treated as "no parameters known"
    :param plugin_name: key of the plugin in ``PLUGINS``
    :return: ``(passed, invalid_info)`` where ``invalid_info`` lists one
        entry per required field that is missing/empty or whose value is
        outside the field's enum
    :raises KeyError: if ``plugin_name`` is not a known plugin
    """
    plugin = PLUGINS[plugin_name]
    if not isinstance(known_parameters, dict):
        known_parameters = {}

    invalid_info = []
    for parameter in plugin.required_parameters:
        parameter_value = known_parameters.get(parameter.name)
        if not parameter_value:
            invalid_info.append(
                {
                    "invalid_field": parameter.name,
                    "invalid_reason": f"{parameter.name} 字段缺失，字段描述：{parameter.description}",
                }
            )
            # A missing value cannot be checked against the enum; indexing
            # the dict here used to raise KeyError.
            continue

        if parameter.enum and parameter_value not in parameter.enum:
            invalid_info.append(
                {
                    "invalid_field": parameter.name,
                    "invalid_reason": f"{parameter.name} 字段值不合法，字段描述：{parameter.description}，可选值范围：{parameter.enum}",
                }
            )
    return len(invalid_info) == 0, invalid_info
74 | 


--------------------------------------------------------------------------------
/gradio_demo/qa_generator_task/__init__.py:
--------------------------------------------------------------------------------
1 | # QA generator task
2 | 


--------------------------------------------------------------------------------
/gradio_demo/qa_generator_task/generate_qa.py:
--------------------------------------------------------------------------------
 1 | from typing import Tuple
 2 | import os
 3 | 
 4 | from common.call_llm import chat
 5 | 
 6 | QA_GENERATOR_ENDPOINT = os.environ.get("QA_GENERATOR_ENDPOINT")
 7 | 
 8 | 
 9 | prompt_template = """
10 | ### 角色能力 ###
11 | 你是一个问答对生成器，你可以对下面给定上下文的主要内容进行概括和提炼，并按照下面给定的生成规则去生成。
12 | 
13 | ### 生成规则 ###
14 | 1. 生成3到12组用户可能会问的问题以及对应的答案，要求问题要简洁、真实、口语化。
15 | 2. 避免生成内容相同或相似的问答对，且问答和答案要一定要准确、严谨、口语化。
16 | 3. 确保问答对尽可能覆盖上下文的所有内容。
17 | 
18 | ### 上下文 ###
19 | {context}
20 | 
21 | ### 返回格式 ###
22 | 请严格按照下面描述的JSON列表格式进行输出，不需要解释，输出JSON格式如下:
23 | [
24 |     {{
25 |         \"question\": \"generated question one\", 
26 |         \"answer\": \"generated answer one\",
27 |     }}, 
28 |     {{
29 |         \"question\": \"generated question two\", 
30 |         \"answer\": \"generated answer two\",
31 |     }}, 
32 |     ...
33 | ]
34 | 确保输出的格式可以被Python的json.loads方法解析。
35 | """
36 | 
37 | 
def generate_qa_pairs(page_content: str) -> Tuple[str, None]:
    """
    Ask the model to generate question/answer pairs for a text.

    :param page_content: text the pairs should be generated from
    :return: the model's raw reply and None, as a 2-tuple
    """
    prompt = prompt_template.format(context=page_content)
    qa_pair_result = chat(
        messages=[{"role": "user", "content": prompt}],
        endpoint=QA_GENERATOR_ENDPOINT,
    )
    print(f"generate QA pairs prompt: {prompt}, result: {qa_pair_result}")
    return qa_pair_result, None
50 | 


--------------------------------------------------------------------------------
/gradio_demo/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio==4.14.0
2 | python-dotenv==1.0.0
3 | openpyxl==3.1.2


--------------------------------------------------------------------------------
/gradio_demo/模型演示.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OrionStarAI/Orion/3b23aeab356cfd6c55c34bf1e02f858eaa15cac8/gradio_demo/模型演示.xlsx


--------------------------------------------------------------------------------
/quantization/README.MD:
--------------------------------------------------------------------------------
 1 | # AWQ quantization
 2 | 
 3 | 
[AWQ](https://github.com/mit-han-lab/llm-awq) is a PTQ (Post-Training Quantization)
method: an efficient and accurate low-bit (INT3/4) weight quantization technique for LLMs.
 6 | 
Orion models can be quantized easily using AWQ.
Follow the step-by-step tutorial below.
 9 | 
10 | To run AWQ, we will use [AutoAWQ](https://github.com/casper-hansen/AutoAWQ).
11 | 
12 | ## Do Quantization
13 | 
14 | The `quant.py` script is provided for you to perform AWQ quantization:
15 | 
16 | ```bash
17 | python quant.py --model_path /base_model \
18 |     --save_path /quantized_model --group_size 128 --version "gemm"
19 | ```
20 | 
21 | ## Run Quantized Model
22 | 
You can run a quantized model using the `eval_quant.py` script:
24 | 
25 | ```bash
26 | python eval_quant.py --model /quantized_model --trust_remote_code
27 | ```
28 | 


--------------------------------------------------------------------------------
/quantization/eval_quant.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from transformers import AutoTokenizer, AutoModelForCausalLM
 4 | 
 5 | from awq import AutoAWQForCausalLM
 6 | 
 7 | def run(args):
 8 |     # Load model
 9 |     tokenizer = AutoTokenizer.from_pretrained(
10 |         args.model,
11 |         trust_remote_code=args.trust_remote_code,
12 |         local_files_only=True,
13 |         legacy=True,
14 |         use_fast=False,
15 |     )
16 |     # fuse_layers=True,
17 |     # batch_size=args.batch,
18 |     model = (
19 |         AutoModelForCausalLM.from_pretrained(
20 |             args.model,
21 |             device_map='auto',
22 |             torch_dtype=torch.float16,
23 |             trust_remote_code=args.trust_remote_code,
24 |         )
25 |         .eval()
26 |     )
27 | 
28 |     prompt = "count to 1000: 0 1 2 3"
29 |     # prompt = "Hello! Do you hava a dream that "
30 |     prompts = [prompt] * args.batch
31 |     inputs = tokenizer(prompts, return_tensors="pt", add_special_tokens=False).to(
32 |         "cuda:0"
33 |     )
34 |     output_ids = model.generate(
35 |         **inputs,
36 |         do_sample=False,
37 |         max_new_tokens=4096,
38 |     )
39 |     generate_tokens = tokenizer.batch_decode(output_ids)
40 |     print(generate_tokens)
41 | 
def get_args():
    """Parse the command-line arguments of the evaluation script.

    :return: namespace with ``model`` (path or name of the quantized model,
        required), ``trust_remote_code`` (bool flag) and ``batch`` (int,
        default 1)
    """
    import argparse

    parser = argparse.ArgumentParser(description="Run AWQ quantized model")
    # Required: without it the script would only fail later, inside
    # from_pretrained(None), with a much less helpful error.
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Path or name of the quantized model",
    )
    parser.add_argument(
        "--trust_remote_code", action="store_true", help="Trust remote code"
    )
    parser.add_argument(
        "--batch", type=int, default=1, help="Number of copies of the prompt to generate"
    )

    return parser.parse_args()
53 | 
54 | 
55 | 
if __name__ == "__main__":
    # Script entry point: parse the CLI flags, then run the generation check.
    run(get_args())
59 | 


--------------------------------------------------------------------------------
/quantization/orion.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import tqdm
  3 | from typing import List, Union
  4 | 
  5 | from awq.models.base import BaseAWQForCausalLM
  6 | from awq.utils.fused_utils import fuse_qkv
  7 | from awq.modules.fused.block import LlamaLikeBlock
  8 | from awq.modules.fused.model import LlamaLikeModel
  9 | from awq.modules.fused.mlp import QuantFusedMLP
 10 | 
 11 | class OrionAWQForCausalLM(BaseAWQForCausalLM):
 12 |     layer_type = "OrionDecoderLayer"
 13 |     max_new_tokens_key = "max_position_embeddings"
 14 | 
 15 |     @staticmethod
 16 |     def fuse_layers(model):
 17 |         fuser = OrionFuser(model)
 18 |         fuser.fuse_transformer()
 19 | 
 20 |     @staticmethod
 21 |     def get_model_layers(model):
 22 |         return model.model.layers
 23 | 
 24 |     @staticmethod
 25 |     def get_act_for_scaling(module):
 26 |         return dict(
 27 |             is_scalable=False
 28 |         )
 29 | 
 30 |     @staticmethod
 31 |     def move_embed(model, device: str):
 32 |         model.model.embed_tokens = model.model.embed_tokens.to(device)
 33 | 
 34 |     @staticmethod
 35 |     def get_layers_for_scaling(module, input_feat, module_kwargs):
 36 |         layers = []
 37 | 
 38 |         # attention input
 39 |         layers.append(dict(
 40 |             prev_op=module.input_layernorm,
 41 |             layers=[module.self_attn.q_proj,
 42 |                     module.self_attn.k_proj, module.self_attn.v_proj],
 43 |             inp=input_feat['self_attn.q_proj'],
 44 |             module2inspect=module.self_attn, kwargs=module_kwargs,
 45 |         ))
 46 | 
 47 |         # attention out
 48 |         # Please refer to https://github.com/mit-han-lab/llm-awq/pull/67#issue-1850622696
 49 |         if module.self_attn.v_proj.weight.shape == module.self_attn.o_proj.weight.shape:
 50 |             layers.append(dict(
 51 |                 prev_op=module.self_attn.v_proj,
 52 |                 layers=[module.self_attn.o_proj],
 53 |                 inp=input_feat['self_attn.o_proj'],
 54 |             ))
 55 |         
 56 |         # linear 1
 57 |         layers.append(dict(
 58 |             prev_op=module.post_attention_layernorm,
 59 |             layers=[module.mlp.gate_proj, module.mlp.up_proj],
 60 |             inp=input_feat['mlp.gate_proj'],
 61 |             module2inspect=module.mlp,
 62 |         ))
 63 | 
 64 |         # linear 2
 65 |         layers.append(dict(
 66 |             prev_op=module.mlp.up_proj,
 67 |             layers=[module.mlp.down_proj],
 68 |             inp=input_feat['mlp.down_proj'],
 69 |         ))
 70 | 
 71 |         return layers
 72 | 
 73 | 
 74 | class OrionFuser:
 75 |     def __init__(self, model):
 76 |         self.model = model
 77 | 
 78 |         self.llama_blocks: List[Tuple[str, object]] = [
 79 |             (name, module) for name, module in self.model.named_modules()
 80 |             if 'OrionDecoderLayer'.lower() in module.__class__.__name__.lower()
 81 |         ]
 82 |     
 83 |     def fuse_transformer(self):
 84 |         blocks = []
 85 | 
 86 |         for module in tqdm.tqdm(self.model.model.layers, desc="Orion Fusing layers..."):
 87 |             device = next(iter(module.state_dict().values())).device
 88 |             qkv = fuse_qkv(
 89 |                 module,
 90 |                 module.self_attn.q_proj,
 91 |                 module.self_attn.k_proj,
 92 |                 module.self_attn.v_proj
 93 |             )
 94 |             mlp = QuantFusedMLP(
 95 |                 module.mlp.gate_proj,
 96 |                 module.mlp.down_proj,
 97 |                 module.mlp.up_proj
 98 |             )
 99 |             blocks.append(LlamaLikeBlock(
100 |                 hidden_size=self.model.config.hidden_size,
101 |                 n_heads=self.model.config.num_attention_heads,
102 |                 n_kv_heads=self.model.config.num_key_value_heads,
103 |                 qkv_layer=qkv,
104 |                 o_proj=module.self_attn.o_proj,
105 |                 mlp=mlp,
106 |                 norm_1=module.input_layernorm,
107 |                 norm_2=module.post_attention_layernorm,
108 |                 dev=device,
109 |                 max_seq_len=self.model.config.max_new_tokens,
110 |                 rope_theta=self.model.config.rope_theta
111 |             ))
112 |         
113 |         self.model.model = LlamaLikeModel(
114 |             self.model.config.vocab_size,
115 |             blocks,
116 |             self.model.model.embed_tokens,
117 |             self.model.model.norm,
118 |         )
119 | 


--------------------------------------------------------------------------------
/quantization/quant.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import codecs
 4 | import torch
 5 | 
 6 | from transformers import AutoTokenizer, AutoModel
 7 | 
 8 | from awq.models.auto import AWQ_CAUSAL_LM_MODEL_MAP, AutoAWQForCausalLM
 9 | from awq.models.base import TRANSFORMERS_AUTO_MAPPING_DICT
10 | 
11 | from orion import OrionAWQForCausalLM
12 | 
13 | import pdb
14 | 
# Register the "orion" model type in the two lookup tables imported from awq:
# the first maps it to the name "AutoModelForCausalLM", the second to the
# OrionAWQForCausalLM adapter class.
# NOTE(review): presumably AWQ consults these tables when loading a model by
# model_type - confirm against the installed awq version.
TRANSFORMERS_AUTO_MAPPING_DICT["orion"] = "AutoModelForCausalLM"
AWQ_CAUSAL_LM_MODEL_MAP['orion'] = OrionAWQForCausalLM
17 | 
def load(path:str=None, key:str="text", max_samples:int=128):
    """Read calibration texts from a JSON-lines file.

    Args:
        path: JSONL file to read. ``None`` selects ``"data/val.jsonl"``,
            resolved against the current working directory.
        key: Field extracted from each JSON record.
        max_samples: Stop after this many texts have been collected.

    Returns:
        A list of at most ``max_samples`` values of ``key``, in file order.
        Blank lines are skipped. Lines that are not valid JSON, or whose
        record lacks ``key``, are reported on stdout and skipped.
    """
    if path is None:
        path = "data/val.jsonl"
    texts = []
    if max_samples <= 0:
        return texts

    # Built-in open() splits lines only on \n, \r and \r\n. codecs.open() also
    # breaks on characters such as U+2028 / U+0085, which may legally appear
    # unescaped inside a JSON string and would cut a record in two.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                text = json.loads(line)[key]
            except (ValueError, LookupError, TypeError) as e:
                # Malformed JSON, missing key, or a non-mapping record:
                # best effort, report and keep going.
                print('exception ', e)
                continue
            texts.append(text)
            if len(texts) >= max_samples:
                break
    return texts
36 | 
def load_wikitext():
    """Return WikiText-2 (raw) training lines suitable as calibration text.

    Returns:
        The ``"text"`` entries of the train split that are non-blank and
        contain more than 20 space-separated tokens.
    """
    # Imported here because the module never imported ``load_dataset``; without
    # this the function raised NameError. Keeping it local also means the
    # Hugging Face ``datasets`` package is needed only when this helper is used.
    from datasets import load_dataset

    data = load_dataset('wikitext', 'wikitext-2-raw-v1', split="train")
    return [text for text in data["text"] if text.strip() != '' and len(text.split(' ')) > 20]
40 | 
def quant(args):
    """Quantize an Orion checkpoint to 4-bit AWQ and save it with its tokenizer.

    Args:
        args: Namespace providing ``model_path`` (source checkpoint),
            ``save_path`` (output directory), ``group_size`` (32, 64 or 128)
            and ``version`` ("gemm" or "gemv").

    Raises:
        ValueError: If ``version`` or ``group_size`` is not one of the
            supported values. Nothing is written to disk in that case.
    """
    group_size = args.group_size
    version = args.version
    model_path = args.model_path
    save_path = args.save_path
    bits = 4

    # Validate before touching the filesystem, and with real exceptions:
    # ``assert`` statements vanish under ``python -O``.
    if version not in ("gemm", "gemv"):
        raise ValueError(f'version must be "gemm" or "gemv", got {version!r}')
    if group_size not in (32, 64, 128):
        raise ValueError(f"group_size must be 32, 64 or 128, got {group_size!r}")

    os.makedirs(save_path, exist_ok=True)

    quant_config = { "zero_point": True, "q_group_size": group_size, "w_bit": bits, "version": version.upper() }
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = OrionAWQForCausalLM.from_pretrained(model_path,
                                            model_type="orion",
                                            device_map="auto",
                                            trust_remote_code=True,
                                            torch_dtype=torch.float16)
    # Calibration texts come from the default JSONL file used by load().
    calib_data = load()

    # Quantize
    model.quantize(tokenizer, quant_config=quant_config, calib_data=calib_data)

    # Save quantized model
    model.save_quantized(save_path)
    tokenizer.save_pretrained(save_path)

    print(f'Model is quantized and saved at "{save_path}"')
77 | 
def get_args():
    """Parse the command-line options of the AWQ quantization script.

    Returns:
        argparse.Namespace with ``model_path`` (str), ``save_path`` (str),
        ``group_size`` (int, default 128) and ``version`` (str, default
        "gemm").

    Exits with an argparse usage error when a path is missing or when
    ``--group_size`` / ``--version`` is not one of the listed choices.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Quantized model")
    parser.add_argument(
        "--model_path", type=str, required=True,
        help="The original model path",
    )
    parser.add_argument(
        "--save_path", type=str, required=True,
        help="Directory where the quantized model is saved",
    )
    parser.add_argument(
        "--group_size", type=int, default=128, choices=[32, 64, 128],
        help="AWQ quantization group size",
    )
    parser.add_argument(
        "--version", type=str, default="gemm", choices=["gemm", "gemv"],
        help="AWQ kernel version",
    )
    return parser.parse_args()
87 | 
if __name__ == '__main__':
    # Script entry point: parse the CLI flags, then quantize and save.
    quant(get_args())
91 | 


--------------------------------------------------------------------------------