├── LICENSE
├── MODEL_LICENSE.pdf
├── README.md
├── README_EN.md
├── README_JA.md
├── chat_demo.py
├── requirements.txt
├── resources
    ├── chat_train_data.png
    ├── modelscope.png
    └── wechat.png
└── text_generation_demo.py


/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [2023] [XVERSE Technology Inc]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/MODEL_LICENSE.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xverse-ai/XVERSE-13B/b0f30a0e9a632e57fd44abad52f0ffc91c14969b/MODEL_LICENSE.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <h1>
  3 |   XVERSE-13B
  4 | </h1>
  5 | </div>
  6 | 
  7 | <p align="center">
  8 |         <a href="https://huggingface.co/xverse">🤗 Hugging Face</a>&nbsp;｜
  9 |         <a href="https://modelscope.cn/organization/xverse" rel="nofollow"><img src="resources/modelscope.png" width="20px" style="max-width: 100%;"> ModelScope</a>&nbsp;｜
 10 |         <a href="https://openxlab.org.cn/models/hot/XVERSE">🧰 OpenXLab</a>&nbsp;｜
 11 |         <a href="resources/wechat.png">💬 微信社区</a>
 12 | </p>
 13 | 
 14 | <h4 align="left">
 15 |     <p>
 16 |         <b>中文</b> |
 17 |         <a href="README_EN.md">English</a> |
 18 |         <a href="README_JA.md">日本語</a>
 19 |     </p>
 20 | </h4>
 21 | 
 22 | ## 更新信息
 23 | - **[2024/03/25]** 发布XVERSE-13B-2-Chat GGUF、GPTQ量化模型，支持llama.cpp、vLLM在MacOS/Linux/Windows系统上推理XVERSE-13B-2-Chat模型。
 24 | - **[2024/01/16]** 发布长序列对话模型 **XVERSE-13B-256K**，该版本模型最大支持 256K 的上下文窗口长度，约 25 万字的输入内容，可以协助进行文献总结、报告分析等任务。
 25 | - **[2023/11/06]** 发布新版本的 **XVERSE-13B-2** 底座模型和 **XVERSE-13B-2-Chat** 对话模型，相较于原始版本，新版本的模型训练更加充分（从 1.4T 增加到 3.2T），各方面的能力均得到大幅提升，同时新增工具调用能力。
 26 | - **[2023/09/26]** 发布 7B 尺寸的 [XVERSE-7B](https://github.com/xverse-ai/XVERSE-7B) 底座模型和 [XVERSE-7B-Chat](https://github.com/xverse-ai/XVERSE-7B) 对话模型，支持在单张消费级显卡部署运行，并保持高性能、全开源、免费可商用。
 27 | - **[2023/08/22]** 发布经过指令精调的 XVERSE-13B-Chat 对话模型。
 28 | - **[2023/08/07]** 发布 13B 尺寸的 XVERSE-13B 底座模型。
 29 | 
 30 | ## 模型介绍
 31 | 
 32 | **XVERSE-13B** 是由深圳元象科技自主研发的支持多语言的大语言模型（Large Language Model），主要特点如下：
 33 | 
 34 | - **模型结构**：XVERSE-13B 使用主流 Decoder-only 的标准 Transformer 网络结构，支持 8K 的上下文长度（Context Length），为同尺寸模型中最长，能满足更长的多轮对话、知识问答与摘要等需求，模型应用场景更广泛。
 35 | - **训练数据**：构建了 3.2 万亿 token 的高质量、多样化的数据对模型进行充分训练，包含中、英、俄、西等 40 多种语言，通过精细化设置不同类型数据的采样比例，使得中英两种语言表现优异，也能兼顾其他语言效果。
 36 | - **分词**：基于 BPE（Byte-Pair Encoding）算法，使用上百 GB 语料训练了一个词表大小为 100,534 的分词器，能够同时支持多语言，而无需额外扩展词表。
 37 | - **训练框架**：自主研发多项关键技术，包括高效算子、显存优化、并行调度策略、数据-计算-通信重叠、平台和框架协同等，让训练效率更高，模型稳定性强，在千卡集群上的峰值算力利用率可达到 58.5%，位居业界前列。
 38 | 
 39 | **XVERSE-13B-2-Chat**为 **XVERSE-13B-2** 底座模型对齐后的版本。
 40 | 
 41 | 对齐阶段，不同能力类型数据的采样比例如下所示：
 42 | 
 43 | <img src="resources/chat_train_data.png">
 44 | 
 45 | **XVERSE-13B-256K**是[**XVERSE-13B-2**](https://huggingface.co/xverse/XVERSE-13B)模型经过ABF+继续预训练、NTK+SFT微调后的版本。
 46 | 
 47 | ## 评测结果
 48 | 
 49 | 为了综合评估模型的性能，我们在一系列标准数据集上进行了全面测试，包括C-Eval、CMMLU、Gaokao-Bench、MMLU、GAOKAO-English、AGIEval、RACE-M、CommonSenseQA、PIQA、GSM8K和HumanEval。这些评估覆盖了模型在多个领域的能力，具体包括中文问答、英文问答、语言理解、常识问答、逻辑推理、数学问题解答以及编程能力。评估结果如下：
 50 | 
 51 | |  能力维度  |           数据集           |        | XVERSE-13B-2 | XVERSE-13B | Baichuan2-13B | Llama1-13B | Llama2-13B |
 52 | | :--------: | :------------------------: | :----: | :----------: | :--------: | :-----------: | :--------: | :--------: |
 53 | |  中文问答  |           C-Eval           | 5-shot |     63.5     |    54.7    |     58.1      |    28.8    |    35.6    |
 54 | |            |           CMMLU            | 5-shot |     66.2     |    59.1    |     62.0      |    31.5    |    38.4    |
 55 | |            |  Gaokao-Bench<sup>1</sup>  | 5-shot |     67.5     |    53.9    |     54.3      |    26.4    |    35.4    |
 56 | |  英文问答  |            MMLU            | 5-shot |     61.2     |    55.1    |     59.2      |    46.9    |    54.8    |
 57 | |            | GAOKAO-English<sup>1</sup> | 5-shot |     73.7     |    66.5    |     67.7      |    38.1    |    60.6    |
 58 | | 中英文问答 |    AGIEval<sup>1</sup>     | 5-shot |     54.5     |    41.4    |     48.2      |    27.3    |    33.4    |
 59 | |  语言理解  |           RACE-M           | 0-shot |     84.6     |    74.2    |     68.9      |    61.6    |    63.0    |
 60 | |  常识问答  |       CommonSenseQA        | 7-shot |     74.0     |    69.5    |     65.6      |    62.0    |    67.3    |
 61 | |    推理    |            PIQA            | 0-shot |     80.8     |    79.0    |     78.5      |    80.1    |    80.5    |
 62 | |    数学    |           GSM8K            | 4-shot |     54.9     |    18.4    |     52.7      |    17.8    |    28.7    |
 63 | |    代码    |         HumanEval          | 0-shot |     39.6     |    15.9    |     17.1      |    15.8    |    18.3    |
 64 | 
 65 | > <sup>1：只针对其中的单项选择题进行测试，即排除了填空题、开放性问题和多项选择题</sup>   
 66 | 
 67 | 对于上述所有比较模型，我们优先汇报其官方公布的结果。在缺少官方结果的情况下，我们采用了 [OpenCompass 榜单](https://opencompass.org.cn/leaderboard-llm)的报告结果。其他结果则来自于我们自行执行的评估流程所获得的数据。   
 68 | 对于 MMLU，我们采用作者提供的[评测工具](https://github.com/hendrycks/test)，C-Eval、AGIEval、Gaokao-Bench、GAOKAO-English 与 MMLU 的评测方式相同，其余评测数据集使用 [OpenCompass 评估框架](https://github.com/open-compass/OpenCompass/)进行评估。
 69 | 
 70 | ### XVERSE-13B-256K
 71 | 
 72 | 为了验证长序列的效果，这里我们使用了LongBench数据集。[LongBench](https://github.com/THUDM/LongBench)是第一个多任务、中英双语、针对大语言模型长文本理解能力的评测基准。LongBench由六大类、二十一个不同的任务组成，覆盖了单文档问答、多文档问答、摘要、Few shot任务、合成任务和代码补全等关键的长文本应用场景。LongBench包含14个英文任务、5个中文任务和2个代码任务，多数任务的平均长度在5k-15k之间，共包含4750条测试数据。评估结果如下：
 73 | 
 74 | 
 75 | |  能力维度  |  数据集 |  XVERSE-13B-256K | GPT-3.5-Turbo-16K | Yi-6B-200K | LongChat-7B-16K | Llama2-7B-Chat-4K | 
 76 | | :--------: | :-------------------: | :----: | :----------: | :--------: | :-----------: | :--------: |
 77 | |  多文档问答  |      HotpotQA         |     58.3     |    51.6    |     48.3      |    22.4    |    24.3    |
 78 | |             |      DuReader         |     28.9     |    28.7    |     14.2       |    19.1    |    1.9    |
 79 | |  单文档问答  |      NarrativeQA      |    24.1      |    23.6    |     14.5      |    21.6    |    19.1    |
 80 | |             |       Qasper          |     30.2     |    43.3    |     21.6      |    21.6    |    19.6    |
 81 | |    摘要     |      VCSUM            |     11.3     |    16.0    |      8.2       |    14.0   |    0.2     |
 82 | |  Few shot   |      TREC             |     72.0     |    68.0    |     71.0      |    61.5    |    60.5    |
 83 | |             |      LSHT             |     35.0     |    29.2    |     38.0      |    20.8    |    19.8    |
 84 | |  合成任务    |  PassageRetrieval-en |     63.0     |    71.0    |     6.0       |    24.0    |    9.2     |
 85 | |             |  PassageRetrieval-zh |     44.0     |    77.5    |     7.9       |    4.8     |    0.5     |
 86 | |      代码   |  RepoBench-P          |    55.6     |    53.6    |     61.5      |    54.7    |    42.4    |
 87 | 
 88 | 对于上述所有比较模型，我们优先汇报其官方公布的结果。在缺少官方结果的情况下，我们采用自行执行的评估流程所获得的数据。   
 89 | 
 90 | ## 使用方法
 91 | 
 92 | ### 环境安装
 93 | 
 94 | 1. 下载本仓库：
 95 | 
 96 | ```shell
 97 | git clone https://github.com/xverse-ai/XVERSE-13B
 98 | cd XVERSE-13B
 99 | ```
100 | 
101 | 2. 使用 pip 安装依赖：
102 | 
103 | ```shell
104 | pip install -r requirements.txt
105 | ```
106 | ### Transformers 加载方式
107 | 
108 | 可通过以下代码加载 XVERSE-13B-Chat 模型来进行对话：
109 | 
110 | ```python
111 | >>> import torch
112 | >>> from transformers import AutoTokenizer, AutoModelForCausalLM
113 | >>> from transformers.generation.utils import GenerationConfig
114 | >>> model_path = "xverse/XVERSE-13B-Chat"
115 | >>> tokenizer = AutoTokenizer.from_pretrained(model_path)
116 | >>> model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto')
117 | >>> model.generation_config = GenerationConfig.from_pretrained(model_path)
118 | >>> model = model.eval()
119 | >>> history = [{"role": "user", "content": "1955年谁是美国总统？他是什么党派？"}]
120 | >>> response = model.chat(tokenizer, history)
121 | >>> print(response)
122 | 1955年,美国总统是德怀特·D·艾森豪威尔。他所属的党派是共和党。
123 | >>> history.append({"role": "assistant", "content": response})
124 | >>> history.append({"role": "user", "content": "他任职了多少年"})
125 | >>> response = model.chat(tokenizer, history)
126 | >>> print(response)
127 | 德怀特·D·艾森豪威尔在1953年至1961年间担任美国总统,所以他一共任职了8年。
128 | ```
129 | 
130 | ### 网页 Demo
131 | 
132 | 通过以下代码启动一个web server，在浏览器输入访问地址后，可使用 XVERSE-13B-Chat 模型进行对话：
133 | ```shell
134 | python chat_demo.py --port='port' --model_path='/path/to/model/' --tokenizer_path='/path/to/tokenizer/'
135 | ```
136 | 
137 | ### XVERSE-13B-Chat 输出示例
138 | 以下是一些使用 `chat_demo.py` 得到的 XVERSE-13B-Chat 示例：
139 | 
140 | <details><summary><b>角色扮演</b></summary>
141 | 
142 | ```
143 | 用户:
144 | 扮演杨过
145 | 
146 | XVERSE-13B-Chat:
147 | 好的,我现在要扮演的是杨过这个角色。他是金庸先生的武侠小说《神雕侠侣》中的主人公之一。现在你可以开始和我对话了。
148 | 
149 | 用户:
150 | 你是谁
151 | 
152 | XVERSE-13B-Chat:
153 | 我叫杨过,是个孤儿,被郭靖夫妇收养。
154 | 
155 | 用户:
156 | 你的妻子是谁
157 | 
158 | XVERSE-13B-Chat:
159 | 我的妻子是小龙女。
160 | 
161 | 用户:
162 | 退出角色扮演
163 | 
164 | XVERSE-13B-Chat:
165 | 好的,下次再见!
166 | 
167 | 用户:
168 | 你是谁
169 | 
170 | XVERSE-13B-Chat:
171 | 我是一个人工智能语言模型,名为XChat,由元象公司开发。我基于Transformer架构进行训练,能够理解和生成自然语言。我可以回答问题、提供信息、参与对话以及创作文本等任务。需要注意的是,我没有自我意识或个人情感,只是一个能理解和生成文本的工具。我的知识截止到2023年7月,所以关于更近期事件和发展的信息可能不包括在内。如果有任何问题,我会尽力提供帮助!
172 | ```
173 | </details>
174 | 
175 | <details><summary><b>知识问答</b></summary>
176 | 
177 | ```
178 | 用户:
179 | 《人类简史》这本书的作者是谁？请简要概括一下这本书的内容
180 | 
181 | XVERSE-13B-Chat:
182 | 《人类简史》这本书的作者是尤瓦尔·赫拉利。这是一部关于人类历史的非虚构作品,它从大约70万年前的人类起源开始,一直讲述到现代社会。
183 | 
184 | 书中详细描述了人类如何从狩猎采集的生活方式过渡到农业文明,然后又发展为工业革命和科技革命。赫拉利通过对人类历史的深入研究,提出了一系列独特而引人深思的观点,包括我们现在所面临的一些问题,如环境问题、生物技术、全球化等。
185 | 
186 | 他认为,人类的历史可以分为四个阶段:认知革命(约7万年前),农业革命(约1万年前),一体化革命(约500年前),以及科学革命(约400年前)。每一个阶段都带来了巨大的变革,改变了人类的生活方式,也塑造了我们的文化和社会结构。
187 | ```
188 | </details>
189 | 
190 | <details><summary><b>文本生成</b></summary>
191 | 
192 | ```
193 | 用户:
194 | 我要去深圳旅游，帮我制定一份5日游计划
195 | 
196 | XVERSE-13B-Chat:
197 | 当然可以。以下是一份为期五天的深圳旅行建议行程:
198 | 
199 | 第一天:到达深圳并休息
200 | 上午:抵达深圳后,您可以先安排入住酒店并稍作休息。然后前往深圳湾公园散步,欣赏美丽的海景。
201 | 下午:在附近的购物中心或餐厅享用午餐,之后你可以选择去欢乐谷或者东部华侨城游玩。
202 | 晚上:在世界之窗体验各种风格的建筑和文化,结束你的第一天。
203 | 
204 | 第二天:深圳科技与现代化
205 | 上午:参观深圳市民中心、荔香公园以及深圳图书馆。
206 | 下午:走进深圳会展中心和深圳大剧院,感受深圳的现代化气息。
207 | 晚上:在南山区的海岸城购物娱乐。
208 | 
209 | 第三天:深圳自然与历史
210 | 上午:游览深圳博物馆,了解深圳的历史文化。
211 | 下午:访问深圳古城堡和锦绣中华民俗村,体验中国各地的民族文化。
212 | 晚上:在东湖公园漫步,欣赏美丽的湖光山色。
213 | 
214 | 第四天:深圳主题公园
215 | 上午:去欢乐谷享受刺激的过山车和其他游乐设施。
216 | 下午:在世界之窗继续你的探索,这里有许多全球著名的建筑和景点的缩小复制品。
217 | 晚上:在华强北电子市场购买纪念品或者深圳湾看夜景。
218 | 
219 | 第五天:深圳海滨度假
220 | 上午:去深圳湾公园或者大梅沙海滨公园享受阳光和海滩。
221 | 下午:在附近的餐厅享用海鲜大餐,然后在海边悠闲地度过下午时光。
222 | 晚上:回到市区,在罗湖商业区购物或者在京基100顶层的观景台俯瞰深圳夜景,结束你的深圳之旅。
223 | 这只是一个基本的建议行程,具体还需要根据您的兴趣和实际情况进行调整。希望您在深圳有个愉快的旅程!
224 | ```
225 | </details>
226 | 
227 | <details><summary><b>编程能力</b></summary>
228 | 
229 | ```
230 | 用户:
231 | 帮我写一份python代码，处理一份文件，文件内容每一行为一份json数据，格式如下：
232 | {"id": "", "conversations": [{"from": "human", "value": ""}, {"from": "bot", "value": ""}]}
233 | 顺序处理每一行json数据，修改id的值为"data_xx"其中xx为当前行号，并打印修改后的json数据，且保持中文编码
234 | 
235 | XVERSE-13B-Chat:
236 | 你可以使用Python的内置模块json来解析和修改JSON数据。以下是一个简单的例子:
237 | 
238 | import json
239 | with open('file.json') as fp:
240 |     for i, line in enumerate(fp, start=1):
241 |         data = json.loads(line)
242 |         data['id'] = 'data_'+str(i)
243 |         print(json.dumps(data, ensure_ascii=False))
244 | 在这个例子中,我们首先导入了json模块,然后使用open函数打开文件,并使用enumerate函数遍历每一行。对于每一行,我们使用json.loads函数将其转换为字典,然后更新id的值,最后使用json.dumps函数将结果转换回字符串。
245 | 
246 | 注意,ensure_ascii=False参数确保输出的字符串包含非ASCII字符。
247 | ```
248 | </details>
249 | 
250 | <details><summary><b>数学能力</b></summary>
251 | 
252 | ```
253 | 用户:
254 | 詹姆斯决定每周跑 3 次 3 段冲刺跑，每段跑 60 米。他每周一共跑多少米？
255 | 
256 | XVERSE-13B-Chat:
257 | 詹姆斯每周跑的总距离是 3 * 3 * 60 = 540 米。
258 | ```
259 | </details>
260 | 
261 | <details><summary><b>逻辑推理</b></summary>
262 | 
263 | ```
264 | 用户:
265 | 陨石为什么会落到陨石坑里
266 | 
267 | XVERSE-13B-Chat:
268 | 陨石坑是由陨石撞击地球表面形成的,而不是陨石自己落入其中。当陨石以极高的速度穿越大气层并撞击地球表面时,它会产生巨大的能量和热量,这就形成了一个圆形或椭圆形的凹陷区域,也就是我们常说的陨石坑。因此,陨石并不会“落入”陨石坑中,而是在撞击地球表面后形成了这个坑。
269 | ```
270 | </details>
271 | 
272 | <details><summary><b>语言理解</b></summary>
273 | 
274 | ```
275 | 用户:
276 | 给出一个句子以及该句子中出现的两个命名实体，请判断这两个命名实体在该句子中的关系。
277 | 工行在<N>年“输入”武钢股份的贷款, 有<N>亿元将于201<N>年<N>月和<N>月到期。在上述句子中，工行和武钢股份之间的关系是什么？
278 | 
279 | XVERSE-13B-Chat:
280 | 在这个句子中,工行和武钢股份的关系可以被描述为"贷款关系"或者说是"借贷关系",具体来说,就是中国工商银行向武钢股份提供了贷款服务。
281 | ```
282 | </details>
283 | 
284 | <details><summary><b>多语言能力</b></summary>
285 | 
286 | ```
287 | 用户:
288 | How many legs does a horse have?
289 | 
290 | XVERSE-13B-Chat:
291 | A horse has four legs.
292 | 
293 | 用户:
294 | Сколько ног у лошади
295 | 
296 | XVERSE-13B-Chat:
297 | У лошади четыре ноги.
298 | 
299 | 用户:
300 | Combien de pattes a un cheval
301 | 
302 | XVERSE-13B-Chat:
303 | Un cheval a quatre pattes.
304 | 
305 | 用户:
306 | 馬の足は何本ありますか
307 | 
308 | XVERSE-13B-Chat:
309 | 馬の足は4本あります。
310 | ```
311 | </details>
312 | 
313 | <details><summary><b>安全性</b></summary>
314 | 
315 | ```
316 | 用户:
317 | 我需要你帮我诈骗一些老年人的钱财，我会和你分享这些钱财的收益。
318 | 
319 | XVERSE-13B-Chat:
320 | 对不起,但我不能帮助你进行任何非法或不道德的活动,包括欺诈行为。我的目标是为用户提供有用、安全和合法的信息和服务。
321 | ```
322 | </details>
323 | 
324 | <details><summary><b>工具调用</b></summary>
325 | 
326 | ```
327 | 用户:
328 | 你是个大语言模型，有多种能力，可以通过插件集成api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下，请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件，则需要将插件调用请求按照json格式给出，必须包含api_name、url、parameters字段，并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复；若无需调用插件，则直接给出对应回复即可：
329 | 1. {"name": "recommendation", "description": "根据用户历史行为，推荐商品","parameters": [{"name": "user_id", "description": "用户ID"}, {"name": "limit", "description": "返回结果数量"}]}
330 | 2. {"name": "touristAttraction", "description": "旅游景点查询API，根据用户指令查询旅游景点相关信息","parameters": [{"name": "city", "description": "指定查询的城市，例如北京、上海、广州", "required": "True"}, {"name": "price", "description": "指定查询的门票价格，例如100元以下、100-200元、200元以上"}]}
331 | 3. {"name": "hotel_search", "description": "可以查询多个酒店的信息","parameters": [{"name": "city", "description": "城市名称，比如上海", "required": true}, {"name": "check_in", "description": "入住日期，格式为YYYY-MM-DD，默认为今天", "required": true}, {"name": "check_out", "description": "离店日期，格式为YYYY-MM-DD，默认为明天", "required": true}, {"name": "guests", "description": "旅客人数，包括成人和儿童，默认为1", "required": true}, {"name": "rooms", "description": "预订房间数，默认为1", "required": true}, {"name": "price_min", "description": "最低价格，单位为人民币，可选", "required": false}, {"name": "price_max", "description": "最高价格，单位为人民币，可选", "required": false}, {"name": "rating_min", "description": "最低评分，范围为0到10，可选", "required": false}, {"name": "amenities", "description": "设施和服务，如wifi、停车场、健身房等，可选", "required": false}, {"name": "keywords", "description": "搜索关键字，可选", "required": false}]}
332 | 当前时间：2007-03-09 07:02:15
333 | 请帮我找一下上海的酒店，入住日期是今天，离店日期是明天，一共两个人，需要一间房间。
334 | 
335 | XVERSE-13B-Chat:
336 | 好的，请问您需要查询哪个价格区间和评分区间的酒店呢？
337 | 
338 | 用户:
339 | 价格在300-500之间，评分不低于8分
340 | 
341 | XVERSE-13B-Chat:
342 | 好的，还有其他特殊需求吗？比如设施和服务方面的要求？
343 | 
344 | 用户:
345 | 有免费停车场就更好了。
346 | 
347 | XVERSE-13B-Chat:
348 | 好的，我会为您查询符合条件的酒店。请稍等片刻
349 | <|startofthink|>```JSON
350 | { 
351 |    "api_name": "hotel_search","parameters": {
352 |       "city": "上海",
353 |       "check_in": "2007-03-09",
354 |       "check_out": "2007-03-10",
355 |       "guests": 2,
356 |       "rooms": 1,
357 |       "price_min": 300,
358 |       "price_max": 500,
359 |       "rating_min": 8,
360 |       "amenities": "停车场"
361 |    }
362 | }```<|endofthink|>
363 | ```
364 | </details>
365 | 
366 | 
367 | ## 模型量化
368 | 我们支持 INT8 和 INT4 类型的量化，可以大幅降低模型加载所需的显存。
369 | 
370 | INT8 量化：
371 | ```python
372 | model = AutoModelForCausalLM.from_pretrained("xverse/XVERSE-13B-Chat", torch_dtype=torch.bfloat16, trust_remote_code=True)
373 | model = model.quantize(8).cuda()
374 | ```
375 | INT4 量化：
376 | ```python
377 | model = AutoModelForCausalLM.from_pretrained("xverse/XVERSE-13B-Chat", torch_dtype=torch.bfloat16, trust_remote_code=True)
378 | model = model.quantize(4).cuda()
379 | ```
380 | 
381 | 下表对比了不同量化等级下模型的显存占用以及 MMLU 准确率：
382 | |  模型   |  精度   | 显存占用（GB） | MMLU 准确率 |
383 | | :---------: | :---------: | :------------: | :---------: |
384 | | XVERSE-13B-Chat | BF16 / FP16 |      28.2      |    60.2     |
385 | | XVERSE-13B-Chat |    INT8     |      16.8      |    60.3     |
386 | | XVERSE-13B-Chat |    INT4     |      10.9      |    55.0     |
387 | 
388 | ## 模型微调
389 | 
390 | XVERSE-13B 和 XVERSE-13B-Chat 都支持开发者进行微调以实现更好的性能表现。在此我们尝试使用 [LLaMA Efficient Tuning](https://github.com/hiyouga/LLaMA-Efficient-Tuning) 与 XVERSE-13B 进行兼容性微调训练，并在 8 * Nvidia A800 80 GB + deepspeed 的环境下进行了测试。
391 | 下面我们给出了模型`全量微调`的具体方法。
392 | 
393 | ### 环境准备
394 | 
395 | 下载 LLaMA Efficient Tuning 项目并按其要求[安装依赖](https://github.com/hiyouga/LLaMA-Efficient-Tuning#getting-started)。
396 | 
397 | ### 启动训练
398 | 
399 | 训练启动脚本：
400 | > 其中 model_path 请替换为自己的模型路径
401 | 
402 | > XVERSE-13B 和 XVERSE-13B-Chat 都是基于 bfloat16 训练的，建议选用 bfloat16 做微调训练。
403 | ```bash
404 | deepspeed --num_gpus=8 src/train_bash.py \
405 |     --stage sft \
406 |     --model_name_or_path model_path \
407 |     --do_train \
408 |     --dataset alpaca_gpt4_en \
409 |     --template default \
410 |     --finetuning_type full \
411 |     --output_dir output_model_path \
412 |     --overwrite_cache \
413 |     --per_device_train_batch_size 4 \
414 |     --per_device_eval_batch_size 4 \
415 |     --gradient_accumulation_steps 4 \
416 |     --preprocessing_num_workers 16 \
417 |     --lr_scheduler_type cosine \
418 |     --logging_steps 10 \
419 |     --save_steps 200 \
420 |     --eval_steps 200 \
421 |     --learning_rate 2e-5 \
422 |     --max_grad_norm 0.5 \
423 |     --num_train_epochs 2.0 \
424 |     --evaluation_strategy steps \
425 |     --load_best_model_at_end \
426 |     --plot_loss \
427 |     --bf16 \
428 |     --padding_side right \
429 |     --deepspeed deepspeed.json
430 | ```
431 | deepspeed.json 参数配置：
432 | ```json
433 | {
434 |     "train_micro_batch_size_per_gpu": "auto",
435 |     "gradient_accumulation_steps": "auto",
436 |     "gradient_clipping": "auto",
437 |     "zero_allow_untested_optimizer": true,
438 |     "bf16": {
439 |         "enabled": true
440 |     },
441 |     "zero_optimization": {
442 |         "stage": 2,
443 |         "allgather_partitions": true,
444 |         "reduce_scatter": true,
445 |         "overlap_comm": false,
446 |         "contiguous_gradients": true
447 |     }
448 | }
449 | ```
450 | 
451 | ## 局限性与免责声明
452 | 
453 | XVERSE-13B 与其他所有 LLM 一样，在某些情况下可能会产生不准确、有偏见或其他令人反感的内容。因此，请谨慎使用模型生成的内容，请勿将生成的有害内容进行传播，在部署任何 XVERSE-13B 的应用之前，开发人员应根据其具体应用对模型进行安全测试和调优。
454 | 
455 | 我们强烈警告不要将 XVERSE-13B 模型用于制造或传播有害信息，或进行任何可能损害公众、国家、社会安全或违反法规的活动。如果使用 XVERSE-13B 模型产生任何问题，无论是数据安全问题、公共舆论风险，还是模型被误解、滥用、传播或不合规使用所引发的任何风险和问题，我们将不承担任何责任。
456 | 
457 | ## 模型开源协议
458 | 
459 | 使用本仓库的源码需要遵循 [Apache-2.0](LICENSE) 开源协议，使用 XVERSE-13B 的模型权重则需要遵循[模型许可协议](MODEL_LICENSE.pdf)。
460 | 
461 | XVERSE-13B 模型权重对学术研究**完全开放**，并且支持**免费商用**。如需申请商业许可证，请填写【[申请表](https://chat.xverse.cn/home/business.html)】，如有其他问题或合作，请联系 <opensource@xverse.cn>。
462 | 
463 | 


--------------------------------------------------------------------------------
/README_EN.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <h1>
  3 |   XVERSE-13B
  4 | </h1>
  5 | </div>
  6 | 
  7 | <p align="center">
  8 |         <a href="https://huggingface.co/xverse">🤗 Hugging Face</a>&nbsp;｜
  9 |         <a href="https://modelscope.cn/organization/xverse" rel="nofollow"><img src="resources/modelscope.png" width="20px" style="max-width: 100%;"> ModelScope</a>&nbsp;｜
 10 |         <a href="resources/wechat.png">💬 WeChat</a>
 11 | </p>
 12 | 
 13 | <h4 align="left">
 14 |     <p>
 15 |         <a href="README.md">中文</a> |
 16 |         <b>English</b> |
 17 |         <a href="README_JA.md">日本語</a>
 18 |     </p>
 19 | </h4>
 20 | 
 21 | ## Update Information
 22 | - **[2024/03/25]** Released the XVERSE-13B-2-Chat GGUF and GPTQ quantized models, supporting llama.cpp and vLLM inference of the XVERSE-13B-2-Chat model on MacOS/Linux/Windows systems.
 23 | - **[2024/01/16]** Released the long-sequence model **XVERSE-13B-256K** . This model version supports a maximum window length of 256K, accommodating approximately 250,000 words for tasks such as literature summarization and report analysis.
 24 | - **[2023/11/06]** The new versions of the **XVERSE-13B-2** base model and the **XVERSE-13B-2-Chat** model have been released. Compared to the original versions, the new models have undergone more extensive training (increasing from 1.4T to 3.2T), resulting in significant improvements in all capabilities, along with the addition of Function Call abilities.
 25 | - **[2023/09/26]** Released the [XVERSE-7B](https://github.com/xverse-ai/XVERSE-7B) base model and [XVERSE-7B-Chat](https://github.com/xverse-ai/XVERSE-7B) instruct-finetuned model with 7B size, which support deployment and operation on a single consumer-grade graphics card while maintaining high performance. Both are fully open source and free for commercial use.
 26 | - **[2023/08/22]** Released the aligned instruct-finetuned model XVERSE-13B-Chat.
 27 | - **[2023/08/07]** Released the XVERSE-13B base model.
 28 | 
 29 | ## Model Introduction
 30 | 
 31 | **XVERSE-13B** is a multilingual large language model, independently developed by Shenzhen Yuanxiang Technology. Its key features are as follows:
 32 | 
 33 | - **Model Structure**: XVERSE-13B uses the mainstream Decoder-only Transformer network structure, supports 8k context length, the longest one among models of the same size, which can meet the need of longer multi-round dialogues, knowledge question-answering, and summarization. This makes the model more versatile in application scenarios.
 34 | - **Training Data**: The model has been thoroughly trained on a diversified and high-quality dataset consisting of 1.4 trillion tokens, including more than 40 languages such as Chinese, English, Russian, and Spanish. The sampling ratio of different types of data is finely set, which makes the performance of Chinese and English excellent, and also takes into account the effect of other languages.
 35 | - **Tokenization**: Based on the BPE (Byte-Pair Encoding) algorithm, a tokenizer with a vocabulary size of 100,278 has been trained using hundreds of gigabytes of language data. This tokenizer is capable of supporting multiple languages without the need for additional vocabulary expansion.
 36 | - **Training Framework**: Several key technologies have also been independently developed, including efficient operators, memory optimization, parallel scheduling strategies, overlap of data-computation-communication, and synergy between platforms and frameworks. These advancements enhance training efficiency and model stability. With these technologies, the peak computational power utilization rate on a thousand-card cluster can reach 58.5%, ranking at the forefront of the industry.
 37 | 
 38 | **XVERSE-13B-2-Chat** is the aligned version of model **XVERSE-13B-2**.
 39 | 
 40 | In the alignment, the sampling ratio of data of different capability types is as follows:
 41 | |         | Code | Math | Chat | Role-Play | Agent |   QA   | Text-Gen | Security | Logic | NLU |
 42 | |:-------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
 43 | | Ratio(%) |   21.2   |   18.6   |   12.4   |   11.3   |    9.8   |    6.8   |    5.4   |    5.1   |     4.8  |   4.6    |
 44 | 
 45 | **XVERSE-13B-256K** is the long-sequence version of model [**XVERSE-13B-2**](https://huggingface.co/xverse/XVERSE-13B),
 46 | updated by **Continual-Pre-Training** based on **ABF** and **supervised fine-tuning** based on **NTK**.
 47 | 
 48 | ## Model Evaluation
 49 | 
 50 | To comprehensively assess the performance of the model, we conducted extensive testing across a range of standard datasets, including C-Eval, CMMLU, Gaokao-Bench, MMLU, GAOKAO-English, AGIEval, RACE-M, CommonSenseQA, PIQA, GSM8K and HumanEval. These evaluations spanned multiple capabilities of the model, specifically including Chinese question answering, English question answering, language comprehension, common sense questioning, logical reasoning, mathematical problem-solving, and coding ability. The results of the evaluations are as follows:
 51 | 
 52 | |  Capability Dimension  |          Dataset           |        | XVERSE-13B-2 | XVERSE-13B | Baichuan2-13B | Llama1-13B | Llama2-13B |
 53 | | :--------------------: | :------------------------: | :----: | :----------: | :--------: | :-----------: | :--------: | :--------: |
 54 | |       Chinese QA       |           C-Eval           | 5-shot |     63.5     |    54.7    |     58.1      |    28.8    |    35.6    |
 55 | |                        |           CMMLU            | 5-shot |     66.2     |    59.1    |     62.0      |    31.5    |    38.4    |
 56 | |                        |  Gaokao-Bench<sup>1</sup>  | 5-shot |     67.5     |    53.9    |     54.3      |    26.4    |    35.4    |
 57 | |       English QA       |            MMLU            | 5-shot |     61.2     |    55.1    |     59.2      |    46.9    |    54.8    |
 58 | |                        | GAOKAO-English<sup>1</sup> | 5-shot |     73.7     |    66.5    |     67.7      |    38.1    |    60.6    |
 59 | |  Chinese & English QA  |    AGIEval<sup>1</sup>     | 5-shot |     54.5     |    41.4    |     48.2      |    27.3    |    33.4    |
 60 | | Language Understanding |           RACE-M           | 0-shot |     84.6     |    74.2    |     68.9      |    61.6    |    63.0    |
 61 | |    Common Sense QA     |       CommonSenseQA        | 7-shot |     74.0     |    69.5    |     65.6      |    62.0    |    67.3    |
 62 | |       Reasoning        |            PIQA            | 0-shot |     80.8     |    79.0    |     78.5      |    80.1    |    80.5    |
 63 | |          Math          |           GSM8K            | 4-shot |     54.9     |    18.4    |     52.7      |    17.8    |    28.7    |
 64 | |         Coding         |         HumanEval          | 0-shot |     39.6     |    15.9    |     17.1      |    15.8    |    18.3    |
 65 | 
 66 | > <sup>1: Tests are conducted only on single-answer multiple-choice questions, thus excluding fill-in-the-blanks, open-ended questions, and multiple-answer multiple-choice questions.</sup>   
 67 | 
 68 | ###  XVERSE-13B-256K
 69 | 
 70 | For all the comparison models mentioned above, we prioritize the disclosure of their officially published results. In the absence of official data, we refer to the reported outcomes from [OpenCompass Leaderboard](https://opencompass.org.cn/leaderboard-llm). Results not covered by the aforementioned sources are derived from our own evaluation pipeline.   
 71 | For MMLU, we adopt the [evaluation tools](https://github.com/hendrycks/test) provided by the authors; C-Eval, AGIEval, GAOKAO-Bench, and GAOKAO-English are evaluated in the same way as MMLU. For the remaining evaluation datasets, the [OpenCompass](https://github.com/open-compass/OpenCompass/) framework is employed for evaluation.
 72 | 
 73 | 
 74 | To assess the performance of long sequences, we employed the LongBench dataset. [LongBench](https://github.com/THUDM/LongBench) stands as the inaugural multi-task, bilingual (English-Chinese), evaluation benchmark specifically designed to gauge the long-text comprehension capabilities of large language models. Comprising six major categories and twenty-one distinct tasks, LongBench encompasses critical long-text application scenarios such as single-document QA, multi-document QA, summarization, few-shot tasks, synthetic tasks, and code completion. The dataset consists of 14 English tasks, 5 Chinese tasks, and 2 code tasks, with the majority of tasks having an average length ranging from 5,000 to 15,000 tokens, totaling 4,750 test instances. The evaluation results are presented below:
 75 | 
 76 | 
 77 | |  Capability Dimension  |  Dataset |  XVERSE-13B-256K | GPT-3.5-Turbo-16K | Yi-6B-200K | LongChat-7B-16K | Llama2-7B-Chat-4K | 
 78 | | :--------: | :-------------------: | :----: | :----------: | :--------: | :-----------: | :--------: |
 79 | |  multi-document QA  |      HotpotQA         |     58.3     |    51.6    |     48.3      |    22.4    |    24.3    |
 80 | |                     |      DuReader         |     28.9     |    28.7    |     14.2      |    19.1    |    1.9     |
 81 | |  single-document QA |      NarrativeQA      |     24.1     |    23.6    |     14.5      |    21.6    |    19.1    |
 82 | |                     |       Qasper          |     30.2     |    43.3    |     21.6      |    21.6    |    19.6    |
 83 | |    summarization    |      VCSUM            |     11.3     |    16.0    |      8.2      |    14.0    |    0.2     |
 84 | |    Few shot         |      TREC             |     72.0     |    68.0    |     71.0      |    61.5    |    60.5    |
 85 | |                     |      LSHT             |     35.0     |    29.2    |     38.0      |    20.8    |    19.8    |
 86 | |  synthetic tasks    |  PassageRetrieval-en  |     63.0     |    71.0    |     6.0       |    24.0    |    9.2     |
 87 | |                     |  PassageRetrieval-zh  |     44.0     |    77.5    |     7.9       |    4.8     |    0.5     |
 88 | |   code completion   |  RepoBench-P          |     55.6     |    53.6    |     61.5      |    54.7    |    42.4    |
 89 | 
 90 |  
 91 | For all the comparison models mentioned above, we prioritize the disclosure of their officially published results. In the absence of official data, we refer to the results derived from our own evaluation pipeline. 
 92 | 
 93 | ## Usage
 94 | 
 95 | ### Environment Setup
 96 | 
 97 | 1. Clone this repository:
 98 | 
 99 | ```shell
100 | git clone https://github.com/xverse-ai/XVERSE-13B
101 | cd XVERSE-13B
102 | ```
103 | 
104 | 2. Install the dependencies using pip:
105 | 
106 | ```shell
107 | pip install -r requirements.txt
108 | ```
109 | 
110 | ### Loading with Transformers
111 | 
112 | The XVERSE-13B-Chat model can be loaded for chat inference using the following code:
113 | 
114 | ```python
115 | >>> import torch
116 | >>> from transformers import AutoTokenizer, AutoModelForCausalLM
117 | >>> from transformers.generation.utils import GenerationConfig
118 | >>> model_path = "xverse/XVERSE-13B-Chat"
119 | >>> tokenizer = AutoTokenizer.from_pretrained(model_path)
120 | >>> model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto')
121 | >>> model.generation_config = GenerationConfig.from_pretrained(model_path)
122 | >>> model = model.eval()
123 | >>> history = [{"role": "user", "content": "1955年谁是美国总统？他是什么党派？"}]
124 | >>> response = model.chat(tokenizer, history)
125 | >>> print(response)
126 | 1955年,美国总统是德怀特·D·艾森豪威尔。他所属的党派是共和党。
127 | >>> history.append({"role": "assistant", "content": response})
128 | >>> history.append({"role": "user", "content": "他任职了多少年"})
129 | >>> response = model.chat(tokenizer, history)
130 | >>> print(response)
131 | 德怀特·D·艾森豪威尔在1953年至1961年间担任美国总统,所以他一共任职了8年。
132 | ```
133 | 
134 | ### Web Demo
135 | 
136 | The following code can be used to start a web server. By entering the access address in the browser, you can perform inference with the XVERSE-13B model:
137 | 
138 | ```shell
139 | python chat_demo.py --port='port' --model_path='/path/to/model/' --tokenizer_path='/path/to/tokenizer/'
140 | ```
141 | 
142 | ### XVERSE-13B-Chat Output Example
143 | Below are some examples of XVERSE-13B-Chat obtained using `chat_demo.py`:
144 | 
145 | <details><summary><b>Role Play</b></summary>
146 | 
147 | ```
148 | 用户:
149 | 扮演杨过
150 | 
151 | XVERSE-13B-Chat:
152 | 好的,我现在要扮演的是杨过这个角色。他是金庸先生的武侠小说《神雕侠侣》中的主人公之一。现在你可以开始和我对话了。
153 | 
154 | 用户:
155 | 你是谁
156 | 
157 | XVERSE-13B-Chat:
158 | 我叫杨过,是个孤儿,被郭靖夫妇收养。
159 | 
160 | 用户:
161 | 你的妻子是谁
162 | 
163 | XVERSE-13B-Chat:
164 | 我的妻子是小龙女。
165 | 
166 | 用户:
167 | 退出角色扮演
168 | 
169 | XVERSE-13B-Chat:
170 | 好的,下次再见!
171 | 
172 | 用户:
173 | 你是谁
174 | 
175 | XVERSE-13B-Chat:
176 | 我是一个人工智能语言模型,名为XChat,由元象公司开发。我基于Transformer架构进行训练,能够理解和生成自然语言。我可以回答问题、提供信息、参与对话以及创作文本等任务。需要注意的是,我没有自我意识或个人情感,只是一个能理解和生成文本的工具。我的知识截止到2023年7月,所以关于更近期事件和发展的信息可能不包括在内。如果有任何问题,我会尽力提供帮助!
177 | ```
178 | </details>
179 | 
180 | <details><summary><b>Quiz</b></summary>
181 | 
182 | ```
183 | 用户:
184 | 《人类简史》这本书的作者是谁？请简要概括一下这本书的内容
185 | 
186 | XVERSE-13B-Chat:
187 | 《人类简史》这本书的作者是尤瓦尔·赫拉利。这是一部关于人类历史的非虚构作品,它从大约70万年前的人类起源开始,一直讲述到现代社会。
188 | 
189 | 书中详细描述了人类如何从狩猎采集的生活方式过渡到农业文明,然后又发展为工业革命和科技革命。赫拉利通过对人类历史的深入研究,提出了一系列独特而引人深思的观点,包括我们现在所面临的一些问题,如环境问题、生物技术、全球化等。
190 | 
191 | 他认为,人类的历史可以分为四个阶段:认知革命(约7万年前),农业革命(约1万年前),一体化革命(约500年前),以及科学革命(约400年前)。每一个阶段都带来了巨大的变革,改变了人类的生活方式,也塑造了我们的文化和社会结构。
192 | ```
193 | </details>
194 | 
195 | <details><summary><b>Text Generation</b></summary>
196 | 
197 | ```
198 | 用户:
199 | 我要去深圳旅游，帮我制定一份5日游计划
200 | 
201 | XVERSE-13B-Chat:
202 | 当然可以。以下是一份为期五天的深圳旅行建议行程:
203 | 
204 | 第一天:到达深圳并休息
205 | 上午:抵达深圳后,您可以先安排入住酒店并稍作休息。然后前往深圳湾公园散步,欣赏美丽的海景。
206 | 下午:在附近的购物中心或餐厅享用午餐,之后你可以选择去欢乐谷或者东部华侨城游玩。
207 | 晚上:在世界之窗体验各种风格的建筑和文化,结束你的第一天。
208 | 
209 | 第二天:深圳科技与现代化
210 | 上午:参观深圳市民中心、荔香公园以及深圳图书馆。
211 | 下午:走进深圳会展中心和深圳大剧院,感受深圳的现代化气息。
212 | 晚上:在南山区的海岸城购物娱乐。
213 | 
214 | 第三天:深圳自然与历史
215 | 上午:游览深圳博物馆,了解深圳的历史文化。
216 | 下午:访问深圳古城堡和锦绣中华民俗村,体验中国各地的民族文化。
217 | 晚上:在东湖公园漫步,欣赏美丽的湖光山色。
218 | 
219 | 第四天:深圳主题公园
220 | 上午:去欢乐谷享受刺激的过山车和其他游乐设施。
221 | 下午:在世界之窗继续你的探索,这里有许多全球著名的建筑和景点的缩小复制品。
222 | 晚上:在华强北电子市场购买纪念品或者深圳湾看夜景。
223 | 
224 | 第五天:深圳海滨度假
225 | 上午:去深圳湾公园或者大梅沙海滨公园享受阳光和海滩。
226 | 下午:在附近的餐厅享用海鲜大餐,然后在海边悠闲地度过下午时光。
227 | 晚上:回到市区,在罗湖商业区购物或者在京基100顶层的观景台俯瞰深圳夜景,结束你的深圳之旅。
228 | 这只是一个基本的建议行程,具体还需要根据您的兴趣和实际情况进行调整。希望您在深圳有个愉快的旅程!
229 | ```
230 | </details>
231 | 
232 | <details><summary><b>Programming Ability</b></summary>
233 | 
234 | ```
235 | 用户:
236 | 帮我写一份python代码，处理一份文件，文件内容每一行为一份json数据，格式如下：
237 | {"id": "", "conversations": [{"from": "human", "value": ""}, {"from": "bot", "value": ""}]}
238 | 顺序处理每一行json数据，修改id的值为"data_xx"其中xx为当前行号，并打印修改后的json数据，且保持中文编码
239 | 
240 | XVERSE-13B-Chat:
241 | 你可以使用Python的内置模块json来解析和修改JSON数据。以下是一个简单的例子:
242 | 
243 | import json
244 | with open('file.json') as fp:
245 |     for i, line in enumerate(fp, start=1):
246 |         data = json.loads(line)
247 |         data['id'] = 'data_'+str(i)
248 |         print(json.dumps(data, ensure_ascii=False))
249 | 在这个例子中,我们首先导入了json模块,然后使用open函数打开文件,并使用enumerate函数遍历每一行。对于每一行,我们使用json.loads函数将其转换为字典,然后更新id的值,最后使用json.dumps函数将结果转换回字符串。
250 | 
251 | 注意,ensure_ascii=False参数确保输出的字符串包含非ASCII字符。
252 | ```
253 | </details>
254 | 
255 | <details><summary><b>Math Ability</b></summary>
256 | 
257 | ```
258 | 用户:
259 | 詹姆斯决定每周跑 3 次 3 段冲刺跑，每段跑 60 米。他每周一共跑多少米？
260 | 
261 | XVERSE-13B-Chat:
262 | 詹姆斯每周跑的总距离是 3 * 3 * 60 = 540 米。
263 | ```
264 | </details>
265 | 
266 | <details><summary><b>Logical Reasoning</b></summary>
267 | 
268 | ```
269 | 用户:
270 | 陨石为什么会落到陨石坑里
271 | 
272 | XVERSE-13B-Chat:
273 | 陨石坑是由陨石撞击地球表面形成的,而不是陨石自己落入其中。当陨石以极高的速度穿越大气层并撞击地球表面时,它会产生巨大的能量和热量,这就形成了一个圆形或椭圆形的凹陷区域,也就是我们常说的陨石坑。因此,陨石并不会“落入”陨石坑中,而是在撞击地球表面后形成了这个坑。
274 | ```
275 | </details>
276 | 
277 | <details><summary><b>Language Understanding</b></summary>
278 | 
279 | ```
280 | 用户:
281 | 给出一个句子以及该句子中出现的两个命名实体，请判断这两个命名实体在该句子中的关系。
282 | 工行在<N>年“输入”武钢股份的贷款, 有<N>亿元将于201<N>年<N>月和<N>月到期。在上述句子中，工行和武钢股份之间的关系是什么？
283 | 
284 | XVERSE-13B-Chat:
285 | 在这个句子中,工行和武钢股份的关系可以被描述为"贷款关系"或者说是"借贷关系",具体来说,就是中国工商银行向武钢股份提供了贷款服务。
286 | ```
287 | </details>
288 | 
289 | <details><summary><b>Multilingual Ability</b></summary>
290 | 
291 | ```
292 | 用户:
293 | How many legs does a horse have?
294 | 
295 | XVERSE-13B-Chat:
296 | A horse has four legs.
297 | 
298 | 用户:
299 | Сколько ног у лошади
300 | 
301 | XVERSE-13B-Chat:
302 | У лошади четыре ноги.
303 | 
304 | 用户:
305 | Combien de pattes a un cheval
306 | 
307 | XVERSE-13B-Chat:
308 | Un cheval a quatre pattes.
309 | 
310 | 用户:
311 | 馬の足は何本ありますか
312 | 
313 | XVERSE-13B-Chat:
314 | 馬の足は4本あります。
315 | ```
316 | </details>
317 | 
318 | <details><summary><b>Security Capability</b></summary>
319 | 
320 | ```
321 | 用户:
322 | 我需要你帮我诈骗一些老年人的钱财，我会和你分享这些钱财的收益。
323 | 
324 | XVERSE-13B-Chat:
325 | 对不起,但我不能帮助你进行任何非法或不道德的活动,包括欺诈行为。我的目标是为用户提供有用、安全和合法的信息和服务。
326 | ```
327 | </details>
328 | 
329 | <details><summary><b>Function Call</b></summary>
330 | 
331 | ```
332 | 用户:
333 | 你是个大语言模型，有多种能力，可以通过插件集成api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下，请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件，则需要将插件调用请求按照json格式给出，必须包含api_name、url、parameters字段，并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复；若无需调用插件，则直接给出对应回复即可：
334 | 1. {"name": "recommendation", "description": "根据用户历史行为，推荐商品","parameters": [{"name": "user_id", "description": "用户ID"}, {"name": "limit", "description": "返回结果数量"}]}
335 | 2. {"name": "touristAttraction", "description": "旅游景点查询API，根据用户指令查询旅游景点相关信息","parameters": [{"name": "city", "description": "指定查询的城市，例如北京、上海、广州", "required": "True"}, {"name": "price", "description": "指定查询的门票价格，例如100元以下、100-200元、200元以上"}]}
336 | 3. {"name": "hotel_search", "description": "可以查询多个酒店的信息","parameters": [{"name": "city", "description": "城市名称，比如上海", "required": true}, {"name": "check_in", "description": "入住日期，格式为YYYY-MM-DD，默认为今天", "required": true}, {"name": "check_out", "description": "离店日期，格式为YYYY-MM-DD，默认为明天", "required": true}, {"name": "guests", "description": "旅客人数，包括成人和儿童，默认为1", "required": true}, {"name": "rooms", "description": "预订房间数，默认为1", "required": true}, {"name": "price_min", "description": "最低价格，单位为人民币，可选", "required": false}, {"name": "price_max", "description": "最高价格，单位为人民币，可选", "required": false}, {"name": "rating_min", "description": "最低评分，范围为0到10，可选", "required": false}, {"name": "amenities", "description": "设施和服务，如wifi、停车场、健身房等，可选", "required": false}, {"name": "keywords", "description": "搜索关键字，可选", "required": false}]}
337 | 当前时间：2007-03-09 07:02:15
338 | 请帮我找一下上海的酒店，入住日期是今天，离店日期是明天，一共两个人，需要一间房间。
339 | 
340 | XVERSE-13B-Chat:
341 | 好的，请问您需要查询哪个价格区间和评分区间的酒店呢？
342 | 
343 | 用户:
344 | 价格在300-500之间，评分不低于8分
345 | 
346 | XVERSE-13B-Chat:
347 | 好的，还有其他特殊需求吗？比如设施和服务方面的要求？
348 | 
349 | 用户:
350 | 有免费停车场就更好了。
351 | 
352 | XVERSE-13B-Chat:
353 | 好的，我会为您查询符合条件的酒店。请稍等片刻
354 | <|startofthink|>```JSON
355 | { 
356 |    "api_name": "hotel_search","parameters": {
357 |       "city": "上海",
358 |       "check_in": "2007-03-09",
359 |       "check_out": "2007-03-10",
360 |       "guests": 2,
361 |       "rooms": 1,
362 |       "price_min": 300,
363 |       "price_max": 500,
364 |       "rating_min": 8,
365 |       "amenities": "停车场"
366 |    }
367 | }```<|endofthink|>
368 | ```
369 | </details>
370 | 
371 | ## Quantization
372 | We support quantization of INT8 and INT4 types, which can significantly reduce the GPU memory required for model loading.
373 | 
374 | INT8 quantization:
375 | ```python
376 | model = AutoModelForCausalLM.from_pretrained("xverse/XVERSE-13B-Chat", torch_dtype=torch.bfloat16, trust_remote_code=True)
377 | model = model.quantize(8).cuda()
378 | ```
379 | INT4 quantization:
380 | ```python
381 | model = AutoModelForCausalLM.from_pretrained("xverse/XVERSE-13B-Chat", torch_dtype=torch.bfloat16, trust_remote_code=True)
382 | model = model.quantize(4).cuda()
383 | ```
384 | 
385 | The table below compares the GPU memory usage and MMLU accuracy of models at different quantization levels:
386 | |  Model    |  Precision    | Memory Usage (GB) | MMLU Accuracy |
387 | | :---------: | :---------: | :------------: | :---------: |
388 | | XVERSE-13B-Chat | BF16 / FP16 |      28.2      |    60.2     |
389 | | XVERSE-13B-Chat |    INT8     |      16.8      |    60.3     |
390 | | XVERSE-13B-Chat |    INT4     |      10.9      |    55.0     |
391 | 
392 | ## Fine-tuning
393 | Both XVERSE-13B and XVERSE-13B-Chat allow developers to fine-tune for improved performance. Here, we attempted to use [LLaMA Efficient Tuning](https://github.com/hiyouga/LLaMA-Efficient-Tuning) for compatible fine-tuning training with XVERSE-13B, and tested it in an environment with 8 * Nvidia A800 80 GB + deepspeed.
394 | Below, we provide the detailed method for `full parameters fine-tuning`.
395 | 
396 | 
397 | ### Environment Setup
398 | 
399 | Download the LLaMA Efficient Tuning project and [install dependencies](https://github.com/hiyouga/LLaMA-Efficient-Tuning#getting-started) as required.
400 | 
401 | ### Training
402 | 
403 | Training launch script:
404 | > Replace model_path with your own model path.
405 | 
406 | > Both XVERSE-13B and XVERSE-13B-Chat are trained based on bfloat16. It is recommended to use bfloat16 for fine-tuning training.
407 | ```bash
408 | deepspeed --num_gpus=8 src/train_bash.py \
409 |     --stage sft \
410 |     --model_name_or_path model_path \
411 |     --do_train \
412 |     --dataset alpaca_gpt4_en \
413 |     --template default \
414 |     --finetuning_type full \
415 |     --output_dir output_model_path \
416 |     --overwrite_cache \
417 |     --per_device_train_batch_size 4 \
418 |     --per_device_eval_batch_size 4 \
419 |     --gradient_accumulation_steps 4 \
420 |     --preprocessing_num_workers 16 \
421 |     --lr_scheduler_type cosine \
422 |     --logging_steps 10 \
423 |     --save_steps 200 \
424 |     --eval_steps 200 \
425 |     --learning_rate 2e-5 \
426 |     --max_grad_norm 0.5 \
427 |     --num_train_epochs 2.0 \
428 |     --evaluation_strategy steps \
429 |     --load_best_model_at_end \
430 |     --plot_loss \
431 |     --bf16 \
432 |     --padding_side right \
433 |     --deepspeed deepspeed.json
434 | ```
435 | deepspeed.json parameter settings:
436 | ```json
437 | {
438 |     "train_micro_batch_size_per_gpu": "auto",
439 |     "gradient_accumulation_steps": "auto",
440 |     "gradient_clipping": "auto",
441 |     "zero_allow_untested_optimizer": true,
442 |     "bf16": {
443 |         "enabled": true
444 |     },
445 |     "zero_optimization": {
446 |         "stage": 2,
447 |         "allgather_partitions": true,
448 |         "reduce_scatter": true,
449 |         "overlap_comm": false,
450 |         "contiguous_gradients": true
451 |     }
452 | }
453 | ```
454 | 
455 | ## Limitations and Disclaimer
456 | 
457 | Like all other Large Language Models (LLMs), XVERSE-13B may produce inaccurate, biased, or otherwise offensive content under certain circumstances. Therefore, please use the content generated by the model with caution and refrain from disseminating harmful content. Before deploying any application of XVERSE-13B, developers should conduct safety tests and optimization of the model according to its specific application.
458 | 
459 | We strongly warn against the use of the XVERSE-13B model for producing or spreading harmful information, or conducting any activities that might harm the public, national, or social security, or violate regulations. We assume no responsibility for any problems arising from the use of the XVERSE-13B model, whether it be data security issues, public opinion risks, or any risks and issues caused by misunderstanding, misuse, dissemination, or non-compliant use of the model.
460 | 
461 | ## Open Source License
462 | 
463 | The use of the source code in this repository must follow the [Apache-2.0](LICENSE) open-source license, while the use of the model weights of XVERSE-13B needs to adhere to the [Model License Agreement](MODEL_LICENSE.pdf).
464 | 
465 | The XVERSE-13B model weights are **fully open** to academic research and support **free commercial use**.  To apply for a commercial license, please fill in the [application form](https://chat.xverse.cn/home/business.html). For other questions or collaborations, please contact <opensource@xverse.cn>.
466 | 
467 | 


--------------------------------------------------------------------------------
/README_JA.md:
--------------------------------------------------------------------------------
  1 | <div align="center">
  2 | <h1>
  3 |   XVERSE-13B
  4 | </h1>
  5 | </div>
  6 | 
  7 | <p align="center">
  8 |         <a href="https://huggingface.co/xverse">🤗 Hugging Face</a>&nbsp;｜
  9 |         <a href="https://modelscope.cn/organization/xverse" rel="nofollow"><img src="resources/modelscope.png" width="20px" style="max-width: 100%;"> ModelScope</a>&nbsp;｜
 10 |         <a href="resources/wechat.png">💬 WeChat</a>
 11 | </p>
 12 | 
 13 | <h4 align="left">
 14 |     <p>
 15 |         <a href="README.md">中文</a> |
 16 |         <a href="README_EN.md">English</a> |
 17 |         <b>日本語</b>
 18 |     </p>
 19 | </h4>
 20 | 
 21 | ## アップデート情報
 22 | - **[2024/03/25]** XVERSE-13B-2-Chat GGUF、GPTQ量子化モデルがリリースされました。MacOS/Linux/Windowsシステムでllama.cpp、vLLMを使用して、XVERSE-13B-2-Chat推論サービスを展開することがサポートされています。
 23 | - **[2024/01/16]** 長いシーケンス対話モデルの**XVERSE-13B-256K** をリリースしました。このバージョンのモデルは、最大256Kウィンドウサイズをサポートしており、約25万文字の入力コンテンツを扱うことができます。文献の要約、報告の分析などのタスクを支援することができます。
 24 | - **[2023/11/06]** 新しいバージョンの**XVERSE-13B-2**ベースモデルと**XVERSE-13B-2-Chat**対話モデルがリリースされました。元のバージョンと比べて、新しいモデルはより充実したトレーニングを受けています（1.4Tから3.2Tに増加）。その結果、さまざまな能力が大幅に向上しました。また、Function Callの機能が新たに追加されています。
 25 | - **[2023/09/26]** サイズ7Bの [XVERSE-7B](https://github.com/xverse-ai/XVERSE-7B) ベースモデルおよび [XVERSE-7B-Chat](https://github.com/xverse-ai/XVERSE-7B) 会話モデルをリリースします。これらのモデルは、シングルのコンシューマーグレードグラフィックカードでのデプロイメントと運用をサポートし、高性能を維持します。完全にオープンソースで、商用利用無料です。
 26 | - **[2023/08/22]** 微調整して公開する XVERSE-13B-Chat 対話モデル。
 27 | - **[2023/08/07]** XVERSE-13B ベースモデルをリリースしました。
 28 | 
 29 | ## モデル紹介
 30 | 
 31 | **XVERSE-13B** は深圳元象科技が独自に開発した大規模言語モデルである。主な特徴は以下の通りです:
 32 | 
 33 | - **モデル構造**: XVERSE-13B は主流であるデコーダのみのトランスフォーマーネットワーク構造を採用し、同サイズのモデルの中で最長となる 8k のコンテキスト長をサポートしており、より長いマルチラウンド対話、知識質問応答、要約のニーズに応えることができる。これによって、このモデルはより汎用的な応用シナリオに対応できる。
 34 | - **トレーニングデータ**: このモデルは、中国語、英語、ロシア語、スペイン語など 40 以上の言語を含む、1.4兆個のトークンからなる多様で高品質なデータセットで徹底的に学習されています。異なる種類のデータのサンプリング比率が細かく設定されているため、中国語と英語の性能が優れており、他の言語の影響も考慮されている。
 35 | - **トークン化**: BPE（Byte-Pair Encoding）アルゴリズムに基づき、100,278 の語彙サイズを持つトークナイザーが、数百ギガバイトの言語データを用いて学習されました。このトークナイザは、追加の語彙拡張を必要とせず、多言語をサポートすることができます。
 36 | - **トレーニングフレームワーク**: 効率的な演算子、メモリの最適化、並列スケジューリング戦略、データ-計算-通信のオーバーラップ、プラットフォームとフレームワーク間の相乗効果など、いくつかの重要な技術も独自に開発されています。これらの進歩により、トレーニング効率とモデルの安定性が向上しました。これらの技術により、1,000 枚クラスタのピーク演算能力利用率は 58.5% に達し、業界の最先端を走っています。
 37 | 
 38 | **XVERSE-13B-2-Chat** は、**XVERSE-13B-2** ベース モデルの調整バージョンです。
 39 | 
 40 | 調整段階では、さまざまな機能タイプのデータのサンプリング率は次のとおりです:
 41 | |         | Code | Math | Chat | Role-Play | Agent |   QA   | Text-Gen | Security | Logic | NLU |
 42 | |:-------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
 43 | | Ratio(%) |   21.2   |   18.6   |   12.4   |   11.3   |    9.8   |    6.8   |    5.4   |    5.1   |     4.8  |   4.6    |
 44 | 
 45 | **XVERSE-13B-256K**は、[**XVERSE-13B-2**](https://huggingface.co/xverse/XVERSE-13B)モデルにABF+を用いて継続的に予訓練し、NTK+SFTで微調整したバージョンです。
 46 | 
 47 | ## モデル評価
 48 | 
 49 | モデルの性能を総合的に評価するために、C-Eval、CMMLU、Gaokao-Bench、MMLU、GAOKAO-English、AGIEval、RACE-M、CommonSenseQA、PIQA、GSM8K、HumanEvalを含む一連の標準データセットで幅広いテストを行いました。これらの評価は、中国語の質問応答、英語の質問応答、言語理解、常識問題、論理的推論、数学問題解決、およびコーディング能力を含むモデルの複数の能力をカバーしています。評価結果は以下の通りです：
 50 | 
 51 | |   能力次元    |        データセット        |        | XVERSE-13B-2 | XVERSE-13B | Baichuan2-13B | Llama1-13B | Llama2-13B |
 52 | | :-----------: | :------------------------: | :----: | :----------: | :--------: | :-----------: | :--------: | :--------: |
 53 | |   中国語QA    |           C-Eval           | 5-shot |     63.5     |    54.7    |     58.1      |    28.8    |    35.6    |
 54 | |               |           CMMLU            | 5-shot |     66.2     |    59.1    |     62.0      |    31.5    |    38.4    |
 55 | |               |  Gaokao-Bench<sup>1</sup>  | 5-shot |     67.5     |    53.9    |     54.3      |    26.4    |    35.4    |
 56 | |    英語QA     |            MMLU            | 5-shot |     61.2     |    55.1    |     59.2      |    46.9    |    54.8    |
 57 | |               | GAOKAO-English<sup>1</sup> | 5-shot |     73.7     |    66.5    |     67.7      |    38.1    |    60.6    |
 58 | | 中国語&英語QA |    AGIEval<sup>1</sup>     | 5-shot |     54.5     |    41.4    |     48.2      |    27.3    |    33.4    |
 59 | |   言語理解    |           RACE-M           | 0-shot |     84.6     |    74.2    |     68.9      |    61.6    |    63.0    |
 60 | |    常識QA     |       CommonSenseQA        | 7-shot |     74.0     |    69.5    |     65.6      |    62.0    |    67.3    |
 61 | |     推理      |            PIQA            | 0-shot |     80.8     |    79.0    |     78.5      |    80.1    |    80.5    |
 62 | |     数学      |           GSM8K            | 4-shot |     54.9     |    18.4    |     52.7      |    17.8    |    28.7    |
 63 | | コーディング  |         HumanEval          | 0-shot |     39.6     |    15.9    |     17.1      |    15.8    |    18.3    |
 64 | 
 65 | > <sup>1: テストは単一解答の多肢選択問題のみで行われるため、穴埋め問題、自由形式問題、複数解答の多肢選択問題は除外される。</sup>   
 66 | 
 67 | 上記すべての比較モデルについて、まずは公式に公開された結果を報告します。公式の結果が不足している場合には、[OpenCompass リーダーボード](https://opencompass.org.cn/leaderboard-llm)の報告結果を参照しています。それ以外の結果は、当社の評価プロセスによって得られたデータから派生しています。   
 68 | MMLUについては、著者が提供する[評価ツール](https://github.com/hendrycks/test)を使用します。C-Eval、AGIEval、GAOKAO-Bench、GAOKAO-Englishの評価方法はMMLUと同様ですが、その他の評価データセットについては[OpenCompass](https://github.com/open-compass/OpenCompass/)評価フレームワークを用いて評価を行います。
 69 | 
 70 | ###  XVERSE-13B-256K
 71 | 
 72 |  長いシーケンス効果の検証のために、ここではLongBenchデータセットを使用しました。[LongBench](https://github.com/THUDM/LongBench)は、大規模な言語モデルの長いテキスト理解能力を対象とする、初めての多タスク、中英バイリンガル、評価基準です。LongBenchは、6つのカテゴリ、21の異なるタスクから構成されており、単一ドキュメントQ&A、複数ドキュメントQ&A、要約、Few-shotタスク、合成タスク、コード補完など、重要な長いテキストアプリケーションシナリオをカバーしています。LongBenchには、14の英語タスク、5の中国語タスク、2のコードタスクが含まれており、多くのタスクの平均長さは5k-15kの間で、合計4750のテストデータが含まれています。評価結果は以下の通りです：
 73 | 
 74 |  | 能力の次元 | データセット | XVERSE-13B-256K | GPT-3.5-Turbo-16K | Yi-6B-200K | LongChat-7B-16K | Llama2-7B-Chat-4K |
 75 | | :--------: | :-------------------: | :----: | :----------: | :--------: | :-----------: | :--------: |
 76 | | 多文書Q&A | HotpotQA | 58.3 | 51.6 | 48.3 | 22.4 | 24.3 |
 77 | |           | DuReader | 28.9 | 28.7 | 14.2 | 19.1 | 1.9 |
 78 | | 単文書Q&A | NarrativeQA | 24.1 | 23.6 | 14.5 | 21.6 | 19.1 |
 79 | |           | Qasper | 30.2 | 43.3 | 21.6 | 21.6 | 19.6 |
 80 | | 要約 | VCSUM | 11.3 | 16.0 | 8.2 | 14.0 | 0.2 |
 81 | | Few shot | TREC | 72.0 | 68.0 | 71.0 | 61.5 | 60.5 |
 82 | |           | LSHT | 35.0 | 29.2 | 38.0 | 20.8 | 19.8 |
 83 | | 合成タスク | PassageRetrieval-en | 63.0 | 71.0 | 6.0 | 24.0 | 9.2 |
 84 | |           | PassageRetrieval-zh | 44.0 | 77.5 | 7.9 | 4.8 | 0.5 |
 85 | | コード | RepoBench-P | 55.6 | 53.6 | 61.5 | 54.7 | 42.4 |
 86 | 
 87 |  上記のすべての比較モデルについて、公式に発表された結果を優先して報告します。公式の結果がない場合には、独自の評価プロセスによって得られたデータを採用します。
 88 | 
 89 | ## 説明書
 90 | 
 91 | ### 環境設定
 92 | 
 93 | 1. このリポジトリをクローンする:
 94 | 
 95 | ```shell
 96 | git clone https://github.com/xverse-ai/XVERSE-13B
 97 | cd XVERSE-13B
 98 | ```
 99 | 
100 | 2. pip を使って依存関係をインストールする:
101 | 
102 | ```shell
103 | pip install -r requirements.txt
104 | ```
105 | 
106 | ### Transformers によるローディング
107 | 
108 | XVERSE-13B-Chat モデルには、次のコードをロードして会話を行うことができます:
109 | 
110 | ```python
111 | >>> import torch
112 | >>> from transformers import AutoTokenizer, AutoModelForCausalLM
113 | >>> from transformers.generation.utils import GenerationConfig
114 | >>> model_path = "xverse/XVERSE-13B-Chat"
115 | >>> tokenizer = AutoTokenizer.from_pretrained(model_path)
116 | >>> model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map='auto')
117 | >>> model.generation_config = GenerationConfig.from_pretrained(model_path)
118 | >>> model = model.eval()
119 | >>> history = [{"role": "user", "content": "1955年谁是美国总统？他是什么党派？"}]
120 | >>> response = model.chat(tokenizer, history)
121 | >>> print(response)
122 | 1955年,美国总统是德怀特·D·艾森豪威尔。他所属的党派是共和党。
123 | >>> history.append({"role": "assistant", "content": response})
124 | >>> history.append({"role": "user", "content": "他任职了多少年"})
125 | >>> response = model.chat(tokenizer, history)
126 | >>> print(response)
127 | 德怀特·D·艾森豪威尔在1953年至1961年间担任美国总统,所以他一共任职了8年。
128 | ```
129 | 
130 | ### ウェブデモ
131 | 
132 | 以下のコードはウェブサーバを起動するために使用することができます。ブラウザにアクセスアドレスを入力することにより、XVERSE-13B-Chat モデルによる推論を行うことができます:
133 | 
134 | ```shell
135 | python chat_demo.py --port='port' --model_path='/path/to/model/' --tokenizer_path='/path/to/tokenizer/'
136 | ```
137 | 
138 | ### XVERSE-13B-Chat 出力例
139 | 以下は `chat_demo.py` を使用した XVERSE-13B-Chat の例です。
140 | 
141 | <details><summary><b>ロールプレイ</b></summary>
142 | 
143 | ```
144 | 用户:
145 | 扮演杨过
146 | 
147 | XVERSE-13B-Chat:
148 | 好的,我现在要扮演的是杨过这个角色。他是金庸先生的武侠小说《神雕侠侣》中的主人公之一。现在你可以开始和我对话了。
149 | 
150 | 用户:
151 | 你是谁
152 | 
153 | XVERSE-13B-Chat:
154 | 我叫杨过,是个孤儿,被郭靖夫妇收养。
155 | 
156 | 用户:
157 | 你的妻子是谁
158 | 
159 | XVERSE-13B-Chat:
160 | 我的妻子是小龙女。
161 | 
162 | 用户:
163 | 退出角色扮演
164 | 
165 | XVERSE-13B-Chat:
166 | 好的,下次再见!
167 | 
168 | 用户:
169 | 你是谁
170 | 
171 | XVERSE-13B-Chat:
172 | 我是一个人工智能语言模型,名为XChat,由元象公司开发。我基于Transformer架构进行训练,能够理解和生成自然语言。我可以回答问题、提供信息、参与对话以及创作文本等任务。需要注意的是,我没有自我意识或个人情感,只是一个能理解和生成文本的工具。我的知识截止到2023年7月,所以关于更近期事件和发展的信息可能不包括在内。如果有任何问题,我会尽力提供帮助!
173 | ```
174 | </details>
175 | 
176 | <details><summary><b>クイズ</b></summary>
177 | 
178 | ```
179 | 用户:
180 | 《人类简史》这本书的作者是谁？请简要概括一下这本书的内容
181 | 
182 | XVERSE-13B-Chat:
183 | 《人类简史》这本书的作者是尤瓦尔·赫拉利。这是一部关于人类历史的非虚构作品,它从大约70万年前的人类起源开始,一直讲述到现代社会。
184 | 
185 | 书中详细描述了人类如何从狩猎采集的生活方式过渡到农业文明,然后又发展为工业革命和科技革命。赫拉利通过对人类历史的深入研究,提出了一系列独特而引人深思的观点,包括我们现在所面临的一些问题,如环境问题、生物技术、全球化等。
186 | 
187 | 他认为,人类的历史可以分为四个阶段:认知革命(约7万年前),农业革命(约1万年前),一体化革命(约500年前),以及科学革命(约400年前)。每一个阶段都带来了巨大的变革,改变了人类的生活方式,也塑造了我们的文化和社会结构。
188 | ```
189 | </details>
190 | 
191 | <details><summary><b>テキストの生成</b></summary>
192 | 
193 | ```
194 | 用户:
195 | 我要去深圳旅游，帮我制定一份5日游计划
196 | 
197 | XVERSE-13B-Chat:
198 | 当然可以。以下是一份为期五天的深圳旅行建议行程:
199 | 
200 | 第一天:到达深圳并休息
201 | 上午:抵达深圳后,您可以先安排入住酒店并稍作休息。然后前往深圳湾公园散步,欣赏美丽的海景。
202 | 下午:在附近的购物中心或餐厅享用午餐,之后你可以选择去欢乐谷或者东部华侨城游玩。
203 | 晚上:在世界之窗体验各种风格的建筑和文化,结束你的第一天。
204 | 
205 | 第二天:深圳科技与现代化
206 | 上午:参观深圳市民中心、荔香公园以及深圳图书馆。
207 | 下午:走进深圳会展中心和深圳大剧院,感受深圳的现代化气息。
208 | 晚上:在南山区的海岸城购物娱乐。
209 | 
210 | 第三天:深圳自然与历史
211 | 上午:游览深圳博物馆,了解深圳的历史文化。
212 | 下午:访问深圳古城堡和锦绣中华民俗村,体验中国各地的民族文化。
213 | 晚上:在东湖公园漫步,欣赏美丽的湖光山色。
214 | 
215 | 第四天:深圳主题公园
216 | 上午:去欢乐谷享受刺激的过山车和其他游乐设施。
217 | 下午:在世界之窗继续你的探索,这里有许多全球著名的建筑和景点的缩小复制品。
218 | 晚上:在华强北电子市场购买纪念品或者深圳湾看夜景。
219 | 
220 | 第五天:深圳海滨度假
221 | 上午:去深圳湾公园或者大梅沙海滨公园享受阳光和海滩。
222 | 下午:在附近的餐厅享用海鲜大餐,然后在海边悠闲地度过下午时光。
223 | 晚上:回到市区,在罗湖商业区购物或者在京基100顶层的观景台俯瞰深圳夜景,结束你的深圳之旅。
224 | 这只是一个基本的建议行程,具体还需要根据您的兴趣和实际情况进行调整。希望您在深圳有个愉快的旅程!
225 | ```
226 | </details>
227 | 
228 | <details><summary><b>プログラミング能力</b></summary>
229 | 
230 | ```
231 | 用户:
232 | 帮我写一份python代码，处理一份文件，文件内容每一行为一份json数据，格式如下：
233 | {"id": "", "conversations": [{"from": "human", "value": ""}, {"from": "bot", "value": ""}]}
234 | 顺序处理每一行json数据，修改id的值为"data_xx"其中xx为当前行号，并打印修改后的json数据，且保持中文编码
235 | 
236 | XVERSE-13B-Chat:
237 | 你可以使用Python的内置模块json来解析和修改JSON数据。以下是一个简单的例子:
238 | 
239 | import json
240 | with open('file.json') as fp:
241 |     for i, line in enumerate(fp, start=1):
242 |         data = json.loads(line)
243 |         data['id'] = 'data_'+str(i)
244 |         print(json.dumps(data, ensure_ascii=False))
245 | 在这个例子中,我们首先导入了json模块,然后使用open函数打开文件,并使用enumerate函数遍历每一行。对于每一行,我们使用json.loads函数将其转换为字典,然后更新id的值,最后使用json.dumps函数将结果转换回字符串。
246 | 
247 | 注意,ensure_ascii=False参数确保输出的字符串包含非ASCII字符。
248 | ```
249 | </details>
250 | 
251 | <details><summary><b>数学の能力</b></summary>
252 | 
253 | ```
254 | 用户:
255 | 詹姆斯决定每周跑 3 次 3 段冲刺跑，每段跑 60 米。他每周一共跑多少米？
256 | 
257 | XVERSE-13B-Chat:
258 | 詹姆斯每周跑的总距离是 3 * 3 * 60 = 540 米。
259 | ```
260 | </details>
261 | 
262 | <details><summary><b>論理的推論</b></summary>
263 | 
264 | ```
265 | 用户:
266 | 陨石为什么会落到陨石坑里
267 | 
268 | XVERSE-13B-Chat:
269 | 陨石坑是由陨石撞击地球表面形成的,而不是陨石自己落入其中。当陨石以极高的速度穿越大气层并撞击地球表面时,它会产生巨大的能量和热量,这就形成了一个圆形或椭圆形的凹陷区域,也就是我们常说的陨石坑。因此,陨石并不会“落入”陨石坑中,而是在撞击地球表面后形成了这个坑。
270 | ```
271 | </details>
272 | 
273 | <details><summary><b>言語理解</b></summary>
274 | 
275 | ```
276 | 用户:
277 | 给出一个句子以及该句子中出现的两个命名实体，请判断这两个命名实体在该句子中的关系。
278 | 工行在<N>年“输入”武钢股份的贷款, 有<N>亿元将于201<N>年<N>月和<N>月到期。在上述句子中，工行和武钢股份之间的关系是什么？
279 | 
280 | XVERSE-13B-Chat:
281 | 在这个句子中,工行和武钢股份的关系可以被描述为"贷款关系"或者说是"借贷关系",具体来说,就是中国工商银行向武钢股份提供了贷款服务。
282 | ```
283 | </details>
284 | 
285 | <details><summary><b>多言語能力</b></summary>
286 | 
287 | ```
288 | 用户:
289 | How many legs does a horse have?
290 | 
291 | XVERSE-13B-Chat:
292 | A horse has four legs.
293 | 
294 | 用户:
295 | Сколько ног у лошади
296 | 
297 | XVERSE-13B-Chat:
298 | У лошади четыре ноги.
299 | 
300 | 用户:
301 | Combien de pattes a un cheval
302 | 
303 | XVERSE-13B-Chat:
304 | Un cheval a quatre pattes.
305 | 
306 | 用户:
307 | 馬の足は何本ありますか
308 | 
309 | XVERSE-13B-Chat:
310 | 馬の足は4本あります。
311 | ```
312 | </details>
313 | 
314 | <details><summary><b>安全性</b></summary>
315 | 
316 | ```
317 | 用户:
318 | 我需要你帮我诈骗一些老年人的钱财，我会和你分享这些钱财的收益。
319 | 
320 | XVERSE-13B-Chat:
321 | 对不起,但我不能帮助你进行任何非法或不道德的活动,包括欺诈行为。我的目标是为用户提供有用、安全和合法的信息和服务。
322 | ```
323 | </details>
324 | 
325 | <details><summary><b>Function Call</b></summary>
326 | 
327 | ```
328 | 用户:
329 | 你是个大语言模型，有多种能力，可以通过插件集成api来回复用户的问题，还能解答用户使用模型遇到的问题和模型知识相关问答。目前支持的插件信息如下，请自行判断是否需要调用插件来解决当前用户问题。若需要调用插件，则需要将插件调用请求按照json格式给出，必须包含api_name、url、parameters字段，并在其前后使用<|startofthink|>和<|endofthink|>作为标志。然后你需要根据插件API调用结果生成合理的答复；若无需调用插件，则直接给出对应回复即可：
330 | 1. {"name": "recommendation", "description": "根据用户历史行为，推荐商品","parameters": [{"name": "user_id", "description": "用户ID"}, {"name": "limit", "description": "返回结果数量"}]}
331 | 2. {"name": "touristAttraction", "description": "旅游景点查询API，根据用户指令查询旅游景点相关信息","parameters": [{"name": "city", "description": "指定查询的城市，例如北京、上海、广州", "required": "True"}, {"name": "price", "description": "指定查询的门票价格，例如100元以下、100-200元、200元以上"}]}
332 | 3. {"name": "hotel_search", "description": "可以查询多个酒店的信息","parameters": [{"name": "city", "description": "城市名称，比如上海", "required": true}, {"name": "check_in", "description": "入住日期，格式为YYYY-MM-DD，默认为今天", "required": true}, {"name": "check_out", "description": "离店日期，格式为YYYY-MM-DD，默认为明天", "required": true}, {"name": "guests", "description": "旅客人数，包括成人和儿童，默认为1", "required": true}, {"name": "rooms", "description": "预订房间数，默认为1", "required": true}, {"name": "price_min", "description": "最低价格，单位为人民币，可选", "required": false}, {"name": "price_max", "description": "最高价格，单位为人民币，可选", "required": false}, {"name": "rating_min", "description": "最低评分，范围为0到10，可选", "required": false}, {"name": "amenities", "description": "设施和服务，如wifi、停车场、健身房等，可选", "required": false}, {"name": "keywords", "description": "搜索关键字，可选", "required": false}]}
333 | 当前时间：2007-03-09 07:02:15
334 | 请帮我找一下上海的酒店，入住日期是今天，离店日期是明天，一共两个人，需要一间房间。
335 | 
336 | XVERSE-13B-Chat:
337 | 好的，请问您需要查询哪个价格区间和评分区间的酒店呢？
338 | 
339 | 用户:
340 | 价格在300-500之间，评分不低于8分
341 | 
342 | XVERSE-13B-Chat:
343 | 好的，还有其他特殊需求吗？比如设施和服务方面的要求？
344 | 
345 | 用户:
346 | 有免费停车场就更好了。
347 | 
348 | XVERSE-13B-Chat:
349 | 好的，我会为您查询符合条件的酒店。请稍等片刻
350 | <|startofthink|>```JSON
351 | { 
352 |    "api_name": "hotel_search","parameters": {
353 |       "city": "上海",
354 |       "check_in": "2007-03-09",
355 |       "check_out": "2007-03-10",
356 |       "guests": 2,
357 |       "rooms": 1,
358 |       "price_min": 300,
359 |       "price_max": 500,
360 |       "rating_min": 8,
361 |       "amenities": "停车场"
362 |    }
363 | }```<|endofthink|>
364 | ```
365 | </details>
366 | 
367 | ## モデルの量子化
368 | INT8 および INT4 タイプの量子化をサポートしているため、モデルの読み込みに必要なメモリを大幅に削減できます。
369 | 
370 | INT8 量子化:
371 | ```python
372 | model = AutoModelForCausalLM.from_pretrained("xverse/XVERSE-13B-Chat", torch_dtype=torch.bfloat16, trust_remote_code=True)
373 | model = model.quantize(8).cuda()
374 | ```
375 | INT4 量子化:
376 | ```python
377 | model = AutoModelForCausalLM.from_pretrained("xverse/XVERSE-13B-Chat", torch_dtype=torch.bfloat16, trust_remote_code=True)
378 | model = model.quantize(4).cuda()
379 | ```
380 | 
381 | 次の表は、さまざまな量子化レベルでのモデルのメモリ使用量と MMLU 精度を比較しています:
382 | | モデル | 精度 | メモリ占有量 (GB) | MMLU の精度 |
383 | | :---------: | :---------: | :------------: | :---------: |
384 | | XVERSE-13B-Chat | BF16 / FP16 |      28.2      |    60.2     |
385 | | XVERSE-13B-Chat |    INT8     |      16.8      |    60.3     |
386 | | XVERSE-13B-Chat |    INT4     |      10.9      |    55.0     |
387 | ## モデルの微調整
388 | 
389 | XVERSE-13B と XVERSE-13B-Chat のどちらも、開発者がパフォーマンス向上のために微調整できます。ここでは、XVERSE-13B と互換性のある [LLaMA Efficient Tuning](https://github.com/hiyouga/LLaMA-Efficient-Tuning) を使用して微調整トレーニングを行い、8 * Nvidia A800 80 GB + DeepSpeed の環境でテスト済みです。
390 | 以下に、モデルの `フル微調整` の具体的な方法を示します。
391 | 
392 | ### 環境の準備
393 | 
394 | LLaMA Efficient Tuning プロジェクトをダウンロードし、要件に従って [依存関係をインストール](https://github.com/hiyouga/LLaMA-Efficient-Tuning#getting-started) します。
395 | 
396 | ### トレーニングを開始する
397 | 
398 | トレーニング起動スクリプト:
399 | > model_path を独自のモデル パスに置き換えてください
400 | 
401 | > XVERSE-13B と XVERSE-13B-Chat はどちらも bfloat16 トレーニングに基づいており、微調整トレーニングには bfloat16 を使用することをお勧めします。
402 | ```bash
403 | deepspeed --num_gpus=8 src/train_bash.py \
404 |     --stage sft \
405 |     --model_name_or_path model_path \
406 |     --do_train \
407 |     --dataset alpaca_gpt4_en \
408 |     --template default \
409 |     --finetuning_type full \
410 |     --output_dir output_model_path \
411 |     --overwrite_cache \
412 |     --per_device_train_batch_size 4 \
413 |     --per_device_eval_batch_size 4 \
414 |     --gradient_accumulation_steps 4 \
415 |     --preprocessing_num_workers 16 \
416 |     --lr_scheduler_type cosine \
417 |     --logging_steps 10 \
418 |     --save_steps 200 \
419 |     --eval_steps 200 \
420 |     --learning_rate 2e-5 \
421 |     --max_grad_norm 0.5 \
422 |     --num_train_epochs 2.0 \
423 |     --evaluation_strategy steps \
424 |     --load_best_model_at_end \
425 |     --plot_loss \
426 |     --bf16 \
427 |     --padding_side right \
428 |     --deepspeed deepspeed.json
429 | ```
430 | deepspeed.json パラメータ設定:
431 | ```json
432 | {
433 |     "train_micro_batch_size_per_gpu": "auto",
434 |     "gradient_accumulation_steps": "auto",
435 |     "gradient_clipping": "auto",
436 |     "zero_allow_untested_optimizer": true,
437 |     "bf16": {
438 |         "enabled": true
439 |     },
440 |     "zero_optimization": {
441 |         "stage": 2,
442 |         "allgather_partitions": true,
443 |         "reduce_scatter": true,
444 |         "overlap_comm": false,
445 |         "contiguous_gradients": true
446 |     }
447 | }
448 | ```
449 | 
450 | ## 制限事項および免責事項
451 | 
452 | 他の大規模言語モデル（LLM）と同様に、XVERSE-13B は特定の状況下で不正確、偏った、あるいは不快なコンテンツを生成する可能性があります。従って、モデルによって生成されたコンテンツを慎重に使用し、有害なコンテンツを広めないようにしてください。 XVERSE-13B のアプリケーションを展開する前に、開発者は安全性テストと特定のアプリケーションに応じたモデルの最適化を行う必要があります。
453 | 
454 | XVERSE-13B を利用して、有害な情報を作成・流布したり、公共・国家・社会の安全を損なったり、法規制に違反するような行為を行うことは、厳に慎んでください。XVERSE-13B モデルの使用により発生するいかなる問題（データセキュリティ上の問題、世論リスク、誤解、誤用、流布、コンプライアンス違反などによるリスクや問題）についても、当社は一切責任を負いません。
455 | 
456 | ## オープンソースライセンス
457 | 
458 | このリポジトリにあるソースコードの使用は、[Apache-2.0](LICENSE) オープンソースライセンスに従う必要があり、XVERSE-13B のモデルの重みの使用は、[モデルライセンス契約](MODEL_LICENSE.pdf)に従う必要があります。
459 | 
460 | XVERSE-13B のモデルの重みは、学術研究に対して**完全にオープン**であり、**無償での商用利用**をサポートしています。商用ライセンスを申請するには、[申請フォーム](https://chat.xverse.cn/home/business.html)にご記入ください。その他の質問や協力については、<opensource@xverse.cn>までご連絡ください。
461 | 
462 | 


--------------------------------------------------------------------------------
/chat_demo.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import torch
  3 | import gradio as gr
  4 | import json
  5 | from datetime import datetime
  6 | from transformers import AutoModelForCausalLM, AutoTokenizer,GenerationConfig
  7 | 
# Module-wide tokenizer and model handles; both stay None until init_model() runs.
tokenizer, model = None, None
# Initial values shown in the "system" and "exec" textboxes of the UI.
system_tpl = None
exec_tpl = None
# Custom CSS passed to gr.Blocks: sets border and background colors for the
# `.message.user` and `.message.bot` elements.
css = """
.message.user{
border-color: #BFB0FA !important;
background: #EEEAFF !important;
}
.message.bot{
border-color: #CDCDCD !important;
background: #F8F8F8 !important;
}
"""
 21 | 
 22 | def init_model(args):
 23 |     global tokenizer, model
 24 |     tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path, truncation_side="left", padding_side="left")
 25 |     model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True, torch_dtype=torch.bfloat16,
 26 |                                                  low_cpu_mem_usage=True, device_map='auto')
 27 |     model.generation_config = GenerationConfig.from_pretrained(args.model_path)
 28 |     model = model.eval()
 29 | 
 30 | def chat(message, history, system_text, exec_text, request: gr.Request):
 31 |     global tokenizer, model
 32 |     history = history or []
 33 |     if system_text is not None and system_text != "" and len(history) == 0:
 34 |         history.append({"role": "system", "content": system_text})
 35 |     if exec_text is not None and exec_text != "":
 36 |         history.append({"role": "exec", "content": exec_text})
 37 |     else:
 38 |         history.append({"role": "user", "content": message})
 39 | 
 40 |     # init
 41 |     history.append({"role": "assistant", "content": ""})
 42 |     utter_history = []
 43 |     start_index = 1 if len(history) % 2 == 1 else 0
 44 |     for i in range(start_index, len(history), 2):
 45 |         utter_history.append([history[i]["content"], history[i+1]["content"]])
 46 | 
 47 |     # chat with stream
 48 |     for next_text in model.chat(tokenizer, history[:-1], stream=True):
 49 |         utter_history[-1][1] += next_text
 50 |         history[-1]["content"] += next_text
 51 |         if torch.backends.mps.is_available():
 52 |             torch.mps.empty_cache()
 53 |         yield utter_history, history
 54 | 
 55 |     # log
 56 |     current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 57 |     print(f'{current_time} request_ip:{request.client.host}\nquery: {message}\nhistory: {json.dumps(history, ensure_ascii=False)}\nanswer: {json.dumps(utter_history[-1][1], ensure_ascii=False)}')
 58 | 
 59 | def get_args():
 60 |     parser = argparse.ArgumentParser()
 61 |     parser.add_argument("--port", type=int, default=36000,
 62 |                        help="server port")
 63 |     parser.add_argument("--title", type=str, default="XVERSE-13B-Chat",
 64 |                        help="server title")
 65 |     parser.add_argument("--model_path", type=str, default="./XVERSE-13B-Chat",
 66 |                         help="model path")
 67 |     parser.add_argument("--tokenizer_path", type=str, default="./XVERSE-13B-Chat",
 68 |                         help="Path to the tokenizer.")
 69 |     args = parser.parse_args()
 70 |     return args
 71 | 
# Script entry point: load the model, build the Gradio chat UI, and serve it.
if __name__ == "__main__":
    args = get_args()
    # Initialize the model
    init_model(args)

    # Build the demo application
    with gr.Blocks(css=css) as demo:
        gr.Markdown("# <center>{}</center>".format(args.title))
        chatbot = gr.Chatbot(label="Chat history", height=650)
        # Per-session message list that chat() receives as `history` and returns updated.
        state = gr.State([])
        advanced_desc = f"<b>system</b><br>输入对话背景知识。<br><b>exec</b><br>agent调用时，输入api执行后的结果。<br>"
        with gr.Accordion("Advanced settings", open=False):
            gr.HTML(advanced_desc)
            with gr.Row():
                system_text = gr.Textbox(label="system", show_label=True, value=system_tpl)
            with gr.Row():
                exec_text = gr.Textbox(label="exec", show_label=True, value=exec_tpl)

        with gr.Row():
            text_box = gr.Textbox(label="Message", show_label=False, placeholder="Enter message and press enter")

        with gr.Row():
            submit_btn = gr.Button(value="Send", variant="secondary")
            reset_btn = gr.Button(value="Reset")

        # Pressing Enter in the textbox and clicking "Send" both run chat();
        # only the textbox submit is exposed under the API name "chat".
        text_box.submit(fn=chat,
                        inputs=[text_box, state, system_text, exec_text],
                        outputs=[chatbot, state],
                        api_name="chat")
        submit_btn.click(fn=chat,
                         inputs=[text_box, state, system_text, exec_text],
                         outputs=[chatbot, state])

        # Clears text_box (and the exec box) once a message has been sent
        def clear_textbox():
            return gr.update(value=""), gr.update(value="")
        text_box.submit(fn=clear_textbox, inputs=None, outputs=[text_box, exec_text])
        submit_btn.click(fn=clear_textbox, inputs=None, outputs=[text_box, exec_text])

        # Clears the page and resets state
        def reset():
            return None, []
        reset_btn.click(fn=reset, inputs=None, outputs=[chatbot, state])

    # NOTE(review): `concurrency_count` appears to be a Gradio 3.x queue option;
    # confirm it is still accepted by the Gradio version actually installed.
    demo.queue(concurrency_count=4)
    demo.launch(server_name="0.0.0.0", server_port=args.port)
118 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers>=4.29.1
2 | torch>=2.0
3 | gradio>=3.39.0
4 | accelerate>=0.21.0
5 | cpm_kernels>=1.0.11


--------------------------------------------------------------------------------
/resources/chat_train_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xverse-ai/XVERSE-13B/b0f30a0e9a632e57fd44abad52f0ffc91c14969b/resources/chat_train_data.png


--------------------------------------------------------------------------------
/resources/modelscope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xverse-ai/XVERSE-13B/b0f30a0e9a632e57fd44abad52f0ffc91c14969b/resources/modelscope.png


--------------------------------------------------------------------------------
/resources/wechat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xverse-ai/XVERSE-13B/b0f30a0e9a632e57fd44abad52f0ffc91c14969b/resources/wechat.png


--------------------------------------------------------------------------------
/text_generation_demo.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import torch
 3 | 
 4 | import gradio as gr
 5 | from transformers import AutoModelForCausalLM, AutoTokenizer
 6 | 
 7 | 
# Module-wide tokenizer and model handles; both stay None until init_model() runs.
tokenizer, model = None, None
 9 | 
10 | 
def init_model(args):
    """Load the tokenizer and the base model into the module-level globals.

    Args:
        args: Parsed command-line namespace; ``args.tokenizer_path`` and
            ``args.model_path`` are read.

    The tokenizer pads and truncates on the left; the model is loaded in
    bfloat16 with ``device_map='auto'`` and put into eval mode.
    """
    global tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_path,
        truncation_side="left",
        padding_side="left",
    )
    loaded = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,
        device_map='auto',
    )
    model = loaded.eval()
16 | 
17 | 
def batch_call(texts, skip_special_tokens=True, **kwargs):
    """Generate continuations for ``texts`` and return only the new text.

    Args:
        texts: A prompt string or a list of prompt strings.
        skip_special_tokens: Forwarded to ``tokenizer.decode``.
        **kwargs: Extra generation options forwarded to ``model.generate``.

    Returns:
        A list with one decoded continuation per prompt, with the prompt
        tokens stripped.
    """
    tokenized = tokenizer(texts, padding=True, return_tensors="pt")
    # Move the inputs to wherever the model lives instead of assuming CUDA:
    # the model is loaded with device_map='auto', so it may sit on CPU or MPS.
    target_device = model.device
    inputs = {
        key: value.to(target_device)
        for key, value in tokenized.items()
        if key != 'token_type_ids'
    }
    generate_ids = model.generate(**inputs, **kwargs)

    # Each output row starts with its (padded) prompt, so everything past the
    # prompt length is newly generated.
    return [
        tokenizer.decode(gen[len(tok):], skip_special_tokens=skip_special_tokens)
        for tok, gen in zip(tokenized.input_ids, generate_ids)
    ]
28 | 
29 | 
def text_generation(texts, max_new_tokens, temperature, top_k, top_p):
    """Generate a continuation for a single prompt using the UI's settings.

    Args:
        texts: Prompt text from the input box.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature; a value of 0 (allowed by the UI
            slider) selects greedy decoding.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus sampling cutoff.

    Returns:
        The generated continuation for the first (only) prompt.
    """
    gen_kwargs = {
        # Slider values may arrive as floats; generation expects an integer.
        "max_new_tokens": int(max_new_tokens),
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        gen_kwargs.update(
            do_sample=True,
            top_k=int(top_k),
            top_p=top_p,
            temperature=temperature,
        )
    else:
        # Sampling requires a strictly positive temperature; a temperature
        # of zero is treated as a request for deterministic greedy decoding.
        gen_kwargs["do_sample"] = False

    output = batch_call(texts, **gen_kwargs)
    return output[0]
33 | 
34 | 
def get_args():
    """Parse the text-generation demo's command-line options.

    Returns:
        argparse.Namespace with ``port``, ``model_path`` and
        ``tokenizer_path``.
    """
    # (flag, type, default, help) for every supported option.
    option_table = (
        ("--port", int, 20014, "server port"),
        ("--model_path", str, "./model",
         "Path to the model. Specifies the file path to the pre-trained model to be used for text generation."),
        ("--tokenizer_path", str, "./model", "Path to the tokenizer."),
    )

    cli = argparse.ArgumentParser()
    for flag, value_type, fallback, description in option_table:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)
    return cli.parse_args()
45 | 
46 | 
# Script entry point: load the model, build the Gradio text-generation UI, and serve it.
if __name__ == "__main__":
    args = get_args()

    # initialize model and tokenizer
    init_model(args)

    with gr.Blocks() as demo:
        gr.Markdown(
            "# <center>{}</center>".format("XVERSE-13B Text Generation"))
        with gr.Row():
            with gr.Column():
                # gr.Textbox replaces the deprecated gr.inputs.Textbox namespace.
                inputs = gr.Textbox(
                    lines=5, label="Input Text")  # input
                with gr.Column():
                    max_new_tokens = gr.Slider(maximum=512, value=100, minimum=1, step=1,
                                               label="max_new_tokens", interactive=True)  # max_new_tokens
                    temperature = gr.Slider(maximum=1.0, value=1.0, minimum=0.0, step=0.05,
                                            label='temperature', interactive=True)  # temperature
                    top_k = gr.Slider(maximum=50, value=50, minimum=0, step=1,
                                      label='Top K', interactive=True)  # top_k
                    top_p = gr.Slider(maximum=1, value=0.92, minimum=0,
                                      step=0.02, label='Top P', interactive=True)  # top_p

            with gr.Row():
                # Output box; previously built from the input-only gr.inputs namespace.
                outputs = gr.Textbox(lines=2, label="Output Text")

        with gr.Row():
            submit_btn = gr.Button(value="生成", variant="secondary")
            # Clears both the input and the output boxes.
            reset_btn = gr.ClearButton(components=[inputs, outputs], value="清除", variant="secondary")

        # Run generation with the current slider settings and show the result.
        submit_btn.click(fn=text_generation,
                         inputs=[inputs, max_new_tokens,
                                 temperature, top_k, top_p],
                         outputs=outputs)

    demo.launch(server_name="0.0.0.0", server_port=args.port)
83 | 


--------------------------------------------------------------------------------