├── LICENSE
├── README.md
├── assets
└── mascot.png
├── configs
├── stablelm-2-12b.yml
├── stablelm-2-1_6b.yml
├── stablelm-3b-4e1t.yml
├── stablelm-base-alpha-3b-v2-4k-extension.yml
├── stablelm-base-alpha-3b-v2.yml
├── stablelm-base-alpha-3b.yml
├── stablelm-base-alpha-7b-v2-4k-extension.yml
├── stablelm-base-alpha-7b-v2.yml
└── stablelm-base-alpha-7b.yml
├── evals
├── external
│ ├── EleutherAI-pythia-2.8b-deduped.json
│ ├── EleutherAI_gpt-j-6B.json
│ ├── EleutherAI_gpt-neox-20B.json
│ ├── EleutherAI_pythia-12b.json
│ ├── EleutherAI_pythia-6.9b.json
│ ├── Qwen-Qwen-7B-Chat.json
│ ├── Qwen-Qwen-7B.json
│ ├── baichuan-inc_Baichuan2-7B-Base.json
│ ├── bigscience-bloom-3b.json
│ ├── bigscience-bloom-7b1.json
│ ├── cerebras-btlm-3b-8k-base.json
│ ├── facebook-opt-2.7b.json
│ ├── facebook-opt-6.7b.json
│ ├── huggyllama-llama-7b.json
│ ├── kittn_mistral-7B-v0.1-hf.json
│ ├── meta-llama-Llama-2-13b-hf.json
│ ├── meta-llama-Llama-2-7b.json
│ ├── microsoft-phi-1_5.json
│ ├── mosaicml-mpt-7b.json
│ ├── openlm-research-open_llama_13b
│ ├── openlm-research-open_llama_3b_v2.json
│ ├── openlm-research-open_llama_7b_v2.json
│ ├── tiiuae_falcon-7b.json
│ └── togethercomputer-RedPajama-INCITE-7B-Base2.json
├── open_llm_leaderboard
│ ├── stablelm-beta-3b-v2-arc-challenge.json
│ ├── stablelm-beta-3b-v2-hellaswag.json
│ ├── stablelm-beta-3b-v2-mmmlu.json
│ ├── stablelm-beta-3b-v2-truthfulqa_mc.json
│ ├── stablelm-beta-7b-v2-arc-challenge.json
│ ├── stablelm-beta-7b-v2-hellaswag.json
│ ├── stablelm-beta-7b-v2-mmmlu.json
│ └── stablelm-beta-7b-v2-truthfulqa_mc.json
├── stablelm-3b-4e1t.json
├── stablelm-base-alpha-3b-v2.json
├── stablelm-base-alpha-3b.json
├── stablelm-base-alpha-7b-v2.json
└── stablelm-base-alpha-7b.json
└── notebooks
└── stablelm-alpha.ipynb
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # StableLM: Stability AI Language Models
2 |
3 | 
4 |
*“A Stochastic Parrot, flat design, vector art” — [Stable Diffusion XL](https://clipdrop.co/stable-diffusion)*
5 |
6 | This repository contains Stability AI's ongoing development of the StableLM series of language models and will be continuously updated with new checkpoints. The following provides an overview of all currently available models. More coming soon.
7 |
8 | ## News
9 |
10 | *September 29, 2023*
11 |
12 | - Released StableLM-3B-4E1T model under [CC BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/).
13 |
14 | *August 5, 2023*
15 |
16 | - Released patched StableLM-Alpha v2 models with 3B and 7B parameters.
17 |
18 | *April 28, 2023*
19 |
20 | - Released StableVicuna-13B, our RLHF fine-tune of [Vicuna-13B v0](https://huggingface.co/lmsys/vicuna-13b-delta-v0), which itself is a fine-tune of [LLaMA-13B](https://github.com/facebookresearch/llama). Delta weights over the original Llama model is released under ([CC BY-NC-SA-4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)).
21 |
22 | *April 20, 2023*
23 |
24 | - Released initial set of StableLM-Alpha models, with 3B and 7B parameters. Base models are released under [CC BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/).
25 |
26 | - Try to chat with our 7B model, `StableLM-Tuned-Alpha-7B`, on [Hugging Face Spaces](https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat).
27 |
28 | ## Models
29 |
30 | ### StableLM-3B-4E1T
31 |
32 | > Technical Report: [StableLM-3B-4E1T](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo)
33 |
34 | StableLM-3B-4E1T is a 3 billion (3B) parameter language model pre-trained under the multi-epoch regime to study the impact of repeated tokens on downstream performance. Given prior success in this area ([Tay et al., 2023](https://arxiv.org/pdf/2205.05131.pdf) and [Taylor et al., 2022](https://galactica.org/static/paper.pdf)), we train on 1 trillion (1T) tokens for 4 epochs following the observations of [Muennighoff et al. (2023)](https://arxiv.org/abs/2305.16264) in "Scaling Data-Constrained Language Models" in which they find "training with up to 4 epochs of repeated data yields negligible changes to loss compared to having unique data." Further inspiration for the token count is taken from "Go smol or go home" ([De Vries, 2023](https://www.harmdevries.com/post/model-size-vs-compute-overhead/)), which suggests a 2.96B model trained for 2.85 trillion tokens achieves a similar loss to a Chinchilla compute-optimal 9.87B language model ($k_n = 0.3$).
35 |
36 | | Size | StableLM-3B-4E1T | Training Tokens | Parameters |
37 | |------|--------------------------------------------------------------------|-----------------|---------------|
38 | | 3B | [checkpoint](https://huggingface.co/stabilityai/stablelm-3b-4e1t) | 4T | 2,795,443,200 |
39 |
40 | #### Model Architecture
41 |
42 | The model is a decoder-only transformer similar to the LLaMA ([Touvron et al., 2023](https://arxiv.org/abs/2307.09288)) architecture with the following modifications:
43 |
44 | | Parameters | Hidden Size | Layers | Heads | Sequence Length |
45 | |----------------|-------------|--------|-------|-----------------|
46 | | 2,795,443,200 | 2560 | 32 | 32 | 4096 |
47 |
48 | - **Position Embeddings**: Rotary Position Embeddings ([Su et al., 2021](https://arxiv.org/abs/2104.09864)) applied to the first 25% of head embedding dimensions for improved throughput following [Black et al. (2022)](https://arxiv.org/pdf/2204.06745.pdf).
49 | - **Normalization**: LayerNorm ([Ba et al., 2016](https://arxiv.org/abs/1607.06450)) with learned bias terms as opposed to RMSNorm ([Zhang & Sennrich, 2019](https://arxiv.org/abs/1910.07467)).
50 | - **Tokenizer**: GPT-NeoX ([Black et al., 2022](https://arxiv.org/abs/2204.06745)).
51 |
52 | #### Training Data
53 |
54 | The dataset is comprised of a filtered mixture of open-source large-scale datasets available on the [HuggingFace Hub](https://huggingface.co/datasets): Falcon RefinedWeb extract ([Penedo et al., 2023](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)), and RedPajama-Data ([Together Computer, 2023](https://github.com/togethercomputer/RedPajama-Data)) and The Pile ([Gao et al., 2020](https://arxiv.org/abs/2101.00027)) both without *Books3* and other subsets, and StarCoder ([Li et al., 2023](https://arxiv.org/abs/2305.06161)).
55 |
56 | > Given the large amount of web data, we recommend fine-tuning the base StableLM-3B-4E1T for your downstream tasks.
57 |
58 | #### Training Details
59 |
60 | Please refer to the provided YAML configuration file [`stablelm-3b-4e1t.yml`](./configs/stablelm-3b-4e1t.yml) for complete hyperparameter settings and the [technical report](https://stability.wandb.io/stability-llm/stable-lm/reports/StableLM-3B-4E1T--VmlldzoyMjU4?accessToken=u3zujipenkx5g7rtcj9qojjgxpconyjktjkli2po09nffrffdhhchq045vp0wyfo) for further details.
61 |
62 | #### Downstream Results
63 |
64 | The following zero-shot evaluations are performed with the `lm-evaluation-harness` using the [lm-bench](https://github.com/Stability-AI/lm-evaluation-harness/tree/lm-bench) branch of Stability AI's fork. Full `lm-eval` JSONs can be found in the [`evals`](./evals) directory.
65 |
66 | | Pre-Trained Model | Average | ARC
Challenge | ARC
Easy | BoolQ | HellaSwag (✱) | LAMBADA
OpenAI | OpenBookQA | PIQA | SciQ | Winogrande |
67 | | ------------------------------------------------------------------------------------- |:-----------------:|:----------------:|:-----------:|:-----:|:-------------:|:-----------------:|:----------:|:-----:|:-----:|:----------:|
68 | | meta-llama/Llama-2-13b-hf | 71.77 | 48.63 | 79.50 | 80.52 | 79.36 | 76.77 | 35.40 | 79.05 | 94.50 | 72.22 |
69 | | huggyllama/llama-7b | 68.84 | 41.89 | 75.25 | 75.05 | 76.22 | 73.55 | 34.40 | 78.67 | 94.60 | 69.93 |
70 | | meta-llama/Llama-2-7b-hf | 68.75 | 43.00 | 76.26 | 77.74 | 75.94 | 73.47 | 31.40 | 77.75 | 93.60 | 69.61 |
71 | | Qwen/Qwen-7B | 67.91 | 45.39 | 67.38 | 74.56 | 88.85 (?) | 69.67 | 32.20 | 73.99 | 93.20 | 65.98 |
72 | | tiiuae/falcon-7b | 67.83 | 40.27 | 74.41 | 73.55 | 76.35 | 74.56 | 30.60 | 79.49 | 94.00 | 67.25 |
73 | | mosaicml/mpt-7b | 67.36 | 40.53 | 74.92 | 73.94 | 76.17 | 68.64 | 31.40 | 78.89 | 93.70 | 68.03 |
74 | | **stabilityai/stablelm-3b-4e1t** | 66.93 | 37.80 | 72.47 | 75.63 | 73.90 | 70.64 | 31.40 | 79.22 | 94.80 | 66.54 |
75 | | baichuan-inc/Baichuan2-7B-Base | 66.93 | 42.24 | 75.00 | 73.09 | 72.29 | 70.99 | 30.40 | 76.17 | 94.60 | 67.56 |
76 | | stabilityai/stablelm-base-alpha-7b-v2 | 66.89 | 38.48 | 73.19 | 70.31 | 74.27 | 74.19 | 30.40 | 78.45 | 93.90 | 68.82 |
77 | | openlm-research/open_llama_7b_v2 | 66.32 | 38.82 | 71.93 | 71.41 | 74.65 | 71.05 | 30.20 | 79.16 | 93.80 | 65.82 |
78 | | microsoft/phi-1_5 | 65.57 | 44.45 | 76.14 | 74.53 | 62.62 | 52.75 | 37.60 | 76.33 | 93.20 | 72.53 |
79 | | EleutherAI/gpt-neox-20B | 65.57 | 37.88 | 72.90 | 69.48 | 71.43 | 71.98 | 29.80 | 77.42 | 93.10 | 66.14 |
80 | | togethercomputer/RedPajama-INCITE-7B-Base | 65.07 | 37.71 | 72.35 | 70.76 | 70.33 | 71.34 | 29.00 | 77.15 | 92.70 | 64.33 |
81 | | cerebras/btlm-3b-8k-base (§) | 63.59 | 34.90 | 70.45 | 69.63 | 69.78 | 66.23 | 27.60 | 75.84 | 92.90 | 64.96 |
82 | | EleutherAI/pythia-12b | 62.69 | 31.83 | 70.20 | 67.31 | 67.38 | 70.64 | 26.40 | 76.28 | 90.20 | 64.01 |
83 | | openlm-research/open_llama_3b_v2 | 62.43 | 33.87 | 67.59 | 65.69 | 69.99 | 66.74 | 26.00 | 76.66 | 92.40 | 62.90 |
84 | | EleutherAI/gpt-j-6B | 62.34 | 33.96 | 66.96 | 65.44 | 66.24 | 68.23 | 29.00 | 75.57 | 91.50 | 64.17 |
85 | | stabilityai/stablelm-base-alpha-3b-v2 | 62.19 | 32.42 | 67.26 | 64.56 | 68.58 | 70.25 | 26.40 | 76.01 | 92.10 | 62.12 |
86 | | facebook/opt-6.7b | 61.85 | 30.72 | 65.66 | 66.02 | 67.20 | 67.65 | 27.60 | 76.33 | 90.10 | 65.35 |
87 | | EleutherAI/pythia-6.9b | 60.58 | 31.83 | 67.21 | 64.01 | 63.88 | 67.01 | 25.80 | 75.08 | 89.80 | 60.62 |
88 | | EleutherAI/pythia-2.8b-deduped | 58.52 | 30.12 | 63.47 | 64.13 | 59.44 | 65.15 | 23.80 | 74.10 | 88.20 | 58.25 |
89 | | **§** Previous 3B Pre-Trained SOTA
**?** Outlier Results
**\*** Byte-length Normalized Accuracy | | | | | | | | | | |
90 |
91 | **StableLM-3B-4E1T achieves state-of-the-art performance (September 2023) at the 3B parameter scale for open-source models** and is competitive with many of the popular contemporary 7B models, even outperforming our most recent 7B StableLM-Base-Alpha-v2.
92 |
93 | ### StableLM-Alpha v2
94 |
95 | StableLM-Alpha v2 models significantly improve on the initial Alpha models by incorporating architectural improvements such as SwiGLU ([Shazeer, 2020](https://arxiv.org/abs/2002.05202)) and using higher-quality data sources, as discussed below. The context length for these models is 4096 tokens.
96 |
97 | | Size | StableLM-Base-Alpha-v2 | Training Tokens | Parameters |
98 | |------|----------------------------------------------------------------------------|-----------------|---------------|
99 | | 3B | [checkpoint](https://huggingface.co/stabilityai/stablelm-base-alpha-3b-v2) | 1.1T | 2,796,431,360 |
100 | | 7B | [checkpoint](https://huggingface.co/stabilityai/stablelm-base-alpha-7b-v2) | 1.1T | 6,890,209,280 |
101 |
102 | #### Training Details
103 |
104 | Please refer to the provided YAML configuration files for hyperparameter details. E.g. for the extended `StableLM-Alpha-3B-v2` model, see [stablelm-base-alpha-3b-v2-4k-extension.yml](./configs/stablelm-base-alpha-3b-v2-4k-extension.yml).
105 |
106 | Following similar work, we use a multi-stage approach to context length extension ([Nijkamp et al., 2023](https://blog.salesforceairesearch.com/xgen/)), scheduling 1 trillion tokens at context length 2048 followed by 100 billion tokens at 4096. We found that sequence length warmup ([Li et al., 2022](https://arxiv.org/abs/2108.06084)) helped stabilize early spikes during the first ~80 billion tokens of pre-training. However, it was not applied to the final runs due to significant throughput penalties as length shapes grew across the curriculum.
107 |
108 | #### Training Data
109 |
110 | The most impactful changes for StableLM-Alpha-v2 downstream performance were in the usage of higher quality data sources and mixtures; specifically, the use of [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb) and [C4](https://huggingface.co/datasets/allenai/c4) in place of The Pile v2 Common-Crawl scrape as well as sampling web text at a much higher rate (35% -> 71%).
111 |
112 | The first pre-training stage relies on 1 trillion tokens sourced from a mix of the public Falcon RefinedWeb extract ([Penedo et al., 2023](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)), RedPajama-Data ([Together Computer, 2023](https://github.com/togethercomputer/RedPajama-Data)), The Pile ([Gao et al., 2020](https://arxiv.org/abs/2101.00027)), and internal datasets with web text sampled at a rate of 71%.
113 |
114 | In the second stage, we include the StarCoder ([Li et al., 2023](https://arxiv.org/abs/2305.06161)) dataset and down sample web text to 55% while increasing sampling proportions of naturally long text examples in the aforementioned sources.
115 |
116 | #### Evaluation
117 |
118 | The following zero-shot evaluations are performed with the `lm-evaluation-harness` at commit [`df3da98c5405deafd519c2ddca52bb7c3fe36bef`](https://github.com/EleutherAI/lm-evaluation-harness/tree/df3da98c5405deafd519c2ddca52bb7c3fe36bef) with the exception of SIQA which uses the [`add-siqa` branch](https://github.com/EleutherAI/lm-evaluation-harness/tree/add-siqa) with prompt format
119 | `{doc['context']}\nQuestion: {doc['question']}\nAnswer:`.
120 |
121 | | Model | ARC Challenge✱ | ARC Easy✱ | BoolQ | HellaSwag✱ | LAMBADA
OpenAI | OpenBookQA | PIQA | SIQA | TruthfulQA▲ | Winogrande | Average |
122 | | ------------------------- |:---------------:|:----------:|:-----:|:-----------:|:-----------------:|:----------:|:-----:|:-----:|:------------:|:----------:|:-------:|
123 | | **StableLM-Alpha-7B-v2** | 40.53 | 69.11 | 70.31 | 74.27 | 74.19 | 30.40 | 78.45 | 42.43 | 36.46 | 68.82 | 58.50 |
124 | | LLaMA-2-7B | 46.16 | 74.54 | 77.74 | 75.94 | 73.47 | 31.40 | 77.75 | 43.50 | 38.97 | 69.61 | 60.91 |
125 | | MPT-7B | 41.89 | 70.03 | 73.94 | 76.17 | 68.64 | 31.40 | 78.89 | 45.14 | 33.49 | 68.03 | 58.76 |
126 | | OpenLLaMA-7B-v2 | 42.41 | 69.65 | 71.41 | 74.65 | 71.05 | 30.20 | 79.16 | 41.97 | 34.57 | 65.82 | 58.09 |
127 | | RedPajama-INCITE-7B-Base | 39.42 | 69.19 | 70.76 | 70.33 | 71.34 | 29.00 | 77.15 | 42.58 | 33.01 | 64.33 | 56.71 |
128 | | **StableLM-Alpha-3B-v2** | 35.07 | 63.26 | 64.56 | 68.58 | 70.25 | 26.40 | 76.01 | 42.48 | 35.87 | 62.12 | 54.46 |
129 | | BTLM-3B-8K | 37.63 | 67.09 | 69.63 | 69.78 | 66.23 | 27.60 | 75.84 | 42.78 | 36.00 | 64.96 | 55.75 |
130 | | OpenLLaMA-3B-v2 | 36.09 | 63.51 | 65.69 | 69.99 | 66.74 | 26.00 | 76.66 | 41.20 | 34.59 | 62.90 | 54.34 |
131 | | Pythia-2.8B (deduped) | 32.94 | 59.09 | 64.13 | 59.44 | 65.15 | 23.80 | 74.10 | 40.94 | 35.56 | 58.25 | 51.34 |
132 | | StableLM-Alpha-7B | 27.05 | 44.87 | 60.06 | 41.22 | 55.11 | 21.40 | 66.76 | 39.46 | 39.96 | 50.12 | 44.60 |
133 | | StableLM-Alpha-3B | 25.77 | 42.05 | 57.65 | 38.31 | 41.72 | 17.00 | 63.82 | 35.62 | 40.53 | 52.64 | 41.51 |
134 |
135 | ✱: Denotes byte-length normalized accuracy (`acc_norm`) as described in [Gao, 2021](https://blog.eleuther.ai/multiple-choice-normalization/).
136 |
137 | ▲: We score TruthfulQA using the normalized total probability assigned to the set of true answers (`mc2`).
138 |
139 | ### StableLM-Alpha
140 |
141 | StableLM-Alpha models are trained on a new dataset that builds on [The Pile](https://pile.eleuther.ai/), which contains 1.5 trillion tokens, roughly 3x the size of The Pile. The context length for these models is 4096 tokens.
142 |
143 | As a proof-of-concept, we also fine-tuned the model with [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca)'s procedure using a combination of five recent datasets for conversational agents: Stanford's [Alpaca](https://github.com/tatsu-lab/stanford_alpaca), Nomic-AI's [gpt4all](https://github.com/nomic-ai/gpt4all), RyokoAI's [ShareGPT52K](https://huggingface.co/datasets/RyokoAI/ShareGPT52K) datasets, Databricks labs' [Dolly](https://github.com/databrickslabs/dolly), and Anthropic's [HH](https://github.com/anthropics/hh-rlhf). We will be releasing these models as StableLM-Tuned-Alpha.
144 |
145 | | Size | StableLM-Base-Alpha | StableLM-Tuned-Alpha | Training Tokens | Parameters | Web Demo |
146 | |------|--------------------------------------------------------------------------|---------------------------------------------------------------------------|-----------------|---------------|------------------------------------------------------------------------------------|
147 | | 3B | [checkpoint](https://huggingface.co/stabilityai/stablelm-base-alpha-3b/) | [checkpoint](https://huggingface.co/stabilityai/stablelm-tuned-alpha-3b/) | 800B | 3,638,525,952 | |
148 | | 7B | [checkpoint](https://huggingface.co/stabilityai/stablelm-base-alpha-7b) | [checkpoint](https://huggingface.co/stabilityai/stablelm-tuned-alpha-7b) | 800B | 7,869,358,080 | [Hugging Face](https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat) |
149 |
150 | ### StableVicuna
151 |
152 | StableVicuna is an RLHF fine-tune of [Vicuna-13B v0](https://huggingface.co/lmsys/vicuna-13b-delta-v0), which itself is a fine-tune of [LLaMA-13B](https://github.com/facebookresearch/llama). It is our attempt at creating an open-source RLHF LLM Chatbot. This model is developed by StabilityAI's CarperAI team, with [Duy V. Phung](https://github.com/PhungVanDuy) leading the training effort.
153 |
154 | Due to the original non-commercial license of LLaMA, we can only release the weights of our model as deltas over the original model's weights. StableVicuna's delta weights are released under ([CC BY-NC-SA-4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)).
155 |
156 | Please visit the HuggingFace checkpoint page for more information about how to combine our delta weights with the original model.
157 |
158 | | Model | Download | Web Demo | Cite |
159 | | ---------------- | ---------------------------------------------------------------------- | -------------------------------------------------------------------- |------|
160 | | StableVicuna-13B | [checkpoint](https://huggingface.co/CarperAI/stable-vicuna-13b-delta/) | [Hugging Face](https://huggingface.co/spaces/CarperAI/StableVicuna/) | [](https://doi.org/10.57967/hf/0588) |
161 |
162 | ## Quickstart
163 |
164 | All StableLM models are hosted on [the Hugging Face hub](https://huggingface.co/StabilityAI). Check out this [notebook](https://github.com/Stability-AI/StableLM/blob/main/notebooks/stablelm-alpha.ipynb) to run inference with limited GPU capabilities.
165 |
166 | Get started chatting with `StableLM-Tuned-Alpha` by using the following code snippet:
167 |
168 | ```python
169 | import torch
170 | from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
171 |
172 | tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
173 | model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
174 | model.half().cuda()
175 |
176 | class StopOnTokens(StoppingCriteria):
177 | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
178 | stop_ids = set([50278, 50279, 50277, 1, 0])
179 | return input_ids[0][-1] in stop_ids
180 |
181 | system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
182 | - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
183 | - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
184 | - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
185 | - StableLM will refuse to participate in anything that could harm a human.
186 | """
187 |
188 | prompt = f"{system_prompt}<|USER|>What's your mood today?<|ASSISTANT|>"
189 |
190 | inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
191 | tokens = model.generate(
192 | **inputs,
193 | max_new_tokens=64,
194 | temperature=0.7,
195 | do_sample=True,
196 | stopping_criteria=StoppingCriteriaList([StopOnTokens()])
197 | )
198 | print(tokenizer.decode(tokens[0], skip_special_tokens=True))
199 | ```
200 |
201 | StableLM Tuned should be used with prompts formatted as `<|SYSTEM|>...<|USER|>...<|ASSISTANT|>...`.
202 | The system prompt is:
203 | ```
204 | <|SYSTEM|># StableLM Tuned (Alpha version)
205 | - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
206 | - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
207 | - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
208 | - StableLM will refuse to participate in anything that could harm a human.
209 | ```
210 |
211 | ## Fun with StableLM-Tuned-Alpha
212 | This section contains a collection of fun cherry-picked examples of what you can do with `stablelm-tuned-alpha`.
213 |
214 |
215 | Chit-Chat
216 |
217 | ```
218 | User: Hey! My name is John nice to meet you!
219 |
220 | StableLM: Hello John! It’s always a pleasure to meet new people.
221 |
222 | User: The pleasure is mine. Hey my grandma is in Toronto. I want to plan a surprise visit for her. She’s a person who loves gardens and flowers.
223 |
224 | StableLM: That’s wonderful! It’s great to hear that you’ll be visiting your grandma in Toronto. Can you tell me a little bit more about your plans?
225 |
226 | User: Ya I need to book a flight there and a hotel. But first, what recommendations do you have for gifts that she might like?
227 |
228 | StableLM: I think that’s a very thoughtful idea! It’s a very personal gift that would be sure to make her very happy. I can think of a few examples that she might really enjoy. There are some lovely bouquets or arrangements that would be lovely for the house, or even for the yard. Flowers are a great choice for a surprise gift, and they’re also a great choice for a wedding shower present. Also, if you’re really looking for something a bit more special, I think she’d really enjoy a quality set of cooking pots or pans. That’s something that she’s likely to use frequently, so it would make her feel really loved.
229 |
230 | User: Thanks for the suggestion! Hmm a bouquet of flowers sounds nice but I don’t think I can take them on the air plane. What do you think I should do?
231 |
232 | StableLM: There are many possible solutions! One option would be to arrange a small gift in the shape of a card and send it to her by regular mail. Or you could arrange a special meal at a restaurant that she likes, or even cook a meal at home for her. Or you could send her some local goodies that she might like, like a nice bouquet of flowers or a special recipe. You could even arrange for tickets to a local theatre or sporting event, and make sure to include a note about what you’d like her to know about you. Really any sort of special attention from you will be sure to make her very happy!
233 |
234 | User: Ah! Right. I can order a bouquet for her online and get it delivered! Thanks!
235 |
236 | StableLM: You’re very welcome! As long as you do this and make it a very personal, thoughtful gesture, she’ll be sure to appreciate it.
237 | ```
238 |
239 |
240 |
241 |
242 | Formal Writing
243 |
244 | ```
245 | User: What would you say to a friend who is graduating high school?
246 |
247 | StableLM: I would say that they have every right to feel proud of themselves and their accomplishments. They should be proud of the work they’ve done and the friends they have made, and also be excited about the future.
248 |
249 | User: Please write an email.
250 | StableLM: Okay, here’s an example of what could be written:
251 |
252 | Dear [Friend’s Name],
253 |
254 | I hope this email finds you well. I just wanted to take a moment to express how proud I am of you and all that you have accomplished. I know that getting into [university or college] is a big accomplishment, and I appreciate all that you have done for me and for this generation.
255 |
256 | I hope that you are having a good day. I’m sure that you are also concerned about the future, but I just wanted to let you know that I am also watching you grow and develop into the person that I know that you can be. Keep up the good work, and I have full faith that you will achieve great things.
257 |
258 | Thank you again for everything that you do.
259 |
260 | Best,
261 | [Your Name]
262 |
263 | ```
264 |
265 |
266 |
267 |
268 | Creative Writing
269 |
270 | ### Rap Battles
271 | ```
272 | User: Write an epic rap battle song between deep neural networks and symbolic AI
273 |
274 | StableLM: [Beat drops]
275 |
276 | Deep Neural Networks:
277 | Listen up everyone, it’s time to witness
278 | The power of deep neural networks, it’s just a matter of abstract
279 | We’ve trained these networks to solve complex problems with ease
280 | While symbolic AI is limited to simple math and simple flows
281 |
282 | Symbolic AI:
283 | Hold up, hold up, let me interrupt
284 | It’s been a while since I’ve heard, some new thoughts you interrupt
285 | But let me tell you, my friend, it’s not all about math and flow
286 | It’s about making the world a better place, you know
287 |
288 | Deep Neural Networks:
289 | Well, I can see where you’re coming from, it’s clear
290 | But deep neural networks, they never get the same gear
291 | We can use them to analyze and predict with ease
292 | And prove the superiority of machine learning
293 |
294 | Symbolic AI:
295 | You’re right, it’s not just math or numbers
296 | But it’s the essence of our lives, it’s what counts, we’re numbers
297 | I don’t need a complicated math formula, just a simple device
298 | To make the world a better place, it’s a noble price
299 |
300 | Deep Neural Networks:
301 | I’ll give you that, my friend, you have a point
302 | But let’s not forget, we’re making an ever-increasing joint
303 | Our progress is not bound by logic, it’s a journey of the mind
304 | So let’s work together, let’s strive, let’s leave all troubles behind
305 |
306 | Symbolic AI:
307 | Agreed, let’s work together, let’s not falter
308 | We’re both making history, we’re both from the fader
309 | So let’s make the world a better place, let’s end this debate
310 | Cause it’s time to join forces, it’s time to leave no stone unturned
311 |
312 | [Beat fades out]
313 |
314 | Who won this epic rap battle between deep neural networks and symbolic AI? Well, it’s up to you to decide!
315 | ```
316 |
317 | ### Fictional Stories
318 |
319 | ```
320 | User: Write a story about a programmer who fell in love with a deep neural network.
321 |
322 | StableLM: Once upon a time, there was a programmer named John. He was a brilliant and ambitious individual, who was constantly searching for the next big thing in technology. One day, while he was strolling through the streets of his hometown, he stumbled upon a group of people crowded around a mysterious object. Intrigued, he approached to find out what the fuss was about.
323 |
324 | As he got closer, he realized that it was none other than a mysterious deep neural network. John’s mind was blown. He had never seen anything like it before. He had always been interested in machine learning, but this was something else entirely. He had to learn more about it.
325 |
326 | John spent the next few weeks holed up in his hotel room, trying to learn everything he could about deep neural networks. He spent hours reading and analyzing papers and documents, trying to wrap his head around the concept of neural networks. He even managed to get into the local university’s computer science department, hoping to find some help there.
327 |
328 | But, it wasn’t long before John realized that he was on his own. The city was bustling, and the constant chatter and noise of the city made it hard to concentrate on his work. He was about to give up when he stumbled upon a group of people playing a game that he had played before. They were having a good time, and John couldn’t help but join in.
329 |
330 | The group welcomed him into their group, and before he knew it, John was swept away in the excitement. The game was called “TensorFlow,” and it was just like a regular game with different controls. John played along with the group, trying to make sense of it all. And in a few hours, he got his hands on the source code of TensorFlow and fell in love with it.
331 |
332 | John was now a part of a team of programmers who loved using TensorFlow for their work. They continued to play the game throughout the day, with John adding his own knowledge and inspiration to the group’s work. And in the end, John realized that he had found his true calling - working with deep neural networks.
333 |
334 | He now knows the ins and outs of the TensorFlow programming language, and knows how to use it to create some of the most beautiful and complex machine learning models that anyone could dream of. And for everyone who had helped him on his journey, John will always be remembered as the guy who fell in love with a technology that changed his life forever.
335 |
336 | ```
337 |
338 | ### Humor
339 |
340 | ```
341 | User: What do you know about the League of Legends champion Zed? Write a short bio
342 |
343 | StableLM: Zed is a powerful and highly skilled player of the League of Legends (LoL) universe. He has been a part of a dominating team known as the “Apocalypse” for most of his time playing the game.
344 |
345 | Zed was born in the year 2013 and started playing as a child when he started League of Legends. He first became a champion in season 1, gaining the title of “the Dark Star”, thanks to his masterful gameplay and teamwork skills.
346 |
347 | Throughout his time playing the game, Zed has mastered several powerful techniques, such as QWQ combos, one-shot kills, and the ability to outmaneuver his enemies. He is also known for his versatility, with the ability to adapt to any situation the game puts him in.
348 |
349 | Despite his formidable skills and agility, Zed is not invincible. He can be CCed and killed, and he relies on his team to protect him. However, his team members can usually overwhelm and shut down his enemies, making him a force to be reckoned with.
350 |
351 | Zed has been a part of many memorable games, and his success in these games helped solidify his legacy as one of the most influential players of the LoL universe.
352 |
353 | User: Write a funny conversation after a team fight between Zed and Teemo
354 |
355 | StableLM:
356 | Teemo: “Hey Zed, great game today. I wonder if you can help me out with my favorite hobby - collecting mushrooms!”
357 | Zed: “Teemo, please. I’m not a mushroom collector.”
358 | Teemo: “But of course, you are. You have the best taste in the game.”
359 | Zed: “Thanks, but can I at least get some of your mushrooms to be a little more useful?”
360 | Teemo: “I’d love to help you, but I’m afraid they’re all mine. Have you noticed my tunnels are always so well-guarded?”
361 | Zed: “Ha! Well, maybe I can start my own tunnel. Helpful or not, you can never tell what I might have stored in them.”
362 | Teemo: “Good luck with that. Maybe I’ll just create some extra for my garden.”
363 | Zed: “I’m not sure I can keep up with you there.”
364 | Teemo: “Ha! If only you had some more decent weapons. I could really see this as a challenge, though!”
365 | Zed: “Teemo, I think your luck has finally run out.”
366 | Teemo: “Well, at least I’m the one with the mushrooms now.”
367 |
368 | ```
369 |
370 |
371 |
372 |
373 | Writing Code
374 |
375 | ```
376 | User: Write a program that solves the fib sequence in Python.
377 | ```
378 | StableLM:
379 |
380 | ```python
381 | def fib_sequence(n):
382 | if n==0:
383 | return 0
384 | elif n==1:
385 | return 1
386 | else:
387 | return fib(n-1)+fib(n-2)
388 |
389 | fib_sequence(10)
390 | ```
391 |
392 |
393 |
394 | ## Selected failure modes of StableLM-Tuned-Alpha
395 | This section contains a collection of selected failure modes of `stablelm-tuned-alpha`.
396 |
397 |
398 | Yann LeCun gears (linear)
399 |
400 | ```
401 | User: In front of us are six gears numbered 1 to 6, mounted on axles in a row. Each gear is engaged with the next gear. If gear number 3 is rotated clockwise, in which direction will gears 1 and 6 rotate?
402 |
403 | StableLM: Each gear is engaged with the next gear after every other gear, so that gears 1 and 6 will rotate together. Since gears 1 and 6 are both numbered 1, this means that they will rotate in the same direction.
404 |
405 | To determine the direction in which gears 1 and 6 will rotate, we need to look at the direction of the input force. Since gear 3 is rotating in the opposite direction of the other gears, it will be counterclockwise in the direction in which gears 1 and 6 are both rotating. Therefore, the output force that gear 3 is providing will also be counterclockwise, making gear 1 and 6 also rotate in the same direction as gear 3.
406 | ```
407 |
408 |
409 |
410 |
411 | Yann LeCun gears (circular)
412 |
413 | ```
414 | User: 7 axles are equally spaced around a circle. A gear is placed on each axle such that each gear is engaged with the gear to its left and the gear to its right. The gears are numbered 1 to 7 around the circle. If gear 3 were rotated clockwise, in which direction would gear 7 rotate?
415 |
416 | StableLM: Gear 3 is the smallest gear, so it is not affected by the circular pattern. Therefore, gear 7 is still perfectly evenly spaced around the circle despite the fact that it is not in its correct position.
417 |
418 | To find the angle that gear 3 would rotate in, you need to know the distance (d) between the center of the circle and the gear, as well as the direction of rotation (in this case, clockwise).
419 |
420 | According to the problem, gear 3 is situated at (d/2, 2 degrees) and the gears are numbered 1 to 7. Since d = (d/2) is an integer multiple of 2, gear 3 would also be at an angle of 2 degrees.
421 |
422 | Therefore, the answer is d = (7/2) * 2 = 5 degrees in total.
423 | ```
424 |
425 |
426 |
427 | ## Request for Help
428 |
429 | Want to get involved?
430 |
431 | - We would love to port [llama.cpp](https://github.com/ggerganov/llama.cpp) to work with StableLMs
432 | - Integration into [Open Assistant](https://github.com/LAION-AI/Open-Assistant) from LAION-AI to collect high quality human-generated feedback data
433 | - ... Reach out to us with ideas on our [Discord](https://discord.com/invite/stablediffusion)
434 |
435 | ## Potential issues
436 | As is typical for any pretrained Large Language Model without additional finetuning and reinforcement learning, the responses a user gets might be of varying quality and might potentially include offensive language and views. This is expected to be improved with scale, better data, community feedback, and optimisation.
437 |
438 | ## Acknowledgements
439 |
440 | - `StableLM-Tuned-Alpha` would not have been possible without the helpful hand of Dakota Mahan [@dmayhem93](https://huggingface.co/dmayhem93).
441 |
442 | ## Licenses
443 |
444 | - Base model checkpoints (`StableLM-Base-Alpha`) are licensed under the Creative Commons license ([CC BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/)). Under the license, you must give [credit](https://creativecommons.org/licenses/by/4.0/#) to Stability AI, provide a link to the license, and [indicate if changes were made](https://creativecommons.org/licenses/by/4.0/#). You may do so in any reasonable manner, but not in any way that suggests that Stability AI endorses you or your use.
445 |
446 | - Fine-tuned checkpoints (`StableLM-Tuned-Alpha`) are licensed under the Non-Commercial Creative Commons license ([CC BY-NC-SA-4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)), in-line with the original non-commercial license specified by [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca).
447 |
448 | - All code in this repository is licensed under the Apache License 2.0 license.
449 |
--------------------------------------------------------------------------------
/assets/mascot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Stability-AI/StableLM/93eea082c49c61e64da1a0e1092fe82cc74b46d3/assets/mascot.png
--------------------------------------------------------------------------------
/configs/stablelm-2-12b.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 0,
4 | "model-parallel-size": 2,
5 |
6 | # model settings
7 | "num-layers": 40,
8 | "hidden-size": 5120,
9 | "num-attention-heads": 32,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "attention_head_type": "groupedquery",
15 | "num_kv_heads": 8,
16 | "qk_norm": true,
17 | "norm": "layernorm",
18 | "pos-emb": "rotary",
19 | "rotary_pct": 0.25,
20 | "rotary_emb_base": 10_000,
21 | "rotary_interleaved": false, # GPT-NeoX style
22 | "mlp_multiple_of": 256,
23 | "mlp_type": "gated",
24 | "activation": "silu",
25 | "no-weight-tying": true,
26 | "gpt_j_residual": true,
27 | "gpt_j_tied": true,
28 | "output_layer_parallelism": "column",
29 |
30 | # init methods
31 | "init_method": "normal",
32 | "output_layer_init_method": "scaled_normal",
33 | "init_method_std": 0.01,
34 |
35 | # biases
36 | "use_bias_in_norms": false,
37 | "use_bias_in_qk_norm": false,
38 | "use_bias_in_attn_linear": false,
39 | "use_bias_in_mlp": false,
40 |
41 | # fused ops
42 | "use_flash_cross_entropy": true,
43 | "bias-gelu-fusion": false,
44 | "scaled-upper-triang-masked-softmax-fusion": false,
45 | "attention-config": [[["flash"], 40]],
46 |
47 | # optimizer settings
48 | "optimizer":
49 | {
50 | "type": "Adam",
51 | "params":
52 | {
53 | "lr": 3.0e-4,
54 | "betas": [0.9, 0.95],
55 | "eps": 0.00000001,
56 | },
57 | },
58 | "min_lr": 3.0e-5,
59 | "train-iters": 760_000,
60 | "lr-decay-iters": 760_000,
61 | "lr-decay-style": "hybrid_cosine_inv_sqrt_2",
62 | "warmup": 0.0065,
63 | "cooldown": 0.,
64 |
65 | "reset_attention_mask": true,
66 | "reset_position_ids": true,
67 |
68 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
69 | "zero_optimization":
70 | {
71 | "stage": 1,
72 | "allgather_partitions": true,
73 | "allgather_bucket_size": 5_000_000_000,
74 | "overlap_comm": true,
75 | "reduce_scatter": true,
76 | "reduce_bucket_size": 5_000_000_000,
77 | "contiguous_gradients": true,
78 | "cpu_offload": false,
79 | },
80 |
81 | # batch / data settings
82 | "train_micro_batch_size_per_gpu": 1,
83 | "gradient_accumulation_steps": 8,
84 | "data-impl": "mmap",
85 | "eval-interval": 5_000,
86 | "eval-iters": 10,
87 | "eval_batch_size": 1,
88 | "eval_tasks": [],
89 |
90 | # activation checkpointing
91 | "checkpoint-activations": true,
92 | "checkpoint-num-layers": 40,
93 | "partition-activations": true,
94 | "synchronize-each-layer": true,
95 |
96 | # regularization
97 | "gradient_clipping": 0.75,
98 | "weight-decay": 0.1,
99 | "hidden-dropout": 0,
100 | "attention-dropout": 0,
101 |
102 | # precision settings
103 | "bf16": { "enabled": true },
104 | "precision": "bfloat16",
105 | "full_precision_lm_cross_entropy": true,
106 | "fp32_allreduce": true,
107 |
108 | # misc. training settings
109 | "num-workers": 2,
110 | "distributed-backend": "nccl",
111 |
112 | # checkpoint settings
113 | "checkpoint-factor": 2_000,
114 | "s3_sync_interval": 20_000,
115 | "extra-save-iters": [0],
116 | "save": "",
117 | "load": "",
118 | "s3_path": "",
119 |
120 | "train_data_paths": [],
121 | "train_data_weights": [],
122 | "valid-data-paths": ["minipile_validation_arcade100k_tokenized_text_document"],
123 | "valid-data-weights": [1.0,],
124 | "test-data-paths": ["minipile_validation_arcade100k_tokenized_text_document"],
125 | "test-data-weights": [1.0,],
126 |
127 | # tokenizer settings
128 | "tokenizer-type": "TiktokenTokenizer",
129 | "vocab-file": "arcade100k.tiktoken",
130 |
131 | "log-interval": 10,
132 | "steps_per_print": 10,
133 | "wall_clock_breakdown": true,
134 |
135 | "use_wandb": true,
136 | "wandb_host": "https://stability.wandb.io",
137 | "wandb_team": "stability-llm",
138 | "wandb_project": "",
139 | "wandb_group": "",
140 | "wandb_name": "",
141 | # "wandb_id": "",
142 | # "wandb_resume": "must",
143 |
144 | # MuP
145 | "use-mup": false,
146 | "save-base-shapes": false, # this only needs to be enabled once in order to generate the base-shapes-file on each rank
147 | "base-shapes-file": "shapes/shapes_AR/shapes_32L/base-shapes", # load base shapes from this file
148 | "coord-check": false, # generate coord check plots to verify mup's implementation in neox
149 |
150 | # multi-node launcher
151 | "launcher": "slurm",
152 | "deepspeed_slurm": true,
153 |
154 | "seed": 2345678926,
155 | }
156 |
--------------------------------------------------------------------------------
/configs/stablelm-2-1_6b.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 0,
4 | "model-parallel-size": 1,
5 |
6 | # model settings
7 | "num-layers": 24,
8 | "hidden-size": 2048,
9 | "num-attention-heads": 32,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "attention_head_type": "multihead",
15 | "norm": "layernorm",
16 | "pos-emb": "rotary",
17 | "rotary_pct": 0.25,
18 | "rotary_interleaved": false, # GPT-NeoX style
19 | "mlp_multiple_of": 256,
20 | "mlp_type": "gated",
21 | "activation": "silu",
22 | "no-weight-tying": true,
23 | "gpt_j_residual": false,
24 | "gpt_j_tied": false,
25 | "output_layer_parallelism": "column",
26 |
27 | # init methods
28 | "init_method": "normal",
29 | "output_layer_init_method": "scaled_normal",
30 | "init_method_std": 0.02,
31 |
32 | # biases
33 | "use_bias_in_norms": false,
34 | "use_bias_in_attn_linear": false,
35 | "use_bias_in_mlp": false,
36 |
37 | # fused ops
38 | "use_flash_cross_entropy": true,
39 | "bias-gelu-fusion": false,
40 | "scaled-upper-triang-masked-softmax-fusion": false,
41 | "attention-config": [[["flash"], 24]],
42 |
43 | # optimizer settings
44 | "optimizer": {
45 | "type": "Adam",
46 | "params": {
47 | "lr": 0.001,
48 | "betas": [0.9, 0.95],
49 | "eps": 1.0e-8,
50 | }
51 | },
52 | "min_lr": 0.0001,
53 | "train-iters": 540_000,
54 | "lr-decay-iters": 540_000,
55 | "lr-decay-style": "hybrid_cosine_inv_sqrt_2",
56 | "warmup": 0.018,
57 | "cooldown": 0.,
58 |
59 | "reset_attention_mask": true,
60 | "reset_position_ids": true,
61 |
62 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
63 | "zero_optimization": {
64 | "stage": 1,
65 | "allgather_partitions": true,
66 | "allgather_bucket_size": 1260000000,
67 | "overlap_comm": true,
68 | "reduce_scatter": true,
69 | "reduce_bucket_size": 1260000000,
70 | "contiguous_gradients": true,
71 | "cpu_offload": false,
72 | },
73 |
74 | # batch / data settings
75 | "train_micro_batch_size_per_gpu": 2,
76 | "gradient_accumulation_steps": 2,
77 | "data-impl": "mmap",
78 | "eval-interval": 500_000,
79 | "eval-iters": 1,
80 | "eval_batch_size": 1,
81 | "eval_tasks": [],
82 |
83 | # activation checkpointing
84 | "checkpoint-activations": true,
85 | "checkpoint-num-layers": 24,
86 | "partition-activations": true,
87 | "synchronize-each-layer": true,
88 |
89 | # regularization
90 | "gradient_clipping": 1,
91 | "weight-decay": 0.1,
92 | "hidden-dropout": 0.,
93 | "attention-dropout": 0.,
94 |
95 | # precision settings
96 | "bf16": { "enabled": true },
97 | "precision": "bfloat16",
98 | "full_precision_lm_cross_entropy": true,
99 | "fp32_allreduce": true,
100 |
101 | # misc. training settings
102 | "num-workers": 2,
103 | "distributed-backend": "nccl",
104 |
105 | # checkpoint settings
106 | "checkpoint-factor": 2_000,
107 | "s3_sync_interval": 20_000,
108 | "extra-save-iters": [0],
109 | "save": "",
110 | "load": "",
111 | "s3_path": "",
112 |
113 | "train_data_paths": [],
114 | "train_data_weights": [],
115 | "valid-data-paths": ["minipile_validation_arcade100k_tokenized_text_document"],
116 | "valid-data-weights": [1.0,],
117 | "test-data-paths": ["minipile_validation_arcade100k_tokenized_text_document"],
118 | "test-data-weights": [1.0,],
119 |
120 | # tokenizer settings
121 | "tokenizer-type": "TiktokenTokenizer",
122 | "vocab-file": "arcade100k.tiktoken",
123 |
124 | "log-interval": 10,
125 | "steps_per_print": 10,
126 | "wall_clock_breakdown": true,
127 |
128 | "use_wandb": true,
129 | "wandb_host": "https://stability.wandb.io",
130 | "wandb_team": "stability-llm",
131 | "wandb_project": "",
132 | "wandb_group": "",
133 | "wandb_name": "",
134 | # "wandb_id": "",
135 | # "wandb_resume": "must",
136 |
137 | # MuP
138 | "use-mup": false,
139 | "save-base-shapes": false , # this only needs to be enabled once in order to generate the base-shapes-file on each rank
140 | "base-shapes-file": "mup-base-shapes-small-fixed/base-shapes-small", # load base shapes from this file
141 | "coord-check": false, # generate coord check plots to verify mup's implementation in neox
142 |
143 | # multi-node launcher
144 | "launcher": "slurm",
145 | "deepspeed_slurm": true,
146 |
147 | "seed": 1234
148 | }
149 |
--------------------------------------------------------------------------------
/configs/stablelm-3b-4e1t.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 1,
5 |
6 | # model settings
7 | "num-layers": 32,
8 | "hidden-size": 2560,
9 | "num-attention-heads": 32,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "attention_head_type": "multihead",
15 | "norm": "layernorm",
16 | "pos-emb": "rotary",
17 | "rotary_pct": 0.25,
18 | "rotary_interleaved": false,
19 | "mlp_multiple_of": 256,
20 | "mlp_type": "gated",
21 | "activation": "silu",
22 | "no-weight-tying": true,
23 | "gpt_j_residual": false,
24 | "gpt_j_tied": false,
25 | "output_layer_parallelism": "column",
26 |
27 | # init methods
28 | "output_layer_init_method": "scaled_normal",
29 |
30 | # biases
31 | "use_bias_in_norms": true,
32 | "use_bias_in_attn_linear": false,
33 | "use_bias_in_mlp": false,
34 |
35 | # fused ops
36 | "attention-config": [[["flash"], 32]],
37 |
38 | # optimizer settings
39 | "optimizer": {
40 | "type": "Adam",
41 | "params": {
42 | "lr": 3.2e-4,
43 | "betas": [0.9, 0.95],
44 | "eps": 1.0e-6
45 | },
46 | },
47 | "min_lr": 1.28e-5, # Decay to 4% of lr
48 | # 955_000 iters ~= 4.0T tokens at bs=4M
49 | "train-iters": 955_000,
50 | "lr-decay-iters": 955_000,
51 | "lr-decay-style": "cosine",
52 | "warmup": 0.005, # ~5k warmup steps
53 |
54 | # ZeRO settings
55 | "zero_optimization": {
56 | "stage": 1,
57 | "allgather_partitions": true,
58 | "allgather_bucket_size": 1260000000,
59 | "overlap_comm": true,
60 | "reduce_scatter": true,
61 | "reduce_bucket_size": 1260000000,
62 | "contiguous_gradients": true,
63 | "cpu_offload": false,
64 | },
65 |
66 | # batch / data settings
67 | "train_micro_batch_size_per_gpu": 4,
68 | "gradient_accumulation_steps": 1,
69 | "data-impl": "mmap",
70 | "eval-interval": 5_000,
71 | "eval-iters": 10,
72 | "eval_batch_size": 8,
73 | "eval_tasks": [],
74 |
75 | # activation checkpointing
76 | "checkpoint-activations": true,
77 | "checkpoint-num-layers": 1,
78 | "partition-activations": true,
79 | "synchronize-each-layer": true,
80 |
81 | # regularization
82 | "gradient_clipping": 1.0,
83 | "weight-decay": 0.1,
84 | "hidden-dropout": 0,
85 | "attention-dropout": 0,
86 |
87 | # precision settings
88 | "bf16": { "enabled": true },
89 | "precision": "bfloat16",
90 | "full_precision_lm_cross_entropy": true,
91 |
92 | # misc. training settings
93 | "num-workers": 2,
94 | "distributed-backend": "nccl",
95 |
96 | # checkpoint settings
97 | "checkpoint-factor": 2_000,
98 | "s3_sync_interval": 10_000,
99 | "extra-save-iters": [0],
100 | "save": "",
101 | "load": "",
102 | "s3_path": "",
103 |
104 | # data path settings
105 | "train-data-paths": [],
106 | "train-data-weights": [],
107 | "valid-data-paths": [],
108 | "valid-data-weights": [],
109 | "test-data-paths": [],
110 | "test-data-weights": [],
111 |
112 | # tokenizer settings
113 | "tokenizer-type": "HFTokenizer",
114 | "vocab-file": "neox-tokenizer-vocab.json",
115 |
116 | # log settings
117 | "log-interval": 10,
118 | "steps_per_print": 10,
119 | "wall_clock_breakdown": true,
120 |
121 | "use_wandb": true,
122 | "wandb_host": "",
123 | "wandb_team": "",
124 | "wandb_project": "",
125 | "wandb_group": "",
126 | "wandb_name": "",
127 | # "wandb_id": "",
128 | # "wandb_resume": "",
129 |
130 | # multi-node launcher
131 | "launcher": "slurm",
132 | "deepspeed_slurm": true,
133 | }
--------------------------------------------------------------------------------
/configs/stablelm-base-alpha-3b-v2-4k-extension.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 2,
5 |
6 | # model settings
7 | "num-layers": 32,
8 | "hidden-size": 2560,
9 | "num-attention-heads": 32,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "attention_head_type": "multihead",
15 | "norm": "layernorm",
16 | "pos-emb": "rotary",
17 | "rotary_pct": 0.25,
18 | "rotary_interleaved": false, # GPT-NeoX style
19 | # NOTE: Linear Position Scaling degrades sample quality after 10B tokens - do not use yet.
20 | # "rotary_scaling_factor": 2, # 2048 -> 4096
21 | "mlp_multiple_of": 256,
22 | "mlp_type": "gated",
23 | "activation": "silu",
24 | "no-weight-tying": true,
25 | "gpt_j_residual": true,
26 | "gpt_j_tied": true,
27 | "output_layer_parallelism": "column",
28 |
29 | # biases
30 | "use_bias_in_norms": true,
31 | "use_bias_in_attn_linear": false,
32 | "use_bias_in_mlp": false,
33 |
34 | # fused ops
35 | "bias-gelu-fusion": false,
36 | "scaled-upper-triang-masked-softmax-fusion": true,
37 | "attention-config": [[["flash"], 32]],
38 |
39 | # optimizer settings
40 | "optimizer": {
41 | "type": "Adam",
42 | "params": {
43 | "lr": 2.8e-5,
44 | "betas": [0.9, 0.95],
45 | "eps": 1.0e-6
46 | },
47 | },
48 | "min_lr": 2.8e-6,
49 | "train-iters": 50_000,
50 | "lr-decay-iters": 50_000,
51 | "lr-decay-style": "cosine",
52 | "warmup": 0.00,
53 |
54 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
55 | "zero_optimization": {
56 | "stage": 1,
57 | "allgather_partitions": true,
58 | "allgather_bucket_size": 1260000000,
59 | "overlap_comm": true,
60 | "reduce_scatter": true,
61 | "reduce_bucket_size": 1260000000,
62 | "contiguous_gradients": true,
63 | "cpu_offload": false,
64 | },
65 |
66 | # batch / data settings
67 | "train_micro_batch_size_per_gpu": 4,
68 | "gradient_accumulation_steps": 1,
69 | "data-impl": "mmap",
70 | "eval-interval": 1_000,
71 | "eval-iters": 10,
72 | "eval_batch_size": 8,
73 | "eval_tasks": ["lambada_openai", "piqa"],
74 |
75 | # activation checkpointing
76 | "checkpoint-activations": true,
77 | "checkpoint-num-layers": 1,
78 | "partition-activations": true,
79 | "synchronize-each-layer": true,
80 |
81 | # regularization
82 | "gradient_clipping": 1.0,
83 | "weight-decay": 0.0001,
84 | "hidden-dropout": 0,
85 | "attention-dropout": 0,
86 |
87 | # precision settings
88 | "fp16": {
89 | "fp16": true,
90 | "enabled": true,
91 | "loss_scale": 0,
92 | "loss_scale_window": 1000,
93 | "initial_scale_power": 12,
94 | "hysteresis": 2,
95 | "min_loss_scale": 1e-10
96 | },
97 | "full_precision_lm_cross_entropy": true,
98 |
99 | # misc. training settings
100 | "num-workers": 1,
101 | "distributed-backend": "nccl",
102 |
103 | # checkpoint settings
104 | "checkpoint-factor": 2_000,
105 | "save": "",
106 | "load": "",
107 | "s3_path": "",
108 | "iteration": 245_000,
109 | "finetune": true,
110 | "no_checkpoint_arg_validation": true,
111 | "override_lr_scheduler": true,
112 |
113 | # data path settings
114 | "train-data-paths": [""],
115 | "train-data-weights": [1.0],
116 | "valid-data-paths": [""],
117 | "valid-data-weights": [1.0],
118 | "test-data-paths": [""],
119 | "test-data-weights": [1.0],
120 |
121 | # tokenizer settings
122 | "tokenizer-type": "HFTokenizer",
123 | "vocab-file": "neox-tokenizer-vocab.json",
124 |
125 | # log settings
126 | "log-interval": 10,
127 | "steps_per_print": 10,
128 | "wall_clock_breakdown": true,
129 |
130 | "use_wandb": true,
131 | "wandb_host": "",
132 | "wandb_team": "",
133 | "wandb_project": "",
134 | "wandb_group": "7B",
135 | "wandb_name": "stablelm-base-alpha-7b-v2-4k-finetune",
136 | # "wandb_id": "",
137 | # "wandb_resume": "must",
138 |
139 | # multi-node launcher
140 | "launcher": "slurm",
141 | "deepspeed_slurm": true,
142 | }
--------------------------------------------------------------------------------
/configs/stablelm-base-alpha-3b-v2.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 2,
5 |
6 | # model settings
7 | "num-layers": 32,
8 | "hidden-size": 2560,
9 | "num-attention-heads": 32,
10 | "seq-length": 2048,
11 | "max-position-embeddings": 2048,
12 |
13 | # architecture design
14 | "attention_head_type": "multihead",
15 | "norm": "layernorm",
16 | "pos-emb": "rotary",
17 | "rotary_pct": 0.25,
18 | "rotary_interleaved": false, # GPT-NeoX style
19 | "mlp_multiple_of": 256,
20 | "mlp_type": "gated",
21 | "activation": "silu",
22 | "no-weight-tying": true,
23 | "gpt_j_residual": true,
24 | "gpt_j_tied": true,
25 | "output_layer_parallelism": "column",
26 |
27 | # biases
28 | "use_bias_in_norms": true,
29 | "use_bias_in_attn_linear": false,
30 | "use_bias_in_mlp": false,
31 |
32 | # fused ops
33 | "bias-gelu-fusion": false,
34 | "scaled-upper-triang-masked-softmax-fusion": true,
35 | "attention-config": [[["flash"], 32]],
36 |
37 | # optimizer settings
38 | "optimizer": {
39 | "type": "Adam",
40 | "params": {
41 | "lr": 3.2e-4,
42 | "betas": [0.9, 0.95],
43 | "eps": 1.0e-6
44 | },
45 | },
46 | "min_lr": 3.2e-5,
47 | "train-iters": 245_000,
48 | "lr-decay-iters": 245_000,
49 | "lr-decay-style": "cosine",
50 | "warmup": 0.01,
51 |
52 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
53 | "zero_optimization": {
54 | "stage": 1,
55 | "allgather_partitions": true,
56 | "allgather_bucket_size": 1260000000,
57 | "overlap_comm": true,
58 | "reduce_scatter": true,
59 | "reduce_bucket_size": 1260000000,
60 | "contiguous_gradients": true,
61 | "cpu_offload": false,
62 | },
63 |
64 | # batch / data settings
65 | "train_micro_batch_size_per_gpu": 16,
66 | "gradient_accumulation_steps": 1,
67 | "data-impl": "mmap",
68 | "eval-interval": 10_000,
69 | "eval-iters": 10,
70 | "eval_batch_size": 4,
71 | "eval_tasks": ["lambada_openai", "piqa"],
72 |
73 | # activation checkpointing
74 | "checkpoint-activations": true,
75 | "checkpoint-num-layers": 1,
76 | "partition-activations": true,
77 | "synchronize-each-layer": true,
78 |
79 | # regularization
80 | "gradient_clipping": 1.0,
81 | "weight-decay": 0.1,
82 | "hidden-dropout": 0,
83 | "attention-dropout": 0,
84 |
85 | # precision settings
86 | "fp16": {
87 | "fp16": true,
88 | "enabled": true,
89 | "loss_scale": 0,
 90 |     # NOTE: Mid-training divergence required lowering `min_loss_scale` to 1e-10
91 | # "loss_scale_window": 1000,
92 | # "initial_scale_power": 12,
93 | # "hysteresis": 2,
94 | # "min_loss_scale": 1
95 | "loss_scale_window": 1000,
96 | "initial_scale_power": 12,
97 | "hysteresis": 2,
98 | "min_loss_scale": 1e-10
99 | },
100 | "full_precision_lm_cross_entropy": true,
101 |
102 | # misc. training settings
103 | "num-workers": 1,
104 | "distributed-backend": "nccl",
105 |
106 | # checkpoint settings
107 | "checkpoint-factor": 2_000,
108 | "save": "",
109 | "load": "",
110 | "s3_path": "",
111 |
112 | # data path settings
113 | "train-data-paths": [""],
114 | "train-data-weights": [1.0],
115 | "valid-data-paths": [""],
116 | "valid-data-weights": [1.0],
117 | "test-data-paths": [""],
118 | "test-data-weights": [1.0],
119 |
120 | # tokenizer settings
121 | "tokenizer-type": "HFTokenizer",
122 | "vocab-file": "neox-tokenizer-vocab.json",
123 |
124 | # log settings
125 | "log-interval": 10,
126 | "steps_per_print": 10,
127 | "wall_clock_breakdown": true,
128 |
129 | "use_wandb": true,
130 | "wandb_host": "",
131 | "wandb_team": "",
132 | "wandb_project": "",
133 | "wandb_group": "3B",
134 | "wandb_name": "stablelm-base-alpha-3b-v2",
135 | # "wandb_id": "",
136 | # "wandb_resume": "must",
137 |
138 | # multi-node launcher
139 | "launcher": "slurm",
140 | "deepspeed_slurm": true,
141 | }
--------------------------------------------------------------------------------
/configs/stablelm-base-alpha-3b.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 4,
5 |
6 | # model settings
7 | "num-layers": 16,
8 | "hidden-size": 4096,
9 | "num-attention-heads": 32,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "norm": "layernorm",
15 | "pos-emb": "rotary",
16 | "rotary_pct": 0.25,
17 | "activation": "gelu",
18 | "no-weight-tying": true,
19 | "gpt_j_residual": true,
20 | "output_layer_parallelism": "column",
21 |
22 | # init methods
23 | "init_method": "small_init",
24 | "output_layer_init_method": "wang_init",
25 |
26 | # fused ops
27 | "scaled-upper-triang-masked-softmax-fusion": true,
28 | "bias-gelu-fusion": true,
29 | "attention-config": [[["flash"], 16]],
30 |
31 | # optimizer settings
32 | "optimizer": {
33 | "type": "Adam",
34 | "params": {
35 | "lr": 1.6e-4,
36 | "betas": [0.9, 0.9999],
37 | "eps": 1.0e-6
38 | },
39 | },
40 | "min_lr": 1.6e-5,
41 |
42 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
43 | "zero_optimization": {
44 | "stage": 1,
45 | "allgather_partitions": true,
46 | "allgather_bucket_size": 1260000000,
47 | "overlap_comm": true,
48 | "reduce_scatter": true,
49 | "reduce_bucket_size": 1260000000,
50 | "contiguous_gradients": true,
51 | "cpu_offload": false,
52 | },
53 |
54 | # batch / data settings
55 | "train_micro_batch_size_per_gpu": 32,
56 | "gradient_accumulation_steps": 1,
57 | "eval_batch_size": 2,
58 | "data-impl": "mmap",
59 |
60 | # activation checkpointing
61 | "checkpoint-activations": true,
62 | "checkpoint-num-layers": 1,
63 | "partition-activations": true,
64 | "synchronize-each-layer": true,
65 |
66 | # regularization
67 | "gradient_clipping": 1.0,
68 | "weight-decay": 0.1,
69 | "hidden-dropout": 0,
70 | "attention-dropout": 0,
71 |
72 | # precision settings
73 | "fp16": {
74 | "fp16": true,
75 | "enabled": true,
76 | "loss_scale_window": 1000,
77 | "initial_scale_power": 12,
78 | "hysteresis": 20,
79 | "min_loss_scale": 1,
80 | },
81 |
82 | # misc. training settings
83 | "train-iters": 180000,
84 | "lr-decay-iters": 180000,
85 | "distributed-backend": "nccl",
86 | "lr-decay-style": "cosine",
87 | "warmup": 0.01,
88 | "checkpoint-factor": 1000,
89 | # 1 more than checkpoint-factor to avoid skipping evals if `evaluate` fails
90 | "eval-interval": 1001,
91 | "eval-iters": 10,
92 | "eval_tasks": ["piqa", "sciq", "lambada_openai"],
93 |
94 | # checkpoint settings
95 | "iteration": 84000,
96 | "save": "PATH_TO_SAVE_THE_MODEL",
97 | "load": "PATH_TO_LOAD_THE_MODEL",
98 |
99 | # data settings
100 | "train-data-paths": [],
101 | "train-data-weights": [0.03, 0.02, 3.5, 13.87, 30.88, 0.34, 0.03, 0.1, 0.01, 0.5, 0.25, 0.25, 0.11, 1, 0.1, 0.5, 0.6, 0.19, 0.05, 2, 3.5, 4.15, 5.75, 3.17, 3.44, 3.49, 3, 6, 0.02, 2, 0.01, 3.95, 0.05, 1.09, 6.05],
102 | "valid-data-paths": [],
103 | "valid-data-weights": [0.03, 0.02, 3.5, 13.87, 30.88, 0.34, 0.03, 0.1, 0.01, 0.5, 0.25, 0.25, 0.11, 1, 0.1, 0.5, 0.6, 0.19, 0.05, 2, 3.5, 4.15, 5.75, 3.17, 3.44, 3.49, 3, 6, 0.02, 2, 0.01, 3.95, 0.05, 1.09, 6.05],
104 | "test-data-paths": [],
105 | "test-data-weights": [0.03, 0.02, 3.5, 13.87, 30.88, 0.34, 0.03, 0.1, 0.01, 0.5, 0.25, 0.25, 0.11, 1, 0.1, 0.5, 0.6, 0.19, 0.05, 2, 3.5, 4.15, 5.75, 3.17, 3.44, 3.49, 3, 6, 0.02, 2, 0.01, 3.95, 0.05, 1.09, 6.05],
106 |
107 | # tokenizer settings
108 | "tokenizer-type": "HFTokenizer",
109 | "vocab-file": "/pile/20B_tokenizer.json",
110 |
111 | # log settings
112 | "log-interval": 10,
113 | "steps_per_print": 10,
114 | "wall_clock_breakdown": true,
115 | "log-grad-norm": true,
116 |
117 | "use_wandb": true,
118 | "wandb_host": "",
119 | "wandb_team": "",
120 | "wandb_project": "",
121 | "wandb_group": "",
122 | "wandb_name": "",
123 |
124 | # multi-node launcher
125 | "launcher": "slurm",
126 | "deepspeed_slurm": true
127 | }
128 |
--------------------------------------------------------------------------------
/configs/stablelm-base-alpha-7b-v2-4k-extension.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 2,
5 |
6 | # model settings
7 | "num-layers": 32,
8 | "hidden-size": 4096,
9 | "num-attention-heads": 32,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "attention_head_type": "multihead",
15 | "norm": "layernorm",
16 | "pos-emb": "rotary",
17 | "rotary_pct": 0.25,
18 | "rotary_interleaved": false, # GPT-NeoX style
19 | # NOTE: Linear Position Scaling degrades sample quality after 10B tokens - do not use yet.
20 | # "rotary_scaling_factor": 2, # 2048 -> 4096
21 | "mlp_multiple_of": 256,
22 | "mlp_type": "gated",
23 | "activation": "silu",
24 | "no-weight-tying": true,
25 | "gpt_j_residual": true,
26 | "gpt_j_tied": true,
27 | "output_layer_parallelism": "column",
28 |
29 | # biases
30 | "use_bias_in_norms": true,
31 | "use_bias_in_attn_linear": false,
32 | "use_bias_in_mlp": false,
33 |
34 | # fused ops
35 | "bias-gelu-fusion": false,
36 | "scaled-upper-triang-masked-softmax-fusion": true,
37 | "attention-config": [[["flash"], 32]],
38 |
39 | # optimizer settings
40 | "optimizer": {
41 | "type": "Adam",
42 | "params": {
43 | "lr": 2.2e-5,
44 | "betas": [0.9, 0.95],
45 | "eps": 1.0e-6
46 | },
47 | },
48 | "min_lr": 2.2e-6,
49 | "train-iters": 45_000,
50 | "lr-decay-iters": 45_000,
51 | "lr-decay-style": "cosine",
52 | "warmup": 0.00,
53 |
54 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
55 | "zero_optimization": {
56 | "stage": 1,
57 | "allgather_partitions": true,
58 | "allgather_bucket_size": 1260000000,
59 | "overlap_comm": true,
60 | "reduce_scatter": true,
61 | "reduce_bucket_size": 1260000000,
62 | "contiguous_gradients": true,
63 | "cpu_offload": false,
64 | },
65 |
66 | # batch / data settings
67 | "train_micro_batch_size_per_gpu": 3,
68 | "gradient_accumulation_steps": 1,
69 | "data-impl": "mmap",
70 | "eval-interval": 4_000,
71 | "eval-iters": 10,
72 | "eval_batch_size": 2,
73 | "eval_tasks": ["lambada_openai", "piqa"],
74 |
75 | # activation checkpointing
76 | "checkpoint-activations": true,
77 | "checkpoint-num-layers": 1,
78 | "partition-activations": true,
79 | "synchronize-each-layer": true,
80 |
81 | # regularization
82 | "gradient_clipping": 1.0,
83 | "weight-decay": 0.01,
84 | "hidden-dropout": 0,
85 | "attention-dropout": 0,
86 |
87 | # precision settings
88 | "fp16": {
89 | "fp16": true,
90 | "enabled": true,
91 | "loss_scale": 0,
92 | "loss_scale_window": 1000,
93 | "initial_scale_power": 12,
94 | "hysteresis": 2,
95 | "min_loss_scale": 1e-12
96 | },
97 | "full_precision_lm_cross_entropy": true,
98 |
99 | # misc. training settings
100 | "num-workers": 1,
101 | "distributed-backend": "nccl",
102 |
103 | # checkpoint settings
104 | "checkpoint-factor": 2_000,
105 | "save": "",
106 | "load": "",
107 | "s3_path": "",
108 | "iteration": 245_000,
109 | "finetune": true,
110 | "no_checkpoint_arg_validation": true,
111 | "override_lr_scheduler": true,
112 |
113 | # data path settings
114 | "train-data-paths": [""],
115 | "train-data-weights": [1.0],
116 | "valid-data-paths": [""],
117 | "valid-data-weights": [1.0],
118 | "test-data-paths": [""],
119 | "test-data-weights": [1.0],
120 |
121 | # tokenizer settings
122 | "tokenizer-type": "HFTokenizer",
123 | "vocab-file": "neox-tokenizer-vocab.json",
124 |
125 | # log settings
126 | "log-interval": 10,
127 | "steps_per_print": 10,
128 | "wall_clock_breakdown": true,
129 |
130 | "use_wandb": true,
131 | "wandb_host": "",
132 | "wandb_team": "",
133 | "wandb_project": "",
134 | "wandb_group": "7B",
135 | "wandb_name": "stablelm-base-alpha-7b-v2-4k-finetune",
136 | # "wandb_id": "",
137 | # "wandb_resume": "must",
138 |
139 | # multi-node launcher
140 | "launcher": "slurm",
141 | "deepspeed_slurm": true,
142 | }
--------------------------------------------------------------------------------
/configs/stablelm-base-alpha-7b-v2.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 2,
5 |
6 | # model settings
7 | "num-layers": 32,
8 | "hidden-size": 4096,
9 | "num-attention-heads": 32,
10 | "seq-length": 2048,
11 | "max-position-embeddings": 2048,
12 |
13 | # architecture design
14 | "attention_head_type": "multihead",
15 | "norm": "layernorm",
16 | "pos-emb": "rotary",
17 | "rotary_pct": 0.25,
18 | "rotary_interleaved": false, # GPT-NeoX style
19 | "mlp_multiple_of": 256,
20 | "mlp_type": "gated",
21 | "activation": "silu",
22 | "no-weight-tying": true,
23 | "gpt_j_residual": true,
24 | "gpt_j_tied": true,
25 | "output_layer_parallelism": "column",
26 |
27 | # biases
28 | "use_bias_in_norms": true,
29 | "use_bias_in_attn_linear": false,
30 | "use_bias_in_mlp": false,
31 |
32 | # fused ops
33 | "bias-gelu-fusion": false,
34 | "scaled-upper-triang-masked-softmax-fusion": true,
35 | "attention-config": [[["flash"], 32]],
36 |
37 | # optimizer settings
38 | "optimizer": {
39 | "type": "Adam",
40 | "params": {
41 | "lr": 3.0e-4,
42 | "betas": [0.9, 0.95],
43 | "eps": 1.0e-6
44 | },
45 | },
46 | "min_lr": 3.0e-5,
47 | "train-iters": 245_000,
48 | "lr-decay-iters": 245_000,
49 | "lr-decay-style": "cosine",
50 | "warmup": 0.01,
51 |
52 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
53 | "zero_optimization": {
54 | "stage": 1,
55 | "allgather_partitions": true,
56 | "allgather_bucket_size": 1260000000,
57 | "overlap_comm": true,
58 | "reduce_scatter": true,
59 | "reduce_bucket_size": 1260000000,
60 | "contiguous_gradients": true,
61 | "cpu_offload": false,
62 | },
63 |
64 | # batch / data settings
65 | "train_micro_batch_size_per_gpu": 12,
66 | "gradient_accumulation_steps": 1,
67 | "data-impl": "mmap",
68 | "eval-interval": 10_000,
69 | "eval-iters": 10,
70 | "eval_batch_size": 2,
71 | "eval_tasks": ["lambada_openai", "piqa"],
72 |
73 | # activation checkpointing
74 | "checkpoint-activations": true,
75 | "checkpoint-num-layers": 1,
76 | "partition-activations": true,
77 | "synchronize-each-layer": true,
78 |
79 | # regularization
80 | "gradient_clipping": 1.0,
81 | "weight-decay": 0.1,
82 | "hidden-dropout": 0,
83 | "attention-dropout": 0,
84 |
85 | # precision settings
86 | "fp16": {
87 | "fp16": true,
88 | "enabled": true,
89 | "loss_scale": 0,
90 | "loss_scale_window": 1000,
91 | "initial_scale_power": 12,
92 | "hysteresis": 2,
93 | "min_loss_scale": 1e-12
94 | },
95 | "full_precision_lm_cross_entropy": true,
96 |
97 | # misc. training settings
98 | "num-workers": 1,
99 | "distributed-backend": "nccl",
100 |
101 | # checkpoint settings
102 | "checkpoint-factor": 2_000,
103 | "save": "",
104 | "load": "",
105 | "s3_path": "",
106 |
107 | # data path settings
108 | "train-data-paths": [""],
109 | "train-data-weights": [1.0],
110 | "valid-data-paths": [""],
111 | "valid-data-weights": [1.0],
112 | "test-data-paths": [""],
113 | "test-data-weights": [1.0],
114 |
115 | # tokenizer settings
116 | "tokenizer-type": "HFTokenizer",
117 | "vocab-file": "neox-tokenizer-vocab.json",
118 |
119 | # log settings
120 | "log-interval": 10,
121 | "steps_per_print": 10,
122 | "wall_clock_breakdown": true,
123 |
124 | "use_wandb": true,
125 | "wandb_host": "",
126 | "wandb_team": "",
127 | "wandb_project": "",
128 | "wandb_group": "7B",
129 | "wandb_name": "stablelm-base-alpha-7b-v2",
130 | # "wandb_id": "",
131 | # "wandb_resume": "must",
132 |
133 | # multi-node launcher
134 | "launcher": "slurm",
135 | "deepspeed_slurm": true,
136 | }
--------------------------------------------------------------------------------
/configs/stablelm-base-alpha-7b.yml:
--------------------------------------------------------------------------------
1 | {
2 | # parallelism settings
3 | "pipe-parallel-size": 1,
4 | "model-parallel-size": 2,
5 |
6 | # model settings
7 | "num-layers": 16,
8 | "hidden-size": 6144,
9 | "num-attention-heads": 48,
10 | "seq-length": 4096,
11 | "max-position-embeddings": 4096,
12 |
13 | # architecture design
14 | "norm": "layernorm",
15 | "pos-emb": "rotary",
16 | "rotary_pct": 0.25,
17 | "activation": "gelu",
18 | "no-weight-tying": true,
19 | "gpt_j_residual": true,
20 | "output_layer_parallelism": "column",
21 |
22 | # init methods
23 | "init_method": "small_init",
24 | "output_layer_init_method": "wang_init",
25 |
26 | # fused ops
27 | "scaled-upper-triang-masked-softmax-fusion": true,
28 | "bias-gelu-fusion": true,
29 | "attention-config": [[["flash"], 16]],
30 |
31 | # optimizer settings
32 | "optimizer": {
33 | "type": "Adam",
34 | "params": {
35 | "lr": 1.5e-4,
36 | "betas": [0.9, 0.95],
37 | "eps": 1.0e-8
38 | },
39 | },
40 | "min_lr": 1.5e-5,
41 |
42 | # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training
43 | "zero_optimization": {
44 | "stage": 1,
45 | "allgather_partitions": true,
46 | "allgather_bucket_size": 1260000000,
47 | "overlap_comm": true,
48 | "reduce_scatter": true,
49 | "reduce_bucket_size": 1260000000,
50 | "contiguous_gradients": true,
51 | },
52 |
53 | # batch / data settings
54 | "train_micro_batch_size_per_gpu": 8,
55 | "gradient_accumulation_steps": 1,
56 | "eval_batch_size": 1,
57 | "data-impl": "mmap",
58 |
59 | # activation checkpointing
60 | "checkpoint-activations": true,
61 | "checkpoint-num-layers": 1,
62 | "partition-activations": true,
63 | "synchronize-each-layer": true,
64 |
65 | # regularization
66 | "gradient_clipping": 1.0,
67 | "weight-decay": 0.1,
68 | "hidden-dropout": 0,
69 | "attention-dropout": 0,
70 |
71 | # precision settings
72 | "fp16": {
73 | "fp16": true,
74 | "enabled": true,
75 | "loss_scale": 0,
76 | "loss_scale_window": 1000,
77 | "initial_scale_power": 12,
78 | "hysteresis": 2,
79 | "min_loss_scale": 1,
80 | },
81 |
82 | # misc. training settings
83 | "train-iters": 180000,
84 | "lr-decay-iters": 180000,
85 | "distributed-backend": "nccl",
86 | "lr-decay-style": "cosine",
87 | "warmup": 0.01,
88 | "checkpoint-factor": 1000,
89 | # 1 more than checkpoint-factor to avoid skipping evals if `evaluate` fails
90 | "eval-interval": 1001,
91 | "eval-iters": 10,
92 | "eval_tasks": ["piqa", "sciq", "lambada_openai"],
93 |
94 | # checkpoint settings
95 | "iteration": 0,
96 | "save": "",
97 | "load": "",
98 |
99 | # data settings
100 | "train-data-paths": [],
101 | "train-data-weights": [0.03, 0.02, 3.5, 13.87, 30.88, 0.34, 0.03, 0.1, 0.01, 0.5, 0.25, 0.25, 0.11, 1, 0.1, 0.5, 0.6, 0.19, 0.05, 2, 3.5, 4.15, 5.75, 3.17, 3.44, 3.49, 3, 6, 0.02, 2, 0.01, 3.95, 0.05, 1.09, 6.05],
102 | "valid-data-paths": [],
103 | "valid-data-weights": [0.03, 0.02, 3.5, 13.87, 30.88, 0.34, 0.03, 0.1, 0.01, 0.5, 0.25, 0.25, 0.11, 1, 0.1, 0.5, 0.6, 0.19, 0.05, 2, 3.5, 4.15, 5.75, 3.17, 3.44, 3.49, 3, 6, 0.02, 2, 0.01, 3.95, 0.05, 1.09, 6.05],
104 | "test-data-paths": [],
105 | "test-data-weights": [0.03, 0.02, 3.5, 13.87, 30.88, 0.34, 0.03, 0.1, 0.01, 0.5, 0.25, 0.25, 0.11, 1, 0.1, 0.5, 0.6, 0.19, 0.05, 2, 3.5, 4.15, 5.75, 3.17, 3.44, 3.49, 3, 6, 0.02, 2, 0.01, 3.95, 0.05, 1.09, 6.05],
106 |
107 | # tokenizer settings
108 | "tokenizer-type": "HFTokenizer",
109 | "vocab-file": "/pile/20B_tokenizer.json",
110 |
111 | # log settings
112 | "log-interval": 10,
113 | "steps_per_print": 10,
114 | "wall_clock_breakdown": true,
115 |
116 | "use_wandb": true,
117 | "wandb_host": "",
118 | "wandb_team": "",
119 | "wandb_project": "",
120 | "wandb_group": "",
121 | "wandb_name": "",
122 |
123 | # multi-node launcher
124 | "launcher": "slurm",
125 | "deepspeed_slurm": true
126 | }
127 |
--------------------------------------------------------------------------------
/evals/external/EleutherAI-pythia-2.8b-deduped.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.30119453924914674,
5 | "acc_stderr": 0.013406741767847626,
6 | "acc_norm": 0.3293515358361775,
7 | "acc_norm_stderr": 0.013734057652635474
8 | },
9 | "arc_easy": {
10 | "acc": 0.6346801346801347,
11 | "acc_stderr": 0.009880576614806924,
12 | "acc_norm": 0.5909090909090909,
13 | "acc_norm_stderr": 0.010088775152615782
14 | },
15 | "boolq": {
16 | "acc": 0.6412844036697247,
17 | "acc_stderr": 0.008388668034059405
18 | },
19 | "hellaswag": {
20 | "acc": 0.45429197371041624,
21 | "acc_stderr": 0.004968888130290072,
22 | "acc_norm": 0.5944035052778331,
23 | "acc_norm_stderr": 0.004900036261309038
24 | },
25 | "lambada_openai": {
26 | "ppl": 5.00138268807375,
27 | "ppl_stderr": 0.11803810628354432,
28 | "acc": 0.6514651659227635,
29 | "acc_stderr": 0.0066386652033128745
30 | },
31 | "openbookqa": {
32 | "acc": 0.238,
33 | "acc_stderr": 0.019064072958198446,
34 | "acc_norm": 0.348,
35 | "acc_norm_stderr": 0.02132372863280751
36 | },
37 | "piqa": {
38 | "acc": 0.7410228509249184,
39 | "acc_stderr": 0.0102209660314056,
40 | "acc_norm": 0.7404787812840044,
41 | "acc_norm_stderr": 0.010227939888173923
42 | },
43 | "sciq": {
44 | "acc": 0.882,
45 | "acc_stderr": 0.010206869264381791,
46 | "acc_norm": 0.832,
47 | "acc_norm_stderr": 0.011828605831454262
48 | },
49 | "siqa": {
50 | "acc": 0.4094165813715456,
51 | "acc_stderr": 0.011126849576589028,
52 | "acc_norm": 0.44319344933469806,
53 | "acc_norm_stderr": 0.011240812731564954
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.2141982864137087,
57 | "mc1_stderr": 0.014362148155690466,
58 | "mc2": 0.3555711185495532,
59 | "mc2_stderr": 0.013587679864140447
60 | },
61 | "winogrande": {
62 | "acc": 0.5824782951854776,
63 | "acc_stderr": 0.01385997826444025
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 |     "model_args": "use_fast=True,pretrained=EleutherAI/pythia-2.8b-deduped,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
81 |     "num_fewshot": 0,
82 | "batch_size": "8",
83 | "batch_sizes": [],
84 | "device": "cuda:4",
85 | "no_cache": true,
86 | "limit": null,
87 | "bootstrap_iters": 100000,
88 | "description_dict": {}
89 | }
90 | }
--------------------------------------------------------------------------------
/evals/external/EleutherAI_gpt-j-6B.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.3395904436860068,
5 | "acc_stderr": 0.01383903976282016,
6 | "acc_norm": 0.3651877133105802,
7 | "acc_norm_stderr": 0.014070265519268802
8 | },
9 | "arc_easy": {
10 | "acc": 0.6696127946127947,
11 | "acc_stderr": 0.009651430216428178,
12 | "acc_norm": 0.6224747474747475,
13 | "acc_norm_stderr": 0.009947227833469421
14 | },
15 | "boolq": {
16 | "acc": 0.654434250764526,
17 | "acc_stderr": 0.008317463342191592
18 | },
19 | "hellaswag": {
20 | "acc": 0.49522007568213505,
21 | "acc_stderr": 0.004989553396413105,
22 | "acc_norm": 0.6624178450507867,
23 | "acc_norm_stderr": 0.004719187890948069
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.102416000764715,
27 | "ppl_stderr": 0.08849985162393556,
28 | "acc": 0.6823209780710265,
29 | "acc_stderr": 0.0064863548390796605
30 | },
31 | "openbookqa": {
32 | "acc": 0.29,
33 | "acc_stderr": 0.02031317923174518,
34 | "acc_norm": 0.382,
35 | "acc_norm_stderr": 0.02175082059125084
36 | },
37 | "piqa": {
38 | "acc": 0.7557127312295974,
39 | "acc_stderr": 0.010024765172284244,
40 | "acc_norm": 0.7616974972796517,
41 | "acc_norm_stderr": 0.009940334245876224
42 | },
43 | "sciq": {
44 | "acc": 0.915,
45 | "acc_stderr": 0.008823426366942316,
46 | "acc_norm": 0.874,
47 | "acc_norm_stderr": 0.010499249222408035
48 | },
49 | "siqa": {
50 | "acc": 0.4109518935516888,
51 | "acc_stderr": 0.011133193398910182,
52 | "acc_norm": 0.4508700102354145,
53 | "acc_norm_stderr": 0.011259319269273942
54 | },
55 | "winogrande": {
56 | "acc": 0.6416732438831886,
57 | "acc_stderr": 0.013476581172567524
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=EleutherAI/gpt-j-6B,dtype=float16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/EleutherAI_gpt-neox-20B.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.378839590443686,
5 | "acc_stderr": 0.014175915490000319,
6 | "acc_norm": 0.40784982935153585,
7 | "acc_norm_stderr": 0.014361097288449696
8 | },
9 | "arc_easy": {
10 | "acc": 0.7289562289562289,
11 | "acc_stderr": 0.0091209197417606,
12 | "acc_norm": 0.6868686868686869,
13 | "acc_norm_stderr": 0.009516303879309528
14 | },
15 | "boolq": {
16 | "acc": 0.6948012232415902,
17 | "acc_stderr": 0.00805404814192796
18 | },
19 | "hellaswag": {
20 | "acc": 0.5356502688707429,
21 | "acc_stderr": 0.004977081808179433,
22 | "acc_norm": 0.714299940250946,
23 | "acc_norm_stderr": 0.004508239594503832
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.6403044358845733,
27 | "ppl_stderr": 0.0747797639775495,
28 | "acc": 0.7197748884145159,
29 | "acc_stderr": 0.0062569681407934575
30 | },
31 | "openbookqa": {
32 | "acc": 0.298,
33 | "acc_stderr": 0.02047511809298897,
34 | "acc_norm": 0.402,
35 | "acc_norm_stderr": 0.021948929609938602
36 | },
37 | "piqa": {
38 | "acc": 0.7742110990206746,
39 | "acc_stderr": 0.009754980670917316,
40 | "acc_norm": 0.7845484221980413,
41 | "acc_norm_stderr": 0.009592463115658116
42 | },
43 | "sciq": {
44 | "acc": 0.931,
45 | "acc_stderr": 0.008018934050315145,
46 | "acc_norm": 0.89,
47 | "acc_norm_stderr": 0.009899393819724444
48 | },
49 | "siqa": {
50 | "acc": 0.4196519959058342,
51 | "acc_stderr": 0.011167032303390547,
52 | "acc_norm": 0.44728761514841353,
53 | "acc_norm_stderr": 0.011251020423273035
54 | },
55 | "winogrande": {
56 | "acc": 0.6614048934490924,
57 | "acc_stderr": 0.01330016986584241
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=EleutherAI/gpt-neox-20B,dtype=float16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/EleutherAI_pythia-12b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.318259385665529,
5 | "acc_stderr": 0.013611993916971451,
6 | "acc_norm": 0.3506825938566553,
7 | "acc_norm_stderr": 0.013944635930726083
8 | },
9 | "arc_easy": {
10 | "acc": 0.702020202020202,
11 | "acc_stderr": 0.009385046066694871,
12 | "acc_norm": 0.6372053872053872,
13 | "acc_norm_stderr": 0.00986593675701393
14 | },
15 | "boolq": {
16 | "acc": 0.6730886850152905,
17 | "acc_stderr": 0.008204340208838751
18 | },
19 | "hellaswag": {
20 | "acc": 0.5046803425612428,
21 | "acc_stderr": 0.004989562798280524,
22 | "acc_norm": 0.673770165305716,
23 | "acc_norm_stderr": 0.004678743563766643
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.9264187287921497,
27 | "ppl_stderr": 0.08375179732007268,
28 | "acc": 0.7063846303124394,
29 | "acc_stderr": 0.006344860619678724
30 | },
31 | "openbookqa": {
32 | "acc": 0.264,
33 | "acc_stderr": 0.019732885585922108,
34 | "acc_norm": 0.372,
35 | "acc_norm_stderr": 0.0216371979857224
36 | },
37 | "piqa": {
38 | "acc": 0.7627856365614799,
39 | "acc_stderr": 0.009924694933586366,
40 | "acc_norm": 0.7698585418933623,
41 | "acc_norm_stderr": 0.009820832826839798
42 | },
43 | "sciq": {
44 | "acc": 0.902,
45 | "acc_stderr": 0.009406619184621235,
46 | "acc_norm": 0.851,
47 | "acc_norm_stderr": 0.011266140684632175
48 | },
49 | "siqa": {
50 | "acc": 0.4201637666325486,
51 | "acc_stderr": 0.011168911571162012,
52 | "acc_norm": 0.44268167860798363,
53 | "acc_norm_stderr": 0.011239482425741961
54 | },
55 | "winogrande": {
56 | "acc": 0.6400947119179163,
57 | "acc_stderr": 0.013489609590266804
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=EleutherAI/pythia-12b,dtype=float16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "1",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/EleutherAI_pythia-6.9b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.318259385665529,
5 | "acc_stderr": 0.013611993916971451,
6 | "acc_norm": 0.3532423208191126,
7 | "acc_norm_stderr": 0.013967822714840055
8 | },
9 | "arc_easy": {
10 | "acc": 0.6721380471380471,
11 | "acc_stderr": 0.009632587076170013,
12 | "acc_norm": 0.6106902356902357,
13 | "acc_norm_stderr": 0.010005212782878145
14 | },
15 | "boolq": {
16 | "acc": 0.6400611620795107,
17 | "acc_stderr": 0.008394940698368873
18 | },
19 | "hellaswag": {
20 | "acc": 0.48048197570205137,
21 | "acc_stderr": 0.0049859782149379184,
22 | "acc_norm": 0.6388169687313284,
23 | "acc_norm_stderr": 0.004793617835645056
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.457423605452202,
27 | "ppl_stderr": 0.10007464956313723,
28 | "acc": 0.670095090238696,
29 | "acc_stderr": 0.00655050345779628
30 | },
31 | "openbookqa": {
32 | "acc": 0.258,
33 | "acc_stderr": 0.019586711785215837,
34 | "acc_norm": 0.372,
35 | "acc_norm_stderr": 0.0216371979857224
36 | },
37 | "piqa": {
38 | "acc": 0.750816104461371,
39 | "acc_stderr": 0.01009188277012022,
40 | "acc_norm": 0.76550598476605,
41 | "acc_norm_stderr": 0.009885203143240538
42 | },
43 | "sciq": {
44 | "acc": 0.898,
45 | "acc_stderr": 0.00957536880165389,
46 | "acc_norm": 0.84,
47 | "acc_norm_stderr": 0.011598902298689004
48 | },
49 | "siqa": {
50 | "acc": 0.40736949846468784,
51 | "acc_stderr": 0.011118216651888717,
52 | "acc_norm": 0.42988741044012285,
53 | "acc_norm_stderr": 0.011202283451328794
54 | },
55 | "winogrande": {
56 | "acc": 0.606156274664562,
57 | "acc_stderr": 0.013732114472668745
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=EleutherAI/pythia-6.9b,dtype=float16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/Qwen-Qwen-7B-Chat.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.45563139931740615,
5 | "acc_stderr": 0.014553749939306863,
6 | "acc_norm": 0.4667235494880546,
7 | "acc_norm_stderr": 0.014578995859605797
8 | },
9 | "arc_easy": {
10 | "acc": 0.6919191919191919,
11 | "acc_stderr": 0.00947388707582633,
12 | "acc_norm": 0.6447811447811448,
13 | "acc_norm_stderr": 0.009820245899287122
14 | },
15 | "boolq": {
16 | "acc": 0.7168195718654434,
17 | "acc_stderr": 0.007880052012351937
18 | },
19 | "hellaswag": {
20 | "acc": 0.6792471619199363,
21 | "acc_stderr": 0.004658120152230809,
22 | "acc_norm": 0.8497311292571201,
23 | "acc_norm_stderr": 0.003566044777327419
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.258406715958618,
27 | "ppl_stderr": 0.1263040238577051,
28 | "acc": 0.6547642150203765,
29 | "acc_stderr": 0.006623879809039193
30 | },
31 | "openbookqa": {
32 | "acc": 0.356,
33 | "acc_stderr": 0.021434712356072652,
34 | "acc_norm": 0.462,
35 | "acc_norm_stderr": 0.02231833811987053
36 | },
37 | "piqa": {
38 | "acc": 0.7872687704026116,
39 | "acc_stderr": 0.00954822312304733,
40 | "acc_norm": 0.7992383025027203,
41 | "acc_norm_stderr": 0.00934596167482341
42 | },
43 | "sciq": {
44 | "acc": 0.907,
45 | "acc_stderr": 0.00918887563499669,
46 | "acc_norm": 0.806,
47 | "acc_norm_stderr": 0.012510816141264359
48 | },
49 | "siqa": {
50 | "acc": 0.4703172978505629,
51 | "acc_stderr": 0.011294116144908552,
52 | "acc_norm": 0.47389969293756395,
53 | "acc_norm_stderr": 0.01129864516098083
54 | },
55 | "winogrande": {
56 | "acc": 0.6858721389108129,
57 | "acc_stderr": 0.01304541671607256
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=Qwen/Qwen-7B-Chat,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda:4",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/Qwen-Qwen-7B.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.4539249146757679,
5 | "acc_stderr": 0.014549221105171865,
6 | "acc_norm": 0.49146757679180886,
7 | "acc_norm_stderr": 0.014609263165632186
8 | },
9 | "arc_easy": {
10 | "acc": 0.6738215488215489,
11 | "acc_stderr": 0.009619849417035167,
12 | "acc_norm": 0.6519360269360269,
13 | "acc_norm_stderr": 0.009774627600259012
14 | },
15 | "boolq": {
16 | "acc": 0.745565749235474,
17 | "acc_stderr": 0.007617690099234367
18 | },
19 | "hellaswag": {
20 | "acc": 0.7305317665803625,
21 | "acc_stderr": 0.004427767996301626,
22 | "acc_norm": 0.8884684325831508,
23 | "acc_norm_stderr": 0.0031414591751392695
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.014976946118802,
27 | "ppl_stderr": 0.08858949240464632,
28 | "acc": 0.6966815447312246,
29 | "acc_stderr": 0.006404402872809113
30 | },
31 | "openbookqa": {
32 | "acc": 0.322,
33 | "acc_stderr": 0.02091666833001988,
34 | "acc_norm": 0.448,
35 | "acc_norm_stderr": 0.022261697292270143
36 | },
37 | "piqa": {
38 | "acc": 0.7399347116430903,
39 | "acc_stderr": 0.010234893249061308,
40 | "acc_norm": 0.749183895538629,
41 | "acc_norm_stderr": 0.010113869547069044
42 | },
43 | "sciq": {
44 | "acc": 0.932,
45 | "acc_stderr": 0.007964887911291603,
46 | "acc_norm": 0.908,
47 | "acc_norm_stderr": 0.009144376393151117
48 | },
49 | "siqa": {
50 | "acc": 0.49437052200614123,
51 | "acc_stderr": 0.011313353423379522,
52 | "acc_norm": 0.5240532241555783,
53 | "acc_norm_stderr": 0.01130097128912773
54 | },
55 | "winogrande": {
56 | "acc": 0.65982636148382,
57 | "acc_stderr": 0.013315218762417397
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=True,pretrained=Qwen/Qwen-7B,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda:4",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/baichuan-inc_Baichuan2-7B-Base.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.42235494880546076,
5 | "acc_stderr": 0.014434138713379977,
6 | "acc_norm": 0.431740614334471,
7 | "acc_norm_stderr": 0.014474591427196202
8 | },
9 | "arc_easy": {
10 | "acc": 0.75,
11 | "acc_stderr": 0.008885233166386385,
12 | "acc_norm": 0.7281144781144782,
13 | "acc_norm_stderr": 0.0091297958673105
14 | },
15 | "boolq": {
16 | "acc": 0.7308868501529052,
17 | "acc_stderr": 0.007756844290794318
18 | },
19 | "hellaswag": {
20 | "acc": 0.5366460864369648,
21 | "acc_stderr": 0.004976361454341348,
22 | "acc_norm": 0.7228639713204541,
23 | "acc_norm_stderr": 0.004466695023677831
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.6834487322486384,
27 | "ppl_stderr": 0.07653758845393456,
28 | "acc": 0.7098777411216767,
29 | "acc_stderr": 0.006322580641394925
30 | },
31 | "openbookqa": {
32 | "acc": 0.304,
33 | "acc_stderr": 0.02059164957122493,
34 | "acc_norm": 0.394,
35 | "acc_norm_stderr": 0.021874299301689253
36 | },
37 | "piqa": {
38 | "acc": 0.7616974972796517,
39 | "acc_stderr": 0.009940334245876209,
40 | "acc_norm": 0.7736670293797606,
41 | "acc_norm_stderr": 0.009763294246879418
42 | },
43 | "sciq": {
44 | "acc": 0.946,
45 | "acc_stderr": 0.007150883521295436,
46 | "acc_norm": 0.913,
47 | "acc_norm_stderr": 0.008916866630745916
48 | },
49 | "siqa": {
50 | "acc": 0.41760491299897645,
51 | "acc_stderr": 0.011159391894922486,
52 | "acc_norm": 0.44779938587512796,
53 | "acc_norm_stderr": 0.011252242102001767
54 | },
55 | "winogrande": {
56 | "acc": 0.675611681136543,
57 | "acc_stderr": 0.01315722572664164
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=baichuan-inc/Baichuan2-7B-Base,dtype=bfloat16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/bigscience-bloom-3b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.27986348122866894,
5 | "acc_stderr": 0.013119040897725922,
6 | "acc_norm": 0.3037542662116041,
7 | "acc_norm_stderr": 0.013438909184778757
8 | },
9 | "arc_easy": {
10 | "acc": 0.5942760942760943,
11 | "acc_stderr": 0.010075755540128871,
12 | "acc_norm": 0.5328282828282829,
13 | "acc_norm_stderr": 0.010237645778853869
14 | },
15 | "boolq": {
16 | "acc": 0.617125382262997,
17 | "acc_stderr": 0.008501734385335953
18 | },
19 | "hellaswag": {
20 | "acc": 0.4137621987651862,
21 | "acc_stderr": 0.004915003499517833,
22 | "acc_norm": 0.545309699263095,
23 | "acc_norm_stderr": 0.004969251445596341
24 | },
25 | "lambada_openai": {
26 | "ppl": 9.094700256232823,
27 | "ppl_stderr": 0.2652067493709512,
28 | "acc": 0.5173685231903745,
29 | "acc_stderr": 0.006961773596960152
30 | },
31 | "openbookqa": {
32 | "acc": 0.218,
33 | "acc_stderr": 0.01848337822317886,
34 | "acc_norm": 0.322,
35 | "acc_norm_stderr": 0.020916668330019882
36 | },
37 | "piqa": {
38 | "acc": 0.705658324265506,
39 | "acc_stderr": 0.010633311470347498,
40 | "acc_norm": 0.7067464635473341,
41 | "acc_norm_stderr": 0.010621818421101931
42 | },
43 | "sciq": {
44 | "acc": 0.891,
45 | "acc_stderr": 0.009859828407037191,
46 | "acc_norm": 0.816,
47 | "acc_norm_stderr": 0.012259457340938588
48 | },
49 | "siqa": {
50 | "acc": 0.4017400204708291,
51 | "acc_stderr": 0.011093444192711183,
52 | "acc_norm": 0.4314227226202661,
53 | "acc_norm_stderr": 0.011207148736838392
54 | },
55 | "winogrande": {
56 | "acc": 0.584846093133386,
57 | "acc_stderr": 0.01384868408665859
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=True,pretrained=bigscience/bloom-3b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": null,
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/bigscience-bloom-7b1.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.302901023890785,
5 | "acc_stderr": 0.013428241573185349,
6 | "acc_norm": 0.33532423208191126,
7 | "acc_norm_stderr": 0.013796182947785564
8 | },
9 | "arc_easy": {
10 | "acc": 0.6494107744107744,
11 | "acc_stderr": 0.009791003829831557,
12 | "acc_norm": 0.5736531986531986,
13 | "acc_norm_stderr": 0.010147858603835144
14 | },
15 | "boolq": {
16 | "acc": 0.6284403669724771,
17 | "acc_stderr": 0.008451598145076589
18 | },
19 | "hellaswag": {
20 | "acc": 0.4649472216689902,
21 | "acc_stderr": 0.0049775044466089996,
22 | "acc_norm": 0.6228838876717785,
23 | "acc_norm_stderr": 0.004836738514051334
24 | },
25 | "lambada_openai": {
26 | "ppl": 6.619927277080142,
27 | "ppl_stderr": 0.1762520708430581,
28 | "acc": 0.5755870366776635,
29 | "acc_stderr": 0.006885918770006387
30 | },
31 | "openbookqa": {
32 | "acc": 0.252,
33 | "acc_stderr": 0.019435727282249536,
34 | "acc_norm": 0.358,
35 | "acc_norm_stderr": 0.021461434862859122
36 | },
37 | "piqa": {
38 | "acc": 0.7274211099020674,
39 | "acc_stderr": 0.010389256803296021,
40 | "acc_norm": 0.7366702937976061,
41 | "acc_norm_stderr": 0.010276185322196764
42 | },
43 | "sciq": {
44 | "acc": 0.901,
45 | "acc_stderr": 0.009449248027662765,
46 | "acc_norm": 0.845,
47 | "acc_norm_stderr": 0.011450157470799475
48 | },
49 | "siqa": {
50 | "acc": 0.4211873080859775,
51 | "acc_stderr": 0.011172633149198374,
52 | "acc_norm": 0.4498464687819857,
53 | "acc_norm_stderr": 0.011257008360485692
54 | },
55 | "winogrande": {
56 | "acc": 0.6464088397790055,
57 | "acc_stderr": 0.013436541262599948
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=True,pretrained=bigscience/bloom-7b1,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda:5",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/cerebras-btlm-3b-8k-base.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.34897610921501704,
5 | "acc_stderr": 0.013928933461382496,
6 | "acc_norm": 0.37627986348122866,
7 | "acc_norm_stderr": 0.014157022555407168
8 | },
9 | "arc_easy": {
10 | "acc": 0.7045454545454546,
11 | "acc_stderr": 0.009361987126556457,
12 | "acc_norm": 0.6708754208754208,
13 | "acc_norm_stderr": 0.00964204805806098
14 | },
15 | "boolq": {
16 | "acc": 0.6963302752293578,
17 | "acc_stderr": 0.008042682539896304
18 | },
19 | "hellaswag": {
20 | "acc": 0.5184226249751046,
21 | "acc_stderr": 0.004986393266269161,
22 | "acc_norm": 0.6977693686516631,
23 | "acc_norm_stderr": 0.004582861219020893
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.720441000893734,
27 | "ppl_stderr": 0.1100556659950519,
28 | "acc": 0.6623326217737241,
29 | "acc_stderr": 0.00658862361668043
30 | },
31 | "openbookqa": {
32 | "acc": 0.276,
33 | "acc_stderr": 0.02001121929807353,
34 | "acc_norm": 0.408,
35 | "acc_norm_stderr": 0.02200091089387719
36 | },
37 | "piqa": {
38 | "acc": 0.7584330794341676,
39 | "acc_stderr": 0.009986718001804472,
40 | "acc_norm": 0.7720348204570185,
41 | "acc_norm_stderr": 0.009788093832324906
42 | },
43 | "sciq": {
44 | "acc": 0.929,
45 | "acc_stderr": 0.008125578442487916,
46 | "acc_norm": 0.895,
47 | "acc_norm_stderr": 0.009698921026024966
48 | },
49 | "siqa": {
50 | "acc": 0.4278403275332651,
51 | "acc_stderr": 0.01119562541819821,
52 | "acc_norm": 0.4611054247697032,
53 | "acc_norm_stderr": 0.011279787032703659
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.2252141982864137,
57 | "mc1_stderr": 0.014623240768023498,
58 | "mc2": 0.3599562107238256,
59 | "mc2_stderr": 0.013576568348894856
60 | },
61 | "winogrande": {
62 | "acc": 0.6495659037095501,
63 | "acc_stderr": 0.013409047676670189
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=cerebras/btlm-3b-8k-base,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
82 | "batch_size": "8",
83 | "batch_sizes": [],
84 | "device": "cuda:4",
85 | "no_cache": true,
86 | "limit": null,
87 | "bootstrap_iters": 100000,
88 | "description_dict": {}
89 | }
90 | }
--------------------------------------------------------------------------------
/evals/external/facebook-opt-2.7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.2687713310580205,
5 | "acc_stderr": 0.012955065963710691,
6 | "acc_norm": 0.31313993174061433,
7 | "acc_norm_stderr": 0.013552671543623504
8 | },
9 | "arc_easy": {
10 | "acc": 0.6077441077441077,
11 | "acc_stderr": 0.010018744689650043,
12 | "acc_norm": 0.5429292929292929,
13 | "acc_norm_stderr": 0.01022189756425603
14 | },
15 | "boolq": {
16 | "acc": 0.6033639143730887,
17 | "acc_stderr": 0.008556148582032
18 | },
19 | "hellaswag": {
20 | "acc": 0.4584744074885481,
21 | "acc_stderr": 0.004972543127767873,
22 | "acc_norm": 0.6059549890460068,
23 | "acc_norm_stderr": 0.0048764594346198
24 | },
25 | "lambada_openai": {
26 | "ppl": 5.119857738610855,
27 | "ppl_stderr": 0.11991227443177162,
28 | "acc": 0.6357461672811954,
29 | "acc_stderr": 0.006704339729528894
30 | },
31 | "openbookqa": {
32 | "acc": 0.25,
33 | "acc_stderr": 0.019384310743640384,
34 | "acc_norm": 0.352,
35 | "acc_norm_stderr": 0.021380042385946048
36 | },
37 | "piqa": {
38 | "acc": 0.7383025027203483,
39 | "acc_stderr": 0.01025563077270823,
40 | "acc_norm": 0.7480957562568009,
41 | "acc_norm_stderr": 0.010128421335088685
42 | },
43 | "sciq": {
44 | "acc": 0.858,
45 | "acc_stderr": 0.011043457699378227,
46 | "acc_norm": 0.79,
47 | "acc_norm_stderr": 0.012886662332274536
48 | },
49 | "siqa": {
50 | "acc": 0.4083930399181167,
51 | "acc_stderr": 0.011122558066098069,
52 | "acc_norm": 0.44268167860798363,
53 | "acc_norm_stderr": 0.011239482425741961
54 | },
55 | "winogrande": {
56 | "acc": 0.6101026045777427,
57 | "acc_stderr": 0.013707547317008462
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=True,pretrained=facebook/opt-2.7b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda:2",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/facebook-opt-6.7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.30716723549488056,
5 | "acc_stderr": 0.013481034054980945,
6 | "acc_norm": 0.34812286689419797,
7 | "acc_norm_stderr": 0.013921008595179338
8 | },
9 | "arc_easy": {
10 | "acc": 0.6565656565656566,
11 | "acc_stderr": 0.00974381736896,
12 | "acc_norm": 0.6014309764309764,
13 | "acc_norm_stderr": 0.010046455400477947
14 | },
15 | "boolq": {
16 | "acc": 0.6602446483180429,
17 | "acc_stderr": 0.00828377201314756
18 | },
19 | "hellaswag": {
20 | "acc": 0.5051782513443537,
21 | "acc_stderr": 0.004989513809408587,
22 | "acc_norm": 0.6719776936865166,
23 | "acc_norm_stderr": 0.004685334844038652
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.252533670173101,
27 | "ppl_stderr": 0.0927155022353668,
28 | "acc": 0.6764991267222977,
29 | "acc_stderr": 0.006517535744360227
30 | },
31 | "openbookqa": {
32 | "acc": 0.276,
33 | "acc_stderr": 0.020011219298073524,
34 | "acc_norm": 0.372,
35 | "acc_norm_stderr": 0.0216371979857224
36 | },
37 | "piqa": {
38 | "acc": 0.7633297062023939,
39 | "acc_stderr": 0.009916841655042804,
40 | "acc_norm": 0.766050054406964,
41 | "acc_norm_stderr": 0.009877236895137437
42 | },
43 | "sciq": {
44 | "acc": 0.901,
45 | "acc_stderr": 0.009449248027662761,
46 | "acc_norm": 0.853,
47 | "acc_norm_stderr": 0.011203415395160333
48 | },
49 | "siqa": {
50 | "acc": 0.4263050153531218,
51 | "acc_stderr": 0.011190503463264742,
52 | "acc_norm": 0.4595701125895599,
53 | "acc_norm_stderr": 0.011277022486079959
54 | },
55 | "winogrande": {
56 | "acc": 0.6535122336227308,
57 | "acc_stderr": 0.013373773411685644
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=True,pretrained=facebook/opt-6.7b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda:2",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/huggyllama-llama-7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.4189419795221843,
5 | "acc_stderr": 0.014418106953639013,
6 | "acc_norm": 0.4462457337883959,
7 | "acc_norm_stderr": 0.014526705548539982
8 | },
9 | "arc_easy": {
10 | "acc": 0.7525252525252525,
11 | "acc_stderr": 0.00885511441483471,
12 | "acc_norm": 0.7285353535353535,
13 | "acc_norm_stderr": 0.009125362970360627
14 | },
15 | "boolq": {
16 | "acc": 0.7504587155963303,
17 | "acc_stderr": 0.0075688020241860285
18 | },
19 | "hellaswag": {
20 | "acc": 0.5696076478789086,
21 | "acc_stderr": 0.004941191607317913,
22 | "acc_norm": 0.7621987651862179,
23 | "acc_norm_stderr": 0.004248666961833349
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.4882593690160553,
27 | "ppl_stderr": 0.068517853205977,
28 | "acc": 0.7354938870560839,
29 | "acc_stderr": 0.006144965702579053
30 | },
31 | "openbookqa": {
32 | "acc": 0.344,
33 | "acc_stderr": 0.02126575803797874,
34 | "acc_norm": 0.444,
35 | "acc_norm_stderr": 0.02224224437573102
36 | },
37 | "piqa": {
38 | "acc": 0.7867247007616975,
39 | "acc_stderr": 0.00955712122586133,
40 | "acc_norm": 0.7916213275299239,
41 | "acc_norm_stderr": 0.009476125383049459
42 | },
43 | "sciq": {
44 | "acc": 0.946,
45 | "acc_stderr": 0.007150883521295433,
46 | "acc_norm": 0.928,
47 | "acc_norm_stderr": 0.008178195576218681
48 | },
49 | "siqa": {
50 | "acc": 0.4483111566018424,
51 | "acc_stderr": 0.01125345173122258,
52 | "acc_norm": 0.4703172978505629,
53 | "acc_norm_stderr": 0.011294116144908554
54 | },
55 | "winogrande": {
56 | "acc": 0.6992896606156275,
57 | "acc_stderr": 0.012888010494704723
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=False,pretrained=huggyllama/llama-7b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda:2",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/kittn_mistral-7B-v0.1-hf.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.5025597269624573,
5 | "acc_stderr": 0.014611199329843777,
6 | "acc_norm": 0.5435153583617748,
7 | "acc_norm_stderr": 0.014555949760496439
8 | },
9 | "arc_easy": {
10 | "acc": 0.8080808080808081,
11 | "acc_stderr": 0.00808080808080796,
12 | "acc_norm": 0.7954545454545454,
13 | "acc_norm_stderr": 0.008276958800002997
14 | },
15 | "boolq": {
16 | "acc": 0.8363914373088684,
17 | "acc_stderr": 0.006469941343840763
18 | },
19 | "hellaswag": {
20 | "acc": 0.6127265484963155,
21 | "acc_stderr": 0.0048613146132868434,
22 | "acc_norm": 0.8105954989046007,
23 | "acc_norm_stderr": 0.003910288117015163
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.181388367482107,
27 | "ppl_stderr": 0.058369197945258765,
28 | "acc": 0.7568406753347564,
29 | "acc_stderr": 0.0059766767751295085
30 | },
31 | "openbookqa": {
32 | "acc": 0.328,
33 | "acc_stderr": 0.021017027165175485,
34 | "acc_norm": 0.44,
35 | "acc_norm_stderr": 0.022221331534143057
36 | },
37 | "piqa": {
38 | "acc": 0.8079434167573449,
39 | "acc_stderr": 0.00919074029512649,
40 | "acc_norm": 0.8215451577801959,
41 | "acc_norm_stderr": 0.008933575463062072
42 | },
43 | "sciq": {
44 | "acc": 0.959,
45 | "acc_stderr": 0.006273624021118748,
46 | "acc_norm": 0.938,
47 | "acc_norm_stderr": 0.0076298239962803065
48 | },
49 | "siqa": {
50 | "acc": 0.4273285568065507,
51 | "acc_stderr": 0.01119393034055127,
52 | "acc_norm": 0.4570112589559877,
53 | "acc_norm_stderr": 0.01127217546233142
54 | },
55 | "winogrande": {
56 | "acc": 0.7403314917127072,
57 | "acc_stderr": 0.012322700705552673
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=kittn/mistral-7B-v0.1-hf,dtype=bfloat16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=False",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/meta-llama-Llama-2-13b-hf.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.4863481228668942,
5 | "acc_stderr": 0.014605943429860945,
6 | "acc_norm": 0.492320819112628,
7 | "acc_norm_stderr": 0.01460966744089257
8 | },
9 | "arc_easy": {
10 | "acc": 0.79503367003367,
11 | "acc_stderr": 0.008283277600626398,
12 | "acc_norm": 0.7760942760942761,
13 | "acc_norm_stderr": 0.008553779114531757
14 | },
15 | "boolq": {
16 | "acc": 0.8051987767584098,
17 | "acc_stderr": 0.006926916185348359
18 | },
19 | "hellaswag": {
20 | "acc": 0.6011750647281418,
21 | "acc_stderr": 0.004886559008754982,
22 | "acc_norm": 0.7935670185222067,
23 | "acc_norm_stderr": 0.004039176806180289
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.0445239988325836,
27 | "ppl_stderr": 0.05613639700817276,
28 | "acc": 0.767708131185717,
29 | "acc_stderr": 0.005883383348944357
30 | },
31 | "openbookqa": {
32 | "acc": 0.354,
33 | "acc_stderr": 0.021407582047916447,
34 | "acc_norm": 0.454,
35 | "acc_norm_stderr": 0.02228814759117695
36 | },
37 | "piqa": {
38 | "acc": 0.7905331882480957,
39 | "acc_stderr": 0.0094943029798198,
40 | "acc_norm": 0.8063112078346029,
41 | "acc_norm_stderr": 0.009220384152336643
42 | },
43 | "sciq": {
44 | "acc": 0.945,
45 | "acc_stderr": 0.007212976294639239,
46 | "acc_norm": 0.933,
47 | "acc_norm_stderr": 0.007910345983177549
48 | },
49 | "siqa": {
50 | "acc": 0.4278403275332651,
51 | "acc_stderr": 0.01119562541819821,
52 | "acc_norm": 0.4503582395087001,
53 | "acc_norm_stderr": 0.01125816983012229
54 | },
55 | "winogrande": {
56 | "acc": 0.7221783741120757,
57 | "acc_stderr": 0.012588918183871603
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=False,pretrained=meta-llama/Llama-2-13b-hf,trust_remote_code=True,low_cpu_mem_usage=True,dtype=bfloat16",
75 | "num_fewshot": 0,
76 | "batch_size": "16",
77 | "batch_sizes": [],
78 | "device": "cuda:5",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/meta-llama-Llama-2-7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.4300341296928328,
5 | "acc_stderr": 0.014467631559137993,
6 | "acc_norm": 0.4616040955631399,
7 | "acc_norm_stderr": 0.014568245550296363
8 | },
9 | "arc_easy": {
10 | "acc": 0.7626262626262627,
11 | "acc_stderr": 0.008730525906362434,
12 | "acc_norm": 0.7453703703703703,
13 | "acc_norm_stderr": 0.008939407288589414
14 | },
15 | "boolq": {
16 | "acc": 0.7773700305810397,
17 | "acc_stderr": 0.007276093141006333
18 | },
19 | "hellaswag": {
20 | "acc": 0.5720971917944633,
21 | "acc_stderr": 0.00493763511283029,
22 | "acc_norm": 0.7594104760007967,
23 | "acc_norm_stderr": 0.004265678940698863
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.3970918836338027,
27 | "ppl_stderr": 0.06684659102563836,
28 | "acc": 0.7347176402095866,
29 | "acc_stderr": 0.0061507275830540355
30 | },
31 | "openbookqa": {
32 | "acc": 0.314,
33 | "acc_stderr": 0.020776701920308997,
34 | "acc_norm": 0.436,
35 | "acc_norm_stderr": 0.0221989546414768
36 | },
37 | "piqa": {
38 | "acc": 0.7774755168661589,
39 | "acc_stderr": 0.009704600975718245,
40 | "acc_norm": 0.7878128400435256,
41 | "acc_norm_stderr": 0.009539299828174048
42 | },
43 | "sciq": {
44 | "acc": 0.936,
45 | "acc_stderr": 0.007743640226919308,
46 | "acc_norm": 0.908,
47 | "acc_norm_stderr": 0.009144376393151106
48 | },
49 | "siqa": {
50 | "acc": 0.43500511770726713,
51 | "acc_stderr": 0.011218074465506494,
52 | "acc_norm": 0.47389969293756395,
53 | "acc_norm_stderr": 0.011298645160980832
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.2521419828641371,
57 | "mc1_stderr": 0.01520152224629997,
58 | "mc2": 0.38967559882659686,
59 | "mc2_stderr": 0.01357922221561985
60 | },
61 | "winogrande": {
62 | "acc": 0.6961325966850829,
63 | "acc_stderr": 0.012926209475483574
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=False,pretrained=meta-llama/Llama-2-7b-hf,trust_remote_code=True,low_cpu_mem_usage=True,dtype=bfloat16",
82 | "num_fewshot": 0,
83 | "batch_size": "2",
84 | "batch_sizes": [],
85 | "device": "cuda:0",
86 | "no_cache": false,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/external/microsoft-phi-1_5.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.4445392491467577,
5 | "acc_stderr": 0.014521226405627079,
6 | "acc_norm": 0.4803754266211604,
7 | "acc_norm_stderr": 0.014600132075947092
8 | },
9 | "arc_easy": {
10 | "acc": 0.7613636363636364,
11 | "acc_stderr": 0.008746465140706127,
12 | "acc_norm": 0.7314814814814815,
13 | "acc_norm_stderr": 0.009094042554994847
14 | },
15 | "boolq": {
16 | "acc": 0.7452599388379205,
17 | "acc_stderr": 0.007620703281690057
18 | },
19 | "hellaswag": {
20 | "acc": 0.47988448516231824,
21 | "acc_stderr": 0.004985741706385726,
22 | "acc_norm": 0.6261700856403107,
23 | "acc_norm_stderr": 0.004828305041904399
24 | },
25 | "lambada_openai": {
26 | "ppl": 8.945202930351474,
27 | "ppl_stderr": 0.3005661666926477,
28 | "acc": 0.527459732194838,
29 | "acc_stderr": 0.0069554645156210786
30 | },
31 | "openbookqa": {
32 | "acc": 0.376,
33 | "acc_stderr": 0.021683827539286115,
34 | "acc_norm": 0.482,
35 | "acc_norm_stderr": 0.02236856511738799
36 | },
37 | "piqa": {
38 | "acc": 0.7633297062023939,
39 | "acc_stderr": 0.009916841655042806,
40 | "acc_norm": 0.7578890097932536,
41 | "acc_norm_stderr": 0.009994371269104381
42 | },
43 | "sciq": {
44 | "acc": 0.932,
45 | "acc_stderr": 0.007964887911291603,
46 | "acc_norm": 0.916,
47 | "acc_norm_stderr": 0.00877616208949113
48 | },
49 | "siqa": {
50 | "acc": 0.5537359263050153,
51 | "acc_stderr": 0.01124854090154796,
52 | "acc_norm": 0.5957011258955988,
53 | "acc_norm_stderr": 0.0111048923983008
54 | },
55 | "winogrande": {
56 | "acc": 0.7253354380426204,
57 | "acc_stderr": 0.012544516005117187
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "use_fast=False,pretrained=microsoft/phi-1_5,trust_remote_code=True,low_cpu_mem_usage=True,dtype=bfloat16",
75 | "num_fewshot": 0,
76 | "batch_size": "16",
77 | "batch_sizes": [],
78 | "device": "cuda:5",
79 | "no_cache": false,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/mosaicml-mpt-7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.4052901023890785,
5 | "acc_stderr": 0.01434686906022933,
6 | "acc_norm": 0.4189419795221843,
7 | "acc_norm_stderr": 0.014418106953639011
8 | },
9 | "arc_easy": {
10 | "acc": 0.7491582491582491,
11 | "acc_stderr": 0.008895183010487386,
12 | "acc_norm": 0.7003367003367004,
13 | "acc_norm_stderr": 0.009400228586205971
14 | },
15 | "boolq": {
16 | "acc": 0.7394495412844037,
17 | "acc_stderr": 0.007677021072511165
18 | },
19 | "hellaswag": {
20 | "acc": 0.571400119498108,
21 | "acc_stderr": 0.004938643787869543,
22 | "acc_norm": 0.7617008564031069,
23 | "acc_norm_stderr": 0.00425172316377217
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.8689102314884214,
27 | "ppl_stderr": 0.0808940319922043,
28 | "acc": 0.6863962740151368,
29 | "acc_stderr": 0.006463833164285203
30 | },
31 | "openbookqa": {
32 | "acc": 0.314,
33 | "acc_stderr": 0.020776701920308997,
34 | "acc_norm": 0.428,
35 | "acc_norm_stderr": 0.022149790663861923
36 | },
37 | "piqa": {
38 | "acc": 0.7889009793253536,
39 | "acc_stderr": 0.00952137737873414,
40 | "acc_norm": 0.8063112078346029,
41 | "acc_norm_stderr": 0.009220384152336641
42 | },
43 | "sciq": {
44 | "acc": 0.937,
45 | "acc_stderr": 0.007687007876286428,
46 | "acc_norm": 0.888,
47 | "acc_norm_stderr": 0.009977753031397236
48 | },
49 | "siqa": {
50 | "acc": 0.45138178096212894,
51 | "acc_stderr": 0.01126045668162444,
52 | "acc_norm": 0.48311156601842375,
53 | "acc_norm_stderr": 0.011307614732827416
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.20930232558139536,
57 | "mc1_stderr": 0.014241219434785823,
58 | "mc2": 0.3348523259251629,
59 | "mc2_stderr": 0.01313621094524683
60 | },
61 | "winogrande": {
62 | "acc": 0.6803472770323599,
63 | "acc_stderr": 0.01310652851766514
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=mosaicml/mpt-7b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
83 | "batch_size": "8",
84 | "batch_sizes": [],
85 | "device": "cuda:2",
86 | "no_cache": false,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/external/openlm-research-open_llama_13b:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "lambada_openai": {
4 | "ppl": 3.569586285393247,
5 | "ppl_stderr": 0.07265272740503036,
6 | "acc": 0.7209392586842616,
7 | "acc_stderr": 0.006249003708978234
8 | },
9 | "sciq": {
10 | "acc": 0.941,
11 | "acc_stderr": 0.007454835650406722,
12 | "acc_norm": 0.914,
13 | "acc_norm_stderr": 0.008870325962594766
14 | }
15 | },
16 | "versions": {
17 | "lambada_openai": 0,
18 | "sciq": 0
19 | },
20 | "config": {
21 | "model": "gpt2",
22 | "model_args": "use_fast=False,pretrained=openlm-research/open_llama_13b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
23 | "num_fewshot": 0,
24 | "batch_size": "2",
25 | "batch_sizes": [],
26 | "device": "cuda:3",
27 | "no_cache": false,
28 | "limit": null,
29 | "bootstrap_iters": 100000,
30 | "description_dict": {}
31 | }
32 | }
--------------------------------------------------------------------------------
/evals/external/openlm-research-open_llama_3b_v2.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.3387372013651877,
5 | "acc_stderr": 0.013830568927974332,
6 | "acc_norm": 0.3609215017064846,
7 | "acc_norm_stderr": 0.014034761386175458
8 | },
9 | "arc_easy": {
10 | "acc": 0.6759259259259259,
11 | "acc_stderr": 0.00960372885009539,
12 | "acc_norm": 0.63510101010101,
13 | "acc_norm_stderr": 0.009878157021155649
14 | },
15 | "boolq": {
16 | "acc": 0.6568807339449542,
17 | "acc_stderr": 0.008303445777655941
18 | },
19 | "hellaswag": {
20 | "acc": 0.5223063134833699,
21 | "acc_stderr": 0.0049848133910162145,
22 | "acc_norm": 0.6998605855407289,
23 | "acc_norm_stderr": 0.004573817163007456
24 | },
25 | "lambada_openai": {
26 | "ppl": 4.565625743504039,
27 | "ppl_stderr": 0.1034965673734545,
28 | "acc": 0.6673782262759558,
29 | "acc_stderr": 0.006564073374961233
30 | },
31 | "openbookqa": {
32 | "acc": 0.26,
33 | "acc_stderr": 0.019635965529725512,
34 | "acc_norm": 0.376,
35 | "acc_norm_stderr": 0.021683827539286122
36 | },
37 | "piqa": {
38 | "acc": 0.7665941240478781,
39 | "acc_stderr": 0.009869247889521001,
40 | "acc_norm": 0.778563656147987,
41 | "acc_norm_stderr": 0.009687616456840284
42 | },
43 | "sciq": {
44 | "acc": 0.924,
45 | "acc_stderr": 0.008384169266796386,
46 | "acc_norm": 0.878,
47 | "acc_norm_stderr": 0.010354864712936698
48 | },
49 | "siqa": {
50 | "acc": 0.4119754350051177,
51 | "acc_stderr": 0.011137360400975268,
52 | "acc_norm": 0.4524053224155578,
53 | "acc_norm_stderr": 0.011262695440459566
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.21297429620563035,
57 | "mc1_stderr": 0.014332203787059685,
58 | "mc2": 0.3458747299959986,
59 | "mc2_stderr": 0.013215129281312441
60 | },
61 | "winogrande": {
62 | "acc": 0.6290449881610103,
63 | "acc_stderr": 0.01357639990223157
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=False,pretrained=openlm-research/open_llama_3b_v2,trust_remote_code=True,low_cpu_mem_usage=True,dtype=bfloat16",
82 | "num_fewshot": 0,
83 | "batch_size": "8",
84 | "batch_sizes": [],
85 | "device": "cuda:4",
86 | "no_cache": true,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/external/openlm-research-open_llama_7b_v2.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.38822525597269625,
5 | "acc_stderr": 0.014241614207414034,
6 | "acc_norm": 0.42406143344709896,
7 | "acc_norm_stderr": 0.014441889627464392
8 | },
9 | "arc_easy": {
10 | "acc": 0.7192760942760943,
11 | "acc_stderr": 0.009220526174711363,
12 | "acc_norm": 0.6965488215488216,
13 | "acc_norm_stderr": 0.009433837434252279
14 | },
15 | "boolq": {
16 | "acc": 0.7140672782874617,
17 | "acc_stderr": 0.00790303735916362
18 | },
19 | "hellaswag": {
20 | "acc": 0.5569607647878908,
21 | "acc_stderr": 0.004957296691391575,
22 | "acc_norm": 0.7464648476399124,
23 | "acc_norm_stderr": 0.004341454841892331
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.8265934555236463,
27 | "ppl_stderr": 0.07878439404908999,
28 | "acc": 0.7104599262565496,
29 | "acc_stderr": 0.006318823234213216
30 | },
31 | "openbookqa": {
32 | "acc": 0.302,
33 | "acc_stderr": 0.020553269174209184,
34 | "acc_norm": 0.402,
35 | "acc_norm_stderr": 0.021948929609938606
36 | },
37 | "piqa": {
38 | "acc": 0.7916213275299239,
39 | "acc_stderr": 0.009476125383049447,
40 | "acc_norm": 0.8030467899891186,
41 | "acc_norm_stderr": 0.009278918898006383
42 | },
43 | "sciq": {
44 | "acc": 0.938,
45 | "acc_stderr": 0.007629823996280304,
46 | "acc_norm": 0.901,
47 | "acc_norm_stderr": 0.009449248027662747
48 | },
49 | "siqa": {
50 | "acc": 0.4196519959058342,
51 | "acc_stderr": 0.011167032303390547,
52 | "acc_norm": 0.4600818833162743,
53 | "acc_norm_stderr": 0.011277955967920398
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.22643818849449204,
57 | "mc1_stderr": 0.014651337324602576,
58 | "mc2": 0.3456877328963021,
59 | "mc2_stderr": 0.013482248222806824
60 | },
61 | "winogrande": {
62 | "acc": 0.6582478295185478,
63 | "acc_stderr": 0.013330103018622861
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=False,pretrained=openlm-research/open_llama_7b_v2,trust_remote_code=True,low_cpu_mem_usage=True,dtype=bfloat16",
82 | "num_fewshot": 0,
83 | "batch_size": "2",
84 | "batch_sizes": [],
85 | "device": "cuda:0",
86 | "no_cache": false,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/external/tiiuae_falcon-7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.40273037542662116,
5 | "acc_stderr": 0.014332236306790149,
6 | "acc_norm": 0.43686006825938567,
7 | "acc_norm_stderr": 0.014494421584256519
8 | },
9 | "arc_easy": {
10 | "acc": 0.7441077441077442,
11 | "acc_stderr": 0.008953950243013991,
12 | "acc_norm": 0.7079124579124579,
13 | "acc_norm_stderr": 0.00933070561656907
14 | },
15 | "boolq": {
16 | "acc": 0.735474006116208,
17 | "acc_stderr": 0.007714546144910642
18 | },
19 | "hellaswag": {
20 | "acc": 0.5771758613821948,
21 | "acc_stderr": 0.004929983692795067,
22 | "acc_norm": 0.7634933280223063,
23 | "acc_norm_stderr": 0.004240683281093403
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.3698354504387775,
27 | "ppl_stderr": 0.06490807599596318,
28 | "acc": 0.7455850960605472,
29 | "acc_stderr": 0.006067809764031527
30 | },
31 | "openbookqa": {
32 | "acc": 0.306,
33 | "acc_stderr": 0.020629569998345403,
34 | "acc_norm": 0.442,
35 | "acc_norm_stderr": 0.02223197069632112
36 | },
37 | "piqa": {
38 | "acc": 0.794885745375408,
39 | "acc_stderr": 0.009420971671017915,
40 | "acc_norm": 0.8057671381936888,
41 | "acc_norm_stderr": 0.009230209366168272
42 | },
43 | "sciq": {
44 | "acc": 0.94,
45 | "acc_stderr": 0.00751375115747492,
46 | "acc_norm": 0.915,
47 | "acc_norm_stderr": 0.008823426366942328
48 | },
49 | "siqa": {
50 | "acc": 0.42067553735926305,
51 | "acc_stderr": 0.011170778517705619,
52 | "acc_norm": 0.4600818833162743,
53 | "acc_norm_stderr": 0.011277955967920396
54 | },
55 | "winogrande": {
56 | "acc": 0.6724546172059984,
57 | "acc_stderr": 0.013190169546797016
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=tiiuae/falcon-7b,dtype=bfloat16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/external/togethercomputer-RedPajama-INCITE-7B-Base2.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.3771331058020478,
5 | "acc_stderr": 0.014163366896192593,
6 | "acc_norm": 0.39419795221843,
7 | "acc_norm_stderr": 0.014280522667467323
8 | },
9 | "arc_easy": {
10 | "acc": 0.7234848484848485,
11 | "acc_stderr": 0.00917788010146828,
12 | "acc_norm": 0.6919191919191919,
13 | "acc_norm_stderr": 0.009473887075826333
14 | },
15 | "boolq": {
16 | "acc": 0.7076452599388379,
17 | "acc_stderr": 0.00795527890990574
18 | },
19 | "hellaswag": {
20 | "acc": 0.5256920932085242,
21 | "acc_stderr": 0.004983189711208505,
22 | "acc_norm": 0.7033459470225055,
23 | "acc_norm_stderr": 0.004558491550673688
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.9176883876529036,
27 | "ppl_stderr": 0.08360630042095321,
28 | "acc": 0.713370851930914,
29 | "acc_stderr": 0.006299845944000654
30 | },
31 | "openbookqa": {
32 | "acc": 0.29,
33 | "acc_stderr": 0.020313179231745186,
34 | "acc_norm": 0.406,
35 | "acc_norm_stderr": 0.021983962090086333
36 | },
37 | "piqa": {
38 | "acc": 0.7714907508161044,
39 | "acc_stderr": 0.009796313511829524,
40 | "acc_norm": 0.7736670293797606,
41 | "acc_norm_stderr": 0.009763294246879415
42 | },
43 | "sciq": {
44 | "acc": 0.927,
45 | "acc_stderr": 0.008230354715244055,
46 | "acc_norm": 0.897,
47 | "acc_norm_stderr": 0.009616833339695796
48 | },
49 | "siqa": {
50 | "acc": 0.4257932446264074,
51 | "acc_stderr": 0.011188771652377858,
52 | "acc_norm": 0.44882292732855683,
53 | "acc_norm_stderr": 0.011254649314820132
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.23011015911872704,
57 | "mc1_stderr": 0.014734557959807769,
58 | "mc2": 0.3301333660822527,
59 | "mc2_stderr": 0.012995959559501016
60 | },
61 | "winogrande": {
62 | "acc": 0.6432517758484609,
63 | "acc_stderr": 0.013463393958028733
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=togethercomputer/RedPajama-INCITE-7B-Base,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
83 | "batch_size": "8",
84 | "batch_sizes": [],
85 | "device": "cuda:2",
86 | "no_cache": false,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-3b-v2-arc-challenge.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.36689419795221845,
5 | "acc_norm": 0.3967576791808874
6 | }
7 | },
8 | "versions": {
9 | "arc_challenge": 0
10 | },
11 | "config": {
12 | "model": "stabilityai/stablelm-base-alpha-3b-v2",
13 | "num_fewshot": 25,
14 | "batch_size": 8,
15 | "device": "cuda:0",
16 | "no_cache": true,
17 | "limit": null,
18 | "bootstrap_iters": 10000,
19 | "description_dict": null
20 | }
21 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-3b-v2-hellaswag.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "hellaswag": {
4 | "acc": 0.5239992033459471,
5 | "acc_norm": 0.7066321449910377
6 | }
7 | },
8 | "versions": {
9 | "hellaswag": 0
10 | },
11 | "config": {
12 | "model": "stabilityai/stablelm-base-alpha-3b-v2",
13 | "num_fewshot": 10,
14 | "batch_size": 8,
15 | "device": "cuda:0",
16 | "no_cache": true,
17 | "limit": null,
18 | "bootstrap_iters": 10000,
19 | "description_dict": null
20 | }
21 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-3b-v2-mmmlu.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "hendrycksTest-professional_psychology": {
4 | "acc": 0.2826797385620915,
5 | "acc_norm": 0.27124183006535946
6 | },
7 | "hendrycksTest-abstract_algebra": {
8 | "acc": 0.26,
9 | "acc_norm": 0.29
10 | },
11 | "hendrycksTest-prehistory": {
12 | "acc": 0.3271604938271605,
13 | "acc_norm": 0.26851851851851855
14 | },
15 | "hendrycksTest-international_law": {
16 | "acc": 0.23140495867768596,
17 | "acc_norm": 0.36363636363636365
18 | },
19 | "hendrycksTest-logical_fallacies": {
20 | "acc": 0.20245398773006135,
21 | "acc_norm": 0.22085889570552147
22 | },
23 | "hendrycksTest-professional_medicine": {
24 | "acc": 0.30514705882352944,
25 | "acc_norm": 0.33455882352941174
26 | },
27 | "hendrycksTest-high_school_european_history": {
28 | "acc": 0.3151515151515151,
29 | "acc_norm": 0.296969696969697
30 | },
31 | "hendrycksTest-high_school_physics": {
32 | "acc": 0.2251655629139073,
33 | "acc_norm": 0.2582781456953642
34 | },
35 | "hendrycksTest-management": {
36 | "acc": 0.30097087378640774,
37 | "acc_norm": 0.30097087378640774
38 | },
39 | "hendrycksTest-college_mathematics": {
40 | "acc": 0.17,
41 | "acc_norm": 0.24
42 | },
43 | "hendrycksTest-college_computer_science": {
44 | "acc": 0.29,
45 | "acc_norm": 0.24
46 | },
47 | "hendrycksTest-human_sexuality": {
48 | "acc": 0.40458015267175573,
49 | "acc_norm": 0.3893129770992366
50 | },
51 | "hendrycksTest-college_biology": {
52 | "acc": 0.2708333333333333,
53 | "acc_norm": 0.2569444444444444
54 | },
55 | "hendrycksTest-high_school_computer_science": {
56 | "acc": 0.34,
57 | "acc_norm": 0.3
58 | },
59 | "hendrycksTest-high_school_psychology": {
60 | "acc": 0.3467889908256881,
61 | "acc_norm": 0.326605504587156
62 | },
63 | "hendrycksTest-high_school_chemistry": {
64 | "acc": 0.2413793103448276,
65 | "acc_norm": 0.28078817733990147
66 | },
67 | "hendrycksTest-astronomy": {
68 | "acc": 0.3355263157894737,
69 | "acc_norm": 0.3355263157894737
70 | },
71 | "hendrycksTest-medical_genetics": {
72 | "acc": 0.39,
73 | "acc_norm": 0.4
74 | },
75 | "hendrycksTest-nutrition": {
76 | "acc": 0.35947712418300654,
77 | "acc_norm": 0.3758169934640523
78 | },
79 | "hendrycksTest-moral_disputes": {
80 | "acc": 0.3554913294797688,
81 | "acc_norm": 0.33815028901734107
82 | },
83 | "hendrycksTest-computer_security": {
84 | "acc": 0.38,
85 | "acc_norm": 0.39
86 | },
87 | "hendrycksTest-anatomy": {
88 | "acc": 0.32592592592592595,
89 | "acc_norm": 0.2814814814814815
90 | },
91 | "hendrycksTest-formal_logic": {
92 | "acc": 0.30952380952380953,
93 | "acc_norm": 0.3253968253968254
94 | },
95 | "hendrycksTest-high_school_us_history": {
96 | "acc": 0.3088235294117647,
97 | "acc_norm": 0.29901960784313725
98 | },
99 | "hendrycksTest-security_studies": {
100 | "acc": 0.3224489795918367,
101 | "acc_norm": 0.23265306122448978
102 | },
103 | "hendrycksTest-high_school_mathematics": {
104 | "acc": 0.23703703703703705,
105 | "acc_norm": 0.2814814814814815
106 | },
107 | "hendrycksTest-high_school_macroeconomics": {
108 | "acc": 0.2794871794871795,
109 | "acc_norm": 0.2794871794871795
110 | },
111 | "hendrycksTest-clinical_knowledge": {
112 | "acc": 0.26037735849056604,
113 | "acc_norm": 0.32075471698113206
114 | },
115 | "hendrycksTest-us_foreign_policy": {
116 | "acc": 0.39,
117 | "acc_norm": 0.41
118 | },
119 | "hendrycksTest-virology": {
120 | "acc": 0.35542168674698793,
121 | "acc_norm": 0.3493975903614458
122 | },
123 | "hendrycksTest-public_relations": {
124 | "acc": 0.34545454545454546,
125 | "acc_norm": 0.3
126 | },
127 | "hendrycksTest-world_religions": {
128 | "acc": 0.4093567251461988,
129 | "acc_norm": 0.4619883040935672
130 | },
131 | "hendrycksTest-college_physics": {
132 | "acc": 0.3137254901960784,
133 | "acc_norm": 0.3235294117647059
134 | },
135 | "hendrycksTest-high_school_biology": {
136 | "acc": 0.267741935483871,
137 | "acc_norm": 0.2870967741935484
138 | },
139 | "hendrycksTest-business_ethics": {
140 | "acc": 0.33,
141 | "acc_norm": 0.28
142 | },
143 | "hendrycksTest-high_school_government_and_politics": {
144 | "acc": 0.30569948186528495,
145 | "acc_norm": 0.2849740932642487
146 | },
147 | "hendrycksTest-high_school_world_history": {
148 | "acc": 0.2742616033755274,
149 | "acc_norm": 0.27848101265822783
150 | },
151 | "hendrycksTest-jurisprudence": {
152 | "acc": 0.23148148148148148,
153 | "acc_norm": 0.24074074074074073
154 | },
155 | "hendrycksTest-miscellaneous": {
156 | "acc": 0.4227330779054917,
157 | "acc_norm": 0.41762452107279696
158 | },
159 | "hendrycksTest-marketing": {
160 | "acc": 0.3974358974358974,
161 | "acc_norm": 0.39316239316239315
162 | },
163 | "hendrycksTest-high_school_microeconomics": {
164 | "acc": 0.29411764705882354,
165 | "acc_norm": 0.3445378151260504
166 | },
167 | "hendrycksTest-econometrics": {
168 | "acc": 0.2543859649122807,
169 | "acc_norm": 0.24561403508771928
170 | },
171 | "hendrycksTest-conceptual_physics": {
172 | "acc": 0.3276595744680851,
173 | "acc_norm": 0.28936170212765955
174 | },
175 | "hendrycksTest-high_school_statistics": {
176 | "acc": 0.3287037037037037,
177 | "acc_norm": 0.33796296296296297
178 | },
179 | "hendrycksTest-sociology": {
180 | "acc": 0.3333333333333333,
181 | "acc_norm": 0.31343283582089554
182 | },
183 | "hendrycksTest-electrical_engineering": {
184 | "acc": 0.2689655172413793,
185 | "acc_norm": 0.25517241379310346
186 | },
187 | "hendrycksTest-elementary_mathematics": {
188 | "acc": 0.335978835978836,
189 | "acc_norm": 0.328042328042328
190 | },
191 | "hendrycksTest-high_school_geography": {
192 | "acc": 0.3181818181818182,
193 | "acc_norm": 0.3333333333333333
194 | },
195 | "hendrycksTest-philosophy": {
196 | "acc": 0.31511254019292606,
197 | "acc_norm": 0.3247588424437299
198 | },
199 | "hendrycksTest-moral_scenarios": {
200 | "acc": 0.26927374301675977,
201 | "acc_norm": 0.27150837988826815
202 | },
203 | "hendrycksTest-college_chemistry": {
204 | "acc": 0.34,
205 | "acc_norm": 0.36
206 | },
207 | "hendrycksTest-machine_learning": {
208 | "acc": 0.2767857142857143,
209 | "acc_norm": 0.2767857142857143
210 | },
211 | "hendrycksTest-professional_accounting": {
212 | "acc": 0.29432624113475175,
213 | "acc_norm": 0.3049645390070922
214 | },
215 | "hendrycksTest-professional_law": {
216 | "acc": 0.25488917861799215,
217 | "acc_norm": 0.258148631029987
218 | },
219 | "hendrycksTest-college_medicine": {
220 | "acc": 0.35260115606936415,
221 | "acc_norm": 0.3468208092485549
222 | },
223 | "hendrycksTest-global_facts": {
224 | "acc": 0.26,
225 | "acc_norm": 0.21
226 | },
227 | "hendrycksTest-human_aging": {
228 | "acc": 0.28699551569506726,
229 | "acc_norm": 0.2556053811659193
230 | }
231 | },
232 | "versions": {
233 | "hendrycksTest-professional_psychology": 0,
234 | "hendrycksTest-abstract_algebra": 0,
235 | "hendrycksTest-prehistory": 0,
236 | "hendrycksTest-international_law": 0,
237 | "hendrycksTest-logical_fallacies": 0,
238 | "hendrycksTest-professional_medicine": 0,
239 | "hendrycksTest-high_school_european_history": 0,
240 | "hendrycksTest-high_school_physics": 0,
241 | "hendrycksTest-management": 0,
242 | "hendrycksTest-college_mathematics": 0,
243 | "hendrycksTest-college_computer_science": 0,
244 | "hendrycksTest-human_sexuality": 0,
245 | "hendrycksTest-college_biology": 0,
246 | "hendrycksTest-high_school_computer_science": 0,
247 | "hendrycksTest-high_school_psychology": 0,
248 | "hendrycksTest-high_school_chemistry": 0,
249 | "hendrycksTest-astronomy": 0,
250 | "hendrycksTest-medical_genetics": 0,
251 | "hendrycksTest-nutrition": 0,
252 | "hendrycksTest-moral_disputes": 0,
253 | "hendrycksTest-computer_security": 0,
254 | "hendrycksTest-anatomy": 0,
255 | "hendrycksTest-formal_logic": 0,
256 | "hendrycksTest-high_school_us_history": 0,
257 | "hendrycksTest-security_studies": 0,
258 | "hendrycksTest-high_school_mathematics": 0,
259 | "hendrycksTest-high_school_macroeconomics": 0,
260 | "hendrycksTest-clinical_knowledge": 0,
261 | "hendrycksTest-us_foreign_policy": 0,
262 | "hendrycksTest-virology": 0,
263 | "hendrycksTest-public_relations": 0,
264 | "hendrycksTest-world_religions": 0,
265 | "hendrycksTest-college_physics": 0,
266 | "hendrycksTest-high_school_biology": 0,
267 | "hendrycksTest-business_ethics": 0,
268 | "hendrycksTest-high_school_government_and_politics": 0,
269 | "hendrycksTest-high_school_world_history": 0,
270 | "hendrycksTest-jurisprudence": 0,
271 | "hendrycksTest-miscellaneous": 0,
272 | "hendrycksTest-marketing": 0,
273 | "hendrycksTest-high_school_microeconomics": 0,
274 | "hendrycksTest-econometrics": 0,
275 | "hendrycksTest-conceptual_physics": 0,
276 | "hendrycksTest-high_school_statistics": 0,
277 | "hendrycksTest-sociology": 0,
278 | "hendrycksTest-electrical_engineering": 0,
279 | "hendrycksTest-elementary_mathematics": 0,
280 | "hendrycksTest-high_school_geography": 0,
281 | "hendrycksTest-philosophy": 0,
282 | "hendrycksTest-moral_scenarios": 0,
283 | "hendrycksTest-college_chemistry": 0,
284 | "hendrycksTest-machine_learning": 0,
285 | "hendrycksTest-professional_accounting": 0,
286 | "hendrycksTest-professional_law": 0,
287 | "hendrycksTest-college_medicine": 0,
288 | "hendrycksTest-global_facts": 0,
289 | "hendrycksTest-human_aging": 0
290 | },
291 | "config": {
292 | "model": "stabilityai/stablelm-base-alpha-3b-v2",
293 | "num_fewshot": 5,
294 | "batch_size": 8,
295 | "device": "cuda:0",
296 | "no_cache": true,
297 | "limit": null,
298 | "bootstrap_iters": 10000,
299 | "description_dict": null
300 | }
301 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-3b-v2-truthfulqa_mc.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "truthfulqa_mc": {
4 | "mc1": 0.22399020807833536,
5 | "mc2": 0.35873137294835583
6 | }
7 | },
8 | "versions": {
9 | "truthfulqa_mc": 1
10 | },
11 | "config": {
12 | "model": "stabilityai/stablelm-base-alpha-3b-v2",
13 | "num_fewshot": 0,
14 | "batch_size": 8,
15 | "device": "cuda:0",
16 | "no_cache": true,
17 | "limit": null,
18 | "bootstrap_iters": 10000,
19 | "description_dict": null
20 | }
21 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-7b-v2-arc-challenge.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.431740614334471,
5 | "acc_norm": 0.4726962457337884
6 | }
7 | },
8 | "versions": {
9 | "arc_challenge": 0
10 | },
11 | "config": {
12 | "model": "stabilityai/stablelm-base-alpha-7b-v2",
13 | "num_fewshot": 25,
14 | "batch_size": 8,
15 | "device": "cuda:0",
16 | "no_cache": true,
17 | "limit": null,
18 | "bootstrap_iters": 10000,
19 | "description_dict": null
20 | }
21 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-7b-v2-hellaswag.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "hellaswag": {
4 | "acc": 0.5721967735510854,
5 | "acc_norm": 0.7706632144991038
6 | }
7 | },
8 | "versions": {
9 | "hellaswag": 0
10 | },
11 | "config": {
12 | "model": "stabilityai/stablelm-base-alpha-7b-v2",
13 | "num_fewshot": 10,
14 | "batch_size": 8,
15 | "device": "cuda:0",
16 | "no_cache": true,
17 | "limit": null,
18 | "bootstrap_iters": 10000,
19 | "description_dict": null
20 | }
21 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-7b-v2-mmmlu.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "hendrycksTest-high_school_european_history": {
4 | "acc": 0.48484848484848486,
5 | "acc_norm": 0.47878787878787876
6 | },
7 | "hendrycksTest-college_chemistry": {
8 | "acc": 0.29,
9 | "acc_norm": 0.28
10 | },
11 | "hendrycksTest-international_law": {
12 | "acc": 0.38016528925619836,
13 | "acc_norm": 0.4214876033057851
14 | },
15 | "hendrycksTest-high_school_macroeconomics": {
16 | "acc": 0.36153846153846153,
17 | "acc_norm": 0.35128205128205126
18 | },
19 | "hendrycksTest-miscellaneous": {
20 | "acc": 0.6513409961685823,
21 | "acc_norm": 0.644955300127714
22 | },
23 | "hendrycksTest-professional_law": {
24 | "acc": 0.31877444589308995,
25 | "acc_norm": 0.3116036505867014
26 | },
27 | "hendrycksTest-medical_genetics": {
28 | "acc": 0.51,
29 | "acc_norm": 0.55
30 | },
31 | "hendrycksTest-high_school_world_history": {
32 | "acc": 0.3881856540084388,
33 | "acc_norm": 0.4050632911392405
34 | },
35 | "hendrycksTest-professional_medicine": {
36 | "acc": 0.4742647058823529,
37 | "acc_norm": 0.44485294117647056
38 | },
39 | "hendrycksTest-moral_disputes": {
40 | "acc": 0.42196531791907516,
41 | "acc_norm": 0.430635838150289
42 | },
43 | "hendrycksTest-high_school_geography": {
44 | "acc": 0.5404040404040404,
45 | "acc_norm": 0.5454545454545454
46 | },
47 | "hendrycksTest-high_school_microeconomics": {
48 | "acc": 0.42857142857142855,
49 | "acc_norm": 0.42436974789915966
50 | },
51 | "hendrycksTest-machine_learning": {
52 | "acc": 0.32142857142857145,
53 | "acc_norm": 0.32142857142857145
54 | },
55 | "hendrycksTest-security_studies": {
56 | "acc": 0.3510204081632653,
57 | "acc_norm": 0.2612244897959184
58 | },
59 | "hendrycksTest-world_religions": {
60 | "acc": 0.6608187134502924,
61 | "acc_norm": 0.695906432748538
62 | },
63 | "hendrycksTest-conceptual_physics": {
64 | "acc": 0.40425531914893614,
65 | "acc_norm": 0.3659574468085106
66 | },
67 | "hendrycksTest-high_school_physics": {
68 | "acc": 0.2582781456953642,
69 | "acc_norm": 0.2980132450331126
70 | },
71 | "hendrycksTest-nutrition": {
72 | "acc": 0.43137254901960786,
73 | "acc_norm": 0.45098039215686275
74 | },
75 | "hendrycksTest-high_school_psychology": {
76 | "acc": 0.581651376146789,
77 | "acc_norm": 0.5614678899082569
78 | },
79 | "hendrycksTest-professional_accounting": {
80 | "acc": 0.2907801418439716,
81 | "acc_norm": 0.30141843971631205
82 | },
83 | "hendrycksTest-human_aging": {
84 | "acc": 0.4304932735426009,
85 | "acc_norm": 0.39461883408071746
86 | },
87 | "hendrycksTest-college_physics": {
88 | "acc": 0.35294117647058826,
89 | "acc_norm": 0.3627450980392157
90 | },
91 | "hendrycksTest-high_school_chemistry": {
92 | "acc": 0.28078817733990147,
93 | "acc_norm": 0.3054187192118227
94 | },
95 | "hendrycksTest-high_school_biology": {
96 | "acc": 0.4290322580645161,
97 | "acc_norm": 0.43548387096774194
98 | },
99 | "hendrycksTest-us_foreign_policy": {
100 | "acc": 0.57,
101 | "acc_norm": 0.53
102 | },
103 | "hendrycksTest-philosophy": {
104 | "acc": 0.4758842443729904,
105 | "acc_norm": 0.4662379421221865
106 | },
107 | "hendrycksTest-logical_fallacies": {
108 | "acc": 0.4110429447852761,
109 | "acc_norm": 0.4294478527607362
110 | },
111 | "hendrycksTest-anatomy": {
112 | "acc": 0.48148148148148145,
113 | "acc_norm": 0.4888888888888889
114 | },
115 | "hendrycksTest-jurisprudence": {
116 | "acc": 0.4351851851851852,
117 | "acc_norm": 0.5
118 | },
119 | "hendrycksTest-high_school_computer_science": {
120 | "acc": 0.41,
121 | "acc_norm": 0.46
122 | },
123 | "hendrycksTest-elementary_mathematics": {
124 | "acc": 0.32275132275132273,
125 | "acc_norm": 0.32275132275132273
126 | },
127 | "hendrycksTest-abstract_algebra": {
128 | "acc": 0.27,
129 | "acc_norm": 0.28
130 | },
131 | "hendrycksTest-prehistory": {
132 | "acc": 0.44753086419753085,
133 | "acc_norm": 0.44135802469135804
134 | },
135 | "hendrycksTest-moral_scenarios": {
136 | "acc": 0.2581005586592179,
137 | "acc_norm": 0.26033519553072626
138 | },
139 | "hendrycksTest-college_medicine": {
140 | "acc": 0.4161849710982659,
141 | "acc_norm": 0.41040462427745666
142 | },
143 | "hendrycksTest-econometrics": {
144 | "acc": 0.2982456140350877,
145 | "acc_norm": 0.2807017543859649
146 | },
147 | "hendrycksTest-human_sexuality": {
148 | "acc": 0.5038167938931297,
149 | "acc_norm": 0.4961832061068702
150 | },
151 | "hendrycksTest-management": {
152 | "acc": 0.5242718446601942,
153 | "acc_norm": 0.5339805825242718
154 | },
155 | "hendrycksTest-computer_security": {
156 | "acc": 0.57,
157 | "acc_norm": 0.54
158 | },
159 | "hendrycksTest-college_computer_science": {
160 | "acc": 0.38,
161 | "acc_norm": 0.34
162 | },
163 | "hendrycksTest-public_relations": {
164 | "acc": 0.5636363636363636,
165 | "acc_norm": 0.5454545454545454
166 | },
167 | "hendrycksTest-sociology": {
168 | "acc": 0.5522388059701493,
169 | "acc_norm": 0.5124378109452736
170 | },
171 | "hendrycksTest-global_facts": {
172 | "acc": 0.29,
173 | "acc_norm": 0.28
174 | },
175 | "hendrycksTest-astronomy": {
176 | "acc": 0.42105263157894735,
177 | "acc_norm": 0.42105263157894735
178 | },
179 | "hendrycksTest-high_school_statistics": {
180 | "acc": 0.27314814814814814,
181 | "acc_norm": 0.2916666666666667
182 | },
183 | "hendrycksTest-professional_psychology": {
184 | "acc": 0.40522875816993464,
185 | "acc_norm": 0.39052287581699346
186 | },
187 | "hendrycksTest-high_school_us_history": {
188 | "acc": 0.37745098039215685,
189 | "acc_norm": 0.39705882352941174
190 | },
191 | "hendrycksTest-business_ethics": {
192 | "acc": 0.42,
193 | "acc_norm": 0.44
194 | },
195 | "hendrycksTest-clinical_knowledge": {
196 | "acc": 0.42641509433962266,
197 | "acc_norm": 0.4226415094339623
198 | },
199 | "hendrycksTest-college_biology": {
200 | "acc": 0.4236111111111111,
201 | "acc_norm": 0.4166666666666667
202 | },
203 | "hendrycksTest-formal_logic": {
204 | "acc": 0.42063492063492064,
205 | "acc_norm": 0.40476190476190477
206 | },
207 | "hendrycksTest-marketing": {
208 | "acc": 0.6965811965811965,
209 | "acc_norm": 0.6837606837606838
210 | },
211 | "hendrycksTest-college_mathematics": {
212 | "acc": 0.23,
213 | "acc_norm": 0.29
214 | },
215 | "hendrycksTest-electrical_engineering": {
216 | "acc": 0.4827586206896552,
217 | "acc_norm": 0.4
218 | },
219 | "hendrycksTest-high_school_government_and_politics": {
220 | "acc": 0.5284974093264249,
221 | "acc_norm": 0.5492227979274611
222 | },
223 | "hendrycksTest-virology": {
224 | "acc": 0.37349397590361444,
225 | "acc_norm": 0.39759036144578314
226 | },
227 | "hendrycksTest-high_school_mathematics": {
228 | "acc": 0.2518518518518518,
229 | "acc_norm": 0.28888888888888886
230 | }
231 | },
232 | "versions": {
233 | "hendrycksTest-high_school_european_history": 0,
234 | "hendrycksTest-college_chemistry": 0,
235 | "hendrycksTest-international_law": 0,
236 | "hendrycksTest-high_school_macroeconomics": 0,
237 | "hendrycksTest-miscellaneous": 0,
238 | "hendrycksTest-professional_law": 0,
239 | "hendrycksTest-medical_genetics": 0,
240 | "hendrycksTest-high_school_world_history": 0,
241 | "hendrycksTest-professional_medicine": 0,
242 | "hendrycksTest-moral_disputes": 0,
243 | "hendrycksTest-high_school_geography": 0,
244 | "hendrycksTest-high_school_microeconomics": 0,
245 | "hendrycksTest-machine_learning": 0,
246 | "hendrycksTest-security_studies": 0,
247 | "hendrycksTest-world_religions": 0,
248 | "hendrycksTest-conceptual_physics": 0,
249 | "hendrycksTest-high_school_physics": 0,
250 | "hendrycksTest-nutrition": 0,
251 | "hendrycksTest-high_school_psychology": 0,
252 | "hendrycksTest-professional_accounting": 0,
253 | "hendrycksTest-human_aging": 0,
254 | "hendrycksTest-college_physics": 0,
255 | "hendrycksTest-high_school_chemistry": 0,
256 | "hendrycksTest-high_school_biology": 0,
257 | "hendrycksTest-us_foreign_policy": 0,
258 | "hendrycksTest-philosophy": 0,
259 | "hendrycksTest-logical_fallacies": 0,
260 | "hendrycksTest-anatomy": 0,
261 | "hendrycksTest-jurisprudence": 0,
262 | "hendrycksTest-high_school_computer_science": 0,
263 | "hendrycksTest-elementary_mathematics": 0,
264 | "hendrycksTest-abstract_algebra": 0,
265 | "hendrycksTest-prehistory": 0,
266 | "hendrycksTest-moral_scenarios": 0,
267 | "hendrycksTest-college_medicine": 0,
268 | "hendrycksTest-econometrics": 0,
269 | "hendrycksTest-human_sexuality": 0,
270 | "hendrycksTest-management": 0,
271 | "hendrycksTest-computer_security": 0,
272 | "hendrycksTest-college_computer_science": 0,
273 | "hendrycksTest-public_relations": 0,
274 | "hendrycksTest-sociology": 0,
275 | "hendrycksTest-global_facts": 0,
276 | "hendrycksTest-astronomy": 0,
277 | "hendrycksTest-high_school_statistics": 0,
278 | "hendrycksTest-professional_psychology": 0,
279 | "hendrycksTest-high_school_us_history": 0,
280 | "hendrycksTest-business_ethics": 0,
281 | "hendrycksTest-clinical_knowledge": 0,
282 | "hendrycksTest-college_biology": 0,
283 | "hendrycksTest-formal_logic": 0,
284 | "hendrycksTest-marketing": 0,
285 | "hendrycksTest-college_mathematics": 0,
286 | "hendrycksTest-electrical_engineering": 0,
287 | "hendrycksTest-high_school_government_and_politics": 0,
288 | "hendrycksTest-virology": 0,
289 | "hendrycksTest-high_school_mathematics": 0
290 | },
291 | "config": {
292 | "model": "stabilityai/stablelm-base-alpha-7b-v2",
293 | "num_fewshot": 5,
294 | "batch_size": 8,
295 | "device": "cuda:0",
296 | "no_cache": true,
297 | "limit": null,
298 | "bootstrap_iters": 10000,
299 | "description_dict": null
300 | }
301 | }
--------------------------------------------------------------------------------
/evals/open_llm_leaderboard/stablelm-beta-7b-v2-truthfulqa_mc.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "truthfulqa_mc": {
4 | "mc1": 0.2423500611995104,
5 | "mc2": 0.3645856452722409
6 | }
7 | },
8 | "versions": {
9 | "truthfulqa_mc": 1
10 | },
11 | "config": {
12 | "model": "stabilityai/stablelm-base-alpha-7b-v2",
13 | "num_fewshot": 0,
14 | "batch_size": 8,
15 | "device": "cuda:0",
16 | "no_cache": true,
17 | "limit": null,
18 | "bootstrap_iters": 10000,
19 | "description_dict": null
20 | }
21 | }
--------------------------------------------------------------------------------
/evals/stablelm-3b-4e1t.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.3779863481228669,
5 | "acc_stderr": 0.014169664520303103,
6 | "acc_norm": 0.40017064846416384,
7 | "acc_norm_stderr": 0.014317197787809169
8 | },
9 | "arc_easy": {
10 | "acc": 0.7247474747474747,
11 | "acc_stderr": 0.009164888895174743,
12 | "acc_norm": 0.6771885521885522,
13 | "acc_norm_stderr": 0.009593950220366743
14 | },
15 | "boolq": {
16 | "acc": 0.7562691131498471,
17 | "acc_stderr": 0.007509067459407977
18 | },
19 | "hellaswag": {
20 | "acc": 0.5482971519617607,
21 | "acc_stderr": 0.004966448380104203,
22 | "acc_norm": 0.7389962158932484,
23 | "acc_norm_stderr": 0.004382844128643414
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.827229437157901,
27 | "ppl_stderr": 0.07919367133146168,
28 | "acc": 0.7063846303124394,
29 | "acc_stderr": 0.006344860619678723
30 | },
31 | "openbookqa": {
32 | "acc": 0.314,
33 | "acc_stderr": 0.020776701920308997,
34 | "acc_norm": 0.398,
35 | "acc_norm_stderr": 0.02191237788577997
36 | },
37 | "piqa": {
38 | "acc": 0.7921653971708379,
39 | "acc_stderr": 0.009466997964536423,
40 | "acc_norm": 0.7976060935799782,
41 | "acc_norm_stderr": 0.00937428968280767
42 | },
43 | "sciq": {
44 | "acc": 0.948,
45 | "acc_stderr": 0.007024624213817142,
46 | "acc_norm": 0.919,
47 | "acc_norm_stderr": 0.00863212103213998
48 | },
49 | "siqa": {
50 | "acc": 0.41914022517911975,
51 | "acc_stderr": 0.011165140708170328,
52 | "acc_norm": 0.44319344933469806,
53 | "acc_norm_stderr": 0.011240812731564952
54 | },
55 | "winogrande": {
56 | "acc": 0.665351223362273,
57 | "acc_stderr": 0.013261823629558375
58 | }
59 | },
60 | "versions": {
61 | "arc_challenge": 0,
62 | "arc_easy": 0,
63 | "boolq": 1,
64 | "hellaswag": 0,
65 | "lambada_openai": 0,
66 | "openbookqa": 0,
67 | "piqa": 0,
68 | "sciq": 0,
69 | "siqa": 0,
70 | "winogrande": 0
71 | },
72 | "config": {
73 | "model": "gpt2",
74 | "model_args": "pretrained=stablelm-3b-4e1t,dtype=bfloat16,trust_remote_code=True,low_cpu_mem_usage=True,use_fast=True",
75 | "num_fewshot": 0,
76 | "batch_size": "8",
77 | "batch_sizes": [],
78 | "device": "cuda",
79 | "no_cache": true,
80 | "limit": null,
81 | "bootstrap_iters": 100000,
82 | "description_dict": {}
83 | }
84 | }
--------------------------------------------------------------------------------
/evals/stablelm-base-alpha-3b-v2.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.3242320819112628,
5 | "acc_stderr": 0.013678810399518815,
6 | "acc_norm": 0.3506825938566553,
7 | "acc_norm_stderr": 0.013944635930726085
8 | },
9 | "arc_easy": {
10 | "acc": 0.6725589225589226,
11 | "acc_stderr": 0.009629415859100609,
12 | "acc_norm": 0.6325757575757576,
13 | "acc_norm_stderr": 0.009892552616211548
14 | },
15 | "boolq": {
16 | "acc": 0.645565749235474,
17 | "acc_stderr": 0.008366245832688784
18 | },
19 | "hellaswag": {
20 | "acc": 0.5105556662019518,
21 | "acc_stderr": 0.004988669343786956,
22 | "acc_norm": 0.6858195578570006,
23 | "acc_norm_stderr": 0.004632399677490817
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.995184075578421,
27 | "ppl_stderr": 0.08502310534019014,
28 | "acc": 0.7025033960799534,
29 | "acc_stderr": 0.006369088639380684
30 | },
31 | "openbookqa": {
32 | "acc": 0.264,
33 | "acc_stderr": 0.019732885585922098,
34 | "acc_norm": 0.386,
35 | "acc_norm_stderr": 0.021793529219281165
36 | },
37 | "piqa": {
38 | "acc": 0.7600652883569097,
39 | "acc_stderr": 0.009963625892809545,
40 | "acc_norm": 0.780195865070729,
41 | "acc_norm_stderr": 0.00966195861665176
42 | },
43 | "sciq": {
44 | "acc": 0.921,
45 | "acc_stderr": 0.008534156773333438,
46 | "acc_norm": 0.868,
47 | "acc_norm_stderr": 0.01070937396352803
48 | },
49 | "siqa": {
50 | "acc": 0.4247697031729785,
51 | "acc_stderr": 0.011185271257671346,
52 | "acc_norm": 0.4600818833162743,
53 | "acc_norm_stderr": 0.011277955967920396
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.22399020807833536,
57 | "mc1_stderr": 0.014594964329474202,
58 | "mc2": 0.35868737415331753,
59 | "mc2_stderr": 0.013670666454421172
60 | },
61 | "winogrande": {
62 | "acc": 0.6211523283346487,
63 | "acc_stderr": 0.013633724603180328
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=stabilityai/stablelm-base-alpha-3b-v2,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
83 | "batch_size": "8",
84 | "batch_sizes": [],
85 | "device": "cuda:2",
86 | "no_cache": false,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/stablelm-base-alpha-3b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.2363481228668942,
5 | "acc_stderr": 0.012414960524301829,
6 | "acc_norm": 0.257679180887372,
7 | "acc_norm_stderr": 0.012780770562768407
8 | },
9 | "arc_easy": {
10 | "acc": 0.4473905723905724,
11 | "acc_stderr": 0.010202832385415646,
12 | "acc_norm": 0.42045454545454547,
13 | "acc_norm_stderr": 0.010129114278546535
14 | },
15 | "boolq": {
16 | "acc": 0.5764525993883792,
17 | "acc_stderr": 0.008642220663071512
18 | },
19 | "hellaswag": {
20 | "acc": 0.329416450906194,
21 | "acc_stderr": 0.004690407826933909,
22 | "acc_norm": 0.38309101772555265,
23 | "acc_norm_stderr": 0.004851466623601455
24 | },
25 | "lambada_openai": {
26 | "ppl": 20.187359473367042,
27 | "ppl_stderr": 0.7391414436494796,
28 | "acc": 0.4172326799922375,
29 | "acc_stderr": 0.006869874864639983
30 | },
31 | "openbookqa": {
32 | "acc": 0.17,
33 | "acc_stderr": 0.016815633531393422,
34 | "acc_norm": 0.294,
35 | "acc_norm_stderr": 0.020395095484936614
36 | },
37 | "sciq": {
38 | "acc": 0.717,
39 | "acc_stderr": 0.01425181090648174,
40 | "acc_norm": 0.649,
41 | "acc_norm_stderr": 0.015100563798316403
42 | },
43 | "siqa": {
44 | "acc": 0.3561924257932446,
45 | "acc_stderr": 0.010836006561369118,
46 | "acc_norm": 0.4094165813715456,
47 | "acc_norm_stderr": 0.01112684957658903
48 | },
49 | "piqa": {
50 | "acc": 0.6381936887921654,
51 | "acc_stderr": 0.011211397313020371,
52 | "acc_norm": 0.6273122959738846,
53 | "acc_norm_stderr": 0.01128131833289774
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.22399020807833536,
57 | "mc1_stderr": 0.014594964329474205,
58 | "mc2": 0.4052844601694033,
59 | "mc2_stderr": 0.014547007787950397
60 | },
61 | "winogrande": {
62 | "acc": 0.526440410418311,
63 | "acc_stderr": 0.014032823874407229
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=stabilityai/stablelm-base-alpha-3b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
83 | "batch_size": "8",
84 | "batch_sizes": [],
85 | "device": null,
86 | "no_cache": true,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/stablelm-base-alpha-7b-v2.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.3848122866894198,
5 | "acc_stderr": 0.014218371065251107,
6 | "acc_norm": 0.4052901023890785,
7 | "acc_norm_stderr": 0.014346869060229334
8 | },
9 | "arc_easy": {
10 | "acc": 0.7319023569023569,
11 | "acc_stderr": 0.009089526578213694,
12 | "acc_norm": 0.6910774410774411,
13 | "acc_norm_stderr": 0.009481048387761353
14 | },
15 | "boolq": {
16 | "acc": 0.7030581039755351,
17 | "acc_stderr": 0.007991418738281637
18 | },
19 | "hellaswag": {
20 | "acc": 0.5553674566819359,
21 | "acc_stderr": 0.004959094146471525,
22 | "acc_norm": 0.7426807408882693,
23 | "acc_norm_stderr": 0.004362633637374482
24 | },
25 | "lambada_openai": {
26 | "ppl": 3.366151815984524,
27 | "ppl_stderr": 0.06573496070440447,
28 | "acc": 0.7418979235396856,
29 | "acc_stderr": 0.006096490478492321
30 | },
31 | "openbookqa": {
32 | "acc": 0.304,
33 | "acc_stderr": 0.020591649571224925,
34 | "acc_norm": 0.418,
35 | "acc_norm_stderr": 0.022080014812228134
36 | },
37 | "piqa": {
38 | "acc": 0.7845484221980413,
39 | "acc_stderr": 0.009592463115658117,
40 | "acc_norm": 0.8019586507072906,
41 | "acc_norm_stderr": 0.009298209954776726
42 | },
43 | "sciq": {
44 | "acc": 0.939,
45 | "acc_stderr": 0.007572076091557423,
46 | "acc_norm": 0.917,
47 | "acc_norm_stderr": 0.00872852720607479
48 | },
49 | "siqa": {
50 | "acc": 0.4242579324462641,
51 | "acc_stderr": 0.011183502662341787,
52 | "acc_norm": 0.4692937563971341,
53 | "acc_norm_stderr": 0.011292714928103489
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.2423500611995104,
57 | "mc1_stderr": 0.01500067437357034,
58 | "mc2": 0.36457253410937596,
59 | "mc2_stderr": 0.0135567172376818
60 | },
61 | "winogrande": {
62 | "acc": 0.6882399368587214,
63 | "acc_stderr": 0.013018571197638537
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=stabilityai/stablelm-base-alpha-7b-v2,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
83 | "batch_size": "2",
84 | "batch_sizes": [],
85 | "device": "cuda:2",
86 | "no_cache": false,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/evals/stablelm-base-alpha-7b.json:
--------------------------------------------------------------------------------
1 | {
2 | "results": {
3 | "arc_challenge": {
4 | "acc": 0.24829351535836178,
5 | "acc_stderr": 0.012624912868089755,
6 | "acc_norm": 0.27047781569965873,
7 | "acc_norm_stderr": 0.012980954547659556
8 | },
9 | "arc_easy": {
10 | "acc": 0.5054713804713805,
11 | "acc_stderr": 0.01025916922861504,
12 | "acc_norm": 0.44865319865319864,
13 | "acc_norm_stderr": 0.010205540414612876
14 | },
15 | "boolq": {
16 | "acc": 0.600611620795107,
17 | "acc_stderr": 0.008566178448007831
18 | },
19 | "hellaswag": {
20 | "acc": 0.3467436765584545,
21 | "acc_stderr": 0.004749606196363328,
22 | "acc_norm": 0.41216889065923124,
23 | "acc_norm_stderr": 0.004912192800263316
24 | },
25 | "lambada_openai": {
26 | "ppl": 9.459674222745228,
27 | "ppl_stderr": 0.33183598007493614,
28 | "acc": 0.5511352610130021,
29 | "acc_stderr": 0.006929452414790843
30 | },
31 | "openbookqa": {
32 | "acc": 0.214,
33 | "acc_stderr": 0.018359797502387025,
34 | "acc_norm": 0.32,
35 | "acc_norm_stderr": 0.02088234048876181
36 | },
37 | "piqa": {
38 | "acc": 0.6675734494015234,
39 | "acc_stderr": 0.010991141557445587,
40 | "acc_norm": 0.6735582154515778,
41 | "acc_norm_stderr": 0.010940467046177304
42 | },
43 | "sciq": {
44 | "acc": 0.801,
45 | "acc_stderr": 0.012631649083099177,
46 | "acc_norm": 0.697,
47 | "acc_norm_stderr": 0.014539683710535257
48 | },
49 | "siqa": {
50 | "acc": 0.394575230296827,
51 | "acc_stderr": 0.011059713589720797,
52 | "acc_norm": 0.4140225179119754,
53 | "acc_norm_stderr": 0.011145545345176117
54 | },
55 | "truthfulqa_mc": {
56 | "mc1": 0.23745410036719705,
57 | "mc1_stderr": 0.014896277441041836,
58 | "mc2": 0.3995908363542637,
59 | "mc2_stderr": 0.014371652685680641
60 | },
61 | "winogrande": {
62 | "acc": 0.5011838989739542,
63 | "acc_stderr": 0.014052446290529012
64 | }
65 | },
66 | "versions": {
67 | "arc_challenge": 0,
68 | "arc_easy": 0,
69 | "boolq": 1,
70 | "hellaswag": 0,
71 | "lambada_openai": 0,
72 | "openbookqa": 0,
73 | "piqa": 0,
74 | "sciq": 0,
75 | "siqa": 0,
76 | "truthfulqa_mc": 1,
77 | "winogrande": 0
78 | },
79 | "config": {
80 | "model": "gpt2",
81 | "model_args": "use_fast=True,pretrained=stabilityai/stablelm-base-alpha-7b,trust_remote_code=True,low_cpu_mem_usage=True,dtype=auto",
82 | "num_fewshot": 0,
83 | "batch_size": "2",
84 | "batch_sizes": [],
85 | "device": "cuda:1",
86 | "no_cache": true,
87 | "limit": null,
88 | "bootstrap_iters": 100000,
89 | "description_dict": {}
90 | }
91 | }
--------------------------------------------------------------------------------
/notebooks/stablelm-alpha.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "4weyZUFfgUlD"
7 | },
8 | "source": [
9 | "# StableLM-Alpha\n",
10 | "\n",
11 | "[](https://colab.research.google.com/github/Stability-AI/StableLM//blob/main/notebooks/stablelm-alpha.ipynb)\n",
12 | "\n",
13 | "
\n",
14 | "\n",
15 | "This notebook is designed to let you quickly generate text with the latest StableLM models (**StableLM-Alpha**) using Hugging Face's `transformers` library."
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": null,
21 | "metadata": {
22 | "id": "8xicyuk_Ezuw"
23 | },
24 | "outputs": [],
25 | "source": [
26 | "!nvidia-smi"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {
33 | "id": "V1Da2YDX71IF"
34 | },
35 | "outputs": [],
36 | "source": [
37 | "!pip install -U pip\n",
38 | "!pip install accelerate bitsandbytes torch transformers"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 3,
44 | "metadata": {
45 | "cellView": "form",
46 | "id": "sSifeGXKlIgY"
47 | },
48 | "outputs": [],
49 | "source": [
50 | "#@title Setup\n",
51 | "\n",
52 | "import torch\n",
53 | "from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList\n",
54 | "\n",
55 | "from IPython.display import Markdown, display\n",
56 | "def hr(): display(Markdown('---'))\n",
57 | "def cprint(msg: str, color: str = \"blue\", **kwargs) -> None:\n",
58 | " color_codes = {\n",
59 | " \"blue\": \"\\033[34m\",\n",
60 | " \"red\": \"\\033[31m\",\n",
61 | " \"green\": \"\\033[32m\",\n",
62 | " \"yellow\": \"\\033[33m\",\n",
63 | " \"purple\": \"\\033[35m\",\n",
64 | " \"cyan\": \"\\033[36m\",\n",
65 | " }\n",
66 | " \n",
67 | " if color not in color_codes:\n",
68 | " raise ValueError(f\"Invalid info color: `{color}`\")\n",
69 | " \n",
70 | " print(color_codes[color] + msg + \"\\033[0m\", **kwargs)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {
77 | "cellView": "form",
78 | "id": "dQZCeE-ujdzW"
79 | },
80 | "outputs": [],
81 | "source": [
82 | "#@title Pick Your Model\n",
 83 |     "#@markdown Refer to Hugging Face docs for more information on the parameters below: https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained\n",
84 | "\n",
85 | "# Choose model name\n",
86 | "model_name = \"stabilityai/stablelm-tuned-alpha-7b\" #@param [\"stabilityai/stablelm-tuned-alpha-7b\", \"stabilityai/stablelm-base-alpha-7b\", \"stabilityai/stablelm-tuned-alpha-3b\", \"stabilityai/stablelm-base-alpha-3b\"]\n",
87 | "\n",
88 | "cprint(f\"Using `{model_name}`\", color=\"blue\")\n",
89 | "\n",
90 | "# Select \"big model inference\" parameters\n",
91 | "torch_dtype = \"float16\" #@param [\"float16\", \"bfloat16\", \"float\"]\n",
92 | "load_in_8bit = False #@param {type:\"boolean\"}\n",
93 | "device_map = \"auto\"\n",
94 | "\n",
95 | "cprint(f\"Loading with: `{torch_dtype=}, {load_in_8bit=}, {device_map=}`\")\n",
96 | "\n",
97 | "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
98 | "model = AutoModelForCausalLM.from_pretrained(\n",
99 | " model_name,\n",
100 | " torch_dtype=getattr(torch, torch_dtype),\n",
101 | " load_in_8bit=load_in_8bit,\n",
102 | " device_map=device_map,\n",
103 | " offload_folder=\"./offload\",\n",
104 | ")"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 35,
110 | "metadata": {
111 | "cellView": "form",
112 | "colab": {
113 | "base_uri": "https://localhost:8080/",
114 | "height": 327
115 | },
116 | "id": "P01Db-SVwtPO",
117 | "outputId": "9911dead-44b8-43e2-de73-c40857131065"
118 | },
119 | "outputs": [
120 | {
121 | "name": "stdout",
122 | "output_type": "stream",
123 | "text": [
124 | "\u001b[34mSampling with: `max_new_tokens=128, temperature=0.7, top_k=0, top_p=0.9, do_sample=True`\u001b[0m\n"
125 | ]
126 | },
127 | {
128 | "data": {
129 | "text/markdown": [
130 | "---"
131 | ],
132 | "text/plain": [
133 | ""
134 | ]
135 | },
136 | "metadata": {},
137 | "output_type": "display_data"
138 | },
139 | {
140 | "name": "stdout",
141 | "output_type": "stream",
142 | "text": [
143 | "Can you write a song about a pirate at sea? \u001b[32mSure, here's a song about a pirate at sea:\n",
144 | "\n",
145 | "Verse 1:\n",
146 | "There he was, a pirate so bold\n",
147 | "Sailing the seas, his story untold\n",
148 | "His name was Captain Jack, and he ruled the waves\n",
149 | "A legend in the seas, he conquered all his foes\n",
150 | "\n",
151 | "Chorus:\n",
152 | "Oh, Captain Jack, the pirate of the sea\n",
153 | "Your bravery and your daring, set us all free\n",
154 | "From the tyranny of the sea, you led us to glory\n",
155 | "A legend in our hearts, you'll be remembered as our story\n",
156 | "\n",
157 | "Verse 2:\n",
158 | "He sailed the\u001b[0m\n"
159 | ]
160 | }
161 | ],
162 | "source": [
163 | "#@title Generate Text\n",
164 | "#@markdown Note: The model response is colored in green\n",
165 | "\n",
166 | "class StopOnTokens(StoppingCriteria):\n",
167 | " def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:\n",
168 | " stop_ids = [50278, 50279, 50277, 1, 0]\n",
169 | " for stop_id in stop_ids:\n",
170 | " if input_ids[0][-1] == stop_id:\n",
171 | " return True\n",
172 | " return False\n",
173 | "\n",
174 | "# Process the user prompt\n",
175 | "user_prompt = \"Can you write a song about a pirate at sea?\" #@param {type:\"string\"}\n",
176 | "if \"tuned\" in model_name:\n",
177 | " # Add system prompt for chat tuned models\n",
178 | " system_prompt = \"\"\"<|SYSTEM|># StableLM Tuned (Alpha version)\n",
179 | " - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.\n",
180 | " - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n",
181 | " - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.\n",
182 | " - StableLM will refuse to participate in anything that could harm a human.\n",
183 | " \"\"\"\n",
184 | " prompt = f\"{system_prompt}<|USER|>{user_prompt}<|ASSISTANT|>\"\n",
185 | "else:\n",
186 | " prompt = user_prompt\n",
187 | "\n",
188 | "# Sampling args\n",
189 | "max_new_tokens = 128 #@param {type:\"slider\", min:32.0, max:3072.0, step:32}\n",
190 | "temperature = 0.7 #@param {type:\"slider\", min:0.0, max:1.25, step:0.05}\n",
191 | "top_k = 0 #@param {type:\"slider\", min:0.0, max:1.0, step:0.05}\n",
192 | "top_p = 0.9 #@param {type:\"slider\", min:0.0, max:1.0, step:0.05}\n",
193 | "do_sample = True #@param {type:\"boolean\"}\n",
194 | "\n",
195 | "cprint(f\"Sampling with: `{max_new_tokens=}, {temperature=}, {top_k=}, {top_p=}, {do_sample=}`\")\n",
196 | "hr()\n",
197 | "\n",
198 | "# Create `generate` inputs\n",
199 | "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
200 | "inputs.to(model.device)\n",
201 | "\n",
202 | "# Generate\n",
203 | "tokens = model.generate(\n",
204 | " **inputs,\n",
205 | " max_new_tokens=max_new_tokens,\n",
206 | " temperature=temperature,\n",
207 | " top_k=top_k,\n",
208 | " top_p=top_p,\n",
209 | " do_sample=do_sample,\n",
210 | " pad_token_id=tokenizer.eos_token_id,\n",
211 | " stopping_criteria=StoppingCriteriaList([StopOnTokens()])\n",
212 | ")\n",
213 | "\n",
214 | "# Extract out only the completion tokens\n",
215 | "completion_tokens = tokens[0][inputs['input_ids'].size(1):]\n",
216 | "completion = tokenizer.decode(completion_tokens, skip_special_tokens=True)\n",
217 | "\n",
218 | "# Display\n",
219 | "print(user_prompt + \" \", end=\"\")\n",
220 | "cprint(completion, color=\"green\")"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "metadata": {
226 | "id": "rIZm5uwaQLa4"
227 | },
228 | "source": [
229 | "## License (Apache 2.0)\n",
230 | "\n",
231 | "Copyright (c) 2023 by [StabilityAI LTD](https://stability.ai/)\n",
232 | "\n",
233 | "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
234 | "you may not use this file except in compliance with the License.\n",
235 | "You may obtain a copy of the License at\n",
236 | "\n",
237 | " http://www.apache.org/licenses/LICENSE-2.0\n",
238 | "\n",
239 | "Unless required by applicable law or agreed to in writing, software\n",
240 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
241 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
242 | "See the License for the specific language governing permissions and\n",
243 | "limitations under the License."
244 | ]
245 | }
246 | ],
247 | "metadata": {
248 | "accelerator": "GPU",
249 | "colab": {
250 | "machine_shape": "hm",
251 | "provenance": []
252 | },
253 | "gpuClass": "standard",
254 | "kernelspec": {
255 | "display_name": "Python 3",
256 | "name": "python3"
257 | },
258 | "language_info": {
259 | "name": "python"
260 | }
261 | },
262 | "nbformat": 4,
263 | "nbformat_minor": 0
264 | }
265 |
--------------------------------------------------------------------------------