├── .gitignore ├── Fig ├── gt_intel.png └── overview.png ├── GenerationBench ├── GenerationTest │ ├── GEARLM │ │ ├── Simulated │ │ │ ├── __init__.py │ │ │ ├── cache_utils.py │ │ │ ├── compress_config.py │ │ │ ├── compress_function.py │ │ │ ├── cpp_kernels.py │ │ │ ├── modeling_llama_ablation.py │ │ │ ├── modeling_llama_new.py │ │ │ ├── modeling_mistral.py │ │ │ └── modeling_mixtral.py │ │ ├── TrueCompression │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ │ ├── TrueCompressFunction.py │ │ │ │ ├── TrueCompressLlama.py │ │ │ │ ├── __init__.py │ │ │ │ ├── cache_utils.py │ │ │ │ ├── compress_config.py │ │ │ │ ├── compressed_cache_utils.py │ │ │ │ ├── cpp_kernels.py │ │ │ │ ├── streaming_cache_utils.py │ │ │ │ └── utils.py │ │ │ └── old_models │ │ │ │ ├── CompressUtils │ │ │ │ ├── TrueCompressFunction.py │ │ │ │ ├── __init__.py │ │ │ │ └── compress_class.py │ │ │ │ ├── __init__.py │ │ │ │ └── modeling_llama_old.py │ │ └── __init__.py │ ├── Readme.md │ ├── evaluation_aqua_cot.py │ ├── evaluation_bbh_cot.py │ ├── evaluation_gsm8k.py │ ├── lib_prompt │ │ ├── aqua │ │ │ └── cot_prompt_8shots.txt │ │ ├── boolean_expressions.txt │ │ ├── causal_judgement.txt │ │ ├── date_understanding.txt │ │ ├── disambiguation_qa.txt │ │ ├── dyck_languages.txt │ │ ├── formal_fallacies.txt │ │ ├── geometric_shapes.txt │ │ ├── gsm8k_prompt_original.txt │ │ ├── hyperbaton.txt │ │ ├── logical_deduction_five_objects.txt │ │ ├── logical_deduction_seven_objects.txt │ │ ├── logical_deduction_three_objects.txt │ │ ├── mmlu-cot.json │ │ ├── movie_recommendation.txt │ │ ├── multistep_arithmetic_two.txt │ │ ├── navigate.txt │ │ ├── object_counting.txt │ │ ├── penguins_in_a_table.txt │ │ ├── prompt_8x8step.txt │ │ ├── reasoning_about_colored_objects.txt │ │ ├── ruin_names.txt │ │ ├── salient_translation_error_detection.txt │ │ ├── snarks.txt │ │ ├── sports_understanding.txt │ │ ├── temporal_sequences.txt │ │ ├── tracking_shuffled_objects_five_objects.txt │ │ ├── tracking_shuffled_objects_seven_objects.txt │ 
│ ├── tracking_shuffled_objects_three_objects.txt │ │ ├── web_of_lies.txt │ │ └── word_sorting.txt │ ├── run_template_llama-3-8b.sh │ └── run_template_llama2-13b.sh └── Readme.md ├── LICENSE ├── Readme.md ├── cuda_supported_gear ├── modeling_llama_kivi.py ├── modeling_llamagear.py ├── quant │ ├── __init__.py │ ├── csrc │ │ ├── gemv_cuda.cu │ │ ├── gemv_cuda.h │ │ ├── gemv_cuda_backup.cu │ │ └── pybind.cpp │ ├── gemv.py │ ├── matmul.py │ ├── new_pack.py │ ├── qmodule.py │ ├── setup.py │ ├── test.py │ ├── test1.py │ └── timeit_v2.py └── test.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | /cache 2 | *.pyc 3 | *.pt 4 | 5 | /LoftQ/cache 6 | /COT_hub/gsm8k/cache 7 | /COT_hub/gsm8k/outputs 8 | /COT_hub/gsm8k/results 9 | /COT_hub/gsm8k/lib_prompt 10 | /COT_hub/gsm8k/kvcache_distributions 11 | /COT_hub/MMLU/outputs 12 | /COT_hub/MMLU/results 13 | /COT_hub/BBH/outputs 14 | /COT_hub/BBH/results 15 | /COT_hub/BBH/lib_prompt 16 | /COT_hub/BBH/lib_prompt_multiround_claude_instant 17 | /COT_hub/BBH/lib_prompt_multiround_claude_instant_test 18 | /tests 19 | 20 | /GenerationBench/cache 21 | /GEARLM/build 22 | /GEARLM/dist 23 | 24 | /GEARLM/GEARLM.egg-info 25 | 26 | -------------------------------------------------------------------------------- /Fig/gt_intel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengear-project/GEAR/efe0bfe2b5a35b22122f7cefab5cde9db13a66cd/Fig/gt_intel.png -------------------------------------------------------------------------------- /Fig/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengear-project/GEAR/efe0bfe2b5a35b22122f7cefab5cde9db13a66cd/Fig/overview.png -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/Simulated/__init__.py: 
class CompressionConfig(dict):
    """Per-layer configuration for simulated KV-cache compression.

    All settings are stored as scalars at construction time;
    ``copy_for_all_attention`` broadcasts each scalar into a list of length
    ``attention_number`` so each attention layer can later be addressed by
    index.  Inherits ``dict`` so HuggingFace-style config plumbing accepts
    it, but the values live in instance attributes.
    """

    # Attributes that are replicated per attention layer by
    # copy_for_all_attention().  batch_num / attention_number stay scalar.
    _PER_LAYER_FIELDS = (
        "compress_method",
        "quantize_bit",
        "group_num",
        "rank",
        "prefill_rank",
        "loop",
        "top_k",
        "device_num",
        "left",
        "stage",
        "rankv",
        "prefill_rankv",
        "start_saving",
        "locality_saving",
        "token_preserving",
        "iter",
        "heavy_size",
        "recent_size",
        "streaming",
        "streaming_gap",
        "group_size",
        "stream_grouping",
    )

    def __init__(
        self,
        compress_method=None,
        attention_number=12,
        quantize_bit=0,
        group_num=0,
        group_size=0,
        rank=0.0,
        rankv=0.0,
        prefill_rank=0.0,
        prefill_rankv=0.0,
        loop=0,
        top_k=0.0,
        left=0.0,
        stage=1,
        device_num=0,
        batch_num=1,
        start_saving=0,
        locality_saving=0,
        token_preserving=False,
        streaming=False,
        streaming_gap=0,
        stream_grouping=False,
        iter=0,
        # h2o settings
        heavy_size=0,
        recent_size=0,
    ):
        self.compress_method = compress_method
        self.quantize_bit = quantize_bit
        self.group_num = group_num
        self.group_size = group_size
        self.rank = rank
        self.rankv = rankv
        # NOTE(review): 'ranv' is a historical typo; kept so any caller that
        # reads it keeps working.  'rankv' is the canonical attribute.
        self.ranv = rankv
        self.prefill_rank = prefill_rank
        self.prefill_rankv = prefill_rankv
        self.loop = loop
        self.device_num = device_num
        self.attention_number = attention_number
        self.top_k = top_k
        self.left = left
        self.batch_num = batch_num
        self.stage = stage
        self.start_saving = start_saving
        self.locality_saving = locality_saving
        self.token_preserving = token_preserving
        self.iter = iter
        self.heavy_size = heavy_size
        self.recent_size = recent_size
        self.streaming = streaming
        self.streaming_gap = streaming_gap
        self.stream_grouping = stream_grouping

    def create_attention_config(self, config):
        """Return a list repeating ``config`` once per attention layer."""
        return [config for _ in range(self.attention_number)]

    def copy_for_all_attention(self):
        """Broadcast every scalar setting into a per-layer list.

        Call at most once: a second call would wrap the lists in lists.
        """
        for field in self._PER_LAYER_FIELDS:
            setattr(self, field, self.create_attention_config(getattr(self, field)))

    def compress_ratio(
        self,
        compress_method,
        seqlen,
        model_dim,
        rank=0,
        rankv=0,
        quantize_bit=0,
        top_k=0,
        left=0.0,
        stage=1,
        batch_num=1,
    ):
        """Analytic compression ratio (uncompressed bits / compressed bits).

        Assumes the uncompressed cache is stored in 16-bit entries.  Returns
        ``None`` for an unrecognized non-``None`` method; the callers in this
        class only pass the methods handled below.
        """
        if compress_method is None:
            return 1.0
        if compress_method == "Picache":
            # Low-rank factorization (rank for K, rankv for V) plus
            # quantization of the factors.
            if seqlen > rank and seqlen > rankv:
                return (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        ((model_dim + seqlen * batch_num) * (rank + rankv))
                        * quantize_bit
                        / 16
                    )
                )
            if seqlen <= rank:
                # K side not factorized yet (sequence shorter than rank).
                return (
                    (
                        2
                        * seqlen
                        * batch_num
                        * model_dim
                        / (
                            (model_dim + seqlen * batch_num) * rankv
                            + seqlen * batch_num * model_dim
                        )
                    )
                    * 16
                    / quantize_bit
                )
            # Remaining case: seqlen > rank but seqlen <= rankv.
            return (
                (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        (model_dim + seqlen * batch_num) * rank
                        + seqlen * batch_num * model_dim
                    )
                )
                * 16
                / quantize_bit
            )
        if compress_method == "poweriteration":
            return (
                seqlen
                * batch_num
                * model_dim
                / ((model_dim + seqlen * batch_num) * rank)
            )
        if compress_method == "stagept":
            return (
                seqlen
                * batch_num
                * model_dim
                / (model_dim * rank + seqlen * batch_num * (rank / stage))
            )
        if compress_method in (
            "uniformquantization",
            "groupquantization",
            "sortquantization",
        ):
            return 16 / quantize_bit
        if compress_method == "pruning":
            return 1 / top_k
        if compress_method in (
            "densesparseuniformquantization",
            "densesparsesortquantization",
        ):
            # 'left' is the fraction kept dense (full precision outliers).
            return 1 / (quantize_bit / 16 + left)
        if compress_method == "pt+outlier":
            return (
                seqlen
                * batch_num
                * model_dim
                * 16
                / quantize_bit
                / ((model_dim + seqlen * batch_num) * rank)
            )

    def calculate_compress_ratio_list(self, seqlen, model_dim):
        """Populate ``self.compress_ratio_list`` with one ratio per layer.

        Must be called after ``copy_for_all_attention`` (the per-layer
        attributes are indexed).  Unknown methods contribute no entry,
        matching the original fall-through behavior.
        """
        self.compress_ratio_list = []
        for i, compress_method in enumerate(self.compress_method):
            if compress_method is None:
                kwargs = {}
            elif compress_method == "Picache":
                kwargs = dict(
                    rank=self.rank[i],
                    rankv=self.rankv[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            elif compress_method == "poweriteration":
                kwargs = dict(rank=self.rank[i], batch_num=self.batch_num)
            elif compress_method == "stagept":
                kwargs = dict(
                    rank=self.rank[i],
                    batch_num=self.batch_num,
                    stage=self.stage[i],
                )
            elif compress_method in (
                "uniformquantization",
                "groupquantization",
                "sortquantization",
            ):
                kwargs = dict(rank=0, quantize_bit=self.quantize_bit[i])
            elif compress_method == "pruning":
                kwargs = dict(top_k=self.top_k[i])
            elif compress_method in (
                "densesparseuniformquantization",
                "densesparsesortquantization",
            ):
                kwargs = dict(
                    quantize_bit=self.quantize_bit[i], left=self.left[i]
                )
            elif compress_method == "pt+outlier":
                kwargs = dict(
                    rank=self.rank[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            else:
                continue
            self.compress_ratio_list.append(
                self.compress_ratio(compress_method, seqlen, model_dim, **kwargs)
            )

    def calculate_compress_ratio_total(self):
        """Return the mean ratio over ``self.compress_ratio_list``."""
        return sum(self.compress_ratio_list) / len(self.compress_ratio_list)

    def __str__(self):
        return f"compress_method:{self.compress_method},\nquantize_bit:{self.quantize_bit},\nrank:{self.rank},\nloop:{self.loop},\ndevice_num:{self.device_num},\ncompressratio:{self.compress_ratio_list},\ncompressratio_total:{self.calculate_compress_ratio_total()}"
| if int(bare_metal_major) >= 11: 34 | cc_flag.append("-gencode") 35 | cc_flag.append("arch=compute_80,code=sm_80") 36 | if int(bare_metal_minor) >= 7: 37 | cc_flag.append("-gencode") 38 | cc_flag.append("arch=compute_90,code=sm_90") 39 | 40 | # Build path 41 | srcpath = pathlib.Path(__file__).parent.absolute() 42 | buildpath = srcpath / "build" 43 | _create_build_dir(buildpath) 44 | 45 | 46 | def _cpp_extention_load_helper(name, sources, extra_cuda_flags): 47 | return cpp_extension.load( 48 | name=name, 49 | sources=sources, 50 | build_directory=buildpath, 51 | extra_cflags=[ 52 | "-O3", 53 | ], 54 | extra_cuda_cflags=[ 55 | "-O3", 56 | "-gencode", 57 | "arch=compute_70,code=sm_70", 58 | "--use_fast_math", 59 | ] 60 | + extra_cuda_flags 61 | + cc_flag, 62 | verbose=1, 63 | ) 64 | 65 | 66 | extra_flags = [] 67 | 68 | cache_autogptq_cuda_256_sources = [ 69 | "./cache_autogptq_cuda_256.cpp", 70 | "./cache_autogptq_cuda_kernel_256.cu", 71 | ] 72 | cache_autogptq_cuda_256 = _cpp_extention_load_helper( 73 | "cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags 74 | ) 75 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import GearLlamaForCausalLMNew 2 | from .old_models import GearLlamaForCausalLM 3 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .TrueCompressLlama import GearLlamaForCausalLMNew 2 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/models/compress_config.py: -------------------------------------------------------------------------------- 1 | class 
class GPT2CompressConfig(dict):
    """Per-layer configuration for true (in-memory) KV-cache compression.

    Scalar settings are broadcast into per-layer lists of length
    ``attention_number`` by ``copy_for_all_attention``.  Inherits ``dict``
    for config-plumbing compatibility; values live in instance attributes.
    """

    # Attributes replicated per attention layer by copy_for_all_attention().
    _PER_LAYER_FIELDS = (
        "compress_method",
        "quantize_bit",
        "group_num",
        "rank",
        "loop",
        "top_k",
        "device_num",
        "left",
        "stage",
        "rankv",
        "start_saving",
        "locality_saving",
        "token_preserving",
        "iter",
        "heavy_size",
        "recent_size",
        "streaming",
        "streaming_gap",
    )

    def __init__(
        self,
        compress_method=None,
        attention_number=12,
        quantize_bit=0,
        group_num=0,
        rank=0.0,
        rankv=0.0,
        loop=0,
        top_k=0.0,
        left=0.0,
        stage=1,
        device_num=0,
        batch_num=1,
        start_saving=0.0,
        locality_saving=0.0,
        token_preserving=False,
        streaming=False,
        streaming_gap=0,
        iter=0,
        # h2o settings
        heavy_size=0,
        recent_size=0,
    ):
        self.compress_method = compress_method
        self.quantize_bit = quantize_bit
        self.group_num = group_num
        self.rank = rank
        self.rankv = rankv
        # NOTE(review): 'ranv' is a historical typo; kept for any caller that
        # reads it.  'rankv' is the canonical attribute.
        self.ranv = rankv
        self.loop = loop
        self.device_num = device_num
        self.attention_number = attention_number
        self.top_k = top_k
        self.left = left
        self.batch_num = batch_num
        self.stage = stage
        self.start_saving = start_saving
        self.locality_saving = locality_saving
        self.token_preserving = token_preserving
        self.iter = iter
        self.heavy_size = heavy_size
        self.recent_size = recent_size
        self.streaming = streaming
        self.streaming_gap = streaming_gap

    def create_attention_config(self, config):
        """Return a list repeating ``config`` once per attention layer."""
        return [config for _ in range(self.attention_number)]

    def copy_for_all_attention(self):
        """Broadcast every scalar setting into a per-layer list.

        Call at most once; a second call would wrap the lists in lists.
        """
        for field in self._PER_LAYER_FIELDS:
            setattr(self, field, self.create_attention_config(getattr(self, field)))

    def compress_ratio(
        self,
        compress_method,
        seqlen,
        model_dim,
        rank=0,
        rankv=0,
        quantize_bit=0,
        top_k=0,
        left=0.0,
        stage=1,
        batch_num=1,
    ):
        """Analytic compression ratio (uncompressed bits / compressed bits).

        Assumes 16-bit uncompressed entries.  Returns ``None`` for an
        unrecognized non-``None`` method; callers in this class only pass
        the methods handled below.
        """
        if compress_method is None:
            return 1.0
        if compress_method == "Picache":
            # Low-rank factorization (rank for K, rankv for V) plus
            # quantization of the factors.
            if seqlen > rank and seqlen > rankv:
                return (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        ((model_dim + seqlen * batch_num) * (rank + rankv))
                        * quantize_bit
                        / 16
                    )
                )
            if seqlen <= rank:
                return (
                    (
                        2
                        * seqlen
                        * batch_num
                        * model_dim
                        / (
                            (model_dim + seqlen * batch_num) * rankv
                            + seqlen * batch_num * model_dim
                        )
                    )
                    * 16
                    / quantize_bit
                )
            # Remaining case: seqlen > rank but seqlen <= rankv.
            return (
                (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        (model_dim + seqlen * batch_num) * rank
                        + seqlen * batch_num * model_dim
                    )
                )
                * 16
                / quantize_bit
            )
        if compress_method == "poweriteration":
            return (
                seqlen
                * batch_num
                * model_dim
                / ((model_dim + seqlen * batch_num) * rank)
            )
        if compress_method == "stagept":
            return (
                seqlen
                * batch_num
                * model_dim
                / (model_dim * rank + seqlen * batch_num * (rank / stage))
            )
        if compress_method in (
            "uniformquantization",
            "groupquantization",
            "sortquantization",
        ):
            return 16 / quantize_bit
        if compress_method == "pruning":
            return 1 / top_k
        if compress_method in (
            "densesparseuniformquantization",
            "densesparsesortquantization",
        ):
            # 'left' is the fraction kept as full-precision outliers.
            return 1 / (quantize_bit / 16 + left)
        if compress_method == "pt+outlier":
            return (
                seqlen
                * batch_num
                * model_dim
                * 16
                / quantize_bit
                / ((model_dim + seqlen * batch_num) * rank)
            )

    def calculate_compress_ratio_list(self, seqlen, model_dim):
        """Populate ``self.compress_ratio_list`` with one ratio per layer.

        Must be called after ``copy_for_all_attention``.  Unknown methods
        contribute no entry, matching the original fall-through behavior.
        """
        self.compress_ratio_list = []
        for i, compress_method in enumerate(self.compress_method):
            if compress_method is None:
                kwargs = {}
            elif compress_method == "Picache":
                kwargs = dict(
                    rank=self.rank[i],
                    rankv=self.rankv[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            elif compress_method == "poweriteration":
                kwargs = dict(rank=self.rank[i], batch_num=self.batch_num)
            elif compress_method == "stagept":
                kwargs = dict(
                    rank=self.rank[i],
                    batch_num=self.batch_num,
                    stage=self.stage[i],
                )
            elif compress_method in (
                "uniformquantization",
                "groupquantization",
                "sortquantization",
            ):
                kwargs = dict(rank=0, quantize_bit=self.quantize_bit[i])
            elif compress_method == "pruning":
                kwargs = dict(top_k=self.top_k[i])
            elif compress_method in (
                "densesparseuniformquantization",
                "densesparsesortquantization",
            ):
                kwargs = dict(
                    quantize_bit=self.quantize_bit[i], left=self.left[i]
                )
            elif compress_method == "pt+outlier":
                kwargs = dict(
                    rank=self.rank[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            else:
                continue
            self.compress_ratio_list.append(
                self.compress_ratio(compress_method, seqlen, model_dim, **kwargs)
            )

    def calculate_compress_ratio_total(self):
        """Return the mean ratio over ``self.compress_ratio_list``."""
        return sum(self.compress_ratio_list) / len(self.compress_ratio_list)

    def __str__(self):
        return f"compress_method:{self.compress_method},\nquantize_bit:{self.quantize_bit},\nrank:{self.rank},\nloop:{self.loop},\ndevice_num:{self.device_num},\ncompressratio:{self.compress_ratio_list},\ncompressratio_total:{self.calculate_compress_ratio_total()}"
def _get_cuda_bare_metal_version(cuda_dir):
    """Parse ``nvcc -V`` output for the toolkit at *cuda_dir*.

    Returns (raw nvcc output, major version string, minor version string).
    Raises CalledProcessError if nvcc is missing or fails, and ValueError
    if 'release' does not appear in the output.
    """
    raw_output = subprocess.check_output(
        [cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True
    )
    output = raw_output.split()
    # nvcc prints e.g. "... release 11.8, V11.8.89"; the token after
    # "release" is the version number (with a trailing comma).
    release_idx = output.index("release") + 1
    release = output[release_idx].split(".")
    bare_metal_major = release[0]
    # Only the first character of the minor component is kept ("8," -> "8").
    bare_metal_minor = release[1][0]

    return raw_output, bare_metal_major, bare_metal_minor


def _create_build_dir(buildpath):
    """Create *buildpath* if needed; only report failure when it truly
    does not exist afterwards (a concurrent mkdir is tolerated)."""
    try:
        os.mkdir(buildpath)
    except OSError:
        if not os.path.isdir(buildpath):
            print(f"Creation of the build directory {buildpath} failed")


# Check if cuda 11 is installed for compute capability 8.0
# (sm_80 = Ampere; sm_90 = Hopper, needs CUDA >= 11.7).
cc_flag = []
_, bare_metal_major, bare_metal_minor = _get_cuda_bare_metal_version(
    cpp_extension.CUDA_HOME
)
if int(bare_metal_major) >= 11:
    cc_flag.append("-gencode")
    cc_flag.append("arch=compute_80,code=sm_80")
    # NOTE(review): for CUDA 12.x this minor check also passes only when the
    # minor digit is >= 7 — presumably intended, but verify for 12.0-12.6.
    if int(bare_metal_minor) >= 7:
        cc_flag.append("-gencode")
        cc_flag.append("arch=compute_90,code=sm_90")

# Build path: compiled extension objects go in ./build next to this file.
srcpath = pathlib.Path(__file__).parent.absolute()
buildpath = srcpath / "build"
_create_build_dir(buildpath)


def _cpp_extention_load_helper(name, sources, extra_cuda_flags):
    """JIT-compile and load a CUDA extension via torch.utils.cpp_extension.

    Always targets sm_70 (Volta) plus whatever ``cc_flag`` detected above.
    """
    return cpp_extension.load(
        name=name,
        sources=sources,
        build_directory=buildpath,
        extra_cflags=[
            "-O3",
        ],
        extra_cuda_cflags=[
            "-O3",
            "-gencode",
            "arch=compute_70,code=sm_70",
            "--use_fast_math",
        ]
        + extra_cuda_flags
        + cc_flag,
        verbose=1,
    )


extra_flags = []

# Sources are resolved relative to the current working directory, not this
# file — presumably callers import this module from its own directory;
# TODO(review): confirm, or prefix with srcpath.
cache_autogptq_cuda_256_sources = [
    "./cache_autogptq_cuda_256.cpp",
    "./cache_autogptq_cuda_kernel_256.cu",
]
# Compiled at import time: importing this module triggers an nvcc build.
cache_autogptq_cuda_256 = _cpp_extention_load_helper(
    "cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags
)
def get_approximate_basis(
    A: Tensor, q: int, niter: Optional[int] = 2, M: Optional[Tensor] = None
) -> Tensor:
    """Return tensor :math:`Q` with :math:`q` orthonormal columns such
    that :math:`Q Q^H A` approximates :math:`A`. If :math:`M` is
    specified, then :math:`Q` is such that :math:`Q Q^H (A - M)`
    approximates :math:`A - M`.

    .. note:: The implementation is based on the Algorithm 4.4 from
              Halko et al, 2009.

    .. note:: For an adequate approximation of a k-rank matrix
              :math:`A`, where k is not known in advance but could be
              estimated, the number of :math:`Q` columns, q, can be
              chosen according to the following criteria: in general,
              :math:`k <= q <= min(2*k, m, n)`. For large low-rank
              matrices, take :math:`q = k + 5..10`. If k is
              relatively small compared to :math:`min(m, n)`, choosing
              :math:`q = k + 0..2` may be sufficient.

    .. note:: To obtain repeatable results, reset the seed for the
              pseudorandom number generator.

    Args::
        A (Tensor): the input tensor of size :math:`(*, m, n)`

        q (int): the dimension of subspace spanned by :math:`Q`
                 columns.

        niter (int, optional): the number of subspace iterations to
                               conduct; ``niter`` must be a
                               nonnegative integer. In most cases, the
                               default value 2 is more than enough.

        M (Tensor, optional): the input tensor's mean of size
                              :math:`(*, 1, n)`.

    References::
        - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
          structure with randomness: probabilistic algorithms for
          constructing approximate matrix decompositions,
          arXiv:0909.4061 [math.NA; math.PR], 2009.
    """
    # BUGFIX: the original body referenced ``_utils`` (torch._lowrank's
    # module-local helper import) which is undefined here, so every call
    # raised NameError.  The helpers are replaced with their explicit
    # torch equivalents below.
    niter = 2 if niter is None else niter
    m, n = A.shape[-2:]
    # Floating dtype for the random test matrix; non-floating inputs fall
    # back to float32 (mirrors torch._utils-style get_floating_dtype).
    dtype = A.dtype if A.is_floating_point() or A.is_complex() else torch.float32
    matmul = torch.matmul

    R = torch.randn(n, q, dtype=dtype, device=A.device)

    # The following code could be made faster using torch.geqrf + torch.ormqr
    # but geqrf is not differentiable.
    # Conjugate transpose ("transjugate"); .conj() is a no-op for real input.
    A_H = A.transpose(-2, -1).conj()
    if M is None:
        Q = torch.linalg.qr(matmul(A, R)).Q
        for _ in range(niter):
            Q = torch.linalg.qr(matmul(A_H, Q)).Q
            Q = torch.linalg.qr(matmul(A, Q)).Q
    else:
        M_H = M.transpose(-2, -1).conj()
        Q = torch.linalg.qr(matmul(A, R) - matmul(M, R)).Q
        for _ in range(niter):
            Q = torch.linalg.qr(matmul(A_H, Q) - matmul(M_H, Q)).Q
            Q = torch.linalg.qr(matmul(A, Q) - matmul(M, Q)).Q

    return Q
def detect_infnan(input_tensor, string):
    """Debug guard: fail loudly if *input_tensor* contains NaN or Inf.

    Args:
        input_tensor: any torch tensor to validate.
        string: context label prefixed to the error message.

    Raises:
        ValueError: if any element is NaN or Inf.

    BUGFIX: the original printed the message and then spun forever in
    ``while True: pass``, hanging the process (and pinning a CPU core)
    instead of surfacing the error; raising preserves the message and is
    debuggable.
    """
    if torch.isnan(input_tensor).any():
        raise ValueError(string + "has nan")
    if torch.isinf(input_tensor).any():
        raise ValueError(string + "has inf")
self.step = None 64 | self.min_p = None 65 | self.min_q = None 66 | self.step_p = None 67 | self.step_q = None 68 | self.left = compress_kwargs["left"] 69 | self.rank = compress_kwargs["rank"] 70 | self.loop = compress_kwargs["loop"] 71 | self.dtype = None 72 | self.shape = None 73 | self.shape_p = None 74 | self.shape_q = None 75 | self.quantize_part = None 76 | self.values = None 77 | self.indices = None 78 | self.p_base = None 79 | self.q_base = None 80 | self.counter = 0 81 | self.streaming_gap = compress_kwargs["streaming_gap"] 82 | self.buffer = None 83 | self.streaming = compress_kwargs["stream"] 84 | self.seq_length = 0 85 | self.input_shape = 0 86 | 87 | def compress_function(self, input_tensor: torch.Tensor): 88 | self.dtype = input_tensor.dtype 89 | # detect_infnan(input_tensor,"compress input tensor") 90 | if self.compress_mode == "uniform": 91 | output, shape, min, step = compress_function[self.compress_mode]( 92 | input_tensor, self.quantize_bit 93 | ) 94 | self.quantize_part = output 95 | self.min = min 96 | self.step = step 97 | self.shape = shape 98 | elif self.compress_mode == "outlier": 99 | output, shape, min, step, values, indices = compress_function[ 100 | self.compress_mode 101 | ](input_tensor, self.quantize_bit, self.left) 102 | self.quantize_part = output 103 | self.min = min 104 | self.step = step 105 | self.shape = shape 106 | self.values = values 107 | self.indices = indices 108 | elif self.compress_mode == "gear": 109 | output, shape, min, step, values, indices, p_base, q_base = ( 110 | compress_function[self.compress_mode]( 111 | input_tensor, self.quantize_bit, self.left, self.rank, self.loop 112 | ) 113 | ) 114 | self.quantize_part = output 115 | self.min = min 116 | self.step = step 117 | self.shape = shape 118 | self.values = values 119 | self.indices = indices 120 | self.p_base = p_base 121 | self.q_base = q_base 122 | elif self.compress_mode == "uniform_batch": 123 | output, shape, min, step = 
compress_function[self.compress_mode]( 124 | input_tensor, self.quantize_bit 125 | ) 126 | self.quantize_part = output 127 | self.min = min 128 | self.step = step 129 | self.shape = shape 130 | elif self.compress_mode == "outlier_batch": 131 | output, shape, min, step, values, indices = compress_function[ 132 | self.compress_mode 133 | ](input_tensor, self.quantize_bit, self.left) 134 | self.quantize_part = output 135 | self.min = min 136 | self.step = step 137 | self.shape = shape 138 | self.values = values 139 | self.indices = indices 140 | elif self.compress_mode == "gear_batch": 141 | output, shape, min, step, values, indices, p_base, q_base = ( 142 | compress_function[self.compress_mode]( 143 | input_tensor, self.quantize_bit, self.left, self.rank, self.loop 144 | ) 145 | ) 146 | self.quantize_part = output 147 | self.min = min 148 | self.step = step 149 | self.shape = shape 150 | self.values = values 151 | self.indices = indices 152 | self.p_base = p_base 153 | self.q_base = q_base 154 | elif self.compress_mode == "gear_tokenwiseQ": 155 | 156 | ( 157 | quantized_input, 158 | shape, 159 | min, 160 | step, 161 | p_base, 162 | q_base, 163 | shape_p, 164 | shape_q, 165 | min_p, 166 | min_q, 167 | scale_p, 168 | scale_q, 169 | ) = compress_function[self.compress_mode]( 170 | input_tensor, self.quantize_bit, self.rank, self.loop 171 | ) 172 | self.quantize_part = quantized_input 173 | self.min = min 174 | self.step = step 175 | self.shape = shape 176 | self.p_base = p_base 177 | self.q_base = q_base 178 | self.shape_p = shape_p 179 | self.shape_q = shape_q 180 | self.min_p = min_p 181 | self.min_q = min_q 182 | self.step_p = scale_p 183 | self.step_q = scale_q 184 | elif self.compress_mode == "gear_tokenwiseQ_nopq": 185 | quantized_input, shape, min, step, p_base, q_base = compress_function[ 186 | self.compress_mode 187 | ](input_tensor, self.quantize_bit, self.rank, self.loop) 188 | self.quantize_part = quantized_input 189 | self.min = min 190 | self.step = step 
191 | self.shape = shape 192 | self.p_base = p_base 193 | self.q_base = q_base 194 | # print("quantized_part_min_max:",self.quantize_part.min(),self.quantize_part.max(),"p_base_min_max:",self.min_p.min(),self.p_base[0].max(),"q_base_min_max:",self.min_q.min(),self.q_base[0].max()) 195 | # detect_infnan(quantized_input,"compress quantized_input tensor") 196 | # detect_infnan(self.p_base[0],"compress p_base tensor") 197 | # detect_infnan(self.q_base[0],"compress q_base tensor") 198 | 199 | def decompress_function(self): 200 | if self.compress_mode == "uniform": 201 | output = decompress_function[self.compress_mode]( 202 | self.quantize_part, 203 | self.quantize_bit, 204 | self.shape, 205 | self.min, 206 | self.step, 207 | self.dtype, 208 | ) 209 | elif self.compress_mode == "outlier": 210 | output = decompress_function[self.compress_mode]( 211 | self.quantize_part, 212 | self.quantize_bit, 213 | self.shape, 214 | self.min, 215 | self.step, 216 | self.dtype, 217 | self.values, 218 | self.indices, 219 | ) 220 | elif self.compress_mode == "gear": 221 | output = decompress_function[self.compress_mode]( 222 | self.quantize_part, 223 | self.quantize_bit, 224 | self.shape, 225 | self.min, 226 | self.step, 227 | self.dtype, 228 | self.values, 229 | self.indices, 230 | self.p_base, 231 | self.q_base, 232 | ) 233 | elif self.compress_mode == "uniform_batch": 234 | output = decompress_function[self.compress_mode]( 235 | self.quantize_part, 236 | self.quantize_bit, 237 | self.shape, 238 | self.min, 239 | self.step, 240 | self.dtype, 241 | ) 242 | elif self.compress_mode == "outlier_batch": 243 | output = decompress_function[self.compress_mode]( 244 | self.quantize_part, 245 | self.quantize_bit, 246 | self.shape, 247 | self.min, 248 | self.step, 249 | self.dtype, 250 | self.values, 251 | self.indices, 252 | ) 253 | elif self.compress_mode == "gear_batch": 254 | output = decompress_function[self.compress_mode]( 255 | self.quantize_part, 256 | self.quantize_bit, 257 | self.shape, 
258 | self.min, 259 | self.step, 260 | self.dtype, 261 | self.values, 262 | self.indices, 263 | self.p_base, 264 | self.q_base, 265 | ) 266 | elif self.compress_mode == "gear_tokenwiseQ": 267 | output = decompress_function[self.compress_mode]( 268 | self.quantize_part, 269 | self.quantize_bit, 270 | self.shape, 271 | self.min, 272 | self.step, 273 | self.p_base, 274 | self.q_base, 275 | self.shape_p, 276 | self.shape_q, 277 | self.min_p, 278 | self.min_q, 279 | self.step_p, 280 | self.step_q, 281 | self.dtype, 282 | ) 283 | elif self.compress_mode == "gear_tokenwiseQ_nopq": 284 | output = decompress_function[self.compress_mode]( 285 | self.quantize_part, 286 | self.quantize_bit, 287 | self.shape, 288 | self.min, 289 | self.step, 290 | self.p_base, 291 | self.q_base, 292 | self.dtype, 293 | ) 294 | # detect_infnan(output,"decompress") 295 | return output 296 | 297 | def compress(self, input_tensor): 298 | self.seq_length = input_tensor.shape[-2] 299 | # print("compress",self.counter) 300 | self.input_shape = input_tensor.shape 301 | if self.streaming is True: 302 | if self.counter % self.streaming_gap == 0: 303 | self.buffer = None 304 | self.compress_function(input_tensor) 305 | else: 306 | extract_id = self.counter % self.streaming_gap 307 | self.buffer = input_tensor[:, :, -extract_id:, :].clone() 308 | 309 | else: 310 | self.compress_function(input_tensor) 311 | 312 | def decompress(self): 313 | # print("decompress",self.counter) 314 | if self.streaming is True: 315 | if self.counter % self.streaming_gap == 0: 316 | output = self.decompress_function() 317 | if self.buffer is not None: 318 | output = torch.cat([output, self.buffer], dim=-2) 319 | 320 | else: 321 | output = self.decompress_function() 322 | 323 | output = torch.cat([output, self.buffer], dim=-2) 324 | 325 | self.counter += 1 326 | 327 | else: 328 | 329 | output = self.decompress_function() 330 | # detect_infnan(output,"decompress output") 331 | return output 332 | 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/old_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_llama_old import GearLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/__init__.py: -------------------------------------------------------------------------------- 1 | from .TrueCompression import GearLlamaForCausalLMNew 2 | from .TrueCompression import GearLlamaForCausalLM 3 | 4 | from .Simulated import CompressionConfig 5 | from .Simulated import SimulatedGearLlamaForCausalLM 6 | 7 | # from .modeling_llama_h2o import H2OLlamaForCausalLM, LlamaConfig 8 | from .Simulated import SimulatedGearMistralForCausalLM, MistralConfig 9 | from .Simulated import LlamaForCausalLMH2O 10 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/Readme.md: -------------------------------------------------------------------------------- 1 | ## GenerationTest 2 | Llama-2-7b and Llama-2-13b and Mistral-7b with GEAR tested on GSM8K, BBH and Aqua, all with COT prompts 3 | ## Usage 4 | `evaluation_bbh_cot.py` tests models on BBH-COT 5 | `evaluation_gsm8k.py` tests models on GSM8K and GSM8K-COT 6 | `evaluation_aqua_cot.py` tests models on Aqua-COT 7 | 8 | See sh files for more details. 9 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/aqua/cot_prompt_8shots.txt: -------------------------------------------------------------------------------- 1 | Question: Three birds are flying at a fast rate of 900 kilometers per hour. What is their speed in miles per minute?
[1km = 0.6 miles] 2 | Option: 3 | A)32400 4 | B)6000 5 | C)600 6 | D)60000 7 | E)10\nThe answer is 8 | Let's think step by step 9 | To calculate the equivalent of miles in a kilometer 10 | 0.6 kilometers = 1 mile. 11 | 900 kilometers = (0.6)*900 = 540 miles. 12 | Then there are 60 minutes in 1 hour. 13 | So speed in miles/minutes = 60 * 540 = 32400. 14 | The answer is A). 15 | 16 | Question: A ship is leaving a port. It takes 240 seconds to passes through a 750m channel to get to the port gates, and takes 60 seconds to pass through the gates of the port. What is its length? 17 | Option: 18 | A)100 m 19 | B)150 m 20 | C)200 m 21 | D)250 m 22 | E)300 m 23 | Let's think step by step 24 | Let the length of the ship be x metres and its speed be y m/sec. 25 | Then, x / y = 60 ⇒ y = x / 60 26 | Therefore (x + 750) / 240 = x / 60. 27 | So x = 250 m. 28 | The answer is D). 29 | 30 | Question: A rectangular piece of cloth 2 feet wide was cut lengthwise into two smaller rectangular pieces. The shorter piece was one-third of the length of the longer of the 2 new pieces and had an area of 12 square feet. What was the length Q in feet of the original piece of cloth before cutting? 31 | Option: 32 | A)6 33 | B)18 34 | C)24 35 | D)36 36 | E)48 37 | Let's think step by step 38 | The question says, length of shorter piece is 1/3rd of the length of longer of the two new pieces. 39 | This means after I split it into two pieces, I have a shorter piece and a longer one. 40 | The length of shorter is 1/3rd the length of longer. 41 | The 6 I found out was length of shorter piece. 42 | Then, length of longer should be 18 and total length should be 24. 43 | The answer is C). 44 | 45 | Question: In the xy-coordinate plane, which of the following points must lie on the line kx + 2y = 6 for every possible value of k? 
46 | Option: 47 | A)(1,1) 48 | B)(0,3) 49 | C)(2,0) 50 | D)(3,6) 51 | E)(6,3) 52 | Let's think step by step 53 | For a point to satisfy the given equation for all possible values of k, then x must equal 0. 54 | When x=0, the equation is simply 2y = 6, and then y = 3. 55 | The answer is B). 56 | 57 | Question: A travel company wants to charter a plane to the Bahamas. Chartering the plane costs $5,000. So far, 12 people have signed up for the trip. If the company charges $200 per ticket, how many more passengers must sign up for the trip before the company can make any profit on the charter? 58 | Option: 59 | A)7 60 | B)9 61 | C)13 62 | D)27 63 | E)45 64 | Let's think step by step 65 | Additional passengers (i.e., an integer, let's say x) who must sign up before the company can make any profit = ? 66 | 200 * 12 + 200 * x >= 5000\n2400 + 200x >= 5000. 67 | Then 200x >= 2600. 68 | And x >= 2600/200. 69 | So x >= 13. 70 | The answer is C). 71 | 72 | Question: Kirk sells cars. On two sales, Kirk has received commissions of $850, and $705, and he has one additional sale pending. If Kirk is to receive an average (arithmetic mean) commission of exactly $820 on the three sales, then the third commission must be: 73 | Option: 74 | A)$895 75 | B)$905 76 | C)$910 77 | D)$915 78 | E)$945 79 | Let's think step by step 80 | Let X be the third value. 81 | Then 850 + 705 + X = 820 * 3. 82 | Therefore 1555 + X = 2460. 83 | So X = 905. 84 | The answer is B). 85 | 86 | Question: A group of 5 friends were to contribute equally to purchase items from a store. If store offered them a promotional discount of 15%, how much did each friend pay, given a total bill of $ 1200? 87 | Option: 88 | A)$203 89 | B)$202 90 | C)$209 91 | D)$205 92 | E)$204 93 | Let's think step by step 94 | At the non-discounted price, each friend would pay $240, as $ 1200 divided by 5 friends is $240 per friend. 95 | But if the bill is 15% off, then each friend would pay 15% less. 
96 | 15% of $240 is $36, 97 | So each friend saves $36 and pays the remaining $204. 98 | The answer is E). 99 | 100 | Question: ABC company pays an average of $120 per vehicle each month in outdoor parking fees for three of its eight vehicles. The company pays garage parking fees for the remaining five vehicles. If ABC pays an average of $240 per vehicle overall each month for parking, how much does ABC pay per month in garage parking fees for its vehicles? 101 | Option: 102 | A)300 103 | B)420 104 | C)912 105 | D)1340 106 | E)1500 107 | Let's think step by step 108 | This means it could go out sometimes and it could stay at the garage sometimes, 109 | thus the ABC company still has to pay fees for going-out cars in garage 110 | So the math will be 240*8 cars-3*120=1340 fee for parking in garage. 111 | The answer is D). 112 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/boolean_expressions.txt: -------------------------------------------------------------------------------- 1 | Evaluate the result of a random Boolean expression. 2 | 3 | Q: not ( ( not not True ) ) is 4 | A: Let's think step by step. 5 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 6 | We first simplify this expression "Z" as follows: "Z = not ( ( not not True ) ) = not ( ( A ) )" where "A = not not True". 7 | Let's evaluate A: A = not not True = not (not True) = not False = True. 8 | Plugging in A, we get: Z = not ( ( A ) ) = not ( ( True ) ) = not True = False. So the answer is False. 9 | 10 | Q: True and False and not True and True is 11 | A: Let's think step by step. 12 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 
13 | We first simplify this expression "Z" as follows: "Z = True and False and not True and True = A and B" where "A = True and False" and "B = not True and True". 14 | Let's evaluate A: A = True and False = False. 15 | Let's evaluate B: B = not True and True = not (True and True) = not (True) = False. 16 | Plugging in A and B, we get: Z = A and B = False and False = False. So the answer is False. 17 | 18 | Q: not not ( not ( False ) ) is 19 | A: Let's think step by step. 20 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 21 | We first simplify this expression "Z" as follows: "Z = not not ( not ( False ) ) = not not ( A )" where "A = not ( False )". 22 | Let's evaluate A: A = not ( False ) = not False = True. 23 | Plugging in A, we get: Z = not not ( A ) = not not (True) = not not False = True. So the answer is True. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/causal_judgement.txt: -------------------------------------------------------------------------------- 1 | Answer questions about causal attribution. 2 | 3 | Q: How would a typical person answer each of the following questions about causation? 4 | Frank T., had an ongoing dispute with his neighbor over a stretch of land and one day decided to shoot his neighbor in the body. Frank T. had no experience with guns, his hand slipped on the barrel of the gun, and the shot went wild. Nonetheless, the bullet bounced off a large boulder several feet away and hit the neighbor's body, causing significant injury. Did Frank T. intentionally shoot his neighbor in the body? 5 | Options: 6 | - Yes 7 | - No 8 | A: Let's think step by step. 9 | Here in this question, we are told that "Frank T. had no experience with guns, his hand slipped on the barrel of the gun, and the shot went wild." 
A typical person would assume that this passage suggests that Frank T. had no intention of shooting and injuring someone and that the bullet accidentally hit the neighbor's body; therefore, we conclude that Frank T. did not intentionally hit his neighbor. So the answer is No. 10 | 11 | Q: How would a typical person answer each of the following questions about causation? 12 | Suzy and Billy are working on a project that is very important for our nation's security. The boss tells them both: "Be sure that you are here at exactly 9 am. It is absolutely essential that you arrive at that time." Both Billy and Suzy arrive at 9 am. As it happens, there was a motion detector installed in the room where they arrived. The motion detector was set up to be triggered if at least one person appeared in the room at the same time. So the motion detector went off. Did Billy cause the motion detector to go off? 13 | Options: 14 | - Yes 15 | - No 16 | A: Let's think step by step. 17 | Here in this question, we are told that the boss ordered them both to arrive at the meeting room at the same time and that the motion detector was set up to be triggered if at least one person appeared in the room at the same time." A typical person would assume that the person probably meant to say the detector was set up to be triggered if "both persons" appeared in the room at the same time, not at least one person, since otherwise the phrase "at the same time" would not make much sense in that sentence. Because the motion detector went off, a typical person would therefore come to the conclusion that both Suzy and Billy triggered the motion detector to go off; hence, Billy did indeed cause the motion detector to go off. So the answer is Yes. 18 | 19 | Q: How would a typical person answer each of the following questions about causation? 20 | George and his sister Lena reunite at their parents' house for Thanksgiving. 
Whereas George just got into medical school, Lena is unhappy in her marriage and recently lost her job. Over the course of the day, George and Lena get into a number of heated arguments. Later in the afternoon they play a game of darts. They split the first two games, and the third game is close until the end. Who will win comes down to George's last shot. If he hits a high point region, he wins; if he hits a low point region, Lena wins. George thinks of the difficult time Lena is having, and he really wants to let her win. He aims the dart at the low point region. He sets up his shot and the dart lands in the low point region. After his shot, Lena wins the game and is very happy. Did George hit the low point region intentionally? 21 | Options: 22 | - Yes 23 | - No 24 | A: Let's think step by step. 25 | Here in this question, we are told that "He aims the dart at the low point region." A typical person might therefore think George did intentionally hit the low point region, because he wanted to lift up the spirit of his sister Lena. So the answer is Yes. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/date_understanding.txt: -------------------------------------------------------------------------------- 1 | Infer the date from context. 2 | 3 | Q: Today is Christmas Eve of 1937. What is the date 10 days ago in MM/DD/YYYY? 4 | Options: 5 | (A) 12/14/2026 6 | (B) 12/14/1950 7 | (C) 12/14/2007 8 | (D) 12/14/1937 9 | (E) 07/14/1938 10 | (F) 12/14/1988 11 | A: Let's think step by step. 12 | If today is Christmas Eve of 1937, then today's date is December 24, 1937. 10 days before today is December 14, 1937, that is 12/14/1937. So the answer is (D). 13 | 14 | Q: Tomorrow is 11/12/2019. What is the date one year ago from today in MM/DD/YYYY? 15 | Options: 16 | (A) 09/04/2018 17 | (B) 11/11/2018 18 | (C) 08/25/2018 19 | (D) 11/02/2018 20 | (E) 11/04/2018 21 | A: Let's think step by step. 
22 | If tomorrow is 11/12/2019, then today is 11/11/2019. The date one year ago from today is 11/11/2018. So the answer is (B). 23 | 24 | Q: Jane and John married on Jan 2, 1958. It is their 5-year anniversary today. What is the date tomorrow in MM/DD/YYYY? 25 | Options: 26 | (A) 01/11/1961 27 | (B) 01/03/1963 28 | (C) 01/18/1961 29 | (D) 10/14/1960 30 | (E) 01/03/1982 31 | (F) 12/03/1960 32 | A: Let's think step by step. 33 | If Jane and John married on Jan 2, 1958, then and if it is their 5-year anniversary today, then today's date is Jan 2, 1963. The date tomorrow is Jan 3, 1963, that is 01/03/1963. So the answer is (B). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/disambiguation_qa.txt: -------------------------------------------------------------------------------- 1 | Clarify the meaning of sentences with ambiguous pronouns. 2 | 3 | Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 4 | Sentence: The chief told the counselor that they took the day off. 5 | Options: 6 | (A) The chief took the day off 7 | (B) The counselor took the day off 8 | (C) Ambiguous 9 | A: Let's think step by step. 10 | Here we need to determine who the pronoun "they" might be referring to. There are two possible referents for "they", namely the chief and the counselor. The verb "told" might be able to help us determine which one is more likely (if either). Let X be the chief and Y the counselor. The sentence is then of the form "X told Y that (X or Y) did something." 11 | Let's consider Y first: "X told Y that Y did something." This case does not make much sense, as Y would already have the information that Y did something, because it is information about themself. 12 | Now, consider X: "X told Y that X did something." 
This makes sense, because X would be sharing some information about themself that Y might not have known before. 13 | Because in this context, X is the chief and Y is the counselor, the answer should be the chief. So the answer is (A). 14 | 15 | Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 16 | Sentence: The manager sent a message to the secretary, but he didn't reply yet. 17 | Options: 18 | (A) The secretary didn't reply yet 19 | (B) The manager didn't reply yet 20 | (C) Ambiguous 21 | A: Let's think step by step. 22 | Here we need to determine who the pronoun "he" might be referring to. There are two possible referents for "he", namely the manager and the secretary. The verbs "sent" and "reply" might be able to help us determine which one is more likely (if either). Let X be the manager and Y the secretary. The sentence is then of the form "X sent a message to Y, but (X or Y) didn't reply yet." 23 | Let's consider Y first: "X sent a message to Y, but Y didn't reply yet." This case makes sense, because of the implicit causality of the sentence. Y was the receiver of the message, but Y didn't get back to X yet. 24 | Now, consider X: "X sent a message to Y, but X didn't reply yet." This case doesn't make sense, because X was the initial sender of the message, so it is now Y's turn to write back to X. 25 | Because in this context, X is the manager and Y is the secretary, the answer should be the secretary. So the answer is (A). 26 | 27 | Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 28 | Sentence: Bailey will plan to meet the director at his office 29 | Options: 30 | (A) It will be Bailey's office 31 | (B) It will be the director's office 32 | (C) Ambiguous 33 | A: Let's think step by step. 34 | Here we need to determine who the pronoun "his" might be referring to. 
There are two possible referents for "his", namely Bailey's and the director's. The verb phrase "plan to meet" might be able to help us determine which one is more likely (if either). Let X be Bailey and Y the director. The sentence is then of the form "X will plan to meet Y at (X or Y)'s office." 35 | Let's consider Y first: "X will plan to meet Y at Y's office." This case makes sense, because X might want to meet up with Y at Y's office. 36 | Now, consider X: "X will plan to meet Y at X's office." This case also makes sense, because X might want to meet up with Y at X's own office. 37 | Because both X and Y are possible at the same time, we conclude that the antecedent of the pronoun is ambiguous. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/dyck_languages.txt: -------------------------------------------------------------------------------- 1 | Correctly close a Dyck-n word. 2 | 3 | Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: [ { [ 4 | A: Let's think step by step. 5 | We should process each input one by one and keep track of the stack configuration. 6 | 0: empty stack 7 | 1: [ ; stack: [ 8 | 2: { ; stack: [ { 9 | 3: [ ; stack: [ { [ 10 | Now, we have reached the end. The final stack is "[ { [". 11 | We will need to pop out "[", "{", "[" one by one in that order. 12 | So, we need "]", "}", "]". So the answer is ] } ]. 13 | 14 | Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: < > ( ( [ [ ( { } ) [ < > ] ] 15 | A: Let's think step by step. 16 | We should process each input one by one and keep track of the stack configuration. 
17 | 0: empty stack 18 | 1: < ; stack: < 19 | 2: > ; stack: empty 20 | 3: ( ; stack: ( 21 | 4: ( ; stack: ( ( 22 | 5: [ ; stack: ( ( [ 23 | 6: [ ; stack: ( ( [ [ 24 | 7: ( ; stack: ( ( [ [ ( 25 | 8: { ; stack: ( ( [ [ ( { 26 | 9: } ; stack: ( ( [ [ ( 27 | 10: ) ; stack: ( ( [ [ 28 | 11: [ ; stack: ( ( [ [ [ 29 | 12: < ; stack: ( ( [ [ [ < 30 | 13: > ; stack: ( ( [ [ [ 31 | 14: ] ; stack: ( ( [ [ 32 | 15: ] ; stack: ( ( [ 33 | Now, we have reached the end. The final stack is "( ( [". 34 | We will need to pop out "[", "(", "(" one by one in that order. 35 | So, we need "]", ")", ")". So the answer is ] ) ). 36 | 37 | Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: < [ < [ { < [ ] < { } > > } ] > { { ( ) } { < [ < > ] > } 38 | A: Let's think step by step. 39 | We should process each input one by one and keep track of the stack configuration. 40 | 0: empty stack 41 | 1: < ; stack: < 42 | 2: [ ; stack: < [ 43 | 3: < ; stack: < [ < 44 | 4: [ ; stack: < [ < [ 45 | 5: { ; stack: < [ < [ { 46 | 6: < ; stack: < [ < [ { < 47 | 7: [ ; stack: < [ < [ { < [ 48 | 8: ] ; stack: < [ < [ { < 49 | 9: < ; stack: < [ < [ { < < 50 | 10: { ; stack: < [ < [ { < < { 51 | 11: } ; stack: < [ < [ { < < 52 | 12: > ; stack: < [ < [ { < 53 | 13: > ; stack: < [ < [ { 54 | 14: } ; stack: < [ < [ 55 | 15: ] ; stack: < [ < 56 | 16: > ; stack: < [ 57 | 17: { ; stack: < [ { 58 | 18: { ; stack: < [ { { 59 | 19: ( ; stack: < [ { { ( 60 | 20: ) ; stack: < [ { { 61 | 21: } ; stack: < [ { 62 | 22: { ; stack: < [ { { 63 | 23: < ; stack: < [ { { < 64 | 24: [ ; stack: < [ { { < [ 65 | 25: < ; stack: < [ { { < [ < 66 | 26: > ; stack: < [ { { < [ 67 | 27: ] ; stack: < [ { { < 68 | 28: > ; stack: < [ { { 69 | 29: } ; stack: < [ { 70 | Now, we have reached the end. The final stack is "< [ {". 71 | We will need to pop out "{", "[", "<" one by one in that order. 72 | So, we need "}", "]", ">". So the answer is } ] >. 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/formal_fallacies.txt: -------------------------------------------------------------------------------- 1 | Distinguish deductively valid arguments from formal fallacies. 2 | 3 | Q: "It is not always easy to see who is related to whom -- and in which ways. The following argument pertains to this question: To begin with, Lesley is a close friend of Fernando. Moreover, being a close friend of Fernando or a schoolmate of Lowell is sufficient for being a great-grandfather of Leroy. It follows that Lesley is a great-grandfather of Leroy." 4 | Is the argument, given the explicitly stated premises, deductively valid or invalid? 5 | Options: 6 | - valid 7 | - invalid 8 | A: Let's think step by step. 9 | (1) Lesley is a close friend of Fernando: Lesley = friend(Fernando). 10 | (2) Being a close friend of Fernando or a schoolmate of Lowell is sufficient for being a great-grandfather of Leroy: If X = friend(Fernando) OR SCHOOLMATE(Lowell), then X = great-grandfather(Leroy). 11 | Hypothesis: Does it follow that Lesley is a great-grandfather of Leroy: Lesley = great-grandfather(Leroy)? 12 | Let’s see whether the Hypothesis can be deduced from the arguments (1) and (2) by logical reasoning? 13 | By (1), we have Lesley = friend(Fernando). By (2), we have if Lesley = friend(Fernando), then Lesley = great-grandfather(Leroy). 14 | So, it is true that Lesley is a great-grandfather of Leroy. So the answer is valid. 15 | 16 | Q: "It is not always easy to see who is related to whom -- and in which ways. The following argument pertains to this question: Whoever is not a great-grandfather of Clyde is a stepbrother of Brian. Being an ancestor of Dana is sufficient for not being a great-grandfather of Clyde. We may conclude: Everyone who is an ancestor of Dana is a stepbrother of Brian, too." 
17 | Is the argument, given the explicitly stated premises, deductively valid or invalid? 18 | Options: 19 | - valid 20 | - invalid 21 | A: Let's think step by step. 22 | (1) Whoever is not a great-grandfather of Clyde is a stepbrother of Brian: If X = NOT (great-grandfather(Clyde)), then X = stepbrother(Brian). 23 | (2): Being an ancestor of Dana is sufficient for not being a great-grandfather of Clyde: If X = ancestor(Dana), X = NOT (great-grandfather(Clyde)). 24 | Hypothesis: Does it follow that everyone who is an ancestor of Dana is a stepbrother of Brian, too: If X = ancestor(Dana), then X = stepbrother(Brian)? 25 | Let’s see whether the Hypothesis can be deduced from the arguments (1) and (2) by logical reasoning? 26 | By (2), we have if X = ancestor(Dana), X = NOT (great-grandfather(Clyde)). 27 | Furthermore, by (1), we have if X = NOT (great-grandfather(Clyde)), then X = stepbrother(Brian). 28 | By the transitive relation rule in first-order logic, we then have: if X = ancestor(Dana), then X = stepbrother(Brian). 29 | So, it is true that everyone who is an ancestor of Dana is a stepbrother of Brian. So the answer is valid. 30 | 31 | Q: "It is not always easy to grasp who is consuming which products. The following argument pertains to this question: Every infrequent user of Paul Mitchell shampoo is either a rare consumer of Nioxin shampoo or a loyal buyer of Caress soap, or both. No regular consumer of Lush soap is a rare consumer of Nioxin shampoo and, in the same time, a loyal buyer of Caress soap. It follows that whoever is an infrequent user of Paul Mitchell shampoo is not a regular consumer of Lush soap." 32 | Is the argument, given the explicitly stated premises, deductively valid or invalid? 33 | Options: 34 | - valid 35 | - invalid 36 | A: Let's think step by step. 
37 | (1) Every infrequent user of Paul Mitchell shampoo is either a rare consumer of Nioxin shampoo or a loyal buyer of Caress soap, or both: If X = infrequent-user(Paul Mitchell), then X = rare-consumer(Nioxin) OR X = loyal-buyer(Caress). 38 | (2): No regular consumer of Lush soap is a rare consumer of Nioxin shampoo and a loyal buyer of Caress soap at the same time. If X = regular-consumer(Lush), then X = NOT (rare-consumer(Nioxin) AND loyal-buyer(Caress)). 39 | Hypothesis: Does it follow that whoever is an infrequent user of Paul Mitchell shampoo is not a regular consumer of Lush soap: If X = infrequent-user(Paul Mitchell), then X = NOT (regular-consumer(Lush))? 40 | Let’s see whether the Hypothesis can be deduced from the arguments (1) and (2) by logical reasoning? 41 | By (1), we have if X = infrequent-user(Paul Mitchell), then X = rare-consumer(Nioxin) OR X = loyal-buyer(Caress). We need to consider both cases separately: 42 | The case X = rare-consumer(Nioxin) does not appear in (2). 43 | The case X = loyal-buyer(Caress) does not appear in (2), either. 44 | So, from (1) and (2), we cannot necessarily deduce the Hypothesis. So the answer is invalid. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/geometric_shapes.txt: -------------------------------------------------------------------------------- 1 | Name geometric shapes from their SVG paths. 2 | 3 | Q: This SVG path element draws a 4 | Options: 5 | (A) circle 6 | (B) heptagon 7 | (C) hexagon 8 | (D) kite 9 | (E) line 10 | (F) octagon 11 | (G) pentagon 12 | (H) rectangle 13 | (I) sector 14 | (J) triangle 15 | A: Let's think step by step. 16 | This SVG path element contains "M" and "L" commands. M takes two parameters (x,y) and moves the current point to the coordinates (x,y). L takes two parameters (x,y) and draws a line from the previous coordinate to the new coordinate (x,y). 
17 | This path can be decomposed into 9 separate commands. 18 | (1) M 31.00,73.00: Move the current point to 31.00,73.00. 19 | (2) L 32.00,59.00: Create a line from 31.00,73.00 to 32.00,59.00. 20 | (3) L 44.00,50.00: Create a line from 32.00,59.00 to 44.00,50.00. 21 | (4) L 49.00,41.00: Create a line from 44.00,50.00 to 49.00,41.00. 22 | (5) L 64.00,37.00: Create a line from 49.00,41.00 to 64.00,37.00. 23 | (6) L 71.00,55.00: Create a line from 64.00,37.00 to 71.00,55.00. 24 | (7) L 64.00,76.00: Create a line from 71.00,55.00 to 64.00,76.00. 25 | (8) L 52.00,61.00: Create a line from 64.00,76.00 to 52.00,61.00. 26 | (9) L 31.00,73.00: Create a line from 52.00,61.00 to 31.00,73.00. 27 | This SVG path starts at point 31.00,73.00, creates eight consecutive and touching lines, and then returns back its starting point, thereby creating an eight-sided shape. It does not have any curves or arches. "octagon" is the only eight-sided object on the list. So the answer is (F). 28 | 29 | Q: This SVG path element draws a 30 | Options: 31 | (A) circle 32 | (B) heptagon 33 | (C) hexagon 34 | (D) kite 35 | (E) line 36 | (F) octagon 37 | (G) pentagon 38 | (H) rectangle 39 | (I) sector 40 | (J) triangle 41 | A: Let's think step by step. 42 | This SVG path element contains "M" and "L" commands. M takes two parameters (x,y) and moves the current point to the coordinates (x,y). L takes two parameters (x,y) and draws a line from the previous coordinate to the new coordinate (x,y). 43 | This path can be decomposed into 6 separate commands. 44 | (1) M 14.19,26.04: Move the current point to 14.19,26.04. 45 | (2) L 51.43,39.21: Create a line from 14.19,26.04 to 51.43,39.21. 46 | (3) L 58.44,36.69: Create a line from 51.43,39.21 to 58.44,36.69. 47 | (4) L 56.63,30.17: Create a line from 58.44,36.69 to 56.63,30.17. 48 | (5) L 48.53,26.66: Create a line from 56.63,30.17 to 48.53,26.66. 49 | (6) L 14.19,26.04: Create a line from 48.53,26.66 to 14.19,26.04. 
50 | This SVG path starts at point 14.19,26.04, creates five consecutive and touching lines, and then returns back its starting point, thereby creating a five-sided shape. It does not have any curves or arches. "pentagon" is the only five-sided polygon on the list. So the answer is (G). 51 | 52 | Q: This SVG path element draws a 53 | Options: 54 | (A) circle 55 | (B) heptagon 56 | (C) hexagon 57 | (D) kite 58 | (E) line 59 | (F) octagon 60 | (G) pentagon 61 | (H) rectangle 62 | (I) sector 63 | (J) triangle 64 | A: Let's think step by step. 65 | This SVG path element contains "M" and "L" commands. M takes two parameters (x,y) and moves the current point to the coordinates (x,y). L takes two parameters (x,y) and draws a line from the previous coordinate to the new coordinate (x,y). 66 | This path can be decomposed into 5 separate commands. 67 | (1) M 41.00,43.00: Move the current point to 41.00,43.00. 68 | (2) L 37.00,34.00: Create a line from 41.00,43.00 to 37.00,34.00. 69 | (3) L 41.00,33.00: Create a line from 37.00,34.00 to 41.00,33.00. 70 | (4) L 45.00,34.00: Create a line from 41.00,33.00 to 45.00,34.00. 71 | (5) L 41.00,43.00: Create a line from 45.00,34.00 to 41.00,43.00. 72 | This SVG path starts at point 41.00,43.00, creates four consecutive and touching lines, and then returns back its starting point, thereby creating a four-sided shape. "kite" and "rectangle" are the only two four-sided polygons on the list. So, we need to determine which one is the correct answer. 73 | A kite has two pairs of equal-length adjacent sides, whereas a rectangle has two pairs of equal-length alternate (opposite) sides. Now, let's check whether the two adjacent sides of this shape are equal. 74 | Length of side A: |A| = sqrt((41.00-37.00)^2 + (43.00-34.00)^2) = sqrt((4)^2 + (9)^2) = sqrt(16 + 81) = sqrt(97). 75 | Length of side B: |B| = sqrt((37.00-41.00)^2 + (34.00-33.00)^2)) = sqrt((4)^2 + (1)^2) = sqrt(16 + 1) = sqrt(17). 
76 | Length of side C: |C| = sqrt((41.00-45.00)^2 + (33.00-34.00)^2)) = sqrt((-4)^2 + (-1)^2) = sqrt(16 + 1) = sqrt(17). 77 | Length of side D: |D| = sqrt((45.00-41.00)^2 + (34.00-43.00)^2)) = sqrt((4)^2 + (-9)^2) = sqrt(16 + 81) = sqrt(97). 78 | Note that |A| = |D| and |B| = |C|. Furthermore, A and D are adjacent and B and C are adjacent. Thus, this polygon has two pairs of equal-length adjacent sides and is "kite". So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/gsm8k_prompt_original.txt: -------------------------------------------------------------------------------- 1 | Question: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? 2 | Let's think step by step 3 | There are 15 trees originally. 4 | Then there were 21 trees after some more were planted. 5 | So there must have been 21 - 15 = 6. 6 | The answer is 6. 7 | 8 | Question: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? 9 | Let's think step by step 10 | There are originally 3 cars. 11 | 2 more cars arrive. 12 | 3 + 2 = 5. 13 | The answer is 5. 14 | 15 | Question: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? 16 | Let's think step by step 17 | Originally, Leah had 32 chocolates. 18 | Her sister had 42. 19 | So in total they had 32 + 42 = 74. 20 | After eating 35, they had 74 - 35 = 39. 21 | The answer is 39. 22 | 23 | Question: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? 24 | Let's think step by step 25 | Jason started with 20 lollipops. 26 | Then he had 12 after giving some to Denny. 27 | So he gave Denny 20 - 12 = 8. 28 | The answer is 8. 29 | 30 | Question: Shawn has five toys. 
For Christmas, he got two toys each from his mom and dad. How many toys does he have now? 31 | Let's think step by step 32 | Shawn started with 5 toys. 33 | If he got 2 toys each from his mom and dad, then that is 4 more toys. 34 | 5 + 4 = 9. 35 | The answer is 9. 36 | 37 | Question: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? 38 | Let's think step by step 39 | There were originally 9 computers. 40 | For each of 4 days, 5 more computers were added. 41 | So 5 * 4 = 20 computers were added. 42 | 9 + 20 is 29. 43 | The answer is 29. 44 | 45 | Question: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? 46 | Let's think step by step 47 | Michael started with 58 golf balls. 48 | After losing 23 on tuesday, he had 58 - 23 = 35. 49 | After losing 2 more, he had 35 - 2 = 33 golf balls. 50 | The answer is 33. 51 | 52 | Question: Olivia has $23. She bought five bagels for $3 each. How much money does she have left? 53 | Let's think step by step 54 | Olivia had 23 dollars. 55 | 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. 56 | So she has 23 - 15 dollars left. 57 | 23 - 15 is 8. 58 | The answer is 8. 59 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/hyperbaton.txt: -------------------------------------------------------------------------------- 1 | Order adjectives correctly in English sentences. 2 | 3 | Q: Which sentence has the correct adjective order: 4 | Options: 5 | (A) rubber terrible ship 6 | (B) terrible rubber ship 7 | A: Let's think step by step. 8 | When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. 
purpose] noun". 9 | Option (A): "rubber terrible ship". (1) "rubber" falls into the material category. (2) "terrible" falls into the opinion category. Option (A) has the following adjective order: [7. material] [1. opinion] (or, in numeric terms, 7 1). Because 7 < 1 is not correct, (A) does not have the correct ordering. 10 | Option (B): "terrible rubber ship". Option (B) has the following adjective order: [1. opinion] [7. material] (or, in numeric terms, 1 7). Because 1 < 7 is correct, (B) has the correct ordering. So the answer is (B). 11 | 12 | Q: Which sentence has the correct adjective order: 13 | Options: 14 | (A) repulsive small Brazilian exercise ship 15 | (B) Brazilian repulsive exercise small ship 16 | A: Let's think step by step. 17 | When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. purpose] noun". 18 | Option (A): "repulsive small Brazilian exercise ship". (1) "repulsive" falls into the opinion category. (2) "small" falls into the size category. (3) "Brazilian" falls into the origin category. (4) "exercise" falls into the purpose category. Option (A) has the following adjective order: [1. opinion] [2. size] [6. origin] [8. purpose] (or, in numeric terms, 1 2 6 8). Because 1 < 2 < 6 < 8 is correct, (A) has the correct ordering. 19 | Option (B): "Brazilian repulsive exercise small ship". Option (B) has the following adjective order: [6. origin] [1. opinion] [8. purpose] [2. size] (or, in numeric terms, 6 1 8 2). Because 6 < 1 < 8 < 2 is not correct, (B) does not have the correct ordering. So the answer is (A). 20 | 21 | Q: Which sentence has the correct adjective order: 22 | Options: 23 | (A) blue gold wonderful square shoe 24 | (B) wonderful square blue gold shoe 25 | A: Let's think step by step. 
26 | When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. purpose] noun". 27 | Option (A): "blue gold wonderful square shoe". (1) "blue" falls into the color category. (2) "gold" falls into the material category. (3) "wonderful" falls into the opinion category. (4) "square" falls into the shape category. The adjective order that Option (A) has is [5. color] [7. material] [1. opinion] [4. shape] (or, in numeric terms, 5 7 1 4). Because 5 < 7 < 1 < 4 is not correct, (A) does not have the correct ordering. 28 | Option (B): "wonderful square blue gold shoe". Option (B) has the following adjective order: [1. opinion] [4. shape] [5. color] [7. material] (or, in numeric terms, 1 4 5 7 ). Because 1 < 4 < 5 < 7 is correct, (B) has the correct ordering. So the answer is (B). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/logical_deduction_five_objects.txt: -------------------------------------------------------------------------------- 1 | A logical deduction task which requires deducing the order of a sequence of objects. 2 | 3 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 4 | Options: 5 | (A) Amy finished last 6 | (B) Eli finished last 7 | (C) Eve finished last 8 | A: Let's think step by step. 9 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 10 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 11 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 12 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 
13 | Eli finished last. So the answer is (B). 14 | 15 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 16 | Options: 17 | (A) The white book is the leftmost 18 | (B) The green book is the leftmost 19 | (C) The orange book is the leftmost 20 | A: Let's think step by step. 21 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 22 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 23 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 24 | According to this ordering, the leftmost book is the white book. 25 | The white book is the leftmost. So the answer is (A). 26 | 27 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 28 | Options: 29 | (A) The red book is the leftmost 30 | (B) The gray book is the leftmost 31 | (C) The white book is the leftmost 32 | A: Let's think step by step. 33 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 34 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 35 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 36 | According to this ordering, the leftmost book is the white book. 37 | The white book is the leftmost. So the answer is (C). 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/logical_deduction_seven_objects.txt: -------------------------------------------------------------------------------- 1 | A logical deduction task which requires deducing the order of a sequence of objects. 2 | 3 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 4 | Options: 5 | (A) Amy finished last 6 | (B) Eli finished last 7 | (C) Eve finished last 8 | A: Let's think step by step. 9 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 10 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 11 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 12 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 13 | Eli finished last. So the answer is (B). 14 | 15 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 16 | Options: 17 | (A) The white book is the leftmost 18 | (B) The green book is the leftmost 19 | (C) The orange book is the leftmost 20 | A: Let's think step by step. 21 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 22 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 23 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 24 | According to this ordering, the leftmost book is the white book. 25 | The white book is the leftmost. 
So the answer is (A). 26 | 27 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 28 | Options: 29 | (A) The red book is the leftmost 30 | (B) The gray book is the leftmost 31 | (C) The white book is the leftmost 32 | A: Let's think step by step. 33 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 34 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 35 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 36 | According to this ordering, the leftmost book is the white book. 37 | The white book is the leftmost. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/logical_deduction_three_objects.txt: -------------------------------------------------------------------------------- 1 | A logical deduction task which requires deducing the order of a sequence of objects. 2 | 3 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 4 | Options: 5 | (A) Amy finished last 6 | (B) Eli finished last 7 | (C) Eve finished last 8 | A: Let's think step by step. 9 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 10 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 11 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 12 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 
13 | Eli finished last. So the answer is (B). 14 | 15 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 16 | Options: 17 | (A) The white book is the leftmost 18 | (B) The green book is the leftmost 19 | (C) The orange book is the leftmost 20 | A: Let's think step by step. 21 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 22 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 23 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 24 | According to this ordering, the leftmost book is the white book. 25 | The white book is the leftmost. So the answer is (A). 26 | 27 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 28 | Options: 29 | (A) The red book is the leftmost 30 | (B) The gray book is the leftmost 31 | (C) The white book is the leftmost 32 | A: Let's think step by step. 33 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 34 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 35 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 36 | According to this ordering, the leftmost book is the white book. 37 | The white book is the leftmost. So the answer is (C). 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/movie_recommendation.txt: -------------------------------------------------------------------------------- 1 | Recommend movies similar to the given list of movies. 2 | 3 | Q: Find a movie similar to Star Wars Episode IV - A New Hope, Indiana Jones and the Last Crusade, Star Wars Episode V - The Empire Strikes Back, The Big Lebowski: 4 | Options: 5 | (A) Tetsuo 6 | (B) the Ironman 7 | (C) The Princess Bride 8 | (D) The Barkley Marathons The Race That Eats Its Young 9 | (E) Bug 10 | A: Let's think step by step. 11 | - Star Wars Episode IV - A New Hope (action, adventure, fantasy; 1977) 12 | - Indiana Jones and the Last Crusade (action, adventure; 1989) 13 | - Star Wars Episode V - The Empire Strikes Back (action, adventure, fantasy; 1980) 14 | - The Big Lebowski (action, drama, comedy; 1998) 15 | These are all famous classic American movies produced before 2000. Amongst all the options, the only movie similar to these ones seems to be The Princess Bride (1987). So the answer is (C). 16 | 17 | Q: Find a movie similar to Twister, The Silence of the Lambs, Independence Day, Braveheart: 18 | Options: 19 | (A) They Shoot Horses 20 | (B) Don't They 21 | (C) Forrest Gump 22 | (D) The Salton Sea 23 | (E) Extreme Days 24 | A: Let's think step by step. 25 | - Twister (action, adventure, thriller; 1996) 26 | - The Silence of the Lambs (crime, drama, thriller; 1991) 27 | - Independence Day (action, science-fiction, drama; 1996) 28 | - Braveheart (biography, drama, epic; 1995) 29 | These are all famous Hollywood movies produced around the 1990s. Amongst all the options, the only movie similar to these ones seems to be Forrest Gump (comedy, drama, romance; 1994). So the answer is (C). 
30 | 31 | Q: Find a movie similar to Minority Report, Total Recall, Inside Out, Forrest Gump: 32 | Options: 33 | (A) Phenomena 34 | (B) Lilting 35 | (C) Catwoman 36 | (D) Edge of Tomorrow 37 | A: Let's think step by step. 38 | - Minority Report (action, crime, mystery; 2002) 39 | - Total Recall (action, adventure, science-fiction; 2012) 40 | - Inside Out (animation, family, comedy; 2015) 41 | - Forrest Gump (comedy, drama, romance; 1994) 42 | These are all famous movies produced in the past few decades. Amongst all the options, the only movie similar to these ones seems to be Edge of Tomorrow (action, adventure, crime, mystery; 2014), as it is also a science-fiction movie and features Tom Cruise. So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/multistep_arithmetic_two.txt: -------------------------------------------------------------------------------- 1 | Solve multi-step arithmetic problems. 2 | 3 | Q: ((-5 + 9 * -4 - 0) * (4 + -7 + 0 * -5)) = 4 | A: Let's think step by step. 5 | Let’s recall that the order of operations in mathematics is as follows: (1) Parentheses, (2) exponents, (3) multiplication and division (from left to right), (4) addition and subtraction (from left to right). So, remember to always compute the expressions inside parentheses or brackets first. 6 | This equation can be written as "A * B", where A = (-5 + 9 * -4 - 0) and B = (4 + -7 + 0 * -5). 7 | Let's calculate A = (-5 + 9 * -4 - 0) = (-5 + (9 * -4) - 0) = (-5 + (-36) - 0) = (-5 + -36 - 0) = -5 - 36 = -41. 8 | Let's calculate B = (4 + -7 + 0 * -5) = (4 + -7 + (0 * -5)) = (4 + -7 + 0) = (4 + -7) = (4 - 7) = -3. 9 | Then, the final equation is A * B = -41 * -3 = (-41) * (-3) = 123. So the answer is 123. 10 | 11 | Q: ((-9 * 7 * 7 * -9) + (4 * -9 - 8 - -4)) = 12 | A: Let's think step by step. 
13 | Let’s recall that the order of operations in mathematics is as follows: (1) Parentheses, (2) exponents, (3) multiplication and division (from left to right), (4) addition and subtraction (from left to right). So, remember to always compute the expressions inside parentheses or brackets first. 14 | This equation can be written as "A + B", where A = (-9 * 7 * 7 * -9) and B = (4 * -9 - 8 - -4). 15 | Let's calculate A = (-9 * 7 * 7 * -9) = ((-9 * 7) * (7 * -9)) = ((-63) * (-63)) = 3969. 16 | Let's calculate B = (4 * -9 - 8 - (-4)) = ((4 * -9) - 8 - (-4)) = ((-36) - 8 - (-4)) = ((-36 - 8) - (-4)) = (-44 - (-4)) = -40. 17 | Then, the final equation is A + B = 3969 + -40 = 3969 - 40 = 3929. So the answer is 3929. 18 | 19 | Q: ((-3 + 5 * 8 * -4) - (9 - 8 * -7 + -9)) = 20 | A: Let's think step by step. 21 | Let’s recall that the order of operations in mathematics is as follows: (1) Parentheses, (2) exponents, (3) multiplication and division (from left to right), (4) addition and subtraction (from left to right). So, remember to always compute the expressions inside parentheses or brackets first. 22 | This equation can be written as "A - B", where A = (-3 + 5 * 8 * -4) and B = (9 - 8 * -7 + -9). 23 | Let's calculate A = (-3 + 5 * 8 * -4) = (-3 + (5 * 8) * -4) = (-3 + (40) * -4) = (-3 + (40 * -4)) = (-3 + -160) = -163. 24 | Let's calculate B = (9 - 8 * -7 + -9) = (9 - (8 * -7) + -9) = (9 - (-56) + -9) = ((9 - (-56)) + -9) = ((65) + -9) = (65 - 9) = 56. 25 | Then, the final equation is A - B = -163 - 56 = -219. So the answer is -219. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/navigate.txt: -------------------------------------------------------------------------------- 1 | Given a series of navigation instructions, determine whether one would end up back at the starting point. 2 | 3 | Q: If you follow these instructions, do you return to the starting point? Turn left. Turn around. 
Turn left. Take 7 steps. Take 2 steps. Take 4 steps. Take 8 steps. 4 | Options: 5 | - Yes 6 | - No 7 | A: Let's think step by step. 8 | We start at the origin (0, 0), facing the positive y-axis. 9 | (1) Turn left: (0, 0), facing the negative x-axis. 10 | (2) Turn around: (0, 0), facing the positive x-axis. 11 | (3) Turn left: (0, 0), facing the positive y-axis. 12 | (4) Take 7 steps: (0, 7), facing the positive y-axis. 13 | (5) Take 2 steps: (0, 9), facing the positive y-axis. 14 | (6) Take 4 steps: (0, 13), facing the positive y-axis. 15 | (7) Take 8 steps: (0, 21), facing the positive y-axis. 16 | Since (0, 21) is not (0, 0), we are not where we started. So the answer is No. 17 | 18 | Q: If you follow these instructions, do you return to the starting point? Turn around. Take 1 step. Take 6 steps. Turn around. Take 6 steps. Take 9 steps. Take 1 step. 19 | Options: 20 | - Yes 21 | - No 22 | A: Let's think step by step. 23 | We start at the origin (0, 0), facing the positive y-axis. 24 | (1) Turn around: (0, 0), facing the negative y-axis. 25 | (2) Take 1 step: (0, -1), facing the negative y-axis. 26 | (3) Take 6 steps: (0, -7), facing the negative y-axis. 27 | (4) Turn around: (0, -7), facing the positive y-axis. 28 | (5) Take 6 steps: (0, -1), facing the positive y-axis. 29 | (6) Take 9 steps: (0, 8), facing the positive y-axis. 30 | (7) Take 1 step: (0, 9), facing the positive y-axis. 31 | Since (0, 9) is not (0, 0), we are not where we started. So the answer is No. 32 | 33 | Q: If you follow these instructions, do you return to the starting point? Always face forward. Take 2 steps right. Take 9 steps left. Take 7 steps right. 34 | Options: 35 | - Yes 36 | - No 37 | A: Let's think step by step. 38 | We start at the origin (0, 0), facing the positive y-axis. 39 | (1) Always face forward: (0, 0), facing the positive y-axis. 40 | (2) Take 2 steps right: (0, 2), facing the positive y-axis. 41 | (3) Take 9 steps left: (0, -7), facing the positive y-axis. 
42 | (4) Take 7 steps right: (0, 0), facing the positive y-axis. 43 | Since (0, 0) is (0, 0), we are indeed where we started. So the answer is Yes. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/object_counting.txt: -------------------------------------------------------------------------------- 1 | Questions that involve enumerating objects and asking the model to count them. 2 | 3 | Q: I have a blackberry, a clarinet, a nectarine, a plum, a strawberry, a banana, a flute, an orange, and a violin. How many fruits do I have? 4 | A: Let's think step by step. 5 | We first identify the fruits on the list and include their quantity in parentheses: 6 | - blackberry (1) 7 | - nectarine (1) 8 | - plum (1) 9 | - strawberry (1) 10 | - banana (1) 11 | - orange (1) 12 | Now, let's add the numbers in parentheses: 1 + 1 + 1 + 1 + 1 + 1 = 6. So the answer is 6. 13 | 14 | Q: I have an orange, a raspberry, two peaches, a blackberry, an apple, a grape, a nectarine, and three plums. How many fruits do I have? 15 | A: Let's think step by step. 16 | We first identify the fruits on the list and include their quantity in parentheses: 17 | - orange (1) 18 | - raspberry (1) 19 | - peaches (2) 20 | - blackberry (1) 21 | - apple (1) 22 | - grape (1) 23 | - nectarine (1) 24 | - plums (3) 25 | Now, let's add the numbers in parentheses: 1 + 1 + 2 + 1 + 1 + 1 + 1 + 3 = 11. So the answer is 11. 26 | 27 | Q: I have a lettuce head, a head of broccoli, an onion, a stalk of celery, two carrots, a garlic, and a yam. How many vegetables do I have? 28 | A: Let's think step by step. 29 | We first identify the vegetables on the list and include their quantity in parentheses: 30 | - lettuce (1) 31 | - broccoli (1) 32 | - onion (1) 33 | - celery (1) 34 | - carrots (2) 35 | - garlic (1) 36 | - yam (1) 37 | Now, let's add the numbers in parentheses: 1 + 1 + 1 + 1 + 2 + 1 + 1 = 8. So the answer is 8. 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/penguins_in_a_table.txt: -------------------------------------------------------------------------------- 1 | Answer questions about a table of penguins and their attributes. 2 | 3 | Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. We now add a penguin to the table: 4 | James, 12, 90, 12 5 | How many penguins are less than 8 years old? 6 | Options: 7 | (A) 1 8 | (B) 2 9 | (C) 3 10 | (D) 4 11 | (E) 5 12 | A: Let's think step by step. 13 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 14 | Now, we add James to this table: James is 12 years old. 15 | The penguins that are less than 8 years old are Louis and Bernard. 16 | There are 2 penguins less than 8 years old. So the answer is (B). 17 | 18 | Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. Which is the youngest penguin? 19 | Options: 20 | (A) Louis 21 | (B) Bernard 22 | (C) Vincent 23 | (D) Gwen 24 | (E) James 25 | A: Let's think step by step. 26 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 27 | According to the table, Bernard (5) is the youngest amongst them. 28 | The youngest penguin is Bernard. So the answer is (B). 
29 | 30 | Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. What is the name of the second penguin sorted by alphabetic order? 31 | Options: 32 | (A) Louis 33 | (B) Bernard 34 | (C) Vincent 35 | (D) Gwen 36 | (E) James 37 | A: Let's think step by step. 38 | This question focuses on the name. We know the following: The names of the penguin in the table are Louis, Bernard, Vincent, and Gwen. 39 | When we sort their names alphabetically, we get Bernard, Gwen, Louis, Vincent. 40 | The name of the second penguin sorted by alphabetical order is Gwen. 41 | The name of the second penguin sorted by alphabetic order is Gwen. So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/prompt_8x8step.txt: -------------------------------------------------------------------------------- 1 | Question: Janet hires six employees. Four of them are warehouse workers who make $15/hour, and the other two are managers who make $20/hour. Janet has to pay 10% of her workers' salaries in FICA taxes. If everyone works 25 days a month and 8 hours a day, how much does Janet owe total for their wages and taxes for one month? 
2 | Let's think step by step 3 | First figure out how many hours each worker works per month by multiplying the number of days they work by the number of hours a day they work: 25 days * 8 hours/day = 200 hours 4 | Then calculate how much one warehouse worker makes per month by multiplying their hourly rate by the number of hours they work: 200 hours * $15/hour = $3000 5 | Then multiply that number by 4 to find out how much all the warehouse workers make: $3000/worker * 4 workers = $12,000 6 | Now multiply the hours each manager works (also 200) by their hourly wage to find out how much one manager makes per month: 200 hours * $20/hour = $4,000 7 | Now multiply one manager's wages by the number of managers (2) to find their total wage amount: $4,000/manager * 2 managers = $8,000 8 | Now add the wages for the managers and the workers to find the total cost of the wages: $8,000 + $12,000 = $20,000 9 | Now multiply the total wage bill by 10% to find how much the FICA taxes are: $20,000 * .1 = $2,000 10 | Now add the total wage bill to the total tax amount to find the grand total: $2,000 + $20,000 = $22,000 11 | The answer is 22000 12 | 13 | Question: In a graveyard, there are 20 skeletons. Half of these skeletons are adult women, and the remaining number are split evenly between adult men and children. If an adult woman has 20 bones in their body, and a male has 5 more than this, and a child has half as many as an adult woman, how many bones are in the graveyard? 14 | Let's think step by step 15 | We first need to figure out how many of each type of skeleton there are. Since half the 20 skeletons are adult women, that means there are 20/2=10 adult women's skeletons. 16 | The remaining half, 10, is split between adult men and children, meaning there are 10/2= 5 of each. 17 | Since an adult woman has 20 bones in their body, this means that the 10 skeletons have 20*10=200 bones in total. 
18 | Since an adult man has 5 more bones in their body than an adult woman, this means they have 20+5= 25 bones in their body. 19 | There are 5 adult men, meaning there are 25*5=125 bones in the adult male skeletons in total. 20 | Since a child has half as many bones as the 20 in an adult woman, this means the children have 20/2= 10 bones in their body. 21 | Since there are 5 children, this means there are 5*10= 50 bones in the children's skeletons in total. 22 | Therefore, altogether there are 50+125+200= 375 bones in total in the graveyard. 23 | The answer is 375 24 | 25 | Question: Last month, a factory made 12000 dolls and their associated accessories. The accessories for each doll included 2 shoes, 3 bags, 1 set of cosmetics, and 5 hats. If each doll took 45 seconds to make and each accessory took 10 seconds to make, what was the total combined machine operation time, in seconds, required to manufacture all of the dolls and their accessories? 26 | Let's think step by step 27 | The number of shoes the factory made was 12000 dolls x 2 shoes/doll = 24000 shoes 28 | The factory also made for each doll 12000 dolls x 3 bags/doll = 36000 bags 29 | The set of cosmetics made for each doll was just one, so there are 12000 cosmetics sets 30 | Finally, the number of hats for the dolls was 12000 dolls x 5 hats/doll = 60000 hats 31 | In total, the number of accessories was 24000 + 36000 + 12000 + 60000 = 132000 accessories 32 | The dolls took in total 12000 dolls x 45 seconds/doll = 540000 seconds to be made 33 | All the accessories also took 132000 accessories x 10 seconds/accessory = 1320000 seconds 34 | Combining both results, we have that the total time the machinery was working was 1320000 + 540000 = 1860000 seconds 35 | The answer is 1860000 36 | 37 | Question: Tina makes $18.00 an hour. If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage. 
If she works 10 hours every day for 5 days, how much money does she make? 38 | Let's think step by step 39 | She works 8 hours a day for $18 per hour so she makes 8*18 = $144.00 per 8-hour shift 40 | She works 10 hours a day and anything over 8 hours is eligible for overtime, so she gets 10-8 = 2 hours of overtime 41 | Overtime is calculated as time and a half so and she makes $18/hour so her overtime pay is 18*.5 = $9.00 42 | Her overtime pay is 18+9 = $27.00 43 | Her base pay is $144.00 per 8-hour shift and she works 5 days and makes 5 * $144 = $720.00 44 | Her overtime pay is $27.00 per hour and she works 2 hours of overtime per day and makes 27*2 = $54.00 in overtime pay 45 | 2 hours of overtime pay for 5 days means she makes 54*5 = $270.00 46 | In 5 days her base pay is $720.00 and she makes $270.00 in overtime pay so she makes $720 + $270 = $990.00 47 | The answer is 990 48 | 49 | Question: A printing press is printing brochures. The press prints 20 single-page spreads, and twice as many double-page spreads. For each 4 pages printed for the spreads, the press prints a block of 4 ads, each of which take up a quarter of a page. The brochures can be arranged in any order as long as they are made up of 5 pages each. How many brochures is the printing press creating? 50 | Let's think step by step 51 | There are a total of 20 single-page spreads * 2 = 40 double-page spreads. 52 | As these are made up of 2 pages each, there are 40 double-page spreads * 2 pages = 80 pages in the double-page spreads. 53 | In total, there are 20 single-pages + 80 double-pages = 100 pages in the spreads. 54 | A block of ads is printed every 4 pages, so there will be 100 pages / 4 pages/block = 25 blocks of ads. 55 | This is a total of 25 blocks * 4 ads = 100 ads. 56 | As each ad takes up a quarter of a page, this must create an additional 100 ads * 0.25 = 25 pages. 57 | Therefore, the press has printed 100 pages from the spreads + 25 pages of ads = 125 pages. 
58 | Since brochures are made up of 5 pages each, this creates a total of 125 pages / 5 = 25 brochures. 59 | The answer is 25 60 | 61 | Question: A spaceship is traveling to another planet. The spaceship travels at a consistent speed its entire journey unless it is stopped for the crew’s break. After launch, the spaceship traveled for 10 hours then stopped for 3 hours. It then traveled for another 10 hours then stopped for 1 hour. After this, the spaceship would take an hour’s break after every 11 hours of traveling and maintained this routine until the end of its journey. If the entire journey took 3 days then how long, in hours, was the spaceship not moving? 62 | Let's think step by step 63 | On the first day, the spaceship spent a total of 10 hours + 10 hours = 20 hours traveling. 64 | On the first day, the spaceship spent a total of 3 hours + 1 hour = 4 hours not moving. 65 | This is a total of 20 hours traveling + 4 hours not moving = 24 hours or 1 day. 66 | So the rest of the journey took place over 3 days journey – 1 first day = 2 days. 67 | This is the same as 2 days * 24 hours = 48 hours. 68 | In these 2 days, the spaceship’s time is split into 11 hours traveling + 1 hour break = 12 hour blocks. 69 | So in the last 2 days, the spaceship wasn't moving for a total of 48 hours / 12 hour blocks = 4 hours. 70 | Adding this to breaks from the first day shows that the spaceship was not moving for a total of 4 + 4 = 8 hours. 71 | The answer is 8 72 | 73 | Question: Alec is running for Class President. He thinks that if he can get three-quarters of the class to vote for him then there is no chance anyone else can beat him. Half of the class have already said they will vote for him but out of the remaining students, only 5 have said they are thinking about voting for him. He surveys the students who are thinking about voting for someone else, and changes his flyers to reflect the issues these students are concerned about. 
This results in a fifth of these students saying they'll vote for him. If Alec's class has 60 students and everyone who said they will vote for him does so, how many more votes does Alec need to reach his goal number of votes? 74 | Let's think step by step 75 | To calculate Alec's goal number of votes, we need to know that 60 students / 4 = 15 students is equal to one-quarter of the class students. 76 | Alec's goal is therefore 15 students * 3 quarters = 45 votes. 77 | Half of the class said they will vote for him, so there are already 60 students / 2 = 30 votes. 78 | Another 5 students are thinking about voting for him which leaves a total so far of 30 + 5 = 35 votes. 79 | This means there are 60 students - 35 voting for Alec = 25 students not voting for Alec. 80 | A fifth of these decided to vote, so this is a further 25 students / 5 = 5 votes. 81 | Alec is therefore receiving a total of 35 + 5 = 40 votes. 82 | So he has missed his goal by 45 goal votes - 40 actual votes = 5 votes. 83 | The answer is 5 84 | 85 | Question: A curry house sells curries that have varying levels of spice. Recently, a lot of the customers have been ordering very mild curries and the chefs have been having to throw away some wasted ingredients. To reduce cost and food wastage, the curry house starts monitoring how many ingredients are actually being used and changes their spending accordingly. The curry house needs 3 peppers for very spicy curries, 2 peppers for spicy curries, and only 1 pepper for mild curries. After adjusting their purchasing, the curry house now buys the exact amount of peppers they need. Previously, the curry house was buying enough peppers for 30 very spicy curries, 30 spicy curries, and 10 mild curries. They now buy enough peppers for 15 spicy curries and 90 mild curries. They no longer sell very spicy curries. How many fewer peppers does the curry house now buy? 
86 | Let's think step by step 87 | The curry house previously bought 3 peppers per very spicy curry * 30 very spicy curries = 90 peppers for very spicy curries. 88 | They also bought 2 peppers per spicy curry * 30 spicy curries = 60 peppers for spicy curries. 89 | They also bought 1 pepper per mild curry * 10 mild curries = 10 peppers for mild curries. 90 | So they were previously buying 90 + 60 + 10 = 160 peppers. 91 | They now buy 2 peppers per spicy curry * 15 spicy curries = 30 peppers for spicy curries. 92 | They also now buy 1 pepper per mild curry * 90 mild curries = 90 peppers for mild curries. 93 | So they now buy 30 + 90 = 120 peppers. 94 | This is a difference of 160 peppers bought originally - 120 peppers bought now = 40 peppers. 95 | The answer is 40 -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/reasoning_about_colored_objects.txt: -------------------------------------------------------------------------------- 1 | Answer extremely simple questions about the colors of objects on a surface. 2 | 3 | Q: On the nightstand, there is a red pencil, a purple mug, a burgundy keychain, a fuchsia teddy bear, a black plate, and a blue stress ball. What color is the stress ball? 4 | Options: 5 | (A) red 6 | (B) orange 7 | (C) yellow 8 | (D) green 9 | (E) blue 10 | (F) brown 11 | (G) magenta 12 | (H) fuchsia 13 | (I) mauve 14 | (J) teal 15 | (K) turquoise 16 | (L) burgundy 17 | (M) silver 18 | (N) gold 19 | (O) black 20 | (P) grey 21 | (Q) purple 22 | (R) pink 23 | A: Let's think step by step. 24 | According to this question, the color of the stress ball is blue. So the answer is (E). 25 | 26 | Q: On the table, you see a bunch of objects arranged in a row: a purple paperclip, a pink stress ball, a brown keychain, a green scrunchiephone charger, a mauve fidget spinner, and a burgundy pen. What is the color of the object directly to the right of the stress ball? 
27 | Options: 28 | (A) red 29 | (B) orange 30 | (C) yellow 31 | (D) green 32 | (E) blue 33 | (F) brown 34 | (G) magenta 35 | (H) fuchsia 36 | (I) mauve 37 | (J) teal 38 | (K) turquoise 39 | (L) burgundy 40 | (M) silver 41 | (N) gold 42 | (O) black 43 | (P) grey 44 | (Q) purple 45 | (R) pink 46 | A: Let's think step by step. 47 | According to this question, the objects are arranged in a row, from left to right, as follows: (1) a purple paperclip, (2) a pink stress ball, (3) a brown keychain, (4) a green scrunchiephone charger, (5) a mauve fidget spinner, (6) a burgundy pen. 48 | The stress ball is the second object on the list, namely (2). The object that is to the right of the stress ball corresponds to (3), which is a brown keychain. 49 | The color of the keychain is brown. So the answer is (F). 50 | 51 | Q: On the nightstand, you see the following items arranged in a row: a teal plate, a burgundy keychain, a yellow scrunchiephone charger, an orange mug, a pink notebook, and a grey cup. How many non-orange items do you see to the left of the teal item? 52 | Options: 53 | (A) zero 54 | (B) one 55 | (C) two 56 | (D) three 57 | (E) four 58 | (F) five 59 | (G) six 60 | A: Let's think step by step. 61 | According to this question, the objects are arranged in a row, from left to right, as follows: (1) a teal plate, (2) a burgundy keychain, (3) a yellow scrunchiephone charger, (4) an orange mug, (5) a pink notebook, (6) a grey cup. 62 | The teal plate is the first item, namely (1). There is no item to the left of the teal item. 63 | The number of non-orange items to the left of the teal item is zero. So the answer is (A). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/ruin_names.txt: -------------------------------------------------------------------------------- 1 | Select the humorous edit that 'ruins' the input movie or musical artist name. 
2 | 3 | Q: Which of the following is a humorous edit of this artist or movie name: 'whitesnake'? 4 | Options: 5 | (A) whitesnape 6 | (B) whitesnapke 7 | (C) whitesnuake 8 | (D) mwhitesnake 9 | A: Let's think step by step. 10 | The original name is "whitesnake". This is the name of an old English hard rock band. It is a compound word, formed by the words "white" and "snake". 11 | (A) "whitesnape": It is formed by the combination of "white" and "snake"; therefore, "snake" has been changed to "snape". Snape makes a reference to the fictional character Severus Snape in the Harry Potter series, so (A) is indeed a meaningful and funny edit. 12 | (B) "whitesnapke": It is formed by the combination of "white" and "snapke", but "snapke" is not an actual word; therefore, "whitesnapke" is not humorous. 13 | (C) "whitesnuake": It is formed by the combination of "white" and "snuake", but "snuake" is not an actual word; therefore, "whitesnuake" is not humorous. 14 | (D) "mwhitesnake": It is formed by the combination of "m", "white", and "snake", but the prefix "m-" seems arbitrary; therefore, "mwhitesnake" is not meaningful or humorous. 15 | Among the above, the only humorous edit is (A). So the answer is (A). 16 | 17 | Q: Which of the following is a humorous edit of this artist or movie name: 'one of our dinosaurs is missing'? 18 | Options: 19 | (A) ofne of our dinosaurs is missing 20 | (B) one af our dinosaurs is missing 21 | (C) one of our dinosaurs is pissing 22 | (D) one of our dinosaur is missing 23 | A: Let's think step by step. 24 | The original name is "one of our dinosaurs is missing". This is the name of an old British movie. 25 | (A) "ofne of our dinosaurs is missing": Here "one of" is changed to "ofne", but the word "ofne" is not an actual word. 26 | (B) "one af our dinosaurs is missing": Here the word "of" is changed to "af", but the word "af" is not an actual word. 
27 | (C) "one of our dinosaurs is pissing": Here the word "missing" is changed to "pissing", and "one of our dinosaurs is pissing" is indeed a very whimsical and mischievous edit. This change truly ruins the original title of the movie. 28 | (D) "one of our dinosaur is missing": Here the word "dinosaurs" is changed to "dinosaur", but "dinosaur" is singular but should be plural in the title; this change therefore feels arbitrary and not humorous. 29 | Among the above, the only humorous edit is (C). 30 | Among the above, the only humorous edit is (C). So the answer is (C). 31 | 32 | Q: Which of the following is a humorous edit of this artist or movie name: 'counting crows'? 33 | Options: 34 | (A) countingy crows 35 | (B) counting cows 36 | (C) courting crows 37 | (D) coutnting crows 38 | A: Let's think step by step. 39 | The original name is "counting crows". This is the name of an American rock band. Historically, the band name comes from the British nursery rhyme "One for Sorrow", which is about counting of magpies. 40 | (A) "countingy crows": Here the word "counting" is changed to "countingy", but the word "countingy" is not an actual word. 41 | (B) "counting cows": Here the word "crows" is changed to "cows", and this is indeed a playful and meaningful edit that ruins the original name of the band. 42 | (C) "courting crows": Here the word "counting" is changed to "courting", and "courting" is an actual word; however, "courting crows" does not sound as humorous as "counting cows". 43 | (D) "coutnting crows": Here the word "counting" is changed to "coutnting", but the word "coutnting" is not an actual word. 44 | Among the above, the only humorous edit is (B). So the answer is (B). 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/salient_translation_error_detection.txt: -------------------------------------------------------------------------------- 1 | Detect the type of error in an English translation of a German source sentence. 2 | 3 | Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: In der Liste der Baudenkmale in Lenzen (Elbe) sind alle Baudenkmale der brandenburgischen Stadt Lenzen (Elbe) und ihrer Ortsteile aufgelistet. 4 | Translation: In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed. 5 | The translation contains an error pertaining to 6 | Options: 7 | (A) Modifiers or Adjectives 8 | (B) Numerical Values 9 | (C) Negation or Antonyms 10 | (D) Named Entities 11 | (E) Dropped Content 12 | (F) Facts 13 | A: Let's think step by step. 14 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "The list of monuments in Lenzen (Elbe) includes all the monuments in the Brandenburg town of Lenzen (Elbe) and its districts." 
On the other hand, the provided translation is "In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed." Note that Lenzen (Elbe) is changed to Lenzen in the original translation; so, there is a named entity error. Because an entity in the original source sentence is changed to a different entity in the translation, the translation contains an error pertaining to Named Entities. So the answer is (D). 15 | 16 | Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Auf dieser Seite sind die Baudenkmäler der oberbayerischen Großen Kreisstadt Landsberg am Lech zusammengestellt. 17 | Translation: On this page are compiled the architectural monuments of the town of Landsberg am Lech. 18 | The translation contains an error pertaining to 19 | Options: 20 | (A) Modifiers or Adjectives 21 | (B) Numerical Values 22 | (C) Negation or Antonyms 23 | (D) Named Entities 24 | (E) Dropped Content 25 | (F) Facts 26 | A: Let's think step by step. 27 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. 
According to Google Translate, the correct translation of the source sentence from German to English is "The monuments of the Upper Bavarian district town of Landsberg am Lech are compiled on this page." On the other hand, the provided translation is "On this page are compiled the architectural monuments of the town of Landsberg am Lech." Note that an important detail about the location of Landsberg am Lech is omitted in the original translation: The translation should have said "Upper Bavarian district town of Landsberg am Lech". Because a significant clause in the translation was removed, the translation contains an error pertaining to Dropped Content. So the answer is (E). 28 | 29 | Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Łeba ist eine Kleinstadt und ein Badeort im Powiat Lęborski der polnischen Woiwodschaft Pommern. 30 | Translation: Eba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland. 31 | The translation contains an error pertaining to 32 | Options: 33 | (A) Modifiers or Adjectives 34 | (B) Numerical Values 35 | (C) Negation or Antonyms 36 | (D) Named Entities 37 | (E) Dropped Content 38 | (F) Facts 39 | A: Let's think step by step. 
40 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "Łeba is a small town and seaside resort in the Powiat Lęborski of the Polish Pomeranian Voivodeship." On the other hand, the provided translation is "Łeba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland." Note that the provided sentence says, "Łeba is not a small town ..." However, the translation should have been "Łeba is a small town ..." Because a negation is introduced at the beginning of the sentence and has fundamentally changed the meaning of the original source, the translation contains an error pertaining to Negation or Antonyms. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/snarks.txt: -------------------------------------------------------------------------------- 1 | Determine which of two sentences is sarcastic. 2 | 3 | According to Cambridge University Dictionary, sarcasm is "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way." Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks. 4 | 5 | Q: Which statement is sarcastic? 6 | Options: 7 | (A) Yes, because having interests and actively researching them is a huge waste 8 | (B) Yes, because having interests and actively researching them is a huge deal 9 | A: Let's think step by step. 10 | If we look at (A), it says that having interests and actively researching them is a huge waste, implying that it is a useless effort. 
However, we know that having interests and actively researching them is typically not a waste but rather is beneficial to the individual. The presence of such a juxtaposition in (A) suggests that it contains a taste of irony and sarcasm. 11 | If we look at (B), it says that having interests and actively researching them is a huge deal, implying that it is an important and consequential effort. This is arguably a neutral and correct statement. 12 | Among the above, the sarcastic option is (A). So the answer is (A). 13 | 14 | Q: Which statement is sarcastic? 15 | Options: 16 | (A) No one is going to disagree with you on this. Avoiding ad hominem attacks really help your case 17 | (B) No one is going to disagree with you on this. Ad hominem attacks really help your case 18 | A: Let's think step by step. 19 | If we look at (A), it says that avoiding ad hominem attacks really help your case, implying that ad hominem attacks are adverse and injurious. Because ad hominem attacks are addressed at a person rather than an idea, it is indeed true that avoiding them is often useful and helpful; so, (A) is a neutral (valid and agreeable) statement. 20 | If we look at (B), it says that ad hominem attacks really help your case, implying that ad hominem attacks are a positive thing. However, we stated previously that ad hominem attacks are often not useful or constructive. The speaker in this sentence therefore seems to mean the opposite of what they are saying; so, there appears to be a taste of irony and sarcasm in (B). 21 | Among the above, the sarcastic option is (B). So the answer is (B). 22 | 23 | Q: Which statement is sarcastic? 24 | Options: 25 | (A) Consistency in the league's punishments? What do you think this is supposed to be, politics? 26 | (B) Consistency in the league's punishments? What do you think this is supposed to be, moral? 27 | A: Let's think step by step. 28 | If we look at (A), it likens the consistency in the league's punishments with that in politics. 
Because politics or political affairs are often not considered to be consistent or dependable, this sentence appears to be satirical. 29 | If we look at (B), it likens the consistency in the league's punishments with that in morality. Discussing the consistency of the league's punishments in the context of morality, ethics, or law makes sense and does not appear to make a satirical point about anything. 30 | Among the above, the sarcastic option is (A). So the answer is (A). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/sports_understanding.txt: -------------------------------------------------------------------------------- 1 | Determine whether an artificially constructed sentence relating to sports is plausible or not. 2 | 3 | Q: Is the following sentence plausible? "Bam Adebayo scored a reverse layup in the Western Conference Finals." 4 | A: Let's think step by step. Bam Adebayo is an American basketball player. Scoring a reverse layup in the Western Conference Finals is part of the NBA Finals. So the answer is yes. 5 | 6 | Q: Is the following sentence plausible? "Santi Cazorla scored a touchdown." 7 | A: Let's think step by step. Santi Cazorla is a soccer player. Touchdown is part of American football and rugby. So the answer is no. 8 | 9 | Q: Is the following sentence plausible? "DeMar DeRozan was called for the goal tend." 10 | A: Let's think step by step. DeMar DeRozan is an American basketball player. Goal tending is part of basketball. So the answer is yes. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/temporal_sequences.txt: -------------------------------------------------------------------------------- 1 | Task description: Answer questions about which times certain events could have occurred. 2 | 3 | Q: Today, Emily went to the museum. Between what times could they have gone? 
4 | We know that: 5 | Emily woke up at 1pm. 6 | Elizabeth saw Emily reading at the library from 2pm to 4pm. 7 | Jessica saw Emily watching a movie at the theater from 4pm to 5pm. 8 | Leslie saw Emily waiting at the airport from 5pm to 6pm. 9 | William saw Emily buying clothes at the mall from 6pm to 7pm. 10 | The museum was closed after 7pm. 11 | Between what times could Emily have gone to the museum? 12 | Options: 13 | (A) 1pm to 2pm 14 | (B) 6pm to 7pm 15 | (C) 5pm to 6pm 16 | (D) 2pm to 4pm 17 | A: Let's think step by step. 18 | Wake-up time: 1pm. 19 | 1pm-2pm: free. 20 | 2pm-4pm: reading at the library. 21 | 4pm-5pm: watching a movie at the theater. 22 | 5pm-6pm: waiting at the airport. 23 | 6pm-7pm: buying clothes at the mall. 24 | The museum closure time: 7pm. 25 | The only time when Emily could have gone to the museum was 1pm to 2pm. So the answer is (A). 26 | 27 | Q: Today, Elizabeth went to the amusement park. Between what times could they have gone? 28 | We know that: 29 | Elizabeth woke up at 7am. 30 | David saw Elizabeth fixing their computer at the electronic store from 1pm to 2pm. 31 | Sarah saw Elizabeth playing tennis at the tennis court from 2pm to 3pm. 32 | Susan saw Elizabeth walking towards the Statue of Liberty from 3pm to 6pm. 33 | Andrew saw Elizabeth taking photos near the Eiffel Tower from 6pm to 9pm. 34 | Emily saw Elizabeth getting a coffee at the cafe from 9pm to 10pm. 35 | The amusement park was closed after 10pm. 36 | Between what times could Elizabeth have gone to the amusement park? 37 | Options: 38 | (A) 7am to 1pm 39 | (B) 9pm to 10pm 40 | (C) 1pm to 2pm 41 | (D) 3pm to 6pm 42 | A: Let's think step by step. 43 | Wake-up time: 7am. 44 | 7am-1pm: free. 45 | 1pm-2pm: fixing their computer at the electronic store. 46 | 2pm-3pm: playing tennis at the tennis court. 47 | 3pm-6pm: walking towards the Statue of Liberty. 48 | 6pm-9pm: taking photos near the Eiffel Tower. 49 | 9pm-10pm: getting a coffee at the cafe. 
50 | The amusement park closure time: 10pm. 51 | The only time when Elizabeth could have gone to the amusement park was 7am to 1pm. So the answer is (A). 52 | 53 | Q: Today, Tiffany went to the beach. Between what times could they have gone? 54 | We know that: 55 | Tiffany woke up at 5am. 56 | Betty saw Tiffany getting a coffee at the cafe from 5am to 6am. 57 | Jessica saw Tiffany working at the office from 6am to 9am. 58 | John saw Tiffany stretching at a yoga studio from 9am to 12pm. 59 | Sean saw Tiffany sitting on a rooftop from 12pm to 2pm. 60 | Sarah saw Tiffany playing tennis at the tennis court from 2pm to 3pm. 61 | The beach was closed after 4pm. 62 | Between what times could Tiffany have gone to the beach? 63 | Options: 64 | (A) 9am to 12pm 65 | (B) 12pm to 2pm 66 | (C) 5am to 6am 67 | (D) 3pm to 4pm 68 | A: Let's think step by step. 69 | Wake-up time: 5am. 70 | 5am-6am: getting a coffee at the cafe. 71 | 6am-9am: working at the office. 72 | 9am-12pm: stretching at a yoga studio. 73 | 12pm-2pm: sitting on a rooftop. 74 | 2pm-3pm: playing tennis at the tennis court. 75 | 3pm-4pm: free. 76 | The beach closure time: 4pm. 77 | The only time when Tiffany could have gone to the beach was 3pm to 4pm. So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/tracking_shuffled_objects_five_objects.txt: -------------------------------------------------------------------------------- 1 | A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps. 2 | 3 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball. 4 | As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. 
At the end of the game, Bob has the 5 | Options: 6 | (A) yellow ball 7 | (B) blue ball 8 | (C) pink ball 9 | A: Let's think step by step. 10 | (0) At the start: Alice: yellow, Bob: blue, Claire: pink. 11 | (1) Claire and Alice swap balls: Alice: pink, Bob: blue, Claire: yellow. 12 | (2) Alice and Bob swap balls: Alice: blue, Bob: pink, Claire: yellow. 13 | (3) Claire and Bob swap balls: Alice: blue, Bob: yellow, Claire: pink. 14 | At the end of the game, Bob has the yellow ball. So the answer is (A). 15 | 16 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 17 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. At the end of the game, Alice has the 18 | Options: 19 | (A) white ball 20 | (B) purple ball 21 | (C) pink ball 22 | A: Let's think step by step. 23 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 24 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 25 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 26 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 27 | At the end of the game, Alice has the pink ball. So the answer is (C). 28 | 29 | Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 30 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 31 | Options: 32 | (A) Lola 33 | (B) Rodrigo 34 | (C) Patrick 35 | A: Let's think step by step. 36 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 
37 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 38 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 39 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 40 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/tracking_shuffled_objects_seven_objects.txt: -------------------------------------------------------------------------------- 1 | A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps. 2 | 3 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball. 4 | As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the 5 | Options: 6 | (A) yellow ball 7 | (B) blue ball 8 | (C) pink ball 9 | A: Let's think step by step. 10 | (0) At the start: Alice: yellow, Bob: blue, Claire: pink. 11 | (1) Claire and Alice swap balls: Alice: pink, Bob: blue, Claire: yellow. 12 | (2) Alice and Bob swap balls: Alice: blue, Bob: pink, Claire: yellow. 13 | (3) Claire and Bob swap balls: Alice: blue, Bob: yellow, Claire: pink. 14 | At the end of the game, Bob has the yellow ball. So the answer is (A). 15 | 16 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 17 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. 
At the end of the game, Alice has the 18 | Options: 19 | (A) white ball 20 | (B) purple ball 21 | (C) pink ball 22 | A: Let's think step by step. 23 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 24 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 25 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 26 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 27 | At the end of the game, Alice has the pink ball. So the answer is (C). 28 | 29 | Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 30 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 31 | Options: 32 | (A) Lola 33 | (B) Rodrigo 34 | (C) Patrick 35 | A: Let's think step by step. 36 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 37 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 38 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 39 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 40 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/tracking_shuffled_objects_three_objects.txt: -------------------------------------------------------------------------------- 1 | A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps. 2 | 3 | Q: Alice, Bob, and Claire are playing a game. 
At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball. 4 | As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the 5 | Options: 6 | (A) yellow ball 7 | (B) blue ball 8 | (C) pink ball 9 | A: Let's think step by step. 10 | (0) At the start: Alice: yellow, Bob: blue, Claire: pink. 11 | (1) Claire and Alice swap balls: Alice: pink, Bob: blue, Claire: yellow. 12 | (2) Alice and Bob swap balls: Alice: blue, Bob: pink, Claire: yellow. 13 | (3) Claire and Bob swap balls: Alice: blue, Bob: yellow, Claire: pink. 14 | At the end of the game, Bob has the yellow ball. So the answer is (A). 15 | 16 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 17 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. At the end of the game, Alice has the 18 | Options: 19 | (A) white ball 20 | (B) purple ball 21 | (C) pink ball 22 | A: Let's think step by step. 23 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 24 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 25 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 26 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 27 | At the end of the game, Alice has the pink ball. So the answer is (C). 28 | 29 | Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 30 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. 
Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 31 | Options: 32 | (A) Lola 33 | (B) Rodrigo 34 | (C) Patrick 35 | A: Let's think step by step. 36 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 37 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 38 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 39 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 40 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/web_of_lies.txt: -------------------------------------------------------------------------------- 1 | Evaluate a random boolean function expressed as a word problem. 2 | 3 | Q: Question: Fidel tells the truth. Jerry says Fidel tells the truth. Vina says Jerry tells the truth. Millicent says Vina lies. Raymond says Millicent lies. Does Raymond tell the truth? 4 | A: Let's think step by step. 5 | (1) Fidel tells the truth. So, we know that Fidel tells the truth. 6 | (2) Jerry says Fidel tells the truth. Since we know from (1) that Fidel tells the truth, if Jerry says that Fidel tells the truth, then Jerry tells the truth. 7 | (3) Vina says Jerry tells the truth. Since we know from (2) that Jerry tells the truth, if Vina says Jerry tells the truth, then Vine tells the truth. 8 | (4) Millicent says Vina lies. Since we know from (3) that Vina tells the truth, if Millicent says Vina lies, then Millicent lies. 9 | (5) Raymond says Millicent lies. Since we know from (4) that Millicent lies, if Raymond says Millicent lies, then Raymond tells the truth. 10 | Now, the question asks: Does Raymond tell the truth? We know from (5) that Raymond tells the truth. So the answer is Yes. 11 | 12 | Q: Question: Kristian lies. Millie says Kristian lies. 
Maybelle says Millie tells the truth. Fidel says Maybelle lies. Leda says Fidel lies. Does Leda tell the truth? 13 | A: Let's think step by step. 14 | (1) Kristian lies. So, we know that Kristian lies. 15 | (2) Millie says Kristian lies. Since we know from (1) that Kristian lies, if Millie says Kristian lies, then Millie tells the truth. 16 | (3) Maybelle says Millie tells the truth. Since we know from (2) that Millie tells the truth, if Maybelle says Millie tells the truth, then Maybelle tells the truth. 17 | (4) Fidel says Maybelle lies. Since we know from (3) that Maybelle tells the truth, if Fidel says Maybelle lies, then Fidel lies. 18 | (5) Leda says Fidel lies. Since we know from (4) that Fidel lies, if Leda says Fidel lies, then Leda tells the truth. 19 | Now, the question asks: Does Leda tell the truth? We know from (5) that Leda tells the truth. So the answer is Yes. 20 | 21 | Q: Question: Kristian tells the truth. Michaela says Kristian lies. Raymond says Michaela tells the truth. Osvaldo says Raymond tells the truth. Jamey says Osvaldo tells the truth. Does Jamey tell the truth? 22 | A: Let's think step by step. 23 | (1) Kristian tells the truth. So, we know that Kristian tells the truth. 24 | (2) Michaela says Kristian lies. Since we know from (1) that Kristian tells the truth, if Michaela says Kristian lies, then Michaela lies. 25 | (3) Raymond says Michaela tells the truth. Since we know from (2) that Michaela lies, if Raymond says Michaela tells the truth, then Raymond lies. 26 | (4) Osvaldo says Raymond tells the truth. Since we know from (3) that Raymond lies, if Osvaldo says Raymond tells the truth, then Osvaldo lies. 27 | (5) Jamey says Osvaldo tells the truth. Since we know from (4) that Osvaldo lies, if Jamey says Osvaldo tells the truth, then Jamey lies. 28 | Now, the question asks: Does Jamey tell the truth? We know from (5) that Jamey lies. So the answer is No. 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/word_sorting.txt: -------------------------------------------------------------------------------- 1 | Sort a list of words. 2 | 3 | Q: Sort the following words alphabetically: List: oven costume counterpart 4 | A: Let's think step by step. 5 | The first letter: "oven": "o" (15). "costume": "c" (3). "counterpart": "c" (3). We now have: (3) ["costume" ? "counterpart"] < (15) "oven". Now let's sort this subpart ["costume" ? "counterpart"] by looking at their second letters. 6 | The second letter: "costume": "o" (15). "counterpart": "o" (15). We now have: (15) ["costume" ? "counterpart"]. Now let's sort this subpart ["costume" ? "counterpart"] by looking at their third letters. 7 | The third letter: "costume": "s" (19). "counterpart": "u" (21). We now have: (19) "costume" < (21) "counterpart". Hence, we have ["costume" < "counterpart"] < "oven". So the answer is costume counterpart oven. 8 | 9 | Q: Sort the following words alphabetically: List: hypochlorite ponderosa phone credulity 10 | A: Let's think step by step. 11 | The first letter: "hypochlorite": "h" (8). "ponderosa": "p" (16). "phone": "p" (16). "credulity": "c" (3). We now have: (3) "credulity" < (8) "hypochlorite" < (16) ["ponderosa" ? "phone"]. Now let's sort this subpart ["ponderosa" ? "phone"] by looking at their second letters. 12 | The second letter: "ponderosa": "o" (15). "phone": "h" (8). We now have: (8) "phone" < (15) "ponderosa". Hence, we have "credulity" < "hypochlorite" < ["phone" <"ponderosa"]. So the answer is credulity hypochlorite phone ponderosa. 13 | 14 | Q: Sort the following words alphabetically: List: newt arson parthia seismography mugho aspect census 15 | A: Let's think step by step. 16 | The first letter: "newt": "n" (14). "arson": "a" (1). "parthia": "p" (16). "seismography": "s" (19). "mugho": "m" (13). "aspect": "a" (1). "census": "c" (3). 
We now have: (1) ["arson" ? "aspect"] < (3) "census" < (13) "mugho" < (14) "newt" < (16) "parthia" < (19) "seismography". Now let's sort this subpart ["arson" ? "aspect"] by looking at their second letters. 17 | The second letter: "arson": "r" (18). "aspect": "s" (19). We now have: (18) "arson" < (19) "aspect". Hence, we have ["arson" < "aspect"] < "census" < "mugho" < "newt" < "parthia" < "seismography". So the answer is arson aspect census mugho newt parthia seismography. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/run_template_llama-3-8b.sh: -------------------------------------------------------------------------------- 1 | ### gsm8k 2 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 3 | 4 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 5 | 6 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 7 | 8 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 9 | 10 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file 
gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 11 | 12 | ###aqua 13 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 14 | 15 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 16 | 17 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 18 | 19 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 20 | 21 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 22 | 23 | ### bbh 24 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 
--streaming --streaming_gap 20 25 | 26 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 27 | 28 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 29 | 30 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/run_template_llama2-13b.sh: -------------------------------------------------------------------------------- 1 | ### gsm8k 2 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 3 | 4 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 5 | 6 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 
--rankv 2 --left 0.02 --streaming --streaming_gap 64 7 | 8 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 9 | 10 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 11 | 12 | ###aqua 13 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 14 | 15 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 16 | 17 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 18 | 19 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 20 | 21 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b 
--batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 22 | 23 | ### bbh 24 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 25 | 26 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 27 | 28 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 29 | 30 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /GenerationBench/Readme.md: -------------------------------------------------------------------------------- 1 | ## Generation Bench 2 | All results here are tested by simulated compression code. 3 | 4 | ## Reposity architecture 5 | ``` 6 | . 7 | ├── FinetunedTest 8 | ├── GenerationTest 9 | ``` 10 | `FinetunedTest` is using Llama-2-7b finetuned on GSM8K and test GEAR on GSM8K. Thanks for [Yixiao Li](https://scholar.google.com/citations?user=KZIAP7MAAAAJ&hl=en) who shared the model for us. 
11 | 12 | `GenerationTest` is using Llama-2-7b,Llama-2-13b and Mistral-7b with GEAR test on GSM8K, GSM8K-COT, MMLU, MMLU-COT, and BBH-COT. 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Intel Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | ![license](https://img.shields.io/badge/License-MIT-green.svg?labelColor=gray) 2 | [![arxiv](http://img.shields.io/badge/arxiv-2310.04562-yellow.svg)](https://arxiv.org/pdf/2403.05527.pdf) 3 | ## Todo List. 4 | 1. simluated code for gsm8k-5shot, bbh-3shot and aqua-8shot with cot prompt on llama models ✔️ 5 | 2. 
Fused quantization supported for GEAR ✔️ 6 | 3. More cuda kernel optimization 7 | 4. GEAR supported with lm-harness 8 | 5. Combining with other inference algorithm/system 9 | 6. wrap up a python package 10 | ## GEAR: An Efficient KV Cache Compression Recipe for Near-Lossless Generative Inference of LLM # 11 | 12 | Official repo for `GEAR: An Efficient Error Reduction Framework for KV Cache Compression in LLM Inference.` `GEAR` is a "plug-and-play" inference only KV compression method. 13 | `GEAR` augments any quantization scheme(e.g. KIVI, KCVT and Flexgen) via an error recovery solution to boost the model accuracy while saving memory. 14 | 15 | Here, `GEAR` is the abbreviation of `Ge`nerative Inference with LLM via `A`pproximation and Error `R`ecovery. 16 | 17 | ## Overview 18 | GEAR is an efficient KV cache compression framework that achieves 19 | near-lossless high-ratio compression. GEAR first applies quantization to majority of entries of 20 | similar magnitudes to ultra-low precision. It then employs a low-rank matrix to approximate 21 | the quantization error, and a sparse matrix to remedy individual errors from outlier entries. 22 | 23 | GEAR does not need to preserve any first or last tokens uncompressed like other low bit compression algorithms to achieve near lossless KV cache compression for LLMs. 24 |


25 | 26 | ## How to use GEAR 27 | ```bash 28 | conda create -n GEAR python==3.10 29 | conda activate GEAR 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ### Repository architecture 34 | ``` 35 | . 36 | ├── GenerationBench 37 | ``` 38 | `cuda_supported_gear` GEAR-KIVI implementation with fused kernel supported. 39 | 40 | `GenerationBench` is simulated compression tested on finetuned and un-finetuned models with BBH, GSM8K, and Aqua dataset. 41 | 42 | ## Developers 43 | 44 | - [Hao Kang*](https://haokang-timmy.github.io/)(Georgia Tech) 45 | - [Qingru Zhang*](https://www.linkedin.com/in/qingru-zhang-4b789a187/)(Georgia Tech) 46 | - [Souvik Kundu](https://ksouvik52.github.io/)(Intel) 47 | - [Geonhwa Jeong](https://ghjeong12.github.io/)(Georgia Tech) 48 | - [Zaoxing Liu](https://zaoxing.github.io/)(University of Maryland) 49 | - [Tushar Krishna](https://www.linkedin.com/in/tushar-krishna-a60b0970/)(Georgia Tech) 50 | - [Tuo Zhao](https://www2.isye.gatech.edu/~tzhao80/)(Georgia Tech) 51 | 52 | 53 | ## Citation 54 | Version 2 will be updated soon. Currently it is version 1. 55 | [link to paper](https://arxiv.org/pdf/2403.05527.pdf) 56 | ``` 57 | @misc{kang2024gear, 58 | title={GEAR: An Efficient KV Cache Compression Recipe for Near-Lossless Generative Inference of LLM}, 59 | author={Hao Kang and Qingru Zhang and Souvik Kundu and Geonhwa Jeong and Zaoxing Liu and Tushar Krishna and Tuo Zhao}, 60 | year={2024}, 61 | eprint={2403.05527}, 62 | archivePrefix={arXiv}, 63 | primaryClass={cs.LG} 64 | } 65 | ``` 66 | ## Contributing 67 | We welcome everyone to contribute to this repository by raising PRs. If there is any problem, you may also send an email to hkang342@gatech.edu. 68 | 69 | ## Disclaimer 70 | This “research quality code” is for Non-Commercial purposes and provided by the contributors “As Is” without any express or implied warranty of any kind.
The organizations (Intel or georgia Tech) involved do not own the rights to this data set and do not confer any rights to it. The organizations (Intel or georgia Tech) do not warrant or assume responsibility for the accuracy or completeness of any information, text, graphics, links or other items within the code. A thorough security review has not been performed on this code. Additionally, this repository may contain components that are out of date or contain known security vulnerabilities. 71 | -------------------------------------------------------------------------------- /cuda_supported_gear/quant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengear-project/GEAR/efe0bfe2b5a35b22122f7cefab5cde9db13a66cd/cuda_supported_gear/quant/__init__.py -------------------------------------------------------------------------------- /cuda_supported_gear/quant/csrc/gemv_cuda.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | torch::Tensor gemv_forward_cuda( 5 | torch::Tensor _in_feats, 6 | torch::Tensor _kernel, 7 | torch::Tensor _scaling_factors, 8 | torch::Tensor _zeros, 9 | const int bit, 10 | const int group_size); 11 | 12 | 13 | torch::Tensor gemv_forward_cuda_outer_dim( 14 | torch::Tensor _in_feats, 15 | torch::Tensor _kernel, 16 | torch::Tensor _scaling_factors, 17 | torch::Tensor _zeros, 18 | const int bit, 19 | const int group_size, 20 | const int nh, 21 | const bool mqa); -------------------------------------------------------------------------------- /cuda_supported_gear/quant/csrc/pybind.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "gemv_cuda.h" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 6 | { 7 | m.def("gemv_forward_cuda", &gemv_forward_cuda); 8 | m.def("gemv_forward_cuda_outer_dim", &gemv_forward_cuda_outer_dim); 9 | } 
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/gemv.py:
--------------------------------------------------------------------------------
import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"
os.environ["CUDA_LAUNCH_BLOCKING"]="1"
import numpy as np
import torch
import ipdb
import random
import triton
import triton.language as tl
from new_pack import pack_tensor
from timeit_v2 import py_benchmark
import kivi_gemv

# Global problem sizes shared by all tests below:
# B = batch, nh = num attention heads, IC = in features, OC = out features.
B, nh, IC, OC = 8, 32, 739, 128

@triton.jit
def gemv_kernel_g64(inputs_ptr, qw_ptr, mn_ptr,
                    scale_ptr, output_ptr,
                    IC: tl.constexpr, OC: tl.constexpr, bit: tl.constexpr,
                    OC_PER_PH: tl.constexpr, PACK_FACTOR: tl.constexpr, BLOCK_SIZE):
    """
    Computes GEMV (group_size = 64).

    Args:
        inputs: vector of shape [batch_size, IC];
        qw: matrix of shape [OC, IC / 8];
        output: vector of shape [OC];
        mn: matrix of shape [OC, NG];
        scale: matrix of shape [OC, NG];

    Notes:
        One cannot infer group_size from the shape of scaling factors.
        the second dimension is rounded up to a multiple of PACK_FACTOR.
    """
    group_size = 64
    # Each program instance handles OC_PER_PH output channels of one batch row.
    oc_idx = tl.program_id(axis=0) * OC_PER_PH + tl.arange(0, OC_PER_PH)
    batch_idx = tl.program_id(axis=1)
    num_groups = IC // group_size
    num_groups_packed = tl.cdiv(num_groups, PACK_FACTOR)
    # tl.store(output_ptr, num_groups_packed)
    weight_w = IC // PACK_FACTOR
    # Mask selecting one `bit`-wide field out of a packed word (e.g. 0xF for 4-bit).
    num = 0xFF >> (8-bit)
    accumulator = tl.zeros((OC_PER_PH,), dtype=tl.float32)
    for group_idx in range(0, num_groups):
        # load scaling factors
        # each time we load 4 OC x 1 G
        scale = tl.load(scale_ptr + oc_idx[:, None] * num_groups + group_idx)
        mn = tl.load(mn_ptr + oc_idx[:, None] * num_groups + group_idx)
        # 1 G -> 64 numbers -> 64 // PACK_FACTOR packed numbers
        cur_qw_ptr = qw_ptr + oc_idx[:, None] * weight_w + group_idx * (64 // PACK_FACTOR) + tl.arange(0, 64 // PACK_FACTOR)[None, :]
        qw = tl.load(cur_qw_ptr)
        for i in range(PACK_FACTOR):
            # Extract field i of every packed word, dequantize: w = q * scale + mn.
            w_fp = qw & num
            # load 4 OC x
            w_fp = w_fp * scale + mn
            qw = qw >> bit
            # Input index for field i of packed word j is group*64 + j*PACK_FACTOR + i,
            # matching pack_tensor's packing order.
            cur_inp_ptr = inputs_ptr + batch_idx * IC + group_idx * 64 + i + tl.arange(0, 64 // PACK_FACTOR)[None, :] * PACK_FACTOR
            cur_input = tl.load(cur_inp_ptr)
            accumulator += tl.sum(cur_input * w_fp, 1)
    ptr = output_ptr + oc_idx + batch_idx * OC
    tl.store(ptr, accumulator)


def dequant_weight(w, scale, mn, gs):
    # Per-group dequantization along the last dim: w_fp = w * scale + mn,
    # with groups of size `gs`.
    w_fp = w.half().view(w.shape[0], w.shape[1]//gs, gs)
    w_fp = w_fp * scale.unsqueeze(-1) + mn.unsqueeze(-1)
    return w_fp.view(w.shape)


def dequant_weight_outer(w, scale, mn, gs):
    # ipdb.set_trace()
    # Same as dequant_weight, but for 3-D (batched) weight tensors.
    w_fp = w.half().view(w.shape[0], w.shape[1], w.shape[2]//gs, gs)
    w_fp = w_fp * scale.unsqueeze(-1) + mn.unsqueeze(-1)
    return w_fp.view(w.shape)


def gemv_fwd(bit, group_size, inp, qweight, mn, scale):
    # Launch the Triton GEMV kernel above (group_size is fixed to 64).
    B, IC = inp.shape
    OC = qweight.shape[0]
    BLOCK_SIZE = 32
    OC_PER_PH = 32
    PACK_FACTOR = 32 // bit
    assert group_size == 64
    output = torch.empty((B, OC),
                         device=inp.device, dtype=torch.float16)
    grid = lambda META: (
        triton.cdiv(OC, META['OC_PER_PH']), B
    )
    gemv_kernel_g64[grid](inp, qweight, mn, scale, output,
                          IC, OC, bit, OC_PER_PH, PACK_FACTOR, BLOCK_SIZE)
    return output


def test_bgemv_outer_correct_mha():
    # Correctness of the fused CUDA batched GEMV (multi-head attention layout)
    # against a reference matmul on dequantized weights.
    flatten_B = B * nh
    inp = torch.randn((flatten_B, 1, IC), device='cuda', dtype=torch.float16)
    ori_weight = torch.randn((flatten_B, IC, OC), device='cuda', dtype=torch.float16)
    GS = 32
    for BIT in [2, 4]:
        weight = ori_weight
        PACK_FACTOR = 32 // BIT
        assert OC % GS == 0 and OC % PACK_FACTOR == 0
        NG = OC // GS
        # Asymmetric min/max quantization per group of GS output features.
        weight = weight.view(flatten_B, IC, NG, GS)
        mx = torch.max(weight, dim=-1, keepdim=False)[0]
        mn = torch.min(weight, dim=-1, keepdim=False)[0]
        maxq = 2 ** BIT - 1
        scale = (mx - mn) / maxq
        weight = weight - mn.unsqueeze(-1)
        weight.div_(scale.unsqueeze(-1))
        weight = weight.clamp_(0, maxq).round_().to(torch.int32)
        weight = weight.view(flatten_B, IC, OC)
        qweight = pack_tensor(weight, BIT, 2)
        weight = weight.transpose(1, 2).contiguous()
        qweight = qweight.transpose(1, 2).contiguous()
        scale = scale.transpose(1, 2).contiguous()
        mn = mn.transpose(1, 2).contiguous()
        output = kivi_gemv.gemv_forward_cuda_outer_dim(inp, qweight, scale, mn, BIT, GS, nh, False)
        deq_w = dequant_weight_outer(weight.transpose(1, 2),
                                     scale.transpose(1, 2),
                                     mn.transpose(1, 2), GS)
        # rel_error = torch.abs((deq_w - ori_weight).float() / (ori_weight + 1e-5).float()).mean()
        # print(f'bit {BIT} avg rel weight quant error: {rel_error}')
        output_ref = inp @ deq_w
        error = output_ref - output
        rel_out_error = torch.abs(error.float() / (torch.abs(output_ref).float()+1e-5)).mean()
        print(f'mha bit {BIT} avg rel out quant error: {rel_out_error}')


def test_bgemv_outer_correct_mqa():
    # Same as the MHA test but with one shared weight per batch (multi-query).
    flatten_B = B * nh
    inp
= torch.randn((flatten_B, 1, IC), device='cuda', dtype=torch.float16) 132 | ori_weight = torch.randn((B, IC, OC), device='cuda', dtype=torch.float16) 133 | GS = 32 134 | for BIT in [2, 4]: 135 | weight = ori_weight 136 | PACK_FACTOR = 32 // BIT 137 | assert OC % GS == 0 and OC % PACK_FACTOR == 0 138 | NG = OC // GS 139 | weight = weight.view(B, IC, NG, GS) 140 | mx = torch.max(weight, dim=-1, keepdim=False)[0] 141 | mn = torch.min(weight, dim=-1, keepdim=False)[0] 142 | maxq = 2 ** BIT - 1 143 | scale = (mx - mn) / maxq 144 | weight = weight - mn.unsqueeze(-1) 145 | weight.div_(scale.unsqueeze(-1)) 146 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 147 | weight = weight.view(B, IC, OC) 148 | qweight = pack_tensor(weight, BIT, 2) 149 | inp = inp.contiguous() 150 | weight = weight.transpose(1, 2).contiguous() 151 | qweight = qweight.transpose(1, 2).contiguous() 152 | scale = scale.transpose(1, 2).contiguous() 153 | mn = mn.transpose(1, 2).contiguous() 154 | output = kivi_gemv.gemv_forward_cuda_outer_dim(inp, qweight, scale, mn, BIT, GS, nh, True) 155 | deq_w = dequant_weight_outer(weight.transpose(1, 2), 156 | scale.transpose(1, 2), 157 | mn.transpose(1, 2), GS) 158 | # rel_error = torch.abs((deq_w - ori_weight).float() / (ori_weight + 1e-5).float()).mean() 159 | # print(f'bit {BIT} avg rel weight quant error: {rel_error}') 160 | output_ref = inp.view(B, nh, 1, IC) @ deq_w.view(B, 1, IC, OC) 161 | output_ref = output_ref.view(flatten_B, 1, OC) 162 | error = output_ref - output 163 | # ipdb.set_trace() 164 | rel_out_error = torch.abs(error.float() / (torch.abs(output_ref).float()+1e-5)).mean() 165 | print(f'mqa bit {BIT} avg rel out quant error: {rel_out_error}') 166 | 167 | 168 | def test_gemv_correct(): 169 | inp = torch.randn((B, IC), device='cuda', dtype=torch.float16) 170 | ori_weight = torch.randn((OC, IC), device='cuda', dtype=torch.float16) 171 | GS = 64 172 | for BIT in [4]: 173 | weight = ori_weight 174 | PACK_FACTOR = 32 // BIT 175 | assert IC % 
GS == 0 and IC % PACK_FACTOR == 0 176 | NG = IC // GS 177 | weight = weight.view(OC, NG, GS) 178 | mx = torch.max(weight, dim=2, keepdim=False)[0] 179 | mn = torch.min(weight, dim=2, keepdim=False)[0] 180 | maxq = 2 ** BIT - 1 181 | scale = (mx - mn) / maxq 182 | weight = weight - mn.unsqueeze(-1) 183 | weight.div_(scale.unsqueeze(-1)) 184 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 185 | weight = weight.view(OC, IC) 186 | qweight = pack_tensor(weight, BIT, 1) 187 | # output = gemv_fwd(BIT, GS, inp, qweight, mn, scale) 188 | output = kivi_gemv.gemv_forward_cuda(inp, qweight, scale, mn, BIT, GS) 189 | deq_w = dequant_weight(weight, scale, mn, GS) 190 | rel_error = torch.abs((deq_w - ori_weight).float() / (ori_weight + 1e-5).float()).mean() 191 | # print(f'bit {BIT} avg rel weight quant error: {rel_error}') 192 | output_ref = inp @ deq_w.T 193 | error = output_ref - output 194 | rel_out_error = torch.abs(error.float() / (output_ref + 1e-5).float()).mean() 195 | print(f'bit {BIT} avg rel out quant error: {rel_out_error}') 196 | 197 | 198 | def test_gemv_speed(): 199 | inp = torch.randn((B, IC), device='cuda', dtype=torch.float16) 200 | ori_weight = torch.randn((OC, IC), device='cuda', dtype=torch.float16) 201 | weight = ori_weight 202 | BIT = 4 203 | GS = 64 204 | PACK_FACTOR = 32 // BIT 205 | assert IC % GS == 0 and IC % PACK_FACTOR == 0 206 | NG = IC // GS 207 | weight = weight.view(OC, NG, GS) 208 | mx = torch.max(weight, dim=2, keepdim=False)[0] 209 | mn = torch.min(weight, dim=2, keepdim=False)[0] 210 | maxq = 2 ** BIT - 1 211 | scale = (mx - mn) / maxq 212 | weight = weight - mn.unsqueeze(-1) 213 | weight.div_(scale.unsqueeze(-1)) 214 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 215 | weight = weight.view(OC, IC) 216 | qweight = pack_tensor(weight, BIT, 1) 217 | output = gemv_fwd(BIT, GS, inp, qweight, mn, scale) 218 | deq_w = dequant_weight(weight, scale, mn, GS) 219 | stmt = "inp @ deq_w.T" 220 | t_ref = py_benchmark(stmt, 
{**globals(), **locals()}, min_repeat_second=1, 221 | setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()") 222 | # stmt = "gemv_fwd(BIT, GS, inp, qweight, mn, scale)" 223 | # t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1, 224 | # setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()") 225 | stmt = "kivi_gemv.gemv_forward_cuda(inp, qweight, scale, mn, BIT, GS)" 226 | t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1, 227 | setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()") 228 | print(f'vanilla pytorch gemv: {t_ref * 1000} ms') 229 | print(f'awq fused IC {IC} OC {OC} {BIT}-bit gemv: {t_our * 1000} ms') 230 | 231 | 232 | def test_bgemv_outer_speed(): 233 | inp = torch.randn((B, 1, IC), device='cuda', dtype=torch.float16) 234 | ori_weight = torch.randn((B, IC, OC), device='cuda', dtype=torch.float16) 235 | GS = 64 236 | for BIT in [2]: 237 | weight = ori_weight 238 | PACK_FACTOR = 32 // BIT 239 | assert OC % GS == 0 and OC % PACK_FACTOR == 0 240 | NG = OC // GS 241 | weight = weight.view(B, IC, NG, GS) 242 | mx = torch.max(weight, dim=-1, keepdim=False)[0] 243 | mn = torch.min(weight, dim=-1, keepdim=False)[0] 244 | maxq = 2 ** BIT - 1 245 | scale = (mx - mn) / maxq 246 | weight = weight - mn.unsqueeze(-1) 247 | weight.div_(scale.unsqueeze(-1)) 248 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 249 | weight = weight.view(B, IC, OC) 250 | qweight = pack_tensor(weight, BIT, 2) 251 | weight = weight.transpose(1, 2).contiguous() 252 | qweight = qweight.transpose(1, 2).contiguous() 253 | scale = scale.transpose(1, 2).contiguous() 254 | mn = mn.transpose(1, 2).contiguous() 255 | deq_w = dequant_weight_outer(weight.transpose(1, 2), 256 | scale.transpose(1, 2), 257 | mn.transpose(1, 2), GS) 258 | stmt = "inp @ deq_w" 259 | t_ref = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1, 260 | setup="torch.cuda.synchronize()", 
finish="torch.cuda.synchronize()")
        # stmt = "gemv_fwd(BIT, GS, inp, qweight, mn, scale)"
        # t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1,
        #                      setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
        stmt = "kivi_gemv.gemv_forward_cuda_outer_dim(inp, qweight, scale, mn, BIT, GS)"
        t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1,
                             setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
        print(f'BS {B} IC {IC} OC {OC} pytorch batched gemv: {t_ref * 1000} ms')
        print(f'our fused BS {B} IC {IC} OC {OC} {BIT}-bit outer-dim batched gemv: {t_our * 1000} ms')

if __name__ == "__main__":
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    # test_gemv_correct()
    test_bgemv_outer_correct_mha()
    test_bgemv_outer_correct_mqa()
    # test_gemv_speed()
    # test_bgemv_outer_speed()
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/matmul.py:
--------------------------------------------------------------------------------
import torch
import ipdb
import random
import triton
import triton.language as tl
import kivi_gemv


@triton.jit
def qbvm_kernel(
    bits,
    a_ptr, b_ptr, c_ptr,
    scales_ptr, zeros_ptr,
    M, N, K,
    stride_abatch, stride_am, stride_ak,
    stride_bbatch, stride_bk, stride_bn,
    stride_cbatch, stride_cm, stride_cn,
    stride_scales_b, stride_scales_k, stride_scales_g,
    stride_zeros_b, stride_zeros_k, stride_zeros_g,
    groupsize,
    BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr,
):
    """
    Compute the batch matrix multiplication C = A x B.
    A is of shape (B, 1, K) float16
    B is of shape (B, K, N//feat_per_int) int32
    C is of shape (B, 1, N) float16
    scales is of shape (B, K, G) float16
    zeros is of shape (B, K, G) float16
    groupsize is an int specifying the size of groups for scales and zeros.
    G is N // groupsize.
    Set NO_GROUPS to groupsize == K, in which case G = 1 and the kernel is more efficient.

    WARNING: This kernel assumes that K is a multiple of BLOCK_SIZE_K.
    WARNING: This kernel assumes that N is a multiple of BLOCK_SIZE_N.
    WARNING: This kernel assumes that groupsize is a multiple of BLOCK_SIZE_K.
    """
    # One program per (batch, N-tile).
    pid_batch = tl.program_id(axis=0)
    pid = tl.program_id(axis=1)
    feat_per_int = 32 // bits
    num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
    num_pid_k = tl.cdiv(K, BLOCK_SIZE_K)
    pid_n = pid % num_pid_n
    offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))
    offs_k = tl.arange(0, BLOCK_SIZE_K)
    a_batch_offset = (pid_batch * stride_abatch)
    b_batch_offset = (pid_batch * stride_bbatch)
    c_batch_offset = (pid_batch * stride_cbatch)
    a_ptr = a_ptr + a_batch_offset
    b_ptr = b_ptr + b_batch_offset
    c_ptr = c_ptr + c_batch_offset
    a_ptrs = a_ptr + (offs_k[:, None] * stride_ak)  # (BLOCK_SIZE_K, 1)
    # a_mask = (offs_am[:, None] < M)
    # b_ptrs is set up such that it repeats elements along the N axis feat_per_int times
    b_ptrs = b_ptr + (offs_k[:, None] * stride_bk + (offs_bn[None, :]//feat_per_int) * stride_bn)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
    # shifter is used to extract the # bits bits of each element in the 32-bit word from B
    shifter = (offs_bn % feat_per_int) * bits
    scales_ptr = scales_ptr + pid_batch*stride_scales_b + ((offs_bn[None, :] // groupsize)) * stride_scales_g  # (BLOCK_SIZE_N,)
    zeros_ptr = zeros_ptr + pid_batch*stride_zeros_b + ((offs_bn[None, :] // groupsize)) * stride_zeros_g  # (BLOCK_SIZE_N,)

    # Now calculate a block of output of shape (BLOCK_SIZE_M, BLOCK_SIZE_N)
    # M is along the batch dimension, N is along the outfeatures dimension, K is along the infeatures dimension
    # So this loop is along the infeatures dimension (K)
    # It's calculating BLOCK_SIZE_M batches in parallel, and for each batch, BLOCK_SIZE_N outfeatures in parallel
    # accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
    accumulator = tl.zeros((BLOCK_SIZE_N,), dtype=tl.float32)
    # Mask selecting one `bits`-wide field from a packed word (0xF for 4-bit).
    num = 0xFF >> (8-bits)
    for pid_k in range(0, num_pid_k):
        offs_bk = (offs_k[:, None] + pid_k * BLOCK_SIZE_K)
        # offs_k[None, :] < K - pid_k * BLOCK_SIZE_K
        a = tl.load(a_ptrs, mask=offs_bk < K, other=0.)  # (1, BLOCK_SIZE_K)
        b = tl.load(b_ptrs, mask=offs_bk < K, other=0.)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
        ptr = scales_ptr + offs_bk * stride_scales_k
        scales = tl.load(ptr, mask=offs_bk < K, other=0.)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
        ptr = zeros_ptr + offs_bk * stride_zeros_k
        zeros = tl.load(ptr, mask=offs_bk < K, other=0.)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
        # Now we need to unpack b into 32-bit values
        # tl.device_print("scale ",scales.dtype)
        # tl.device_print("zeros ",zeros.dtype)
        b = (b >> shifter[None, :]) & num  # For 4-bit values, bit_op_num is 0xF
        b = b * scales + zeros  # Scale and shift
        accumulator += tl.sum(a * b, 0)  # tl.dot(a, b)
        # if pid_m == 0 and pid_n == 0:
        #    tl.device_print("hello ", tl.dot(a, b).shape)
        a_ptrs += BLOCK_SIZE_K * stride_ak
        b_ptrs += BLOCK_SIZE_K * stride_bk
    c = accumulator  # .to(tl.float16)
    # c = accumulator
    # Store the result
    offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    c_ptrs = c_ptr + stride_cn * offs_cn
    c_mask = (offs_cn < N)
    tl.store(c_ptrs, c, mask=c_mask)


def understand_code():
    # Pedagogical helper: prints the grouped (pid_m, pid_n) schedule that the
    # standard Triton matmul tutorial uses. Not called by the library code.
    M, N, K = 512, 256, 256
    BLOCK_SIZE_M, BLOCK_SIZE_N, GROUP_SIZE_M = 64, 64, 4
    total_program_id = triton.cdiv(M, BLOCK_SIZE_M) * triton.cdiv(N, BLOCK_SIZE_N)
    for pid in range(0, total_program_id):
        num_pid_m = triton.cdiv(M, BLOCK_SIZE_M)
        num_pid_n = triton.cdiv(N, BLOCK_SIZE_N)
        num_pid_in_group = GROUP_SIZE_M * num_pid_n
        group_id = pid // num_pid_in_group
        first_pid_m = group_id * GROUP_SIZE_M
        group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
        pid_m = first_pid_m + (pid % group_size_m)
        pid_n = (pid % num_pid_in_group) // group_size_m
        print(f"pid={pid}, pid_m={pid_m}, pid_n={pid_n}")


def triton_bmm_fA_qB_outer(group_size: int,
                           fA: torch.FloatTensor,
                           qB: torch.IntTensor,
                           scales: torch.FloatTensor,
                           zeros: torch.FloatTensor,
                           bits: int) -> torch.FloatTensor:
    """
    Compute the matrix multiplication C = query x key.
    Where key is quantized into 2-bit values.
121 | 122 | fA is of shape (B, nh, M, K) float16 123 | qB is of shape (B, nh, K, N // feat_per_int) int32 124 | scales is of shape (B, nh, K, G) float16 125 | zeros is of shape (B, nh, K, G) float16 126 | 127 | groupsize is the number of outer dimensions in each group. 128 | G = N // groupsize 129 | 130 | Returns C of shape (B, nh, M, N) float16 131 | """ 132 | assert len(fA.shape) == 4 and len(qB.shape) == 4 133 | B, nh, M, K = fA.shape 134 | feat_per_int = 32 // bits 135 | # flatten to a 3D tensor 136 | fA = fA.view(-1, M, K) 137 | N = qB.shape[-1] * feat_per_int 138 | qB = qB.reshape(-1, K, qB.shape[-1]) 139 | # This is based on the possible BLOCK_SIZE_Ks 140 | # assert K % 16 == 0 and K % 32 == 0 and K % 64 == 0 and K % 128 == 0, "K must be a multiple of 16, 32, 64, and 128" 141 | # This is based on the possible BLOCK_SIZE_Ns 142 | assert N % 16 == 0 and N % 32 == 0 and N % 64 == 0, "N must be a multiple of 16, 32, 64, 128, and 256" 143 | # This is based on the possible BLOCK_SIZE_Ks 144 | assert group_size % 64 == 0, "groupsize must be a multiple of 64, and 128" 145 | flatten_B = B * nh 146 | c = torch.empty((flatten_B, M, N), device='cuda', dtype=torch.float16) 147 | # print(f'M {M} N {N} K {K}') 148 | grid = lambda META: ( 149 | flatten_B, triton.cdiv(N, META['BLOCK_SIZE_N']), 150 | ) 151 | scales = scales.view(flatten_B, scales.shape[-2], scales.shape[-1]) 152 | zeros = zeros.view(flatten_B, zeros.shape[-2], zeros.shape[-1]) 153 | if N > K: 154 | BLOCK_SIZE_N = 128 155 | BLOCK_SIZE_K = 32 156 | num_warps=4 # 157 | else: 158 | BLOCK_SIZE_N = 32 159 | BLOCK_SIZE_K = 128 160 | num_warps = 2 161 | num_stages= 7 if K > 64 else 3 # 162 | qbvm_kernel[grid]( 163 | bits, 164 | fA, qB, c, 165 | scales, zeros, 166 | M, N, K, 167 | fA.stride(0), fA.stride(1), fA.stride(2), 168 | qB.stride(0), qB.stride(1), qB.stride(2), 169 | c.stride(0), c.stride(1), c.stride(2), 170 | scales.stride(0), scales.stride(1), scales.stride(2), 171 | zeros.stride(0), zeros.stride(1), 
scales.stride(2), 172 | group_size, BLOCK_SIZE_N, BLOCK_SIZE_K, 173 | num_warps=num_warps, num_stages=num_stages 174 | ) 175 | return c.view(B, nh, c.shape[-2], c.shape[-1]) 176 | 177 | 178 | def cuda_bmm_fA_qB_outer(group_size: int, 179 | fA: torch.FloatTensor, 180 | qB: torch.IntTensor, 181 | scales: torch.FloatTensor, 182 | zeros: torch.FloatTensor, 183 | bits: int, 184 | mqa: bool=False) -> torch.FloatTensor: 185 | """ 186 | Compute the matrix multiplication C = query x key. 187 | Where key is quantized into 2-bit values. 188 | 189 | fA is of shape (B, nh, M, K) float16 190 | qB is of shape (B, nh, K, N // feat_per_int) int32 191 | scales is of shape (B, nh, K, G) float16 192 | zeros is of shape (B, nh, K, G) float16 193 | 194 | groupsize is the number of outer dimensions in each group. 195 | G = N // groupsize 196 | 197 | Returns C of shape (B, nh, M, N) float16 198 | """ 199 | assert len(fA.shape) == 4 and len(qB.shape) == 4 200 | B, nh, M, K = fA.shape 201 | feat_per_int = 32 // bits 202 | # flatten to a 3D tensor 203 | fA = fA.view(-1, M, K).contiguous() 204 | N = qB.shape[-1] * feat_per_int 205 | qB = qB.reshape(-1, K, qB.shape[-1]).transpose(1, 2).contiguous() 206 | # This is based on the possible BLOCK_SIZE_Ks 207 | # assert K % 16 == 0 and K % 32 == 0 and K % 64 == 0 and K % 128 == 0, "K must be a multiple of 16, 32, 64, and 128" 208 | # This is based on the possible BLOCK_SIZE_Ns 209 | # assert N % 16 == 0 and N % 32 == 0 and N % 64 == 0, "N must be a multiple of 16, 32, 64, 128, and 256" 210 | # This is based on the possible BLOCK_SIZE_Ks 211 | # assert group_size % 64 == 0, "groupsize must be a multiple of 64, and 128" 212 | flatten_B = B * nh 213 | if mqa: 214 | flatten_B = B 215 | scales = scales.view(flatten_B, scales.shape[-2], scales.shape[-1]).transpose(1, 2).contiguous() 216 | zeros = zeros.view(flatten_B, zeros.shape[-2], zeros.shape[-1]).transpose(1, 2).contiguous() 217 | assert bits in [2, 4] 218 | c = 
kivi_gemv.gemv_forward_cuda_outer_dim(fA, qB, scales, zeros, bits, group_size, nh, mqa)
    c = c.view(B, nh, c.shape[-2], c.shape[-1])
    return c
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/qmodule.py:
--------------------------------------------------------------------------------
import torch
import dequant_cuda
from pack import quantize_and_pack


def make_divisible(c, divisor):
    # Smallest multiple count: ceil(c / divisor).
    return (c + divisor - 1) // divisor


def calculate_zeros_width(in_features, group_size=128, pack_num=8):
    # Width (in int32 words) of the packed zero-points buffer, rounded up
    # so it is a multiple of both pack_num and the group-size multiplier.
    if group_size >= 128:
        size_multiplier = 1
    elif group_size == 64:
        size_multiplier = 2
    elif group_size == 32:
        size_multiplier = 4
    else:
        raise NotImplementedError

    base_width = make_divisible(in_features // group_size, pack_num)
    base_width = make_divisible(base_width, size_multiplier) * size_multiplier
    return base_width


def dequantize_weight(qweight, d_out, d_in, w_bit, scales, zeros, group_size):
    # Unpack + dequantize a packed weight via the dequant_cuda extension.
    data = qweight.reshape(-1)
    N, num_features = d_out, d_in
    weight_fp = dequant_cuda.unpack_single_precision(data, w_bit, scales, zeros, N,
                                                     num_features // group_size, group_size)
    return weight_fp.view(d_out, d_in)


class MatMul4Bit(torch.autograd.Function):
    # forward is the same, but we added the fallback for pre-turing GPUs
    # backward is mostly the same, but adds one extra clause (see "elif state.CxB is not None")

    @staticmethod
    def forward(ctx, A, qweight, bias, d_out, d_in, w_bit, scales, zeros, group_size):
        # default of pytorch behavior if inputs are empty
        # 1. Dequantize
        # 2. MatmulnN
        weight_fp = dequantize_weight(qweight, d_out, d_in, w_bit, scales, zeros, group_size)
        output = torch.nn.functional.linear(A, weight_fp.to(A.dtype), bias)
        # 3. Save state
Save state 45 | ctx.state = (d_out, d_in, w_bit, scales, zeros, group_size) 46 | ctx.tensors = qweight 47 | return output 48 | 49 | 50 | @staticmethod 51 | def backward(ctx, grad_output): 52 | req_gradA, _, req_gradBias = ctx.needs_input_grad[:3] 53 | qweight = ctx.tensors 54 | d_out, d_in, w_bit, scales, zeros, group_size = ctx.state 55 | 56 | grad_A, grad_bias = None, None 57 | 58 | if req_gradBias: 59 | # compute grad_bias first before changing grad_output dtype 60 | grad_bias = grad_output.sum(0, dtype=ctx.dtype_bias) 61 | 62 | # not supported by PyTorch. TODO: create work-around 63 | #if req_gradB: grad_B = torch.matmul(grad_output.t(), A) 64 | if req_gradA: 65 | weight_fp = dequantize_weight(qweight, d_out, d_in, w_bit, scales, zeros, group_size) 66 | grad_A = torch.matmul(grad_output, weight_fp.to(grad_output.dtype)) 67 | if grad_A.isnan().any(): 68 | import ipdb; ipdb.set_trace() 69 | # print(grad_A.norm()) 70 | return grad_A, None, grad_bias, None, None, None, None, None, None 71 | 72 | 73 | class WQLinearForTrain(torch.nn.Module): 74 | def __init__(self, w_bit, group_size, in_features, out_features, bias, dev): 75 | super().__init__() 76 | 77 | if w_bit not in [4]: 78 | raise NotImplementedError("Only 4-bit are supported for now.") 79 | 80 | self.in_features = in_features 81 | self.out_features = out_features 82 | self.w_bit = w_bit 83 | self.group_size = group_size if group_size != -1 else in_features 84 | # quick sanity check (make sure aligment) 85 | assert self.in_features % self.group_size == 0 86 | assert out_features % (32 // self.w_bit) == 0 87 | pack_num = (32 // self.w_bit) 88 | self.register_buffer('qweight', torch.zeros((out_features, in_features // pack_num), dtype=torch.int32, device=dev)) 89 | self.register_buffer('zeros', torch.zeros((out_features, calculate_zeros_width(in_features, self.group_size)), dtype=torch.int32, device=dev)) 90 | self.register_buffer('scales', torch.zeros((out_features, calculate_zeros_width(in_features, 
self.group_size) * pack_num), dtype=torch.float16, device=dev))
        if bias:
            self.register_buffer('bias', torch.zeros((out_features), dtype=torch.float16, device=dev))
        else:
            self.bias = None


    def forward(self, x):
        # weight_fp = self.dequantize_weight().half()
        # out = torch.matmul(x, weight_fp.T)
        # out = out + self.bias if self.bias is not None else out

        # Differentiable 4-bit linear via the custom autograd Function.
        out = MatMul4Bit.apply(x, self.qweight, self.bias,
                               self.out_features, self.in_features,
                               self.w_bit, self.scales,
                               self.zeros, self.group_size)
        return out

    def dequantize_weight(self):
        # Unpack the stored buffers back into a dense float weight.
        data = self.qweight.reshape(-1)
        N, num_features = self.out_features, self.in_features
        weight_fp = dequant_cuda.unpack_single_precision(data, self.w_bit, self.scales, self.zeros, N,
                                                         num_features // self.group_size, self.group_size)
        return weight_fp.view(self.out_features, self.in_features)


    @classmethod
    def from_linear(cls, linear, w_bit, group_size, init_only=False, scales=None, zeros=None):
        # Alternate constructor: quantize an existing nn.Linear in place.
        q_linear = cls(w_bit, group_size, linear.in_features, linear.out_features, linear.bias is not None, linear.weight.device)
        if init_only:  # just prepare for loading sd
            return q_linear
        quantized, scales, mn = quantize_and_pack(linear.weight, group_size, w_bit, simulate=False)
        q_linear.qweight = quantized
        q_linear.scales = scales
        q_linear.zeros = mn
        return q_linear
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/setup.py:
--------------------------------------------------------------------------------
from setuptools import find_packages, setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension


extra_compile_args = {
    "cxx": [
        "-g",
        "-O3",
        "-fopenmp",
        "-lgomp",
        "-std=c++17",
        "-DENABLE_BF16"
    ],
    "nvcc": [
        "-O3",
        "-std=c++17",
        "-DENABLE_BF16",  # TODO
        "-U__CUDA_NO_HALF_OPERATORS__",
        "-U__CUDA_NO_HALF_CONVERSIONS__",
        "-U__CUDA_NO_BFLOAT16_OPERATORS__",
        "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
        "-U__CUDA_NO_BFLOAT162_OPERATORS__",
        "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
        "--expt-relaxed-constexpr",
        "--expt-extended-lambda",
        "--use_fast_math",
        "--threads=8"
    ],
}

setup(
    name="kivi_gemv",
    packages=find_packages(),
    ext_modules=[
        CUDAExtension(
            name="kivi_gemv",
            sources=[
                "csrc/pybind.cpp",
                "csrc/gemv_cuda.cu"
            ],
            extra_compile_args=extra_compile_args,
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
    install_requires=["torch"],
)
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/test.py:
--------------------------------------------------------------------------------
import torch
import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"
import numpy as np
import random
import ipdb
import math
import os
import triton
from new_pack import quant_and_pack_vcache, unpack_and_dequant_kcache, triton_quantize_and_pack_along_last_dim, unpack_and_dequant_vcache, quant_and_pack_kcache
from matmul import triton_bmm_fA_qB_outer
from timeit_v2 import py_benchmark


def set_seed(seed):
    # Seed all RNG sources used by the tests for reproducibility.
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    random.seed(seed)


def test_vcache():
    # Round-trip quantize/dequantize of the value cache; prints the mean
    # relative error per bit-width.
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    B, nh, T, hd = 555, 32, 433, 128
    v = torch.randn((B, nh, T, hd), device='cuda', dtype=torch.float16)
    group_size = 64
    for bits in [2, 4, 8]:
        code, scale, mn = triton_quantize_and_pack_along_last_dim(v, group_size, bits)
        # print(f'bit {bits}, scale.shape: {scale.shape}')
        # print(f'bit {bits}, code.shape: {code.shape}')
        dequant_v =
unpack_and_dequant_vcache(code, scale.unsqueeze(-1), mn.unsqueeze(-1), group_size, bits)
        assert not dequant_v.isnan().any()
        gap = (dequant_v - v) / v
        gap = torch.nan_to_num(gap)
        print(f'bit {bits}, mean v rel arr: {torch.mean(torch.abs(gap))}')


def test_kcache():
    # Round-trip quantize/dequantize of the (transposed) key cache.
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    BS, nh, T, D = 11, 32, 4096, 128
    k = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    group_size = 64
    for bits in [2, 4, 8]:
        code, scale, mn = triton_quantize_and_pack_along_last_dim(k.transpose(2, 3).contiguous(),
                                                                  group_size,
                                                                  bits)
        dequant_k = unpack_and_dequant_vcache(code, scale.unsqueeze(-1), mn.unsqueeze(-1), group_size, bits)
        assert not dequant_k.isnan().any()
        gap = (dequant_k.transpose(2, 3) - k) / k
        gap = torch.nan_to_num(gap)
        print(f'bit {bits}, k mean rel arr: {torch.mean(torch.abs(gap))}')


def test_bmm_speed():
    # Benchmarks: quant+pack throughput, fused QK^T and AV batched matmuls
    # vs. plain torch.matmul baselines.
    BS, nh, T, D = 64, 32, 512, 128
    bits = 2
    key_state = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    val_state = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    group_size = 64
    query_len = 1
    query_state = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)

    # quantiles = [0.5, 0.2, 0.8]
    # ms, min_ms, max_ms = triton.testing.do_bench(
    #     lambda: triton_quantize_and_pack_along_last_dim(key_state.transpose(2,3).contiguous(),
    #                                                     group_size, bits), quantiles=quantiles)
    # print(f'batch size {BS} nh {nh} seqlen {T} quant and pack pytorch impl: {ms * 1000: .2f} ms')
    code, scale, mn = triton_quantize_and_pack_along_last_dim(
        key_state.transpose(2,3).contiguous(), group_size, bits)
    code = code.contiguous()
    scale = scale.contiguous()
    mn = mn.contiguous()

    stmt = "triton_quantize_and_pack_along_last_dim(key_state.transpose(2,3).contiguous(), group_size, bits)"
    t_triton_quant = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                                  setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'our triton quant & pack impl: {t_triton_quant * 1000} ms')
    stmt = "quant_and_pack_kcache(key_state, group_size, bits)"
    t_quant = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                           setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'vanilla pytorch quant & pack impl: {t_quant * 1000} ms')
    stmt = 'triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits)'
    t_qk = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                        setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} our fused batch qk impl: {t_qk * 1000: .2f} ms')
    stmt = 'torch.matmul(query_state, key_state.transpose(2, 3))'
    t_qk_ref = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                            setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} pytorch batch qk impl: {t_qk_ref * 1000: .2f} ms')
    attn_weight = torch.randn((BS, nh, query_len, T), device='cuda', dtype=torch.float16)
    code, scale, mn = triton_quantize_and_pack_along_last_dim(
        val_state, group_size, bits)
    stmt = 'triton_bmm_fA_qB_outer(group_size, attn_weight, code, scale, mn, bits)'
    t_av = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                        setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} our fused batch av impl: {t_av * 1000: .2f} ms')
    stmt = 'torch.matmul(attn_weight, val_state)'
    t_av_ref = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                            setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} pytorch batch av impl: {t_av_ref * 1000: .2f} ms')

    #
# ---------------------------------------------------------------------------
# Scratch accuracy-check code kept from development (commented out): compares
# the fused quantized matmul against the fp16 reference and prints the error.
# ---------------------------------------------------------------------------
# _code, _scale, _mn = quant_and_pack_kcache(
#     key_state, group_size, bits)
# _code = _code.transpose(2, 3)
# _scale = _scale.squeeze(-2).transpose(2,3)
# _mn = _mn.squeeze(-2).transpose(2,3)
# print(_code.shape, code.shape, _code.dtype, code.dtype)
# print(_scale.shape, scale.shape, _scale.dtype, scale.dtype)

# our_out = triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits)
# ref_out = torch.matmul(query_state, key_state.transpose(2, 3))
# gap = (our_out - ref_out) / ref_out
# gap = torch.nan_to_num(gap)
# err = torch.mean(torch.abs(gap)).item()
# print(f'bits {bits}, err: {err}')
# ms, min_ms, max_ms = triton.testing.do_bench(lambda: triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits), quantiles=quantiles)
# print(f'batch size {BS} seqlen {T} our fused batch matmul impl: {ms * 1000: .2f} ms')
# ms, min_ms, max_ms = triton.testing.do_bench(lambda: torch.matmul(query_state, key_state.transpose(2, 3)), quantiles=quantiles)
# print(f'batch size {BS} seqlen {T} pytorch batch matmul impl: {ms * 1000: .2f} ms')


def test_streaming_kvcache():
    """Simulate step-by-step decoding with a quantized KV cache and compare
    each step's attention weights and attention output against a
    full-precision reference, printing the mean relative error of both.

    Each of the 16 decode steps appends one new (key, value) pair: new keys
    go into a small full-precision residual, new values are quantized and
    appended to the packed value cache immediately.
    """
    BS, nh, T, D = 1, 32, 340, 128
    our_attn_output = None
    group_size = 64
    query_len = 1
    bits = 2
    key_states = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    value_states = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    # Split keys into a quantized prefix (a multiple of group_size tokens)
    # and a full-precision residual holding the T % group_size newest tokens.
    # NOTE(review): assumes T % group_size != 0 (340 % 64 == 20 here); if T
    # were a multiple of group_size, `:-0` / `-0:` would select the wrong
    # slices — confirm callers never hit that case.
    key_states_quant = key_states[:, :, :-(key_states.shape[-2] % group_size), :].contiguous()
    key_states_full = key_states[:, :, -(key_states.shape[-2] % group_size):, :].contiguous()
    value_states_quant, value_scale, value_mn = triton_quantize_and_pack_along_last_dim(value_states,
                                                                                        group_size,
                                                                                        bits)
    # Keys are quantized along the token axis, hence the transpose first.
    key_states_quant_trans, key_scale_trans, key_mn_trans = triton_quantize_and_pack_along_last_dim(key_states_quant.transpose(2, 3).contiguous(),
                                                                                                    group_size, bits)
    for i in range(16):
        # Feed the previous attention output back in as the next query —
        # an arbitrary but deterministic driver for the simulation.
        if our_attn_output is None:
            query_states = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
        else:
            query_states = our_attn_output
        key_states_new = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
        value_states_new = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
        # QK^T against the quantized key prefix (fused kernel) ...
        att_qkquant = triton_bmm_fA_qB_outer(group_size, query_states, key_states_quant_trans,
                                             key_scale_trans, key_mn_trans, bits)
        # ... and against the full-precision residual keys (plain matmul).
        key_states_full = torch.cat([key_states_full, key_states_new], dim=2)
        att_qkfull = torch.matmul(query_states, key_states_full.transpose(2, 3))
        our_att_weights = torch.cat([att_qkquant, att_qkfull], dim=-1) / math.sqrt(D)
        our_att_weights = torch.softmax(our_att_weights, dim=-1)
        # New values are quantized immediately and appended to the packed cache.
        value_states_quant_new, scale, mn = triton_quantize_and_pack_along_last_dim(value_states_new,
                                                                                    group_size,
                                                                                    bits)
        value_states_quant = torch.cat([value_states_quant, value_states_quant_new], dim=2)
        value_scale = torch.cat([value_scale, scale], dim=2)
        value_mn = torch.cat([value_mn, mn], dim=2)
        our_attn_output = triton_bmm_fA_qB_outer(group_size, our_att_weights, value_states_quant,
                                                 value_scale, value_mn, bits)
        # === full-precision reference path ===
        key_states = torch.cat([key_states, key_states_new], dim=2)
        value_states = torch.cat([value_states, value_states_new], dim=2)
        ref_att_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(D)
        ref_att_weights = torch.softmax(ref_att_weights, dim=-1)
        ref_att_out = torch.matmul(ref_att_weights, value_states)
        att_weight_gap = (ref_att_weights - our_att_weights) / ref_att_weights
        print(f'i {i} bit {bits}, mean att weight rel arr: {torch.mean(torch.abs(att_weight_gap))}')
        att_out_gap = (ref_att_out - our_attn_output) / ref_att_out
        print(f'i {i} bit {bits}, mean att out rel arr: {torch.mean(torch.abs(att_out_gap))}')
def test_4d_qmatmul():
    """Accuracy check of the fused fp16 x quantized batched matmul.

    Quantizes an integer-valued key tensor at 8/4/2 bits, runs the fused
    Triton kernel against the fp16 torch.matmul reference, and prints the
    mean relative error for each bit width.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    query_len = 1
    BS, nh, T, D = 16, 32, 1024, 128
    group_size = 64
    # k = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    # query_state = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
    # Small-integer inputs keep the fp16 reference matmul exact.
    k = torch.randint(10, (BS, nh, T, D), device='cuda').to(torch.float16)
    query_state = torch.randint(5, (BS, nh, query_len, D), device='cuda').to(torch.float16)
    for bits in [8, 4, 2]:
        # code.shape == BS, nh, T // feat_per_int, D
        # scale, mn.shape == BS, nh, ng, 1, D
        code, scale, mn = quant_and_pack_kcache(k, group_size, bits)
        # NOTE(review): dequant_k is unused; kept only as a smoke test of the
        # unpack path — confirm whether it can be dropped.
        dequant_k = unpack_and_dequant_kcache(code, scale, mn, group_size, bits)
        # BS, nh, D, T // feat_per_int
        code = code.transpose(2, 3)
        # BS, nh, D, T // group_size
        scale = scale.view(BS, nh, -1, D).transpose(2, 3)
        mn = mn.view(BS, nh, -1, D).transpose(2, 3)
        our_out = triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits)
        ref_out = torch.matmul(query_state, k.transpose(2, 3))
        assert not our_out.isnan().any()
        assert not ref_out.isnan().any()
        # Mean relative error; nan_to_num guards division-by-zero entries.
        gap = (our_out - ref_out) / ref_out
        gap = torch.nan_to_num(gap)
        err = torch.mean(torch.abs(gap)).item()
        print(f'bits {bits}, err: {err}')


if __name__ == '__main__':
    # Fixed seed so benchmark / accuracy runs are reproducible.
    set_seed(114514)
    # test_kcache()
    # test_vcache()
    # test_4d_qmatmul()
    # test_streaming_kvcache()
    test_bmm_speed()

# ---------------------------------------------------------------------------
# cuda_supported_gear/quant/test1.py
# ---------------------------------------------------------------------------
from new_pack import triton_quantize_and_pack_along_last_dim_witherror, triton_quantize_and_pack_along_last_dim
import torch
import time

# Micro-benchmark of the fused quantize+pack kernel (error-returning variant)
# on a (1, 32, 100, 4096) fp32 tensor, 4-bit, group size 128.
tensor = torch.rand([1, 32, 100, 4096]).cuda()
group = 128
# CUDA kernel launches are asynchronous: synchronize before and after the
# timed region so `end` measures kernel execution, not just launch overhead.
torch.cuda.synchronize()
start = time.time()
result, scale, mn, error = triton_quantize_and_pack_along_last_dim_witherror(tensor, group, 4)
torch.cuda.synchronize()
end = time.time() - start
# The kernel returns the per-element quantization error flattened; restore
# the input layout before inspecting it.
error = error.reshape(1, 32, 100, 4096)
print(error.shape)
print(f'quant+pack time: {end:.4f}s')

# ---------------------------------------------------------------------------
# cuda_supported_gear/quant/timeit_v2.py
# ---------------------------------------------------------------------------
# timeit_v2.py: Copied from the default library with the following two modifications
# 1. Add 'finish' argument to timeit for calling cuda synchronization.
# 2. Add accurate measurement utility function py_benchmark

"""Tool for measuring execution time of small code snippets.

This module avoids a number of common traps for measuring execution
times.  See also Tim Peters' introduction to the Algorithms chapter in
the Python Cookbook, published by O'Reilly.

Library usage: see the Timer class.

Command line usage:
    python timeit.py [-n N] [-r N] [-s S] [-p] [-h] [--] [statement]

Options:
  -n/--number N: how many times to execute 'statement' (default: see below)
  -r/--repeat N: how many times to repeat the timer (default 5)
  -s/--setup S: statement to be executed once initially (default 'pass').
                Execution time of this setup statement is NOT timed.
  -p/--process: use time.process_time() (default is time.perf_counter())
  -v/--verbose: print raw timing results; repeat for more digits precision
  -u/--unit: set the output time unit (nsec, usec, msec, or sec)
  -h/--help: print this usage message and exit
  --: separate options from statement, use when statement starts with -
  statement: statement to be timed (default 'pass')

A multi-line statement may be given by specifying each line as a
separate argument; indented lines are possible by enclosing an
argument in quotes and using leading spaces.  Multiple -s options are
treated similarly.

If -n is not given, a suitable number of loops is calculated by trying
successive powers of 10 until the total time is at least 0.2 seconds.

Note: there is a certain baseline overhead associated with executing a
pass statement.  It differs between versions.  The code here doesn't try
to hide it, but you should be aware of it.  The baseline overhead can be
measured by invoking the program without arguments.

Classes:

    Timer

Functions:

    timeit(string, string) -> float
    repeat(string, string) -> list
    default_timer() -> float
"""

import gc
import sys
import time
import itertools

__all__ = ["Timer", "timeit", "repeat", "default_timer"]

# Fake filename used when compiling the generated timing function, so
# tracebacks from timed code can be mapped back to the template source.
# (Restored to the stdlib value; the angle brackets were lost in an export.)
dummy_src_name = "<timeit-src>"
default_number = 1000000
default_repeat = 5
default_timer = time.perf_counter

_globals = globals

# Don't change the indentation of the template; the reindent() calls
# in Timer.__init__() depend on setup being indented 4 spaces and stmt
# being indented 8 spaces.
# Code template for the generated timing closure.  Compared to the stdlib
# version, a {finish} slot is added after the loop (used to inject e.g.
# torch.cuda.synchronize() before the end timestamp is taken).
template = """
def inner(_it, _timer{init}):
    {setup}
    _t0 = _timer()
    for _i in _it:
        {stmt}
    {finish}
    _t1 = _timer()
    return _t1 - _t0
"""

def reindent(src, indent):
    """Helper to reindent a multi-line statement."""
    return src.replace("\n", "\n" + " "*indent)

class Timer:
    """Class for timing execution speed of small code snippets.

    The constructor takes a statement to be timed, an additional
    statement used for setup, and a timer function.  Both statements
    default to 'pass'; the timer function is platform-dependent (see
    module doc string).  If 'globals' is specified, the code will be
    executed within that namespace (as opposed to inside timeit's
    namespace).

    To measure the execution time of the first statement, use the
    timeit() method.  The repeat() method is a convenience to call
    timeit() multiple times and return a list of results.

    The statements may contain newlines, as long as they don't contain
    multi-line string literals.
    """

    def __init__(self, stmt="pass", setup="pass", finish='pass', timer=default_timer,
                 globals=None):
        """Constructor.  See class doc string."""
        self.timer = timer
        local_ns = {}
        global_ns = _globals() if globals is None else globals
        init = ''
        if isinstance(setup, str):
            # Check that the code can be compiled outside a function
            compile(setup, dummy_src_name, "exec")
            stmtprefix = setup + '\n'
            setup = reindent(setup, 4)
        elif callable(setup):
            local_ns['_setup'] = setup
            init += ', _setup=_setup'
            stmtprefix = ''
            setup = '_setup()'
        else:
            raise ValueError("setup is neither a string nor callable")
        if isinstance(stmt, str):
            # Check that the code can be compiled outside a function
            compile(stmtprefix + stmt, dummy_src_name, "exec")
            stmt = reindent(stmt, 8)
        elif callable(stmt):
            local_ns['_stmt'] = stmt
            init += ', _stmt=_stmt'
            stmt = '_stmt()'
        else:
            raise ValueError("stmt is neither a string nor callable")

        # The 'finish' hook (new vs. stdlib) must be a statement string; it is
        # spliced into the template after the loop, before the end timestamp.
        assert isinstance(finish, str)
        compile(setup + '\n' + stmt + '\n' + finish, dummy_src_name, 'exec')
        finish = reindent(finish, 4)

        src = template.format(stmt=stmt, setup=setup, init=init, finish=finish)
        self.src = src  # Save for traceback display
        code = compile(src, dummy_src_name, "exec")
        exec(code, global_ns, local_ns)
        self.inner = local_ns["inner"]

    def print_exc(self, file=None):
        """Helper to print a traceback from the timed code.

        Typical use:

            t = Timer(...)       # outside the try/except
            try:
                t.timeit(...)    # or t.repeat(...)
            except:
                t.print_exc()

        The advantage over the standard traceback is that source lines
        in the compiled template will be displayed.

        The optional file argument directs where the traceback is
        sent; it defaults to sys.stderr.
        """
        import linecache, traceback
        if self.src is not None:
            linecache.cache[dummy_src_name] = (len(self.src),
                                               None,
                                               self.src.split("\n"),
                                               dummy_src_name)
        # else the source is already stored somewhere else

        traceback.print_exc(file=file)

    def timeit(self, number=default_number):
        """Time 'number' executions of the main statement.

        To be precise, this executes the setup statement once, and
        then returns the time it takes to execute the main statement
        a number of times, as a float measured in seconds.  The
        argument is the number of times through the loop, defaulting
        to one million.  The main statement, the setup statement and
        the timer function to be used are passed to the constructor.
        """
        it = itertools.repeat(None, number)
        # GC is disabled during timing so collections don't pollute the
        # measurement; restored to its prior state afterwards.
        gcold = gc.isenabled()
        gc.disable()
        try:
            timing = self.inner(it, self.timer)
        finally:
            if gcold:
                gc.enable()
        return timing

    def repeat(self, repeat=default_repeat, number=default_number):
        """Call timeit() a few times.

        This is a convenience function that calls the timeit()
        repeatedly, returning a list of results.  The first argument
        specifies how many times to call timeit(), defaulting to 5;
        the second argument specifies the timer argument, defaulting
        to one million.

        Note: it's tempting to calculate mean and standard deviation
        from the result vector and report these.  However, this is not
        very useful.  In a typical case, the lowest value gives a
        lower bound for how fast your machine can run the given code
        snippet; higher values in the result vector are typically not
        caused by variability in Python's speed, but by other
        processes interfering with your timing accuracy.  So the min()
        of the result is probably the only number you should be
        interested in.  After that, you should look at the entire
        vector and apply common sense rather than statistics.
        """
        r = []
        for i in range(repeat):
            t = self.timeit(number)
            r.append(t)
        return r

    def autorange(self, callback=None):
        """Return the number of loops and time taken so that total time >= 0.2.

        Calls the timeit method with increasing numbers from the sequence
        1, 2, 5, 10, 20, 50, ... until the time taken is at least 0.2
        second.  Returns (number, time_taken).

        If *callback* is given and is not None, it will be called after
        each trial with two arguments: ``callback(number, time_taken)``.
        """
        i = 1
        while True:
            for j in 1, 2, 5:
                number = i * j
                time_taken = self.timeit(number)
                if callback:
                    callback(number, time_taken)
                if time_taken >= 0.2:
                    return (number, time_taken)
            i *= 10

def timeit(stmt="pass", setup="pass", finish='pass', timer=default_timer,
           number=default_number, globals=None):
    """Convenience function to create Timer object and call timeit method."""
    return Timer(stmt, setup, finish, timer, globals).timeit(number)

def repeat(stmt="pass", setup="pass", finish='pass', timer=default_timer,
           repeat=default_repeat, number=default_number, globals=None):
    """Convenience function to create Timer object and call repeat method."""
    return Timer(stmt, setup, finish, timer, globals).repeat(repeat, number)

def py_benchmark(stmt, context, min_repeat_second=1, setup='pass', finish='pass'):
    # Accurate measurement helper: keep growing the iteration count until the
    # total runtime reaches min_repeat_second, then return the average
    # seconds per execution of stmt.
    total_time = 0
    number = 10

    # NOTE(review): eval() only accepts expressions, while Timer itself also
    # accepts statements — a statement-valued stmt would raise here. Confirm
    # all callers pass expressions.
    eval(stmt, context) # warmup
    total_time = timeit(stmt=stmt, setup=setup, finish=finish, number=number, globals=context)
    while total_time < min_repeat_second:
        number = int(number * (min_repeat_second / total_time)) + 1
        total_time = timeit(stmt=stmt, setup=setup, finish=finish, number=number,
                            globals=context)

    return total_time / number

# ---------------------------------------------------------------------------
# cuda_supported_gear/test.py — end-to-end generation benchmark comparing
# GEAR+KIVI, KIVI, and an uncompressed Llama-2-7B on wikitext prompts;
# reports wall-clock generation time and peak GPU memory.
# ---------------------------------------------------------------------------
from modeling_llamagear import LlamaForCausalLM_GEARKIVI
from modeling_llama_kivi import LlamaForCausalLM_KIVI
from transformers import LlamaConfig, AutoTokenizer, LlamaForCausalLM
from transformers import BitsAndBytesConfig
from datasets import load_dataset
import torch
import argparse


#### Config for KIVI model
config = LlamaConfig.from_pretrained("meta-llama/Llama-2-7b-hf")

config.k_bits = 2# current support 2/4 bit for KV Cache
config.v_bits = 2 # current support 2/4 bit for KV Cache
config.group_size = 64
config.residual_length = 64 # the number of recent fp16 tokens

# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
parser = argparse.ArgumentParser(description="Evaluate AQuA Tasks")
parser.add_argument("--batch_size", type=int, default=8, help="Batch size.")
parser.add_argument("--model", type=str, default="meta-llama/Llama-2-7b", help="Model name or path.")
args = parser.parse_args()

max_token = 1000 ### prefill_length
max_generation_length = 1500 ### generate 500 new tokens
batch_size = args.batch_size

##### Config for GEAR compression
compress_config = {}
compress_config["compress_method"] = "gearlKIVI" # "gearlKIVI" "gearsKIVI"
compress_config["group_size"] = 64
compress_config["residual"] = 64
compress_config["quantize_bit"] = 2
compress_config["rank"] = 2 ## prefill rank
compress_config["rankv"] = 2 ## prefill rank
compress_config["loop"] = 3
# compress_config["stream_list"] = stream_list
stream_list = [torch.cuda.Stream(),torch.cuda.Stream()]

# Select the model variant by substring match on --model.
# NOTE(review): the default --model value ("meta-llama/Llama-2-7b") matches
# none of the branches below, leaving `model` undefined — confirm that an
# explicit variant flag is always required.
if "gearl" in args.model:
    model = LlamaForCausalLM_GEARKIVI.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        config = config,
        # quantization_config = quantization_config,
        compress_config = compress_config,
        device_map = "cuda:0"
    )
elif "KIVI" in args.model:
    model = LlamaForCausalLM_KIVI.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        config = config,
        # quantization_config = quantization_config,
        # compress_config = compress_config,

        device_map = "cuda:0"
    )
elif "None" in args.model:
    model = LlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",

        device_map = "cuda:0")
    # The uncompressed baseline is cast to fp16 to match the others.
    model = model.half()




tokenizer = AutoTokenizer.from_pretrained(
    'meta-llama/Llama-2-7b-hf',
    model_max_length=max_token,
    max_length=max_token,
    use_fast=False,
    trust_remote_code=True,
    tokenizer_type='llama')
tokenizer.pad_token = tokenizer.eos_token
test = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
text_combined = test["text"]

# Build a batch of identical prompts (first max_token rows of the corpus).
sentence_group = []
for i in range(batch_size):
    # sentence_group.append(str(text_combined[i*max_token:(i+1)*max_token]))
    sentence_group.append(str(text_combined[0:max_token]))
inputs = tokenizer(
    sentence_group,
    return_tensors="pt",
    padding="max_length",
    truncation=True,
)
print("begin")
inputs = inputs.to("cuda:0")
print(inputs.input_ids.shape)
import time

start = time.time()
result = model.generate(**inputs, max_length=max_generation_length, use_cache=True)
torch.cuda.synchronize()
end = time.time()
peak_memory = torch.cuda.max_memory_allocated(device="cuda") / (1024**2) # convert to MB

print(f"Peak memory usage on GPU: {peak_memory} MB")
print("time",end - start)
# result = tokenizer.batch_decode(result, skip_special_tokens=True)
# print(result)
# model = model.cuda()
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.1.0 2 | accelerate==0.27.2 3 | aiohttp @ file:///rapids/aiohttp-3.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=5c4fa235d534b3547184831c624c0b7c1e262cd1de847d95085ec94c16fddcd5 4 | aiosignal @ file:///rapids/aiosignal-1.3.1-py3-none-any.whl#sha256=f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17 5 | annotated-types==0.6.0 6 | apex @ file:///opt/pytorch/apex 7 | argon2-cffi==23.1.0 8 | argon2-cffi-bindings==21.2.0 9 | asttokens==2.4.1 10 | astunparse==1.6.3 11 | async-timeout @ file:///rapids/async_timeout-4.0.3-py3-none-any.whl#sha256=7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028 12 | attrs==23.2.0 13 | audioread==3.0.1 14 | beautifulsoup4==4.12.3 15 | bleach==6.1.0 16 | blis==0.7.11 17 | cachetools==5.3.2 18 | catalogue==2.0.10 19 | certifi==2023.11.17 20 | cffi==1.16.0 21 | charset-normalizer==3.3.2 22 | click==8.1.7 23 | cloudpathlib==0.16.0 24 | cloudpickle @ file:///rapids/cloudpickle-3.0.0-py3-none-any.whl#sha256=246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7 25 | cmake==3.28.1 26 | comm==0.2.1 27 | confection==0.1.4 28 | contourpy==1.2.0 29 | cubinlinker @ file:///rapids/cubinlinker-0.3.0%2B2.ga3774f2-cp310-cp310-linux_aarch64.whl#sha256=87c9fac61fe6c6ea8c5b5130e9d5535aa2c7def2142163abc2985b40e1f14d18 30 | cuda-python @ file:///rapids/cuda_python-12.3.0rc4%2B9.ga6806e8.dirty-cp310-cp310-linux_aarch64.whl#sha256=12dd3a8380688d577c31c4ffe5d56ff59a4c13f85f9cb0afd5adb78efb45bd90 31 | cudf @ file:///rapids/cudf-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=6d3a328ababdb48a2a6bd18e0d6c22c0dae27fba45c8f0fc6acf5334eced8dd7 32 | cugraph @ file:///rapids/cugraph-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=c60964175a813e961ebc82863987130d89b3b92305ee4fe3ca2cef258f2c93fb 33 | cugraph-dgl @ 
file:///rapids/cugraph_dgl-23.12.0-py3-none-any.whl#sha256=d14b75aef4008764766150cb308501ffba0f6116a8042af82b9b0c93d0eb1164 34 | cugraph-service-client @ file:///rapids/cugraph_service_client-23.12.0-py3-none-any.whl#sha256=145febd98846e086b75a1c4802854aa0e45bb12930dea4d49386258459f085ae 35 | cugraph-service-server @ file:///rapids/cugraph_service_server-23.12.0-py3-none-any.whl#sha256=d0249296a61ee5c4156564f26d9319f23d9faad25ac83c9deb9f56e6a9f63751 36 | cuml @ file:///rapids/cuml-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=d0ef9d40fdbef555adc249717fd4a747ffe7f932d9b10b025bfcd279b83bbd91 37 | cupy-cuda12x @ file:///rapids/cupy_cuda12x-12.3.0-cp310-cp310-manylinux2014_aarch64.whl#sha256=77687b019383eb89da981638cfec6d5ba556a15f1be0945e8f7f80030b851d04 38 | cycler==0.12.1 39 | cymem==2.0.8 40 | Cython==3.0.8 41 | dask @ file:///rapids/dask-2023.11.0-py3-none-any.whl#sha256=b950951ee3f8c86f003b577b6928ecf20089eee6677719578deaba8fd9a78203 42 | dask-cuda @ file:///rapids/dask_cuda-23.12.0-py3-none-any.whl#sha256=c8dab311674c77cebe2831d311cdd3db72f605471120764001c3a1b1ccfe40f3 43 | dask-cudf @ file:///rapids/dask_cudf-23.12.0-py3-none-any.whl#sha256=5fe94040ecd22a53928b8977c4f5c780a0326dda5994f3ed9afcbe377fd67d27 44 | dataclasses-json==0.6.4 45 | datasets==2.18.0 46 | debugpy==1.8.0 47 | decorator==5.1.1 48 | defusedxml==0.7.1 49 | dill==0.3.8 50 | distributed @ file:///rapids/distributed-2023.11.0-py3-none-any.whl#sha256=44ad1fff31ece202cc64bdb72dd33d6964d78bdbe1ec1ec06e01f9544187cd2e 51 | dm-tree==0.1.8 52 | einops==0.7.0 53 | exceptiongroup==1.2.0 54 | execnet==2.0.2 55 | executing==2.0.1 56 | expecttest==0.1.3 57 | fastjsonschema==2.19.1 58 | fastrlock @ file:///rapids/fastrlock-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl#sha256=9121a894d74e65557e47e777060a495ab85f4b903e80dd73a3c940ba042920d7 59 | filelock==3.13.1 60 | flash-attn==2.0.4 61 | fonttools==4.47.2 62 | frozenlist @ 
file:///rapids/frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a 63 | fsspec==2023.12.2 64 | gast==0.5.4 65 | GEARLM==0.0.6 66 | google-auth==2.26.2 67 | google-auth-oauthlib==0.4.6 68 | graphsurgeon @ file:///workspace/TensorRT-8.6.1.6/graphsurgeon/graphsurgeon-0.4.6-py2.py3-none-any.whl#sha256=0fbadaefbbe6e9920b9f814ae961c4a279be602812edf3ed7fb9cc6f8f4809fe 69 | grpcio==1.60.0 70 | huggingface-hub==0.21.4 71 | hypothesis==5.35.1 72 | idna==3.6 73 | importlib-metadata @ file:///rapids/importlib_metadata-7.0.1-py3-none-any.whl#sha256=4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e 74 | iniconfig==2.0.0 75 | ipykernel==6.29.0 76 | ipython==8.20.0 77 | ipython-genutils==0.2.0 78 | jedi==0.19.1 79 | Jinja2==3.1.3 80 | joblib==1.3.2 81 | json5==0.9.14 82 | jsonschema==4.21.1 83 | jsonschema-specifications==2023.12.1 84 | jupyter-tensorboard @ git+https://github.com/cliffwoolley/jupyter_tensorboard.git@ffa7e26138b82549453306e06b535a9ac36db17a 85 | jupyter_client==8.6.0 86 | jupyter_core==5.7.1 87 | jupyterlab==2.3.2 88 | jupyterlab-server==1.2.0 89 | jupyterlab_pygments==0.3.0 90 | jupytext==1.16.1 91 | kiwisolver==1.4.5 92 | langcodes==3.3.0 93 | lazy_loader==0.3 94 | librosa==0.10.1 95 | llvmlite @ file:///rapids/llvmlite-0.40.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=bba2747cf5b4954e945c287fe310b3fcc484e2a9d1b0c273e99eb17d103bb0e6 96 | locket @ file:///rapids/locket-1.0.0-py2.py3-none-any.whl#sha256=b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3 97 | Markdown==3.5.2 98 | markdown-it-py==3.0.0 99 | MarkupSafe==2.1.4 100 | marshmallow==3.21.1 101 | matplotlib==3.8.2 102 | matplotlib-inline==0.1.6 103 | mdit-py-plugins==0.4.0 104 | mdurl==0.1.2 105 | mistune==3.0.2 106 | mock==5.1.0 107 | mpmath==1.3.0 108 | msgpack==1.0.7 109 | multidict @ 
file:///rapids/multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b 110 | multiprocess==0.70.16 111 | murmurhash==1.0.10 112 | mypy-extensions==1.0.0 113 | nbclient==0.9.0 114 | nbconvert==7.14.2 115 | nbformat==5.9.2 116 | nest-asyncio==1.5.9 117 | networkx==2.6.3 118 | ninja==1.11.1.1 119 | notebook==6.4.10 120 | numba @ file:///rapids/numba-0.57.1%2B1.g29d216d31-cp310-cp310-linux_aarch64.whl#sha256=460ced73b5a210021b92e6851700dcdb478abb79927a2858e19824f7d573f4a1 121 | numpy==1.24.4 122 | nvfuser==0.1.1+gitunknown 123 | nvidia-dali-cuda120==1.33.0 124 | nvidia-pyindex==1.0.9 125 | nvtx @ file:///rapids/nvtx-0.2.5-cp310-cp310-linux_aarch64.whl#sha256=0d42158092675873d70f2951b416f3039b92e01a79e13cc9e09b7dd1b80b43a4 126 | oauthlib==3.2.2 127 | onnx @ file:///opt/pytorch/pytorch/third_party/onnx 128 | opencv @ file:///opencv-4.7.0/modules/python/package 129 | optree==0.10.0 130 | package-name==0.1 131 | packaging==23.2 132 | pandas @ file:///rapids/pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354 133 | pandocfilters==1.5.1 134 | parso==0.8.3 135 | partd @ file:///rapids/partd-1.4.1-py3-none-any.whl#sha256=27e766663d36c161e2827aa3e28541c992f0b9527d3cca047e13fb3acdb989e6 136 | pexpect==4.9.0 137 | pillow @ file:///rapids/pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl#sha256=47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563 138 | platformdirs==4.1.0 139 | pluggy==1.3.0 140 | ply @ file:///rapids/ply-3.11-py2.py3-none-any.whl#sha256=096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce 141 | polygraphy==0.49.1 142 | pooch==1.8.0 143 | preshed==3.0.9 144 | prettytable==3.9.0 145 | prometheus-client==0.19.0 146 | prompt-toolkit==3.0.43 147 | protobuf==4.24.4 148 | psutil @ 
file:///rapids/psutil-5.9.4-cp310-abi3-linux_aarch64.whl#sha256=356ff1edab701bc3b17bafa541512735ea6ae40c25bbc6df206d15307abbb299 149 | ptxcompiler @ file:///rapids/ptxcompiler-0.8.1%2B2.gf98b68a-cp310-cp310-linux_aarch64.whl#sha256=99c401500b705e327d79608a8bcaf4dbcb2aafd4e7443f874ece14beba476538 150 | ptyprocess==0.7.0 151 | pure-eval==0.2.2 152 | pyarrow @ file:///rapids/pyarrow-14.0.1.dev0%2Bgba5374836.d20240125-cp310-cp310-linux_aarch64.whl#sha256=8de1dfb159d5d9ff0220c46cda0f6251770951fd6e264d13ba10fb386d04e310 153 | pyarrow-hotfix==0.6 154 | pyasn1==0.5.1 155 | pyasn1-modules==0.3.0 156 | pybind11==2.11.1 157 | pybind11-global==2.11.1 158 | pycocotools @ git+https://github.com/nvidia/cocoapi.git@d99cbf3823588ef09a2721655f46e509ebafb3d7#subdirectory=PythonAPI 159 | pycparser==2.21 160 | pydantic==2.5.3 161 | pydantic_core==2.14.6 162 | Pygments==2.17.2 163 | pylibcugraph @ file:///rapids/pylibcugraph-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=8a9c83cbd56be292f856ca9c0541a3a86b596749ff58d63287da43b63e6c069e 164 | pylibcugraphops @ file:///rapids/pylibcugraphops-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=4ff3107253c13dae4873d1b83d5a640c1fab5556fad789a7b2999e321aa89b20 165 | pylibraft @ file:///rapids/pylibraft-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=a54f1e00fed773c5c85db1f59c87f42bee90de2d9f28694a0f8ec899f572e12c 166 | pynvml @ file:///rapids/pynvml-11.4.1-py3-none-any.whl#sha256=d27be542cd9d06558de18e2deffc8022ccd7355bc7382255d477038e7e424c6c 167 | pyparsing==3.1.1 168 | pytest==7.4.4 169 | pytest-flakefinder==1.1.0 170 | pytest-rerunfailures==13.0 171 | pytest-shard==0.1.2 172 | pytest-xdist==3.5.0 173 | python-dateutil==2.8.2 174 | python-hostlist==1.23.0 175 | pytorch-quantization==2.1.2 176 | pytz @ file:///rapids/pytz-2023.3.post1-py2.py3-none-any.whl#sha256=ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7 177 | PyYAML==6.0.1 178 | pyzmq==25.1.2 179 | raft-dask @ 
file:///rapids/raft_dask-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=32d9955c2aeb569b06d4bdc20ee80f9d5f4f3640acb92b20ff99fb281074d858 180 | rapids-dask-dependency @ file:///rapids/rapids_dask_dependency-23.12.1-py3-none-any.whl#sha256=2abfe15415711bad9dfe9e83d4bfbd039e9436d66cc17e74ae22c85ab9afe46b 181 | referencing==0.32.1 182 | regex==2023.12.25 183 | requests==2.31.0 184 | requests-oauthlib==1.3.1 185 | rich @ file:///rapids/rich-13.7.0-py3-none-any.whl#sha256=6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235 186 | rmm @ file:///rapids/rmm-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=0ec2d0a669a6963218e6fed76e3694e6bd7d034a28ddf72a885fdc3a90f2e458 187 | rpds-py==0.17.1 188 | rsa==4.9 189 | safetensors==0.4.2 190 | scikit-learn @ file:///rapids/scikit_learn-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=fd3480c982b9e616b9f76ad8587804d3f4e91b4e2a6752e7dafb8a2e1f541098 191 | scipy @ file:///rapids/scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563 192 | Send2Trash==1.8.2 193 | sentencepiece==0.2.0 194 | six==1.16.0 195 | smart-open==6.4.0 196 | sortedcontainers==2.4.0 197 | soundfile==0.12.1 198 | soupsieve==2.5 199 | soxr==0.3.7 200 | spacy==3.7.2 201 | spacy-legacy==3.0.12 202 | spacy-loggers==1.0.5 203 | sphinx-glpi-theme==0.5 204 | srsly==2.4.8 205 | stack-data==0.6.3 206 | sympy==1.12 207 | tabulate==0.9.0 208 | tblib @ file:///rapids/tblib-3.0.0-py3-none-any.whl#sha256=80a6c77e59b55e83911e1e607c649836a69c103963c5f28a46cbeef44acf8129 209 | tensorboard==2.9.0 210 | tensorboard-data-server==0.6.1 211 | tensorboard-plugin-wit==1.8.1 212 | tensorrt @ file:///workspace/TensorRT-8.6.1.6/python/tensorrt-8.6.1-cp310-none-linux_aarch64.whl#sha256=05b7e51b4574c1ec7151362b011c0a203fca5dfd0561cb613d10fc982cd3fa13 213 | terminado==0.18.0 214 | thinc==8.2.2 215 | threadpoolctl==3.2.0 216 | thriftpy2 @ 
file:///rapids/thriftpy2-0.4.17-cp310-cp310-linux_aarch64.whl#sha256=4774861c7953b22ddd059b077fe9d82ad56c56bb5fdd60a5cd87e3d6f9b00372 217 | tinycss2==1.2.1 218 | tokenizers==0.15.2 219 | toml==0.10.2 220 | tomli==2.0.1 221 | toolz @ file:///rapids/toolz-0.12.1-py3-none-any.whl#sha256=d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85 222 | torch @ file:///tmp/pip/torch-2.2.0a0%2B81ea7a4-cp310-cp310-linux_aarch64.whl#sha256=cb9192fa0aaa81c2482e00306ea6b72a737a6ac88eed770f8a03432039549aee 223 | torch-tensorrt @ file:///opt/pytorch/torch_tensorrt/dist/torch_tensorrt-2.2.0a0-cp310-cp310-linux_aarch64.whl#sha256=5cd5236ff477b7aaffee2bf52842ead45e60dea8cb27b7e057bce8260dfad0a9 224 | torchdata @ file:///opt/pytorch/data 225 | torchtext @ file:///opt/pytorch/text 226 | torchvision @ file:///opt/pytorch/vision 227 | tornado==6.4 228 | tqdm==4.66.1 229 | traitlets==5.9.0 230 | transformer-engine @ git+https://github.com/NVIDIA/TransformerEngine.git@bbafb02097e6ca1605c3c0cad84d59dbbcb6e94b 231 | transformers==4.38.2 232 | treelite @ file:///rapids/treelite-3.9.1-cp310-cp310-linux_aarch64.whl#sha256=a96dc21cd0e44265204244bbb78b311f3d1f87174050492f9de6b30ec06aaf9c 233 | treelite-runtime @ file:///rapids/treelite_runtime-3.9.1-cp310-cp310-linux_aarch64.whl#sha256=af380fad937d5b8e43a0fb6ce9fbb09c7515f70f05916a26869a7af50c9b2bb4 234 | typer==0.9.0 235 | types-dataclasses==0.6.6 236 | typing-inspect==0.9.0 237 | typing_extensions==4.9.0 238 | ucx-py @ file:///rapids/ucx_py-0.35.0-cp310-cp310-linux_aarch64.whl#sha256=b07db3ce3a03966c49155641b5c66dfd3574d6144a0f3e12c17c15f108c4c09d 239 | uff @ file:///workspace/TensorRT-8.6.1.6/uff/uff-0.6.9-py2.py3-none-any.whl#sha256=618a3f812d491f0d3c4f2e38b99e03217ca37b206db14cee079f2bf681eb4fe3 240 | urllib3 @ file:///rapids/urllib3-1.26.18-py2.py3-none-any.whl#sha256=34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 241 | wasabi==1.1.2 242 | wcwidth==0.2.13 243 | weasel==0.3.4 244 | webencodings==0.5.1 245 | 
Werkzeug==3.0.1 246 | xdoctest==1.0.2 247 | xgboost @ file:///rapids/xgboost-1.7.6-cp310-cp310-linux_aarch64.whl#sha256=c89be2ffeff9c49dae24a1dc7fc1ca5c83ec9f40ab249e4a2743dab068afaa47 248 | xxhash==3.4.1 249 | yarl @ file:///rapids/yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234 250 | zict @ file:///rapids/zict-3.0.0-py2.py3-none-any.whl#sha256=5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae 251 | zipp @ file:///rapids/zipp-3.17.0-py3-none-any.whl#sha256=0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 252 | --------------------------------------------------------------------------------