├── .gitignore ├── Fig ├── gt_intel.png └── overview.png ├── GenerationBench ├── GenerationTest │ ├── GEARLM │ │ ├── Simulated │ │ │ ├── __init__.py │ │ │ ├── cache_utils.py │ │ │ ├── compress_config.py │ │ │ ├── compress_function.py │ │ │ ├── cpp_kernels.py │ │ │ ├── modeling_llama_ablation.py │ │ │ ├── modeling_llama_new.py │ │ │ ├── modeling_mistral.py │ │ │ └── modeling_mixtral.py │ │ ├── TrueCompression │ │ │ ├── __init__.py │ │ │ ├── models │ │ │ │ ├── TrueCompressFunction.py │ │ │ │ ├── TrueCompressLlama.py │ │ │ │ ├── __init__.py │ │ │ │ ├── cache_utils.py │ │ │ │ ├── compress_config.py │ │ │ │ ├── compressed_cache_utils.py │ │ │ │ ├── cpp_kernels.py │ │ │ │ ├── streaming_cache_utils.py │ │ │ │ └── utils.py │ │ │ └── old_models │ │ │ │ ├── CompressUtils │ │ │ │ ├── TrueCompressFunction.py │ │ │ │ ├── __init__.py │ │ │ │ └── compress_class.py │ │ │ │ ├── __init__.py │ │ │ │ └── modeling_llama_old.py │ │ └── __init__.py │ ├── Readme.md │ ├── evaluation_aqua_cot.py │ ├── evaluation_bbh_cot.py │ ├── evaluation_gsm8k.py │ ├── lib_prompt │ │ ├── aqua │ │ │ └── cot_prompt_8shots.txt │ │ ├── boolean_expressions.txt │ │ ├── causal_judgement.txt │ │ ├── date_understanding.txt │ │ ├── disambiguation_qa.txt │ │ ├── dyck_languages.txt │ │ ├── formal_fallacies.txt │ │ ├── geometric_shapes.txt │ │ ├── gsm8k_prompt_original.txt │ │ ├── hyperbaton.txt │ │ ├── logical_deduction_five_objects.txt │ │ ├── logical_deduction_seven_objects.txt │ │ ├── logical_deduction_three_objects.txt │ │ ├── mmlu-cot.json │ │ ├── movie_recommendation.txt │ │ ├── multistep_arithmetic_two.txt │ │ ├── navigate.txt │ │ ├── object_counting.txt │ │ ├── penguins_in_a_table.txt │ │ ├── prompt_8x8step.txt │ │ ├── reasoning_about_colored_objects.txt │ │ ├── ruin_names.txt │ │ ├── salient_translation_error_detection.txt │ │ ├── snarks.txt │ │ ├── sports_understanding.txt │ │ ├── temporal_sequences.txt │ │ ├── tracking_shuffled_objects_five_objects.txt │ │ ├── tracking_shuffled_objects_seven_objects.txt │ 
│ ├── tracking_shuffled_objects_three_objects.txt │ │ ├── web_of_lies.txt │ │ └── word_sorting.txt │ ├── run_template_llama-3-8b.sh │ └── run_template_llama2-13b.sh └── Readme.md ├── LICENSE ├── Readme.md ├── cuda_supported_gear ├── modeling_llama_kivi.py ├── modeling_llamagear.py ├── quant │ ├── __init__.py │ ├── csrc │ │ ├── gemv_cuda.cu │ │ ├── gemv_cuda.h │ │ ├── gemv_cuda_backup.cu │ │ └── pybind.cpp │ ├── gemv.py │ ├── matmul.py │ ├── new_pack.py │ ├── qmodule.py │ ├── setup.py │ ├── test.py │ ├── test1.py │ └── timeit_v2.py └── test.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | /cache 2 | *.pyc 3 | *.pt 4 | 5 | /LoftQ/cache 6 | /COT_hub/gsm8k/cache 7 | /COT_hub/gsm8k/outputs 8 | /COT_hub/gsm8k/results 9 | /COT_hub/gsm8k/lib_prompt 10 | /COT_hub/gsm8k/kvcache_distributions 11 | /COT_hub/MMLU/outputs 12 | /COT_hub/MMLU/results 13 | /COT_hub/BBH/outputs 14 | /COT_hub/BBH/results 15 | /COT_hub/BBH/lib_prompt 16 | /COT_hub/BBH/lib_prompt_multiround_claude_instant 17 | /COT_hub/BBH/lib_prompt_multiround_claude_instant_test 18 | /tests 19 | 20 | /GenerationBench/cache 21 | /GEARLM/build 22 | /GEARLM/dist 23 | 24 | /GEARLM/GEARLM.egg-info 25 | 26 | -------------------------------------------------------------------------------- /Fig/gt_intel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengear-project/GEAR/efe0bfe2b5a35b22122f7cefab5cde9db13a66cd/Fig/gt_intel.png -------------------------------------------------------------------------------- /Fig/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengear-project/GEAR/efe0bfe2b5a35b22122f7cefab5cde9db13a66cd/Fig/overview.png -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/Simulated/__init__.py: 
class CompressionConfig(dict):
    """Per-layer configuration for simulated KV-cache compression.

    All settings are stored as scalars at construction time;
    ``copy_for_all_attention`` broadcasts each scalar into a list of length
    ``attention_number`` so each attention layer can later be addressed by
    index.  Inherits ``dict`` so HuggingFace-style config plumbing accepts
    it, but the values live in instance attributes.
    """

    # Attributes that are replicated per attention layer by
    # copy_for_all_attention().  batch_num / attention_number stay scalar.
    _PER_LAYER_FIELDS = (
        "compress_method",
        "quantize_bit",
        "group_num",
        "rank",
        "prefill_rank",
        "loop",
        "top_k",
        "device_num",
        "left",
        "stage",
        "rankv",
        "prefill_rankv",
        "start_saving",
        "locality_saving",
        "token_preserving",
        "iter",
        "heavy_size",
        "recent_size",
        "streaming",
        "streaming_gap",
        "group_size",
        "stream_grouping",
    )

    def __init__(
        self,
        compress_method=None,
        attention_number=12,
        quantize_bit=0,
        group_num=0,
        group_size=0,
        rank=0.0,
        rankv=0.0,
        prefill_rank=0.0,
        prefill_rankv=0.0,
        loop=0,
        top_k=0.0,
        left=0.0,
        stage=1,
        device_num=0,
        batch_num=1,
        start_saving=0,
        locality_saving=0,
        token_preserving=False,
        streaming=False,
        streaming_gap=0,
        stream_grouping=False,
        iter=0,
        # h2o settings
        heavy_size=0,
        recent_size=0,
    ):
        self.compress_method = compress_method
        self.quantize_bit = quantize_bit
        self.group_num = group_num
        self.group_size = group_size
        self.rank = rank
        self.rankv = rankv
        # NOTE(review): 'ranv' is a historical typo; kept so any caller that
        # reads it keeps working.  'rankv' is the canonical attribute.
        self.ranv = rankv
        self.prefill_rank = prefill_rank
        self.prefill_rankv = prefill_rankv
        self.loop = loop
        self.device_num = device_num
        self.attention_number = attention_number
        self.top_k = top_k
        self.left = left
        self.batch_num = batch_num
        self.stage = stage
        self.start_saving = start_saving
        self.locality_saving = locality_saving
        self.token_preserving = token_preserving
        self.iter = iter
        self.heavy_size = heavy_size
        self.recent_size = recent_size
        self.streaming = streaming
        self.streaming_gap = streaming_gap
        self.stream_grouping = stream_grouping

    def create_attention_config(self, config):
        """Return a list repeating ``config`` once per attention layer."""
        return [config for _ in range(self.attention_number)]

    def copy_for_all_attention(self):
        """Broadcast every scalar setting into a per-layer list.

        Call at most once: a second call would wrap the lists in lists.
        """
        for field in self._PER_LAYER_FIELDS:
            setattr(self, field, self.create_attention_config(getattr(self, field)))

    def compress_ratio(
        self,
        compress_method,
        seqlen,
        model_dim,
        rank=0,
        rankv=0,
        quantize_bit=0,
        top_k=0,
        left=0.0,
        stage=1,
        batch_num=1,
    ):
        """Analytic compression ratio (uncompressed bits / compressed bits).

        Assumes the uncompressed cache is stored in 16-bit entries.  Returns
        ``None`` for an unrecognized non-``None`` method; the callers in this
        class only pass the methods handled below.
        """
        if compress_method is None:
            return 1.0
        if compress_method == "Picache":
            # Low-rank factorization (rank for K, rankv for V) plus
            # quantization of the factors.
            if seqlen > rank and seqlen > rankv:
                return (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        ((model_dim + seqlen * batch_num) * (rank + rankv))
                        * quantize_bit
                        / 16
                    )
                )
            if seqlen <= rank:
                # K side not factorized yet (sequence shorter than rank).
                return (
                    (
                        2
                        * seqlen
                        * batch_num
                        * model_dim
                        / (
                            (model_dim + seqlen * batch_num) * rankv
                            + seqlen * batch_num * model_dim
                        )
                    )
                    * 16
                    / quantize_bit
                )
            # Remaining case: seqlen > rank but seqlen <= rankv.
            return (
                (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        (model_dim + seqlen * batch_num) * rank
                        + seqlen * batch_num * model_dim
                    )
                )
                * 16
                / quantize_bit
            )
        if compress_method == "poweriteration":
            return (
                seqlen
                * batch_num
                * model_dim
                / ((model_dim + seqlen * batch_num) * rank)
            )
        if compress_method == "stagept":
            return (
                seqlen
                * batch_num
                * model_dim
                / (model_dim * rank + seqlen * batch_num * (rank / stage))
            )
        if compress_method in (
            "uniformquantization",
            "groupquantization",
            "sortquantization",
        ):
            return 16 / quantize_bit
        if compress_method == "pruning":
            return 1 / top_k
        if compress_method in (
            "densesparseuniformquantization",
            "densesparsesortquantization",
        ):
            # 'left' is the fraction kept dense (full precision outliers).
            return 1 / (quantize_bit / 16 + left)
        if compress_method == "pt+outlier":
            return (
                seqlen
                * batch_num
                * model_dim
                * 16
                / quantize_bit
                / ((model_dim + seqlen * batch_num) * rank)
            )

    def calculate_compress_ratio_list(self, seqlen, model_dim):
        """Populate ``self.compress_ratio_list`` with one ratio per layer.

        Must be called after ``copy_for_all_attention`` (the per-layer
        attributes are indexed).  Unknown methods contribute no entry,
        matching the original fall-through behavior.
        """
        self.compress_ratio_list = []
        for i, compress_method in enumerate(self.compress_method):
            if compress_method is None:
                kwargs = {}
            elif compress_method == "Picache":
                kwargs = dict(
                    rank=self.rank[i],
                    rankv=self.rankv[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            elif compress_method == "poweriteration":
                kwargs = dict(rank=self.rank[i], batch_num=self.batch_num)
            elif compress_method == "stagept":
                kwargs = dict(
                    rank=self.rank[i],
                    batch_num=self.batch_num,
                    stage=self.stage[i],
                )
            elif compress_method in (
                "uniformquantization",
                "groupquantization",
                "sortquantization",
            ):
                kwargs = dict(rank=0, quantize_bit=self.quantize_bit[i])
            elif compress_method == "pruning":
                kwargs = dict(top_k=self.top_k[i])
            elif compress_method in (
                "densesparseuniformquantization",
                "densesparsesortquantization",
            ):
                kwargs = dict(
                    quantize_bit=self.quantize_bit[i], left=self.left[i]
                )
            elif compress_method == "pt+outlier":
                kwargs = dict(
                    rank=self.rank[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            else:
                continue
            self.compress_ratio_list.append(
                self.compress_ratio(compress_method, seqlen, model_dim, **kwargs)
            )

    def calculate_compress_ratio_total(self):
        """Return the mean ratio over ``self.compress_ratio_list``."""
        return sum(self.compress_ratio_list) / len(self.compress_ratio_list)

    def __str__(self):
        return f"compress_method:{self.compress_method},\nquantize_bit:{self.quantize_bit},\nrank:{self.rank},\nloop:{self.loop},\ndevice_num:{self.device_num},\ncompressratio:{self.compress_ratio_list},\ncompressratio_total:{self.calculate_compress_ratio_total()}"
| if int(bare_metal_major) >= 11: 34 | cc_flag.append("-gencode") 35 | cc_flag.append("arch=compute_80,code=sm_80") 36 | if int(bare_metal_minor) >= 7: 37 | cc_flag.append("-gencode") 38 | cc_flag.append("arch=compute_90,code=sm_90") 39 | 40 | # Build path 41 | srcpath = pathlib.Path(__file__).parent.absolute() 42 | buildpath = srcpath / "build" 43 | _create_build_dir(buildpath) 44 | 45 | 46 | def _cpp_extention_load_helper(name, sources, extra_cuda_flags): 47 | return cpp_extension.load( 48 | name=name, 49 | sources=sources, 50 | build_directory=buildpath, 51 | extra_cflags=[ 52 | "-O3", 53 | ], 54 | extra_cuda_cflags=[ 55 | "-O3", 56 | "-gencode", 57 | "arch=compute_70,code=sm_70", 58 | "--use_fast_math", 59 | ] 60 | + extra_cuda_flags 61 | + cc_flag, 62 | verbose=1, 63 | ) 64 | 65 | 66 | extra_flags = [] 67 | 68 | cache_autogptq_cuda_256_sources = [ 69 | "./cache_autogptq_cuda_256.cpp", 70 | "./cache_autogptq_cuda_kernel_256.cu", 71 | ] 72 | cache_autogptq_cuda_256 = _cpp_extention_load_helper( 73 | "cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags 74 | ) 75 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import GearLlamaForCausalLMNew 2 | from .old_models import GearLlamaForCausalLM 3 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .TrueCompressLlama import GearLlamaForCausalLMNew 2 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/models/compress_config.py: -------------------------------------------------------------------------------- 1 | class 
class GPT2CompressConfig(dict):
    """Per-layer configuration for true (in-memory) KV-cache compression.

    Scalar settings are broadcast into per-layer lists of length
    ``attention_number`` by ``copy_for_all_attention``.  Inherits ``dict``
    for config-plumbing compatibility; values live in instance attributes.
    """

    # Attributes replicated per attention layer by copy_for_all_attention().
    _PER_LAYER_FIELDS = (
        "compress_method",
        "quantize_bit",
        "group_num",
        "rank",
        "loop",
        "top_k",
        "device_num",
        "left",
        "stage",
        "rankv",
        "start_saving",
        "locality_saving",
        "token_preserving",
        "iter",
        "heavy_size",
        "recent_size",
        "streaming",
        "streaming_gap",
    )

    def __init__(
        self,
        compress_method=None,
        attention_number=12,
        quantize_bit=0,
        group_num=0,
        rank=0.0,
        rankv=0.0,
        loop=0,
        top_k=0.0,
        left=0.0,
        stage=1,
        device_num=0,
        batch_num=1,
        start_saving=0.0,
        locality_saving=0.0,
        token_preserving=False,
        streaming=False,
        streaming_gap=0,
        iter=0,
        # h2o settings
        heavy_size=0,
        recent_size=0,
    ):
        self.compress_method = compress_method
        self.quantize_bit = quantize_bit
        self.group_num = group_num
        self.rank = rank
        self.rankv = rankv
        # NOTE(review): 'ranv' is a historical typo; kept for any caller that
        # reads it.  'rankv' is the canonical attribute.
        self.ranv = rankv
        self.loop = loop
        self.device_num = device_num
        self.attention_number = attention_number
        self.top_k = top_k
        self.left = left
        self.batch_num = batch_num
        self.stage = stage
        self.start_saving = start_saving
        self.locality_saving = locality_saving
        self.token_preserving = token_preserving
        self.iter = iter
        self.heavy_size = heavy_size
        self.recent_size = recent_size
        self.streaming = streaming
        self.streaming_gap = streaming_gap

    def create_attention_config(self, config):
        """Return a list repeating ``config`` once per attention layer."""
        return [config for _ in range(self.attention_number)]

    def copy_for_all_attention(self):
        """Broadcast every scalar setting into a per-layer list.

        Call at most once; a second call would wrap the lists in lists.
        """
        for field in self._PER_LAYER_FIELDS:
            setattr(self, field, self.create_attention_config(getattr(self, field)))

    def compress_ratio(
        self,
        compress_method,
        seqlen,
        model_dim,
        rank=0,
        rankv=0,
        quantize_bit=0,
        top_k=0,
        left=0.0,
        stage=1,
        batch_num=1,
    ):
        """Analytic compression ratio (uncompressed bits / compressed bits).

        Assumes 16-bit uncompressed entries.  Returns ``None`` for an
        unrecognized non-``None`` method; callers in this class only pass
        the methods handled below.
        """
        if compress_method is None:
            return 1.0
        if compress_method == "Picache":
            # Low-rank factorization (rank for K, rankv for V) plus
            # quantization of the factors.
            if seqlen > rank and seqlen > rankv:
                return (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        ((model_dim + seqlen * batch_num) * (rank + rankv))
                        * quantize_bit
                        / 16
                    )
                )
            if seqlen <= rank:
                return (
                    (
                        2
                        * seqlen
                        * batch_num
                        * model_dim
                        / (
                            (model_dim + seqlen * batch_num) * rankv
                            + seqlen * batch_num * model_dim
                        )
                    )
                    * 16
                    / quantize_bit
                )
            # Remaining case: seqlen > rank but seqlen <= rankv.
            return (
                (
                    2
                    * seqlen
                    * batch_num
                    * model_dim
                    / (
                        (model_dim + seqlen * batch_num) * rank
                        + seqlen * batch_num * model_dim
                    )
                )
                * 16
                / quantize_bit
            )
        if compress_method == "poweriteration":
            return (
                seqlen
                * batch_num
                * model_dim
                / ((model_dim + seqlen * batch_num) * rank)
            )
        if compress_method == "stagept":
            return (
                seqlen
                * batch_num
                * model_dim
                / (model_dim * rank + seqlen * batch_num * (rank / stage))
            )
        if compress_method in (
            "uniformquantization",
            "groupquantization",
            "sortquantization",
        ):
            return 16 / quantize_bit
        if compress_method == "pruning":
            return 1 / top_k
        if compress_method in (
            "densesparseuniformquantization",
            "densesparsesortquantization",
        ):
            # 'left' is the fraction kept as full-precision outliers.
            return 1 / (quantize_bit / 16 + left)
        if compress_method == "pt+outlier":
            return (
                seqlen
                * batch_num
                * model_dim
                * 16
                / quantize_bit
                / ((model_dim + seqlen * batch_num) * rank)
            )

    def calculate_compress_ratio_list(self, seqlen, model_dim):
        """Populate ``self.compress_ratio_list`` with one ratio per layer.

        Must be called after ``copy_for_all_attention``.  Unknown methods
        contribute no entry, matching the original fall-through behavior.
        """
        self.compress_ratio_list = []
        for i, compress_method in enumerate(self.compress_method):
            if compress_method is None:
                kwargs = {}
            elif compress_method == "Picache":
                kwargs = dict(
                    rank=self.rank[i],
                    rankv=self.rankv[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            elif compress_method == "poweriteration":
                kwargs = dict(rank=self.rank[i], batch_num=self.batch_num)
            elif compress_method == "stagept":
                kwargs = dict(
                    rank=self.rank[i],
                    batch_num=self.batch_num,
                    stage=self.stage[i],
                )
            elif compress_method in (
                "uniformquantization",
                "groupquantization",
                "sortquantization",
            ):
                kwargs = dict(rank=0, quantize_bit=self.quantize_bit[i])
            elif compress_method == "pruning":
                kwargs = dict(top_k=self.top_k[i])
            elif compress_method in (
                "densesparseuniformquantization",
                "densesparsesortquantization",
            ):
                kwargs = dict(
                    quantize_bit=self.quantize_bit[i], left=self.left[i]
                )
            elif compress_method == "pt+outlier":
                kwargs = dict(
                    rank=self.rank[i],
                    quantize_bit=self.quantize_bit[i],
                    batch_num=self.batch_num,
                    left=self.left[i],
                )
            else:
                continue
            self.compress_ratio_list.append(
                self.compress_ratio(compress_method, seqlen, model_dim, **kwargs)
            )

    def calculate_compress_ratio_total(self):
        """Return the mean ratio over ``self.compress_ratio_list``."""
        return sum(self.compress_ratio_list) / len(self.compress_ratio_list)

    def __str__(self):
        return f"compress_method:{self.compress_method},\nquantize_bit:{self.quantize_bit},\nrank:{self.rank},\nloop:{self.loop},\ndevice_num:{self.device_num},\ncompressratio:{self.compress_ratio_list},\ncompressratio_total:{self.calculate_compress_ratio_total()}"
def _get_cuda_bare_metal_version(cuda_dir):
    """Parse ``nvcc -V`` output for the toolkit at *cuda_dir*.

    Returns (raw nvcc output, major version string, minor version string).
    Raises CalledProcessError if nvcc is missing or fails, and ValueError
    if 'release' does not appear in the output.
    """
    raw_output = subprocess.check_output(
        [cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True
    )
    output = raw_output.split()
    # nvcc prints e.g. "... release 11.8, V11.8.89"; the token after
    # "release" is the version number (with a trailing comma).
    release_idx = output.index("release") + 1
    release = output[release_idx].split(".")
    bare_metal_major = release[0]
    # Only the first character of the minor component is kept ("8," -> "8").
    bare_metal_minor = release[1][0]

    return raw_output, bare_metal_major, bare_metal_minor


def _create_build_dir(buildpath):
    """Create *buildpath* if needed; only report failure when it truly
    does not exist afterwards (a concurrent mkdir is tolerated)."""
    try:
        os.mkdir(buildpath)
    except OSError:
        if not os.path.isdir(buildpath):
            print(f"Creation of the build directory {buildpath} failed")


# Check if cuda 11 is installed for compute capability 8.0
# (sm_80 = Ampere; sm_90 = Hopper, needs CUDA >= 11.7).
cc_flag = []
_, bare_metal_major, bare_metal_minor = _get_cuda_bare_metal_version(
    cpp_extension.CUDA_HOME
)
if int(bare_metal_major) >= 11:
    cc_flag.append("-gencode")
    cc_flag.append("arch=compute_80,code=sm_80")
    # NOTE(review): for CUDA 12.x this minor check also passes only when the
    # minor digit is >= 7 — presumably intended, but verify for 12.0-12.6.
    if int(bare_metal_minor) >= 7:
        cc_flag.append("-gencode")
        cc_flag.append("arch=compute_90,code=sm_90")

# Build path: compiled extension objects go in ./build next to this file.
srcpath = pathlib.Path(__file__).parent.absolute()
buildpath = srcpath / "build"
_create_build_dir(buildpath)


def _cpp_extention_load_helper(name, sources, extra_cuda_flags):
    """JIT-compile and load a CUDA extension via torch.utils.cpp_extension.

    Always targets sm_70 (Volta) plus whatever ``cc_flag`` detected above.
    """
    return cpp_extension.load(
        name=name,
        sources=sources,
        build_directory=buildpath,
        extra_cflags=[
            "-O3",
        ],
        extra_cuda_cflags=[
            "-O3",
            "-gencode",
            "arch=compute_70,code=sm_70",
            "--use_fast_math",
        ]
        + extra_cuda_flags
        + cc_flag,
        verbose=1,
    )


extra_flags = []

# Sources are resolved relative to the current working directory, not this
# file — presumably callers import this module from its own directory;
# TODO(review): confirm, or prefix with srcpath.
cache_autogptq_cuda_256_sources = [
    "./cache_autogptq_cuda_256.cpp",
    "./cache_autogptq_cuda_kernel_256.cu",
]
# Compiled at import time: importing this module triggers an nvcc build.
cache_autogptq_cuda_256 = _cpp_extention_load_helper(
    "cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags
)
def get_approximate_basis(
    A: Tensor, q: int, niter: Optional[int] = 2, M: Optional[Tensor] = None
) -> Tensor:
    """Return tensor :math:`Q` with :math:`q` orthonormal columns such
    that :math:`Q Q^H A` approximates :math:`A`. If :math:`M` is
    specified, then :math:`Q` is such that :math:`Q Q^H (A - M)`
    approximates :math:`A - M`.

    .. note:: The implementation is based on the Algorithm 4.4 from
              Halko et al, 2009.

    .. note:: For an adequate approximation of a k-rank matrix
              :math:`A`, where k is not known in advance but could be
              estimated, the number of :math:`Q` columns, q, can be
              chosen according to the following criteria: in general,
              :math:`k <= q <= min(2*k, m, n)`. For large low-rank
              matrices, take :math:`q = k + 5..10`. If k is
              relatively small compared to :math:`min(m, n)`, choosing
              :math:`q = k + 0..2` may be sufficient.

    .. note:: To obtain repeatable results, reset the seed for the
              pseudorandom number generator.

    Args::
        A (Tensor): the input tensor of size :math:`(*, m, n)`

        q (int): the dimension of subspace spanned by :math:`Q`
                 columns.

        niter (int, optional): the number of subspace iterations to
                               conduct; ``niter`` must be a
                               nonnegative integer. In most cases, the
                               default value 2 is more than enough.

        M (Tensor, optional): the input tensor's mean of size
                              :math:`(*, 1, n)`.

    References::
        - Nathan Halko, Per-Gunnar Martinsson, and Joel Tropp, Finding
          structure with randomness: probabilistic algorithms for
          constructing approximate matrix decompositions,
          arXiv:0909.4061 [math.NA; math.PR], 2009.
    """
    # BUGFIX: the original body referenced ``_utils`` (torch._lowrank's
    # module-local helper import) which is undefined here, so every call
    # raised NameError.  The helpers are replaced with their explicit
    # torch equivalents below.
    niter = 2 if niter is None else niter
    m, n = A.shape[-2:]
    # Floating dtype for the random test matrix; non-floating inputs fall
    # back to float32 (mirrors torch._utils-style get_floating_dtype).
    dtype = A.dtype if A.is_floating_point() or A.is_complex() else torch.float32
    matmul = torch.matmul

    R = torch.randn(n, q, dtype=dtype, device=A.device)

    # The following code could be made faster using torch.geqrf + torch.ormqr
    # but geqrf is not differentiable.
    # Conjugate transpose ("transjugate"); .conj() is a no-op for real input.
    A_H = A.transpose(-2, -1).conj()
    if M is None:
        Q = torch.linalg.qr(matmul(A, R)).Q
        for _ in range(niter):
            Q = torch.linalg.qr(matmul(A_H, Q)).Q
            Q = torch.linalg.qr(matmul(A, Q)).Q
    else:
        M_H = M.transpose(-2, -1).conj()
        Q = torch.linalg.qr(matmul(A, R) - matmul(M, R)).Q
        for _ in range(niter):
            Q = torch.linalg.qr(matmul(A_H, Q) - matmul(M_H, Q)).Q
            Q = torch.linalg.qr(matmul(A, Q) - matmul(M, Q)).Q

    return Q
def detect_infnan(input_tensor, string):
    """Debug guard: fail loudly if *input_tensor* contains NaN or Inf.

    Args:
        input_tensor: any torch tensor to validate.
        string: context label prefixed to the error message.

    Raises:
        ValueError: if any element is NaN or Inf.

    BUGFIX: the original printed the message and then spun forever in
    ``while True: pass``, hanging the process (and pinning a CPU core)
    instead of surfacing the error; raising preserves the message and is
    debuggable.
    """
    if torch.isnan(input_tensor).any():
        raise ValueError(string + "has nan")
    if torch.isinf(input_tensor).any():
        raise ValueError(string + "has inf")
self.step = None 64 | self.min_p = None 65 | self.min_q = None 66 | self.step_p = None 67 | self.step_q = None 68 | self.left = compress_kwargs["left"] 69 | self.rank = compress_kwargs["rank"] 70 | self.loop = compress_kwargs["loop"] 71 | self.dtype = None 72 | self.shape = None 73 | self.shape_p = None 74 | self.shape_q = None 75 | self.quantize_part = None 76 | self.values = None 77 | self.indices = None 78 | self.p_base = None 79 | self.q_base = None 80 | self.counter = 0 81 | self.streaming_gap = compress_kwargs["streaming_gap"] 82 | self.buffer = None 83 | self.streaming = compress_kwargs["stream"] 84 | self.seq_length = 0 85 | self.input_shape = 0 86 | 87 | def compress_function(self, input_tensor: torch.Tensor): 88 | self.dtype = input_tensor.dtype 89 | # detect_infnan(input_tensor,"compress input tensor") 90 | if self.compress_mode == "uniform": 91 | output, shape, min, step = compress_function[self.compress_mode]( 92 | input_tensor, self.quantize_bit 93 | ) 94 | self.quantize_part = output 95 | self.min = min 96 | self.step = step 97 | self.shape = shape 98 | elif self.compress_mode == "outlier": 99 | output, shape, min, step, values, indices = compress_function[ 100 | self.compress_mode 101 | ](input_tensor, self.quantize_bit, self.left) 102 | self.quantize_part = output 103 | self.min = min 104 | self.step = step 105 | self.shape = shape 106 | self.values = values 107 | self.indices = indices 108 | elif self.compress_mode == "gear": 109 | output, shape, min, step, values, indices, p_base, q_base = ( 110 | compress_function[self.compress_mode]( 111 | input_tensor, self.quantize_bit, self.left, self.rank, self.loop 112 | ) 113 | ) 114 | self.quantize_part = output 115 | self.min = min 116 | self.step = step 117 | self.shape = shape 118 | self.values = values 119 | self.indices = indices 120 | self.p_base = p_base 121 | self.q_base = q_base 122 | elif self.compress_mode == "uniform_batch": 123 | output, shape, min, step = 
compress_function[self.compress_mode]( 124 | input_tensor, self.quantize_bit 125 | ) 126 | self.quantize_part = output 127 | self.min = min 128 | self.step = step 129 | self.shape = shape 130 | elif self.compress_mode == "outlier_batch": 131 | output, shape, min, step, values, indices = compress_function[ 132 | self.compress_mode 133 | ](input_tensor, self.quantize_bit, self.left) 134 | self.quantize_part = output 135 | self.min = min 136 | self.step = step 137 | self.shape = shape 138 | self.values = values 139 | self.indices = indices 140 | elif self.compress_mode == "gear_batch": 141 | output, shape, min, step, values, indices, p_base, q_base = ( 142 | compress_function[self.compress_mode]( 143 | input_tensor, self.quantize_bit, self.left, self.rank, self.loop 144 | ) 145 | ) 146 | self.quantize_part = output 147 | self.min = min 148 | self.step = step 149 | self.shape = shape 150 | self.values = values 151 | self.indices = indices 152 | self.p_base = p_base 153 | self.q_base = q_base 154 | elif self.compress_mode == "gear_tokenwiseQ": 155 | 156 | ( 157 | quantized_input, 158 | shape, 159 | min, 160 | step, 161 | p_base, 162 | q_base, 163 | shape_p, 164 | shape_q, 165 | min_p, 166 | min_q, 167 | scale_p, 168 | scale_q, 169 | ) = compress_function[self.compress_mode]( 170 | input_tensor, self.quantize_bit, self.rank, self.loop 171 | ) 172 | self.quantize_part = quantized_input 173 | self.min = min 174 | self.step = step 175 | self.shape = shape 176 | self.p_base = p_base 177 | self.q_base = q_base 178 | self.shape_p = shape_p 179 | self.shape_q = shape_q 180 | self.min_p = min_p 181 | self.min_q = min_q 182 | self.step_p = scale_p 183 | self.step_q = scale_q 184 | elif self.compress_mode == "gear_tokenwiseQ_nopq": 185 | quantized_input, shape, min, step, p_base, q_base = compress_function[ 186 | self.compress_mode 187 | ](input_tensor, self.quantize_bit, self.rank, self.loop) 188 | self.quantize_part = quantized_input 189 | self.min = min 190 | self.step = step 
191 | self.shape = shape 192 | self.p_base = p_base 193 | self.q_base = q_base 194 | # print("quantized_part_min_max:",self.quantize_part.min(),self.quantize_part.max(),"p_base_min_max:",self.min_p.min(),self.p_base[0].max(),"q_base_min_max:",self.min_q.min(),self.q_base[0].max()) 195 | # detect_infnan(quantized_input,"compress quantized_input tensor") 196 | # detect_infnan(self.p_base[0],"compress p_base tensor") 197 | # detect_infnan(self.q_base[0],"compress q_base tensor") 198 | 199 | def decompress_function(self): 200 | if self.compress_mode == "uniform": 201 | output = decompress_function[self.compress_mode]( 202 | self.quantize_part, 203 | self.quantize_bit, 204 | self.shape, 205 | self.min, 206 | self.step, 207 | self.dtype, 208 | ) 209 | elif self.compress_mode == "outlier": 210 | output = decompress_function[self.compress_mode]( 211 | self.quantize_part, 212 | self.quantize_bit, 213 | self.shape, 214 | self.min, 215 | self.step, 216 | self.dtype, 217 | self.values, 218 | self.indices, 219 | ) 220 | elif self.compress_mode == "gear": 221 | output = decompress_function[self.compress_mode]( 222 | self.quantize_part, 223 | self.quantize_bit, 224 | self.shape, 225 | self.min, 226 | self.step, 227 | self.dtype, 228 | self.values, 229 | self.indices, 230 | self.p_base, 231 | self.q_base, 232 | ) 233 | elif self.compress_mode == "uniform_batch": 234 | output = decompress_function[self.compress_mode]( 235 | self.quantize_part, 236 | self.quantize_bit, 237 | self.shape, 238 | self.min, 239 | self.step, 240 | self.dtype, 241 | ) 242 | elif self.compress_mode == "outlier_batch": 243 | output = decompress_function[self.compress_mode]( 244 | self.quantize_part, 245 | self.quantize_bit, 246 | self.shape, 247 | self.min, 248 | self.step, 249 | self.dtype, 250 | self.values, 251 | self.indices, 252 | ) 253 | elif self.compress_mode == "gear_batch": 254 | output = decompress_function[self.compress_mode]( 255 | self.quantize_part, 256 | self.quantize_bit, 257 | self.shape, 
258 | self.min, 259 | self.step, 260 | self.dtype, 261 | self.values, 262 | self.indices, 263 | self.p_base, 264 | self.q_base, 265 | ) 266 | elif self.compress_mode == "gear_tokenwiseQ": 267 | output = decompress_function[self.compress_mode]( 268 | self.quantize_part, 269 | self.quantize_bit, 270 | self.shape, 271 | self.min, 272 | self.step, 273 | self.p_base, 274 | self.q_base, 275 | self.shape_p, 276 | self.shape_q, 277 | self.min_p, 278 | self.min_q, 279 | self.step_p, 280 | self.step_q, 281 | self.dtype, 282 | ) 283 | elif self.compress_mode == "gear_tokenwiseQ_nopq": 284 | output = decompress_function[self.compress_mode]( 285 | self.quantize_part, 286 | self.quantize_bit, 287 | self.shape, 288 | self.min, 289 | self.step, 290 | self.p_base, 291 | self.q_base, 292 | self.dtype, 293 | ) 294 | # detect_infnan(output,"decompress") 295 | return output 296 | 297 | def compress(self, input_tensor): 298 | self.seq_length = input_tensor.shape[-2] 299 | # print("compress",self.counter) 300 | self.input_shape = input_tensor.shape 301 | if self.streaming is True: 302 | if self.counter % self.streaming_gap == 0: 303 | self.buffer = None 304 | self.compress_function(input_tensor) 305 | else: 306 | extract_id = self.counter % self.streaming_gap 307 | self.buffer = input_tensor[:, :, -extract_id:, :].clone() 308 | 309 | else: 310 | self.compress_function(input_tensor) 311 | 312 | def decompress(self): 313 | # print("decompress",self.counter) 314 | if self.streaming is True: 315 | if self.counter % self.streaming_gap == 0: 316 | output = self.decompress_function() 317 | if self.buffer is not None: 318 | output = torch.cat([output, self.buffer], dim=-2) 319 | 320 | else: 321 | output = self.decompress_function() 322 | 323 | output = torch.cat([output, self.buffer], dim=-2) 324 | 325 | self.counter += 1 326 | 327 | else: 328 | 329 | output = self.decompress_function() 330 | # detect_infnan(output,"decompress output") 331 | return output 332 | 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/TrueCompression/old_models/__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling_llama_old import GearLlamaForCausalLM 2 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/GEARLM/__init__.py: -------------------------------------------------------------------------------- 1 | from .TrueCompression import GearLlamaForCausalLMNew 2 | from .TrueCompression import GearLlamaForCausalLM 3 | 4 | from .Simulated import CompressionConfig 5 | from .Simulated import SimulatedGearLlamaForCausalLM 6 | 7 | # from .modeling_llama_h2o import H2OLlamaForCausalLM, LlamaConfig 8 | from .Simulated import SimulatedGearMistralForCausalLM, MistralConfig 9 | from .Simulated import LlamaForCausalLMH2O 10 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/Readme.md: -------------------------------------------------------------------------------- 1 | ## GenerationTest 2 | Llama-2-7b and Llama-2-13b and Mistral-7b with GEAR tested on GSM8K, BBH and Aqua, all with COT prompts 3 | ## Usage 4 | `evaluation_bbh_cot.py` tests models on BBH-COT 5 | `evaluation_gsm8k.py` tests models on GSM8K and GSM8K-COT 6 | `evaluation_aqua_cot.py` tests models on Aqua-COT 7 | 8 | See sh files for more details. 9 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/aqua/cot_prompt_8shots.txt: -------------------------------------------------------------------------------- 1 | Question: Three birds are flying at a fast rate of 900 kilometers per hour. What is their speed in miles per minute?
[1km = 0.6 miles] 2 | Option: 3 | A)32400 4 | B)6000 5 | C)600 6 | D)60000 7 | E)10\nThe answer is 8 | Let's think step by step 9 | To calculate the equivalent of miles in a kilometer 10 | 0.6 kilometers = 1 mile. 11 | 900 kilometers = (0.6)*900 = 540 miles. 12 | Then there are 60 minutes in 1 hour. 13 | So speed in miles/minutes = 60 * 540 = 32400. 14 | The answer is A). 15 | 16 | Question: A ship is leaving a port. It takes 240 seconds to passes through a 750m channel to get to the port gates, and takes 60 seconds to pass through the gates of the port. What is its length? 17 | Option: 18 | A)100 m 19 | B)150 m 20 | C)200 m 21 | D)250 m 22 | E)300 m 23 | Let's think step by step 24 | Let the length of the ship be x metres and its speed be y m/sec. 25 | Then, x / y = 60 ⇒ y = x / 60 26 | Therefore (x + 750) / 240 = x / 60. 27 | So x = 250 m. 28 | The answer is D). 29 | 30 | Question: A rectangular piece of cloth 2 feet wide was cut lengthwise into two smaller rectangular pieces. The shorter piece was one-third of the length of the longer of the 2 new pieces and had an area of 12 square feet. What was the length Q in feet of the original piece of cloth before cutting? 31 | Option: 32 | A)6 33 | B)18 34 | C)24 35 | D)36 36 | E)48 37 | Let's think step by step 38 | The question says, length of shorter piece is 1/3rd of the length of longer of the two new pieces. 39 | This means after I split it into two pieces, I have a shorter piece and a longer one. 40 | The length of shorter is 1/3rd the length of longer. 41 | The 6 I found out was length of shorter piece. 42 | Then, length of longer should be 18 and total length should be 24. 43 | The answer is C). 44 | 45 | Question: In the xy-coordinate plane, which of the following points must lie on the line kx + 2y = 6 for every possible value of k? 
46 | Option: 47 | A)(1,1) 48 | B)(0,3) 49 | C)(2,0) 50 | D)(3,6) 51 | E)(6,3) 52 | Let's think step by step 53 | For a point to satisfy the given equation for all possible values of k, then x must equal 0. 54 | When x=0, the equation is simply 2y = 6, and then y = 3. 55 | The answer is B). 56 | 57 | Question: A travel company wants to charter a plane to the Bahamas. Chartering the plane costs $5,000. So far, 12 people have signed up for the trip. If the company charges $200 per ticket, how many more passengers must sign up for the trip before the company can make any profit on the charter? 58 | Option: 59 | A)7 60 | B)9 61 | C)13 62 | D)27 63 | E)45 64 | Let's think step by step 65 | Additional passengers (i.e., an integer, let's say x) who must sign up before the company can make any profit = ? 66 | 200 * 12 + 200 * x >= 5000\n2400 + 200x >= 5000. 67 | Then 200x >= 2600. 68 | And x >= 2600/200. 69 | So x >= 13. 70 | The answer is C). 71 | 72 | Question: Kirk sells cars. On two sales, Kirk has received commissions of $850, and $705, and he has one additional sale pending. If Kirk is to receive an average (arithmetic mean) commission of exactly $820 on the three sales, then the third commission must be: 73 | Option: 74 | A)$895 75 | B)$905 76 | C)$910 77 | D)$915 78 | E)$945 79 | Let's think step by step 80 | Let X be the third value. 81 | Then 850 + 705 + X = 820 * 3. 82 | Therefore 1555 + X = 2460. 83 | So X = 905. 84 | The answer is B). 85 | 86 | Question: A group of 5 friends were to contribute equally to purchase items from a store. If store offered them a promotional discount of 15%, how much did each friend pay, given a total bill of $ 1200? 87 | Option: 88 | A)$203 89 | B)$202 90 | C)$209 91 | D)$205 92 | E)$204 93 | Let's think step by step 94 | At the non-discounted price, each friend would pay $240, as $ 1200 divided by 5 friends is $240 per friend. 95 | But if the bill is 15% off, then each friend would pay 15% less. 
96 | 15% of $240 is $36, 97 | So each friend saves $36 and pays the remaining $204. 98 | The answer is E). 99 | 100 | Question: ABC company pays an average of $120 per vehicle each month in outdoor parking fees for three of its eight vehicles. The company pays garage parking fees for the remaining five vehicles. If ABC pays an average of $240 per vehicle overall each month for parking, how much does ABC pay per month in garage parking fees for its vehicles? 101 | Option: 102 | A)300 103 | B)420 104 | C)912 105 | D)1340 106 | E)1500 107 | Let's think step by step 108 | This means it could go out sometimes and it could stay at the garage sometimes, 109 | thus the ABC company still has to pay fees for going-out cars in garage 110 | So the math will be 240*8 cars-3*120=1340 fee for parking in garage. 111 | The answer is D). 112 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/boolean_expressions.txt: -------------------------------------------------------------------------------- 1 | Evaluate the result of a random Boolean expression. 2 | 3 | Q: not ( ( not not True ) ) is 4 | A: Let's think step by step. 5 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 6 | We first simplify this expression "Z" as follows: "Z = not ( ( not not True ) ) = not ( ( A ) )" where "A = not not True". 7 | Let's evaluate A: A = not not True = not (not True) = not False = True. 8 | Plugging in A, we get: Z = not ( ( A ) ) = not ( ( True ) ) = not True = False. So the answer is False. 9 | 10 | Q: True and False and not True and True is 11 | A: Let's think step by step. 12 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 
13 | We first simplify this expression "Z" as follows: "Z = True and False and not True and True = A and B" where "A = True and False" and "B = not True and True". 14 | Let's evaluate A: A = True and False = False. 15 | Let's evaluate B: B = not True and True = not (True and True) = not (True) = False. 16 | Plugging in A and B, we get: Z = A and B = False and False = False. So the answer is False. 17 | 18 | Q: not not ( not ( False ) ) is 19 | A: Let's think step by step. 20 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 21 | We first simplify this expression "Z" as follows: "Z = not not ( not ( False ) ) = not not ( A )" where "A = not ( False )". 22 | Let's evaluate A: A = not ( False ) = not False = True. 23 | Plugging in A, we get: Z = not not ( A ) = not not (True) = not not False = True. So the answer is True. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/causal_judgement.txt: -------------------------------------------------------------------------------- 1 | Answer questions about causal attribution. 2 | 3 | Q: How would a typical person answer each of the following questions about causation? 4 | Frank T., had an ongoing dispute with his neighbor over a stretch of land and one day decided to shoot his neighbor in the body. Frank T. had no experience with guns, his hand slipped on the barrel of the gun, and the shot went wild. Nonetheless, the bullet bounced off a large boulder several feet away and hit the neighbor's body, causing significant injury. Did Frank T. intentionally shoot his neighbor in the body? 5 | Options: 6 | - Yes 7 | - No 8 | A: Let's think step by step. 9 | Here in this question, we are told that "Frank T. had no experience with guns, his hand slipped on the barrel of the gun, and the shot went wild." 
A typical person would assume that this passage suggests that Frank T. had no intention of shooting and injuring someone and that the bullet accidentally hit the neighbor's body; therefore, we conclude that Frank T. did not intentionally hit his neighbor. So the answer is No. 10 | 11 | Q: How would a typical person answer each of the following questions about causation? 12 | Suzy and Billy are working on a project that is very important for our nation's security. The boss tells them both: "Be sure that you are here at exactly 9 am. It is absolutely essential that you arrive at that time." Both Billy and Suzy arrive at 9 am. As it happens, there was a motion detector installed in the room where they arrived. The motion detector was set up to be triggered if at least one person appeared in the room at the same time. So the motion detector went off. Did Billy cause the motion detector to go off? 13 | Options: 14 | - Yes 15 | - No 16 | A: Let's think step by step. 17 | Here in this question, we are told that the boss ordered them both to arrive at the meeting room at the same time and that the motion detector was set up to be triggered if at least one person appeared in the room at the same time." A typical person would assume that the person probably meant to say the detector was set up to be triggered if "both persons" appeared in the room at the same time, not at least one person, since otherwise the phrase "at the same time" would not make much sense in that sentence. Because the motion detector went off, a typical person would therefore come to the conclusion that both Suzy and Billy triggered the motion detector to go off; hence, Billy did indeed cause the motion detector to go off. So the answer is Yes. 18 | 19 | Q: How would a typical person answer each of the following questions about causation? 20 | George and his sister Lena reunite at their parents' house for Thanksgiving. 
Whereas George just got into medical school, Lena is unhappy in her marriage and recently lost her job. Over the course of the day, George and Lena get into a number of heated arguments. Later in the afternoon they play a game of darts. They split the first two games, and the third game is close until the end. Who will win comes down to George's last shot. If he hits a high point region, he wins; if he hits a low point region, Lena wins. George thinks of the difficult time Lena is having, and he really wants to let her win. He aims the dart at the low point region. He sets up his shot and the dart lands in the low point region. After his shot, Lena wins the game and is very happy. Did George hit the low point region intentionally? 21 | Options: 22 | - Yes 23 | - No 24 | A: Let's think step by step. 25 | Here in this question, we are told that "He aims the dart at the low point region." A typical person might therefore think George did intentionally hit the low point region, because he wanted to lift up the spirit of his sister Lena. So the answer is Yes. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/date_understanding.txt: -------------------------------------------------------------------------------- 1 | Infer the date from context. 2 | 3 | Q: Today is Christmas Eve of 1937. What is the date 10 days ago in MM/DD/YYYY? 4 | Options: 5 | (A) 12/14/2026 6 | (B) 12/14/1950 7 | (C) 12/14/2007 8 | (D) 12/14/1937 9 | (E) 07/14/1938 10 | (F) 12/14/1988 11 | A: Let's think step by step. 12 | If today is Christmas Eve of 1937, then today's date is December 24, 1937. 10 days before today is December 14, 1937, that is 12/14/1937. So the answer is (D). 13 | 14 | Q: Tomorrow is 11/12/2019. What is the date one year ago from today in MM/DD/YYYY? 15 | Options: 16 | (A) 09/04/2018 17 | (B) 11/11/2018 18 | (C) 08/25/2018 19 | (D) 11/02/2018 20 | (E) 11/04/2018 21 | A: Let's think step by step. 
22 | If tomorrow is 11/12/2019, then today is 11/11/2019. The date one year ago from today is 11/11/2018. So the answer is (B). 23 | 24 | Q: Jane and John married on Jan 2, 1958. It is their 5-year anniversary today. What is the date tomorrow in MM/DD/YYYY? 25 | Options: 26 | (A) 01/11/1961 27 | (B) 01/03/1963 28 | (C) 01/18/1961 29 | (D) 10/14/1960 30 | (E) 01/03/1982 31 | (F) 12/03/1960 32 | A: Let's think step by step. 33 | If Jane and John married on Jan 2, 1958, then and if it is their 5-year anniversary today, then today's date is Jan 2, 1963. The date tomorrow is Jan 3, 1963, that is 01/03/1963. So the answer is (B). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/disambiguation_qa.txt: -------------------------------------------------------------------------------- 1 | Clarify the meaning of sentences with ambiguous pronouns. 2 | 3 | Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 4 | Sentence: The chief told the counselor that they took the day off. 5 | Options: 6 | (A) The chief took the day off 7 | (B) The counselor took the day off 8 | (C) Ambiguous 9 | A: Let's think step by step. 10 | Here we need to determine who the pronoun "they" might be referring to. There are two possible referents for "they", namely the chief and the counselor. The verb "told" might be able to help us determine which one is more likely (if either). Let X be the chief and Y the counselor. The sentence is then of the form "X told Y that (X or Y) did something." 11 | Let's consider Y first: "X told Y that Y did something." This case does not make much sense, as Y would already have the information that Y did something, because it is information about themself. 12 | Now, consider X: "X told Y that X did something." 
This makes sense, because X would be sharing some information about themself that Y might not have known before. 13 | Because in this context, X is the chief and Y is the counselor, the answer should be the chief. So the answer is (A). 14 | 15 | Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 16 | Sentence: The manager sent a message to the secretary, but he didn't reply yet. 17 | Options: 18 | (A) The secretary didn't reply yet 19 | (B) The manager didn't reply yet 20 | (C) Ambiguous 21 | A: Let's think step by step. 22 | Here we need to determine who the pronoun "he" might be referring to. There are two possible referents for "he", namely the manager and the secretary. The verbs "sent" and "reply" might be able to help us determine which one is more likely (if either). Let X be the manager and Y the secretary. The sentence is then of the form "X sent a message to Y, but (X or Y) didn't reply yet." 23 | Let's consider Y first: "X sent a message to Y, but Y didn't reply yet." This case makes sense, because of the implicit causality of the sentence. Y was the receiver of the message, but Y didn't get back to X yet. 24 | Now, consider X: "X sent a message to Y, but X didn't reply yet." This case doesn't make sense, because X was the initial sender of the message, so it is now Y's turn to write back to X. 25 | Because in this context, X is the manager and Y is the secretary, the answer should be the secretary. So the answer is (A). 26 | 27 | Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 28 | Sentence: Bailey will plan to meet the director at his office 29 | Options: 30 | (A) It will be Bailey's office 31 | (B) It will be the director's office 32 | (C) Ambiguous 33 | A: Let's think step by step. 34 | Here we need to determine who the pronoun "his" might be referring to. 
There are two possible referents for "his", namely Bailey's and the director's. The verb phrase "plan to meet" might be able to help us determine which one is more likely (if either). Let X be Bailey and Y the director. The sentence is then of the form "X will plan to meet Y at (X or Y)'s office." 35 | Let's consider Y first: "X will plan to meet Y at Y's office." This case makes sense, because X might want to meet up with Y at Y's office. 36 | Now, consider X: "X will plan to meet Y at X's office." This case also makes sense, because X might want to meet up with Y at X's own office. 37 | Because both X and Y are possible at the same time, we conclude that the antecedent of the pronoun is ambiguous. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/dyck_languages.txt: -------------------------------------------------------------------------------- 1 | Correctly close a Dyck-n word. 2 | 3 | Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: [ { [ 4 | A: Let's think step by step. 5 | We should process each input one by one and keep track of the stack configuration. 6 | 0: empty stack 7 | 1: [ ; stack: [ 8 | 2: { ; stack: [ { 9 | 3: [ ; stack: [ { [ 10 | Now, we have reached the end. The final stack is "[ { [". 11 | We will need to pop out "[", "{", "[" one by one in that order. 12 | So, we need "]", "}", "]". So the answer is ] } ]. 13 | 14 | Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: < > ( ( [ [ ( { } ) [ < > ] ] 15 | A: Let's think step by step. 16 | We should process each input one by one and keep track of the stack configuration. 
17 | 0: empty stack 18 | 1: < ; stack: < 19 | 2: > ; stack: empty 20 | 3: ( ; stack: ( 21 | 4: ( ; stack: ( ( 22 | 5: [ ; stack: ( ( [ 23 | 6: [ ; stack: ( ( [ [ 24 | 7: ( ; stack: ( ( [ [ ( 25 | 8: { ; stack: ( ( [ [ ( { 26 | 9: } ; stack: ( ( [ [ ( 27 | 10: ) ; stack: ( ( [ [ 28 | 11: [ ; stack: ( ( [ [ [ 29 | 12: < ; stack: ( ( [ [ [ < 30 | 13: > ; stack: ( ( [ [ [ 31 | 14: ] ; stack: ( ( [ [ 32 | 15: ] ; stack: ( ( [ 33 | Now, we have reached the end. The final stack is "( ( [". 34 | We will need to pop out "[", "(", "(" one by one in that order. 35 | So, we need "]", ")", ")". So the answer is ] ) ). 36 | 37 | Q: Complete the rest of the sequence, making sure that the parentheses are closed properly. Input: < [ < [ { < [ ] < { } > > } ] > { { ( ) } { < [ < > ] > } 38 | A: Let's think step by step. 39 | We should process each input one by one and keep track of the stack configuration. 40 | 0: empty stack 41 | 1: < ; stack: < 42 | 2: [ ; stack: < [ 43 | 3: < ; stack: < [ < 44 | 4: [ ; stack: < [ < [ 45 | 5: { ; stack: < [ < [ { 46 | 6: < ; stack: < [ < [ { < 47 | 7: [ ; stack: < [ < [ { < [ 48 | 8: ] ; stack: < [ < [ { < 49 | 9: < ; stack: < [ < [ { < < 50 | 10: { ; stack: < [ < [ { < < { 51 | 11: } ; stack: < [ < [ { < < 52 | 12: > ; stack: < [ < [ { < 53 | 13: > ; stack: < [ < [ { 54 | 14: } ; stack: < [ < [ 55 | 15: ] ; stack: < [ < 56 | 16: > ; stack: < [ 57 | 17: { ; stack: < [ { 58 | 18: { ; stack: < [ { { 59 | 19: ( ; stack: < [ { { ( 60 | 20: ) ; stack: < [ { { 61 | 21: } ; stack: < [ { 62 | 22: { ; stack: < [ { { 63 | 23: < ; stack: < [ { { < 64 | 24: [ ; stack: < [ { { < [ 65 | 25: < ; stack: < [ { { < [ < 66 | 26: > ; stack: < [ { { < [ 67 | 27: ] ; stack: < [ { { < 68 | 28: > ; stack: < [ { { 69 | 29: } ; stack: < [ { 70 | Now, we have reached the end. The final stack is "< [ {". 71 | We will need to pop out "{", "[", "<" one by one in that order. 72 | So, we need "}", "]", ">". So the answer is } ] >. 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/formal_fallacies.txt: -------------------------------------------------------------------------------- 1 | Distinguish deductively valid arguments from formal fallacies. 2 | 3 | Q: "It is not always easy to see who is related to whom -- and in which ways. The following argument pertains to this question: To begin with, Lesley is a close friend of Fernando. Moreover, being a close friend of Fernando or a schoolmate of Lowell is sufficient for being a great-grandfather of Leroy. It follows that Lesley is a great-grandfather of Leroy." 4 | Is the argument, given the explicitly stated premises, deductively valid or invalid? 5 | Options: 6 | - valid 7 | - invalid 8 | A: Let's think step by step. 9 | (1) Lesley is a close friend of Fernando: Lesley = friend(Fernando). 10 | (2) Being a close friend of Fernando or a schoolmate of Lowell is sufficient for being a great-grandfather of Leroy: If X = friend(Fernando) OR SCHOOLMATE(Lowell), then X = great-grandfather(Leroy). 11 | Hypothesis: Does it follow that Lesley is a great-grandfather of Leroy: Lesley = great-grandfather(Leroy)? 12 | Let’s see whether the Hypothesis can be deduced from the arguments (1) and (2) by logical reasoning? 13 | By (1), we have Lesley = friend(Fernando). By (2), we have if Lesley = friend(Fernando), then Lesley = great-grandfather(Leroy). 14 | So, it is true that Lesley is a great-grandfather of Leroy. So the answer is valid. 15 | 16 | Q: "It is not always easy to see who is related to whom -- and in which ways. The following argument pertains to this question: Whoever is not a great-grandfather of Clyde is a stepbrother of Brian. Being an ancestor of Dana is sufficient for not being a great-grandfather of Clyde. We may conclude: Everyone who is an ancestor of Dana is a stepbrother of Brian, too." 
17 | Is the argument, given the explicitly stated premises, deductively valid or invalid? 18 | Options: 19 | - valid 20 | - invalid 21 | A: Let's think step by step. 22 | (1) Whoever is not a great-grandfather of Clyde is a stepbrother of Brian: If X = NOT (great-grandfather(Clyde)), then X = stepbrother(Brian). 23 | (2): Being an ancestor of Dana is sufficient for not being a great-grandfather of Clyde: If X = ancestor(Dana), X = NOT (great-grandfather(Clyde)). 24 | Hypothesis: Does it follow that everyone who is an ancestor of Dana is a stepbrother of Brian, too: If X = ancestor(Dana), then X = stepbrother(Brian)? 25 | Let’s see whether the Hypothesis can be deduced from the arguments (1) and (2) by logical reasoning? 26 | By (2), we have if X = ancestor(Dana), X = NOT (great-grandfather(Clyde)). 27 | Furthermore, by (1), we have if X = NOT (great-grandfather(Clyde)), then X = stepbrother(Brian). 28 | By the transitive relation rule in first-order logic, we then have: if X = ancestor(Dana), then X = stepbrother(Brian). 29 | So, it is true that everyone who is an ancestor of Dana is a stepbrother of Brian. So the answer is valid. 30 | 31 | Q: "It is not always easy to grasp who is consuming which products. The following argument pertains to this question: Every infrequent user of Paul Mitchell shampoo is either a rare consumer of Nioxin shampoo or a loyal buyer of Caress soap, or both. No regular consumer of Lush soap is a rare consumer of Nioxin shampoo and, in the same time, a loyal buyer of Caress soap. It follows that whoever is an infrequent user of Paul Mitchell shampoo is not a regular consumer of Lush soap." 32 | Is the argument, given the explicitly stated premises, deductively valid or invalid? 33 | Options: 34 | - valid 35 | - invalid 36 | A: Let's think step by step. 
37 | (1) Every infrequent user of Paul Mitchell shampoo is either a rare consumer of Nioxin shampoo or a loyal buyer of Caress soap, or both: If X = infrequent-user(Paul Mitchell), then X = rare-consumer(Nioxin) OR X = loyal-buyer(Caress). 38 | (2): No regular consumer of Lush soap is a rare consumer of Nioxin shampoo and a loyal buyer of Caress soap at the same time. If X = regular-consumer(Lush), then X = NOT (rare-consumer(Nioxin) AND loyal-buyer(Caress)). 39 | Hypothesis: Does it follow that whoever is an infrequent user of Paul Mitchell shampoo is not a regular consumer of Lush soap: If X = infrequent-user(Paul Mitchell), then X = NOT (regular-consumer(Lush))? 40 | Let’s see whether the Hypothesis can be deduced from the arguments (1) and (2) by logical reasoning? 41 | By (1), we have if X = infrequent-user(Paul Mitchell), then X = rare-consumer(Nioxin) OR X = loyal-buyer(Caress). We need to consider both cases separately: 42 | The case X = rare-consumer(Nioxin) does not appear in (2). 43 | The case X = loyal-buyer(Caress) does not appear in (2), either. 44 | So, from (1) and (2), we cannot necessarily deduce the Hypothesis. So the answer is invalid. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/geometric_shapes.txt: -------------------------------------------------------------------------------- 1 | Name geometric shapes from their SVG paths. 2 | 3 | Q: This SVG path element draws a 4 | Options: 5 | (A) circle 6 | (B) heptagon 7 | (C) hexagon 8 | (D) kite 9 | (E) line 10 | (F) octagon 11 | (G) pentagon 12 | (H) rectangle 13 | (I) sector 14 | (J) triangle 15 | A: Let's think step by step. 16 | This SVG path element contains "M" and "L" commands. M takes two parameters (x,y) and moves the current point to the coordinates (x,y). L takes two parameters (x,y) and draws a line from the previous coordinate to the new coordinate (x,y). 
17 | This path can be decomposed into 9 separate commands. 18 | (1) M 31.00,73.00: Move the current point to 31.00,73.00. 19 | (2) L 32.00,59.00: Create a line from 31.00,73.00 to 32.00,59.00. 20 | (3) L 44.00,50.00: Create a line from 32.00,59.00 to 44.00,50.00. 21 | (4) L 49.00,41.00: Create a line from 44.00,50.00 to 49.00,41.00. 22 | (5) L 64.00,37.00: Create a line from 49.00,41.00 to 64.00,37.00. 23 | (6) L 71.00,55.00: Create a line from 64.00,37.00 to 71.00,55.00. 24 | (7) L 64.00,76.00: Create a line from 71.00,55.00 to 64.00,76.00. 25 | (8) L 52.00,61.00: Create a line from 64.00,76.00 to 52.00,61.00. 26 | (9) L 31.00,73.00: Create a line from 52.00,61.00 to 31.00,73.00. 27 | This SVG path starts at point 31.00,73.00, creates eight consecutive and touching lines, and then returns back its starting point, thereby creating an eight-sided shape. It does not have any curves or arches. "octagon" is the only eight-sided object on the list. So the answer is (F). 28 | 29 | Q: This SVG path element draws a 30 | Options: 31 | (A) circle 32 | (B) heptagon 33 | (C) hexagon 34 | (D) kite 35 | (E) line 36 | (F) octagon 37 | (G) pentagon 38 | (H) rectangle 39 | (I) sector 40 | (J) triangle 41 | A: Let's think step by step. 42 | This SVG path element contains "M" and "L" commands. M takes two parameters (x,y) and moves the current point to the coordinates (x,y). L takes two parameters (x,y) and draws a line from the previous coordinate to the new coordinate (x,y). 43 | This path can be decomposed into 6 separate commands. 44 | (1) M 14.19,26.04: Move the current point to 14.19,26.04. 45 | (2) L 51.43,39.21: Create a line from 14.19,26.04 to 51.43,39.21. 46 | (3) L 58.44,36.69: Create a line from 51.43,39.21 to 58.44,36.69. 47 | (4) L 56.63,30.17: Create a line from 58.44,36.69 to 56.63,30.17. 48 | (5) L 48.53,26.66: Create a line from 56.63,30.17 to 48.53,26.66. 49 | (6) L 14.19,26.04: Create a line from 48.53,26.66 to 14.19,26.04. 
50 | This SVG path starts at point 14.19,26.04, creates five consecutive and touching lines, and then returns back its starting point, thereby creating a five-sided shape. It does not have any curves or arches. "pentagon" is the only five-sided polygon on the list. So the answer is (G). 51 | 52 | Q: This SVG path element draws a 53 | Options: 54 | (A) circle 55 | (B) heptagon 56 | (C) hexagon 57 | (D) kite 58 | (E) line 59 | (F) octagon 60 | (G) pentagon 61 | (H) rectangle 62 | (I) sector 63 | (J) triangle 64 | A: Let's think step by step. 65 | This SVG path element contains "M" and "L" commands. M takes two parameters (x,y) and moves the current point to the coordinates (x,y). L takes two parameters (x,y) and draws a line from the previous coordinate to the new coordinate (x,y). 66 | This path can be decomposed into 5 separate commands. 67 | (1) M 41.00,43.00: Move the current point to 41.00,43.00. 68 | (2) L 37.00,34.00: Create a line from 41.00,43.00 to 37.00,34.00. 69 | (3) L 41.00,33.00: Create a line from 37.00,34.00 to 41.00,33.00. 70 | (4) L 45.00,34.00: Create a line from 41.00,33.00 to 45.00,34.00. 71 | (5) L 41.00,43.00: Create a line from 45.00,34.00 to 41.00,43.00. 72 | This SVG path starts at point 41.00,43.00, creates four consecutive and touching lines, and then returns back its starting point, thereby creating a four-sided shape. "kite" and "rectangle" are the only two four-sided polygons on the list. So, we need to determine which one is the correct answer. 73 | A kite has two pairs of equal-length adjacent sides, whereas a rectangle has two pairs of equal-length alternate (opposite) sides. Now, let's check whether the two adjacent sides of this shape are equal. 74 | Length of side A: |A| = sqrt((41.00-37.00)^2 + (43.00-34.00)^2) = sqrt((4)^2 + (9)^2) = sqrt(16 + 81) = sqrt(97). 75 | Length of side B: |B| = sqrt((37.00-41.00)^2 + (34.00-33.00)^2)) = sqrt((4)^2 + (1)^2) = sqrt(16 + 1) = sqrt(17). 
76 | Length of side C: |C| = sqrt((41.00-45.00)^2 + (33.00-34.00)^2)) = sqrt((-4)^2 + (-1)^2) = sqrt(16 + 1) = sqrt(17). 77 | Length of side D: |D| = sqrt((45.00-41.00)^2 + (34.00-43.00)^2)) = sqrt((4)^2 + (-9)^2) = sqrt(16 + 81) = sqrt(97). 78 | Note that |A| = |D| and |B| = |C|. Furthermore, A and D are adjacent and B and C are adjacent. Thus, this polygon has two pairs of equal-length adjacent sides and is "kite". So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/gsm8k_prompt_original.txt: -------------------------------------------------------------------------------- 1 | Question: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? 2 | Let's think step by step 3 | There are 15 trees originally. 4 | Then there were 21 trees after some more were planted. 5 | So there must have been 21 - 15 = 6. 6 | The answer is 6. 7 | 8 | Question: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? 9 | Let's think step by step 10 | There are originally 3 cars. 11 | 2 more cars arrive. 12 | 3 + 2 = 5. 13 | The answer is 5. 14 | 15 | Question: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? 16 | Let's think step by step 17 | Originally, Leah had 32 chocolates. 18 | Her sister had 42. 19 | So in total they had 32 + 42 = 74. 20 | After eating 35, they had 74 - 35 = 39. 21 | The answer is 39. 22 | 23 | Question: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? 24 | Let's think step by step 25 | Jason started with 20 lollipops. 26 | Then he had 12 after giving some to Denny. 27 | So he gave Denny 20 - 12 = 8. 28 | The answer is 8. 29 | 30 | Question: Shawn has five toys. 
For Christmas, he got two toys each from his mom and dad. How many toys does he have now? 31 | Let's think step by step 32 | Shawn started with 5 toys. 33 | If he got 2 toys each from his mom and dad, then that is 4 more toys. 34 | 5 + 4 = 9. 35 | The answer is 9. 36 | 37 | Question: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? 38 | Let's think step by step 39 | There were originally 9 computers. 40 | For each of 4 days, 5 more computers were added. 41 | So 5 * 4 = 20 computers were added. 42 | 9 + 20 is 29. 43 | The answer is 29. 44 | 45 | Question: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? 46 | Let's think step by step 47 | Michael started with 58 golf balls. 48 | After losing 23 on tuesday, he had 58 - 23 = 35. 49 | After losing 2 more, he had 35 - 2 = 33 golf balls. 50 | The answer is 33. 51 | 52 | Question: Olivia has $23. She bought five bagels for $3 each. How much money does she have left? 53 | Let's think step by step 54 | Olivia had 23 dollars. 55 | 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. 56 | So she has 23 - 15 dollars left. 57 | 23 - 15 is 8. 58 | The answer is 8. 59 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/hyperbaton.txt: -------------------------------------------------------------------------------- 1 | Order adjectives correctly in English sentences. 2 | 3 | Q: Which sentence has the correct adjective order: 4 | Options: 5 | (A) rubber terrible ship 6 | (B) terrible rubber ship 7 | A: Let's think step by step. 8 | When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. 
purpose] noun". 9 | Option (A): "rubber terrible ship". (1) "rubber" falls into the material category. (2) "terrible" falls into the opinion category. Option (A) has the following adjective order: [7. material] [1. opinion] (or, in numeric terms, 7 1). Because 7 < 1 is not correct, (A) does not have the correct ordering. 10 | Option (B): "terrible rubber ship". Option (B) has the following adjective order: [1. opinion] [7. material] (or, in numeric terms, 1 7). Because 1 < 7 is correct, (B) has the correct ordering. So the answer is (B). 11 | 12 | Q: Which sentence has the correct adjective order: 13 | Options: 14 | (A) repulsive small Brazilian exercise ship 15 | (B) Brazilian repulsive exercise small ship 16 | A: Let's think step by step. 17 | When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. purpose] noun". 18 | Option (A): "repulsive small Brazilian exercise ship". (1) "repulsive" falls into the opinion category. (2) "small" falls into the size category. (3) "Brazilian" falls into the origin category. (4) "exercise" falls into the purpose category. Option (A) has the following adjective order: [1. opinion] [2. size] [6. origin] [8. purpose] (or, in numeric terms, 1 2 6 8). Because 1 < 2 < 6 < 8 is correct, (A) has the correct ordering. 19 | Option (B): "Brazilian repulsive exercise small ship". Option (B) has the following adjective order: [6. origin] [1. opinion] [8. purpose] [2. size] (or, in numeric terms, 6 1 8 2). Because 6 < 1 < 8 < 2 is not correct, (B) does not have the correct ordering. So the answer is (A). 20 | 21 | Q: Which sentence has the correct adjective order: 22 | Options: 23 | (A) blue gold wonderful square shoe 24 | (B) wonderful square blue gold shoe 25 | A: Let's think step by step. 
26 | When there is more than one adjective before a noun, the adjectives need to respect the following order before a noun: "[1. opinion] [2. size] [3. age] [4. shape] [5. color] [6. origin] [7. material] [8. purpose] noun". 27 | Option (A): "blue gold wonderful square shoe". (1) "blue" falls into the color category. (2) "gold" falls into the material category. (3) "wonderful" falls into the opinion category. (4) "square" falls into the shape category. The adjective order that Option (A) has is [5. color] [7. material] [1. opinion] [4. shape] (or, in numeric terms, 5 7 1 4). Because 5 < 7 < 1 < 4 is not correct, (A) does not have the correct ordering. 28 | Option (B): "wonderful square blue gold shoe". Option (B) has the following adjective order: [1. opinion] [4. shape] [5. color] [7. material] (or, in numeric terms, 1 4 5 7 ). Because 1 < 4 < 5 < 7 is correct, (B) has the correct ordering. So the answer is (B). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/logical_deduction_five_objects.txt: -------------------------------------------------------------------------------- 1 | A logical deduction task which requires deducing the order of a sequence of objects. 2 | 3 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 4 | Options: 5 | (A) Amy finished last 6 | (B) Eli finished last 7 | (C) Eve finished last 8 | A: Let's think step by step. 9 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 10 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 11 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 12 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 
13 | Eli finished last. So the answer is (B). 14 | 15 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 16 | Options: 17 | (A) The white book is the leftmost 18 | (B) The green book is the leftmost 19 | (C) The orange book is the leftmost 20 | A: Let's think step by step. 21 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 22 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 23 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 24 | According to this ordering, the leftmost book is the white book. 25 | The white book is the leftmost. So the answer is (A). 26 | 27 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 28 | Options: 29 | (A) The red book is the leftmost 30 | (B) The gray book is the leftmost 31 | (C) The white book is the leftmost 32 | A: Let's think step by step. 33 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 34 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 35 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 36 | According to this ordering, the leftmost book is the white book. 37 | The white book is the leftmost. So the answer is (C). 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/logical_deduction_seven_objects.txt: -------------------------------------------------------------------------------- 1 | A logical deduction task which requires deducing the order of a sequence of objects. 2 | 3 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 4 | Options: 5 | (A) Amy finished last 6 | (B) Eli finished last 7 | (C) Eve finished last 8 | A: Let's think step by step. 9 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 10 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 11 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 12 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 13 | Eli finished last. So the answer is (B). 14 | 15 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 16 | Options: 17 | (A) The white book is the leftmost 18 | (B) The green book is the leftmost 19 | (C) The orange book is the leftmost 20 | A: Let's think step by step. 21 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 22 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 23 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 24 | According to this ordering, the leftmost book is the white book. 25 | The white book is the leftmost. 
So the answer is (A). 26 | 27 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 28 | Options: 29 | (A) The red book is the leftmost 30 | (B) The gray book is the leftmost 31 | (C) The white book is the leftmost 32 | A: Let's think step by step. 33 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 34 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 35 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 36 | According to this ordering, the leftmost book is the white book. 37 | The white book is the leftmost. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/logical_deduction_three_objects.txt: -------------------------------------------------------------------------------- 1 | A logical deduction task which requires deducing the order of a sequence of objects. 2 | 3 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 4 | Options: 5 | (A) Amy finished last 6 | (B) Eli finished last 7 | (C) Eve finished last 8 | A: Let's think step by step. 9 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 10 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 11 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 12 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 
13 | Eli finished last. So the answer is (B). 14 | 15 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 16 | Options: 17 | (A) The white book is the leftmost 18 | (B) The green book is the leftmost 19 | (C) The orange book is the leftmost 20 | A: Let's think step by step. 21 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 22 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 23 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 24 | According to this ordering, the leftmost book is the white book. 25 | The white book is the leftmost. So the answer is (A). 26 | 27 | Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 28 | Options: 29 | (A) The red book is the leftmost 30 | (B) The gray book is the leftmost 31 | (C) The white book is the leftmost 32 | A: Let's think step by step. 33 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 34 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 35 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 36 | According to this ordering, the leftmost book is the white book. 37 | The white book is the leftmost. So the answer is (C). 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/movie_recommendation.txt: -------------------------------------------------------------------------------- 1 | Recommend movies similar to the given list of movies. 2 | 3 | Q: Find a movie similar to Star Wars Episode IV - A New Hope, Indiana Jones and the Last Crusade, Star Wars Episode V - The Empire Strikes Back, The Big Lebowski: 4 | Options: 5 | (A) Tetsuo 6 | (B) the Ironman 7 | (C) The Princess Bride 8 | (D) The Barkley Marathons The Race That Eats Its Young 9 | (E) Bug 10 | A: Let's think step by step. 11 | - Star Wars Episode IV - A New Hope (action, adventure, fantasy; 1977) 12 | - Indiana Jones and the Last Crusade (action, adventure; 1989) 13 | - Star Wars Episode V - The Empire Strikes Back (action, adventure, fantasy; 1980) 14 | - The Big Lebowski (action, drama, comedy; 1998) 15 | These are all famous classic American movies produced before 2000. Amongst all the options, the only movie similar to these ones seems to be The Princess Bride (1987). So the answer is (C). 16 | 17 | Q: Find a movie similar to Twister, The Silence of the Lambs, Independence Day, Braveheart: 18 | Options: 19 | (A) They Shoot Horses 20 | (B) Don't They 21 | (C) Forrest Gump 22 | (D) The Salton Sea 23 | (E) Extreme Days 24 | A: Let's think step by step. 25 | - Twister (action, adventure, thriller; 1996) 26 | - The Silence of the Lambs (crime, drama, thriller; 1991) 27 | - Independence Day (action, science-fiction, drama; 1996) 28 | - Braveheart (biography, drama, epic; 1995) 29 | These are all famous Hollywood movies produced around the 1990s. Amongst all the options, the only movie similar to these ones seems to be Forrest Gump (comedy, drama, romance; 1994). So the answer is (C). 
30 | 31 | Q: Find a movie similar to Minority Report, Total Recall, Inside Out, Forrest Gump: 32 | Options: 33 | (A) Phenomena 34 | (B) Lilting 35 | (C) Catwoman 36 | (D) Edge of Tomorrow 37 | A: Let's think step by step. 38 | - Minority Report (action, crime, mystery; 2002) 39 | - Total Recall (action, adventure, science-fiction; 2012) 40 | - Inside Out (animation, family, comedy; 2015) 41 | - Forrest Gump (comedy, drama, romance; 1994) 42 | These are all famous movies produced in the past few decades. Amongst all the options, the only movie similar to these ones seems to be Edge of Tomorrow (action, adventure, crime, mystery; 2014), as it is also a science-fiction movie and features Tom Cruise. So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/multistep_arithmetic_two.txt: -------------------------------------------------------------------------------- 1 | Solve multi-step arithmetic problems. 2 | 3 | Q: ((-5 + 9 * -4 - 0) * (4 + -7 + 0 * -5)) = 4 | A: Let's think step by step. 5 | Let’s recall that the order of operations in mathematics is as follows: (1) Parentheses, (2) exponents, (3) multiplication and division (from left to right), (4) addition and subtraction (from left to right). So, remember to always compute the expressions inside parentheses or brackets first. 6 | This equation can be written as "A * B", where A = (-5 + 9 * -4 - 0) and B = (4 + -7 + 0 * -5). 7 | Let's calculate A = (-5 + 9 * -4 - 0) = (-5 + (9 * -4) - 0) = (-5 + (-36) - 0) = (-5 + -36 - 0) = -5 - 36 = -41. 8 | Let's calculate B = (4 + -7 + 0 * -5) = (4 + -7 + (0 * -5)) = (4 + -7 + 0) = (4 + -7) = (4 - 7) = -3. 9 | Then, the final equation is A * B = -41 * -3 = (-41) * (-3) = 123. So the answer is 123. 10 | 11 | Q: ((-9 * 7 * 7 * -9) + (4 * -9 - 8 - -4)) = 12 | A: Let's think step by step. 
13 | Let’s recall that the order of operations in mathematics is as follows: (1) Parentheses, (2) exponents, (3) multiplication and division (from left to right), (4) addition and subtraction (from left to right). So, remember to always compute the expressions inside parentheses or brackets first. 14 | This equation can be written as "A + B", where A = (-9 * 7 * 7 * -9) and B = (4 * -9 - 8 - -4). 15 | Let's calculate A = (-9 * 7 * 7 * -9) = ((-9 * 7) * (7 * -9)) = ((-63) * (-63)) = 3969. 16 | Let's calculate B = (4 * -9 - 8 - (-4)) = ((4 * -9) - 8 - (-4)) = ((-36) - 8 - (-4)) = ((-36 - 8) - (-4)) = (-44 - (-4)) = -40. 17 | Then, the final equation is A + B = 3969 + -40 = 3969 - 40 = 3929. So the answer is 3929. 18 | 19 | Q: ((-3 + 5 * 8 * -4) - (9 - 8 * -7 + -9)) = 20 | A: Let's think step by step. 21 | Let’s recall that the order of operations in mathematics is as follows: (1) Parentheses, (2) exponents, (3) multiplication and division (from left to right), (4) addition and subtraction (from left to right). So, remember to always compute the expressions inside parentheses or brackets first. 22 | This equation can be written as "A - B", where A = (-3 + 5 * 8 * -4) and B = (9 - 8 * -7 + -9). 23 | Let's calculate A = (-3 + 5 * 8 * -4) = (-3 + (5 * 8) * -4) = (-3 + (40) * -4) = (-3 + (40 * -4)) = (-3 + -160) = -163. 24 | Let's calculate B = (9 - 8 * -7 + -9) = (9 - (8 * -7) + -9) = (9 - (-56) + -9) = ((9 - (-56)) + -9) = ((65) + -9) = (65 - 9) = 56. 25 | Then, the final equation is A - B = -163 - 56 = -219. So the answer is -219. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/navigate.txt: -------------------------------------------------------------------------------- 1 | Given a series of navigation instructions, determine whether one would end up back at the starting point. 2 | 3 | Q: If you follow these instructions, do you return to the starting point? Turn left. Turn around. 
Turn left. Take 7 steps. Take 2 steps. Take 4 steps. Take 8 steps. 4 | Options: 5 | - Yes 6 | - No 7 | A: Let's think step by step. 8 | We start at the origin (0, 0), facing the positive y-axis. 9 | (1) Turn left: (0, 0), facing the negative x-axis. 10 | (2) Turn around: (0, 0), facing the positive x-axis. 11 | (3) Turn left: (0, 0), facing the positive y-axis. 12 | (4) Take 7 steps: (0, 7), facing the positive y-axis. 13 | (5) Take 2 steps: (0, 9), facing the positive y-axis. 14 | (6) Take 4 steps: (0, 13), facing the positive y-axis. 15 | (7) Take 8 steps: (0, 21), facing the positive y-axis. 16 | Since (0, 21) is not (0, 0), we are not where we started. So the answer is No. 17 | 18 | Q: If you follow these instructions, do you return to the starting point? Turn around. Take 1 step. Take 6 steps. Turn around. Take 6 steps. Take 9 steps. Take 1 step. 19 | Options: 20 | - Yes 21 | - No 22 | A: Let's think step by step. 23 | We start at the origin (0, 0), facing the positive y-axis. 24 | (1) Turn around: (0, 0), facing the negative y-axis. 25 | (2) Take 1 step: (0, -1), facing the negative y-axis. 26 | (3) Take 6 steps: (0, -7), facing the negative y-axis. 27 | (4) Turn around: (0, -7), facing the positive y-axis. 28 | (5) Take 6 steps: (0, -1), facing the positive y-axis. 29 | (6) Take 9 steps: (0, 8), facing the positive y-axis. 30 | (7) Take 1 step: (0, 9), facing the positive y-axis. 31 | Since (0, 9) is not (0, 0), we are not where we started. So the answer is No. 32 | 33 | Q: If you follow these instructions, do you return to the starting point? Always face forward. Take 2 steps right. Take 9 steps left. Take 7 steps right. 34 | Options: 35 | - Yes 36 | - No 37 | A: Let's think step by step. 38 | We start at the origin (0, 0), facing the positive y-axis. 39 | (1) Always face forward: (0, 0), facing the positive y-axis. 40 | (2) Take 2 steps right: (0, 2), facing the positive y-axis. 41 | (3) Take 9 steps left: (0, -7), facing the positive y-axis. 
42 | (4) Take 7 steps right: (0, 0), facing the positive y-axis. 43 | Since (0, 0) is (0, 0), we are indeed where we started. So the answer is Yes. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/object_counting.txt: -------------------------------------------------------------------------------- 1 | Questions that involve enumerating objects and asking the model to count them. 2 | 3 | Q: I have a blackberry, a clarinet, a nectarine, a plum, a strawberry, a banana, a flute, an orange, and a violin. How many fruits do I have? 4 | A: Let's think step by step. 5 | We first identify the fruits on the list and include their quantity in parentheses: 6 | - blackberry (1) 7 | - nectarine (1) 8 | - plum (1) 9 | - strawberry (1) 10 | - banana (1) 11 | - orange (1) 12 | Now, let's add the numbers in parentheses: 1 + 1 + 1 + 1 + 1 + 1 = 6. So the answer is 6. 13 | 14 | Q: I have an orange, a raspberry, two peaches, a blackberry, an apple, a grape, a nectarine, and three plums. How many fruits do I have? 15 | A: Let's think step by step. 16 | We first identify the fruits on the list and include their quantity in parentheses: 17 | - orange (1) 18 | - raspberry (1) 19 | - peaches (2) 20 | - blackberry (1) 21 | - apple (1) 22 | - grape (1) 23 | - nectarine (1) 24 | - plums (3) 25 | Now, let's add the numbers in parentheses: 1 + 1 + 2 + 1 + 1 + 1 + 1 + 3 = 11. So the answer is 11. 26 | 27 | Q: I have a lettuce head, a head of broccoli, an onion, a stalk of celery, two carrots, a garlic, and a yam. How many vegetables do I have? 28 | A: Let's think step by step. 29 | We first identify the vegetables on the list and include their quantity in parentheses: 30 | - lettuce (1) 31 | - broccoli (1) 32 | - onion (1) 33 | - celery (1) 34 | - carrots (2) 35 | - garlic (1) 36 | - yam (1) 37 | Now, let's add the numbers in parentheses: 1 + 1 + 1 + 1 + 2 + 1 + 1 = 8. So the answer is 8. 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/penguins_in_a_table.txt: -------------------------------------------------------------------------------- 1 | Answer questions about a table of penguins and their attributes. 2 | 3 | Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. We now add a penguin to the table: 4 | James, 12, 90, 12 5 | How many penguins are less than 8 years old? 6 | Options: 7 | (A) 1 8 | (B) 2 9 | (C) 3 10 | (D) 4 11 | (E) 5 12 | A: Let's think step by step. 13 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 14 | Now, we add James to this table: James is 12 years old. 15 | The penguins that are less than 8 years old are Louis and Bernard. 16 | There are 2 penguins less than 8 years old. So the answer is (B). 17 | 18 | Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. Which is the youngest penguin? 19 | Options: 20 | (A) Louis 21 | (B) Bernard 22 | (C) Vincent 23 | (D) Gwen 24 | (E) James 25 | A: Let's think step by step. 26 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 27 | According to the table, Bernard (5) is the youngest amongst them. 28 | The youngest penguin is Bernard. So the answer is (B). 
29 | 30 | Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. What is the name of the second penguin sorted by alphabetic order? 31 | Options: 32 | (A) Louis 33 | (B) Bernard 34 | (C) Vincent 35 | (D) Gwen 36 | (E) James 37 | A: Let's think step by step. 38 | This question focuses on the name. We know the following: The names of the penguin in the table are Louis, Bernard, Vincent, and Gwen. 39 | When we sort their names alphabetically, we get Bernard, Gwen, Louis, Vincent. 40 | The name of the second penguin sorted by alphabetical order is Gwen. 41 | The name of the second penguin sorted by alphabetic order is Gwen. So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/prompt_8x8step.txt: -------------------------------------------------------------------------------- 1 | Question: Janet hires six employees. Four of them are warehouse workers who make $15/hour, and the other two are managers who make $20/hour. Janet has to pay 10% of her workers' salaries in FICA taxes. If everyone works 25 days a month and 8 hours a day, how much does Janet owe total for their wages and taxes for one month? 
2 | Let's think step by step 3 | First figure out how many hours each worker works per month by multiplying the number of days they work by the number of hours a day they work: 25 days * 8 hours/day = 200 hours 4 | Then calculate how much one warehouse worker makes per month by multiplying their hourly rate by the number of hours they work: 200 hours * $15/hour = $3000 5 | Then multiply that number by 4 to find out how much all the warehouse workers make: $3000/worker * 4 workers = $12,000 6 | Now multiply the hours each manager works (also 200) by their hourly wage to find out how much one manager makes per month: 200 hours * $20/hour = $4,000 7 | Now multiply one manager's wages by the number of managers (2) to find their total wage amount: $4,000/manager * 2 managers = $8,000 8 | Now add the wages for the managers and the workers to find the total cost of the wages: $8,000 + $12,000 = $20,000 9 | Now multiply the total wage bill by 10% to find how much the FICA taxes are: $20,000 * .1 = $2,000 10 | Now add the total wage bill to the total tax amount to find the grand total: $2,000 + $20,000 = $22,000 11 | The answer is 22000 12 | 13 | Question: In a graveyard, there are 20 skeletons. Half of these skeletons are adult women, and the remaining number are split evenly between adult men and children. If an adult woman has 20 bones in their body, and a male has 5 more than this, and a child has half as many as an adult woman, how many bones are in the graveyard? 14 | Let's think step by step 15 | We first need to figure out how many of each type of skeleton there are. Since half the 20 skeletons are adult women, that means there are 20/2=10 adult women's skeletons. 16 | The remaining half, 10, is split between adult men and children, meaning there are 10/2= 5 of each. 17 | Since an adult woman has 20 bones in their body, this means that the 10 skeletons have 20*10=200 bones in total. 
18 | Since an adult man has 5 more bones in their body than an adult woman, this means they have 20+5= 25 bones in their body. 19 | There are 5 adult men, meaning there are 25*5=125 bones in the adult male skeletons in total. 20 | Since a child has half as many bones as the 20 in an adult woman, this means the children have 20/2= 10 bones in their body. 21 | Since there are 5 children, this means there are 5*10= 50 bones in the children's skeletons in total. 22 | Therefore, altogether there are 50+125+200= 375 bones in total in the graveyard. 23 | The answer is 375 24 | 25 | Question: Last month, a factory made 12000 dolls and their associated accessories. The accessories for each doll included 2 shoes, 3 bags, 1 set of cosmetics, and 5 hats. If each doll took 45 seconds to make and each accessory took 10 seconds to make, what was the total combined machine operation time, in seconds, required to manufacture all of the dolls and their accessories? 26 | Let's think step by step 27 | The number of shoes the factory made was 12000 dolls x 2 shoes/doll = 24000 shoes 28 | The factory also made for each doll 12000 dolls x 3 bags/doll = 36000 bags 29 | The set of cosmetics made for each doll was just one, so there are 12000 cosmetics sets 30 | Finally, the number of hats for the dolls was 12000 dolls x 5 hats/doll = 60000 hats 31 | In total, the number of accessories was 24000 + 36000 + 12000 + 60000 = 132000 accessories 32 | The dolls took in total 12000 dolls x 45 seconds/doll = 540000 seconds to be made 33 | All the accessories also took 132000 accessories x 10 seconds/accessory = 1320000 seconds 34 | Combining both results, we have that the total time the machinery was working was 1320000 + 540000 = 1860000 seconds 35 | The answer is 1860000 36 | 37 | Question: Tina makes $18.00 an hour. If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage. 
If she works 10 hours every day for 5 days, how much money does she make? 38 | Let's think step by step 39 | She works 8 hours a day for $18 per hour so she makes 8*18 = $144.00 per 8-hour shift 40 | She works 10 hours a day and anything over 8 hours is eligible for overtime, so she gets 10-8 = 2 hours of overtime 41 | Overtime is calculated as time and a half so and she makes $18/hour so her overtime pay is 18*.5 = $9.00 42 | Her overtime pay is 18+9 = $27.00 43 | Her base pay is $144.00 per 8-hour shift and she works 5 days and makes 5 * $144 = $720.00 44 | Her overtime pay is $27.00 per hour and she works 2 hours of overtime per day and makes 27*2 = $54.00 in overtime pay 45 | 2 hours of overtime pay for 5 days means she makes 54*5 = $270.00 46 | In 5 days her base pay is $720.00 and she makes $270.00 in overtime pay so she makes $720 + $270 = $990.00 47 | The answer is 990 48 | 49 | Question: A printing press is printing brochures. The press prints 20 single-page spreads, and twice as many double-page spreads. For each 4 pages printed for the spreads, the press prints a block of 4 ads, each of which take up a quarter of a page. The brochures can be arranged in any order as long as they are made up of 5 pages each. How many brochures is the printing press creating? 50 | Let's think step by step 51 | There are a total of 20 single-page spreads * 2 = 40 double-page spreads. 52 | As these are made up of 2 pages each, there are 40 double-page spreads * 2 pages = 80 pages in the double-page spreads. 53 | In total, there are 20 single-pages + 80 double-pages = 100 pages in the spreads. 54 | A block of ads is printed every 4 pages, so there will be 100 pages / 4 pages/block = 25 blocks of ads. 55 | This is a total of 25 blocks * 4 ads = 100 ads. 56 | As each ad takes up a quarter of a page, this must create an additional 100 ads * 0.25 = 25 pages. 57 | Therefore, the press has printed 100 pages from the spreads + 25 pages of ads = 125 pages. 
58 | Since brochures are made up of 5 pages each, this creates a total of 125 pages / 5 = 25 brochures. 59 | The answer is 25 60 | 61 | Question: A spaceship is traveling to another planet. The spaceship travels at a consistent speed its entire journey unless it is stopped for the crew’s break. After launch, the spaceship traveled for 10 hours then stopped for 3 hours. It then traveled for another 10 hours then stopped for 1 hour. After this, the spaceship would take an hour’s break after every 11 hours of traveling and maintained this routine until the end of its journey. If the entire journey took 3 days then how long, in hours, was the spaceship not moving? 62 | Let's think step by step 63 | On the first day, the spaceship spent a total of 10 hours + 10 hours = 20 hours traveling. 64 | On the first day, the spaceship spent a total of 3 hours + 1 hour = 4 hours not moving. 65 | This is a total of 20 hours traveling + 4 hours not moving = 24 hours or 1 day. 66 | So the rest of the journey took place over 3 days journey – 1 first day = 2 days. 67 | This is the same as 2 days * 24 hours = 48 hours. 68 | In these 2 days, the spaceship’s time is split into 11 hours traveling + 1 hour break = 12 hour blocks. 69 | So in the last 2 days, the spaceship wasn't moving for a total of 48 hours / 12 hour blocks = 4 hours. 70 | Adding this to breaks from the first day shows that the spaceship was not moving for a total of 4 + 4 = 8 hours. 71 | The answer is 8 72 | 73 | Question: Alec is running for Class President. He thinks that if he can get three-quarters of the class to vote for him then there is no chance anyone else can beat him. Half of the class have already said they will vote for him but out of the remaining students, only 5 have said they are thinking about voting for him. He surveys the students who are thinking about voting for someone else, and changes his flyers to reflect the issues these students are concerned about. 
This results in a fifth of these students saying they'll vote for him. If Alec's class has 60 students and everyone who said they will vote for him does so, how many more votes does Alec need to reach his goal number of votes? 74 | Let's think step by step 75 | To calculate Alec's goal number of votes, we need to know that 60 students / 4 = 15 students is equal to one-quarter of the class students. 76 | Alec's goal is therefore 15 students * 3 quarters = 45 votes. 77 | Half of the class said they will vote for him, so there are already 60 students / 2 = 30 votes. 78 | Another 5 students are thinking about voting for him which leaves a total so far of 30 + 5 = 35 votes. 79 | This means there are 60 students - 35 voting for Alec = 25 students not voting for Alec. 80 | A fifth of these decided to vote, so this is a further 25 students / 5 = 5 votes. 81 | Alec is therefore receiving a total of 35 + 5 = 40 votes. 82 | So he has missed his goal by 45 goal votes - 40 actual votes = 5 votes. 83 | The answer is 5 84 | 85 | Question: A curry house sells curries that have varying levels of spice. Recently, a lot of the customers have been ordering very mild curries and the chefs have been having to throw away some wasted ingredients. To reduce cost and food wastage, the curry house starts monitoring how many ingredients are actually being used and changes their spending accordingly. The curry house needs 3 peppers for very spicy curries, 2 peppers for spicy curries, and only 1 pepper for mild curries. After adjusting their purchasing, the curry house now buys the exact amount of peppers they need. Previously, the curry house was buying enough peppers for 30 very spicy curries, 30 spicy curries, and 10 mild curries. They now buy enough peppers for 15 spicy curries and 90 mild curries. They no longer sell very spicy curries. How many fewer peppers does the curry house now buy? 
86 | Let's think step by step 87 | The curry house previously bought 3 peppers per very spicy curry * 30 very spicy curries = 90 peppers for very spicy curries. 88 | They also bought 2 peppers per spicy curry * 30 spicy curries = 60 peppers for spicy curries. 89 | They also bought 1 pepper per mild curry * 10 mild curries = 10 peppers for mild curries. 90 | So they were previously buying 90 + 60 + 10 = 160 peppers. 91 | They now buy 2 peppers per spicy curry * 15 spicy curries = 30 peppers for spicy curries. 92 | They also now buy 1 pepper per mild curry * 90 mild curries = 90 peppers for mild curries. 93 | So they now buy 30 + 90 = 120 peppers. 94 | This is a difference of 160 peppers bought originally - 120 peppers bought now = 40 peppers. 95 | The answer is 40 -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/reasoning_about_colored_objects.txt: -------------------------------------------------------------------------------- 1 | Answer extremely simple questions about the colors of objects on a surface. 2 | 3 | Q: On the nightstand, there is a red pencil, a purple mug, a burgundy keychain, a fuchsia teddy bear, a black plate, and a blue stress ball. What color is the stress ball? 4 | Options: 5 | (A) red 6 | (B) orange 7 | (C) yellow 8 | (D) green 9 | (E) blue 10 | (F) brown 11 | (G) magenta 12 | (H) fuchsia 13 | (I) mauve 14 | (J) teal 15 | (K) turquoise 16 | (L) burgundy 17 | (M) silver 18 | (N) gold 19 | (O) black 20 | (P) grey 21 | (Q) purple 22 | (R) pink 23 | A: Let's think step by step. 24 | According to this question, the color of the stress ball is blue. So the answer is (E). 25 | 26 | Q: On the table, you see a bunch of objects arranged in a row: a purple paperclip, a pink stress ball, a brown keychain, a green scrunchiephone charger, a mauve fidget spinner, and a burgundy pen. What is the color of the object directly to the right of the stress ball? 
27 | Options: 28 | (A) red 29 | (B) orange 30 | (C) yellow 31 | (D) green 32 | (E) blue 33 | (F) brown 34 | (G) magenta 35 | (H) fuchsia 36 | (I) mauve 37 | (J) teal 38 | (K) turquoise 39 | (L) burgundy 40 | (M) silver 41 | (N) gold 42 | (O) black 43 | (P) grey 44 | (Q) purple 45 | (R) pink 46 | A: Let's think step by step. 47 | According to this question, the objects are arranged in a row, from left to right, as follows: (1) a purple paperclip, (2) a pink stress ball, (3) a brown keychain, (4) a green scrunchiephone charger, (5) a mauve fidget spinner, (6) a burgundy pen. 48 | The stress ball is the second object on the list, namely (2). The object that is to the right of the stress ball corresponds to (3), which is a brown keychain. 49 | The color of the keychain is brown. So the answer is (F). 50 | 51 | Q: On the nightstand, you see the following items arranged in a row: a teal plate, a burgundy keychain, a yellow scrunchiephone charger, an orange mug, a pink notebook, and a grey cup. How many non-orange items do you see to the left of the teal item? 52 | Options: 53 | (A) zero 54 | (B) one 55 | (C) two 56 | (D) three 57 | (E) four 58 | (F) five 59 | (G) six 60 | A: Let's think step by step. 61 | According to this question, the objects are arranged in a row, from left to right, as follows: (1) a teal plate, (2) a burgundy keychain, (3) a yellow scrunchiephone charger, (4) an orange mug, (5) a pink notebook, (6) a grey cup. 62 | The teal plate is the first item, namely (1). There is no item to the left of the teal item. 63 | The number of non-orange items to the left of the teal item is zero. So the answer is (A). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/ruin_names.txt: -------------------------------------------------------------------------------- 1 | Select the humorous edit that 'ruins' the input movie or musical artist name. 
2 | 3 | Q: Which of the following is a humorous edit of this artist or movie name: 'whitesnake'? 4 | Options: 5 | (A) whitesnape 6 | (B) whitesnapke 7 | (C) whitesnuake 8 | (D) mwhitesnake 9 | A: Let's think step by step. 10 | The original name is "whitesnake". This is the name of an old English hard rock band. It is a compound word, formed by the words "white" and "snake". 11 | (A) "whitesnape": It is formed by the combination of "white" and "snake"; therefore, "snake" has been changed to "snape". Snape makes a reference to the fictional character Severus Snape in the Harry Potter series, so (A) is indeed a meaningful and funny edit. 12 | (B) "whitesnapke": It is formed by the combination of "white" and "snapke", but "snapke" is not an actual word; therefore, "whitesnapke" is not humorous. 13 | (C) "whitesnuake": It is formed by the combination of "white" and "snuake", but "snuake" is not an actual word; therefore, "whitesnuake" is not humorous. 14 | (D) "mwhitesnake": It is formed by the combination of "m", "white", and "snake", but the prefix "m-" seems arbitrary; therefore, "mwhitesnake" is not meaningful or humorous. 15 | Among the above, the only humorous edit is (A). So the answer is (A). 16 | 17 | Q: Which of the following is a humorous edit of this artist or movie name: 'one of our dinosaurs is missing'? 18 | Options: 19 | (A) ofne of our dinosaurs is missing 20 | (B) one af our dinosaurs is missing 21 | (C) one of our dinosaurs is pissing 22 | (D) one of our dinosaur is missing 23 | A: Let's think step by step. 24 | The original name is "one of our dinosaurs is missing". This is the name of an old British movie. 25 | (A) "ofne of our dinosaurs is missing": Here "one of" is changed to "ofne", but the word "ofne" is not an actual word. 26 | (B) "one af our dinosaurs is missing": Here the word "of" is changed to "af", but the word "af" is not an actual word. 
27 | (C) "one of our dinosaurs is pissing": Here the word "missing" is changed to "pissing", and "one of our dinosaurs is pissing" is indeed a very whimsical and mischievous edit. This change truly ruins the original title of the movie. 28 | (D) "one of our dinosaur is missing": Here the word "dinosaurs" is changed to "dinosaur", but "dinosaur" is singular but should be plural in the title; this change therefore feels arbitrary and not humorous. 29 | Among the above, the only humorous edit is (C). 30 | Among the above, the only humorous edit is (C). So the answer is (C). 31 | 32 | Q: Which of the following is a humorous edit of this artist or movie name: 'counting crows'? 33 | Options: 34 | (A) countingy crows 35 | (B) counting cows 36 | (C) courting crows 37 | (D) coutnting crows 38 | A: Let's think step by step. 39 | The original name is "counting crows". This is the name of an American rock band. Historically, the band name comes from the British nursery rhyme "One for Sorrow", which is about counting of magpies. 40 | (A) "countingy crows": Here the word "counting" is changed to "countingy", but the word "countingy" is not an actual word. 41 | (B) "counting cows": Here the word "crows" is changed to "cows", and this is indeed a playful and meaningful edit that ruins the original name of the band. 42 | (C) "courting crows": Here the word "counting" is changed to "courting", and "courting" is an actual word; however, "courting crows" does not sound as humorous as "counting cows". 43 | (D) "coutnting crows": Here the word "counting" is changed to "coutnting", but the word "coutnting" is not an actual word. 44 | Among the above, the only humorous edit is (B). So the answer is (B). 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/salient_translation_error_detection.txt: -------------------------------------------------------------------------------- 1 | Detect the type of error in an English translation of a German source sentence. 2 | 3 | Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: In der Liste der Baudenkmale in Lenzen (Elbe) sind alle Baudenkmale der brandenburgischen Stadt Lenzen (Elbe) und ihrer Ortsteile aufgelistet. 4 | Translation: In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed. 5 | The translation contains an error pertaining to 6 | Options: 7 | (A) Modifiers or Adjectives 8 | (B) Numerical Values 9 | (C) Negation or Antonyms 10 | (D) Named Entities 11 | (E) Dropped Content 12 | (F) Facts 13 | A: Let's think step by step. 14 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "The list of monuments in Lenzen (Elbe) includes all the monuments in the Brandenburg town of Lenzen (Elbe) and its districts." 
On the other hand, the provided translation is "In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed." Note that Lenzen (Elbe) is changed to Lenzen in the original translation; so, there is a named entity error. Because an entity in the original source sentence is changed to a different entity in the translation, the translation contains an error pertaining to Named Entities. So the answer is (D). 15 | 16 | Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Auf dieser Seite sind die Baudenkmäler der oberbayerischen Großen Kreisstadt Landsberg am Lech zusammengestellt. 17 | Translation: On this page are compiled the architectural monuments of the town of Landsberg am Lech. 18 | The translation contains an error pertaining to 19 | Options: 20 | (A) Modifiers or Adjectives 21 | (B) Numerical Values 22 | (C) Negation or Antonyms 23 | (D) Named Entities 24 | (E) Dropped Content 25 | (F) Facts 26 | A: Let's think step by step. 27 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. 
According to Google Translate, the correct translation of the source sentence from German to English is "The monuments of the Upper Bavarian district town of Landsberg am Lech are compiled on this page." On the other hand, the provided translation is "On this page are compiled the architectural monuments of the town of Landsberg am Lech." Note that an important detail about the location of Landsberg am Lech is omitted in the original translation: The translation should have said "Upper Bavarian district town of Landsberg am Lech". Because a significant clause in the translation was removed, the translation contains an error pertaining to Dropped Content. So the answer is (E). 28 | 29 | Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Łeba ist eine Kleinstadt und ein Badeort im Powiat Lęborski der polnischen Woiwodschaft Pommern. 30 | Translation: Eba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland. 31 | The translation contains an error pertaining to 32 | Options: 33 | (A) Modifiers or Adjectives 34 | (B) Numerical Values 35 | (C) Negation or Antonyms 36 | (D) Named Entities 37 | (E) Dropped Content 38 | (F) Facts 39 | A: Let's think step by step. 
40 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "Łeba is a small town and seaside resort in the Powiat Lęborski of the Polish Pomeranian Voivodeship." On the other hand, the provided translation is "Łeba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland." Note that the provided sentence says, "Łeba is not a small town ..." However, the translation should have been "Łeba is a small town ..." Because a negation is introduced at the beginning of the sentence and has fundamentally changed the meaning of the original source, the translation contains an error pertaining to Negation or Antonyms. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/snarks.txt: -------------------------------------------------------------------------------- 1 | Determine which of two sentences is sarcastic. 2 | 3 | According to Cambridge University Dictionary, sarcasm is "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way." Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks. 4 | 5 | Q: Which statement is sarcastic? 6 | Options: 7 | (A) Yes, because having interests and actively researching them is a huge waste 8 | (B) Yes, because having interests and actively researching them is a huge deal 9 | A: Let's think step by step. 10 | If we look at (A), it says that having interests and actively researching them is a huge waste, implying that it is a useless effort. 
However, we know that having interests and actively researching them is typically not a waste but rather is beneficial to the individual. The presence of such a juxtaposition in (A) suggests that it contains a taste of irony and sarcasm. 11 | If we look at (B), it says that having interests and actively researching them is a huge deal, implying that it is an important and consequential effort. This is arguably a neutral and correct statement. 12 | Among the above, the sarcastic option is (A). So the answer is (A). 13 | 14 | Q: Which statement is sarcastic? 15 | Options: 16 | (A) No one is going to disagree with you on this. Avoiding ad hominem attacks really help your case 17 | (B) No one is going to disagree with you on this. Ad hominem attacks really help your case 18 | A: Let's think step by step. 19 | If we look at (A), it says that avoiding ad hominem attacks really help your case, implying that ad hominem attacks are adverse and injurious. Because ad hominem attacks are addressed at a person rather than an idea, it is indeed true that avoiding them is often useful and helpful; so, (A) is a neutral (valid and agreeable) statement. 20 | If we look at (B), it says that ad hominem attacks really help your case, implying that ad hominem attacks are a positive thing. However, we stated previously that ad hominem attacks are often not useful or constructive. The speaker in this sentence therefore seems to mean the opposite of what they are saying; so, there appears to be a taste of irony and sarcasm in (B). 21 | Among the above, the sarcastic option is (B). So the answer is (B). 22 | 23 | Q: Which statement is sarcastic? 24 | Options: 25 | (A) Consistency in the league's punishments? What do you think this is supposed to be, politics? 26 | (B) Consistency in the league's punishments? What do you think this is supposed to be, moral? 27 | A: Let's think step by step. 28 | If we look at (A), it likens the consistency in the league's punishments with that in politics. 
Because politics or political affairs are often not considered to be consistent or dependable, this sentence appears to be satirical. 29 | If we look at (B), it likens the consistency in the league's punishments with that in morality. Discussing the consistency of the league's punishments in the context of morality, ethics, or law makes sense and does not appear to make a satirical point about anything. 30 | Among the above, the sarcastic option is (A). So the answer is (A). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/sports_understanding.txt: -------------------------------------------------------------------------------- 1 | Determine whether an artificially constructed sentence relating to sports is plausible or not. 2 | 3 | Q: Is the following sentence plausible? "Bam Adebayo scored a reverse layup in the Western Conference Finals." 4 | A: Let's think step by step. Bam Adebayo is an American basketball player. Scoring a reverse layup in the Western Conference Finals is part of the NBA Finals. So the answer is yes. 5 | 6 | Q: Is the following sentence plausible? "Santi Cazorla scored a touchdown." 7 | A: Let's think step by step. Santi Cazorla is a soccer player. Touchdown is part of American football and rugby. So the answer is no. 8 | 9 | Q: Is the following sentence plausible? "DeMar DeRozan was called for the goal tend." 10 | A: Let's think step by step. DeMar DeRozan is an American basketball player. Goal tending is part of basketball. So the answer is yes. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/temporal_sequences.txt: -------------------------------------------------------------------------------- 1 | Task description: Answer questions about which times certain events could have occurred. 2 | 3 | Q: Today, Emily went to the museum. Between what times could they have gone? 
4 | We know that: 5 | Emily woke up at 1pm. 6 | Elizabeth saw Emily reading at the library from 2pm to 4pm. 7 | Jessica saw Emily watching a movie at the theater from 4pm to 5pm. 8 | Leslie saw Emily waiting at the airport from 5pm to 6pm. 9 | William saw Emily buying clothes at the mall from 6pm to 7pm. 10 | The museum was closed after 7pm. 11 | Between what times could Emily have gone to the museum? 12 | Options: 13 | (A) 1pm to 2pm 14 | (B) 6pm to 7pm 15 | (C) 5pm to 6pm 16 | (D) 2pm to 4pm 17 | A: Let's think step by step. 18 | Wake-up time: 1pm. 19 | 1pm-2pm: free. 20 | 2pm-4pm: reading at the library. 21 | 4pm-5pm: watching a movie at the theater. 22 | 5pm-6pm: waiting at the airport. 23 | 6pm-7pm: buying clothes at the mall. 24 | The museum closure time: 7pm. 25 | The only time when Emily could have gone to the museum was 1pm to 2pm. So the answer is (A). 26 | 27 | Q: Today, Elizabeth went to the amusement park. Between what times could they have gone? 28 | We know that: 29 | Elizabeth woke up at 7am. 30 | David saw Elizabeth fixing their computer at the electronic store from 1pm to 2pm. 31 | Sarah saw Elizabeth playing tennis at the tennis court from 2pm to 3pm. 32 | Susan saw Elizabeth walking towards the Statue of Liberty from 3pm to 6pm. 33 | Andrew saw Elizabeth taking photos near the Eiffel Tower from 6pm to 9pm. 34 | Emily saw Elizabeth getting a coffee at the cafe from 9pm to 10pm. 35 | The amusement park was closed after 10pm. 36 | Between what times could Elizabeth have gone to the amusement park? 37 | Options: 38 | (A) 7am to 1pm 39 | (B) 9pm to 10pm 40 | (C) 1pm to 2pm 41 | (D) 3pm to 6pm 42 | A: Let's think step by step. 43 | Wake-up time: 7am. 44 | 7am-1pm: free. 45 | 1pm-2pm: fixing their computer at the electronic store. 46 | 2pm-3pm: playing tennis at the tennis court. 47 | 3pm-6pm: walking towards the Statue of Liberty. 48 | 6pm-9pm: taking photos near the Eiffel Tower. 49 | 9pm-10pm: getting a coffee at the cafe. 
50 | The amusement park closure time: 10pm. 51 | The only time when Elizabeth could have gone to the amusement park was 7am to 1pm. So the answer is (A). 52 | 53 | Q: Today, Tiffany went to the beach. Between what times could they have gone? 54 | We know that: 55 | Tiffany woke up at 5am. 56 | Betty saw Tiffany getting a coffee at the cafe from 5am to 6am. 57 | Jessica saw Tiffany working at the office from 6am to 9am. 58 | John saw Tiffany stretching at a yoga studio from 9am to 12pm. 59 | Sean saw Tiffany sitting on a rooftop from 12pm to 2pm. 60 | Sarah saw Tiffany playing tennis at the tennis court from 2pm to 3pm. 61 | The beach was closed after 4pm. 62 | Between what times could Tiffany have gone to the beach? 63 | Options: 64 | (A) 9am to 12pm 65 | (B) 12pm to 2pm 66 | (C) 5am to 6am 67 | (D) 3pm to 4pm 68 | A: Let's think step by step. 69 | Wake-up time: 5am. 70 | 5am-6am: getting a coffee at the cafe. 71 | 6am-9am: working at the office. 72 | 9am-12pm: stretching at a yoga studio. 73 | 12pm-2pm: sitting on a rooftop. 74 | 2pm-3pm: playing tennis at the tennis court. 75 | 3pm-4pm: free. 76 | The beach closure time: 4pm. 77 | The only time when Tiffany could have gone to the beach was 3pm to 4pm. So the answer is (D). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/tracking_shuffled_objects_five_objects.txt: -------------------------------------------------------------------------------- 1 | A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps. 2 | 3 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball. 4 | As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. 
At the end of the game, Bob has the 5 | Options: 6 | (A) yellow ball 7 | (B) blue ball 8 | (C) pink ball 9 | A: Let's think step by step. 10 | (0) At the start: Alice: yellow, Bob: blue, Claire: pink. 11 | (1) Claire and Alice swap balls: Alice: pink, Bob: blue, Claire: yellow. 12 | (2) Alice and Bob swap balls: Alice: blue, Bob: pink, Claire: yellow. 13 | (3) Claire and Bob swap balls: Alice: blue, Bob: yellow, Claire: pink. 14 | At the end of the game, Bob has the yellow ball. So the answer is (A). 15 | 16 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 17 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. At the end of the game, Alice has the 18 | Options: 19 | (A) white ball 20 | (B) purple ball 21 | (C) pink ball 22 | A: Let's think step by step. 23 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 24 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 25 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 26 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 27 | At the end of the game, Alice has the pink ball. So the answer is (C). 28 | 29 | Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 30 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 31 | Options: 32 | (A) Lola 33 | (B) Rodrigo 34 | (C) Patrick 35 | A: Let's think step by step. 36 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 
37 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 38 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 39 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 40 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/tracking_shuffled_objects_seven_objects.txt: -------------------------------------------------------------------------------- 1 | A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps. 2 | 3 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball. 4 | As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the 5 | Options: 6 | (A) yellow ball 7 | (B) blue ball 8 | (C) pink ball 9 | A: Let's think step by step. 10 | (0) At the start: Alice: yellow, Bob: blue, Claire: pink. 11 | (1) Claire and Alice swap balls: Alice: pink, Bob: blue, Claire: yellow. 12 | (2) Alice and Bob swap balls: Alice: blue, Bob: pink, Claire: yellow. 13 | (3) Claire and Bob swap balls: Alice: blue, Bob: yellow, Claire: pink. 14 | At the end of the game, Bob has the yellow ball. So the answer is (A). 15 | 16 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 17 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. 
At the end of the game, Alice has the 18 | Options: 19 | (A) white ball 20 | (B) purple ball 21 | (C) pink ball 22 | A: Let's think step by step. 23 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 24 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 25 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 26 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 27 | At the end of the game, Alice has the pink ball. So the answer is (C). 28 | 29 | Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 30 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 31 | Options: 32 | (A) Lola 33 | (B) Rodrigo 34 | (C) Patrick 35 | A: Let's think step by step. 36 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 37 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 38 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 39 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 40 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/tracking_shuffled_objects_three_objects.txt: -------------------------------------------------------------------------------- 1 | A task requiring determining the final positions of a set of objects given their initial positions and a description of a sequence of swaps. 2 | 3 | Q: Alice, Bob, and Claire are playing a game. 
At the start of the game, they are each holding a ball: Alice has a yellow ball, Bob has a blue ball, and Claire has a pink ball. 4 | As the game progresses, pairs of players trade balls. First, Claire and Alice swap balls. Then, Alice and Bob swap balls. Finally, Claire and Bob swap balls. At the end of the game, Bob has the 5 | Options: 6 | (A) yellow ball 7 | (B) blue ball 8 | (C) pink ball 9 | A: Let's think step by step. 10 | (0) At the start: Alice: yellow, Bob: blue, Claire: pink. 11 | (1) Claire and Alice swap balls: Alice: pink, Bob: blue, Claire: yellow. 12 | (2) Alice and Bob swap balls: Alice: blue, Bob: pink, Claire: yellow. 13 | (3) Claire and Bob swap balls: Alice: blue, Bob: yellow, Claire: pink. 14 | At the end of the game, Bob has the yellow ball. So the answer is (A). 15 | 16 | Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 17 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. At the end of the game, Alice has the 18 | Options: 19 | (A) white ball 20 | (B) purple ball 21 | (C) pink ball 22 | A: Let's think step by step. 23 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 24 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 25 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 26 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 27 | At the end of the game, Alice has the pink ball. So the answer is (C). 28 | 29 | Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 30 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. 
Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 31 | Options: 32 | (A) Lola 33 | (B) Rodrigo 34 | (C) Patrick 35 | A: Let's think step by step. 36 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 37 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 38 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 39 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 40 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/web_of_lies.txt: -------------------------------------------------------------------------------- 1 | Evaluate a random boolean function expressed as a word problem. 2 | 3 | Q: Question: Fidel tells the truth. Jerry says Fidel tells the truth. Vina says Jerry tells the truth. Millicent says Vina lies. Raymond says Millicent lies. Does Raymond tell the truth? 4 | A: Let's think step by step. 5 | (1) Fidel tells the truth. So, we know that Fidel tells the truth. 6 | (2) Jerry says Fidel tells the truth. Since we know from (1) that Fidel tells the truth, if Jerry says that Fidel tells the truth, then Jerry tells the truth. 7 | (3) Vina says Jerry tells the truth. Since we know from (2) that Jerry tells the truth, if Vina says Jerry tells the truth, then Vine tells the truth. 8 | (4) Millicent says Vina lies. Since we know from (3) that Vina tells the truth, if Millicent says Vina lies, then Millicent lies. 9 | (5) Raymond says Millicent lies. Since we know from (4) that Millicent lies, if Raymond says Millicent lies, then Raymond tells the truth. 10 | Now, the question asks: Does Raymond tell the truth? We know from (5) that Raymond tells the truth. So the answer is Yes. 11 | 12 | Q: Question: Kristian lies. Millie says Kristian lies. 
Maybelle says Millie tells the truth. Fidel says Maybelle lies. Leda says Fidel lies. Does Leda tell the truth? 13 | A: Let's think step by step. 14 | (1) Kristian lies. So, we know that Kristian lies. 15 | (2) Millie says Kristian lies. Since we know from (1) that Kristian lies, if Millie says Kristian lies, then Millie tells the truth. 16 | (3) Maybelle says Millie tells the truth. Since we know from (2) that Millie tells the truth, if Maybelle says Millie tells the truth, then Maybelle tells the truth. 17 | (4) Fidel says Maybelle lies. Since we know from (3) that Maybelle tells the truth, if Fidel says Maybelle lies, then Fidel lies. 18 | (5) Leda says Fidel lies. Since we know from (4) that Fidel lies, if Leda says Fidel lies, then Leda tells the truth. 19 | Now, the question asks: Does Leda tell the truth? We know from (5) that Leda tells the truth. So the answer is Yes. 20 | 21 | Q: Question: Kristian tells the truth. Michaela says Kristian lies. Raymond says Michaela tells the truth. Osvaldo says Raymond tells the truth. Jamey says Osvaldo tells the truth. Does Jamey tell the truth? 22 | A: Let's think step by step. 23 | (1) Kristian tells the truth. So, we know that Kristian tells the truth. 24 | (2) Michaela says Kristian lies. Since we know from (1) that Kristian tells the truth, if Michaela says Kristian lies, then Michaela lies. 25 | (3) Raymond says Michaela tells the truth. Since we know from (2) that Michaela lies, if Raymond says Michaela tells the truth, then Raymond lies. 26 | (4) Osvaldo says Raymond tells the truth. Since we know from (3) that Raymond lies, if Osvaldo says Raymond tells the truth, then Osvaldo lies. 27 | (5) Jamey says Osvaldo tells the truth. Since we know from (4) that Osvaldo lies, if Jamey says Osvaldo tells the truth, then Jamey lies. 28 | Now, the question asks: Does Jamey tell the truth? We know from (5) that Jamey lies. So the answer is No. 
-------------------------------------------------------------------------------- /GenerationBench/GenerationTest/lib_prompt/word_sorting.txt: -------------------------------------------------------------------------------- 1 | Sort a list of words. 2 | 3 | Q: Sort the following words alphabetically: List: oven costume counterpart 4 | A: Let's think step by step. 5 | The first letter: "oven": "o" (15). "costume": "c" (3). "counterpart": "c" (3). We now have: (3) ["costume" ? "counterpart"] < (15) "oven". Now let's sort this subpart ["costume" ? "counterpart"] by looking at their second letters. 6 | The second letter: "costume": "o" (15). "counterpart": "o" (15). We now have: (15) ["costume" ? "counterpart"]. Now let's sort this subpart ["costume" ? "counterpart"] by looking at their third letters. 7 | The third letter: "costume": "s" (19). "counterpart": "u" (21). We now have: (19) "costume" < (21) "counterpart". Hence, we have ["costume" < "counterpart"] < "oven". So the answer is costume counterpart oven. 8 | 9 | Q: Sort the following words alphabetically: List: hypochlorite ponderosa phone credulity 10 | A: Let's think step by step. 11 | The first letter: "hypochlorite": "h" (8). "ponderosa": "p" (16). "phone": "p" (16). "credulity": "c" (3). We now have: (3) "credulity" < (8) "hypochlorite" < (16) ["ponderosa" ? "phone"]. Now let's sort this subpart ["ponderosa" ? "phone"] by looking at their second letters. 12 | The second letter: "ponderosa": "o" (15). "phone": "h" (8). We now have: (8) "phone" < (15) "ponderosa". Hence, we have "credulity" < "hypochlorite" < ["phone" <"ponderosa"]. So the answer is credulity hypochlorite phone ponderosa. 13 | 14 | Q: Sort the following words alphabetically: List: newt arson parthia seismography mugho aspect census 15 | A: Let's think step by step. 16 | The first letter: "newt": "n" (14). "arson": "a" (1). "parthia": "p" (16). "seismography": "s" (19). "mugho": "m" (13). "aspect": "a" (1). "census": "c" (3). 
We now have: (1) ["arson" ? "aspect"] < (3) "census" < (13) "mugho" < (14) "newt" < (16) "parthia" < (19) "seismography". Now let's sort this subpart ["arson" ? "aspect"] by looking at their second letters. 17 | The second letter: "arson": "r" (18). "aspect": "s" (19). We now have: (18) "arson" < (19) "aspect". Hence, we have ["arson" < "aspect"] < "census" < "mugho" < "newt" < "parthia" < "seismography". So the answer is arson aspect census mugho newt parthia seismography. -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/run_template_llama-3-8b.sh: -------------------------------------------------------------------------------- 1 | ### gsm8k 2 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 3 | 4 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 5 | 6 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 7 | 8 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 9 | 10 | python evaluation_gsm8k.py --model meta-llama/Meta-Llama-3-8B --prompt_file 
gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 11 | 12 | ###aqua 13 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 14 | 15 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 16 | 17 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 18 | 19 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 20 | 21 | python evaluation_aqua_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 22 | 23 | ### bbh 24 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 
--streaming --streaming_gap 20 25 | 26 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 27 | 28 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 29 | 30 | python evaluation_bbh_cot.py --model meta-llama/Meta-Llama-3-8B --batch_size 4 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /GenerationBench/GenerationTest/run_template_llama2-13b.sh: -------------------------------------------------------------------------------- 1 | ### gsm8k 2 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 3 | 4 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 5 | 6 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 
--rankv 2 --left 0.02 --streaming --streaming_gap 64 7 | 8 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 9 | 10 | python evaluation_gsm8k.py --model meta-llama/Llama-2-13b --prompt_file gsm8k_prompt_original.txt --batch_size 6 --max_new_tokens 256 --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 11 | 12 | ###aqua 13 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 14 | 15 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 16 | 17 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 18 | 19 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b --batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 20 | 21 | python evaluation_aqua_cot.py --model meta-llama/Llama-2-13b 
--batch_size 6 --max_new_tokens 196 --model_max_length 4096 --root_output_dir ./aqua --compress_method GEARL-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 22 | 23 | ### bbh 24 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method KCVT --attention_number 40 --quantize_bit 4 --streaming --streaming_gap 20 25 | 26 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method GEAR --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 27 | 28 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method GEAR-KCVT --attention_number 40 --quantize_bit 4 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --left 0.02 --streaming --streaming_gap 64 29 | 30 | python evaluation_bbh_cot.py --model meta-llama/Llama-2-13b --batch_size 4 --max_new_tokens 256 --compress_method GEARL --attention_number 40 --quantize_bit 2 --group_size 64 --loop 3 --prefillrank 4 --prefillrankv 4 --rank 2 --rankv 2 --streaming --streaming_gap 64 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /GenerationBench/Readme.md: -------------------------------------------------------------------------------- 1 | ## Generation Bench 2 | All results here are tested by simulated compression code. 3 | 4 | ## Reposity architecture 5 | ``` 6 | . 7 | ├── FinetunedTest 8 | ├── GenerationTest 9 | ``` 10 | `FinetunedTest` is using Llama-2-7b finetuned on GSM8K and test GEAR on GSM8K. Thanks for [Yixiao Li](https://scholar.google.com/citations?user=KZIAP7MAAAAJ&hl=en) who shared the model for us. 
11 | 12 | `GenerationTest` is using Llama-2-7b,Llama-2-13b and Mistral-7b with GEAR test on GSM8K, GSM8K-COT, MMLU, MMLU-COT, and BBH-COT. 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Intel Labs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | ![license](https://img.shields.io/badge/License-MIT-green.svg?labelColor=gray) 2 | [![arxiv](http://img.shields.io/badge/arxiv-2310.04562-yellow.svg)](https://arxiv.org/pdf/2403.05527.pdf) 3 | ## Todo List. 4 | 1. simluated code for gsm8k-5shot, bbh-3shot and aqua-8shot with cot prompt on llama models ✔️ 5 | 2. 
Fused quantization supported for GEAR ✔️ 6 | 3. More cuda kernel optimization 7 | 4. GEAR supported with lm-harness 8 | 5. Combining with other inference algorithm/system 9 | 6. wrap up a python package 10 | ## GEAR: An Efficient KV Cache Compression Recipe for Near-Lossless Generative Inference of LLM # 11 | 12 | Official repo for `GEAR: An Efficient Error Reduction Framework for KV Cache Compression in LLM Inference.` `GEAR` is a "plug-and-play" inference only KV compression method. 13 | `GEAR` augments any quantization scheme(e.g. KIVI, KCVT and Flexgen) via an error recovery solution to boost the model accuracy while saving memory. 14 | 15 | Here, `GEAR` is the abbreviation of `Ge`nerative Inference with LLM via `A`pproximation and Error `R`ecovery. 16 | 17 | ## Overview 18 | GEAR is an efficient KV cache compression framework that achieves 19 | near-lossless high-ratio compression. GEAR first applies quantization to majority of entries of 20 | similar magnitudes to ultra-low precision. It then employs a low-rank matrix to approximate 21 | the quantization error, and a sparse matrix to remedy individual errors from outlier entries. 22 | 23 | GEAR does not need to preserve any first or last tokens uncompressed like other low bit compression algorithms to achieve near lossless KV cache compression for LLMs. 24 |


25 | 26 | ## How to use GEAR 27 | ```bash 28 | conda create -n GEAR python==3.10 29 | conda activate GEAR 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ### Repository architecture 34 | ``` 35 | . 36 | ├── GenerationBench 37 | ``` 38 | `cuda_supported_gear` GEAR-KIVI implementation with fused kernel supported. 39 | 40 | `GenerationBench` is simulated compression tested on finetuned and un-finetuned models with BBH, GSM8K, and Aqua dataset. 41 | 42 | ## Developers 43 | 44 | - [Hao Kang*](https://haokang-timmy.github.io/)(Georgia Tech) 45 | - [Qingru Zhang*](https://www.linkedin.com/in/qingru-zhang-4b789a187/)(Georgia Tech) 46 | - [Souvik Kundu](https://ksouvik52.github.io/)(Intel) 47 | - [Geonhwa Jeong](https://ghjeong12.github.io/)(Georgia Tech) 48 | - [Zaoxing Liu](https://zaoxing.github.io/)(University of Maryland) 49 | - [Tushar Krishna](https://www.linkedin.com/in/tushar-krishna-a60b0970/)(Georgia Tech) 50 | - [Tuo Zhao](https://www2.isye.gatech.edu/~tzhao80/)(Georgia Tech) 51 | 52 | 53 | ## Citation 54 | Version 2 will be updated soon. Currently it is version 1. 55 | [link to paper](https://arxiv.org/pdf/2403.05527.pdf) 56 | ``` 57 | @misc{kang2024gear, 58 | title={GEAR: An Efficient KV Cache Compression Recipe for Near-Lossless Generative Inference of LLM}, 59 | author={Hao Kang and Qingru Zhang and Souvik Kundu and Geonhwa Jeong and Zaoxing Liu and Tushar Krishna and Tuo Zhao}, 60 | year={2024}, 61 | eprint={2403.05527}, 62 | archivePrefix={arXiv}, 63 | primaryClass={cs.LG} 64 | } 65 | ``` 66 | ## Contributing 67 | We welcome everyone to contribute to this repository by raising PRs. If there is any problem, you may also send an email to hkang342@gatech.edu. 68 | 69 | ## Disclaimer 70 | This “research quality code” is for Non-Commercial purposes and provided by the contributors “As Is” without any express or implied warranty of any kind.
The organizations (Intel or georgia Tech) involved do not own the rights to this data set and do not confer any rights to it. The organizations (Intel or georgia Tech) do not warrant or assume responsibility for the accuracy or completeness of any information, text, graphics, links or other items within the code. A thorough security review has not been performed on this code. Additionally, this repository may contain components that are out of date or contain known security vulnerabilities. 71 | -------------------------------------------------------------------------------- /cuda_supported_gear/quant/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/opengear-project/GEAR/efe0bfe2b5a35b22122f7cefab5cde9db13a66cd/cuda_supported_gear/quant/__init__.py -------------------------------------------------------------------------------- /cuda_supported_gear/quant/csrc/gemv_cuda.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | torch::Tensor gemv_forward_cuda( 5 | torch::Tensor _in_feats, 6 | torch::Tensor _kernel, 7 | torch::Tensor _scaling_factors, 8 | torch::Tensor _zeros, 9 | const int bit, 10 | const int group_size); 11 | 12 | 13 | torch::Tensor gemv_forward_cuda_outer_dim( 14 | torch::Tensor _in_feats, 15 | torch::Tensor _kernel, 16 | torch::Tensor _scaling_factors, 17 | torch::Tensor _zeros, 18 | const int bit, 19 | const int group_size, 20 | const int nh, 21 | const bool mqa); -------------------------------------------------------------------------------- /cuda_supported_gear/quant/csrc/pybind.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "gemv_cuda.h" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 6 | { 7 | m.def("gemv_forward_cuda", &gemv_forward_cuda); 8 | m.def("gemv_forward_cuda_outer_dim", &gemv_forward_cuda_outer_dim); 9 | } 
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/gemv.py:
--------------------------------------------------------------------------------
import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"
os.environ["CUDA_LAUNCH_BLOCKING"]="1"
import numpy as np
import torch
import ipdb
import random
import triton
import triton.language as tl
from new_pack import pack_tensor
from timeit_v2 import py_benchmark
import kivi_gemv

# Global problem sizes shared by all tests below:
# B = batch, nh = num attention heads, IC = in features, OC = out features.
B, nh, IC, OC = 8, 32, 739, 128

@triton.jit
def gemv_kernel_g64(inputs_ptr, qw_ptr, mn_ptr,
                    scale_ptr, output_ptr,
                    IC: tl.constexpr, OC: tl.constexpr, bit: tl.constexpr,
                    OC_PER_PH: tl.constexpr, PACK_FACTOR: tl.constexpr, BLOCK_SIZE):
    """
    Computes GEMV (group_size = 64).

    Args:
        inputs: vector of shape [batch_size, IC];
        qw: matrix of shape [OC, IC / 8];
        output: vector of shape [OC];
        mn: matrix of shape [OC, NG];
        scale: matrix of shape [OC, NG];

    Notes:
        One cannot infer group_size from the shape of scaling factors.
        the second dimension is rounded up to a multiple of PACK_FACTOR.
    """
    group_size = 64
    # Each program instance handles OC_PER_PH output channels of one batch row.
    oc_idx = tl.program_id(axis=0) * OC_PER_PH + tl.arange(0, OC_PER_PH)
    batch_idx = tl.program_id(axis=1)
    num_groups = IC // group_size
    num_groups_packed = tl.cdiv(num_groups, PACK_FACTOR)
    # tl.store(output_ptr, num_groups_packed)
    weight_w = IC // PACK_FACTOR
    # Mask selecting one `bit`-wide field out of a packed word (e.g. 0xF for 4-bit).
    num = 0xFF >> (8-bit)
    accumulator = tl.zeros((OC_PER_PH,), dtype=tl.float32)
    for group_idx in range(0, num_groups):
        # load scaling factors
        # each time we load 4 OC x 1 G
        scale = tl.load(scale_ptr + oc_idx[:, None] * num_groups + group_idx)
        mn = tl.load(mn_ptr + oc_idx[:, None] * num_groups + group_idx)
        # 1 G -> 64 numbers -> 64 // PACK_FACTOR packed numbers
        cur_qw_ptr = qw_ptr + oc_idx[:, None] * weight_w + group_idx * (64 // PACK_FACTOR) + tl.arange(0, 64 // PACK_FACTOR)[None, :]
        qw = tl.load(cur_qw_ptr)
        for i in range(PACK_FACTOR):
            # Extract field i of every packed word, dequantize: w = q * scale + mn.
            w_fp = qw & num
            # load 4 OC x
            w_fp = w_fp * scale + mn
            qw = qw >> bit
            # Input index for field i of packed word j is group*64 + j*PACK_FACTOR + i,
            # matching pack_tensor's packing order.
            cur_inp_ptr = inputs_ptr + batch_idx * IC + group_idx * 64 + i + tl.arange(0, 64 // PACK_FACTOR)[None, :] * PACK_FACTOR
            cur_input = tl.load(cur_inp_ptr)
            accumulator += tl.sum(cur_input * w_fp, 1)
    ptr = output_ptr + oc_idx + batch_idx * OC
    tl.store(ptr, accumulator)


def dequant_weight(w, scale, mn, gs):
    # Per-group dequantization along the last dim: w_fp = w * scale + mn,
    # with groups of size `gs`.
    w_fp = w.half().view(w.shape[0], w.shape[1]//gs, gs)
    w_fp = w_fp * scale.unsqueeze(-1) + mn.unsqueeze(-1)
    return w_fp.view(w.shape)


def dequant_weight_outer(w, scale, mn, gs):
    # ipdb.set_trace()
    # Same as dequant_weight, but for 3-D (batched) weight tensors.
    w_fp = w.half().view(w.shape[0], w.shape[1], w.shape[2]//gs, gs)
    w_fp = w_fp * scale.unsqueeze(-1) + mn.unsqueeze(-1)
    return w_fp.view(w.shape)


def gemv_fwd(bit, group_size, inp, qweight, mn, scale):
    # Launch the Triton GEMV kernel above (group_size is fixed to 64).
    B, IC = inp.shape
    OC = qweight.shape[0]
    BLOCK_SIZE = 32
    OC_PER_PH = 32
    PACK_FACTOR = 32 // bit
    assert group_size == 64
    output = torch.empty((B, OC),
                         device=inp.device, dtype=torch.float16)
    grid = lambda META: (
        triton.cdiv(OC, META['OC_PER_PH']), B
    )
    gemv_kernel_g64[grid](inp, qweight, mn, scale, output,
                          IC, OC, bit, OC_PER_PH, PACK_FACTOR, BLOCK_SIZE)
    return output


def test_bgemv_outer_correct_mha():
    # Correctness of the fused CUDA batched GEMV (multi-head attention layout)
    # against a reference matmul on dequantized weights.
    flatten_B = B * nh
    inp = torch.randn((flatten_B, 1, IC), device='cuda', dtype=torch.float16)
    ori_weight = torch.randn((flatten_B, IC, OC), device='cuda', dtype=torch.float16)
    GS = 32
    for BIT in [2, 4]:
        weight = ori_weight
        PACK_FACTOR = 32 // BIT
        assert OC % GS == 0 and OC % PACK_FACTOR == 0
        NG = OC // GS
        # Asymmetric min/max quantization per group of GS output features.
        weight = weight.view(flatten_B, IC, NG, GS)
        mx = torch.max(weight, dim=-1, keepdim=False)[0]
        mn = torch.min(weight, dim=-1, keepdim=False)[0]
        maxq = 2 ** BIT - 1
        scale = (mx - mn) / maxq
        weight = weight - mn.unsqueeze(-1)
        weight.div_(scale.unsqueeze(-1))
        weight = weight.clamp_(0, maxq).round_().to(torch.int32)
        weight = weight.view(flatten_B, IC, OC)
        qweight = pack_tensor(weight, BIT, 2)
        weight = weight.transpose(1, 2).contiguous()
        qweight = qweight.transpose(1, 2).contiguous()
        scale = scale.transpose(1, 2).contiguous()
        mn = mn.transpose(1, 2).contiguous()
        output = kivi_gemv.gemv_forward_cuda_outer_dim(inp, qweight, scale, mn, BIT, GS, nh, False)
        deq_w = dequant_weight_outer(weight.transpose(1, 2),
                                     scale.transpose(1, 2),
                                     mn.transpose(1, 2), GS)
        # rel_error = torch.abs((deq_w - ori_weight).float() / (ori_weight + 1e-5).float()).mean()
        # print(f'bit {BIT} avg rel weight quant error: {rel_error}')
        output_ref = inp @ deq_w
        error = output_ref - output
        rel_out_error = torch.abs(error.float() / (torch.abs(output_ref).float()+1e-5)).mean()
        print(f'mha bit {BIT} avg rel out quant error: {rel_out_error}')


def test_bgemv_outer_correct_mqa():
    # Same as the MHA test but with one shared weight per batch (multi-query).
    flatten_B = B * nh
    inp
= torch.randn((flatten_B, 1, IC), device='cuda', dtype=torch.float16) 132 | ori_weight = torch.randn((B, IC, OC), device='cuda', dtype=torch.float16) 133 | GS = 32 134 | for BIT in [2, 4]: 135 | weight = ori_weight 136 | PACK_FACTOR = 32 // BIT 137 | assert OC % GS == 0 and OC % PACK_FACTOR == 0 138 | NG = OC // GS 139 | weight = weight.view(B, IC, NG, GS) 140 | mx = torch.max(weight, dim=-1, keepdim=False)[0] 141 | mn = torch.min(weight, dim=-1, keepdim=False)[0] 142 | maxq = 2 ** BIT - 1 143 | scale = (mx - mn) / maxq 144 | weight = weight - mn.unsqueeze(-1) 145 | weight.div_(scale.unsqueeze(-1)) 146 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 147 | weight = weight.view(B, IC, OC) 148 | qweight = pack_tensor(weight, BIT, 2) 149 | inp = inp.contiguous() 150 | weight = weight.transpose(1, 2).contiguous() 151 | qweight = qweight.transpose(1, 2).contiguous() 152 | scale = scale.transpose(1, 2).contiguous() 153 | mn = mn.transpose(1, 2).contiguous() 154 | output = kivi_gemv.gemv_forward_cuda_outer_dim(inp, qweight, scale, mn, BIT, GS, nh, True) 155 | deq_w = dequant_weight_outer(weight.transpose(1, 2), 156 | scale.transpose(1, 2), 157 | mn.transpose(1, 2), GS) 158 | # rel_error = torch.abs((deq_w - ori_weight).float() / (ori_weight + 1e-5).float()).mean() 159 | # print(f'bit {BIT} avg rel weight quant error: {rel_error}') 160 | output_ref = inp.view(B, nh, 1, IC) @ deq_w.view(B, 1, IC, OC) 161 | output_ref = output_ref.view(flatten_B, 1, OC) 162 | error = output_ref - output 163 | # ipdb.set_trace() 164 | rel_out_error = torch.abs(error.float() / (torch.abs(output_ref).float()+1e-5)).mean() 165 | print(f'mqa bit {BIT} avg rel out quant error: {rel_out_error}') 166 | 167 | 168 | def test_gemv_correct(): 169 | inp = torch.randn((B, IC), device='cuda', dtype=torch.float16) 170 | ori_weight = torch.randn((OC, IC), device='cuda', dtype=torch.float16) 171 | GS = 64 172 | for BIT in [4]: 173 | weight = ori_weight 174 | PACK_FACTOR = 32 // BIT 175 | assert IC % 
GS == 0 and IC % PACK_FACTOR == 0 176 | NG = IC // GS 177 | weight = weight.view(OC, NG, GS) 178 | mx = torch.max(weight, dim=2, keepdim=False)[0] 179 | mn = torch.min(weight, dim=2, keepdim=False)[0] 180 | maxq = 2 ** BIT - 1 181 | scale = (mx - mn) / maxq 182 | weight = weight - mn.unsqueeze(-1) 183 | weight.div_(scale.unsqueeze(-1)) 184 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 185 | weight = weight.view(OC, IC) 186 | qweight = pack_tensor(weight, BIT, 1) 187 | # output = gemv_fwd(BIT, GS, inp, qweight, mn, scale) 188 | output = kivi_gemv.gemv_forward_cuda(inp, qweight, scale, mn, BIT, GS) 189 | deq_w = dequant_weight(weight, scale, mn, GS) 190 | rel_error = torch.abs((deq_w - ori_weight).float() / (ori_weight + 1e-5).float()).mean() 191 | # print(f'bit {BIT} avg rel weight quant error: {rel_error}') 192 | output_ref = inp @ deq_w.T 193 | error = output_ref - output 194 | rel_out_error = torch.abs(error.float() / (output_ref + 1e-5).float()).mean() 195 | print(f'bit {BIT} avg rel out quant error: {rel_out_error}') 196 | 197 | 198 | def test_gemv_speed(): 199 | inp = torch.randn((B, IC), device='cuda', dtype=torch.float16) 200 | ori_weight = torch.randn((OC, IC), device='cuda', dtype=torch.float16) 201 | weight = ori_weight 202 | BIT = 4 203 | GS = 64 204 | PACK_FACTOR = 32 // BIT 205 | assert IC % GS == 0 and IC % PACK_FACTOR == 0 206 | NG = IC // GS 207 | weight = weight.view(OC, NG, GS) 208 | mx = torch.max(weight, dim=2, keepdim=False)[0] 209 | mn = torch.min(weight, dim=2, keepdim=False)[0] 210 | maxq = 2 ** BIT - 1 211 | scale = (mx - mn) / maxq 212 | weight = weight - mn.unsqueeze(-1) 213 | weight.div_(scale.unsqueeze(-1)) 214 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 215 | weight = weight.view(OC, IC) 216 | qweight = pack_tensor(weight, BIT, 1) 217 | output = gemv_fwd(BIT, GS, inp, qweight, mn, scale) 218 | deq_w = dequant_weight(weight, scale, mn, GS) 219 | stmt = "inp @ deq_w.T" 220 | t_ref = py_benchmark(stmt, 
{**globals(), **locals()}, min_repeat_second=1, 221 | setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()") 222 | # stmt = "gemv_fwd(BIT, GS, inp, qweight, mn, scale)" 223 | # t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1, 224 | # setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()") 225 | stmt = "kivi_gemv.gemv_forward_cuda(inp, qweight, scale, mn, BIT, GS)" 226 | t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1, 227 | setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()") 228 | print(f'vanilla pytorch gemv: {t_ref * 1000} ms') 229 | print(f'awq fused IC {IC} OC {OC} {BIT}-bit gemv: {t_our * 1000} ms') 230 | 231 | 232 | def test_bgemv_outer_speed(): 233 | inp = torch.randn((B, 1, IC), device='cuda', dtype=torch.float16) 234 | ori_weight = torch.randn((B, IC, OC), device='cuda', dtype=torch.float16) 235 | GS = 64 236 | for BIT in [2]: 237 | weight = ori_weight 238 | PACK_FACTOR = 32 // BIT 239 | assert OC % GS == 0 and OC % PACK_FACTOR == 0 240 | NG = OC // GS 241 | weight = weight.view(B, IC, NG, GS) 242 | mx = torch.max(weight, dim=-1, keepdim=False)[0] 243 | mn = torch.min(weight, dim=-1, keepdim=False)[0] 244 | maxq = 2 ** BIT - 1 245 | scale = (mx - mn) / maxq 246 | weight = weight - mn.unsqueeze(-1) 247 | weight.div_(scale.unsqueeze(-1)) 248 | weight = weight.clamp_(0, maxq).round_().to(torch.int32) 249 | weight = weight.view(B, IC, OC) 250 | qweight = pack_tensor(weight, BIT, 2) 251 | weight = weight.transpose(1, 2).contiguous() 252 | qweight = qweight.transpose(1, 2).contiguous() 253 | scale = scale.transpose(1, 2).contiguous() 254 | mn = mn.transpose(1, 2).contiguous() 255 | deq_w = dequant_weight_outer(weight.transpose(1, 2), 256 | scale.transpose(1, 2), 257 | mn.transpose(1, 2), GS) 258 | stmt = "inp @ deq_w" 259 | t_ref = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1, 260 | setup="torch.cuda.synchronize()", 
finish="torch.cuda.synchronize()")
        # stmt = "gemv_fwd(BIT, GS, inp, qweight, mn, scale)"
        # t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1,
        #                      setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
        stmt = "kivi_gemv.gemv_forward_cuda_outer_dim(inp, qweight, scale, mn, BIT, GS)"
        t_our = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=1,
                             setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
        print(f'BS {B} IC {IC} OC {OC} pytorch batched gemv: {t_ref * 1000} ms')
        print(f'our fused BS {B} IC {IC} OC {OC} {BIT}-bit outer-dim batched gemv: {t_our * 1000} ms')

if __name__ == "__main__":
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    # test_gemv_correct()
    test_bgemv_outer_correct_mha()
    test_bgemv_outer_correct_mqa()
    # test_gemv_speed()
    # test_bgemv_outer_speed()
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/matmul.py:
--------------------------------------------------------------------------------
import torch
import ipdb
import random
import triton
import triton.language as tl
import kivi_gemv


@triton.jit
def qbvm_kernel(
    bits,
    a_ptr, b_ptr, c_ptr,
    scales_ptr, zeros_ptr,
    M, N, K,
    stride_abatch, stride_am, stride_ak,
    stride_bbatch, stride_bk, stride_bn,
    stride_cbatch, stride_cm, stride_cn,
    stride_scales_b, stride_scales_k, stride_scales_g,
    stride_zeros_b, stride_zeros_k, stride_zeros_g,
    groupsize,
    BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr,
):
    """
    Compute the batch matrix multiplication C = A x B.
    A is of shape (B, 1, K) float16
    B is of shape (B, K, N//feat_per_int) int32
    C is of shape (B, 1, N) float16
    scales is of shape (B, K, G) float16
    zeros is of shape (B, K, G) float16
    groupsize is an int specifying the size of groups for scales and zeros.
    G is N // groupsize.
    Set NO_GROUPS to groupsize == K, in which case G = 1 and the kernel is more efficient.

    WARNING: This kernel assumes that K is a multiple of BLOCK_SIZE_K.
    WARNING: This kernel assumes that N is a multiple of BLOCK_SIZE_N.
    WARNING: This kernel assumes that groupsize is a multiple of BLOCK_SIZE_K.
    """
    # One program per (batch, N-tile).
    pid_batch = tl.program_id(axis=0)
    pid = tl.program_id(axis=1)
    feat_per_int = 32 // bits
    num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
    num_pid_k = tl.cdiv(K, BLOCK_SIZE_K)
    pid_n = pid % num_pid_n
    offs_bn = (pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N))
    offs_k = tl.arange(0, BLOCK_SIZE_K)
    a_batch_offset = (pid_batch * stride_abatch)
    b_batch_offset = (pid_batch * stride_bbatch)
    c_batch_offset = (pid_batch * stride_cbatch)
    a_ptr = a_ptr + a_batch_offset
    b_ptr = b_ptr + b_batch_offset
    c_ptr = c_ptr + c_batch_offset
    a_ptrs = a_ptr + (offs_k[:, None] * stride_ak)  # (BLOCK_SIZE_K, 1)
    # a_mask = (offs_am[:, None] < M)
    # b_ptrs is set up such that it repeats elements along the N axis feat_per_int times
    b_ptrs = b_ptr + (offs_k[:, None] * stride_bk + (offs_bn[None, :]//feat_per_int) * stride_bn)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
    # shifter is used to extract the # bits bits of each element in the 32-bit word from B
    shifter = (offs_bn % feat_per_int) * bits
    scales_ptr = scales_ptr + pid_batch*stride_scales_b + ((offs_bn[None, :] // groupsize)) * stride_scales_g  # (BLOCK_SIZE_N,)
    zeros_ptr = zeros_ptr + pid_batch*stride_zeros_b + ((offs_bn[None, :] // groupsize)) * stride_zeros_g  # (BLOCK_SIZE_N,)

    # Now calculate a block of output of shape (BLOCK_SIZE_M, BLOCK_SIZE_N)
    # M is along the batch dimension, N is along the outfeatures dimension, K is along the infeatures dimension
    # So this loop is along the infeatures dimension (K)
    # It's calculating BLOCK_SIZE_M batches in parallel, and for each batch, BLOCK_SIZE_N outfeatures in parallel
    # accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
    accumulator = tl.zeros((BLOCK_SIZE_N,), dtype=tl.float32)
    # Mask selecting one `bits`-wide field from a packed word (0xF for 4-bit).
    num = 0xFF >> (8-bits)
    for pid_k in range(0, num_pid_k):
        offs_bk = (offs_k[:, None] + pid_k * BLOCK_SIZE_K)
        # offs_k[None, :] < K - pid_k * BLOCK_SIZE_K
        a = tl.load(a_ptrs, mask=offs_bk < K, other=0.)  # (1, BLOCK_SIZE_K)
        b = tl.load(b_ptrs, mask=offs_bk < K, other=0.)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
        ptr = scales_ptr + offs_bk * stride_scales_k
        scales = tl.load(ptr, mask=offs_bk < K, other=0.)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
        ptr = zeros_ptr + offs_bk * stride_zeros_k
        zeros = tl.load(ptr, mask=offs_bk < K, other=0.)  # (BLOCK_SIZE_K, BLOCK_SIZE_N)
        # Now we need to unpack b into 32-bit values
        # tl.device_print("scale ",scales.dtype)
        # tl.device_print("zeros ",zeros.dtype)
        b = (b >> shifter[None, :]) & num  # For 4-bit values, bit_op_num is 0xF
        b = b * scales + zeros  # Scale and shift
        accumulator += tl.sum(a * b, 0)  # tl.dot(a, b)
        # if pid_m == 0 and pid_n == 0:
        #    tl.device_print("hello ", tl.dot(a, b).shape)
        a_ptrs += BLOCK_SIZE_K * stride_ak
        b_ptrs += BLOCK_SIZE_K * stride_bk
    c = accumulator  # .to(tl.float16)
    # c = accumulator
    # Store the result
    offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)
    c_ptrs = c_ptr + stride_cn * offs_cn
    c_mask = (offs_cn < N)
    tl.store(c_ptrs, c, mask=c_mask)


def understand_code():
    # Pedagogical helper: prints the grouped (pid_m, pid_n) schedule that the
    # standard Triton matmul tutorial uses. Not called by the library code.
    M, N, K = 512, 256, 256
    BLOCK_SIZE_M, BLOCK_SIZE_N, GROUP_SIZE_M = 64, 64, 4
    total_program_id = triton.cdiv(M, BLOCK_SIZE_M) * triton.cdiv(N, BLOCK_SIZE_N)
    for pid in range(0, total_program_id):
        num_pid_m = triton.cdiv(M, BLOCK_SIZE_M)
        num_pid_n = triton.cdiv(N, BLOCK_SIZE_N)
        num_pid_in_group = GROUP_SIZE_M * num_pid_n
        group_id = pid // num_pid_in_group
        first_pid_m = group_id * GROUP_SIZE_M
        group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
        pid_m = first_pid_m + (pid % group_size_m)
        pid_n = (pid % num_pid_in_group) // group_size_m
        print(f"pid={pid}, pid_m={pid_m}, pid_n={pid_n}")


def triton_bmm_fA_qB_outer(group_size: int,
                           fA: torch.FloatTensor,
                           qB: torch.IntTensor,
                           scales: torch.FloatTensor,
                           zeros: torch.FloatTensor,
                           bits: int) -> torch.FloatTensor:
    """
    Compute the matrix multiplication C = query x key.
    Where key is quantized into 2-bit values.
121 | 122 | fA is of shape (B, nh, M, K) float16 123 | qB is of shape (B, nh, K, N // feat_per_int) int32 124 | scales is of shape (B, nh, K, G) float16 125 | zeros is of shape (B, nh, K, G) float16 126 | 127 | groupsize is the number of outer dimensions in each group. 128 | G = N // groupsize 129 | 130 | Returns C of shape (B, nh, M, N) float16 131 | """ 132 | assert len(fA.shape) == 4 and len(qB.shape) == 4 133 | B, nh, M, K = fA.shape 134 | feat_per_int = 32 // bits 135 | # flatten to a 3D tensor 136 | fA = fA.view(-1, M, K) 137 | N = qB.shape[-1] * feat_per_int 138 | qB = qB.reshape(-1, K, qB.shape[-1]) 139 | # This is based on the possible BLOCK_SIZE_Ks 140 | # assert K % 16 == 0 and K % 32 == 0 and K % 64 == 0 and K % 128 == 0, "K must be a multiple of 16, 32, 64, and 128" 141 | # This is based on the possible BLOCK_SIZE_Ns 142 | assert N % 16 == 0 and N % 32 == 0 and N % 64 == 0, "N must be a multiple of 16, 32, 64, 128, and 256" 143 | # This is based on the possible BLOCK_SIZE_Ks 144 | assert group_size % 64 == 0, "groupsize must be a multiple of 64, and 128" 145 | flatten_B = B * nh 146 | c = torch.empty((flatten_B, M, N), device='cuda', dtype=torch.float16) 147 | # print(f'M {M} N {N} K {K}') 148 | grid = lambda META: ( 149 | flatten_B, triton.cdiv(N, META['BLOCK_SIZE_N']), 150 | ) 151 | scales = scales.view(flatten_B, scales.shape[-2], scales.shape[-1]) 152 | zeros = zeros.view(flatten_B, zeros.shape[-2], zeros.shape[-1]) 153 | if N > K: 154 | BLOCK_SIZE_N = 128 155 | BLOCK_SIZE_K = 32 156 | num_warps=4 # 157 | else: 158 | BLOCK_SIZE_N = 32 159 | BLOCK_SIZE_K = 128 160 | num_warps = 2 161 | num_stages= 7 if K > 64 else 3 # 162 | qbvm_kernel[grid]( 163 | bits, 164 | fA, qB, c, 165 | scales, zeros, 166 | M, N, K, 167 | fA.stride(0), fA.stride(1), fA.stride(2), 168 | qB.stride(0), qB.stride(1), qB.stride(2), 169 | c.stride(0), c.stride(1), c.stride(2), 170 | scales.stride(0), scales.stride(1), scales.stride(2), 171 | zeros.stride(0), zeros.stride(1), 
scales.stride(2), 172 | group_size, BLOCK_SIZE_N, BLOCK_SIZE_K, 173 | num_warps=num_warps, num_stages=num_stages 174 | ) 175 | return c.view(B, nh, c.shape[-2], c.shape[-1]) 176 | 177 | 178 | def cuda_bmm_fA_qB_outer(group_size: int, 179 | fA: torch.FloatTensor, 180 | qB: torch.IntTensor, 181 | scales: torch.FloatTensor, 182 | zeros: torch.FloatTensor, 183 | bits: int, 184 | mqa: bool=False) -> torch.FloatTensor: 185 | """ 186 | Compute the matrix multiplication C = query x key. 187 | Where key is quantized into 2-bit values. 188 | 189 | fA is of shape (B, nh, M, K) float16 190 | qB is of shape (B, nh, K, N // feat_per_int) int32 191 | scales is of shape (B, nh, K, G) float16 192 | zeros is of shape (B, nh, K, G) float16 193 | 194 | groupsize is the number of outer dimensions in each group. 195 | G = N // groupsize 196 | 197 | Returns C of shape (B, nh, M, N) float16 198 | """ 199 | assert len(fA.shape) == 4 and len(qB.shape) == 4 200 | B, nh, M, K = fA.shape 201 | feat_per_int = 32 // bits 202 | # flatten to a 3D tensor 203 | fA = fA.view(-1, M, K).contiguous() 204 | N = qB.shape[-1] * feat_per_int 205 | qB = qB.reshape(-1, K, qB.shape[-1]).transpose(1, 2).contiguous() 206 | # This is based on the possible BLOCK_SIZE_Ks 207 | # assert K % 16 == 0 and K % 32 == 0 and K % 64 == 0 and K % 128 == 0, "K must be a multiple of 16, 32, 64, and 128" 208 | # This is based on the possible BLOCK_SIZE_Ns 209 | # assert N % 16 == 0 and N % 32 == 0 and N % 64 == 0, "N must be a multiple of 16, 32, 64, 128, and 256" 210 | # This is based on the possible BLOCK_SIZE_Ks 211 | # assert group_size % 64 == 0, "groupsize must be a multiple of 64, and 128" 212 | flatten_B = B * nh 213 | if mqa: 214 | flatten_B = B 215 | scales = scales.view(flatten_B, scales.shape[-2], scales.shape[-1]).transpose(1, 2).contiguous() 216 | zeros = zeros.view(flatten_B, zeros.shape[-2], zeros.shape[-1]).transpose(1, 2).contiguous() 217 | assert bits in [2, 4] 218 | c = 
kivi_gemv.gemv_forward_cuda_outer_dim(fA, qB, scales, zeros, bits, group_size, nh, mqa)
    c = c.view(B, nh, c.shape[-2], c.shape[-1])
    return c
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/qmodule.py:
--------------------------------------------------------------------------------
import torch
import dequant_cuda
from pack import quantize_and_pack


def make_divisible(c, divisor):
    # Smallest multiple count: ceil(c / divisor).
    return (c + divisor - 1) // divisor


def calculate_zeros_width(in_features, group_size=128, pack_num=8):
    # Width (in int32 words) of the packed zero-points buffer, rounded up
    # so it is a multiple of both pack_num and the group-size multiplier.
    if group_size >= 128:
        size_multiplier = 1
    elif group_size == 64:
        size_multiplier = 2
    elif group_size == 32:
        size_multiplier = 4
    else:
        raise NotImplementedError

    base_width = make_divisible(in_features // group_size, pack_num)
    base_width = make_divisible(base_width, size_multiplier) * size_multiplier
    return base_width


def dequantize_weight(qweight, d_out, d_in, w_bit, scales, zeros, group_size):
    # Unpack + dequantize a packed weight via the dequant_cuda extension.
    data = qweight.reshape(-1)
    N, num_features = d_out, d_in
    weight_fp = dequant_cuda.unpack_single_precision(data, w_bit, scales, zeros, N,
                                                     num_features // group_size, group_size)
    return weight_fp.view(d_out, d_in)


class MatMul4Bit(torch.autograd.Function):
    # forward is the same, but we added the fallback for pre-turing GPUs
    # backward is mostly the same, but adds one extra clause (see "elif state.CxB is not None")

    @staticmethod
    def forward(ctx, A, qweight, bias, d_out, d_in, w_bit, scales, zeros, group_size):
        # default of pytorch behavior if inputs are empty
        # 1. Dequantize
        # 2. MatmulnN
        weight_fp = dequantize_weight(qweight, d_out, d_in, w_bit, scales, zeros, group_size)
        output = torch.nn.functional.linear(A, weight_fp.to(A.dtype), bias)
        # 3. Save state
Save state 45 | ctx.state = (d_out, d_in, w_bit, scales, zeros, group_size) 46 | ctx.tensors = qweight 47 | return output 48 | 49 | 50 | @staticmethod 51 | def backward(ctx, grad_output): 52 | req_gradA, _, req_gradBias = ctx.needs_input_grad[:3] 53 | qweight = ctx.tensors 54 | d_out, d_in, w_bit, scales, zeros, group_size = ctx.state 55 | 56 | grad_A, grad_bias = None, None 57 | 58 | if req_gradBias: 59 | # compute grad_bias first before changing grad_output dtype 60 | grad_bias = grad_output.sum(0, dtype=ctx.dtype_bias) 61 | 62 | # not supported by PyTorch. TODO: create work-around 63 | #if req_gradB: grad_B = torch.matmul(grad_output.t(), A) 64 | if req_gradA: 65 | weight_fp = dequantize_weight(qweight, d_out, d_in, w_bit, scales, zeros, group_size) 66 | grad_A = torch.matmul(grad_output, weight_fp.to(grad_output.dtype)) 67 | if grad_A.isnan().any(): 68 | import ipdb; ipdb.set_trace() 69 | # print(grad_A.norm()) 70 | return grad_A, None, grad_bias, None, None, None, None, None, None 71 | 72 | 73 | class WQLinearForTrain(torch.nn.Module): 74 | def __init__(self, w_bit, group_size, in_features, out_features, bias, dev): 75 | super().__init__() 76 | 77 | if w_bit not in [4]: 78 | raise NotImplementedError("Only 4-bit are supported for now.") 79 | 80 | self.in_features = in_features 81 | self.out_features = out_features 82 | self.w_bit = w_bit 83 | self.group_size = group_size if group_size != -1 else in_features 84 | # quick sanity check (make sure aligment) 85 | assert self.in_features % self.group_size == 0 86 | assert out_features % (32 // self.w_bit) == 0 87 | pack_num = (32 // self.w_bit) 88 | self.register_buffer('qweight', torch.zeros((out_features, in_features // pack_num), dtype=torch.int32, device=dev)) 89 | self.register_buffer('zeros', torch.zeros((out_features, calculate_zeros_width(in_features, self.group_size)), dtype=torch.int32, device=dev)) 90 | self.register_buffer('scales', torch.zeros((out_features, calculate_zeros_width(in_features, 
self.group_size) * pack_num), dtype=torch.float16, device=dev))
        if bias:
            self.register_buffer('bias', torch.zeros((out_features), dtype=torch.float16, device=dev))
        else:
            self.bias = None


    def forward(self, x):
        # weight_fp = self.dequantize_weight().half()
        # out = torch.matmul(x, weight_fp.T)
        # out = out + self.bias if self.bias is not None else out

        # Differentiable 4-bit linear via the custom autograd Function.
        out = MatMul4Bit.apply(x, self.qweight, self.bias,
                               self.out_features, self.in_features,
                               self.w_bit, self.scales,
                               self.zeros, self.group_size)
        return out

    def dequantize_weight(self):
        # Unpack the stored buffers back into a dense float weight.
        data = self.qweight.reshape(-1)
        N, num_features = self.out_features, self.in_features
        weight_fp = dequant_cuda.unpack_single_precision(data, self.w_bit, self.scales, self.zeros, N,
                                                         num_features // self.group_size, self.group_size)
        return weight_fp.view(self.out_features, self.in_features)


    @classmethod
    def from_linear(cls, linear, w_bit, group_size, init_only=False, scales=None, zeros=None):
        # Alternate constructor: quantize an existing nn.Linear in place.
        q_linear = cls(w_bit, group_size, linear.in_features, linear.out_features, linear.bias is not None, linear.weight.device)
        if init_only:  # just prepare for loading sd
            return q_linear
        quantized, scales, mn = quantize_and_pack(linear.weight, group_size, w_bit, simulate=False)
        q_linear.qweight = quantized
        q_linear.scales = scales
        q_linear.zeros = mn
        return q_linear
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/setup.py:
--------------------------------------------------------------------------------
from setuptools import find_packages, setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension


extra_compile_args = {
    "cxx": [
        "-g",
        "-O3",
        "-fopenmp",
        "-lgomp",
        "-std=c++17",
        "-DENABLE_BF16"
    ],
    "nvcc": [
        "-O3",
        "-std=c++17",
        "-DENABLE_BF16",  # TODO
        "-U__CUDA_NO_HALF_OPERATORS__",
        "-U__CUDA_NO_HALF_CONVERSIONS__",
        "-U__CUDA_NO_BFLOAT16_OPERATORS__",
        "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
        "-U__CUDA_NO_BFLOAT162_OPERATORS__",
        "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
        "--expt-relaxed-constexpr",
        "--expt-extended-lambda",
        "--use_fast_math",
        "--threads=8"
    ],
}

setup(
    name="kivi_gemv",
    packages=find_packages(),
    ext_modules=[
        CUDAExtension(
            name="kivi_gemv",
            sources=[
                "csrc/pybind.cpp",
                "csrc/gemv_cuda.cu"
            ],
            extra_compile_args=extra_compile_args,
        ),
    ],
    cmdclass={"build_ext": BuildExtension},
    install_requires=["torch"],
)
--------------------------------------------------------------------------------
/cuda_supported_gear/quant/test.py:
--------------------------------------------------------------------------------
import torch
import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"
import numpy as np
import random
import ipdb
import math
import os
import triton
from new_pack import quant_and_pack_vcache, unpack_and_dequant_kcache, triton_quantize_and_pack_along_last_dim, unpack_and_dequant_vcache, quant_and_pack_kcache
from matmul import triton_bmm_fA_qB_outer
from timeit_v2 import py_benchmark


def set_seed(seed):
    # Seed all RNG sources used by the tests for reproducibility.
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    random.seed(seed)


def test_vcache():
    # Round-trip quantize/dequantize of the value cache; prints the mean
    # relative error per bit-width.
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    B, nh, T, hd = 555, 32, 433, 128
    v = torch.randn((B, nh, T, hd), device='cuda', dtype=torch.float16)
    group_size = 64
    for bits in [2, 4, 8]:
        code, scale, mn = triton_quantize_and_pack_along_last_dim(v, group_size, bits)
        # print(f'bit {bits}, scale.shape: {scale.shape}')
        # print(f'bit {bits}, code.shape: {code.shape}')
        dequant_v =
unpack_and_dequant_vcache(code, scale.unsqueeze(-1), mn.unsqueeze(-1), group_size, bits)
        assert not dequant_v.isnan().any()
        gap = (dequant_v - v) / v
        gap = torch.nan_to_num(gap)
        print(f'bit {bits}, mean v rel arr: {torch.mean(torch.abs(gap))}')


def test_kcache():
    # Round-trip quantize/dequantize of the (transposed) key cache.
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    BS, nh, T, D = 11, 32, 4096, 128
    k = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    group_size = 64
    for bits in [2, 4, 8]:
        code, scale, mn = triton_quantize_and_pack_along_last_dim(k.transpose(2, 3).contiguous(),
                                                                  group_size,
                                                                  bits)
        dequant_k = unpack_and_dequant_vcache(code, scale.unsqueeze(-1), mn.unsqueeze(-1), group_size, bits)
        assert not dequant_k.isnan().any()
        gap = (dequant_k.transpose(2, 3) - k) / k
        gap = torch.nan_to_num(gap)
        print(f'bit {bits}, k mean rel arr: {torch.mean(torch.abs(gap))}')


def test_bmm_speed():
    # Benchmarks: quant+pack throughput, fused QK^T and AV batched matmuls
    # vs. plain torch.matmul baselines.
    BS, nh, T, D = 64, 32, 512, 128
    bits = 2
    key_state = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    val_state = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    group_size = 64
    query_len = 1
    query_state = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)

    # quantiles = [0.5, 0.2, 0.8]
    # ms, min_ms, max_ms = triton.testing.do_bench(
    #     lambda: triton_quantize_and_pack_along_last_dim(key_state.transpose(2,3).contiguous(),
    #                                                     group_size, bits), quantiles=quantiles)
    # print(f'batch size {BS} nh {nh} seqlen {T} quant and pack pytorch impl: {ms * 1000: .2f} ms')
    code, scale, mn = triton_quantize_and_pack_along_last_dim(
        key_state.transpose(2,3).contiguous(), group_size, bits)
    code = code.contiguous()
    scale = scale.contiguous()
    mn = mn.contiguous()

    stmt = "triton_quantize_and_pack_along_last_dim(key_state.transpose(2,3).contiguous(), group_size, bits)"
    t_triton_quant = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                                  setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'our triton quant & pack impl: {t_triton_quant * 1000} ms')
    stmt = "quant_and_pack_kcache(key_state, group_size, bits)"
    t_quant = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                           setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'vanilla pytorch quant & pack impl: {t_quant * 1000} ms')
    stmt = 'triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits)'
    t_qk = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                        setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} our fused batch qk impl: {t_qk * 1000: .2f} ms')
    stmt = 'torch.matmul(query_state, key_state.transpose(2, 3))'
    t_qk_ref = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                            setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} pytorch batch qk impl: {t_qk_ref * 1000: .2f} ms')
    attn_weight = torch.randn((BS, nh, query_len, T), device='cuda', dtype=torch.float16)
    code, scale, mn = triton_quantize_and_pack_along_last_dim(
        val_state, group_size, bits)
    stmt = 'triton_bmm_fA_qB_outer(group_size, attn_weight, code, scale, mn, bits)'
    t_av = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                        setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} our fused batch av impl: {t_av * 1000: .2f} ms')
    stmt = 'torch.matmul(attn_weight, val_state)'
    t_av_ref = py_benchmark(stmt, {**globals(), **locals()}, min_repeat_second=3,
                            setup="torch.cuda.synchronize()", finish="torch.cuda.synchronize()")
    print(f'batch size {BS} seqlen {T} pytorch batch av impl: {t_av_ref * 1000: .2f} ms')

    #
# ---------------------------------------------------------------------------
# Scratch accuracy-check code kept from development (commented out): compares
# the fused quantized matmul against the fp16 reference and prints the error.
# ---------------------------------------------------------------------------
# _code, _scale, _mn = quant_and_pack_kcache(
#     key_state, group_size, bits)
# _code = _code.transpose(2, 3)
# _scale = _scale.squeeze(-2).transpose(2,3)
# _mn = _mn.squeeze(-2).transpose(2,3)
# print(_code.shape, code.shape, _code.dtype, code.dtype)
# print(_scale.shape, scale.shape, _scale.dtype, scale.dtype)

# our_out = triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits)
# ref_out = torch.matmul(query_state, key_state.transpose(2, 3))
# gap = (our_out - ref_out) / ref_out
# gap = torch.nan_to_num(gap)
# err = torch.mean(torch.abs(gap)).item()
# print(f'bits {bits}, err: {err}')
# ms, min_ms, max_ms = triton.testing.do_bench(lambda: triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits), quantiles=quantiles)
# print(f'batch size {BS} seqlen {T} our fused batch matmul impl: {ms * 1000: .2f} ms')
# ms, min_ms, max_ms = triton.testing.do_bench(lambda: torch.matmul(query_state, key_state.transpose(2, 3)), quantiles=quantiles)
# print(f'batch size {BS} seqlen {T} pytorch batch matmul impl: {ms * 1000: .2f} ms')


def test_streaming_kvcache():
    """Simulate step-by-step decoding with a quantized KV cache and compare
    each step's attention weights and attention output against a
    full-precision reference, printing the mean relative error of both.

    Each of the 16 decode steps appends one new (key, value) pair: new keys
    go into a small full-precision residual, new values are quantized and
    appended to the packed value cache immediately.
    """
    BS, nh, T, D = 1, 32, 340, 128
    our_attn_output = None
    group_size = 64
    query_len = 1
    bits = 2
    key_states = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    value_states = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    # Split keys into a quantized prefix (a multiple of group_size tokens)
    # and a full-precision residual holding the T % group_size newest tokens.
    # NOTE(review): assumes T % group_size != 0 (340 % 64 == 20 here); if T
    # were a multiple of group_size, `:-0` / `-0:` would select the wrong
    # slices — confirm callers never hit that case.
    key_states_quant = key_states[:, :, :-(key_states.shape[-2] % group_size), :].contiguous()
    key_states_full = key_states[:, :, -(key_states.shape[-2] % group_size):, :].contiguous()
    value_states_quant, value_scale, value_mn = triton_quantize_and_pack_along_last_dim(value_states,
                                                                                        group_size,
                                                                                        bits)
    # Keys are quantized along the token axis, hence the transpose first.
    key_states_quant_trans, key_scale_trans, key_mn_trans = triton_quantize_and_pack_along_last_dim(key_states_quant.transpose(2, 3).contiguous(),
                                                                                                    group_size, bits)
    for i in range(16):
        # Feed the previous attention output back in as the next query —
        # an arbitrary but deterministic driver for the simulation.
        if our_attn_output is None:
            query_states = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
        else:
            query_states = our_attn_output
        key_states_new = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
        value_states_new = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
        # QK^T against the quantized key prefix (fused kernel) ...
        att_qkquant = triton_bmm_fA_qB_outer(group_size, query_states, key_states_quant_trans,
                                             key_scale_trans, key_mn_trans, bits)
        # ... and against the full-precision residual keys (plain matmul).
        key_states_full = torch.cat([key_states_full, key_states_new], dim=2)
        att_qkfull = torch.matmul(query_states, key_states_full.transpose(2, 3))
        our_att_weights = torch.cat([att_qkquant, att_qkfull], dim=-1) / math.sqrt(D)
        our_att_weights = torch.softmax(our_att_weights, dim=-1)
        # New values are quantized immediately and appended to the packed cache.
        value_states_quant_new, scale, mn = triton_quantize_and_pack_along_last_dim(value_states_new,
                                                                                    group_size,
                                                                                    bits)
        value_states_quant = torch.cat([value_states_quant, value_states_quant_new], dim=2)
        value_scale = torch.cat([value_scale, scale], dim=2)
        value_mn = torch.cat([value_mn, mn], dim=2)
        our_attn_output = triton_bmm_fA_qB_outer(group_size, our_att_weights, value_states_quant,
                                                 value_scale, value_mn, bits)
        # === full-precision reference path ===
        key_states = torch.cat([key_states, key_states_new], dim=2)
        value_states = torch.cat([value_states, value_states_new], dim=2)
        ref_att_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(D)
        ref_att_weights = torch.softmax(ref_att_weights, dim=-1)
        ref_att_out = torch.matmul(ref_att_weights, value_states)
        att_weight_gap = (ref_att_weights - our_att_weights) / ref_att_weights
        print(f'i {i} bit {bits}, mean att weight rel arr: {torch.mean(torch.abs(att_weight_gap))}')
        att_out_gap = (ref_att_out - our_attn_output) / ref_att_out
        print(f'i {i} bit {bits}, mean att out rel arr: {torch.mean(torch.abs(att_out_gap))}')
def test_4d_qmatmul():
    """Accuracy check of the fused fp16 x quantized batched matmul.

    Quantizes an integer-valued key tensor at 8/4/2 bits, runs the fused
    Triton kernel against the fp16 torch.matmul reference, and prints the
    mean relative error for each bit width.
    """
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    query_len = 1
    BS, nh, T, D = 16, 32, 1024, 128
    group_size = 64
    # k = torch.randn((BS, nh, T, D), device='cuda', dtype=torch.float16)
    # query_state = torch.randn((BS, nh, query_len, D), device='cuda', dtype=torch.float16)
    # Small-integer inputs keep the fp16 reference matmul exact.
    k = torch.randint(10, (BS, nh, T, D), device='cuda').to(torch.float16)
    query_state = torch.randint(5, (BS, nh, query_len, D), device='cuda').to(torch.float16)
    for bits in [8, 4, 2]:
        # code.shape == BS, nh, T // feat_per_int, D
        # scale, mn.shape == BS, nh, ng, 1, D
        code, scale, mn = quant_and_pack_kcache(k, group_size, bits)
        # NOTE(review): dequant_k is unused; kept only as a smoke test of the
        # unpack path — confirm whether it can be dropped.
        dequant_k = unpack_and_dequant_kcache(code, scale, mn, group_size, bits)
        # BS, nh, D, T // feat_per_int
        code = code.transpose(2, 3)
        # BS, nh, D, T // group_size
        scale = scale.view(BS, nh, -1, D).transpose(2, 3)
        mn = mn.view(BS, nh, -1, D).transpose(2, 3)
        our_out = triton_bmm_fA_qB_outer(group_size, query_state, code, scale, mn, bits)
        ref_out = torch.matmul(query_state, k.transpose(2, 3))
        assert not our_out.isnan().any()
        assert not ref_out.isnan().any()
        # Mean relative error; nan_to_num guards division-by-zero entries.
        gap = (our_out - ref_out) / ref_out
        gap = torch.nan_to_num(gap)
        err = torch.mean(torch.abs(gap)).item()
        print(f'bits {bits}, err: {err}')


if __name__ == '__main__':
    # Fixed seed so benchmark / accuracy runs are reproducible.
    set_seed(114514)
    # test_kcache()
    # test_vcache()
    # test_4d_qmatmul()
    # test_streaming_kvcache()
    test_bmm_speed()

# ---------------------------------------------------------------------------
# cuda_supported_gear/quant/test1.py
# ---------------------------------------------------------------------------
from new_pack import triton_quantize_and_pack_along_last_dim_witherror, triton_quantize_and_pack_along_last_dim
import torch
import time

# Micro-benchmark of the fused quantize+pack kernel (error-returning variant)
# on a (1, 32, 100, 4096) fp32 tensor, 4-bit, group size 128.
tensor = torch.rand([1, 32, 100, 4096]).cuda()
group = 128
# CUDA kernel launches are asynchronous: synchronize before and after the
# timed region so `end` measures kernel execution, not just launch overhead.
torch.cuda.synchronize()
start = time.time()
result, scale, mn, error = triton_quantize_and_pack_along_last_dim_witherror(tensor, group, 4)
torch.cuda.synchronize()
end = time.time() - start
# The kernel returns the per-element quantization error flattened; restore
# the input layout before inspecting it.
error = error.reshape(1, 32, 100, 4096)
print(error.shape)
print(f'quant+pack time: {end:.4f}s')

# ---------------------------------------------------------------------------
# cuda_supported_gear/quant/timeit_v2.py
# ---------------------------------------------------------------------------
# timeit_v2.py: Copied from the default library with the following two modifications
# 1. Add 'finish' argument to timeit for calling cuda synchronization.
# 2. Add accurate measurement utility function py_benchmark

"""Tool for measuring execution time of small code snippets.

This module avoids a number of common traps for measuring execution
times.  See also Tim Peters' introduction to the Algorithms chapter in
the Python Cookbook, published by O'Reilly.

Library usage: see the Timer class.

Command line usage:
    python timeit.py [-n N] [-r N] [-s S] [-p] [-h] [--] [statement]

Options:
  -n/--number N: how many times to execute 'statement' (default: see below)
  -r/--repeat N: how many times to repeat the timer (default 5)
  -s/--setup S: statement to be executed once initially (default 'pass').
                Execution time of this setup statement is NOT timed.
  -p/--process: use time.process_time() (default is time.perf_counter())
  -v/--verbose: print raw timing results; repeat for more digits precision
  -u/--unit: set the output time unit (nsec, usec, msec, or sec)
  -h/--help: print this usage message and exit
  --: separate options from statement, use when statement starts with -
  statement: statement to be timed (default 'pass')

A multi-line statement may be given by specifying each line as a
separate argument; indented lines are possible by enclosing an
argument in quotes and using leading spaces.  Multiple -s options are
treated similarly.

If -n is not given, a suitable number of loops is calculated by trying
successive powers of 10 until the total time is at least 0.2 seconds.

Note: there is a certain baseline overhead associated with executing a
pass statement.  It differs between versions.  The code here doesn't try
to hide it, but you should be aware of it.  The baseline overhead can be
measured by invoking the program without arguments.

Classes:

    Timer

Functions:

    timeit(string, string) -> float
    repeat(string, string) -> list
    default_timer() -> float
"""

import gc
import sys
import time
import itertools

__all__ = ["Timer", "timeit", "repeat", "default_timer"]

# Fake filename used when compiling the generated timing function, so
# tracebacks from timed code can be mapped back to the template source.
# (Restored to the stdlib value; the angle brackets were lost in an export.)
dummy_src_name = "<timeit-src>"
default_number = 1000000
default_repeat = 5
default_timer = time.perf_counter

_globals = globals

# Don't change the indentation of the template; the reindent() calls
# in Timer.__init__() depend on setup being indented 4 spaces and stmt
# being indented 8 spaces.
# Code template for the generated timing closure.  Compared to the stdlib
# version, a {finish} slot is added after the loop (used to inject e.g.
# torch.cuda.synchronize() before the end timestamp is taken).
template = """
def inner(_it, _timer{init}):
    {setup}
    _t0 = _timer()
    for _i in _it:
        {stmt}
    {finish}
    _t1 = _timer()
    return _t1 - _t0
"""

def reindent(src, indent):
    """Helper to reindent a multi-line statement."""
    return src.replace("\n", "\n" + " "*indent)

class Timer:
    """Class for timing execution speed of small code snippets.

    The constructor takes a statement to be timed, an additional
    statement used for setup, and a timer function.  Both statements
    default to 'pass'; the timer function is platform-dependent (see
    module doc string).  If 'globals' is specified, the code will be
    executed within that namespace (as opposed to inside timeit's
    namespace).

    To measure the execution time of the first statement, use the
    timeit() method.  The repeat() method is a convenience to call
    timeit() multiple times and return a list of results.

    The statements may contain newlines, as long as they don't contain
    multi-line string literals.
    """

    def __init__(self, stmt="pass", setup="pass", finish='pass', timer=default_timer,
                 globals=None):
        """Constructor.  See class doc string."""
        self.timer = timer
        local_ns = {}
        global_ns = _globals() if globals is None else globals
        init = ''
        if isinstance(setup, str):
            # Check that the code can be compiled outside a function
            compile(setup, dummy_src_name, "exec")
            stmtprefix = setup + '\n'
            setup = reindent(setup, 4)
        elif callable(setup):
            local_ns['_setup'] = setup
            init += ', _setup=_setup'
            stmtprefix = ''
            setup = '_setup()'
        else:
            raise ValueError("setup is neither a string nor callable")
        if isinstance(stmt, str):
            # Check that the code can be compiled outside a function
            compile(stmtprefix + stmt, dummy_src_name, "exec")
            stmt = reindent(stmt, 8)
        elif callable(stmt):
            local_ns['_stmt'] = stmt
            init += ', _stmt=_stmt'
            stmt = '_stmt()'
        else:
            raise ValueError("stmt is neither a string nor callable")

        # The 'finish' hook (new vs. stdlib) must be a statement string; it is
        # spliced into the template after the loop, before the end timestamp.
        assert isinstance(finish, str)
        compile(setup + '\n' + stmt + '\n' + finish, dummy_src_name, 'exec')
        finish = reindent(finish, 4)

        src = template.format(stmt=stmt, setup=setup, init=init, finish=finish)
        self.src = src  # Save for traceback display
        code = compile(src, dummy_src_name, "exec")
        exec(code, global_ns, local_ns)
        self.inner = local_ns["inner"]

    def print_exc(self, file=None):
        """Helper to print a traceback from the timed code.

        Typical use:

            t = Timer(...)       # outside the try/except
            try:
                t.timeit(...)    # or t.repeat(...)
            except:
                t.print_exc()

        The advantage over the standard traceback is that source lines
        in the compiled template will be displayed.

        The optional file argument directs where the traceback is
        sent; it defaults to sys.stderr.
        """
        import linecache, traceback
        if self.src is not None:
            linecache.cache[dummy_src_name] = (len(self.src),
                                               None,
                                               self.src.split("\n"),
                                               dummy_src_name)
        # else the source is already stored somewhere else

        traceback.print_exc(file=file)

    def timeit(self, number=default_number):
        """Time 'number' executions of the main statement.

        To be precise, this executes the setup statement once, and
        then returns the time it takes to execute the main statement
        a number of times, as a float measured in seconds.  The
        argument is the number of times through the loop, defaulting
        to one million.  The main statement, the setup statement and
        the timer function to be used are passed to the constructor.
        """
        it = itertools.repeat(None, number)
        # GC is disabled during timing so collections don't pollute the
        # measurement; restored to its prior state afterwards.
        gcold = gc.isenabled()
        gc.disable()
        try:
            timing = self.inner(it, self.timer)
        finally:
            if gcold:
                gc.enable()
        return timing

    def repeat(self, repeat=default_repeat, number=default_number):
        """Call timeit() a few times.

        This is a convenience function that calls the timeit()
        repeatedly, returning a list of results.  The first argument
        specifies how many times to call timeit(), defaulting to 5;
        the second argument specifies the timer argument, defaulting
        to one million.

        Note: it's tempting to calculate mean and standard deviation
        from the result vector and report these.  However, this is not
        very useful.  In a typical case, the lowest value gives a
        lower bound for how fast your machine can run the given code
        snippet; higher values in the result vector are typically not
        caused by variability in Python's speed, but by other
        processes interfering with your timing accuracy.  So the min()
        of the result is probably the only number you should be
        interested in.  After that, you should look at the entire
        vector and apply common sense rather than statistics.
        """
        r = []
        for i in range(repeat):
            t = self.timeit(number)
            r.append(t)
        return r

    def autorange(self, callback=None):
        """Return the number of loops and time taken so that total time >= 0.2.

        Calls the timeit method with increasing numbers from the sequence
        1, 2, 5, 10, 20, 50, ... until the time taken is at least 0.2
        second.  Returns (number, time_taken).

        If *callback* is given and is not None, it will be called after
        each trial with two arguments: ``callback(number, time_taken)``.
        """
        i = 1
        while True:
            for j in 1, 2, 5:
                number = i * j
                time_taken = self.timeit(number)
                if callback:
                    callback(number, time_taken)
                if time_taken >= 0.2:
                    return (number, time_taken)
            i *= 10

def timeit(stmt="pass", setup="pass", finish='pass', timer=default_timer,
           number=default_number, globals=None):
    """Convenience function to create Timer object and call timeit method."""
    return Timer(stmt, setup, finish, timer, globals).timeit(number)

def repeat(stmt="pass", setup="pass", finish='pass', timer=default_timer,
           repeat=default_repeat, number=default_number, globals=None):
    """Convenience function to create Timer object and call repeat method."""
    return Timer(stmt, setup, finish, timer, globals).repeat(repeat, number)

def py_benchmark(stmt, context, min_repeat_second=1, setup='pass', finish='pass'):
    # Accurate measurement helper: keep growing the iteration count until the
    # total runtime reaches min_repeat_second, then return the average
    # seconds per execution of stmt.
    total_time = 0
    number = 10

    # NOTE(review): eval() only accepts expressions, while Timer itself also
    # accepts statements — a statement-valued stmt would raise here. Confirm
    # all callers pass expressions.
    eval(stmt, context) # warmup
    total_time = timeit(stmt=stmt, setup=setup, finish=finish, number=number, globals=context)
    while total_time < min_repeat_second:
        number = int(number * (min_repeat_second / total_time)) + 1
        total_time = timeit(stmt=stmt, setup=setup, finish=finish, number=number,
                            globals=context)

    return total_time / number

# ---------------------------------------------------------------------------
# cuda_supported_gear/test.py — end-to-end generation benchmark comparing
# GEAR+KIVI, KIVI, and an uncompressed Llama-2-7B on wikitext prompts;
# reports wall-clock generation time and peak GPU memory.
# ---------------------------------------------------------------------------
from modeling_llamagear import LlamaForCausalLM_GEARKIVI
from modeling_llama_kivi import LlamaForCausalLM_KIVI
from transformers import LlamaConfig, AutoTokenizer, LlamaForCausalLM
from transformers import BitsAndBytesConfig
from datasets import load_dataset
import torch
import argparse


#### Config for KIVI model
config = LlamaConfig.from_pretrained("meta-llama/Llama-2-7b-hf")

config.k_bits = 2# current support 2/4 bit for KV Cache
config.v_bits = 2 # current support 2/4 bit for KV Cache
config.group_size = 64
config.residual_length = 64 # the number of recent fp16 tokens

# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
parser = argparse.ArgumentParser(description="Evaluate AQuA Tasks")
parser.add_argument("--batch_size", type=int, default=8, help="Batch size.")
parser.add_argument("--model", type=str, default="meta-llama/Llama-2-7b", help="Model name or path.")
args = parser.parse_args()

max_token = 1000 ### prefill_length
max_generation_length = 1500 ### generate 500 new tokens
batch_size = args.batch_size

##### Config for GEAR compression
compress_config = {}
compress_config["compress_method"] = "gearlKIVI" # "gearlKIVI" "gearsKIVI"
compress_config["group_size"] = 64
compress_config["residual"] = 64
compress_config["quantize_bit"] = 2
compress_config["rank"] = 2 ## prefill rank
compress_config["rankv"] = 2 ## prefill rank
compress_config["loop"] = 3
# compress_config["stream_list"] = stream_list
stream_list = [torch.cuda.Stream(),torch.cuda.Stream()]

# Select the model variant by substring match on --model.
# NOTE(review): the default --model value ("meta-llama/Llama-2-7b") matches
# none of the branches below, leaving `model` undefined — confirm that an
# explicit variant flag is always required.
if "gearl" in args.model:
    model = LlamaForCausalLM_GEARKIVI.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        config = config,
        # quantization_config = quantization_config,
        compress_config = compress_config,
        device_map = "cuda:0"
    )
elif "KIVI" in args.model:
    model = LlamaForCausalLM_KIVI.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        config = config,
        # quantization_config = quantization_config,
        # compress_config = compress_config,

        device_map = "cuda:0"
    )
elif "None" in args.model:
    model = LlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",

        device_map = "cuda:0")
    # The uncompressed baseline is cast to fp16 to match the others.
    model = model.half()




tokenizer = AutoTokenizer.from_pretrained(
    'meta-llama/Llama-2-7b-hf',
    model_max_length=max_token,
    max_length=max_token,
    use_fast=False,
    trust_remote_code=True,
    tokenizer_type='llama')
tokenizer.pad_token = tokenizer.eos_token
test = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
text_combined = test["text"]

# Build a batch of identical prompts (first max_token rows of the corpus).
sentence_group = []
for i in range(batch_size):
    # sentence_group.append(str(text_combined[i*max_token:(i+1)*max_token]))
    sentence_group.append(str(text_combined[0:max_token]))
inputs = tokenizer(
    sentence_group,
    return_tensors="pt",
    padding="max_length",
    truncation=True,
)
print("begin")
inputs = inputs.to("cuda:0")
print(inputs.input_ids.shape)
import time

start = time.time()
result = model.generate(**inputs, max_length=max_generation_length, use_cache=True)
torch.cuda.synchronize()
end = time.time()
peak_memory = torch.cuda.max_memory_allocated(device="cuda") / (1024**2) # convert to MB

print(f"Peak memory usage on GPU: {peak_memory} MB")
print("time",end - start)
# result = tokenizer.batch_decode(result, skip_special_tokens=True)
# print(result)
# model = model.cuda()
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==2.1.0 2 | accelerate==0.27.2 3 | aiohttp @ file:///rapids/aiohttp-3.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=5c4fa235d534b3547184831c624c0b7c1e262cd1de847d95085ec94c16fddcd5 4 | aiosignal @ file:///rapids/aiosignal-1.3.1-py3-none-any.whl#sha256=f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17 5 | annotated-types==0.6.0 6 | apex @ file:///opt/pytorch/apex 7 | argon2-cffi==23.1.0 8 | argon2-cffi-bindings==21.2.0 9 | asttokens==2.4.1 10 | astunparse==1.6.3 11 | async-timeout @ file:///rapids/async_timeout-4.0.3-py3-none-any.whl#sha256=7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028 12 | attrs==23.2.0 13 | audioread==3.0.1 14 | beautifulsoup4==4.12.3 15 | bleach==6.1.0 16 | blis==0.7.11 17 | cachetools==5.3.2 18 | catalogue==2.0.10 19 | certifi==2023.11.17 20 | cffi==1.16.0 21 | charset-normalizer==3.3.2 22 | click==8.1.7 23 | cloudpathlib==0.16.0 24 | cloudpickle @ file:///rapids/cloudpickle-3.0.0-py3-none-any.whl#sha256=246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7 25 | cmake==3.28.1 26 | comm==0.2.1 27 | confection==0.1.4 28 | contourpy==1.2.0 29 | cubinlinker @ file:///rapids/cubinlinker-0.3.0%2B2.ga3774f2-cp310-cp310-linux_aarch64.whl#sha256=87c9fac61fe6c6ea8c5b5130e9d5535aa2c7def2142163abc2985b40e1f14d18 30 | cuda-python @ file:///rapids/cuda_python-12.3.0rc4%2B9.ga6806e8.dirty-cp310-cp310-linux_aarch64.whl#sha256=12dd3a8380688d577c31c4ffe5d56ff59a4c13f85f9cb0afd5adb78efb45bd90 31 | cudf @ file:///rapids/cudf-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=6d3a328ababdb48a2a6bd18e0d6c22c0dae27fba45c8f0fc6acf5334eced8dd7 32 | cugraph @ file:///rapids/cugraph-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=c60964175a813e961ebc82863987130d89b3b92305ee4fe3ca2cef258f2c93fb 33 | cugraph-dgl @ 
file:///rapids/cugraph_dgl-23.12.0-py3-none-any.whl#sha256=d14b75aef4008764766150cb308501ffba0f6116a8042af82b9b0c93d0eb1164 34 | cugraph-service-client @ file:///rapids/cugraph_service_client-23.12.0-py3-none-any.whl#sha256=145febd98846e086b75a1c4802854aa0e45bb12930dea4d49386258459f085ae 35 | cugraph-service-server @ file:///rapids/cugraph_service_server-23.12.0-py3-none-any.whl#sha256=d0249296a61ee5c4156564f26d9319f23d9faad25ac83c9deb9f56e6a9f63751 36 | cuml @ file:///rapids/cuml-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=d0ef9d40fdbef555adc249717fd4a747ffe7f932d9b10b025bfcd279b83bbd91 37 | cupy-cuda12x @ file:///rapids/cupy_cuda12x-12.3.0-cp310-cp310-manylinux2014_aarch64.whl#sha256=77687b019383eb89da981638cfec6d5ba556a15f1be0945e8f7f80030b851d04 38 | cycler==0.12.1 39 | cymem==2.0.8 40 | Cython==3.0.8 41 | dask @ file:///rapids/dask-2023.11.0-py3-none-any.whl#sha256=b950951ee3f8c86f003b577b6928ecf20089eee6677719578deaba8fd9a78203 42 | dask-cuda @ file:///rapids/dask_cuda-23.12.0-py3-none-any.whl#sha256=c8dab311674c77cebe2831d311cdd3db72f605471120764001c3a1b1ccfe40f3 43 | dask-cudf @ file:///rapids/dask_cudf-23.12.0-py3-none-any.whl#sha256=5fe94040ecd22a53928b8977c4f5c780a0326dda5994f3ed9afcbe377fd67d27 44 | dataclasses-json==0.6.4 45 | datasets==2.18.0 46 | debugpy==1.8.0 47 | decorator==5.1.1 48 | defusedxml==0.7.1 49 | dill==0.3.8 50 | distributed @ file:///rapids/distributed-2023.11.0-py3-none-any.whl#sha256=44ad1fff31ece202cc64bdb72dd33d6964d78bdbe1ec1ec06e01f9544187cd2e 51 | dm-tree==0.1.8 52 | einops==0.7.0 53 | exceptiongroup==1.2.0 54 | execnet==2.0.2 55 | executing==2.0.1 56 | expecttest==0.1.3 57 | fastjsonschema==2.19.1 58 | fastrlock @ file:///rapids/fastrlock-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl#sha256=9121a894d74e65557e47e777060a495ab85f4b903e80dd73a3c940ba042920d7 59 | filelock==3.13.1 60 | flash-attn==2.0.4 61 | fonttools==4.47.2 62 | frozenlist @ 
file:///rapids/frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a 63 | fsspec==2023.12.2 64 | gast==0.5.4 65 | GEARLM==0.0.6 66 | google-auth==2.26.2 67 | google-auth-oauthlib==0.4.6 68 | graphsurgeon @ file:///workspace/TensorRT-8.6.1.6/graphsurgeon/graphsurgeon-0.4.6-py2.py3-none-any.whl#sha256=0fbadaefbbe6e9920b9f814ae961c4a279be602812edf3ed7fb9cc6f8f4809fe 69 | grpcio==1.60.0 70 | huggingface-hub==0.21.4 71 | hypothesis==5.35.1 72 | idna==3.6 73 | importlib-metadata @ file:///rapids/importlib_metadata-7.0.1-py3-none-any.whl#sha256=4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e 74 | iniconfig==2.0.0 75 | ipykernel==6.29.0 76 | ipython==8.20.0 77 | ipython-genutils==0.2.0 78 | jedi==0.19.1 79 | Jinja2==3.1.3 80 | joblib==1.3.2 81 | json5==0.9.14 82 | jsonschema==4.21.1 83 | jsonschema-specifications==2023.12.1 84 | jupyter-tensorboard @ git+https://github.com/cliffwoolley/jupyter_tensorboard.git@ffa7e26138b82549453306e06b535a9ac36db17a 85 | jupyter_client==8.6.0 86 | jupyter_core==5.7.1 87 | jupyterlab==2.3.2 88 | jupyterlab-server==1.2.0 89 | jupyterlab_pygments==0.3.0 90 | jupytext==1.16.1 91 | kiwisolver==1.4.5 92 | langcodes==3.3.0 93 | lazy_loader==0.3 94 | librosa==0.10.1 95 | llvmlite @ file:///rapids/llvmlite-0.40.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=bba2747cf5b4954e945c287fe310b3fcc484e2a9d1b0c273e99eb17d103bb0e6 96 | locket @ file:///rapids/locket-1.0.0-py2.py3-none-any.whl#sha256=b6c819a722f7b6bd955b80781788e4a66a55628b858d347536b7e81325a3a5e3 97 | Markdown==3.5.2 98 | markdown-it-py==3.0.0 99 | MarkupSafe==2.1.4 100 | marshmallow==3.21.1 101 | matplotlib==3.8.2 102 | matplotlib-inline==0.1.6 103 | mdit-py-plugins==0.4.0 104 | mdurl==0.1.2 105 | mistune==3.0.2 106 | mock==5.1.0 107 | mpmath==1.3.0 108 | msgpack==1.0.7 109 | multidict @ 
file:///rapids/multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b 110 | multiprocess==0.70.16 111 | murmurhash==1.0.10 112 | mypy-extensions==1.0.0 113 | nbclient==0.9.0 114 | nbconvert==7.14.2 115 | nbformat==5.9.2 116 | nest-asyncio==1.5.9 117 | networkx==2.6.3 118 | ninja==1.11.1.1 119 | notebook==6.4.10 120 | numba @ file:///rapids/numba-0.57.1%2B1.g29d216d31-cp310-cp310-linux_aarch64.whl#sha256=460ced73b5a210021b92e6851700dcdb478abb79927a2858e19824f7d573f4a1 121 | numpy==1.24.4 122 | nvfuser==0.1.1+gitunknown 123 | nvidia-dali-cuda120==1.33.0 124 | nvidia-pyindex==1.0.9 125 | nvtx @ file:///rapids/nvtx-0.2.5-cp310-cp310-linux_aarch64.whl#sha256=0d42158092675873d70f2951b416f3039b92e01a79e13cc9e09b7dd1b80b43a4 126 | oauthlib==3.2.2 127 | onnx @ file:///opt/pytorch/pytorch/third_party/onnx 128 | opencv @ file:///opencv-4.7.0/modules/python/package 129 | optree==0.10.0 130 | package-name==0.1 131 | packaging==23.2 132 | pandas @ file:///rapids/pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354 133 | pandocfilters==1.5.1 134 | parso==0.8.3 135 | partd @ file:///rapids/partd-1.4.1-py3-none-any.whl#sha256=27e766663d36c161e2827aa3e28541c992f0b9527d3cca047e13fb3acdb989e6 136 | pexpect==4.9.0 137 | pillow @ file:///rapids/pillow-10.2.0-cp310-cp310-manylinux_2_28_aarch64.whl#sha256=47c0995fc4e7f79b5cfcab1fc437ff2890b770440f7696a3ba065ee0fd496563 138 | platformdirs==4.1.0 139 | pluggy==1.3.0 140 | ply @ file:///rapids/ply-3.11-py2.py3-none-any.whl#sha256=096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce 141 | polygraphy==0.49.1 142 | pooch==1.8.0 143 | preshed==3.0.9 144 | prettytable==3.9.0 145 | prometheus-client==0.19.0 146 | prompt-toolkit==3.0.43 147 | protobuf==4.24.4 148 | psutil @ 
file:///rapids/psutil-5.9.4-cp310-abi3-linux_aarch64.whl#sha256=356ff1edab701bc3b17bafa541512735ea6ae40c25bbc6df206d15307abbb299 149 | ptxcompiler @ file:///rapids/ptxcompiler-0.8.1%2B2.gf98b68a-cp310-cp310-linux_aarch64.whl#sha256=99c401500b705e327d79608a8bcaf4dbcb2aafd4e7443f874ece14beba476538 150 | ptyprocess==0.7.0 151 | pure-eval==0.2.2 152 | pyarrow @ file:///rapids/pyarrow-14.0.1.dev0%2Bgba5374836.d20240125-cp310-cp310-linux_aarch64.whl#sha256=8de1dfb159d5d9ff0220c46cda0f6251770951fd6e264d13ba10fb386d04e310 153 | pyarrow-hotfix==0.6 154 | pyasn1==0.5.1 155 | pyasn1-modules==0.3.0 156 | pybind11==2.11.1 157 | pybind11-global==2.11.1 158 | pycocotools @ git+https://github.com/nvidia/cocoapi.git@d99cbf3823588ef09a2721655f46e509ebafb3d7#subdirectory=PythonAPI 159 | pycparser==2.21 160 | pydantic==2.5.3 161 | pydantic_core==2.14.6 162 | Pygments==2.17.2 163 | pylibcugraph @ file:///rapids/pylibcugraph-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=8a9c83cbd56be292f856ca9c0541a3a86b596749ff58d63287da43b63e6c069e 164 | pylibcugraphops @ file:///rapids/pylibcugraphops-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=4ff3107253c13dae4873d1b83d5a640c1fab5556fad789a7b2999e321aa89b20 165 | pylibraft @ file:///rapids/pylibraft-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=a54f1e00fed773c5c85db1f59c87f42bee90de2d9f28694a0f8ec899f572e12c 166 | pynvml @ file:///rapids/pynvml-11.4.1-py3-none-any.whl#sha256=d27be542cd9d06558de18e2deffc8022ccd7355bc7382255d477038e7e424c6c 167 | pyparsing==3.1.1 168 | pytest==7.4.4 169 | pytest-flakefinder==1.1.0 170 | pytest-rerunfailures==13.0 171 | pytest-shard==0.1.2 172 | pytest-xdist==3.5.0 173 | python-dateutil==2.8.2 174 | python-hostlist==1.23.0 175 | pytorch-quantization==2.1.2 176 | pytz @ file:///rapids/pytz-2023.3.post1-py2.py3-none-any.whl#sha256=ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7 177 | PyYAML==6.0.1 178 | pyzmq==25.1.2 179 | raft-dask @ 
file:///rapids/raft_dask-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=32d9955c2aeb569b06d4bdc20ee80f9d5f4f3640acb92b20ff99fb281074d858 180 | rapids-dask-dependency @ file:///rapids/rapids_dask_dependency-23.12.1-py3-none-any.whl#sha256=2abfe15415711bad9dfe9e83d4bfbd039e9436d66cc17e74ae22c85ab9afe46b 181 | referencing==0.32.1 182 | regex==2023.12.25 183 | requests==2.31.0 184 | requests-oauthlib==1.3.1 185 | rich @ file:///rapids/rich-13.7.0-py3-none-any.whl#sha256=6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235 186 | rmm @ file:///rapids/rmm-23.12.0-cp310-cp310-linux_aarch64.whl#sha256=0ec2d0a669a6963218e6fed76e3694e6bd7d034a28ddf72a885fdc3a90f2e458 187 | rpds-py==0.17.1 188 | rsa==4.9 189 | safetensors==0.4.2 190 | scikit-learn @ file:///rapids/scikit_learn-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=fd3480c982b9e616b9f76ad8587804d3f4e91b4e2a6752e7dafb8a2e1f541098 191 | scipy @ file:///rapids/scipy-1.12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=e53958531a7c695ff66c2e7bb7b79560ffdc562e2051644c5576c39ff8efb563 192 | Send2Trash==1.8.2 193 | sentencepiece==0.2.0 194 | six==1.16.0 195 | smart-open==6.4.0 196 | sortedcontainers==2.4.0 197 | soundfile==0.12.1 198 | soupsieve==2.5 199 | soxr==0.3.7 200 | spacy==3.7.2 201 | spacy-legacy==3.0.12 202 | spacy-loggers==1.0.5 203 | sphinx-glpi-theme==0.5 204 | srsly==2.4.8 205 | stack-data==0.6.3 206 | sympy==1.12 207 | tabulate==0.9.0 208 | tblib @ file:///rapids/tblib-3.0.0-py3-none-any.whl#sha256=80a6c77e59b55e83911e1e607c649836a69c103963c5f28a46cbeef44acf8129 209 | tensorboard==2.9.0 210 | tensorboard-data-server==0.6.1 211 | tensorboard-plugin-wit==1.8.1 212 | tensorrt @ file:///workspace/TensorRT-8.6.1.6/python/tensorrt-8.6.1-cp310-none-linux_aarch64.whl#sha256=05b7e51b4574c1ec7151362b011c0a203fca5dfd0561cb613d10fc982cd3fa13 213 | terminado==0.18.0 214 | thinc==8.2.2 215 | threadpoolctl==3.2.0 216 | thriftpy2 @ 
file:///rapids/thriftpy2-0.4.17-cp310-cp310-linux_aarch64.whl#sha256=4774861c7953b22ddd059b077fe9d82ad56c56bb5fdd60a5cd87e3d6f9b00372 217 | tinycss2==1.2.1 218 | tokenizers==0.15.2 219 | toml==0.10.2 220 | tomli==2.0.1 221 | toolz @ file:///rapids/toolz-0.12.1-py3-none-any.whl#sha256=d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85 222 | torch @ file:///tmp/pip/torch-2.2.0a0%2B81ea7a4-cp310-cp310-linux_aarch64.whl#sha256=cb9192fa0aaa81c2482e00306ea6b72a737a6ac88eed770f8a03432039549aee 223 | torch-tensorrt @ file:///opt/pytorch/torch_tensorrt/dist/torch_tensorrt-2.2.0a0-cp310-cp310-linux_aarch64.whl#sha256=5cd5236ff477b7aaffee2bf52842ead45e60dea8cb27b7e057bce8260dfad0a9 224 | torchdata @ file:///opt/pytorch/data 225 | torchtext @ file:///opt/pytorch/text 226 | torchvision @ file:///opt/pytorch/vision 227 | tornado==6.4 228 | tqdm==4.66.1 229 | traitlets==5.9.0 230 | transformer-engine @ git+https://github.com/NVIDIA/TransformerEngine.git@bbafb02097e6ca1605c3c0cad84d59dbbcb6e94b 231 | transformers==4.38.2 232 | treelite @ file:///rapids/treelite-3.9.1-cp310-cp310-linux_aarch64.whl#sha256=a96dc21cd0e44265204244bbb78b311f3d1f87174050492f9de6b30ec06aaf9c 233 | treelite-runtime @ file:///rapids/treelite_runtime-3.9.1-cp310-cp310-linux_aarch64.whl#sha256=af380fad937d5b8e43a0fb6ce9fbb09c7515f70f05916a26869a7af50c9b2bb4 234 | typer==0.9.0 235 | types-dataclasses==0.6.6 236 | typing-inspect==0.9.0 237 | typing_extensions==4.9.0 238 | ucx-py @ file:///rapids/ucx_py-0.35.0-cp310-cp310-linux_aarch64.whl#sha256=b07db3ce3a03966c49155641b5c66dfd3574d6144a0f3e12c17c15f108c4c09d 239 | uff @ file:///workspace/TensorRT-8.6.1.6/uff/uff-0.6.9-py2.py3-none-any.whl#sha256=618a3f812d491f0d3c4f2e38b99e03217ca37b206db14cee079f2bf681eb4fe3 240 | urllib3 @ file:///rapids/urllib3-1.26.18-py2.py3-none-any.whl#sha256=34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07 241 | wasabi==1.1.2 242 | wcwidth==0.2.13 243 | weasel==0.3.4 244 | webencodings==0.5.1 245 | 
Werkzeug==3.0.1 246 | xdoctest==1.0.2 247 | xgboost @ file:///rapids/xgboost-1.7.6-cp310-cp310-linux_aarch64.whl#sha256=c89be2ffeff9c49dae24a1dc7fc1ca5c83ec9f40ab249e4a2743dab068afaa47 248 | xxhash==3.4.1 249 | yarl @ file:///rapids/yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl#sha256=d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234 250 | zict @ file:///rapids/zict-3.0.0-py2.py3-none-any.whl#sha256=5796e36bd0e0cc8cf0fbc1ace6a68912611c1dbd74750a3f3026b9b9d6a327ae 251 | zipp @ file:///rapids/zipp-3.17.0-py3-none-any.whl#sha256=0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 252 | --------------------------------------------------------------------------------