├── 0_Sagemaker_Setup.ipynb ├── 1_Run_Benchmark.ipynb ├── 2_Parse_Results.ipynb ├── 3_Advanced_Usage.ipynb ├── 4_Advanced_GUI_Benchmark_Client.ipynb ├── 5_LaunchRunpod.ipynb ├── LICENSE ├── Proprietary Model Evaluations ├── Bard Eval.ipynb ├── BingChat Eval.ipynb ├── ClaudePlus Eval.ipynb ├── ClaudeReg Eval.ipynb ├── GPT Eval.ipynb └── GPT4 Eval.ipynb ├── README.md ├── benchmark_manager.py ├── benchmark_utils.py ├── jsonl_examples ├── Aeala_VicUnlocked-alpaca-65b-4bit_ALPACA_MD_RECALC.jsonl ├── Aeala_VicUnlocked-alpaca-65b-4bit_ALPACA_MEDIUM_RECALC.jsonl ├── Aeala_VicUnlocked-alpaca-65b-4bit_ALPACA_MEMSPLIT.jsonl ├── Bard_custom.jsonl ├── Claude_Plus_custom_md.jsonl ├── Claude_custom.jsonl ├── GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_LONG_FIXED.jsonl ├── GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_MARKDOWN.jsonl ├── GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_MEDIUM.jsonl ├── TheBloke_Manticore-13B-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_Manticore-13B-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_Manticore-13B-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_Manticore-13B-GPTQ_VICUNA_LONG.jsonl ├── TheBloke_Nous-Hermes-13B-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_Nous-Hermes-13B-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_Nous-Hermes-13B-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_OpenAssistant-SFT-7-Llama-30B-GPTQ_OPENA_.jsonl ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_VICUNA_LONG.jsonl ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_VICUNA_VERYSHORT.jsonl ├── TheBloke_Wizard-Vicuna-13B-Uncensored-GPTQ_VICUNA_MARKDOWN.jsonl ├── TheBloke_Wizard-Vicuna-13B-Uncensored-GPTQ_VICUNA_MEDIUM.jsonl ├── TheBloke_Wizard-Vicuna-30B-Uncensored-GPTQ_VICUNA_MARKDOWN.jsonl ├── TheBloke_Wizard-Vicuna-30B-Uncensored-GPTQ_VICUNA_MEDIUM.jsonl 
├── TheBloke_Wizard-Vicuna-7B-Uncensored-GPTQ_VICUNA_MARKDOWN.jsonl ├── TheBloke_Wizard-Vicuna-7B-Uncensored-GPTQ_VICUNA_MEDIUM.jsonl ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_DEBUG_LONG.jsonl ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_MARKDOWN.jsonl ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_MEDIUM.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_DEBUG_LONG.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_VICUNA_LONG.jsonl ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_VICUNA_VERYSHORT.jsonl ├── TheBloke_falcon-7b-instruct-GPTQ_FALCON_.jsonl ├── TheBloke_guanaco-13B-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_guanaco-13B-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_guanaco-13B-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_guanaco-33B-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_guanaco-33B-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_guanaco-33B-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_guanaco-65B-GPTQ_ALPACA_.jsonl ├── TheBloke_guanaco-65B-GPTQ_ALPACA_MD_RECALC.jsonl ├── TheBloke_guanaco-65B-GPTQ_SHORT_.jsonl ├── TheBloke_guanaco-65B-GPTQ_VERYSHORT_.jsonl ├── TheBloke_guanaco-7B-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_guanaco-7B-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_open-llama-7b-open-instruct-GPTQ_ALPACA_PREFIX_openllama.jsonl ├── TheBloke_starcoderplus-GPTQ_ALPACA_INPUT_MEDIUM_STARPLUS_5.jsonl ├── TheBloke_starcoderplus-GPTQ_ALPACA_INPUT_STARPLUS_2.jsonl ├── TheBloke_starcoderplus-GPTQ_ALPACA_MEDIUM_STARPLUS_4.jsonl ├── TheBloke_starcoderplus-GPTQ_ALPACA_PREFIX1_MEDIUM_STARPLUS_6.jsonl ├── TheBloke_starcoderplus-GPTQ_ALPACA_PREFIX1_STARPLUS_3.jsonl ├── TheBloke_starcoderplus-GPTQ_ALPACA_STARPLUS_1.jsonl ├── 
TheBloke_starcoderplus-GPTQ_SHORT_STARPLUS_7.jsonl ├── TheBloke_starcoderplus-GPTQ_VERYSHORT_STARPLUS_8.jsonl ├── TheBloke_vicuna-13B-1.1-GPTQ-4bit-128g_VICUNA_MARKDOWN.jsonl ├── TheBloke_vicuna-13B-1.1-GPTQ-4bit-128g_VICUNA_MEDIUM.jsonl ├── TheBloke_vicuna-7B-GPTQ-4bit-128g_VICUNA_MARKDOWN.jsonl ├── TheBloke_vicuna-7B-GPTQ-4bit-128g_VICUNA_MEDIUM.jsonl ├── TheBloke_wizard-vicuna-13B-GPTQ_VICUNA_MARKDOWN.jsonl ├── TheBloke_wizard-vicuna-13B-GPTQ_VICUNA_MEDIUM.jsonl ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_DEBUG_LONG.jsonl ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_MEDIUM.jsonl ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_DEBUG_LONG.jsonl ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_LONG_FIXED.jsonl ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_MARKDOWN.jsonl ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_MEDIUM.jsonl ├── anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g_ALPACA_LONG_FIXED.jsonl ├── anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g_ALPACA_MARKDOWN.jsonl ├── anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g_ALPACA_MEDIUM.jsonl ├── gpt3_final_500c_long.jsonl ├── gpt4_final_500c_long.jsonl ├── h2oai_h2ogpt-gm-oasst1-en-2048-falcon-7b-v2_H2O_h2o.jsonl ├── mindrage_Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors_ALPACA_LONG_FIXED.jsonl ├── mindrage_Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors_ALPACA_MARKDOWN.jsonl ├── mindrage_Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors_ALPACA_MEDIUM.jsonl ├── sahil2801_instruct-codegen-16B_ALPACA_PREFIX1_LONG_FIXED.jsonl ├── sahil2801_instruct-codegen-16B_ALPACA_PREFIX1_MARKDOWN.jsonl ├── sahil2801_instruct-codegen-16B_ALPACA_PREFIX1_MEDIUM.jsonl ├── tiiuae_falcon-40b-instruct_FALCON_FALCON_FULL.jsonl 
├── togethercomputer_RedPajama-INCITE-7B-Instruct_Q_A_MARKDOWN.jsonl ├── togethercomputer_RedPajama-INCITE-7B-Instruct_Q_A_MEDIUM.jsonl ├── togethercomputer_RedPajama-INCITE-Instruct-3B-v1_Q_A_MARKDOWN.jsonl └── togethercomputer_RedPajama-INCITE-Instruct-3B-v1_Q_A_MEDIUM.jsonl ├── launch_pod.py ├── prompt_formats.txt └── templates.py /0_Sagemaker_Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "f06f277f-971b-4d35-a06f-49ec81df18aa", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "# !git clone https://github.com/oobabooga/text-generation-webui" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "1878dabf-e204-4ebb-9692-4126dab0eed8", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# !git clone https://github.com/PanQiWei/AutoGPTQ.git" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "0bf898e2-8fe4-4a30-a98d-1ce868e991f9", 29 | "metadata": { 30 | "tags": [] 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "%%bash\n", 35 | "\n", 36 | "sudo mv /usr/bin/g++ /usr/bin/g++_old\n", 37 | "sudo mv /usr/bin/gcc /usr/bin/gcc_old\n", 38 | "sudo ln -s /usr/bin/gcc10-cc /usr/bin/gcc\n", 39 | "sudo ln -s /usr/bin/gcc10-c++ /usr/bin/g++" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "6d935db7-8656-4117-b9fb-2073ab236358", 46 | "metadata": { 47 | "tags": [] 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "!sudo yum -y remove gcc-c++" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "15294776-1d0d-4150-a355-a8f6404b3dee", 58 | "metadata": { 59 | "tags": [] 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "!sudo yum -y install gcc-c++" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "id": 
"1907ae21-09a7-460f-ae17-91b258ecd14e", 70 | "metadata": { 71 | "tags": [] 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "%%writefile text-generation-webui/requirements.txt\n", 76 | "colorama\n", 77 | "datasets\n", 78 | "einops\n", 79 | "flexgen==0.1.7\n", 80 | "gradio_client==0.2.5\n", 81 | "gradio==3.31.0\n", 82 | "markdown\n", 83 | "numpy\n", 84 | "pandas\n", 85 | "Pillow>=9.5.0\n", 86 | "pyyaml\n", 87 | "requests\n", 88 | "safetensors==0.3.1\n", 89 | "sentencepiece\n", 90 | "tqdm\n", 91 | "scipy\n", 92 | "git+https://github.com/huggingface/peft@3714aa2fff158fdfa637b2b65952580801d890b2\n", 93 | "git+https://github.com/huggingface/transformers@e45e756d22206ca8fa9fb057c8c3d8fa79bf81c6\n", 94 | "git+https://github.com/huggingface/accelerate@0226f750257b3bf2cadc4f189f9eef0c764a0467" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "id": "d17702d0-b1a3-422d-9974-10c33d70fec7", 101 | "metadata": { 102 | "scrolled": true, 103 | "tags": [] 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "!cd text-generation-webui && pip install -r requirements.txt" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "id": "c43a3975-07e5-4183-872d-21630ec9d88d", 114 | "metadata": { 115 | "tags": [] 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "%%writefile GPTQ-for-LLaMa/requirements.txt\n", 120 | "safetensors==0.3.1\n", 121 | "datasets==2.10.1\n", 122 | "sentencepiece\n", 123 | "git+https://github.com/huggingface/transformers\n", 124 | "accelerate==0.19.0" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "df3ddb38-d864-4b63-ab07-cf9baade08cd", 131 | "metadata": { 132 | "tags": [] 133 | }, 134 | "outputs": [], 135 | "source": [ 136 | "!pip uninstall -y llama-cpp-python" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "f5bb1d0b-9e6f-41ad-b653-05e58a1ebfb9", 143 | "metadata": { 144 | "scrolled": true, 145 | 
"tags": [] 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install -v llama-cpp-python --no-cache-dir" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "56447029-7497-402b-8aad-c729537179e8", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "!cd AutoGPTQ && pip install ." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "id": "d7b2d613-f243-44eb-b0e1-a186f61507ed", 166 | "metadata": { 167 | "scrolled": true, 168 | "tags": [] 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "!rm -rf GPTQ-for-LLaMa && git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa \n", 173 | "!cd GPTQ-for-LLaMa && git checkout cuda\n", 174 | "!cd GPTQ-for-LLaMa && pip install -r requirements.txt" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "id": "c8ea6e35-61f9-4557-b0a9-a1bc24287414", 181 | "metadata": { 182 | "scrolled": true, 183 | "tags": [] 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "!rm -rf GPTQ-for-LLaMa && git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa \n", 188 | "!cd GPTQ-for-LLaMa && git checkout cuda\n", 189 | "!cd GPTQ-for-LLaMa && python setup_cuda.py install" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "id": "3f125711-b7c7-4c4a-9546-1b1ea395ced3", 196 | "metadata": { 197 | "tags": [] 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "!pip freeze | grep torch" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "id": "961b8d23-c59f-40f0-9523-fed73b46334a", 208 | "metadata": { 209 | "tags": [] 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "!pip install --upgrade evalplus torch>=2.0.0" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "id": "3ed48bc9-6559-40c1-a3f9-ceed0bd13c67", 220 | "metadata": { 221 | "tags": [] 222 | 
}, 223 | "outputs": [], 224 | "source": [ 225 | "!cp -r GPTQ-for-LLaMa/* text-generation-webui/" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "fb1157d4-5420-4533-be98-5e0e07a7dfa9", 232 | "metadata": { 233 | "scrolled": true, 234 | "tags": [] 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "!python -m torch.utils.collect_env" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "0c8f7ec9-8c1c-4dac-be52-f1baaa072bf5", 245 | "metadata": { 246 | "scrolled": true, 247 | "tags": [] 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "!cd text-generation-webui && python server.py --share --api --trust-remote-code" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "72c47abb-10b1-43a5-b11e-691b0e114ad5", 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [] 261 | } 262 | ], 263 | "metadata": { 264 | "kernelspec": { 265 | "display_name": "conda_pytorch_p39", 266 | "language": "python", 267 | "name": "conda_pytorch_p39" 268 | }, 269 | "language_info": { 270 | "codemirror_mode": { 271 | "name": "ipython", 272 | "version": 3 273 | }, 274 | "file_extension": ".py", 275 | "mimetype": "text/x-python", 276 | "name": "python", 277 | "nbconvert_exporter": "python", 278 | "pygments_lexer": "ipython3", 279 | "version": "3.9.15" 280 | } 281 | }, 282 | "nbformat": 4, 283 | "nbformat_minor": 5 284 | } 285 | -------------------------------------------------------------------------------- /1_Run_Benchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "7146c2a6-89e7-49b6-8aca-dd7592361fda", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from benchmark_manager import run_benchmark_workflow" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | 
"id": "7dfed66f-9100-478b-a67f-493daca1fe68", 19 | "metadata": { 20 | "tags": [] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "# !cd text-generation-webui && python server.py --model-menu" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "id": "77d1e871-71bb-419e-8293-dd90d50f7d5b", 31 | "metadata": { 32 | "scrolled": true, 33 | "tags": [] 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "run_benchmark_workflow(\"Aeala_VicUnlocked-alpaca-65b-4bit\", 6666, group_size=128,\n", 38 | " prompt_type=\"long\", user_tag=\"### Instruction:\", \n", 39 | " assistant_tag=\"### Response:\", system_prefix=\"\", experiment_tag=\"alpaca\")" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "d7d9fb60-2463-4016-bc49-3ae882d4a378", 46 | "metadata": { 47 | "tags": [] 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "run_benchmark_workflow(\"Aeala_VicUnlocked-alpaca-65b-4bit\", 6666, group_size=128,\n", 52 | " prompt_type=\"long\", user_tag=\"USER:\", \n", 53 | " assistant_tag=\"ASSISTANT:\", system_prefix=\"A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions.\", experiment_tag=\"vicuna\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "id": "192fb97e-b4e4-4ed8-8fc6-24ac22e4a768", 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [] 63 | } 64 | ], 65 | "metadata": { 66 | "kernelspec": { 67 | "display_name": "conda_pytorch_p39", 68 | "language": "python", 69 | "name": "conda_pytorch_p39" 70 | }, 71 | "language_info": { 72 | "codemirror_mode": { 73 | "name": "ipython", 74 | "version": 3 75 | }, 76 | "file_extension": ".py", 77 | "mimetype": "text/x-python", 78 | "name": "python", 79 | "nbconvert_exporter": "python", 80 | "pygments_lexer": "ipython3", 81 | "version": "3.9.15" 82 | } 83 | }, 84 | "nbformat": 4, 85 | "nbformat_minor": 5 86 | } 87 | -------------------------------------------------------------------------------- /3_Advanced_Usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "7dfed66f-9100-478b-a67f-493daca1fe68", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "# !cd text-generation-webui && python server.py --model-menu" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "id": "77d1e871-71bb-419e-8293-dd90d50f7d5b", 19 | "metadata": { 20 | "scrolled": true, 21 | "tags": [] 22 | }, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "Starting server...\n", 29 | "INFO:\u001b[32mLoading TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g...\u001b[0m\n", 30 | "CUDA extension not installed.\n", 31 | "INFO:\u001b[32mFound the following quantized model: models/TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g/vicuna-7B-1.1-GPTQ-4bit-128g.safetensors\u001b[0m\n", 32 | "INFO:\u001b[32mLoaded the model in 2.90 seconds.\n", 33 | "\u001b[0m\n", 34 | "Starting streaming server at 
ws://127.0.0.1:6667/api/v1/stream\n", 35 | "Starting API at http://127.0.0.1:6666/api\n", 36 | "Server started!\n", 37 | "127.0.0.1 - - [05/Jun/2023 00:51:37] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 38 | "Running on local URL: http://127.0.0.1:6668\n", 39 | "\n", 40 | "To create a public link, set `share=True` in `launch()`.\n", 41 | "Output generated in 22.05 seconds (2.54 tokens/s, 56 tokens, context 244, seed 0)\n", 42 | "127.0.0.1 - - [05/Jun/2023 00:52:00] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 43 | "Output generated in 113.19 seconds (2.60 tokens/s, 294 tokens, context 248, seed 1)\n", 44 | "127.0.0.1 - - [05/Jun/2023 00:53:53] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 45 | "Output generated in 29.99 seconds (2.60 tokens/s, 78 tokens, context 204, seed 2)\n", 46 | "127.0.0.1 - - [05/Jun/2023 00:54:23] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 47 | "Output generated in 16.93 seconds (2.60 tokens/s, 44 tokens, context 237, seed 3)\n", 48 | "127.0.0.1 - - [05/Jun/2023 00:54:40] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 49 | "Output generated in 49.77 seconds (2.59 tokens/s, 129 tokens, context 240, seed 4)\n", 50 | "127.0.0.1 - - [05/Jun/2023 00:55:30] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 51 | "Output generated in 42.37 seconds (2.60 tokens/s, 110 tokens, context 215, seed 5)\n", 52 | "127.0.0.1 - - [05/Jun/2023 00:56:13] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 53 | "Output generated in 20.01 seconds (2.60 tokens/s, 52 tokens, context 239, seed 6)\n", 54 | "127.0.0.1 - - [05/Jun/2023 00:56:33] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 55 | "Output generated in 12.32 seconds (2.60 tokens/s, 32 tokens, context 215, seed 7)\n", 56 | "127.0.0.1 - - [05/Jun/2023 00:56:45] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 57 | "Output generated in 29.22 seconds (2.60 tokens/s, 76 tokens, context 235, seed 8)\n", 58 | "127.0.0.1 - - [05/Jun/2023 00:57:14] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 59 | "Output generated in 41.18 seconds 
(2.60 tokens/s, 107 tokens, context 216, seed 9)\n", 60 | "127.0.0.1 - - [05/Jun/2023 00:57:56] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 61 | "Output generated in 164.00 seconds (2.59 tokens/s, 425 tokens, context 289, seed 10)\n", 62 | "127.0.0.1 - - [05/Jun/2023 01:00:40] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 63 | "Output generated in 29.64 seconds (2.60 tokens/s, 77 tokens, context 200, seed 11)\n", 64 | "127.0.0.1 - - [05/Jun/2023 01:01:10] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 65 | "Output generated in 43.85 seconds (2.60 tokens/s, 114 tokens, context 217, seed 12)\n", 66 | "127.0.0.1 - - [05/Jun/2023 01:01:54] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 67 | "Output generated in 24.22 seconds (2.60 tokens/s, 63 tokens, context 194, seed 13)\n", 68 | "127.0.0.1 - - [05/Jun/2023 01:02:18] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 69 | "Output generated in 14.99 seconds (2.60 tokens/s, 39 tokens, context 175, seed 14)\n", 70 | "127.0.0.1 - - [05/Jun/2023 01:02:33] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 71 | "Output generated in 12.31 seconds (2.60 tokens/s, 32 tokens, context 188, seed 15)\n", 72 | "127.0.0.1 - - [05/Jun/2023 01:02:46] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 73 | "Output generated in 18.45 seconds (2.60 tokens/s, 48 tokens, context 195, seed 16)\n", 74 | "127.0.0.1 - - [05/Jun/2023 01:03:04] \"POST /api/v1/generate HTTP/1.1\" 200 -\n", 75 | "Traceback (most recent call last):\n", 76 | " File \"/home/ec2-user/SageMaker/text-generation-webui/server.py\", line 1118, in \n", 77 | " time.sleep(0.5)\n", 78 | "KeyboardInterrupt\n", 79 | "terminate called without an active exception\n" 80 | ] 81 | }, 82 | { 83 | "ename": "KeyboardInterrupt", 84 | "evalue": "", 85 | "output_type": "error", 86 | "traceback": [ 87 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 88 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 89 | 
"\u001b[0;32m/tmp/ipykernel_1862/98509441.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mbenchmark_manager\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrun_benchmark_workflow\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m run_benchmark_workflow(\"TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g\", 6666, group_size=128,\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprompt_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"long\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_tag\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"USER:\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m assistant_tag=\"ASSISTANT:\", system_prefix=\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", experiment_tag=\"vicuna\")\n", 90 | "\u001b[0;32m~/SageMaker/benchmark_manager.py\u001b[0m in \u001b[0;36mrun_benchmark_workflow\u001b[0;34m(model_name, portnum, group_size, maxnum, prompt_type, user_tag, assistant_tag, system_prefix, experiment_tag, working_directory)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0;31m# Run the benchmark\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mrun_benchmark\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"_\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mexperiment_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxnum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mportnum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprompt_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massistant_tag\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0msystem_prefix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;31m# Once the benchmark has finished running, terminate the server process\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 91 | "\u001b[0;32m~/SageMaker/benchmark_utils.py\u001b[0m in \u001b[0;36mrun_benchmark\u001b[0;34m(filename, maxnum, port, prompt_type, user_tag, assistant_tag, system_prefix, custom_completion)\u001b[0m\n\u001b[1;32m 106\u001b[0m params = {\n\u001b[1;32m 107\u001b[0m \u001b[0;34m'task_id'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtask_id\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m 'completion': custom_completion(\n\u001b[0m\u001b[1;32m 109\u001b[0m \u001b[0mproblems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtask_id\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"prompt\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 92 | "\u001b[0;32m~/SageMaker/benchmark_utils.py\u001b[0m in \u001b[0;36mgenerate_one_completion\u001b[0;34m(prompt_code, seed, port, prompt_type, user_tag, assistant_tag, system_prefix)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0msuffix\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m'def'\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mprompt_code\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"def\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"(\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"(\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0mprompt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenerate_prompt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt_code\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprompt_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massistant_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msystem_prefix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0mcode_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mport\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mport\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mget_function_body\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcut_off_prefix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcode_result\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"```python\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n", 93 | "\u001b[0;32m~/SageMaker/benchmark_utils.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(prompt, seed, port)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0mURI\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'http://{HOST}:{port}/api/v1/generate'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;31m# Send the request and return the response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mURI\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mprompt\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'results'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus_code\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m200\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 94 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/api.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \"\"\"\n\u001b[1;32m 114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"post\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 95 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 96 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, 
hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 585\u001b[0m }\n\u001b[1;32m 586\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 587\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 588\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 97 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 743\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 744\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 745\u001b[0;31m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 746\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 747\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 98 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/models.py\u001b[0m in \u001b[0;36mcontent\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 897\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_content\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 898\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 899\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_content\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mb\"\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter_content\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCONTENT_CHUNK_SIZE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34mb\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 900\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_content_consumed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 99 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/models.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m()\u001b[0m\n\u001b[1;32m 814\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"stream\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 815\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 816\u001b[0;31m \u001b[0;32myield\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 817\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mProtocolError\u001b[0m 
\u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 818\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mChunkedEncodingError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 100 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/urllib3/response.py\u001b[0m in \u001b[0;36mstream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m 574\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_fp_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 576\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_content\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 101 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/urllib3/response.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m 517\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 518\u001b[0m \u001b[0mcache_content\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 519\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfp_closed\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34mb\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 520\u001b[0m if (\n\u001b[1;32m 521\u001b[0m \u001b[0mamt\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 102 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m 461\u001b[0m \u001b[0;31m# Amount is given, implement using readinto\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 462\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbytearray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 463\u001b[0;31m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadinto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 464\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmemoryview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtobytes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 465\u001b[0m 
\u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 103 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 505\u001b[0m \u001b[0;31m# connection, and the user is reading more bytes than will be provided\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 506\u001b[0m \u001b[0;31m# (for example, reading in 1k chunks)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 507\u001b[0;31m \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadinto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 508\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[0;31m# Ideally, we would raise IncompleteRead if the content-length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 104 | "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 703\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 704\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 705\u001b[0m 
\u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 706\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 105 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "from benchmark_manager import run_benchmark_workflow, run\n", 111 | "run_benchmark_workflow(\"TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g\", 6666, group_size=128,\n", 112 | " prompt_type=\"long\", user_tag=\"USER:\", \n", 113 | " assistant_tag=\"ASSISTANT:\", system_prefix=\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", experiment_tag=\"vicuna\")" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "12b698d4-8b57-4f5f-bd3d-56c74b095e13", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "run_benchmark_workflow(\"TheBloke_wizardLM-7B-GPTQ\", 6666, group_size=128,\n", 124 | " prompt_type=\"long\", user_tag=\"USER:\", \n", 125 | " assistant_tag=\"ASSISTANT:\", system_prefix=\"\", experiment_tag=\"alpaca\")" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "id": "d7ff04e5-8c9c-4600-ba61-bd1098773e4f", 132 | "metadata": { 133 | "scrolled": true, 134 | "tags": [] 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Starting server...\n", 142 | "INFO:\u001b[32mLoading TheBloke_wizardLM-7B-GPTQ...\u001b[0m\n", 143 | "WARNING:\u001b[33mMore than one .safetensors model has been found. The last one will be selected. 
It could be wrong.\u001b[0m\n", 144 | "INFO:\u001b[32mFound the following quantized model: models/TheBloke_wizardLM-7B-GPTQ/wizardLM-7B-GPTQ-4bit.latest.act-order.safetensors\u001b[0m\n", 145 | "INFO:\u001b[32mLoaded the model in 2.96 seconds.\n", 146 | "\u001b[0m\n", 147 | "Starting streaming server at ws://127.0.0.1:6667/api/v1/stream\n", 148 | "Starting API at http://127.0.0.1:6666/api\n", 149 | "Server started!\n", 150 | "Results will be written to: results/TheBloke_wizardLM-7B-GPTQ_custom.jsonl\n", 151 | "Processing Task 0 of 164\n", 152 | "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n", 153 | " \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n", 154 | " given threshold.\n", 155 | " :param numbers: List[float]\n", 156 | " :param threshold: float\n", 157 | " :return: bool\n", 158 | " \"\"\"\n", 159 | " return len(set(numbers)) < 2 or abs(sum(numbers) - threshold) <= 1\n", 160 | "Processing Task 1 of 164\n", 161 | "def separate_paren_groups(paren_string: str) -> List[str]:\n", 162 | " # Define a helper function to check if two brackets match\n", 163 | " def are_brackets_matching(*args):\n", 164 | " for i in range(len(args)):\n", 165 | " if args[i]!= args[i+1]:\n", 166 | " return False\n", 167 | " return True\n", 168 | " # Split the input string into lists of brackets based on their depth\n", 169 | " depth = 0\n", 170 | " result = []\n", 171 | " for char in paren_string:\n", 172 | " if char == '(':\n", 173 | " depth += 1\n", 174 | " elif char == ')':\n", 175 | " depth -= 1\n", 176 | " elif depth > 0:\n", 177 | " result.append(char)\n", 178 | " else:\n", 179 | " result.extend([ch for ch in paren_string if ch not in'()'])\n", 180 | " # Recursively split each sublist of brackets into individual strings\n", 181 | " for i in range(len(result)-1):\n", 182 | " if len(result[i]) > 1:\n", 183 | " result[i], result[i+1] = result[i+1], result[i]\n", 184 | " # Join the resulting lists of brackets into a 
single list of strings\n", 185 | " return ['. '.join(lst) for lst in result]\n", 186 | "Processing Task 2 of 164\n", 187 | "def truncate_number(number: float) -> float:\n", 188 | " \"\"\" Given a positive floating point number, it can be decomposed into\n", 189 | " and integer part (largest integer smaller than given number) and decimals\n", 190 | " (leftover part always smaller than 1).\n", 191 | " Return the decimal part of the number.\n", 192 | " >>> truncate_number(3.5)\n", 193 | " 0.5\n", 194 | " \"\"\"\n", 195 | " return number - int(round(number))\n", 196 | "Processing Task 3 of 164\n", 197 | "def below_zero(operations: T.List[int]) -> T.Union[bool, None]:\n", 198 | " # Initialize a variable to keep track of whether the balance fell below zero or not\n", 199 | " did_fall_below_zero = False\n", 200 | " for operation in operations:\n", 201 | " # Check if the current operation is a withdrawal operation\n", 202 | " if operation < 0:\n", 203 | " did_fall_below_zero = True\n", 204 | " break\n", 205 | " # Return True if the balance fell below zero, False otherwise\n", 206 | " return did_fall_below_zero\n", 207 | "Processing Task 4 of 164\n", 208 | "def mean_absolute_deviation(numbers: typing.List[float]) -> float:\n", 209 | " \"\"\" Calculate Mean Absolute Deviation for a given list of input numbers.\n", 210 | " See documentation for details.\n", 211 | " Returns:\n", 212 | " The calculated value of Mean Absolute Deviation.\n", 213 | " \"\"\"\n", 214 | " # Initialize variables for calculating MAD\n", 215 | " num_elements = len(numbers)\n", 216 | " sum_of_squares = sum([num * num for num in numbers])\n", 217 | " sum_of_absolutes = sum([abs(num) for num in numbers])\n", 218 | " mean_value = sum_of_absolutes / num_elements\n", 219 | " mad_value = math.sqrt(sum_of_squares - ((sum_of_absolutes ** 2) / num_elements))\n", 220 | " return mad_value\n", 221 | "Processing Task 5 of 164\n", 222 | "def intersperse(numbers: List[int], delimeter: int) -> List[int]:\n", 223 | " 
\"\"\"Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n", 224 | " >>> intersperse([], 4)\n", 225 | " []\n", 226 | " >>> intersperse([1, 2, 3], 4)\n", 227 | " [1, 4, 2, 4, 3]\n", 228 | " \"\"\"\n", 229 | "Processing Task 6 of 164\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "# Make sure to pip install evalplus\n", 235 | "\n", 236 | "import os, signal\n", 237 | "from benchmark_utils import run_benchmark, run, extract_code\n", 238 | "from benchmark_manager import start_server\n", 239 | "\n", 240 | "model_name = \"TheBloke_wizardLM-7B-GPTQ\"\n", 241 | "portnum = 6666\n", 242 | "group_size=128\n", 243 | "\n", 244 | "server_process = start_server(model_name, portnum, group_size=group_size, \n", 245 | " working_directory='text-generation-webui') # Make sure server.py is in working_directory\n", 246 | "\n", 247 | "def my_completion(code, **kwargs):\n", 248 | " prompt = \"Complete this code:\\n%s\\nASSISTANT:\" % code\n", 249 | " results = extract_code(run(prompt, port=kwargs[\"port\"]))\n", 250 | " print(results)\n", 251 | " return results\n", 252 | "\n", 253 | "run_benchmark(model_name, port=portnum, custom_completion=my_completion, prompt_type = \"custom\")\n", 254 | "\n", 255 | "os.kill(server_process.pid, signal.SIGTERM)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "id": "f8bd1a5e-f349-46a4-b0c1-449c493536cb", 262 | "metadata": { 263 | "tags": [] 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "import subprocess\n", 268 | "\n", 269 | "filename = \"results/TheBloke_wizardLM-7B-GPTQ_custom.jsonl\"\n", 270 | "\n", 271 | "result = subprocess.run([\"sudo\", \"/home/ec2-user/anaconda3/envs/pytorch_p39/bin/evalplus.evaluate\",\n", 272 | " \"--dataset\", \"humaneval\", \"--samples\", filename, \"--i-just-wanna-run\"], \n", 273 | " text=True, capture_output=True, check=False)\n", 274 | "\n", 275 | "print(result.stdout, \"\\n\", result.stderr)" 276 | ] 277 | }, 278 | { 279 | 
"cell_type": "code", 280 | "execution_count": 2, 281 | "id": "9dd23386-f2fc-4d4a-85eb-a89fd554aaa0", 282 | "metadata": { 283 | "tags": [] 284 | }, 285 | "outputs": [], 286 | "source": [ 287 | "os.kill(server_process.pid, signal.SIGTERM)" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "id": "a35b5a37-cd8c-4390-a32c-539dc88cf900", 293 | "metadata": {}, 294 | "source": [ 295 | "You can also start a server, configure and load a model via the GUI, then benchmark against it (in a separate process or Jupyter notebook - see the advanced benchmark client notebook)" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "id": "7169fe61-9490-46ca-a4c0-b35a6bb3a024", 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "from benchmark_manager import start_server, block_log_server\n", 306 | "\n", 307 | "PORT = 6000 # remember this for the other notebook\n", 308 | "\n", 309 | "server_process = start_server(None, portnum=PORT, group_size=None, wbits=None,\n", 310 | " working_directory='text-generation-webui', public=True)\n", 311 | "block_log_server(server_process)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "id": "985db790-f822-4557-8f99-c1032a3cb202", 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [] 321 | } 322 | ], 323 | "metadata": { 324 | "kernelspec": { 325 | "display_name": "conda_pytorch_p39", 326 | "language": "python", 327 | "name": "conda_pytorch_p39" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.9.15" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 5 344 | } 345 | -------------------------------------------------------------------------------- /4_Advanced_GUI_Benchmark_Client.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "7146c2a6-89e7-49b6-8aca-dd7592361fda", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from benchmark_manager import start_server, run_benchmark_workflow\n", 13 | "from benchmark_utils import run_benchmark" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "id": "7960dc96-8bca-4553-8094-e2892f6d93d3", 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "PORT = 6000 # Same port as GUI API is running on\n", 24 | "\n", 25 | "VICUNA = {\n", 26 | " 'user_tag':\"USER:\", \n", 27 | " 'assistant_tag':\"ASSISTANT:\", \n", 28 | " 'system_prefix':\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", \n", 29 | " 'experiment_tag':\"vicuna\"\n", 30 | "}\n", 31 | "\n", 32 | "ALPACA = {\n", 33 | " 'user_tag':\"### Instruction:\", \n", 34 | " 'assistant_tag':\"### Response:\", \n", 35 | " 'system_prefix':\"\", \n", 36 | " 'experiment_tag':\"alpaca\"\n", 37 | "}\n", 38 | "\n", 39 | "ALPACA_PREFIX = {\n", 40 | " 'user_tag':\"### Instruction:\", \n", 41 | " 'assistant_tag':\"### Response:\", \n", 42 | " 'system_prefix':\"Below is an instruction that describes a task.\\n Write a response that appropriately completes the request.\\n\", \n", 43 | " 'experiment_tag':\"alpaca_prefix\"\n", 44 | "}" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "192fb97e-b4e4-4ed8-8fc6-24ac22e4a768", 51 | "metadata": { 52 | "tags": [] 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Results will be written to: results/Salesforce_instructcodet5p-16b_long_alpaca.jsonl\n", 60 | "Processing Task 0 of 164\n", 61 | "def has_close_elements(\"numbers: List[float], 
threshold: float) -> bool:\n", 62 | " \"\"\"Check if in given list of numbers, are any two numbers closer than given threshold.\n", 63 | " >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n", 64 | " False\n", 65 | " >>> has_close_elements([1.0, 2.8, 3.0], 0.5)\n", 66 | " True\n", 67 | " >>> has_close_elements([1.0, 2.8, 3.0], 0.3)\n", 68 | " False\n", 69 | " \"\"\"\n", 70 | " for i in range(len(numbers)):\n", 71 | " for j in range(i + 1, len(numbers)):\n", 72 | " if abs(numbers[i] - numbers[j]) < threshold:\n", 73 | " return True\n", 74 | " return False\n", 75 | "\n", 76 | "\n", 77 | "Processing Task 1 of 164\n", 78 | "def separate_paren_groups(\"paren_string: \")\n", 79 | "Processing Task 2 of 164\n", 80 | "def truncate_number(\"number\": float(number),\n", 81 | " }\n", 82 | " return float(number)\n", 83 | "Processing Task 3 of 164\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "run_benchmark(\"Salesforce_instructcodet5p-16b\", port=PORT, prompt_type=\"long\", **ALPACA)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "7a347ae4-b008-479f-a502-eb2f262776ff", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "conda_pytorch_p39", 103 | "language": "python", 104 | "name": "conda_pytorch_p39" 105 | }, 106 | "language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.9.15" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 5 121 | } 122 | -------------------------------------------------------------------------------- /5_LaunchRunpod.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": 
"5eba32bb-854a-4230-9cd1-2f509fa17bbd", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from launch_pod import launch_webui, stop_and_terminate_pod, remove_safetensors, pod_benchmark, killall_pods\n", 13 | "from templates import *\n", 14 | "from multiprocessing import Process\n", 15 | "import os, time" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "id": "55cd2e0f-3cdf-417c-bf11-e7bdf09c5161", 22 | "metadata": { 23 | "tags": [] 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "API_KEY = 'YOUR KEY HERE'\n", 28 | "file_tag = \"whatever_you_want\"" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": null, 34 | "id": "e84085ea-ad32-41c5-aa04-fa2672583845", 35 | "metadata": { 36 | "tags": [] 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "models = {\n", 41 | " \"TheBloke/wizardLM-7B-GPTQ\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n", 42 | " \"TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n", 43 | " \"TheBloke/vicuna-7B-GPTQ-4bit-128g\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 44 | " \"TheBloke/guanaco-7B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n", 45 | " \"TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 46 | " \"TheBloke/wizardLM-13B-1.0-GPTQ\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 47 | " \"TheBloke/guanaco-13B-GPTQ\": {\"template\": \"ALPACA/SHORT\", 
\"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 48 | " \"TheBloke/Manticore-13B-GPTQ\": {\"template\": \"ALPACA/VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 49 | " \"mindrage/Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 50 | " \"TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 51 | " \"TheBloke/VicUnlocked-30B-LoRA-GPTQ\": {\"template\": \"ALPACA/VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 52 | " \"TheBloke/WizardLM-30B-Uncensored-GPTQ\": {\"template\": \"ALPACA/VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 53 | " \"TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 54 | " \"TheBloke/guanaco-33B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 55 | " \"TheBloke/guanaco-65B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA A100-SXM4-80GB\", \"additional_cmd\": \"\"},\n", 56 | " \"Aeala/VicUnlocked-alpaca-65b-4bit\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"2xNVIDIA A100-SXM4-80GB\", \"additional_cmd\": \"\"},\n", 57 | "}\n", 58 | "additional_models = {\n", 59 | " \"togethercomputer/RedPajama-INCITE-Instruct-3B-v1\": {\"template\": \"Q_A\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": 
\"\"},\n", 60 | " \"togethercomputer/RedPajama-INCITE-7B-Instruct\": {\"template\": \"Q_A\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n", 61 | " \"TheBloke/falcon-7b-instruct-GPTQ\": {\"template\": \"FALCON\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n", 62 | " \"anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 63 | " \"TheBloke/Nous-Hermes-13B-GPTQ\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 64 | " \"TheBloke/wizard-vicuna-13B-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 65 | " \"GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct\": {\"template\": \"ALPACA_INPUT\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 66 | " \"sahil2801/instruct-codegen-16B\": {\"template\": \"ALPACA_PREFIX1\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 67 | " \"TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ\": {\"template\": \"OPENA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 68 | " \"TheBloke/WizardLM-30B-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n", 69 | " \"TheBloke/falcon-40b-instruct-GPTQ\": {\"template\": \"FALCON\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA A100-SXM4-80GB\", \"additional_cmd\": \"\"},\n", 70 | "}\n", 71 | "\n", 72 | "combined_models = {**models, **additional_models}\n", 73 | "\n", 74 | "template_map = {\"ALPACA\" : ALPACA_TEMPLATE, \n", 75 | " \"VICUNA\" 
: VICUNA_TEMPLATE,\n", 76 | " \"ALPACA_INPUT\" : ALPACA_INPUT_TEMPLATE,\n", 77 | " \"OPENA\" : OPENASSISTANT_TEMPLATE,\n", 78 | " \"FALCON\" : FALCON_TEMPLATE,\n", 79 | " \"Q_A\" : QA_TEMPLATE,\n", 80 | " \"ALPACA_PREFIX1\" : ALPACA_PREFIX_TEMPLATE,\n", 81 | " \"ALPACA_MEDIUM\" : ALPACA_TEMPLATE_MEDIUM, \n", 82 | " \"ALPACA_INPUT_MEDIUM\" : ALPACA_INPUT_TEMPLATE_MEDIUM,\n", 83 | " \"ALPACA_PREFIX1_MEDIUM\" : ALPACA_PREFIX_TEMPLATE_MEDIUM,\n", 84 | " \"SHORT\" : SHORT_TEMPLATE,\n", 85 | " \"VERYSHORT\" : VERYSHORT_TEMPLATE\n", 86 | " }" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "dbd10d9a-303c-4f18-a3c4-c66a252ec215", 93 | "metadata": { 94 | "tags": [] 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "import os\n", 99 | "import sys\n", 100 | "import contextlib\n", 101 | "\n", 102 | "def process_model(model, file_tag=\"\"):\n", 103 | " model_file = model.replace(\"/\", \"_\")\n", 104 | " pod_tag = model_file\n", 105 | "\n", 106 | " if not os.path.exists(\"logs\"):\n", 107 | " os.mkdir(\"logs\")\n", 108 | "\n", 109 | " log_filename = f\"logs/log_{model_file}_{file_tag}.txt\" # Define your log file name here\n", 110 | " print(\"Starting\", model)\n", 111 | " # Redirect standard output and error to the log file\n", 112 | " with open(log_filename, 'w') as f, contextlib.redirect_stdout(f), contextlib.redirect_stderr(f):\n", 113 | " gpu_type = combined_models[model][\"GPU\"]\n", 114 | " groupsize = combined_models[model][\"groupsize\"]\n", 115 | " wbits = combined_models[model][\"wbits\"]\n", 116 | " gptq_params = \"\"\n", 117 | "\n", 118 | " if groupsize:\n", 119 | " gptq_params += f\"--groupsize {groupsize} \"\n", 120 | " if wbits:\n", 121 | " gptq_params += f\"--wbits {wbits} \"\n", 122 | "\n", 123 | " templ = combined_models[model][\"template\"]\n", 124 | " if \"/\" in templ:\n", 125 | " templ = templ.split(\"/\")[0]\n", 126 | " prompt = template_map[templ]\n", 127 | " print(pod_tag, gpu_type, gptq_params,\"*** PROMPT 
***\", prompt,\"*** *** ***\", sep=\"\\n\")\n", 128 | "\n", 129 | " api_url, model_file, pod_id = launch_webui(api_key=API_KEY, \n", 130 | " model=model, \n", 131 | " gptq_params=gptq_params, \n", 132 | " gpu_type=gpu_type,\n", 133 | " pod_num=pod_tag)\n", 134 | "\n", 135 | "\n", 136 | " print(\"starting benchmark...\")\n", 137 | " pod_benchmark(model_file + \"_\" + templ + \"_\" + file_tag, prompt, assistant_tag=prompt[-15:], pod_id=pod_id, api_key=API_KEY, start_from=0, host=api_url, port=443, insert_func_stub=True)\n", 138 | "\n", 139 | " stop_and_terminate_pod(pod_id, API_KEY)\n", 140 | "\n", 141 | " # Close the log files\n", 142 | " sys.stdout.close()\n", 143 | " sys.stderr.close()\n", 144 | " print(\"Finished\", model)\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "8434f398-497c-4234-aa79-09807866c986", 151 | "metadata": { 152 | "tags": [] 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "# to_run = [f for f in sorted(combined_models.keys()) if \"7B\" in f][:4]\n", 157 | "to_run = combined_models.keys()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "id": "6dd713b7-81ed-4c80-9d1c-759d1e539bd9", 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "import concurrent.futures\n", 168 | "import contextlib\n", 169 | "import os\n", 170 | "\n", 171 | "MAX_WORKERS = 8\n", 172 | "\n", 173 | "from multiprocessing import Pool\n", 174 | "\n", 175 | "def worker(model):\n", 176 | " return process_model(model, file_tag)\n", 177 | "\n", 178 | "pool = Pool(processes=MAX_WORKERS)\n", 179 | "\n", 180 | "for model in to_run:\n", 181 | " pool.apply_async(worker, (model,))\n", 182 | "\n", 183 | "pool.close()\n", 184 | "pool.join()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "id": "7ff5c221-7250-4ead-b17e-3f7a4262b3c9", 191 | "metadata": { 192 | "tags": [] 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | 
"killall_pods(API_KEY)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "id": "a5f79168-309e-420d-bcad-2c49197d443b", 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "conda_pytorch_p310", 211 | "language": "python", 212 | "name": "conda_pytorch_p310" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.10.10" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 5 229 | } 230 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 my-other-github-account 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
def my_completion_cplus(code, **kwargs):
    """
    Ask the Poe bot with id "a2" to complete ``code`` and return the extracted Python.

    The request is retried indefinitely: after any failure the function waits
    30 seconds, tries to rebuild the module-level Poe client, and sends the
    same prompt again.

    :param code: Source text of the code to complete.
    :param kwargs: Extra keyword arguments supplied by the benchmark runner; unused here.
    :return: ``extract_code`` applied to the text of the last streamed chunk.
    """
    # The reconnect below rebinds `client`. Without this declaration that
    # assignment would make `client` a local name, so the very first
    # `client.send_message` would raise UnboundLocalError and every call
    # would pay a needless 30 s retry.
    global client

    prompt = "Please complete the following code:\n```python\n%s```" % code
    while True:
        try:
            # Drain the stream; the last chunk carries the full answer text.
            for chunk in client.send_message("a2", prompt, with_chat_break = True):
                pass
            answer = chunk["text"]
            break
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C can still stop the loop.
            print("trying again in 30s!")
            time.sleep(30)
            try:
                client = poe.Client("")
            except Exception:
                # Best effort: keep the previous client if reconnecting fails.
                pass
            print("trying now!")
    # print("***", answer, "***")
    results = extract_code(answer, "")
    print(results)
    return results
"num_samples_per_task = 1\n", 44 | "len(list(problems.keys()))" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "9de9eabb-5a58-4202-8730-82c5325fda99", 51 | "metadata": { 52 | "tags": [] 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "import time\n", 57 | "def run(prompt, seed, port = 5000):\n", 58 | " while True: # Keep trying until we break out\n", 59 | " try:\n", 60 | " result = openai.ChatCompletion.create(\n", 61 | " model=\"gpt-3.5-turbo\",\n", 62 | " messages=[\n", 63 | " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", 64 | " {\"role\": \"user\", \"content\": prompt},\n", 65 | " ],\n", 66 | " temperature=0.0\n", 67 | " )\n", 68 | "\n", 69 | " response = result['choices'][0][\"message\"][\"content\"]\n", 70 | " return response\n", 71 | " except Exception: # If we hit the rate limit\n", 72 | " time.sleep(1) # Wait for a second before retrying" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "50b6101a-3aeb-4cdf-a95b-a08044f6c6e1", 79 | "metadata": { 80 | "tags": [] 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "def get_function_body(code):\n", 85 | " lines = code.splitlines()\n", 86 | " function_lines = []\n", 87 | " for line in lines:\n", 88 | " # Include lines that start with a whitespace character or are empty\n", 89 | " if line.startswith((' ', '\\t')) or line.strip() == '':\n", 90 | " function_lines.append(line)\n", 91 | " # Stop including lines when we hit a line that contains text but does not start with a whitespace character\n", 92 | " elif line.strip() != '' and not line.startswith((' ', '\\t')):\n", 93 | " break\n", 94 | " return '\\n'.join(function_lines)\n", 95 | "\n", 96 | "def generate_one_completion(prompt_code, seed = -1, port = 5000, long_prompt = False, user_tag = \"HUMAN:\", assistant_tag = \"AI MODEL:\", system_prefix = \"\"):\n", 97 | " print(seed)\n", 98 | " suffix = 
def run_benchmark(filename, maxnum=-1, port=5000, long_prompt = False, user_tag = "HUMAN:", assistant_tag = "AI MODEL:", system_prefix = ""):
    """
    Generate completions for the loaded problems and checkpoint them to ``filename``.

    After every task the whole file is rewritten: real completions for the
    tasks done so far, followed by a placeholder completion for every task
    not reached yet, so the file always contains an entry for every problem.

    :param filename: Path of the JSONL file to (over)write.
    :param maxnum: Number of leading problems to run; ``-1`` runs all of them.
    :param port: Forwarded to ``generate_one_completion``.
    :param long_prompt: Forwarded to ``generate_one_completion``.
    :param user_tag: Forwarded to ``generate_one_completion``.
    :param assistant_tag: Forwarded to ``generate_one_completion``.
    :param system_prefix: Forwarded to ``generate_one_completion``.
    """
    all_keys = list(problems)
    # Slicing with [:-1] would silently drop the last problem, so the default
    # maxnum of -1 is handled explicitly as "run everything".
    problem_keys = all_keys if maxnum == -1 else all_keys[:maxnum]
    iterc = itertools.count()
    all_samples = []

    for idx, task_id in enumerate(problem_keys):
        # Generate real completions
        for _ in range(num_samples_per_task):
            completion = generate_one_completion(problems[task_id]["prompt"], seed=next(iterc), port=port, long_prompt = long_prompt, user_tag = user_tag, assistant_tag = assistant_tag, system_prefix = system_prefix)
            all_samples.append(dict(task_id=task_id, completion=completion))

        # Placeholder completions for every task that has not been run yet
        placeholders = [
            dict(task_id=remaining_task_id, completion=" pass")
            for remaining_task_id in all_keys[idx + 1:]
            for _ in range(num_samples_per_task)
        ]

        # Write all samples to the file, overwriting it completely
        write_jsonl(filename, all_samples + placeholders)
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "72d8974d-c534-4c07-b780-cda0d0a06d56", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "!pip install openai" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "2155ec61-375b-47a4-9e36-4bdfb1d1551f", 19 | "metadata": { 20 | "tags": [] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import openai\n", 25 | "\n", 26 | "openai.api_key = \"\"" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "08b758f0-44aa-41ac-80f9-e271fdebedc4", 33 | "metadata": { 34 | "tags": [] 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "import requests\n", 39 | "from evalplus.data import get_human_eval_plus, write_jsonl\n", 40 | "\n", 41 | "problems = get_human_eval_plus()\n", 42 | "\n", 43 | "num_samples_per_task = 1\n", 44 | "len(list(problems.keys()))" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "id": "9de9eabb-5a58-4202-8730-82c5325fda99", 51 | "metadata": { 52 | "tags": [] 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "import time\n", 57 | "def run(prompt, seed, port = 5000):\n", 58 | " while True: # Keep trying until we break out\n", 59 | " try:\n", 60 | " result = openai.ChatCompletion.create(\n", 61 | " model=\"gpt-4\",\n", 62 | " # model=\"gpt-4\",\n", 63 | " messages=[\n", 64 | " {\"role\": \"system\", \"content\": \"You are a helpful assistant. 
def get_function_body(code):
    """
    Trim ``code`` at the first top-level statement that follows a function.

    Lines are kept in order. Once a line containing ``'def '`` has been seen,
    the first later line that has text but does not start with a space or tab
    ends the result; that line and everything after it are dropped. If no
    line contains ``'def '``, every line is kept.

    :param code: Text to trim.
    :return: The kept lines joined with newlines.
    """
    collected = []
    inside_function = False

    for text in code.splitlines():
        if 'def ' in text:
            # A definition line is always kept and switches trimming on.
            inside_function = True
        elif inside_function and text.strip() and text[0] not in (' ', '\t'):
            # Unindented, non-blank line after a def: the function is over.
            break
        collected.append(text)

    return '\n'.join(collected)
def run_benchmark(filename, maxnum=-1, port=5000, long_prompt = False, user_tag = "", assistant_tag = "", system_prefix = ""):
    """
    Generate completions for the loaded problems and checkpoint them to ``filename``.

    After every task the whole file is rewritten: real completions for the
    tasks done so far, followed by a placeholder completion for every task
    not reached yet, so the file always contains an entry for every problem.

    :param filename: Path of the JSONL file to (over)write.
    :param maxnum: Number of leading problems to run; ``-1`` runs all of them.
    :param port: Forwarded to ``generate_one_completion``.
    :param long_prompt: Forwarded to ``generate_one_completion``.
    :param user_tag: Forwarded to ``generate_one_completion``.
    :param assistant_tag: Forwarded to ``generate_one_completion``.
    :param system_prefix: Forwarded to ``generate_one_completion``.
    """
    all_keys = list(problems)
    # Slicing with [:-1] would silently drop the last problem, so the default
    # maxnum of -1 is handled explicitly as "run everything".
    problem_keys = all_keys if maxnum == -1 else all_keys[:maxnum]
    iterc = itertools.count()
    all_samples = []

    for idx, task_id in enumerate(problem_keys):
        # Generate real completions
        for _ in range(num_samples_per_task):
            completion = generate_one_completion(problems[task_id]["prompt"], seed=next(iterc), port=port, long_prompt = long_prompt, user_tag = user_tag, assistant_tag = assistant_tag, system_prefix = system_prefix)
            all_samples.append(dict(task_id=task_id, completion=completion))

        # Placeholder completions for every task that has not been run yet
        placeholders = [
            dict(task_id=remaining_task_id, completion=" pass")
            for remaining_task_id in all_keys[idx + 1:]
            for _ in range(num_samples_per_task)
        ]

        # Write all samples to the file, overwriting it completely
        write_jsonl(filename, all_samples + placeholders)
225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython3", 227 | "version": "3.10.10" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 5 232 | } 233 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OSS Language Model Programming Evaluations Using the HumanEval+ Python Benchmark 2 | 3 | This repo is designed to evaluate OSS language models pulled from the HuggingFace Model Hub by running them against the standardized Python coding benchmark HumanEval+. The tool interacts with these models via the oobabooga text-generation-webui API, which serves as the interface for model interaction and generation of Python code completions. 4 | 5 | ## Results: 6 | 7 | ![HumanEval_v3-4](https://github.com/my-other-github-account/llm-humaneval-benchmarks/assets/82987814/a8f6e32c-5688-4eea-a7ca-3c8c48683cda) 8 | 9 | 10 | ## Functionality 11 | 12 | - **Interfacing with HuggingFace Models:** The program leverages the text-generation-webui API to interact with language models pulled from the HuggingFace Model Hub. These models are used for generating Python code completions. 13 | 14 | - **Code Completion:** Provides functionality for completing given Python code. Different prompt formatting options (short, medium, long) are provided to facilitate various completion contexts. The completion results are then returned. 15 | 16 | - **Benchmarking with HumanEval+:** This tool is designed to run benchmarks using the HumanEval+ standardized Python coding tasks. The `run_benchmark` function generates multiple code completions for different tasks and logs the results in a JSONL file. This enables comprehensive evaluation of a model's performance across various coding tasks. 17 | 18 | - **Server Management:** The tool includes functionality for starting a separate server process, which runs an AI model. 
This process serves as the backend that the script interacts with to generate code completions. The server's output is logged for debugging or record-keeping purposes. 19 | 20 | ## Usage 21 | 22 | You'll need to install evalplus for this (on top of having text-generation-webui installed) 23 | pip install evalplus 24 | 25 | The notebooks show how to get this running on AWS SageMaker, run a benchmark, and evaluate benchmark results using the Eval+ evaluation CLI. 26 | 27 | Basic usage looks like (you'll need to have the model already downloaded from Huggingface, which you can do easily in the usual text-generation-webui GUI) : 28 | 29 | ```python 30 | from benchmark_manager import run_benchmark_workflow 31 | 32 | # Vicuna prompt style: 33 | run_benchmark_workflow("TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g", 6666, group_size=128, 34 | prompt_type="long", user_tag="USER:", 35 | assistant_tag="ASSISTANT:", system_prefix="A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.", experiment_tag="vicuna") 36 | 37 | # Alpaca prompt style: 38 | run_benchmark_workflow("TheBloke_wizardLM-7B-GPTQ", 6666, group_size=128, 39 | prompt_type="long", user_tag="USER:", 40 | assistant_tag="ASSISTANT:", system_prefix="", experiment_tag="alpaca") 41 | ``` 42 | 43 | More advanced usage if you want to customize your prompt more looks like: 44 | 45 | ```python 46 | import os, signal 47 | from benchmark_utils import run_benchmark, run, extract_code 48 | from benchmark_manager import start_server 49 | 50 | model_name = "TheBloke_wizardLM-7B-GPTQ" 51 | portnum = 6666 52 | group_size=128 53 | 54 | server_process = start_server(model_name, portnum, group_size=group_size, 55 | working_directory='text-generation-webui') # Make sure server.py is in working_directory 56 | 57 | def my_completion(code, **kwargs): 58 | prompt = "Complete this code:\n%s\nASSISTANT:" % code 59 | results = 
def print_server_output(process):
    """
    Echo everything a subprocess writes to its stdout, one item at a time.

    Each item yielded by ``process.stdout`` is printed as soon as it is
    produced, without adding a newline of its own.

    :param process: Object whose ``stdout`` attribute can be iterated
        (for example the value returned by ``start_server``).
    """
    server_stream = process.stdout
    for chunk in server_stream:
        # Items are printed verbatim; no extra line terminator is appended.
        print(chunk, end='')
def run_benchmark_workflow(model_name, portnum, group_size=None, wbits=4,
                           maxnum=-1, start_from=0,
                           prompt_type="long", user_tag="### Instruction:",
                           assistant_tag="### Response:", system_prefix="", experiment_tag="",
                           working_directory='text-generation-webui', public=False):
    """
    This function manages the complete benchmark workflow, including starting the server, running the benchmark, and stopping the server.

    The server is stopped even when the benchmark raises, so a failed run
    does not leave the server process or the output-printing thread behind.

    :param model_name: The name of the model to be used.
    :param portnum: The base port number to be used by the server.
    :param group_size: The group size to be used by the server.
    :param wbits: The ``--wbits`` value passed to the server (omitted when None).
    :param maxnum: The maximum number of items to process during the benchmark.
    :param start_from: Forwarded to ``run_benchmark`` as ``start_from``.
    :param prompt_type: The type of prompt to use during the benchmark.
    :param user_tag: The user tag to use during the benchmark.
    :param assistant_tag: The assistant tag to use during the benchmark.
    :param system_prefix: The system prefix to use during the benchmark.
    :param experiment_tag: The experiment tag to use during the benchmark.
    :param working_directory: The working directory in which the server script resides.
    :param public: When true, the server is started with ``--share``.
    """
    # Start the server
    server_process = start_server(model_name, portnum, wbits = wbits, group_size=group_size, working_directory=working_directory, public=public)

    # Create a separate thread to print the server output
    print_thread = threading.Thread(target=print_server_output, args=(server_process,))
    print_thread.start()

    try:
        # Run the benchmark
        # NOTE(review): benchmark_utils.run_benchmark declares a required
        # `prompt_template` parameter that this call does not pass - confirm
        # which template should be supplied here.
        run_benchmark(model_name, maxnum=maxnum, start_from=start_from,
                      port=portnum, prompt_type=prompt_type, user_tag=user_tag,
                      assistant_tag=assistant_tag, experiment_tag=experiment_tag, system_prefix=system_prefix)
    finally:
        # Terminate the server whether the benchmark finished or failed.
        # Popen.terminate() is a no-op if the process has already exited.
        server_process.terminate()

        # The printer thread ends once the server's stdout is closed.
        print_thread.join()
async def run_async(prompt, seed=-1, port = 443, deterministic = True, host='localhost'):
    """
    Request a completion over the streaming websocket endpoint.

    Connects to ``ws://{host}:{port}/api/v1/stream``, sends the generation
    request as JSON, and accumulates every ``text_stream`` event until a
    ``stream_end`` event arrives. Events of any other kind are ignored.

    :param prompt: Prompt text; it is also the prefix of the returned string.
    :param seed: Value sent as the request's ``seed`` field.
    :param port: Port of the streaming endpoint.
    :param deterministic: When true, the request additionally disables
        sampling (``do_sample`` False) and sets temperature, top_p,
        repetition_penalty and num_beams to 1, top_k to 0 and
        early_stopping to False.
    :param host: Host name of the streaming endpoint.
    :return: ``prompt`` followed by all streamed text.
    """
    payload = dict(
        prompt=prompt,
        seed=seed,
        max_new_tokens=250,
        truncation_length=2048,
        skip_special_tokens=True,
    )

    if deterministic:
        payload.update(
            do_sample=False,
            temperature=1,
            top_p=1,
            top_k=0,
            repetition_penalty=1,
            num_beams=1,
            early_stopping=False,
        )

    stream_uri = f'ws://{host}:{port}/api/v1/stream'

    async with websockets.connect(stream_uri, ping_interval=None) as socket:
        await socket.send(json.dumps(payload))

        response_text = prompt

        while True:
            message = json.loads(await socket.recv())
            event = message['event']

            if event == 'text_stream':
                response_text += message['text']
            elif event == 'stream_end':
                return response_text
def run(prompt, seed=-1, port = 443, deterministic = True, host='localhost'):
    """
    Request a completion from the blocking ``/api/v1/generate`` endpoint.

    The request is attempted up to two times with a short exponential
    backoff between attempts. Any failure (connection error, HTTP error
    status, unexpected response payload) counts as a failed attempt.

    :param prompt: Prompt text; it is also the prefix of the returned string.
    :param seed: Value sent as the request's ``seed`` field.
    :param port: Port of the API server.
    :param deterministic: When true, the request additionally disables
        sampling (``do_sample`` False) and sets temperature, top_p and
        repetition_penalty to 1 and top_k to 0.
    :param host: Host of the API server, with or without a URL scheme.
    :return: ``prompt`` followed by the generated text.
    :raises Exception: "All attempts failed" once every attempt has failed;
        the last underlying error is attached as its cause.
    """
    # Set the prompt and seed for the current request
    payload = {'prompt': prompt, 'seed': seed}
    if deterministic:
        payload.update(do_sample=False, temperature=1, top_p=1, top_k=0, repetition_penalty=1)

    # requests rejects URLs without a scheme, so a bare host such as the
    # default 'localhost' gets 'http://' (matching the module-level URI).
    # Hosts that already carry a scheme are used unchanged.
    base = host if '://' in host else f'http://{host}'
    URI = f'{base}:{port}/api/v1/generate'

    # Set up retry mechanism
    retries = 2
    backoff_factor = 0.1
    last_error = None

    for i in range(retries):
        try:
            # Send the request and return the response
            response = requests.post(URI, json=payload, timeout=420)
            response.raise_for_status()
            return prompt + response.json()['results'][0]['text']
        except Exception as err:
            # A second `except RequestException` clause used to follow this
            # one; it could never run because this clause already catches it.
            last_error = err
            print(f"Attempt {i+1} failed. Error: {err}")
            if i + 1 < retries:
                sleep(backoff_factor * (2 ** i)) # Exponential backoff
    raise Exception("All attempts failed") from last_error
def extract_code_old(code):
    """Legacy extractor used when ``use_old_parser`` is set.

    Takes the text after the last "```python" fence (or the whole text when
    there is no fence), trims it with ``cut_off_prefix`` and passes it through
    ``get_function_body``.
    """
    code = cut_off_prefix(code.split("```python")[-1])
    code = get_function_body(code)
    return code


def cut_off_prefix(s):
    """Drop whatever precedes the earliest 'from ', 'def ' or 'import ' in *s*.

    Returns *s* unchanged when none of the keywords occurs.
    """
    indices = [idx for keyword in ['from ', 'def ', 'import '] if (idx := s.find(keyword)) != -1]
    return s[min(indices):] if indices else s


def extract_code(code, assistant_tag, use_old_parser=False):
    """Extract the function source from raw model output.

    Args:
        code: Raw text returned by the model.
        assistant_tag: Marker that introduces the model's reply. When it is
            the empty string the whole text is treated as the reply;
            otherwise the text between the first and second occurrence of
            the tag is used.
        use_old_parser: Delegate to ``extract_code_old`` instead.

    Returns:
        The extracted code. If the reply contains a "```python" fence, the
        text after the first fence is trimmed with ``cut_off_prefix`` and
        passed to ``get_function_body``; otherwise the unfenced reply is used.

    Raises:
        IndexError: If ``assistant_tag`` is non-empty and does not occur in
            ``code``.
    """
    if use_old_parser:
        return extract_code_old(code)

    if assistant_tag == "":
        # Only a missing fence is an expected failure here; catching
        # IndexError (rather than everything) lets KeyboardInterrupt and
        # genuine bugs propagate.
        try:
            fenced = code.split("```python")[1]
        except IndexError:
            return get_function_body(cut_off_prefix(code))
        return get_function_body(cut_off_prefix(fenced))

    # print("***", code, "***")
    reply = code.split(assistant_tag)[1]
    try:
        fenced = reply.split("```python")[1]
    except IndexError:
        # No fenced block: parse the reply as-is (no prefix trimming here).
        return get_function_body(reply)
    return get_function_body(cut_off_prefix(fenced))


def generate_one_completion(prompt_code, seed=-1, port=5000, prompt_template="", user_tag="HUMAN:",
                            assistant_tag="AI MODEL:", host="localhost", insert_func_stub=False,
                            deterministic=True, use_old_parser=False, use_async=False, **kwargs):
    """Generate and parse a completion for one prompt.

    Args:
        prompt_code: Problem prompt substituted for ``{PROMPT}`` in the
            template.
        seed: Seed forwarded to the generation backend.
        port: Port forwarded to the generation backend.
        prompt_template: ``str.format`` template containing ``{PROMPT}``.
        user_tag: Accepted for signature compatibility; not used here.
        assistant_tag: Forwarded to ``extract_code``.
        host: Host forwarded to the generation backend.
        insert_func_stub: When true, append ``def <name>(`` (taken from the
            first ``def`` in ``prompt_code``) to the formatted prompt.
        deterministic: Forwarded to the generation backend.
        use_old_parser: Forwarded to ``extract_code``.
        use_async: Selects ``run_sync`` instead of ``run``.
        **kwargs: Ignored.

    Returns:
        The extracted code (also printed to stdout).

    Raises:
        Exception: If the backend returns the prompt unchanged.
    """
    suffix = ""
    if insert_func_stub:
        suffix = 'def'+prompt_code.split("def")[1].split("(")[0]+"("
    prompt = prompt_template.format(PROMPT=prompt_code) + suffix
    # print(prompt)
    # NOTE(review): use_async=True selects run_sync and False selects run;
    # neither helper is visible here — confirm this mapping is intended.
    if use_async:
        code_result = run_sync(prompt, seed=seed, port=port, deterministic=deterministic, host=host)
    else:
        code_result = run(prompt, seed=seed, port=port, deterministic=deterministic, host=host)

    # An echo of the prompt with nothing added means nothing was generated.
    if code_result == prompt:
        raise Exception("Model doesn't appear to be loaded. Quitting.")

    to_ret = extract_code(code_result, assistant_tag=assistant_tag, use_old_parser=use_old_parser)
    print(to_ret)
    return to_ret
def run_benchmark(filename, prompt_template, maxnum=-1, start_from=0, port=5000, user_tag="",
                  assistant_tag="", host="localhost", insert_func_stub=False,
                  custom_completion=generate_one_completion, use_async=False, deterministic=True,
                  use_old_parser=False, **kwargs):
    """Run the benchmark and write samples to ``results/<filename>.jsonl``.

    Reads the module-level ``problems`` mapping and ``num_samples_per_task``.
    After every task the whole results file is rewritten with the samples
    gathered so far plus placeholder completions for the tasks not yet
    processed, so the file always has an entry for every selected task.

    Args:
        filename: Base name (without extension) of the results file.
        prompt_template: Template forwarded to ``custom_completion``.
        maxnum: Number of problems to run; ``-1`` runs all of them.
        start_from: When greater than 0, reload the existing results file,
            keep its first ``start_from * num_samples_per_task`` samples and
            continue after the last task found among them.
        port, host: Backend address forwarded to ``custom_completion``.
        user_tag, assistant_tag: Tags forwarded to ``custom_completion``.
        insert_func_stub, deterministic, use_old_parser, use_async:
            Forwarded to ``custom_completion``.
        custom_completion: Callable that returns the completion string for a
            prompt.
        **kwargs: Extra keyword arguments forwarded to ``custom_completion``.

    Raises:
        FileNotFoundError: If ``start_from > 0`` and the results file does
            not exist.
        ValueError: If the last reloaded task id is not among the selected
            problem keys.
    """
    filepath = f"results/{filename}.jsonl"
    print("Results will be written to:", filepath)
    all_keys = list(problems)
    total_tasks = len(all_keys)
    problem_keys = all_keys if maxnum == -1 else all_keys[:maxnum]

    all_samples, iterc = [], itertools.count()

    os.makedirs("results", exist_ok=True)

    # If start_from is greater than 0, load existing data
    if start_from > 0:
        with open(filepath, 'r') as file:
            # Tolerate blank lines (e.g. a trailing newline) in the file.
            existing_data = [json.loads(line) for line in file if line.strip()]
        all_samples = existing_data[:start_from*num_samples_per_task]
        last_task_id = all_samples[-1]['task_id'] if all_samples else None
        start_it = problem_keys.index(last_task_id) + 1 if last_task_id else 0
        problem_keys = problem_keys[start_it:]

    for position, task_id in enumerate(problem_keys):
        # The displayed index is offset by start_from; `position` is the
        # index into the (possibly trimmed) problem_keys list.
        print("Processing Task", start_from + position, "of", total_tasks)
        for _ in range(num_samples_per_task):
            # Prepare parameters for custom completion
            params = {
                'task_id': task_id,
                'completion': custom_completion(
                    problems[task_id]["prompt"],
                    seed=next(iterc),
                    port=port,
                    prompt_template=prompt_template,
                    user_tag=user_tag,
                    assistant_tag=assistant_tag,
                    insert_func_stub=insert_func_stub,
                    deterministic=deterministic,
                    host=host,
                    use_old_parser=use_old_parser,
                    use_async=use_async,
                    **kwargs
                )
            }
            all_samples.append(params)

        # Always add placeholders for remaining problems. Slice by position
        # in the trimmed list: slicing by the start_from-offset display index
        # would skip tasks that still need a placeholder after a resume.
        remaining_keys = problem_keys[position + 1:]
        placeholders = [dict(task_id=remaining_task_id, completion=" pass")
                        for remaining_task_id in remaining_keys
                        for _ in range(num_samples_per_task)]
        temp_samples = all_samples + placeholders

        # Write to the file, overwriting previous data
        with open(filepath, 'w') as file:
            for item in temp_samples:
                file.write(json.dumps(item) + '\n')
        sys.stdout.flush()
        sys.stderr.flush()

    print("Done writing to", filepath)
difference between each\n element and a centerpoint (mean in this case):\n MAD = average | x - x_mean |\n >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n 1.0\n \"\"\"\n mean = sum(numbers) / len(numbers)\n absolute_deviations = [abs(x - mean) for x in numbers]\n return sum(absolute_deviations) / len(absolute_deviations)"} 6 | {"task_id": "HumanEval/5", "completion": "def intersperse(numbers: List[int], delimeter: int) -> List[int]:\n result = []\n for num in numbers:\n result.append(num)\n result.append(delimeter)\n if result:\n result.pop() # remove the last delimeter\n return result"} 7 | {"task_id": "HumanEval/6", "completion": "def parse_nested_parens(paren_string: str) -> List[int]:\n max_nesting = []\n for group in paren_string.split():\n nesting = 0\n for char in group:\n if char == '(':\n nesting += 1\n elif char == ')':\n nesting -= 1\n max_nesting.append(nesting)\n return max_nesting\n"} 8 | {"task_id": "HumanEval/7", "completion": "def filter_by_substring(strings: List[str], substring: str) -> List[str]:\n return [s for s in strings if substring in s]"} 9 | {"task_id": "HumanEval/8", "completion": "def sum_product(numbers: List[int]) -> Tuple[int, int]:\n total_sum = sum(numbers)\n total_product = 1\n for num in numbers:\n total_product *= num\n return (total_sum, total_product)"} 10 | {"task_id": "HumanEval/9", "completion": "def rolling_max(numbers: List[int]) -> List[int]:\n max_list = [numbers[0]]\n for num in numbers[1:]:\n max_list.append(max(num, max_list[-1]))\n return max_list"} 11 | {"task_id": "HumanEval/10", "completion": "def is_palindrome(string: str) -> bool:\n \"\"\" Test if given string is a palindrome \"\"\"\n return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n \"\"\" Find the shortest palindrome that begins with a supplied string.\n Algorithm idea is simple:\n - Find the longest postfix of supplied string that is a palindrome.\n - Append to the end of the string reverse of a string prefix that comes before 
the palindromic suffix.\n >>> make_palindrome('')\n ''\n >>> make_palindrome('cat')\n 'catac'\n >>> make_palindrome('cata')\n 'catac'\n \"\"\"\n longest_palindrome = ''\n for i in range(len(string)):\n if is_palindrome(string[i:]):\n longest_palindrome = string[i:]\n return string + string[::-1][len(longest_palindrome):]"} 12 | {"task_id": "HumanEval/11", "completion": "def string_xor(a: str, b: str) -> str:\n result = ''\n for i, j in zip(a, b):\n if i == j:\n result += '0'\n else:\n result += '1'\n return result"} 13 | {"task_id": "HumanEval/12", "completion": "def longest(strings: List[str]) -> Optional[str]:\n if not strings:\n return None\n longest_string = strings[0]\n for string in strings:\n if len(string) > len(longest_string):\n longest_string = string\n return longest_string"} 14 | {"task_id": "HumanEval/13", "completion": "def greatest_common_divisor(a: int, b: int) -> int:\n if b == 0:\n return a\n else:\n return greatest_common_divisor(b, a % b)"} 15 | {"task_id": "HumanEval/14", "completion": "def all_prefixes(string):\n return [string[:i] for i in range(1, len(string) + 1)]"} 16 | {"task_id": "HumanEval/15", "completion": "def string_sequence(n: int) -> str:\n return''.join(map(str, range(n + 1)))"} 17 | {"task_id": "HumanEval/16", "completion": "def count_distinct_characters(string):\n return len(set(string.lower()))\n"} 18 | {"task_id": "HumanEval/17", "completion": "def parse_music(music_string):\n notes = music_string.split()\n beats = [4, 2, 1]\n result = []\n\n for note in notes:\n if note == 'o':\n result.append(beats[0])\n elif note == 'o|':\n result.append(beats[1])\n elif note == '.|':\n result.append(beats[2])\n\n return result\n"} 19 | {"task_id": "HumanEval/18", "completion": "def how_many_times(string, substring):\n return string.count(substring) - 1 if substring in string else 0"} 20 | {"task_id": "HumanEval/19", "completion": "def sort_numbers(numbers: str) -> str:\n number_list = numbers.split()\n number_list.sort()\n 
return''.join(number_list)\n"} 21 | {"task_id": "HumanEval/20", "completion": "def find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n other and return them in order (smaller number, larger number).\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.2])\n (2.0, 2.2)\n >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0])\n (2.0, 2.0)\n \"\"\"\n return sorted(set(numbers))[0:2]"} 22 | {"task_id": "HumanEval/21", "completion": "def rescale_to_unit(numbers: List[float]) -> List[float]:\n min_val = min(numbers)\n max_val = max(numbers)\n return [((val - min_val) / (max_val - min_val)) for val in numbers]"} 23 | {"task_id": "HumanEval/22", "completion": "def filter_integers(values: List[Any]) -> List[int]:\n return [value for value in values if isinstance(value, int)]"} 24 | {"task_id": "HumanEval/23", "completion": "def strlen(string: str) -> int:\n return len(string)"} 25 | {"task_id": "HumanEval/24", "completion": "def largest_divisor(n):\n for i in range(n - 1, 1, -1):\n if n % i == 0:\n return i\n\n"} 26 | {"task_id": "HumanEval/25", "completion": "def factorize(n: int) -> List[int]:\n factors = []\n while n % 2 == 0:\n factors.append(2)\n n //= 2\n for i in range(3, int(n**0.5)+1, 2):\n while n % i == 0:\n factors.append(i)\n n //= i\n if n > 2:\n factors.append(n)\n return factors"} 27 | {"task_id": "HumanEval/26", "completion": "def remove_duplicates(numbers: List[int]) -> List[int]:\n return list(set(numbers))"} 28 | {"task_id": "HumanEval/27", "completion": "def flip_case(string):\n return ''.join([c.upper() if c.islower() else c.lower() for c in string])"} 29 | {"task_id": "HumanEval/28", "completion": "def concatenate(strings: List[str]) -> str:\n return ''.join(strings)"} 30 | {"task_id": "HumanEval/29", "completion": "def filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n return [s for s in strings 
if s.startswith(prefix)]"} 31 | {"task_id": "HumanEval/30", "completion": "def get_positive(l):\n return [num for num in l if num > 0]"} 32 | {"task_id": "HumanEval/31", "completion": "def is_prime(n):\n if n <= 1:\n return False\n for i in range(2, int(n**0.5)+1):\n if n % i == 0:\n return False\n return True"} 33 | {"task_id": "HumanEval/32", "completion": "def poly(xs, x):\n return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs):\n if len(xs) % 2!= 0:\n raise ValueError('find_zero only takes list xs having even number of coefficients')\n largest_coeff = max(xs)\n if largest_coeff == 0:\n raise ValueError('find_zero only takes list xs having largest non zero coefficient')\n xs = xs.copy()\n xs.reverse()\n roots = []\n for i in range(len(xs) // 2):\n root = -xs[i] / xs[i + 1]\n roots.append(root)\n return roots"} 34 | {"task_id": "HumanEval/33", "completion": "def sort_third(l):\n result = l.copy()\n for i in range(0, len(l), 3):\n result[i:i+3] = sorted(result[i:i+3])\n return result"} 35 | {"task_id": "HumanEval/34", "completion": "def unique(l):\n return sorted(set(l))"} 36 | {"task_id": "HumanEval/35", "completion": "def max_element(l):\n return max(l)"} 37 | {"task_id": "HumanEval/36", "completion": "def fizz_buzz(n: int):\n count = 0\n for i in range(1, n):\n if i % 11 == 0 or i % 13 == 0:\n count += 1\n return count"} 38 | {"task_id": "HumanEval/37", "completion": "def sort_even(l):\n even_sorted = sorted(l[::2])\n return [val for pair in zip(l, even_sorted) for val in pair]\n"} 39 | {"task_id": "HumanEval/38", "completion": "def encode_cyclic(s: str):\n \"\"\"\n returns encoded string by cycling groups of three characters.\n \"\"\"\n # split string to groups. Each of length 3.\n groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n # cycle elements in each group. 
Unless group has fewer elements than 3.\n groups = [(group[1:] + group[0]) if len(group) == 3 else group for group in groups]\n return \"\".join(groups)\n\n\ndef decode_cyclic(s: str):\n \"\"\"\n takes as input string encoded with encode_cyclic function. Returns decoded string.\n \"\"\"\n # split string to groups. Each of length 3.\n groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n # reverse elements in each group. Unless group has fewer elements than 3.\n groups = [(group[::-1]) if len(group) == 3 else group for group in groups]\n return \"\".join(groups)"} 40 | {"task_id": "HumanEval/39", "completion": "def prime_fib(n: int):\n primes = [True] * (n + 1)\n primes[0], primes[1] = False, False\n i = 2\n while i * i <= n:\n if primes[i]:\n for j in range(i * i, n + 1, i):\n primes[j] = False\n i += 1\n fib_numbers = [i for i in range(n + 1) if primes[i]]\n return fib_numbers[n]"} 41 | {"task_id": "HumanEval/40", "completion": "def triples_sum_to_zero(l):\n return len(set(filter(lambda x: x == 0, l))) == 3"} 42 | {"task_id": "HumanEval/41", "completion": "def car_race_collision(n: int):\n return 0"} 43 | {"task_id": "HumanEval/42", "completion": "def incr_list(l):\n return [x + 1 for x in l]\n"} 44 | {"task_id": "HumanEval/43", "completion": "def pairs_sum_to_zero(l):\n return len(set(filter(lambda x: x == 0, l))) >= 2"} 45 | {"task_id": "HumanEval/44", "completion": "def change_base(x: int, base: int):\n return str(x).replace('0', '').zfill(base)"} 46 | {"task_id": "HumanEval/45", "completion": "def triangle_area(a, h):\n return 0.5 * a * h"} 47 | {"task_id": "HumanEval/46", "completion": "def fib4(n: int):\n sequence = [0, 0, 2, 0]\n for i in range(4, n):\n sequence.append(sequence[i-1] + sequence[i-2] + sequence[i-3] + sequence[i-4])\n return sequence[n]"} 48 | {"task_id": "HumanEval/47", "completion": "def median(l: list):\n l.sort()\n length = len(l)\n if length % 2 == 0:\n middle1 = l[length // 2 - 1]\n middle2 = l[length // 
2]\n median_value = (middle1 + middle2) / 2\n else:\n median_value = l[length // 2]\n return median_value"} 49 | {"task_id": "HumanEval/48", "completion": "def is_palindrome(text: str):\n return text == text[::-1]"} 50 | {"task_id": "HumanEval/49", "completion": "def modp(n: int, p: int):\n return 2**n % p"} 51 | {"task_id": "HumanEval/50", "completion": "def encode_shift(s: str):\n \"\"\"\n returns encoded string by shifting every character by 5 in the alphabet.\n \"\"\"\n return \"\".join([chr(((ord(ch) + 5 - ord(\"a\")) % 26) + ord(\"a\") for ch in s])\n\ndef decode_shift(s: str):\n \"\"\"\n takes as input string encoded with encode_shift function. Returns decoded string.\n \"\"\"\n return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\") for ch in s])"} 52 | {"task_id": "HumanEval/51", "completion": "def remove_vowels(text):\n vowels = 'aeiouAEIOU'\n return ''.join([char for char in text if char not in vowels])"} 53 | {"task_id": "HumanEval/52", "completion": "def below_threshold(l: list, t: int):\n return all(num < t for num in l)"} 54 | {"task_id": "HumanEval/53", "completion": "def add(x, y):\n return x + y\n"} 55 | {"task_id": "HumanEval/54", "completion": "def same_chars(s0: str, s1: str):\n return sorted(s0.lower()) == sorted(s1.lower())"} 56 | {"task_id": "HumanEval/55", "completion": "def fib(n: int):\n if n <= 1:\n return n\n else:\n return fib(n - 1) + fib(n - 2)"} 57 | {"task_id": "HumanEval/56", "completion": "def correct_bracketing(brackets):\n stack = []\n for char in brackets:\n if char == \"<\":\n stack.append(char)\n elif char == \">\":\n if not stack:\n return False\n stack.pop()\n return not stack"} 58 | {"task_id": "HumanEval/57", "completion": "def monotonic(l):\n return all(x <= y for x, y in zip(l, l[1:])) or all(x >= y for x, y in zip(l, l[1:]))"} 59 | {"task_id": "HumanEval/58", "completion": "def common(l1, l2):\n return sorted(set(l1) & set(l2))\n"} 60 | {"task_id": "HumanEval/59", "completion": "def 
largest_prime_factor(n: int):\n i = 2\n while i * i <= n:\n if n % i:\n i += 1\n else:\n n //= i\n return n"} 61 | {"task_id": "HumanEval/60", "completion": "def sum_to_n(n: int):\n return (n * (n + 1)) // 2\n"} 62 | {"task_id": "HumanEval/61", "completion": "def correct_bracketing(brackets):\n stack = []\n for char in brackets:\n if char == '(':\n stack.append('(')\n elif char == ')':\n if not stack:\n return False\n stack.pop()\n return not stack"} 63 | {"task_id": "HumanEval/62", "completion": "def derivative(xs):\n return [x * i for i, x in enumerate(xs[1:], 1)]"} 64 | {"task_id": "HumanEval/63", "completion": "def fibfib(n: int):\n if n == 0:\n return 0\n elif n == 1:\n return 0\n elif n == 2:\n return 1\n else:\n return fibfib(n-1) + fibfib(n-2) + fibfib(n-3)\n"} 65 | {"task_id": "HumanEval/64", "completion": "def vowels_count(s):\n vowels = 'aeiouAEIOU'\n count = 0\n for char in s:\n if char in vowels:\n count += 1\n if s[-1] == 'y':\n count += 1\n return count"} 66 | {"task_id": "HumanEval/65", "completion": "def circular_shift(x, shift):\n num_digits = len(str(x))\n if shift > num_digits:\n return str(x)[::-1]\n else:\n return str(x)[num_digits - shift:]"} 67 | {"task_id": "HumanEval/66", "completion": "def digitSum(s):\n return sum(c.isupper() for c in s)"} 68 | {"task_id": "HumanEval/67", "completion": "def fruit_distribution(s,n):\n apples, oranges = s.split()\n apples = int(apples)\n oranges = int(oranges)\n mangoes = n - apples - oranges\n return mangoes\n"} 69 | {"task_id": "HumanEval/68", "completion": "def pluck(arr):\n if not arr:\n return []\n even_values = [node for node in arr if node % 2 == 0]\n if not even_values:\n return []\n return [min(even_values), even_values.index(min(even_values))]"} 70 | {"task_id": "HumanEval/69", "completion": "def search(lst):\n freq_dict = {}\n for num in lst:\n freq_dict[num] = freq_dict.get(num, 0) + 1\n\n for num in freq_dict:\n if freq_dict[num] > num:\n return num\n\n return -1"} 71 | {"task_id": 
"HumanEval/70", "completion": "def strange_sort_list(lst):\n if not lst:\n return lst\n\n min_val = min(lst)\n max_val = max(lst)\n result = [min_val]\n\n while result[-1]!= max_val:\n result.append(max_val)\n if max_val in lst:\n lst.remove(max_val)\n\n while result[-1]!= min_val:\n result.append(min_val)\n if min_val in lst:\n lst.remove(min_val)\n\n return result"} 72 | {"task_id": "HumanEval/71", "completion": "def triangle_area(a, b, c):\n '''\n Given the lengths of the three sides of a triangle. Return the area of\n the triangle rounded to 2 decimal points if the three sides form a valid triangle. \n Otherwise return -1\n Three sides make a valid triangle when the sum of any two sides is greater \n than the third side.\n Example:\n triangle_area(3, 4, 5) == 6.00\n triangle_area(1, 2, 10) == -1\n '''\n s = (a + b + c) / 2\n area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n return round(area, 2)"} 73 | {"task_id": "HumanEval/72", "completion": "def will_it_fly(q, w):\n return q == q[::-1] and sum(q) <= w"} 74 | {"task_id": "HumanEval/73", "completion": "def smallest_change(arr):\n n = len(arr)\n i = 0\n j = n - 1\n count = 0\n\n while i < j:\n if arr[i]!= arr[j]:\n arr[i], arr[j] = arr[j], arr[i]\n count += 1\n i += 1\n j -= 1\n\n return count"} 75 | {"task_id": "HumanEval/74", "completion": "def total_match(lst1, lst2):\n if len(lst1) < len(lst2):\n return lst1\n elif len(lst1) > len(lst2):\n return lst2\n else:\n return lst1"} 76 | {"task_id": "HumanEval/75", "completion": "def is_multiply_prime(a):\n if a < 100:\n primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]\n for p in primes:\n if a % p!= 0:\n return False\n return True\n else:\n return False"} 77 | {"task_id": "HumanEval/76", "completion": "def is_simple_power(x, n):\n return n**int(log(x, n)) == x"} 78 | {"task_id": "HumanEval/77", "completion": "def iscube(a):\n return a ** 3 >= 0"} 79 | {"task_id": "HumanEval/78", "completion": "def hex_key(num):\n primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 
41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n count = 0\n for char in num:\n if char in '0123456789ABCDEF':\n if int(char, 16) in primes:\n count += 1\n return count"} 80 | {"task_id": "HumanEval/79", "completion": "def decimal_to_binary(decimal):\n binary_string = '0' if decimal == 0 else ''\n while decimal > 0:\n binary_string = '1' + binary_string\n decimal //= 2\n return 'db' + binary_string + 'db'\n"} 81 | {"task_id": "HumanEval/80", "completion": "def is_happy(s):\n return len(s) >= 3 and len(s) % 3!= 0 and len(set(s[i:i+3] for i in range(0, len(s), 3))) == len(s) / 3"} 82 | {"task_id": "HumanEval/81", "completion": "def numerical_letter_grade(grades):\n gpa_to_letter_grade = {4.0: 'A+', 3.7: 'A', 3.3: 'A-', 3.0: 'B+', 2.7: 'B', 2.3: 'B-', 2.0: 'C+', 1.7: 'C', 1.3: 'C-', 1.0: 'D+', 0.7: 'D', 0.0: 'D-'}\n return [gpa_to_letter_grade[gpa] for gpa in grades]\n"} 83 | {"task_id": "HumanEval/82", "completion": "def prime_length(string):\n if not string:\n return False\n n = len(string)\n if n == 1:\n return True\n if n <= 3:\n return False\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True"} 84 | {"task_id": "HumanEval/83", "completion": "def starts_one_ends(n):\n count = 0\n for i in range(1, 10 ** n):\n if str(i)[0] == '1' or str(i)[-1] == '1':\n count += 1\n return count"} 85 | {"task_id": "HumanEval/84", "completion": "def solve(N):\n total_sum = 0\n while N > 0:\n digit = N % 10\n total_sum += digit\n N //= 10\n return bin(total_sum)\n"} 86 | {"task_id": "HumanEval/85", "completion": "def add(lst):\n even_sum = 0\n for i, num in enumerate(lst):\n if i % 2 == 0:\n even_sum += num\n return even_sum\n"} 87 | {"task_id": "HumanEval/86", "completion": "def anti_shuffle(s):\n words = s.split()\n sorted_words =''.join(sorted(words))\n return sorted_words\n"} 88 | {"task_id": "HumanEval/87", "completion": "def get_row(lst, x):\n result = []\n for row_index, row 
in enumerate(lst):\n for col_index, value in enumerate(row):\n if value == x:\n result.append((row_index, col_index))\n result.sort()\n return result"} 89 | {"task_id": "HumanEval/88", "completion": "def sort_array(array):\n if sum(array[0], array[-1]) % 2 == 0:\n array.sort(reverse=True)\n else:\n array.sort()\n return array"} 90 | {"task_id": "HumanEval/89", "completion": "def encrypt(s):\n alphabet = 'abcdefghijklmnopqrstuvwxyz'\n encrypted = ''\n for char in s:\n if char in alphabet:\n index = alphabet.index(char)\n encrypted += alphabet[index + 2]\n else:\n encrypted += char\n return encrypted"} 91 | {"task_id": "HumanEval/90", "completion": "def next_smallest(lst):\n if len(lst) < 2:\n return None\n return sorted(set(lst))[1]\n"} 92 | {"task_id": "HumanEval/91", "completion": "def is_bored(S):\n boredom_count = 0\n for sentence in S.split('.'):\n if sentence.startswith('I'):\n boredom_count += 1\n return boredom_count"} 93 | {"task_id": "HumanEval/92", "completion": "def any_int(x, y, z):\n return (x == (y + z)) and (isinstance(x, int) and isinstance(y, int) and isinstance(z, int))"} 94 | {"task_id": "HumanEval/93", "completion": "def encode(message):\n vowels = 'aeiouAEIOU'\n result = ''\n for char in message:\n if char in vowels:\n result += vowels[(vowels.index(char) + 2) % 26]\n else:\n result += char\n return result"} 95 | {"task_id": "HumanEval/94", "completion": "def skjkasdkd(lst):\n primes = [x for x in lst if x > 1]\n largest_prime = max(primes)\n sum_of_digits = sum(int(x) for x in str(largest_prime))\n return sum_of_digits\n"} 96 | {"task_id": "HumanEval/95", "completion": "def check_dict_case(dict):\n \"\"\"\n Given a dictionary, return True if all keys are strings in lower \n case or all keys are strings in upper case, else return False.\n The function should return False is the given dictionary is empty.\n Examples:\n check_dict_case({\"a\":\"apple\", \"b\":\"banana\"}) should return True.\n check_dict_case({\"a\":\"apple\", \"A\":\"banana\", 
\"B\":\"banana\"}) should return False.\n check_dict_case({\"a\":\"apple\", 8:\"banana\", \"a\":\"apple\"}) should return False.\n check_dict_case({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) should return False.\n check_dict_case({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) should return True.\n \"\"\"\n if not dict:\n return False\n lower_case = all(isinstance(key, str) and key.islower() for key in dict)\n upper_case = all(isinstance(key, str) and key.isupper() for key in dict)\n return lower_case or upper_case"} 97 | {"task_id": "HumanEval/96", "completion": "def count_up_to(n):\n primes = []\n num = 2\n while len(primes) < n:\n if is_prime(num):\n primes.append(num)\n num += 1\n return primes\n\ndef is_prime(num):\n if num < 2:\n return False\n for i in range(2, num):\n if num % i == 0:\n return False\n return True"} 98 | {"task_id": "HumanEval/97", "completion": "def multiply(a, b):\n return int(str(a)[-1]) * int(str(b)[-1])"} 99 | {"task_id": "HumanEval/98", "completion": "def count_upper(s):\n vowels = 'aeiouAEIOU'\n count = 0\n for i, char in enumerate(s):\n if i % 2 == 0 and char in vowels:\n count += 1\n return count"} 100 | {"task_id": "HumanEval/99", "completion": "def closest_integer(value):\n num = float(value)\n if num < 0:\n return int(num - 0.5)\n else:\n return int(num + 0.5)"} 101 | {"task_id": "HumanEval/100", "completion": "def make_a_pile(n):\n pile = [n]\n while n > 1:\n n = n + 1 if n % 2 == 0 else n + 2\n pile.append(n)\n return pile"} 102 | {"task_id": "HumanEval/101", "completion": "def words_string(s):\n words = s.split(', ')\n return words\n"} 103 | {"task_id": "HumanEval/102", "completion": "def choose_num(x, y):\n if x % 2!= 0 or y % 2!= 0:\n return -1\n return max(x, y)"} 104 | {"task_id": "HumanEval/103", "completion": "def rounded_avg(n, m):\n if n > m:\n return -1\n total = 0\n for i in range(n, m + 1):\n total += i\n avg = total / (m - n + 1)\n return f\"0b{bin(int(avg))[2:]:0>4}\""} 105 | {"task_id": "HumanEval/104", 
"completion": "def unique_digits(x):\n result = [num for num in x if not any(digit % 2 == 0 for digit in num)]\n result.sort()\n return result"} 106 | {"task_id": "HumanEval/105", "completion": "def by_length(arr):\n \"\"\"\n Given an array of integers, sort the integers that are between 1 and 9 inclusive,\n reverse the resulting array, and then replace each digit by its corresponding name from\n \"One\", \"Two\", \"Three\", \"Four\", \"Five\", \"Six\", \"Seven\", \"Eight\", \"Nine\".\n\n For example:\n arr = [2, 1, 1, 4, 5, 8, 2, 3] \n -> sort arr -> [1, 1, 2, 2, 3, 4, 5, 8] \n -> reverse arr -> [8, 5, 4, 3, 2, 2, 1, 1]\n return [\"Eight\", \"Five\", \"Four\", \"Three\", \"Two\", \"Two\", \"One\", \"One\"]\n \n If the array is empty, return an empty array:\n arr = []\n return []\n \n If the array has any strange number ignore it:\n arr = [1, -1, 55] \n -> sort arr -> [-1, 1, 55]\n -> reverse arr -> [55, 1, -1]\n return = ['One']\n ```"} 107 | {"task_id": "HumanEval/106", "completion": "def f(n):\n result = []\n for i in range(1, n + 1):\n if i % 2 == 0:\n result.append(factorial(i))\n else:\n result.append(sum(range(1, i + 1)))\n return result"} 108 | {"task_id": "HumanEval/107", "completion": "def even_odd_palindrome(n):\n even = 0\n odd = 0\n for i in range(1, n + 1):\n s = str(i)\n if s == s[::-1]:\n if i % 2 == 0:\n even += 1\n else:\n odd += 1\n return (even, odd)"} 109 | {"task_id": "HumanEval/108", "completion": "def count_nums(arr):\n return sum(1 for num in arr if sum(abs(digit) for digit in num) > 0)"} 110 | {"task_id": "HumanEval/109", "completion": "def move_one_ball(arr):\n if not arr:\n return True\n for i in range(len(arr) - 1):\n if arr[i] > arr[i + 1]:\n return False\n return True"} 111 | {"task_id": "HumanEval/110", "completion": "def exchange(lst1, lst2):\n even_list = [num for num in lst1 if num % 2 == 0]\n if len(even_list) == len(lst1):\n return 'YES'\n else:\n return 'NO'\n"} 112 | {"task_id": "HumanEval/111", "completion": "def 
histogram(test):\n letter_count = {}\n for letter in test.lower():\n if letter in letter_count:\n letter_count[letter] += 1\n else:\n letter_count[letter] = 1\n\n max_count = max(letter_count.values())\n result = {key: value for key, value in letter_count.items() if value == max_count}\n return result"} 113 | {"task_id": "HumanEval/112", "completion": "def reverse_delete(s,c):\n result = ''.join([char for char in s if char not in c])\n return (result, result == result[::-1])"} 114 | {"task_id": "HumanEval/113", "completion": "def odd_count(lst):\n output = []\n for num in lst:\n count = 0\n for digit in num:\n if int(digit) % 2!= 0:\n count += 1\n output.append(f\"the number of odd elements {count}n the string {num} of the input.\")\n return output\n"} 115 | {"task_id": "HumanEval/114", "completion": "def minSubArraySum(nums):\n if not nums:\n return 0\n min_sum = math.inf\n cur_sum = 0\n for num in nums:\n cur_sum += num\n min_sum = min(min_sum, cur_sum)\n return min_sum"} 116 | {"task_id": "HumanEval/115", "completion": "def max_fill(grid, capacity):\n import math\n num_rows = len(grid)\n num_cols = len(grid[0])\n num_buckets = num_rows * num_cols // capacity\n filled_buckets = sum(1 for row in grid for cell in row if cell)\n return max(0, num_buckets - filled_buckets)"} 117 | {"task_id": "HumanEval/116", "completion": "def sort_array(arr):\n return sorted(arr, key=lambda x: (x.bit_count(), x))"} 118 | {"task_id": "HumanEval/117", "completion": "def select_words(s, n):\n words = s.split()\n result = []\n vowels = 'aeiouAEIOU'\n\n for word in words:\n consonants = 0\n for char in word:\n if char not in vowels:\n consonants += 1\n if consonants == n:\n result.append(word)\n\n return result"} 119 | {"task_id": "HumanEval/118", "completion": "def get_closest_vowel(word):\n vowels = 'aeiouAEIOU'\n for i in range(len(word) - 1, -1, -1):\n if word[i] in vowels:\n return word[i]\n return ''"} 120 | {"task_id": "HumanEval/119", "completion": "def match_parens(lst):\n 
stack = []\n for char in lst[0]:\n if char == '(':\n stack.append('(')\n elif not stack:\n return 'No'\n else:\n stack.pop()\n if stack:\n return 'No'\n return 'Yes'\n"} 121 | {"task_id": "HumanEval/120", "completion": "def maximum(arr, k):\n return sorted(set(arr))[-k:]"} 122 | {"task_id": "HumanEval/121", "completion": "def solution(lst):\n return sum(num for i, num in enumerate(lst) if i % 2 == 0 and num % 2!= 0)"} 123 | {"task_id": "HumanEval/122", "completion": "def add_elements(arr, k):\n return sum(arr[:k])\n"} 124 | {"task_id": "HumanEval/123", "completion": "def get_odd_collatz(n):\n sequence = [n]\n while sequence[-1]!= 1:\n if sequence[-1] % 2 == 0:\n sequence.append(sequence[-1] // 2)\n else:\n sequence.append(3 * sequence[-1] + 1)\n return sequence[:-1]"} 125 | {"task_id": "HumanEval/124", "completion": "def valid_date(date):\n if not date:\n return False\n date_format = '%m-%d-%Y'\n try:\n datetime.strptime(date, date_format)\n except ValueError:\n return False\n return True"} 126 | {"task_id": "HumanEval/125", "completion": "def split_words(txt):\n words = txt.split()\n if not words:\n if ',' in txt:\n words = txt.split(',')\n else:\n return len([c for c in txt.lower() if c.islower() and c.isalnum()]) % 2\n return words"} 127 | {"task_id": "HumanEval/126", "completion": "def is_sorted(lst):\n if len(lst) <= 1:\n return True\n for i in range(1, len(lst)):\n if lst[i] < lst[i - 1]:\n return False\n return True"} 128 | {"task_id": "HumanEval/127", "completion": "def intersection(interval1, interval2):\n start1, end1 = interval1\n start2, end2 = interval2\n\n # Find the intersection of the two intervals\n start = max(start1, start2)\n end = min(end1, end2)\n\n # If the intersection is empty, return \"NO\"\n if start > end:\n return \"NO\"\n\n # Find the length of the intersection\n length = end - start + 1\n\n # Check if the length is a prime number\n if is_prime(length):\n return \"YES\"\n else:\n return \"NO\"\n\ndef is_prime(n):\n if n <= 1:\n return 
False\n for i in range(2, int(n**0.5)+1):\n if n % i == 0:\n return False\n return True"} 129 | {"task_id": "HumanEval/128", "completion": "def prod_signs(arr):\n if not arr:\n return None\n product = 1\n sum_of_magnitudes = 0\n for num in arr:\n if num > 0:\n product *= 1\n elif num < 0:\n product *= -1\n else:\n continue\n sum_of_magnitudes += abs(num)\n return sum_of_magnitudes * product"} 130 | {"task_id": "HumanEval/129", "completion": "def minPath(grid, k):\n n = len(grid)\n m = len(grid[0])\n visited = [[False] * m for _ in range(n)]\n min_path = []\n\n def dfs(i, j):\n if i < 0 or i >= n or j < 0 or j >= m:\n return False\n if visited[i][j]:\n return False\n visited[i][j] = True\n min_path.append(grid[i][j])\n if len(min_path) == k:\n return True\n return any(dfs(i + x, j + y) for x, y in [(1, 0), (-1, 0), (0, 1), (0, -1)])\n\n dfs(0, 0)\n return min_path"} 131 | {"task_id": "HumanEval/130", "completion": "def tri(n):\n sequence = [0, 1, 1]\n for i in range(3, n + 1):\n sequence.append(sequence[i - 1] + sequence[i - 2] + sequence[i - 3])\n return sequence[:n + 1]"} 132 | {"task_id": "HumanEval/131", "completion": "def digits(n):\n product = 1\n while n > 0:\n digit = n % 10\n if digit % 2!= 0:\n product *= digit\n n //= 10\n return product"} 133 | {"task_id": "HumanEval/132", "completion": "def is_nested(string):\n stack = []\n for char in string:\n if char == '[':\n stack.append('(')\n elif char == ']':\n if not stack:\n return False\n if stack[-1]!= '(':\n return False\n stack.pop()\n return not stack"} 134 | {"task_id": "HumanEval/133", "completion": "def sum_squares(lst):\n return sum(int(ceil(x**2)) for x in lst)"} 135 | {"task_id": "HumanEval/134", "completion": "def check_if_last_char_is_a_letter(txt):\n return txt and txt[-1].isalpha() and not txt[-1].isspace()"} 136 | {"task_id": "HumanEval/135", "completion": "def can_arrange(arr):\n for i in range(1, len(arr)):\n if arr[i] > arr[i-1]:\n return i-1\n return -1"} 137 | {"task_id": "HumanEval/136", 
"completion": "def largest_smallest_integers(lst):\n if not lst:\n return (None, None)\n neg_max = max(x for x in lst if x < 0)\n pos_min = min(x for x in lst if x > 0)\n return (neg_max, pos_min)"} 138 | {"task_id": "HumanEval/137", "completion": "def compare_one(a, b):\n if a == b:\n return None\n if isinstance(a, (int, float)) and isinstance(b, (int, float)):\n return max(a, b)\n if isinstance(a, str) and isinstance(b, str):\n return max(a.replace(',', ''), b.replace(',', ''))\n return max(a, b)"} 139 | {"task_id": "HumanEval/138", "completion": "def is_equal_to_sum_even(n):\n return n % 2 == 0 and n % 4 == 0 and n % 6 == 0 and n % 8 == 0"} 140 | {"task_id": "HumanEval/139", "completion": "def special_factorial(n):\n result = 1\n for i in range(1, n + 1):\n result *= i\n return result"} 141 | {"task_id": "HumanEval/140", "completion": "def fix_spaces(text):\n return text.replace(' ', '_').replace(' ', '-').strip('-')"} 142 | {"task_id": "HumanEval/141", "completion": "def file_name_check(file_name):\n if len(file_name) > 3 or not file_name.endswith(('.txt', '.exe', '.dll')):\n return 'No'\n if '.' 
not in file_name:\n return 'No'\n before_dot, after_dot = file_name.split('.')\n if not before_dot or not before_dot[0].isalpha():\n return 'No'\n if after_dot not in ('txt', 'exe', 'dll'):\n return 'No'\n return 'Yes'"} 143 | {"task_id": "HumanEval/142", "completion": "def sum_squares(lst):\n return sum(x**2 if i % 3 == 0 else x**3 if i % 4 == 0 else x for i, x in enumerate(lst))"} 144 | {"task_id": "HumanEval/143", "completion": "def words_in_sentence(sentence):\n words = sentence.split()\n primes = [i for i in range(2, len(words) + 1) if is_prime(i)]\n result =''.join([words[i] for i in primes])\n return result\n\ndef is_prime(n):\n if n <= 1:\n return False\n for i in range(2, int(n**0.5) + 1):\n if n % i == 0:\n return False\n return True"} 145 | {"task_id": "HumanEval/144", "completion": "def simplify(x, n):\n num, denom = x.split('/')\n n_num, n_denom = n.split('/')\n result = int(num) * int(n_denom) + int(n_num) * int(denom)\n return result == int(num) * int(denom)"} 146 | {"task_id": "HumanEval/145", "completion": "def order_by_points(nums):\n return sorted(nums, key=lambda x: sum(map(int, str(abs(x)))))"} 147 | {"task_id": "HumanEval/146", "completion": "def specialFilter(nums):\n return sum(1 for num in nums if num > 10 and num % 10 in (1, 3, 5, 7, 9))\n"} 148 | {"task_id": "HumanEval/147", "completion": "def get_max_triples(n):\n a = [i * i - i + 1 for i in range(1, n + 1)]\n count = 0\n for i in range(n):\n for j in range(i + 1, n):\n for k in range(j + 1, n):\n if a[i] + a[j] + a[k] % 3 == 0:\n count += 1\n return count\n"} 149 | {"task_id": "HumanEval/148", "completion": "def bf(planet1, planet2):\n planets = ('Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune')\n if planet1 not in planets or planet2 not in planets:\n return ()\n i1 = planets.index(planet1)\n i2 = planets.index(planet2)\n return tuple(planets[i] for i in range(i1, i2+1))"} 150 | {"task_id": "HumanEval/149", "completion": "def sorted_list_sum(lst):\n result = 
[word for word in lst if len(word) % 2 == 0]\n result.sort(key=len)\n return result"} 151 | {"task_id": "HumanEval/150", "completion": "def x_or_y(n, x, y):\n if n % 2 == 0:\n return x\n else:\n return y"} 152 | {"task_id": "HumanEval/151", "completion": "def double_the_difference(lst):\n return sum(x**2 for x in lst if x % 2!= 0) or 0"} 153 | {"task_id": "HumanEval/152", "completion": "def compare(scores, guesses):\n result = []\n for score, guess in zip(scores, guesses):\n if score == guess:\n result.append(0)\n else:\n result.append(abs(score - guess))\n return result\n"} 154 | {"task_id": "HumanEval/153", "completion": "def Strongest_Extension(class_name, extensions):\n strengths = []\n for extension in extensions:\n CAP = sum(1 for c in extension if c.isupper())\n SM = sum(1 for c in extension if c.islower())\n strengths.append((CAP - SM, extension))\n strengths.sort(reverse=True)\n return f'{class_name}.{strengths[0][1]}'\n"} 155 | {"task_id": "HumanEval/154", "completion": "def cycpattern_check(a, b):\n return b in a or b in a[1:] or b in a[:-1]\n"} 156 | {"task_id": "HumanEval/155", "completion": "def even_odd_count(num):\n count_even = 0\n count_odd = 0\n while num > 0:\n digit = num % 10\n if digit % 2 == 0:\n count_even += 1\n else:\n count_odd += 1\n num //= 10\n return (count_even, count_odd)"} 157 | {"task_id": "HumanEval/156", "completion": "def int_to_mini_roman(num):\n roman_numerals = ['I', 'V', 'X', 'L', 'C', 'D', 'M']\n result = ''\n while num > 0:\n for i in range(len(roman_numerals) - 1, -1, -1):\n if num >= roman_numerals[i]:\n result += roman_numerals[i]\n num -= roman_numerals[i]\n break\n return result.lower()"} 158 | {"task_id": "HumanEval/157", "completion": "def right_angle_triangle(a, b, c):\n '''\n Given the lengths of the three sides of a triangle. 
Return True if the three\n sides form a right-angled triangle, False otherwise.\n A right-angled triangle is a triangle in which one angle is right angle or \n 90 degree.\n Example:\n right_angle_triangle(3, 4, 5) == True\n right_angle_triangle(1, 2, 3) == False\n '''\n return a**2 + b**2 == c**2"} 159 | {"task_id": "HumanEval/158", "completion": "def find_max(words):\n max_word = ''\n max_count = 0\n\n for word in words:\n count = len(set(word.lower()))\n if count > max_count:\n max_count = count\n max_word = word\n\n return max_word\n"} 160 | {"task_id": "HumanEval/159", "completion": "def eat(number, need, remaining):\n if need <= remaining:\n return [number + need, remaining - need]\n else:\n return [number + remaining, 0]"} 161 | {"task_id": "HumanEval/160", "completion": "def do_algebra(operator, operand):\n expression = ''.join(map(str, operator))\n for i, num in enumerate(operand):\n expression = expression.replace(f'@{i}', str(num))\n return eval(expression)\n"} 162 | {"task_id": "HumanEval/161", "completion": "def solve(s):\n result = ''\n for char in s:\n if char.isalpha():\n result += char.upper() if char.islower() else char.lower()\n else:\n result += char\n return result"} 163 | {"task_id": "HumanEval/162", "completion": "def string_to_md5(text):\n if text == '':\n return None\n return hashlib.md5(text.encode('utf-8')).hexdigest()"} 164 | {"task_id": "HumanEval/163", "completion": "def generate_integers(a, b):\n even_digits = []\n current_number = a\n while current_number <= b:\n if current_number % 2 == 0:\n even_digits.append(current_number)\n current_number += 1\n return even_digits"} 165 | -------------------------------------------------------------------------------- /jsonl_examples/gpt3_final_500c_long.jsonl: -------------------------------------------------------------------------------- 1 | {"task_id": "HumanEval/0", "completion": "from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n for i in 
range(len(numbers)):\n for j in range(i+1, len(numbers)):\n if abs(numbers[i] - numbers[j]) < threshold:\n return True\n return False"} 2 | {"task_id": "HumanEval/1", "completion": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n stack = []\n result = []\n current = ''\n for char in paren_string:\n if char == '(':\n stack.append(char)\n elif char == ')':\n stack.pop()\n current += char\n if not stack:\n result.append(current.replace(' ', ''))\n current = ''\n return result"} 3 | {"task_id": "HumanEval/2", "completion": "def truncate_number(number: float) -> float:\n return number - int(number)"} 4 | {"task_id": "HumanEval/3", "completion": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n balance = 0\n for operation in operations:\n balance += operation\n if balance < 0:\n return True\n return False"} 5 | {"task_id": "HumanEval/4", "completion": "from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n mean = sum(numbers) / len(numbers)\n absolute_deviations = [abs(num - mean) for num in numbers]\n return sum(absolute_deviations) / len(absolute_deviations)"} 6 | {"task_id": "HumanEval/5", "completion": "from typing import List\n\n\ndef intersperse(numbers: List[int], delimeter: int) -> List[int]:\n return [x for pair in zip(numbers, [delimeter] * (len(numbers) - 1) + [0]) for x in pair]"} 7 | {"task_id": "HumanEval/6", "completion": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n return [max([paren_string.count('(' , i, j) - paren_string.count(')' , i, j) for i in range(len(paren_string))]) for j in range(len(paren_string)) if paren_string[j] == ' ']"} 8 | {"task_id": "HumanEval/7", "completion": "from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n return [s for s in strings if substring in s]"} 9 | {"task_id": "HumanEval/8", "completion": "from typing import List, 
Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n s = sum(numbers)\n p = 1\n for n in numbers:\n p *= n\n return (s, p) if numbers else (0, 1)"} 10 | {"task_id": "HumanEval/9", "completion": "from typing import List, Tuple\n\n\ndef rolling_max(numbers: List[int]) -> List[int]:\n max_list = []\n current_max = float('-inf')\n for num in numbers:\n current_max = max(current_max, num)\n max_list.append(current_max)\n return max_list"} 11 | {"task_id": "HumanEval/10", "completion": "def is_palindrome(string: str) -> bool:\n return string == string[::-1]\n\ndef make_palindrome(string: str) -> str:\n for i in range(len(string), -1, -1):\n if is_palindrome(string[:i]):\n return string + string[:i][::-1]"} 12 | {"task_id": "HumanEval/11", "completion": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n return ''.join(str(int(x) ^ int(y)) for x, y in zip(a, b))"} 13 | {"task_id": "HumanEval/12", "completion": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n if not strings:\n return None\n return max(strings, key=len)"} 14 | {"task_id": "HumanEval/13", "completion": "def greatest_common_divisor(a: int, b: int) -> int:\n while b:\n a, b = b, a % b\n return a"} 15 | {"task_id": "HumanEval/14", "completion": "from typing import List\n\n\ndef all_prefixes(string: str) -> List[str]:\n return [string[:i+1] for i in range(len(string))]"} 16 | {"task_id": "HumanEval/15", "completion": "def string_sequence(n: int) -> str:\n return ' '.join(str(i) for i in range(n+1))"} 17 | {"task_id": "HumanEval/16", "completion": "def count_distinct_characters(string: str) -> int:\n return len(set(string.lower()))"} 18 | {"task_id": "HumanEval/17", "completion": "from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n notes = {'o': 4, 'o|': 2, '.|': 1}\n return [notes[note] for note in music_string.split()]"} 19 | {"task_id": "HumanEval/18", "completion": "def how_many_times(string: str, 
substring: str) -> int:\n return sum(1 for i in range(len(string)-len(substring)+1) if string[i:i+len(substring)] == substring)"} 20 | {"task_id": "HumanEval/19", "completion": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n num_dict = {'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9}\n num_list = numbers.split()\n num_list.sort(key=lambda x: num_dict[x])\n return ' '.join(num_list)"} 21 | {"task_id": "HumanEval/20", "completion": "from typing import List, Tuple\n\n\ndef find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n numbers.sort()\n min_diff = float('inf')\n closest_pair = None\n for i in range(len(numbers)-1):\n diff = numbers[i+1] - numbers[i]\n if diff < min_diff:\n min_diff = diff\n closest_pair = (numbers[i], numbers[i+1])\n return closest_pair"} 22 | {"task_id": "HumanEval/21", "completion": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n min_num = min(numbers)\n max_num = max(numbers)\n return [(num - min_num) / (max_num - min_num) for num in numbers]"} 23 | {"task_id": "HumanEval/22", "completion": "from typing import List, Any\n\n\ndef filter_integers(values: List[Any]) -> List[int]:\n return [value for value in values if isinstance(value, int)]"} 24 | {"task_id": "HumanEval/23", "completion": "def strlen(string: str) -> int:\n return len(string)"} 25 | {"task_id": "HumanEval/24", "completion": "def largest_divisor(n: int) -> int:\n for i in range(n-1, 0, -1):\n if n % i == 0:\n return i"} 26 | {"task_id": "HumanEval/25", "completion": "from typing import List\n\n\ndef factorize(n: int) -> List[int]:\n factors = []\n i = 2\n while i * i <= n:\n if n % i:\n i += 1\n else:\n n //= i\n factors.append(i)\n if n > 1:\n factors.append(n)\n return factors"} 27 | {"task_id": "HumanEval/26", "completion": "from typing import List\n\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n return 
list(dict.fromkeys(numbers))"} 28 | {"task_id": "HumanEval/27", "completion": "def flip_case(string: str) -> str:\n return string.swapcase()"} 29 | {"task_id": "HumanEval/28", "completion": "from typing import List\n\n\ndef concatenate(strings: List[str]) -> str:\n return ''.join(strings)"} 30 | {"task_id": "HumanEval/29", "completion": "from typing import List\n\n\ndef filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n return [s for s in strings if s.startswith(prefix)]"} 31 | {"task_id": "HumanEval/30", "completion": "def get_positive(l: list):\n return [num for num in l if num > 0]"} 32 | {"task_id": "HumanEval/31", "completion": "def is_prime(n):\n if n < 2:\n return False\n for i in range(2, int(n**0.5) + 1):\n if n % i == 0:\n return False\n return True"} 33 | {"task_id": "HumanEval/32", "completion": "import math\n\n\ndef poly(xs: list, x: float):\n return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs: list):\n n = len(xs)\n if n % 2 != 0 or xs[-1] == 0:\n raise ValueError(\"Invalid input\")\n a = xs[-1]\n b = xs[-2]\n for i in range(n - 2):\n if i % 2 == 0:\n a += xs[i]\n else:\n b += xs[i]\n return -b / (2 * a)"} 34 | {"task_id": "HumanEval/33", "completion": "def sort_third(l: list):\n return [sorted(l[i:i+3]) if i%3 == 0 else l[i] for i in range(len(l))]"} 35 | {"task_id": "HumanEval/34", "completion": "def unique(l: list):\n return sorted(list(set(l)))"} 36 | {"task_id": "HumanEval/35", "completion": "def max_element(l: list):\n return max(l)"} 37 | {"task_id": "HumanEval/36", "completion": "def fizz_buzz(n: int):\n return sum(str(i).count('7') for i in range(n) if i % 11 == 0 or i % 13 == 0)"} 38 | {"task_id": "HumanEval/37", "completion": "def sort_even(l: list):\n even_indices = [i for i in range(len(l)) if i % 2 == 0]\n even_values = [l[i] for i in even_indices]\n sorted_even_values = sorted(even_values)\n l_prime = l.copy()\n for i in even_indices:\n l_prime[i] = sorted_even_values.pop(0)\n return 
l_prime"} 39 | {"task_id": "HumanEval/38", "completion": "def decode_cyclic(s: str):\n \"\"\"\n takes as input string encoded with encode_cyclic function. Returns decoded string.\n \"\"\"\n # split string to groups. Each of length 3.\n groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n # cycle elements in each group. Unless group has fewer elements than 3.\n groups = [(group[-1] + group[:-1]) if len(group) == 3 else group for group in groups]\n return \"\".join(groups)"} 40 | {"task_id": "HumanEval/39", "completion": "def prime_fib(n: int):\n def is_prime(num):\n if num < 2:\n return False\n for i in range(2, int(num ** 0.5) + 1):\n if num % i == 0:\n return False\n return True\n\n def fibonacci(num):\n if num == 0:\n return 0\n elif num == 1:\n return 1\n else:\n return fibonacci(num - 1) + fibonacci(num - 2)\n\n count = 0\n num = 2\n while count < n:\n if is_prime(fibonacci(num)):\n count += 1\n num += 1\n return fibonacci(num - 1)"} 41 | {"task_id": "HumanEval/40", "completion": "def triples_sum_to_zero(l: list):\n return any(sum([l[i], l[j], l[k]]) == 0 for i in range(len(l)) for j in range(i+1, len(l)) for k in range(j+1, len(l)))"} 42 | {"task_id": "HumanEval/41", "completion": "def car_race_collision(n: int):\n return n*(n-1)//2"} 43 | {"task_id": "HumanEval/42", "completion": "def incr_list(l: list):\n return [x+1 for x in l]"} 44 | {"task_id": "HumanEval/43", "completion": "def pairs_sum_to_zero(l):\n return any(-x in l for x in l if x != 0)"} 45 | {"task_id": "HumanEval/44", "completion": "def change_base(x: int, base: int):\n if x == 0:\n return '0'\n digits = []\n while x > 0:\n digits.append(str(x % base))\n x //= base\n return ''.join(digits[::-1])"} 46 | {"task_id": "HumanEval/45", "completion": "def triangle_area(a, h):\n return 0.5 * a * h"} 47 | {"task_id": "HumanEval/46", "completion": "def fib4(n: int):\n if n == 0:\n return 0\n elif n == 1:\n return 0\n elif n == 2:\n return 2\n else:\n a, b, c, d = 0, 0, 2, 0\n 
for i in range(3, n+1):\n d = a + b + c + d\n a, b, c = b, c, d\n return d"} 48 | {"task_id": "HumanEval/47", "completion": "def median(l: list):\n l.sort()\n n = len(l)\n if n % 2 == 0:\n return (l[n//2-1] + l[n//2])/2\n else:\n return l[n//2]"} 49 | {"task_id": "HumanEval/48", "completion": "def is_palindrome(text: str):\n return text == text[::-1]"} 50 | {"task_id": "HumanEval/49", "completion": "def modp(n: int, p: int):\n return pow(2, n, p) if n else 1"} 51 | {"task_id": "HumanEval/50", "completion": "def decode_shift(s: str):\n \"\"\"\n takes as input string encoded with encode_shift function. Returns decoded string.\n \"\"\"\n return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])"} 52 | {"task_id": "HumanEval/51", "completion": "def remove_vowels(text):\n vowels = \"aeiouAEIOU\"\n return \"\".join([char for char in text if char not in vowels])"} 53 | {"task_id": "HumanEval/52", "completion": "def below_threshold(l: list, t: int) -> bool:\n return all(num < t for num in l)"} 54 | {"task_id": "HumanEval/53", "completion": "def add(x: int, y: int) -> int:\n return x + y"} 55 | {"task_id": "HumanEval/54", "completion": "def same_chars(s0: str, s1: str):\n return sorted(s0) == sorted(s1)"} 56 | {"task_id": "HumanEval/55", "completion": "def fib(n: int):\n if n <= 1:\n return n\n else:\n return fib(n-1) + fib(n-2)"} 57 | {"task_id": "HumanEval/56", "completion": "def correct_bracketing(brackets: str):\n stack = []\n for bracket in brackets:\n if bracket == \"<\":\n stack.append(bracket)\n elif bracket == \">\":\n if not stack:\n return False\n stack.pop()\n return not stack"} 58 | {"task_id": "HumanEval/57", "completion": "def monotonic(l: list):\n return all(l[i] <= l[i+1] for i in range(len(l)-1)) or all(l[i] >= l[i+1] for i in range(len(l)-1))"} 59 | {"task_id": "HumanEval/58", "completion": "def common(l1: list, l2: list):\n return sorted(list(set(l1) & set(l2)))"} 60 | {"task_id": "HumanEval/59", "completion": "def 
largest_prime_factor(n: int):\n i = 2\n while i * i <= n:\n if n % i:\n i += 1\n else:\n n //= i\n return n"} 61 | {"task_id": "HumanEval/60", "completion": "def sum_to_n(n: int):\n return sum(range(1, n+1))"} 62 | {"task_id": "HumanEval/61", "completion": "def correct_bracketing(brackets: str):\n stack = []\n for bracket in brackets:\n if bracket == \"(\":\n stack.append(bracket)\n elif bracket == \")\":\n if not stack:\n return False\n stack.pop()\n return not stack"} 63 | {"task_id": "HumanEval/62", "completion": "def derivative(xs: list):\n return [xs[i] * i for i in range(1, len(xs))]"} 64 | {"task_id": "HumanEval/63", "completion": "def fibfib(n: int):\n if n == 0:\n return 0\n elif n == 1 or n == 2:\n return 1\n else:\n a, b, c = 1, 1, 0\n for i in range(3, n+1):\n c = a + b + c\n a, b = b, c\n return c"} 65 | {"task_id": "HumanEval/64", "completion": "def vowels_count(s):\n vowels = ['a', 'e', 'i', 'o', 'u']\n count = 0\n s = s.lower()\n if s[-1] == 'y':\n vowels.append('y')\n for char in s:\n if char in vowels:\n count += 1\n return count"} 66 | {"task_id": "HumanEval/65", "completion": "def circular_shift(x, shift):\n x_str = str(x)\n if shift > len(x_str):\n return x_str[::-1]\n else:\n return x_str[-shift:] + x_str[:-shift]"} 67 | {"task_id": "HumanEval/66", "completion": "def digitSum(s):\n return sum(ord(c) for c in s if c.isupper())"} 68 | {"task_id": "HumanEval/67", "completion": "def fruit_distribution(s, n):\n fruits = s.split()\n apples = int(fruits[0])\n oranges = int(fruits[3])\n mangoes = n - apples - oranges\n return mangoes"} 69 | {"task_id": "HumanEval/68", "completion": "def pluck(arr):\n even_nodes = [node for node in arr if node % 2 == 0]\n if not even_nodes:\n return []\n min_even = min(even_nodes)\n min_even_index = arr.index(min_even)\n return [min_even, min_even_index]"} 70 | {"task_id": "HumanEval/69", "completion": "def search(lst):\n for i in sorted(set(lst), reverse=True):\n if lst.count(i) >= i > 0:\n return i\n return -1"} 71 | 
{"task_id": "HumanEval/70", "completion": "def strange_sort_list(lst):\n result = []\n while lst:\n result.append(min(lst))\n lst.remove(min(lst))\n if lst:\n result.append(max(lst))\n lst.remove(max(lst))\n return result"} 72 | {"task_id": "HumanEval/71", "completion": "def triangle_area(a, b, c):\n s = (a + b + c) / 2\n area = (s * (s - a) * (s - b) * (s - c)) ** 0.5 if a + b > c and a + c > b and b + c > a else -1\n return round(area, 2)"} 73 | {"task_id": "HumanEval/72", "completion": "def will_it_fly(q,w):\n return q == q[::-1] and sum(q) <= w"} 74 | {"task_id": "HumanEval/73", "completion": "def smallest_change(arr):\n count = 0\n for i in range(len(arr)//2):\n if arr[i] != arr[-i-1]:\n count += 1\n return count"} 75 | {"task_id": "HumanEval/74", "completion": "def total_match(lst1, lst2):\n return lst1 if sum(len(s) for s in lst1) <= sum(len(s) for s in lst2) else lst2"} 76 | {"task_id": "HumanEval/75", "completion": "def is_multiply_prime(a):\n primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n count = 0\n for prime in primes:\n if a % prime == 0:\n count += 1\n if count == 3:\n return True\n return False"} 77 | {"task_id": "HumanEval/76", "completion": "def is_simple_power(x, n):\n return x > 0 and n > 0 and (x == 1 or n != 1 and x % n == 0 and is_simple_power(x // n, n))"} 78 | {"task_id": "HumanEval/77", "completion": "def iscube(a):\n return (a**(1/3)).is_integer()"} 79 | {"task_id": "HumanEval/78", "completion": "def hex_key(num):\n primes = ['2', '3', '5', '7', 'B', 'D']\n count = 0\n for digit in num:\n if digit in primes:\n count += 1\n return count"} 80 | {"task_id": "HumanEval/79", "completion": "def decimal_to_binary(decimal):\n binary = bin(decimal)[2:]\n return f\"db{binary}db\""} 81 | {"task_id": "HumanEval/80", "completion": "def is_happy(s):\n return len(s) >= 3 and all(s[i] != s[i+1] and s[i] != s[i+2] and s[i+1] != s[i+2] for i in range(len(s)-2))"} 82 | {"task_id": "HumanEval/81", 
"completion": "def numerical_letter_grade(grades):\n return ['A+' if gpa == 4.0 else 'A' if gpa > 3.7 else 'A-' if gpa > 3.3 else 'B+' if gpa > 3.0 else 'B' if gpa > 2.7 else 'B-' if gpa > 2.3 else 'C+' if gpa > 2.0 else 'C' if gpa > 1.7 else 'C-' if gpa > 1.3 else 'D+' if gpa > 1.0 else 'D' if gpa > 0.7 else 'D-' if gpa > 0.0 else 'E' for gpa in grades]"} 83 | {"task_id": "HumanEval/82", "completion": "import math\n\ndef prime_length(string):\n return all(string.count(char) == 1 for char in string) and all(string.count(char) != 0 for char in string) and all(string.count(char) != 2 for char in string) and all(string.count(char) != 3 for char in string) and all(string.count(char) != 5 for char in string) and all(string.count(char) != 7 for char in string) and len(string) > 1 and all(string.count(char) != 11 for char in string) and all(string.count(char) != 13 for char in string) and all(string.count(char) != 17 for char in string) and all(string.count(char) != 19 for char in string) and all(string.count(char) != 23 for char in string) and all(string.count(char) != 29 for char in string) and all(string.count(char) != 31 for char in string) and all(string.count(char) != 37 for char in string) and all(string.count(char) != 41 for char in string) and all(string.count(char) != 43 for char in string) and all(string.count(char) != 47 for char in string) and all(string.count(char) != 53 for char in string) and all(string.count(char) != 59 for char in string) and all(string.count(char) != 61 for char in string) and all(string.count(char) != 67 for char in string) and all(string.count(char) != 71 for char in string) and all(string.count(char) != 73 for char in string) and all(string.count(char) != 79 for char in string) and all(string.count(char) != 83 for char in string) and all(string.count(char) != 89 for char in string) and all(string.count(char) != 97 for char in string) and all(string.count(char) != 101 for char in string) and all(string.count(char) != 103 for char in 
string) and all(string.count(char) != 107 for char in string) and all(string.count(char) != 109 for char in string) and all(string.count(char) != 113 for char in string) and all(string.count(char) != 127 for char in string) and all(string.count(char) != 131 for char in string) and all(string.count(char) != "} 84 | {"task_id": "HumanEval/83", "completion": "def starts_one_ends(n):\n count = 0\n for i in range(10**(n-1), 10**n):\n if str(i)[0] == '1' or str(i)[-1] == '1':\n count += 1\n return count"} 85 | {"task_id": "HumanEval/84", "completion": "def solve(N):\n return bin(sum(int(digit) for digit in str(N)))[2:]"} 86 | {"task_id": "HumanEval/85", "completion": "def add(lst):\n return sum([lst[i] for i in range(1, len(lst), 2) if lst[i] % 2 == 0])"} 87 | {"task_id": "HumanEval/86", "completion": "def anti_shuffle(s):\n words = s.split()\n ordered_words = []\n for word in words:\n ordered_word = ''.join(sorted(word))\n ordered_words.append(ordered_word)\n return ' '.join(ordered_words)"} 88 | {"task_id": "HumanEval/87", "completion": "def get_row(lst, x):\n return [(i, j) for i in range(len(lst)) for j in sorted(range(len(lst[i])), reverse=True) if lst[i][j] == x]"} 89 | {"task_id": "HumanEval/88", "completion": "def sort_array(array):\n return sorted(array, reverse=(array and (array[0]+array[-1])%2==0))"} 90 | {"task_id": "HumanEval/89", "completion": "def encrypt(s):\n return ''.join([chr((ord(c) - 97 + 2 * 2) % 26 + 97) for c in s])"} 91 | {"task_id": "HumanEval/90", "completion": "def next_smallest(lst):\n if len(lst) < 2:\n return None\n lst = list(set(lst))\n if len(lst) < 2:\n return None\n lst.sort()\n return lst[1]"} 92 | {"task_id": "HumanEval/91", "completion": "def is_bored(S):\n sentences = S.split('.')\n sentences += S.split('?')\n sentences += S.split('!')\n count = 0\n for sentence in sentences:\n if sentence.strip().startswith('I'):\n count += 1\n return count"} 93 | {"task_id": "HumanEval/92", "completion": "def any_int(x, y, z):\n return 
isinstance(x, int) and isinstance(y, int) and isinstance(z, int) and (x == y + z or y == x + z or z == x + y)"} 94 | {"task_id": "HumanEval/93", "completion": "def encode(message):\n vowels = 'aeiouAEIOU'\n encoded = ''\n for char in message:\n if char.isalpha():\n if char in vowels:\n encoded += chr(ord(char)+2)\n else:\n if char.islower():\n encoded += char.upper()\n else:\n encoded += char.lower()\n else:\n encoded += char\n return encoded"} 95 | {"task_id": "HumanEval/94", "completion": "def largest_prime_sum(lst):\n def is_prime(n):\n if n < 2:\n return False\n for i in range(2, int(n**0.5)+1):\n if n % i == 0:\n return False\n return True\n \n max_prime = 0\n for num in lst:\n if is_prime(num) and num > max_prime:\n max_prime = num\n \n return sum(int(digit) for digit in str(max_prime)) if max_prime != 0 else 0"} 96 | {"task_id": "HumanEval/95", "completion": "def check_dict_case(dict):\n if not dict:\n return False\n keys = list(dict.keys())\n if all(key.islower() for key in keys) or all(key.isupper() for key in keys):\n return True\n else:\n return False"} 97 | {"task_id": "HumanEval/96", "completion": "def count_up_to(n):\n if n < 2:\n return []\n primes = [2]\n i = 3\n while i < n:\n if all(i % p != 0 for p in primes):\n primes.append(i)\n i += 2\n return primes"} 98 | {"task_id": "HumanEval/97", "completion": "def multiply(a, b):\n return abs(a % 10 * b % 10)"} 99 | {"task_id": "HumanEval/98", "completion": "def count_upper(s):\n vowels = set(['A', 'E', 'I', 'O', 'U'])\n count = 0\n for i in range(0, len(s), 2):\n if s[i] in vowels and s[i].isupper():\n count += 1\n return count"} 100 | {"task_id": "HumanEval/99", "completion": "def closest_integer(value):\n value = float(value)\n if value >= 0:\n return int(value + 0.5)\n else:\n return int(value - 0.5)"} 101 | {"task_id": "HumanEval/100", "completion": "def make_a_pile(n):\n pile = []\n stones = n\n for i in range(n):\n pile.append(stones)\n if n % 2 == 0:\n stones += 2\n else:\n stones += 1\n return 
pile"} 102 | {"task_id": "HumanEval/101", "completion": "def words_string(s):\n return re.findall(r'\\w+', s)"} 103 | {"task_id": "HumanEval/102", "completion": "def choose_num(x, y):\n return max([i for i in range(x, y+1) if i%2==0]+[-1])"} 104 | {"task_id": "HumanEval/103", "completion": "def rounded_avg(n, m):\n if n > m:\n return -1\n avg = round(sum(range(n, m+1)) / (m-n+1))\n return bin(avg)"} 105 | {"task_id": "HumanEval/104", "completion": "def unique_digits(x):\n return sorted([num for num in x if all(int(digit) % 2 != 0 for digit in str(num))])"} 106 | {"task_id": "HumanEval/105", "completion": "def by_length(arr):\n num_names = [\"\", \"One\", \"Two\", \"Three\", \"Four\", \"Five\", \"Six\", \"Seven\", \"Eight\", \"Nine\"]\n filtered_arr = [num for num in arr if 1 <= num <= 9]\n sorted_arr = sorted(filtered_arr)\n reversed_arr = sorted_arr[::-1]\n result = [num_names[num] for num in reversed_arr]\n return result"} 107 | {"task_id": "HumanEval/106", "completion": "def f(n):\n def factorial(num):\n if num == 0:\n return 1\n else:\n return num * factorial(num - 1)\n\n result = []\n for i in range(1, n+1):\n if i % 2 == 0:\n result.append(factorial(i))\n else:\n result.append(sum(range(1, i+1)))\n return result"} 108 | {"task_id": "HumanEval/107", "completion": "def even_odd_palindrome(n):\n even_count = 0\n odd_count = 0\n for i in range(1, n+1):\n if str(i) == str(i)[::-1]:\n if i % 2 == 0:\n even_count += 1\n else:\n odd_count += 1\n return (even_count, odd_count)"} 109 | {"task_id": "HumanEval/108", "completion": "def count_nums(arr):\n def sum_digits(n):\n return sum(int(d) for d in str(abs(n)))\n return sum(1 for num in arr if sum_digits(num) > 0)"} 110 | {"task_id": "HumanEval/109", "completion": "def move_one_ball(arr):\n if not arr:\n return True\n n = len(arr)\n for i in range(n):\n if arr == sorted(arr):\n return True\n arr = [arr[-1]] + arr[:-1]\n return False"} 111 | {"task_id": "HumanEval/110", "completion": "def exchange(lst1, lst2):\n if 
sum(x % 2 == 1 for x in lst1) > sum(x % 2 == 1 for x in lst2):\n return \"NO\"\n return \"YES\""} 112 | {"task_id": "HumanEval/111", "completion": "def histogram(test):\n if not test:\n return {}\n counts = {}\n for letter in test.split():\n counts[letter] = counts.get(letter, 0) + 1\n max_count = max(counts.values())\n return {letter: count for letter, count in counts.items() if count == max_count}"} 113 | {"task_id": "HumanEval/112", "completion": "def reverse_delete(s, c):\n s = ''.join([char for char in s if char not in c])\n return (s, s == s[::-1])"} 114 | {"task_id": "HumanEval/113", "completion": "def odd_count(lst):\n return [\"the number of odd elements in the string {} of the input.\".format(i) for i in lst]"} 115 | {"task_id": "HumanEval/114", "completion": "import math\n\ndef minSubArraySum(nums):\n min_sum = math.inf\n curr_sum = 0\n for num in nums:\n curr_sum += num\n min_sum = min(min_sum, curr_sum)\n curr_sum = max(curr_sum, 0)\n return min_sum"} 116 | {"task_id": "HumanEval/115", "completion": "def max_fill(grid, capacity):\n num_buckets = 0\n for row in grid:\n num_buckets += math.ceil(sum(row)/capacity)\n return num_buckets"} 117 | {"task_id": "HumanEval/116", "completion": "def sort_array(arr):\n return sorted(arr, key=lambda x: (bin(x).count('1'), x))"} 118 | {"task_id": "HumanEval/117", "completion": "def select_words(s, n):\n if not s:\n return []\n words = s.split()\n result = []\n for word in words:\n consonants = sum(1 for c in word if c.isalpha() and c.lower() not in 'aeiou')\n if consonants == n:\n result.append(word)\n return result"} 119 | {"task_id": "HumanEval/118", "completion": "def get_closest_vowel(word):\n vowels = set('aeiouAEIOU')\n consonants = set('bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ')\n word = word.strip(consonants)\n if len(word) < 2:\n return \"\"\n for i in range(len(word)-2, -1, -1):\n if word[i] in vowels:\n return word[i]\n return \"\""} 120 | {"task_id": "HumanEval/119", "completion": "def 
match_parens(lst):\n s = ''.join(lst)\n stack = []\n for c in s:\n if c == '(':\n stack.append(c)\n elif not stack:\n return 'No'\n else:\n stack.pop()\n return 'Yes' if not stack else 'No'"} 121 | {"task_id": "HumanEval/120", "completion": "def maximum(arr, k):\n return sorted(arr)[-k:]"} 122 | {"task_id": "HumanEval/121", "completion": "def solution(lst):\n return sum(lst[i] for i in range(1, len(lst), 2) if lst[i] % 2 != 0)"} 123 | {"task_id": "HumanEval/122", "completion": "def add_elements(arr, k):\n return sum([x for x in arr[:k] if abs(x) < 100])"} 124 | {"task_id": "HumanEval/123", "completion": "def get_odd_collatz(n):\n collatz = [n]\n while n != 1:\n if n % 2 == 0:\n n = n // 2\n else:\n n = 3 * n + 1\n collatz.append(n)\n return sorted(list(set(filter(lambda x: x % 2 == 1, collatz)) if n != 1 else [1]))"} 125 | {"task_id": "HumanEval/124", "completion": "def valid_date(date):\n try:\n month, day, year = date.split('-')\n month = int(month)\n day = int(day)\n year = int(year)\n if month < 1 or month > 12:\n return False\n if month in [1, 3, 5, 7, 8, 10, 12]:\n if day < 1 or day > 31:\n return False\n elif month in [4, 6, 9, 11]:\n if day < 1 or day > 30:\n return False\n elif month == 2:\n if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):\n if day < 1 or day > 29:\n return False\n else:\n if day < 1 or day > 28:\n return False\n else:\n return False\n return True\n except:\n return False"} 126 | {"task_id": "HumanEval/125", "completion": "def split_words(txt):\n words = txt.split()\n if len(words) == 1:\n words = txt.split(',')\n if len(words) == 1:\n count = sum(1 for c in txt if c.islower() and ord(c) % 2 == 1)\n return count\n return words"} 127 | {"task_id": "HumanEval/126", "completion": "def is_sorted(lst):\n return lst == sorted(set(lst)) and lst == sorted(lst)"} 128 | {"task_id": "HumanEval/127", "completion": "def intersection(interval1, interval2):\n def is_prime(n):\n if n < 2:\n return False\n for i in range(2, int(n**0.5)+1):\n if n 
% i == 0:\n return False\n return True\n \n start1, end1 = interval1\n start2, end2 = interval2\n intersection_start = max(start1, start2)\n intersection_end = min(end1, end2)\n if intersection_start > intersection_end:\n return \"NO\"\n intersection_length = intersection_end - intersection_start + 1\n if is_prime(intersection_length):\n return \"YES\"\n else:\n return \"NO\""} 129 | {"task_id": "HumanEval/128", "completion": "def prod_signs(arr):\n if not arr:\n return None\n sign_prod = 1\n mag_sum = 0\n for num in arr:\n sign = 0 if num == 0 else num // abs(num)\n sign_prod *= sign\n mag_sum += abs(num)\n return sign_prod * mag_sum"} 130 | {"task_id": "HumanEval/129", "completion": "def minPath(grid, k):\n n = len(grid)\n visited = set()\n heap = [(grid[i][j], [(i, j)]) for i in range(n) for j in range(n)]\n heapq.heapify(heap)\n while heap:\n val, path = heapq.heappop(heap)\n if len(path) == k:\n return [grid[i][j] for i, j in path]\n i, j = path[-1]\n for ni, nj in ((i+1,j), (i-1,j), (i,j+1), (i,j-1)):\n if 0 <= ni < n and 0 <= nj < n and (ni, nj) not in visited:\n visited.add((ni, nj))\n heapq.heappush(heap, (grid[ni][nj], path + [(ni, nj)]))"} 131 | {"task_id": "HumanEval/130", "completion": "def tri(n):\n seq = [3]\n if n == 0:\n return []\n elif n == 1:\n return seq\n else:\n seq.append(1)\n for i in range(2, n+1):\n if i % 2 == 0:\n seq.append(1 + i//2)\n else:\n seq.append(seq[-1] + seq[-2] + seq[-3])\n return seq"} 132 | {"task_id": "HumanEval/131", "completion": "def digits(n):\n odd_digits = [int(d) for d in str(n) if int(d) % 2 != 0]\n if len(odd_digits) == 0:\n return 0\n else:\n product = 1\n for digit in odd_digits:\n product *= digit\n return product"} 133 | {"task_id": "HumanEval/132", "completion": "def is_nested(string):\n stack = []\n for char in string:\n if char == '[':\n stack.append(char)\n elif char == ']':\n if stack and stack[-1] == '[':\n stack.pop()\n else:\n stack.append(char)\n return '[' in stack"} 134 | {"task_id": 
"HumanEval/133", "completion": "def sum_squares(lst):\n return sum([int(x)**2 for x in lst])"} 135 | {"task_id": "HumanEval/134", "completion": "def check_if_last_char_is_a_letter(txt):\n if len(txt) == 0:\n return False\n last_char = txt[-1]\n if not last_char.isalpha():\n return False\n if len(txt.split()[-1]) == 1:\n return True\n return False"} 136 | {"task_id": "HumanEval/135", "completion": "def can_arrange(arr):\n for i in range(len(arr)-1, 0, -1):\n if arr[i] < arr[i-1]:\n return i-1\n return -1"} 137 | {"task_id": "HumanEval/136", "completion": "def largest_smallest_integers(lst):\n neg_ints = [i for i in lst if i < 0]\n pos_ints = [i for i in lst if i > 0]\n if not neg_ints:\n max_neg = None\n else:\n max_neg = max(neg_ints)\n if not pos_ints:\n min_pos = None\n else:\n min_pos = min(pos_ints)\n return (max_neg, min_pos)"} 138 | {"task_id": "HumanEval/137", "completion": "def compare_one(a, b):\n if type(a) == type(b):\n if type(a) == int or type(a) == float:\n if a > b:\n return a\n elif b > a:\n return b\n else:\n return None\n elif type(a) == str:\n a = a.replace(\",\", \".\")\n b = b.replace(\",\", \".\")\n if float(a) > float(b):\n return a\n elif float(b) > float(a):\n return b\n else:\n return None\n else:\n return None"} 139 | {"task_id": "HumanEval/138", "completion": "def is_equal_to_sum_even(n):\n return n % 8 == 0 or (n - 4) % 8 == 0"} 140 | {"task_id": "HumanEval/139", "completion": "from math import factorial\n\ndef special_factorial(n):\n result = 1\n for i in range(n, 0, -1):\n result *= factorial(i)\n return result"} 141 | {"task_id": "HumanEval/140", "completion": "def fix_spaces(text):\n text = text.replace(\" \", \"_\")\n count = 0\n new_text = \"\"\n for char in text:\n if char == \" \":\n count += 1\n if count > 2:\n new_text = new_text[:-1] + \"-\"\n else:\n count = 0\n new_text += char\n return new_text"} 142 | {"task_id": "HumanEval/141", "completion": "def file_name_check(file_name):\n import re\n pattern = 
r'^[a-zA-Z][a-zA-Z0-9]*\\.[txt|exe|dll]{3}$'\n if re.match(pattern, file_name):\n return 'Yes'\n else:\n return 'No'"} 143 | {"task_id": "HumanEval/142", "completion": "def sum_squares(lst):\n return sum([x**2 if i%3==0 else x**3 if i%4==0 else x for i,x in enumerate(lst)])"} 144 | {"task_id": "HumanEval/143", "completion": "def words_in_sentence(sentence):\n def is_prime(n):\n if n < 2:\n return False\n for i in range(2, int(n**0.5)+1):\n if n % i == 0:\n return False\n return True\n\n words = sentence.split()\n prime_words = [word for word in words if is_prime(len(word))]\n return ' '.join(prime_words)"} 145 | {"task_id": "HumanEval/144", "completion": "def simplify(x, n):\n x_num, x_den = map(int, x.split('/'))\n n_num, n_den = map(int, n.split('/'))\n result = x_num * n_num / (x_den * n_den)\n return result.is_integer()"} 146 | {"task_id": "HumanEval/145", "completion": "def order_by_points(nums):\n def digit_sum(num):\n return sum(int(digit) for digit in str(abs(num)))\n return sorted(nums, key=lambda num: (digit_sum(num), nums.index(num)))"} 147 | {"task_id": "HumanEval/146", "completion": "def specialFilter(nums):\n return len([num for num in nums if num > 10 and str(num)[0] in ['1', '3', '5', '7', '9'] and str(num)[-1] in ['1', '3', '5', '7', '9']])"} 148 | {"task_id": "HumanEval/147", "completion": "def get_max_triples(n):\n a = [i*i-i+1 for i in range(1,n+1)]\n count = 0\n for i in range(n-2):\n for j in range(i+1,n-1):\n for k in range(j+1,n):\n if (a[i]+a[j]+a[k])%3 == 0:\n count += 1\n return count"} 149 | {"task_id": "HumanEval/148", "completion": "def bf(planet1, planet2):\n planets = [\"Mercury\", \"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\", \"Neptune\"]\n if planet1 not in planets or planet2 not in planets:\n return ()\n start = planets.index(planet1)\n end = planets.index(planet2)\n if start > end:\n start, end = end, start\n return tuple(sorted(planets[start+1:end], key=lambda x: planets.index(x)))"} 150 | {"task_id": 
"HumanEval/149", "completion": "def sorted_list_sum(lst):\n return sorted([word for word in lst if len(word) % 2 == 0], key=lambda x: (len(x), x))"} 151 | {"task_id": "HumanEval/150", "completion": "import math\n\ndef x_or_y(n, x, y):\n return x if all(n % i != 0 for i in range(2, int(math.sqrt(n))+1)) and n > 1 else y"} 152 | {"task_id": "HumanEval/151", "completion": "def double_the_difference(lst):\n return sum(x**2 for x in lst if isinstance(x, int) and x > 0 and x % 2 != 0) * 2"} 153 | {"task_id": "HumanEval/152", "completion": "def compare(game, guess):\n return [abs(game[i]-guess[i]) if game[i]!=guess[i] else 0 for i in range(len(game))]"} 154 | {"task_id": "HumanEval/153", "completion": "def Strongest_Extension(class_name, extensions):\n def strength(ext):\n return ext.count(ext.upper()) - ext.count(ext.lower())\n strongest = max(extensions, key=lambda ext: (strength(ext), extensions.index(ext)))\n return f\"{class_name}.{strongest}\""} 155 | {"task_id": "HumanEval/154", "completion": "def cycpattern_check(a, b):\n return any(b in a[i:] + a[:i] for i in range(len(a)))"} 156 | {"task_id": "HumanEval/155", "completion": "def even_odd_count(num):\n even_count = 0\n odd_count = 0\n for digit in str(abs(num)):\n if int(digit) % 2 == 0:\n even_count += 1\n else:\n odd_count += 1\n if num < 0:\n return (odd_count, even_count)\n else:\n return (even_count, odd_count)"} 157 | {"task_id": "HumanEval/156", "completion": "def int_to_mini_roman(number):\n roman_numerals = {\n 1000: 'm',\n 900: 'cm',\n 500: 'd',\n 400: 'cd',\n 100: 'c',\n 90: 'xc',\n 50: 'l',\n 40: 'xl',\n 10: 'x',\n 9: 'ix',\n 5: 'v',\n 4: 'iv',\n 1: 'i'\n }\n result = ''\n for value, numeral in roman_numerals.items():\n while number >= value:\n result += numeral\n number -= value\n return result.lower()"} 158 | {"task_id": "HumanEval/157", "completion": "def right_angle_triangle(a, b, c):\n sides = [a, b, c]\n sides.sort()\n return sides[0]**2 + sides[1]**2 == sides[2]**2"} 159 | {"task_id": 
"HumanEval/158", "completion": "def find_max(words):\n def count_unique(word):\n return len(set(word))\n\n max_word = \"\"\n max_count = 0\n\n for word in words:\n unique_count = count_unique(word)\n if unique_count > max_count:\n max_word = word\n max_count = unique_count\n elif unique_count == max_count and word < max_word:\n max_word = word\n\n return max_word"} 160 | {"task_id": "HumanEval/159", "completion": "def eat(number, need, remaining):\n eaten = min(need, remaining + number)\n remaining = max(0, remaining - (eaten - number))\n return [eaten, remaining]"} 161 | {"task_id": "HumanEval/160", "completion": "def do_algebra(operator, operand):\n expression = str(operand[0])\n for i in range(len(operator)):\n expression += operator[i] + str(operand[i+1])\n return eval(expression)"} 162 | {"task_id": "HumanEval/161", "completion": "def solve(s):\n return s.swapcase() if any(c.isalpha() for c in s) else s[::-1]"} 163 | {"task_id": "HumanEval/162", "completion": "import hashlib\n\ndef string_to_md5(text):\n if text:\n return hashlib.md5(text.encode()).hexdigest()"} 164 | {"task_id": "HumanEval/163", "completion": " pass"} 165 | -------------------------------------------------------------------------------- /launch_pod.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | import threading 4 | import sys 5 | import time 6 | from benchmark_utils import run_benchmark 7 | 8 | def get_output_and_retry(call_string, max_retries=5): 9 | for attempt in range(max_retries): 10 | result = get_ipython().getoutput(f'curl -s --request POST {call_string}') 11 | data_dict = json.loads(result[-1]) 12 | print(data_dict) 13 | if "INTERNAL_SERVER_ERROR" not in str(data_dict): 14 | return data_dict 15 | else: 16 | print(f"ERROR - Attempt {attempt+1}, Retrying in 5s") 17 | time.sleep(5) 18 | raise Exception(f"Failed after {max_retries} attempts") 19 | 20 | def launch_pod(api_key, gpu_type, pod_num): 21 | 22 | 
gpu_count = 1 23 | if gpu_type[:2] == "2x": 24 | gpu_type = gpu_type[2:] 25 | gpu_count = 2 26 | 27 | call_string = f"""--header 'content-type: application/json' \ 28 | --url 'https://api.runpod.io/graphql?api_key={api_key}' \ 29 | --data '{{"query": "mutation {{ podFindAndDeployOnDemand( input: {{ cloudType: SECURE, gpuCount: {gpu_count}, volumeInGb: 100, containerDiskInGb: 10, minVcpuCount: 2, minMemoryInGb: 20, gpuTypeId: \\"{gpu_type}\\", name: \\"RunPod TextGenWebUI #{pod_num}\\", imageName: \\"succulentsteve/webui:latest\\", dockerArgs: \\"\\", ports: \\"7860/http,5000/http,22/tcp,5005/tcp\\", volumeMountPath: \\"/workspace\\", env: [] }} ) {{ id imageName env machineId machine {{ podHostId }} }} }}"}}'""" 30 | 31 | result = get_ipython().getoutput(f'curl -s --request POST {call_string}') 32 | data_dict = get_output_and_retry(call_string, max_retries=5) 33 | pod_id = data_dict['data']['podFindAndDeployOnDemand']['id'] 34 | machine_id = data_dict['data']['podFindAndDeployOnDemand']['machineId'] 35 | host_id = machine_id = data_dict['data']['podFindAndDeployOnDemand']['machine']['podHostId'] 36 | return pod_id, machine_id, host_id 37 | 38 | def wait_for_pod(pod_id, api_key): 39 | curl_command = f"""curl -s --request POST \ 40 | --header 'content-type: application/json' \ 41 | --url 'https://api.runpod.io/graphql?api_key={api_key}' \ 42 | --data '{{"query": "query Pod {{ pod(input: {{podId: \\"{pod_id}\\"}}) {{ id name runtime {{ uptimeInSeconds ports {{ ip isIpPublic privatePort publicPort type }} gpus {{ id gpuUtilPercent memoryUtilPercent }} container {{ cpuPercent memoryPercent }} }} }} }}"}}'""" 43 | while True: 44 | result = get_ipython().getoutput(curl_command) 45 | data = json.loads(result[-1]) 46 | try: 47 | port_22_info = [port for port in data["data"]["pod"]["runtime"]["ports"] if port["type"] == "tcp" and port["privatePort"] == 22] 48 | ssh_ip = port_22_info[0]["ip"] 49 | ssh_port = port_22_info[0]["publicPort"] 50 | 51 | port_5005_info = [port for 
port in data["data"]["pod"]["runtime"]["ports"] if port["type"] == "tcp" and port["privatePort"] == 5005] 52 | ws_ip = port_5005_info[0]["ip"] 53 | ws_port = port_5005_info[0]["publicPort"] 54 | 55 | return ssh_ip, ssh_port, ws_ip, ws_port 56 | except: 57 | print("not ready yet") 58 | time.sleep(10) 59 | 60 | def download_model(ssh_ip, ssh_port, model): 61 | model_file = model.replace("/", "_") 62 | cmd = f"'cd /root/text-generation-webui && python download-model.py --output /workspace/models {model}'" 63 | get_ipython().system(f'ssh -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 {cmd}') 64 | return model_file 65 | 66 | def swap_gptq(ssh_ip, ssh_port): 67 | commands = [ 68 | "cd /workspace && git clone -n https://github.com/qwopqwop200/GPTQ-for-LLaMa", 69 | "ln -s /workspace/GPTQ-for-LLaMa /root/text-generation-webui/repositories/GPTQ-for-LLaMa" 70 | ] 71 | for cmd in commands: 72 | get_ipython().system(f'ssh -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 {cmd}') 73 | 74 | def remove_safetensors(ssh_ip, ssh_port, model_file, model_wget_path): 75 | cmd = f"'cd /workspace/models/{model_file} && rm -rf *.safetensors && wget -q {model_wget_path}'" 76 | get_ipython().system(f'ssh -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 {cmd}') 77 | 78 | import time 79 | 80 | import threading 81 | from concurrent.futures import ThreadPoolExecutor, Future 82 | 83 | def run_ssh_command(command, pod_id, api_key): 84 | 85 | process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1, universal_newlines=True) 86 | server_started = threading.Event() 87 | 88 | def stdout_thread(process): 89 | for line in iter(process.stdout.readline, ''): 90 | line = line.strip() 91 | print(line) 92 | sys.stdout.flush() 93 | if "Running on local URL" in line: 94 | server_started.set() 95 | 96 | def stderr_thread(process): 97 | for line in iter(process.stderr.readline, 
''): 98 | print(line.strip(), file=sys.stderr) 99 | sys.stderr.flush() 100 | if "Connection to " in line: 101 | stop_and_terminate_pod(pod_id, api_key) 102 | raise Exception(line) 103 | 104 | threading.Thread(target=stdout_thread, args=(process,), daemon=True).start() 105 | threading.Thread(target=stderr_thread, args=(process,), daemon=True).start() 106 | 107 | def check_timeout(timeout, event): 108 | time.sleep(timeout * 60) # Timeout in minutes 109 | if not event.is_set(): 110 | raise Exception('Timeout exceeded') 111 | 112 | timeout_thread = threading.Thread(target=check_timeout, args=(5, server_started), daemon=False) 113 | timeout_thread.start() 114 | 115 | while not server_started.is_set(): 116 | time.sleep(1) 117 | if not timeout_thread.is_alive(): 118 | raise Exception('Timeout exceeded') 119 | 120 | print("continuing") 121 | 122 | sys.stdout.flush() 123 | sys.stderr.flush() 124 | 125 | 126 | 127 | def run_pod_server(ssh_ip, ssh_port, model_file, gptq_params, pod_id, api_key, use_autoq = False, additional_cmd=""): 128 | auto = "" 129 | if use_autoq: 130 | auto = "--autogptq" 131 | try: 132 | command = f"""ssh -tt -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 'cd /root/text-generation-webui && python server.py --listen --api --model {model_file} --model-dir /workspace/models {gptq_params} {additional_cmd} --trust-remote-code --auto-devices {auto} --model_type "Llama"'""" 133 | print(command) 134 | run_ssh_command(command, pod_id, api_key) 135 | print("Done") 136 | except Exception as e: 137 | print(f"An error occurred: {e}") 138 | 139 | def launch_webui(api_key, api_url='https://api.runpod.io/graphql', model="TheBloke/wizard-vicuna-13B-GPTQ", gptq_params="", gpu_type="NVIDIA RTX A4000", pod_num="1", url_replace_safetensors = None, use_new_gptq = False, use_autoq = False, additional_cmd="", use_async = False): 140 | pod_id, machine_id, host_id = launch_pod(api_key, gpu_type, pod_num) 141 | ssh_ip, ssh_port, ws_ip, ws_port = 
wait_for_pod(pod_id, api_key) 142 | url = "https://%s-7860.proxy.runpod.net" % pod_id 143 | api_url = "https://%s-5000.proxy.runpod.net" % pod_id 144 | ssh = "ssh -o StrictHostKeyChecking=no %s@ssh.runpod.io -i ~/.ssh/id_ed25519" % machine_id 145 | print("WebUI:", url, "API", api_url, "SSH", ssh, sep="\n") 146 | print("\n\n") 147 | 148 | if use_new_gptq: 149 | swap_gptq(ssh_ip, ssh_port) 150 | 151 | model_file = download_model(ssh_ip, ssh_port, model) 152 | 153 | if url_replace_safetensors is not None: 154 | print("Removing safetensors...") 155 | remove_safetensors(ssh_ip, ssh_port, model_file, url_replace_safetensors) 156 | run_pod_server(ssh_ip, ssh_port, model_file, gptq_params, pod_id, api_key, use_autoq=use_autoq, additional_cmd=additional_cmd) 157 | if use_async: 158 | return api_url, model_file, pod_id, ws_ip, ws_port 159 | return api_url, model_file, pod_id 160 | 161 | def pod_benchmark(filename, prompt, assistant_tag, pod_id, api_key, start_from=0, 162 | host="http://localhost", port=5000, insert_func_stub=True, use_old_parser = False, deterministic=True, use_async = False): 163 | import time 164 | 165 | while True: 166 | try: 167 | run_benchmark(filename, prompt, start_from=start_from, 168 | host=host, port=port, insert_func_stub=insert_func_stub, assistant_tag=assistant_tag, use_old_parser = use_old_parser, deterministic=deterministic, use_async = use_async) 169 | break # If the function executes without raising an exception, exit the loop 170 | except Exception as e: 171 | # Check if the first argument of the exception is a tuple and the second element of the tuple is 404 172 | if isinstance(e.args[0], tuple) and e.args[0][1] == 404: 173 | # Retry the operation after waiting for some time 174 | print("Caught 404 error. 
Retrying after 5 seconds...") 175 | time.sleep(5) 176 | else: 177 | # Reraise the exception if it's not the specific one we're handling 178 | stop_and_terminate_pod(pod_id, api_key) 179 | raise 180 | 181 | def stop_pod(pod_id, api_key): 182 | stop_pod_command = f"""curl --request POST \ 183 | --header 'content-type: application/json' \ 184 | --url 'https://api.runpod.io/graphql?api_key={api_key}' \ 185 | --data '{{"query": "mutation {{ podStop(input: {{podId: \\"{pod_id}\\"}}) {{ id desiredStatus }} }}"}}'""" 186 | get_ipython().system('{stop_pod_command}') 187 | 188 | def terminate_pod(pod_id, api_key): 189 | terminate_pod_command = f"""curl --request POST \ 190 | --header 'content-type: application/json' \ 191 | --url 'https://api.runpod.io/graphql?api_key={api_key}' \ 192 | --data '{{"query": "mutation {{ podTerminate(input: {{podId: \\"{pod_id}\\"}}) }}"}}'""" 193 | get_ipython().system('{terminate_pod_command}') 194 | 195 | def stop_and_terminate_pod(pod_id, api_key): 196 | stop_pod(pod_id, api_key) 197 | terminate_pod(pod_id, api_key) 198 | 199 | def killall_pods(api_key): 200 | cmd = """curl -s --request POST \ 201 | --header 'content-type: application/json' \ 202 | --url 'https://api.runpod.io/graphql?api_key=%s' \ 203 | --data '{"query": "query Pods { myself { pods { id name runtime { uptimeInSeconds ports { ip isIpPublic privatePort publicPort type } gpus { id gpuUtilPercent memoryUtilPercent } container { cpuPercent memoryPercent } } } } }"}'""" % api_key 204 | result = get_ipython().getoutput(cmd) 205 | # Parse the JSON string 206 | data = json.loads("".join(result)) 207 | 208 | # Extract pod ids 209 | pod_ids = [pod['id'] for pod in data['data']['myself']['pods']] 210 | 211 | for pid in pod_ids: 212 | stop_and_terminate_pod(pid, api_key) -------------------------------------------------------------------------------- /prompt_formats.txt: -------------------------------------------------------------------------------- 1 | —— 2 | 3 | Alpaca: 4 | 5 | —— 6 
| 7 | ### Instruction: 8 | Complete the following Python code: 9 | Notes: respond with the entire complete function definition 10 | do not add any comments, be as concise in your code as possible 11 | use only built-in libraries, assume no additional imports other than those provided (if any) 12 | 13 | code: 14 | from typing import List 15 | 16 | 17 | def has_close_elements(numbers: List[float], threshold: float) -> bool: 18 | """ Check if in given list of numbers, are any two numbers closer to each other than 19 | given threshold. 20 | >>> has_close_elements([1.0, 2.0, 3.0], 0.5) 21 | False 22 | >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3) 23 | True 24 | """ 25 | 26 | 27 | ### Response: 28 | ```python 29 | def has_close_elements( 30 | 31 | 32 | 33 | —— 34 | 35 | Vicuna: 36 | 37 | —— 38 | 39 | 40 | A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. 41 | USER: 42 | Complete the following Python code: 43 | Notes: respond with the entire complete function definition 44 | do not add any comments, be as concise in your code as possible 45 | use only built-in libraries, assume no additional imports other than those provided (if any) 46 | 47 | code: 48 | from typing import List 49 | 50 | 51 | def has_close_elements(numbers: List[float], threshold: float) -> bool: 52 | """ Check if in given list of numbers, are any two numbers closer to each other than 53 | given threshold. 
54 | >>> has_close_elements([1.0, 2.0, 3.0], 0.5) 55 | False 56 | >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3) 57 | True 58 | """ 59 | 60 | 61 | ASSISTANT: 62 | ```python 63 | def has_close_elements( 64 | —— 65 | -------------------------------------------------------------------------------- /templates.py: -------------------------------------------------------------------------------- 1 | VERYSHORT_TEMPLATE = """{PROMPT}""" 2 | 3 | SHORT_TEMPLATE = """```python 4 | {PROMPT}""" 5 | 6 | H2O_TEMPLATE = """<|prompt|>{PROMPT}<|endoftext|><|answer|>```python 7 | """ 8 | ###### 9 | 10 | ALPACA_PREFIX_TEMPLATE = """Below is an instruction that describes a task. 11 | Write a response that appropriately completes the request. 12 | 13 | ### Instruction: 14 | Complete the following Python code: 15 | Notes: respond with the entire complete function definition 16 | do not add any comments, be as concise in your code as possible 17 | use only built-in libraries, assume no additional imports other than those provided (if any) 18 | 19 | code: 20 | {PROMPT} 21 | 22 | ### Response: 23 | ```python 24 | """ 25 | 26 | ALPACA_PREFIX_TEMPLATE_MEDIUM = """Below is an instruction that describes a task. 27 | Write a response that appropriately completes the request. 28 | 29 | ### Instruction: 30 | Please complete the following code: 31 | {PROMPT} 32 | 33 | ### Response: 34 | ```python 35 | """ 36 | 37 | ALPACA_PREFIX_TEMPLATE_MD = """Below is an instruction that describes a task.\n Write a response that appropriately completes the request. 
38 | 39 | ### Instruction: 40 | Complete the following Python code: 41 | Notes: respond with the entire complete function definition 42 | do not add any comments, be as concise in your code as possible 43 | use only built-in libraries, assume no additional imports other than those provided (if any) 44 | 45 | code: 46 | ```python 47 | {PROMPT} 48 | ``` 49 | 50 | ### Response: 51 | ```python 52 | """ 53 | 54 | ###### 55 | 56 | QA_TEMPLATE = """Q: Complete the following Python code: 57 | Notes: respond with the entire complete function definition 58 | do not add any comments, be as concise in your code as possible 59 | use only built-in libraries, assume no additional imports other than those provided (if any) 60 | 61 | code: 62 | {PROMPT} 63 | 64 | A: 65 | ```python 66 | """ 67 | 68 | QA_TEMPLATE_MD = """Q: Complete the following Python code: 69 | Notes: respond with the entire complete function definition 70 | do not add any comments, be as concise in your code as possible 71 | use only built-in libraries, assume no additional imports other than those provided (if any) 72 | 73 | code: 74 | ```python 75 | {PROMPT} 76 | ``` 77 | 78 | A: 79 | ```python 80 | """ 81 | 82 | QA_TEMPLATE_MEDIUM = """Q: Please complete the following code: 83 | {PROMPT} 84 | 85 | A: 86 | ```python 87 | """ 88 | 89 | ###### 90 | 91 | FALCON_TEMPLATE = """[|Human|] Complete the following Python code: 92 | Notes: respond with the entire complete function definition 93 | do not add any comments, be as concise in your code as possible 94 | use only built-in libraries, assume no additional imports other than those provided (if any) 95 | 96 | code: 97 | {PROMPT} 98 | 99 | [|AI|] 100 | ```python 101 | """ 102 | 103 | FALCON_TEMPLATE_MEDIUM = """[|Human|] Please complete the following code: 104 | {PROMPT} 105 | 106 | [|AI|] 107 | ```python 108 | """ 109 | 110 | FALCON_TEMPLATE_MD = """[|Human|] Complete the following Python code: 111 | Notes: respond with the entire complete function definition 112 | 
do not add any comments, be as concise in your code as possible 113 | use only built-in libraries, assume no additional imports other than those provided (if any) 114 | 115 | code: 116 | ```python 117 | {PROMPT} 118 | ``` 119 | 120 | [|AI|] 121 | ```python 122 | """ 123 | 124 | ###### 125 | 126 | OPENASSISTANT_TEMPLATE = """<|prompter|> Complete the following Python code: 127 | Notes: respond with the entire complete function definition 128 | do not add any comments, be as concise in your code as possible 129 | use only built-in libraries, assume no additional imports other than those provided (if any) 130 | 131 | code: 132 | {PROMPT} 133 | 134 | <|assistant|>: 135 | ```python 136 | """ 137 | 138 | OPENASSISTANT_TEMPLATE_MEDIUM = """<|prompter|> Please complete the following code: 139 | {PROMPT} 140 | 141 | <|assistant|>: 142 | ```python 143 | """ 144 | 145 | OPENASSISTANT_TEMPLATE_MD = """<|prompter|> Complete the following Python code: 146 | Notes: respond with the entire complete function definition 147 | do not add any comments, be as concise in your code as possible 148 | use only built-in libraries, assume no additional imports other than those provided (if any) 149 | 150 | code: 151 | ```python 152 | {PROMPT} 153 | ``` 154 | 155 | <|assistant|>: 156 | ```python 157 | """ 158 | 159 | ###### 160 | 161 | VICUNA_TEMPLATE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. 162 | 163 | USER: Complete the following Python code: 164 | Notes: respond with the entire complete function definition 165 | do not add any comments, be as concise in your code as possible 166 | use only built-in libraries, assume no additional imports other than those provided (if any) 167 | 168 | code: 169 | {PROMPT} 170 | 171 | ASSISTANT: 172 | ```python 173 | """ 174 | 175 | VICUNA_TEMPLATE_MEDIUM = """A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions. 176 | 177 | USER: Please complete the following code: 178 | {PROMPT} 179 | 180 | ASSISTANT: 181 | ```python 182 | """ 183 | 184 | VICUNA_TEMPLATE_MD = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. 185 | 186 | USER: Complete the following Python code: 187 | Notes: respond with the entire complete function definition 188 | do not add any comments, be as concise in your code as possible 189 | use only built-in libraries, assume no additional imports other than those provided (if any) 190 | 191 | code: 192 | ```python 193 | {PROMPT} 194 | ``` 195 | 196 | ASSISTANT: 197 | ```python 198 | """ 199 | 200 | ###### 201 | 202 | ALPACA_INPUT_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 203 | 204 | ### Instruction: 205 | Complete the following Python code: 206 | Notes: respond with the entire complete function definition 207 | do not add any comments, be as concise in your code as possible 208 | use only built-in libraries, assume no additional imports other than those provided (if any) 209 | 210 | ### Input: 211 | {PROMPT} 212 | 213 | ### Response: 214 | ```python 215 | """ 216 | 217 | ALPACA_INPUT_TEMPLATE_MEDIUM = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 218 | 219 | ### Instruction: 220 | Please complete the following code. 221 | 222 | ### Input: 223 | {PROMPT} 224 | 225 | ### Response: 226 | ```python 227 | """ 228 | 229 | ALPACA_INPUT_TEMPLATE_MD = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. 
230 | 231 | ### Instruction: 232 | Complete the following Python code: 233 | Notes: respond with the entire complete function definition 234 | do not add any comments, be as concise in your code as possible 235 | use only built-in libraries, assume no additional imports other than those provided (if any) 236 | 237 | ### Input: 238 | ```python 239 | {PROMPT} 240 | ``` 241 | 242 | ### Response: 243 | ```python 244 | """ 245 | 246 | ###### 247 | 248 | ALPACA_TEMPLATE = """### Instruction: 249 | Complete the following Python code: 250 | Notes: respond with the entire complete function definition 251 | do not add any comments, be as concise in your code as possible 252 | use only built-in libraries, assume no additional imports other than those provided (if any) 253 | 254 | code: 255 | {PROMPT} 256 | 257 | ### Response: 258 | ```python 259 | """ 260 | 261 | 262 | ALPACA_TEMPLATE_BAD_NL = """ 263 | ### Instruction: 264 | Complete the following Python code: 265 | Notes: respond with the entire complete function definition 266 | do not add any comments, be as concise in your code as possible 267 | use only built-in libraries, assume no additional imports other than those provided (if any) 268 | 269 | code: 270 | {PROMPT} 271 | 272 | ### Response: 273 | ```python 274 | """ 275 | 276 | ALPACA_TEMPLATE_MEDIUM = """### Instruction: 277 | Please complete the following code: 278 | {PROMPT} 279 | 280 | ### Response: 281 | ```python 282 | """ 283 | 284 | ALPACA_TEMPLATE_MD = """### Instruction: 285 | Complete the following Python code: 286 | Notes: respond with the entire complete function definition 287 | do not add any comments, be as concise in your code as possible 288 | use only built-in libraries, assume no additional imports other than those provided (if any) 289 | 290 | code: 291 | ```python 292 | {PROMPT} 293 | ``` 294 | 295 | ### Response: 296 | ```python 297 | """ 298 | 299 | ALPACA_TEMPLATE_MD_MEDIUM = """### Instruction: 300 | Please complete the following code: 301 | 
```python 302 | {PROMPT} 303 | ``` 304 | 305 | ### Response: 306 | ```python 307 | """ 308 | 309 | ###### --------------------------------------------------------------------------------