├── 0_Sagemaker_Setup.ipynb
├── 1_Run_Benchmark.ipynb
├── 2_Parse_Results.ipynb
├── 3_Advanced_Usage.ipynb
├── 4_Advanced_GUI_Benchmark_Client.ipynb
├── 5_LaunchRunpod.ipynb
├── LICENSE
├── Proprietary Model Evaluations
    ├── Bard Eval.ipynb
    ├── BingChat Eval.ipynb
    ├── ClaudePlus Eval.ipynb
    ├── ClaudeReg Eval.ipynb
    ├── GPT Eval.ipynb
    └── GPT4 Eval.ipynb
├── README.md
├── benchmark_manager.py
├── benchmark_utils.py
├── jsonl_examples
    ├── Aeala_VicUnlocked-alpaca-65b-4bit_ALPACA_MD_RECALC.jsonl
    ├── Aeala_VicUnlocked-alpaca-65b-4bit_ALPACA_MEDIUM_RECALC.jsonl
    ├── Aeala_VicUnlocked-alpaca-65b-4bit_ALPACA_MEMSPLIT.jsonl
    ├── Bard_custom.jsonl
    ├── Claude_Plus_custom_md.jsonl
    ├── Claude_custom.jsonl
    ├── GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_LONG_FIXED.jsonl
    ├── GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_MARKDOWN.jsonl
    ├── GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_MEDIUM.jsonl
    ├── TheBloke_Manticore-13B-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_Manticore-13B-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_Manticore-13B-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_Manticore-13B-GPTQ_VICUNA_LONG.jsonl
    ├── TheBloke_Nous-Hermes-13B-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_Nous-Hermes-13B-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_Nous-Hermes-13B-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_OpenAssistant-SFT-7-Llama-30B-GPTQ_OPENA_.jsonl
    ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_VICUNA_LONG.jsonl
    ├── TheBloke_VicUnlocked-30B-LoRA-GPTQ_VICUNA_VERYSHORT.jsonl
    ├── TheBloke_Wizard-Vicuna-13B-Uncensored-GPTQ_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_Wizard-Vicuna-13B-Uncensored-GPTQ_VICUNA_MEDIUM.jsonl
    ├── TheBloke_Wizard-Vicuna-30B-Uncensored-GPTQ_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_Wizard-Vicuna-30B-Uncensored-GPTQ_VICUNA_MEDIUM.jsonl
    ├── TheBloke_Wizard-Vicuna-7B-Uncensored-GPTQ_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_Wizard-Vicuna-7B-Uncensored-GPTQ_VICUNA_MEDIUM.jsonl
    ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_DEBUG_LONG.jsonl
    ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl
    ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_WizardLM-30B-GPTQ_VICUNA_MEDIUM.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_DEBUG_LONG.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_VICUNA_LONG.jsonl
    ├── TheBloke_WizardLM-30B-Uncensored-GPTQ_VICUNA_VERYSHORT.jsonl
    ├── TheBloke_falcon-7b-instruct-GPTQ_FALCON_.jsonl
    ├── TheBloke_guanaco-13B-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_guanaco-13B-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_guanaco-13B-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_guanaco-33B-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_guanaco-33B-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_guanaco-33B-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_guanaco-65B-GPTQ_ALPACA_.jsonl
    ├── TheBloke_guanaco-65B-GPTQ_ALPACA_MD_RECALC.jsonl
    ├── TheBloke_guanaco-65B-GPTQ_SHORT_.jsonl
    ├── TheBloke_guanaco-65B-GPTQ_VERYSHORT_.jsonl
    ├── TheBloke_guanaco-7B-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_guanaco-7B-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_open-llama-7b-open-instruct-GPTQ_ALPACA_PREFIX_openllama.jsonl
    ├── TheBloke_starcoderplus-GPTQ_ALPACA_INPUT_MEDIUM_STARPLUS_5.jsonl
    ├── TheBloke_starcoderplus-GPTQ_ALPACA_INPUT_STARPLUS_2.jsonl
    ├── TheBloke_starcoderplus-GPTQ_ALPACA_MEDIUM_STARPLUS_4.jsonl
    ├── TheBloke_starcoderplus-GPTQ_ALPACA_PREFIX1_MEDIUM_STARPLUS_6.jsonl
    ├── TheBloke_starcoderplus-GPTQ_ALPACA_PREFIX1_STARPLUS_3.jsonl
    ├── TheBloke_starcoderplus-GPTQ_ALPACA_STARPLUS_1.jsonl
    ├── TheBloke_starcoderplus-GPTQ_SHORT_STARPLUS_7.jsonl
    ├── TheBloke_starcoderplus-GPTQ_VERYSHORT_STARPLUS_8.jsonl
    ├── TheBloke_vicuna-13B-1.1-GPTQ-4bit-128g_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_vicuna-13B-1.1-GPTQ-4bit-128g_VICUNA_MEDIUM.jsonl
    ├── TheBloke_vicuna-7B-GPTQ-4bit-128g_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_vicuna-7B-GPTQ-4bit-128g_VICUNA_MEDIUM.jsonl
    ├── TheBloke_wizard-vicuna-13B-GPTQ_VICUNA_MARKDOWN.jsonl
    ├── TheBloke_wizard-vicuna-13B-GPTQ_VICUNA_MEDIUM.jsonl
    ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_DEBUG_LONG.jsonl
    ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl
    ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_wizardLM-13B-1.0-GPTQ_ALPACA_MEDIUM.jsonl
    ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_DEBUG_LONG.jsonl
    ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_DEBUG_LONG_OLD_PARSER_BAD_NL_HIGHTEMP.jsonl
    ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_LONG_FIXED.jsonl
    ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_MARKDOWN.jsonl
    ├── TheBloke_wizardLM-7B-GPTQ_ALPACA_MEDIUM.jsonl
    ├── anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g_ALPACA_LONG_FIXED.jsonl
    ├── anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g_ALPACA_MARKDOWN.jsonl
    ├── anon8231489123_gpt4-x-alpaca-13b-native-4bit-128g_ALPACA_MEDIUM.jsonl
    ├── gpt3_final_500c_long.jsonl
    ├── gpt4_final_500c_long.jsonl
    ├── h2oai_h2ogpt-gm-oasst1-en-2048-falcon-7b-v2_H2O_h2o.jsonl
    ├── mindrage_Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors_ALPACA_LONG_FIXED.jsonl
    ├── mindrage_Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors_ALPACA_MARKDOWN.jsonl
    ├── mindrage_Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors_ALPACA_MEDIUM.jsonl
    ├── sahil2801_instruct-codegen-16B_ALPACA_PREFIX1_LONG_FIXED.jsonl
    ├── sahil2801_instruct-codegen-16B_ALPACA_PREFIX1_MARKDOWN.jsonl
    ├── sahil2801_instruct-codegen-16B_ALPACA_PREFIX1_MEDIUM.jsonl
    ├── tiiuae_falcon-40b-instruct_FALCON_FALCON_FULL.jsonl
    ├── togethercomputer_RedPajama-INCITE-7B-Instruct_Q_A_MARKDOWN.jsonl
    ├── togethercomputer_RedPajama-INCITE-7B-Instruct_Q_A_MEDIUM.jsonl
    ├── togethercomputer_RedPajama-INCITE-Instruct-3B-v1_Q_A_MARKDOWN.jsonl
    └── togethercomputer_RedPajama-INCITE-Instruct-3B-v1_Q_A_MEDIUM.jsonl
├── launch_pod.py
├── prompt_formats.txt
└── templates.py


/0_Sagemaker_Setup.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "f06f277f-971b-4d35-a06f-49ec81df18aa",
  7 |    "metadata": {
  8 |     "tags": []
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "# !git clone https://github.com/oobabooga/text-generation-webui"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": null,
 18 |    "id": "1878dabf-e204-4ebb-9692-4126dab0eed8",
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "# !git clone https://github.com/PanQiWei/AutoGPTQ.git"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": null,
 28 |    "id": "0bf898e2-8fe4-4a30-a98d-1ce868e991f9",
 29 |    "metadata": {
 30 |     "tags": []
 31 |    },
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "%%bash\n",
 35 |     "\n",
 36 |     "sudo mv /usr/bin/g++ /usr/bin/g++_old\n",
 37 |     "sudo mv /usr/bin/gcc /usr/bin/gcc_old\n",
 38 |     "sudo ln -s /usr/bin/gcc10-cc /usr/bin/gcc\n",
 39 |     "sudo ln -s /usr/bin/gcc10-c++ /usr/bin/g++"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": null,
 45 |    "id": "6d935db7-8656-4117-b9fb-2073ab236358",
 46 |    "metadata": {
 47 |     "tags": []
 48 |    },
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "!sudo yum -y remove gcc-c++"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "id": "15294776-1d0d-4150-a355-a8f6404b3dee",
 58 |    "metadata": {
 59 |     "tags": []
 60 |    },
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "!sudo yum -y install gcc-c++"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "id": "1907ae21-09a7-460f-ae17-91b258ecd14e",
 70 |    "metadata": {
 71 |     "tags": []
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "%%writefile text-generation-webui/requirements.txt\n",
 76 |     "colorama\n",
 77 |     "datasets\n",
 78 |     "einops\n",
 79 |     "flexgen==0.1.7\n",
 80 |     "gradio_client==0.2.5\n",
 81 |     "gradio==3.31.0\n",
 82 |     "markdown\n",
 83 |     "numpy\n",
 84 |     "pandas\n",
 85 |     "Pillow>=9.5.0\n",
 86 |     "pyyaml\n",
 87 |     "requests\n",
 88 |     "safetensors==0.3.1\n",
 89 |     "sentencepiece\n",
 90 |     "tqdm\n",
 91 |     "scipy\n",
 92 |     "git+https://github.com/huggingface/peft@3714aa2fff158fdfa637b2b65952580801d890b2\n",
 93 |     "git+https://github.com/huggingface/transformers@e45e756d22206ca8fa9fb057c8c3d8fa79bf81c6\n",
 94 |     "git+https://github.com/huggingface/accelerate@0226f750257b3bf2cadc4f189f9eef0c764a0467"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "id": "d17702d0-b1a3-422d-9974-10c33d70fec7",
101 |    "metadata": {
102 |     "scrolled": true,
103 |     "tags": []
104 |    },
105 |    "outputs": [],
106 |    "source": [
107 |     "!cd text-generation-webui && pip install -r requirements.txt"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": null,
113 |    "id": "c43a3975-07e5-4183-872d-21630ec9d88d",
114 |    "metadata": {
115 |     "tags": []
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "%%writefile GPTQ-for-LLaMa/requirements.txt\n",
120 |     "safetensors==0.3.1\n",
121 |     "datasets==2.10.1\n",
122 |     "sentencepiece\n",
123 |     "git+https://github.com/huggingface/transformers\n",
124 |     "accelerate==0.19.0"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": null,
130 |    "id": "df3ddb38-d864-4b63-ab07-cf9baade08cd",
131 |    "metadata": {
132 |     "tags": []
133 |    },
134 |    "outputs": [],
135 |    "source": [
136 |     "!pip uninstall -y llama-cpp-python"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "code",
141 |    "execution_count": null,
142 |    "id": "f5bb1d0b-9e6f-41ad-b653-05e58a1ebfb9",
143 |    "metadata": {
144 |     "scrolled": true,
145 |     "tags": []
146 |    },
147 |    "outputs": [],
148 |    "source": [
149 |     "!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install -v llama-cpp-python --no-cache-dir"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "id": "56447029-7497-402b-8aad-c729537179e8",
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "!cd AutoGPTQ && pip install ."
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "id": "d7b2d613-f243-44eb-b0e1-a186f61507ed",
166 |    "metadata": {
167 |     "scrolled": true,
168 |     "tags": []
169 |    },
170 |    "outputs": [],
171 |    "source": [
172 |     "!rm -rf GPTQ-for-LLaMa && git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa \n",
173 |     "!cd GPTQ-for-LLaMa && git checkout cuda\n",
174 |     "!cd GPTQ-for-LLaMa && pip install -r requirements.txt"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": null,
180 |    "id": "c8ea6e35-61f9-4557-b0a9-a1bc24287414",
181 |    "metadata": {
182 |     "scrolled": true,
183 |     "tags": []
184 |    },
185 |    "outputs": [],
186 |    "source": [
187 |     "!rm -rf GPTQ-for-LLaMa && git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa \n",
188 |     "!cd GPTQ-for-LLaMa && git checkout cuda\n",
189 |     "!cd GPTQ-for-LLaMa && python setup_cuda.py install"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "id": "3f125711-b7c7-4c4a-9546-1b1ea395ced3",
196 |    "metadata": {
197 |     "tags": []
198 |    },
199 |    "outputs": [],
200 |    "source": [
201 |     "!pip freeze | grep torch"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "id": "961b8d23-c59f-40f0-9523-fed73b46334a",
208 |    "metadata": {
209 |     "tags": []
210 |    },
211 |    "outputs": [],
212 |    "source": [
213 |     "!pip install --upgrade evalplus \"torch>=2.0.0\""
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": null,
219 |    "id": "3ed48bc9-6559-40c1-a3f9-ceed0bd13c67",
220 |    "metadata": {
221 |     "tags": []
222 |    },
223 |    "outputs": [],
224 |    "source": [
225 |     "!cp -r GPTQ-for-LLaMa/* text-generation-webui/"
226 |    ]
227 |   },
228 |   {
229 |    "cell_type": "code",
230 |    "execution_count": null,
231 |    "id": "fb1157d4-5420-4533-be98-5e0e07a7dfa9",
232 |    "metadata": {
233 |     "scrolled": true,
234 |     "tags": []
235 |    },
236 |    "outputs": [],
237 |    "source": [
238 |     "!python -m torch.utils.collect_env"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "id": "0c8f7ec9-8c1c-4dac-be52-f1baaa072bf5",
245 |    "metadata": {
246 |     "scrolled": true,
247 |     "tags": []
248 |    },
249 |    "outputs": [],
250 |    "source": [
251 |     "!cd text-generation-webui && python server.py --share --api --trust-remote-code"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": null,
257 |    "id": "72c47abb-10b1-43a5-b11e-691b0e114ad5",
258 |    "metadata": {},
259 |    "outputs": [],
260 |    "source": []
261 |   }
262 |  ],
263 |  "metadata": {
264 |   "kernelspec": {
265 |    "display_name": "conda_pytorch_p39",
266 |    "language": "python",
267 |    "name": "conda_pytorch_p39"
268 |   },
269 |   "language_info": {
270 |    "codemirror_mode": {
271 |     "name": "ipython",
272 |     "version": 3
273 |    },
274 |    "file_extension": ".py",
275 |    "mimetype": "text/x-python",
276 |    "name": "python",
277 |    "nbconvert_exporter": "python",
278 |    "pygments_lexer": "ipython3",
279 |    "version": "3.9.15"
280 |   }
281 |  },
282 |  "nbformat": 4,
283 |  "nbformat_minor": 5
284 | }
285 | 


--------------------------------------------------------------------------------
/1_Run_Benchmark.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "id": "7146c2a6-89e7-49b6-8aca-dd7592361fda",
 7 |    "metadata": {
 8 |     "tags": []
 9 |    },
10 |    "outputs": [],
11 |    "source": [
12 |     "from benchmark_manager import run_benchmark_workflow"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "code",
17 |    "execution_count": null,
18 |    "id": "7dfed66f-9100-478b-a67f-493daca1fe68",
19 |    "metadata": {
20 |     "tags": []
21 |    },
22 |    "outputs": [],
23 |    "source": [
24 |     "# !cd text-generation-webui && python server.py --model-menu"
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": null,
30 |    "id": "77d1e871-71bb-419e-8293-dd90d50f7d5b",
31 |    "metadata": {
32 |     "scrolled": true,
33 |     "tags": []
34 |    },
35 |    "outputs": [],
36 |    "source": [
37 |     "run_benchmark_workflow(\"Aeala_VicUnlocked-alpaca-65b-4bit\", 6666, group_size=128,\n",
38 |     "                           prompt_type=\"long\", user_tag=\"### Instruction:\", \n",
39 |     "                           assistant_tag=\"### Response:\", system_prefix=\"\", experiment_tag=\"alpaca\")"
40 |    ]
41 |   },
42 |   {
43 |    "cell_type": "code",
44 |    "execution_count": null,
45 |    "id": "d7d9fb60-2463-4016-bc49-3ae882d4a378",
46 |    "metadata": {
47 |     "tags": []
48 |    },
49 |    "outputs": [],
50 |    "source": [
51 |     "run_benchmark_workflow(\"Aeala_VicUnlocked-alpaca-65b-4bit\", 6666, group_size=128,\n",
52 |     "                           prompt_type=\"long\", user_tag=\"USER:\", \n",
53 |     "                           assistant_tag=\"ASSISTANT:\", system_prefix=\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", experiment_tag=\"vicuna\")"
54 |    ]
55 |   },
56 |   {
57 |    "cell_type": "code",
58 |    "execution_count": null,
59 |    "id": "192fb97e-b4e4-4ed8-8fc6-24ac22e4a768",
60 |    "metadata": {},
61 |    "outputs": [],
62 |    "source": []
63 |   }
64 |  ],
65 |  "metadata": {
66 |   "kernelspec": {
67 |    "display_name": "conda_pytorch_p39",
68 |    "language": "python",
69 |    "name": "conda_pytorch_p39"
70 |   },
71 |   "language_info": {
72 |    "codemirror_mode": {
73 |     "name": "ipython",
74 |     "version": 3
75 |    },
76 |    "file_extension": ".py",
77 |    "mimetype": "text/x-python",
78 |    "name": "python",
79 |    "nbconvert_exporter": "python",
80 |    "pygments_lexer": "ipython3",
81 |    "version": "3.9.15"
82 |   }
83 |  },
84 |  "nbformat": 4,
85 |  "nbformat_minor": 5
86 | }
87 | 


--------------------------------------------------------------------------------
/3_Advanced_Usage.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "7dfed66f-9100-478b-a67f-493daca1fe68",
  7 |    "metadata": {
  8 |     "tags": []
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "# !cd text-generation-webui && python server.py --model-menu"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 2,
 18 |    "id": "77d1e871-71bb-419e-8293-dd90d50f7d5b",
 19 |    "metadata": {
 20 |     "scrolled": true,
 21 |     "tags": []
 22 |    },
 23 |    "outputs": [
 24 |     {
 25 |      "name": "stdout",
 26 |      "output_type": "stream",
 27 |      "text": [
 28 |       "Starting server...\n",
 29 |       "INFO:\u001b[32mLoading TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g...\u001b[0m\n",
 30 |       "CUDA extension not installed.\n",
 31 |       "INFO:\u001b[32mFound the following quantized model: models/TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g/vicuna-7B-1.1-GPTQ-4bit-128g.safetensors\u001b[0m\n",
 32 |       "INFO:\u001b[32mLoaded the model in 2.90 seconds.\n",
 33 |       "\u001b[0m\n",
 34 |       "Starting streaming server at ws://127.0.0.1:6667/api/v1/stream\n",
 35 |       "Starting API at http://127.0.0.1:6666/api\n",
 36 |       "Server started!\n",
 37 |       "127.0.0.1 - - [05/Jun/2023 00:51:37] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 38 |       "Running on local URL:  http://127.0.0.1:6668\n",
 39 |       "\n",
 40 |       "To create a public link, set `share=True` in `launch()`.\n",
 41 |       "Output generated in 22.05 seconds (2.54 tokens/s, 56 tokens, context 244, seed 0)\n",
 42 |       "127.0.0.1 - - [05/Jun/2023 00:52:00] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 43 |       "Output generated in 113.19 seconds (2.60 tokens/s, 294 tokens, context 248, seed 1)\n",
 44 |       "127.0.0.1 - - [05/Jun/2023 00:53:53] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 45 |       "Output generated in 29.99 seconds (2.60 tokens/s, 78 tokens, context 204, seed 2)\n",
 46 |       "127.0.0.1 - - [05/Jun/2023 00:54:23] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 47 |       "Output generated in 16.93 seconds (2.60 tokens/s, 44 tokens, context 237, seed 3)\n",
 48 |       "127.0.0.1 - - [05/Jun/2023 00:54:40] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 49 |       "Output generated in 49.77 seconds (2.59 tokens/s, 129 tokens, context 240, seed 4)\n",
 50 |       "127.0.0.1 - - [05/Jun/2023 00:55:30] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 51 |       "Output generated in 42.37 seconds (2.60 tokens/s, 110 tokens, context 215, seed 5)\n",
 52 |       "127.0.0.1 - - [05/Jun/2023 00:56:13] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 53 |       "Output generated in 20.01 seconds (2.60 tokens/s, 52 tokens, context 239, seed 6)\n",
 54 |       "127.0.0.1 - - [05/Jun/2023 00:56:33] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 55 |       "Output generated in 12.32 seconds (2.60 tokens/s, 32 tokens, context 215, seed 7)\n",
 56 |       "127.0.0.1 - - [05/Jun/2023 00:56:45] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 57 |       "Output generated in 29.22 seconds (2.60 tokens/s, 76 tokens, context 235, seed 8)\n",
 58 |       "127.0.0.1 - - [05/Jun/2023 00:57:14] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 59 |       "Output generated in 41.18 seconds (2.60 tokens/s, 107 tokens, context 216, seed 9)\n",
 60 |       "127.0.0.1 - - [05/Jun/2023 00:57:56] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 61 |       "Output generated in 164.00 seconds (2.59 tokens/s, 425 tokens, context 289, seed 10)\n",
 62 |       "127.0.0.1 - - [05/Jun/2023 01:00:40] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 63 |       "Output generated in 29.64 seconds (2.60 tokens/s, 77 tokens, context 200, seed 11)\n",
 64 |       "127.0.0.1 - - [05/Jun/2023 01:01:10] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 65 |       "Output generated in 43.85 seconds (2.60 tokens/s, 114 tokens, context 217, seed 12)\n",
 66 |       "127.0.0.1 - - [05/Jun/2023 01:01:54] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 67 |       "Output generated in 24.22 seconds (2.60 tokens/s, 63 tokens, context 194, seed 13)\n",
 68 |       "127.0.0.1 - - [05/Jun/2023 01:02:18] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 69 |       "Output generated in 14.99 seconds (2.60 tokens/s, 39 tokens, context 175, seed 14)\n",
 70 |       "127.0.0.1 - - [05/Jun/2023 01:02:33] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 71 |       "Output generated in 12.31 seconds (2.60 tokens/s, 32 tokens, context 188, seed 15)\n",
 72 |       "127.0.0.1 - - [05/Jun/2023 01:02:46] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 73 |       "Output generated in 18.45 seconds (2.60 tokens/s, 48 tokens, context 195, seed 16)\n",
 74 |       "127.0.0.1 - - [05/Jun/2023 01:03:04] \"POST /api/v1/generate HTTP/1.1\" 200 -\n",
 75 |       "Traceback (most recent call last):\n",
 76 |       "  File \"/home/ec2-user/SageMaker/text-generation-webui/server.py\", line 1118, in <module>\n",
 77 |       "    time.sleep(0.5)\n",
 78 |       "KeyboardInterrupt\n",
 79 |       "terminate called without an active exception\n"
 80 |      ]
 81 |     },
 82 |     {
 83 |      "ename": "KeyboardInterrupt",
 84 |      "evalue": "",
 85 |      "output_type": "error",
 86 |      "traceback": [
 87 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 88 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
 89 |       "\u001b[0;32m/tmp/ipykernel_1862/98509441.py\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mbenchmark_manager\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrun_benchmark_workflow\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m run_benchmark_workflow(\"TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g\", 6666, group_size=128,\n\u001b[0m\u001b[1;32m      3\u001b[0m                            \u001b[0mprompt_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"long\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_tag\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"USER:\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m                            assistant_tag=\"ASSISTANT:\", system_prefix=\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", experiment_tag=\"vicuna\")\n",
 90 |       "\u001b[0;32m~/SageMaker/benchmark_manager.py\u001b[0m in \u001b[0;36mrun_benchmark_workflow\u001b[0;34m(model_name, portnum, group_size, maxnum, prompt_type, user_tag, assistant_tag, system_prefix, experiment_tag, working_directory)\u001b[0m\n\u001b[1;32m     82\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     83\u001b[0m     \u001b[0;31m# Run the benchmark\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m     \u001b[0mrun_benchmark\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"_\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mexperiment_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxnum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mportnum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprompt_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massistant_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msystem_prefix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     85\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     86\u001b[0m     \u001b[0;31m# Once the benchmark has finished running, terminate the server process\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 91 |       "\u001b[0;32m~/SageMaker/benchmark_utils.py\u001b[0m in \u001b[0;36mrun_benchmark\u001b[0;34m(filename, maxnum, port, prompt_type, user_tag, assistant_tag, system_prefix, custom_completion)\u001b[0m\n\u001b[1;32m    106\u001b[0m             params = {\n\u001b[1;32m    107\u001b[0m                 \u001b[0;34m'task_id'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mtask_id\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 108\u001b[0;31m                 'completion': custom_completion(\n\u001b[0m\u001b[1;32m    109\u001b[0m                     \u001b[0mproblems\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtask_id\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"prompt\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    110\u001b[0m                     \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 92 |       "\u001b[0;32m~/SageMaker/benchmark_utils.py\u001b[0m in \u001b[0;36mgenerate_one_completion\u001b[0;34m(prompt_code, seed, port, prompt_type, user_tag, assistant_tag, system_prefix)\u001b[0m\n\u001b[1;32m     86\u001b[0m     \u001b[0msuffix\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'def'\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mprompt_code\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"def\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"(\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"(\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     87\u001b[0m     \u001b[0mprompt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgenerate_prompt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt_code\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprompt_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muser_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massistant_tag\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msystem_prefix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 88\u001b[0;31m     \u001b[0mcode_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprompt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mport\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mport\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     89\u001b[0m     \u001b[0;32mreturn\u001b[0m 
\u001b[0mget_function_body\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcut_off_prefix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcode_result\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"```python\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 93 |       "\u001b[0;32m~/SageMaker/benchmark_utils.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(prompt, seed, port)\u001b[0m\n\u001b[1;32m     47\u001b[0m     \u001b[0mURI\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'http://{HOST}:{port}/api/v1/generate'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     48\u001b[0m     \u001b[0;31m# Send the request and return the response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m     \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mURI\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     50\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mprompt\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'results'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstatus_code\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m200\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     51\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 94 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/api.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m    113\u001b[0m     \"\"\"\n\u001b[1;32m    114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"post\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 95 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m     57\u001b[0m     \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     58\u001b[0m     \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     61\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
 96 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m    585\u001b[0m         }\n\u001b[1;32m    586\u001b[0m         \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 587\u001b[0;31m         \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    588\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    589\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 97 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m    743\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    744\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mstream\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 745\u001b[0;31m             \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    746\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    747\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 98 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/models.py\u001b[0m in \u001b[0;36mcontent\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    897\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_content\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    898\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 899\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_content\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mb\"\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miter_content\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCONTENT_CHUNK_SIZE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34mb\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    900\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    901\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_content_consumed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
 99 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/requests/models.py\u001b[0m in \u001b[0;36mgenerate\u001b[0;34m()\u001b[0m\n\u001b[1;32m    814\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"stream\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    815\u001b[0m                 \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 816\u001b[0;31m                     \u001b[0;32myield\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    817\u001b[0m                 \u001b[0;32mexcept\u001b[0m \u001b[0mProtocolError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    818\u001b[0m                     \u001b[0;32mraise\u001b[0m \u001b[0mChunkedEncodingError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
100 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/urllib3/response.py\u001b[0m in \u001b[0;36mstream\u001b[0;34m(self, amt, decode_content)\u001b[0m\n\u001b[1;32m    574\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    575\u001b[0m             \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_fp_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 576\u001b[0;31m                 \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecode_content\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    577\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    578\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
101 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/site-packages/urllib3/response.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, amt, decode_content, cache_content)\u001b[0m\n\u001b[1;32m    517\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    518\u001b[0m                 \u001b[0mcache_content\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 519\u001b[0;31m                 \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfp_closed\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34mb\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    520\u001b[0m                 if (\n\u001b[1;32m    521\u001b[0m                     \u001b[0mamt\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
102 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, amt)\u001b[0m\n\u001b[1;32m    461\u001b[0m             \u001b[0;31m# Amount is given, implement using readinto\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    462\u001b[0m             \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbytearray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mamt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 463\u001b[0;31m             \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadinto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    464\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mmemoryview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtobytes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    465\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
103 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/http/client.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    505\u001b[0m         \u001b[0;31m# connection, and the user is reading more bytes than will be provided\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    506\u001b[0m         \u001b[0;31m# (for example, reading in 1k chunks)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 507\u001b[0;31m         \u001b[0mn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadinto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    508\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mn\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    509\u001b[0m             \u001b[0;31m# Ideally, we would raise IncompleteRead if the content-length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
104 |       "\u001b[0;32m~/anaconda3/envs/pytorch_p39/lib/python3.9/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m    702\u001b[0m         \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    703\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 704\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    705\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    706\u001b[0m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
105 |       "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
106 |      ]
107 |     }
108 |    ],
109 |    "source": [
110 |     "from benchmark_manager import run_benchmark_workflow, run\n",
111 |     "run_benchmark_workflow(\"TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g\", 6666, group_size=128,\n",
112 |     "                           prompt_type=\"long\", user_tag=\"USER:\", \n",
113 |     "                           assistant_tag=\"ASSISTANT:\", system_prefix=\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", experiment_tag=\"vicuna\")"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "code",
118 |    "execution_count": null,
119 |    "id": "12b698d4-8b57-4f5f-bd3d-56c74b095e13",
120 |    "metadata": {},
121 |    "outputs": [],
122 |    "source": [
123 |     "run_benchmark_workflow(\"TheBloke_wizardLM-7B-GPTQ\", 6666, group_size=128,\n",
124 |     "                           prompt_type=\"long\", user_tag=\"USER:\", \n",
125 |     "                           assistant_tag=\"ASSISTANT:\", system_prefix=\"\", experiment_tag=\"alpaca\")"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "id": "d7ff04e5-8c9c-4600-ba61-bd1098773e4f",
132 |    "metadata": {
133 |     "scrolled": true,
134 |     "tags": []
135 |    },
136 |    "outputs": [
137 |     {
138 |      "name": "stdout",
139 |      "output_type": "stream",
140 |      "text": [
141 |       "Starting server...\n",
142 |       "INFO:\u001b[32mLoading TheBloke_wizardLM-7B-GPTQ...\u001b[0m\n",
143 |       "WARNING:\u001b[33mMore than one .safetensors model has been found. The last one will be selected. It could be wrong.\u001b[0m\n",
144 |       "INFO:\u001b[32mFound the following quantized model: models/TheBloke_wizardLM-7B-GPTQ/wizardLM-7B-GPTQ-4bit.latest.act-order.safetensors\u001b[0m\n",
145 |       "INFO:\u001b[32mLoaded the model in 2.96 seconds.\n",
146 |       "\u001b[0m\n",
147 |       "Starting streaming server at ws://127.0.0.1:6667/api/v1/stream\n",
148 |       "Starting API at http://127.0.0.1:6666/api\n",
149 |       "Server started!\n",
150 |       "Results will be written to: results/TheBloke_wizardLM-7B-GPTQ_custom.jsonl\n",
151 |       "Processing Task 0 of 164\n",
152 |       "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n",
153 |       "    \"\"\" Check if in given list of numbers, are any two numbers closer to each other than\n",
154 |       "        given threshold.\n",
155 |       "        :param numbers: List[float]\n",
156 |       "        :param threshold: float\n",
157 |       "        :return: bool\n",
158 |       "        \"\"\"\n",
159 |       "    return len(set(numbers)) < 2 or abs(sum(numbers) - threshold) <= 1\n",
160 |       "Processing Task 1 of 164\n",
161 |       "def separate_paren_groups(paren_string: str) -> List[str]:\n",
162 |       "    # Define a helper function to check if two brackets match\n",
163 |       "    def are_brackets_matching(*args):\n",
164 |       "        for i in range(len(args)):\n",
165 |       "            if args[i]!= args[i+1]:\n",
166 |       "                return False\n",
167 |       "        return True\n",
168 |       "    # Split the input string into lists of brackets based on their depth\n",
169 |       "    depth = 0\n",
170 |       "    result = []\n",
171 |       "    for char in paren_string:\n",
172 |       "        if char == '(':\n",
173 |       "            depth += 1\n",
174 |       "        elif char == ')':\n",
175 |       "            depth -= 1\n",
176 |       "        elif depth > 0:\n",
177 |       "            result.append(char)\n",
178 |       "        else:\n",
179 |       "            result.extend([ch for ch in paren_string if ch not in'()'])\n",
180 |       "    # Recursively split each sublist of brackets into individual strings\n",
181 |       "    for i in range(len(result)-1):\n",
182 |       "        if len(result[i]) > 1:\n",
183 |       "            result[i], result[i+1] = result[i+1], result[i]\n",
184 |       "    # Join the resulting lists of brackets into a single list of strings\n",
185 |       "    return ['. '.join(lst) for lst in result]\n",
186 |       "Processing Task 2 of 164\n",
187 |       "def truncate_number(number: float) -> float:\n",
188 |       "    \"\"\" Given a positive floating point number, it can be decomposed into\n",
189 |       "    and integer part (largest integer smaller than given number) and decimals\n",
190 |       "    (leftover part always smaller than 1).\n",
191 |       "    Return the decimal part of the number.\n",
192 |       "    >>> truncate_number(3.5)\n",
193 |       "    0.5\n",
194 |       "    \"\"\"\n",
195 |       "    return number - int(round(number))\n",
196 |       "Processing Task 3 of 164\n",
197 |       "def below_zero(operations: T.List[int]) -> T.Union[bool, None]:\n",
198 |       "    # Initialize a variable to keep track of whether the balance fell below zero or not\n",
199 |       "    did_fall_below_zero = False\n",
200 |       "    for operation in operations:\n",
201 |       "        # Check if the current operation is a withdrawal operation\n",
202 |       "        if operation < 0:\n",
203 |       "            did_fall_below_zero = True\n",
204 |       "            break\n",
205 |       "    # Return True if the balance fell below zero, False otherwise\n",
206 |       "    return did_fall_below_zero\n",
207 |       "Processing Task 4 of 164\n",
208 |       "def mean_absolute_deviation(numbers: typing.List[float]) -> float:\n",
209 |       "    \"\"\" Calculate Mean Absolute Deviation for a given list of input numbers.\n",
210 |       "        See documentation for details.\n",
211 |       "    Returns:\n",
212 |       "        The calculated value of Mean Absolute Deviation.\n",
213 |       "    \"\"\"\n",
214 |       "    # Initialize variables for calculating MAD\n",
215 |       "    num_elements = len(numbers)\n",
216 |       "    sum_of_squares = sum([num * num for num in numbers])\n",
217 |       "    sum_of_absolutes = sum([abs(num) for num in numbers])\n",
218 |       "    mean_value = sum_of_absolutes / num_elements\n",
219 |       "    mad_value = math.sqrt(sum_of_squares - ((sum_of_absolutes ** 2) / num_elements))\n",
220 |       "    return mad_value\n",
221 |       "Processing Task 5 of 164\n",
222 |       "def intersperse(numbers: List[int], delimeter: int) -> List[int]:\n",
223 |       "    \"\"\"Insert a number 'delimeter' between every two consecutive elements of input list `numbers'\n",
224 |       "    >>> intersperse([], 4)\n",
225 |       "    []\n",
226 |       "    >>> intersperse([1, 2, 3], 4)\n",
227 |       "    [1, 4, 2, 4, 3]\n",
228 |       "    \"\"\"\n",
229 |       "Processing Task 6 of 164\n"
230 |      ]
231 |     }
232 |    ],
233 |    "source": [
234 |     "# Make sure to pip install evalplus\n",
235 |     "\n",
236 |     "import os, signal\n",
237 |     "from benchmark_utils import run_benchmark, run, extract_code\n",
238 |     "from benchmark_manager import start_server\n",
239 |     "\n",
240 |     "model_name = \"TheBloke_wizardLM-7B-GPTQ\"\n",
241 |     "portnum = 6666\n",
242 |     "group_size=128\n",
243 |     "\n",
244 |     "server_process = start_server(model_name, portnum, group_size=group_size, \n",
245 |     "                              working_directory='text-generation-webui') # Make sure server.py is in working_directory\n",
246 |     "\n",
247 |     "def my_completion(code, **kwargs):\n",
248 |     "    prompt = \"Complete this code:\\n%s\\nASSISTANT:\" % code\n",
249 |     "    results = extract_code(run(prompt, port=kwargs[\"port\"]))\n",
250 |     "    print(results)\n",
251 |     "    return results\n",
252 |     "\n",
253 |     "run_benchmark(model_name, port=portnum, custom_completion=my_completion, prompt_type = \"custom\")\n",
254 |     "\n",
255 |     "os.kill(server_process.pid, signal.SIGTERM)"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "id": "f8bd1a5e-f349-46a4-b0c1-449c493536cb",
262 |    "metadata": {
263 |     "tags": []
264 |    },
265 |    "outputs": [],
266 |    "source": [
267 |     "import subprocess\n",
268 |     "\n",
269 |     "filename = \"results/TheBloke_wizardLM-7B-GPTQ_custom.jsonl\"\n",
270 |     "\n",
271 |     "result = subprocess.run([\"sudo\", \"/home/ec2-user/anaconda3/envs/pytorch_p39/bin/evalplus.evaluate\",\n",
272 |     "                \"--dataset\", \"humaneval\", \"--samples\", filename, \"--i-just-wanna-run\"], \n",
273 |     "                        text=True, capture_output=True, check=False)\n",
274 |     "\n",
275 |     "print(result.stdout, \"\\n\", result.stderr)"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "code",
280 |    "execution_count": 2,
281 |    "id": "9dd23386-f2fc-4d4a-85eb-a89fd554aaa0",
282 |    "metadata": {
283 |     "tags": []
284 |    },
285 |    "outputs": [],
286 |    "source": [
287 |     "os.kill(server_process.pid, signal.SIGTERM)"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "markdown",
292 |    "id": "a35b5a37-cd8c-4390-a32c-539dc88cf900",
293 |    "metadata": {},
294 |    "source": [
295 |     "You can also start a server, configure and load a model via the GUI, and then benchmark against it from a separate process or Jupyter notebook (see `4_Advanced_GUI_Benchmark_Client.ipynb`)."
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "code",
300 |    "execution_count": null,
301 |    "id": "7169fe61-9490-46ca-a4c0-b35a6bb3a024",
302 |    "metadata": {},
303 |    "outputs": [],
304 |    "source": [
305 |     "from benchmark_manager import start_server, block_log_server\n",
306 |     "\n",
307 |     "PORT = 6000 # the benchmark client notebook (4_Advanced_GUI_Benchmark_Client.ipynb) must use this same port\n",
308 |     "\n",
309 |     "server_process = start_server(None, portnum=PORT, group_size=None, wbits=None,\n",
310 |     "                              working_directory='text-generation-webui', public=True)\n",
311 |     "block_log_server(server_process)"
312 |    ]
313 |   },
314 |   {
315 |    "cell_type": "code",
316 |    "execution_count": null,
317 |    "id": "985db790-f822-4557-8f99-c1032a3cb202",
318 |    "metadata": {},
319 |    "outputs": [],
320 |    "source": []
321 |   }
322 |  ],
323 |  "metadata": {
324 |   "kernelspec": {
325 |    "display_name": "conda_pytorch_p39",
326 |    "language": "python",
327 |    "name": "conda_pytorch_p39"
328 |   },
329 |   "language_info": {
330 |    "codemirror_mode": {
331 |     "name": "ipython",
332 |     "version": 3
333 |    },
334 |    "file_extension": ".py",
335 |    "mimetype": "text/x-python",
336 |    "name": "python",
337 |    "nbconvert_exporter": "python",
338 |    "pygments_lexer": "ipython3",
339 |    "version": "3.9.15"
340 |   }
341 |  },
342 |  "nbformat": 4,
343 |  "nbformat_minor": 5
344 | }
345 | 


--------------------------------------------------------------------------------
/4_Advanced_GUI_Benchmark_Client.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "7146c2a6-89e7-49b6-8aca-dd7592361fda",
  7 |    "metadata": {
  8 |     "tags": []
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "from benchmark_manager import start_server, run_benchmark_workflow\n",
 13 |     "from benchmark_utils import run_benchmark"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 3,
 19 |    "id": "7960dc96-8bca-4553-8094-e2892f6d93d3",
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "PORT = 6000 # Same port as GUI API is running on\n",
 24 |     "\n",
 25 |     "VICUNA = {\n",
 26 |     "    'user_tag':\"USER:\", \n",
 27 |     "    'assistant_tag':\"ASSISTANT:\", \n",
 28 |     "    'system_prefix':\"A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\", \n",
 29 |     "    'experiment_tag':\"vicuna\"\n",
 30 |     "}\n",
 31 |     "\n",
 32 |     "ALPACA = {\n",
 33 |     "    'user_tag':\"### Instruction:\", \n",
 34 |     "    'assistant_tag':\"### Response:\", \n",
 35 |     "    'system_prefix':\"\", \n",
 36 |     "    'experiment_tag':\"alpaca\"\n",
 37 |     "}\n",
 38 |     "\n",
 39 |     "ALPACA_PREFIX = {\n",
 40 |     "    'user_tag':\"### Instruction:\", \n",
 41 |     "    'assistant_tag':\"### Response:\", \n",
 42 |     "    'system_prefix':\"Below is an instruction that describes a task.\\n Write a response that appropriately completes the request.\\n\", \n",
 43 |     "    'experiment_tag':\"alpaca_prefix\"\n",
 44 |     "}"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "id": "192fb97e-b4e4-4ed8-8fc6-24ac22e4a768",
 51 |    "metadata": {
 52 |     "tags": []
 53 |    },
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "Results will be written to: results/Salesforce_instructcodet5p-16b_long_alpaca.jsonl\n",
 60 |       "Processing Task 0 of 164\n",
 61 |       "def has_close_elements(\"numbers: List[float], threshold: float) -> bool:\n",
 62 |       "    \"\"\"Check if in given list of numbers, are any two numbers closer than given threshold.\n",
 63 |       "    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n",
 64 |       "    False\n",
 65 |       "    >>> has_close_elements([1.0, 2.8, 3.0], 0.5)\n",
 66 |       "    True\n",
 67 |       "    >>> has_close_elements([1.0, 2.8, 3.0], 0.3)\n",
 68 |       "    False\n",
 69 |       "    \"\"\"\n",
 70 |       "    for i in range(len(numbers)):\n",
 71 |       "        for j in range(i + 1, len(numbers)):\n",
 72 |       "            if abs(numbers[i] - numbers[j]) < threshold:\n",
 73 |       "                return True\n",
 74 |       "    return False\n",
 75 |       "\n",
 76 |       "\n",
 77 |       "Processing Task 1 of 164\n",
 78 |       "def separate_paren_groups(\"paren_string: \")\n",
 79 |       "Processing Task 2 of 164\n",
 80 |       "def truncate_number(\"number\": float(number),\n",
 81 |       "    }\n",
 82 |       "    return float(number)\n",
 83 |       "Processing Task 3 of 164\n"
 84 |      ]
 85 |     }
 86 |    ],
 87 |    "source": [
 88 |     "run_benchmark(\"Salesforce_instructcodet5p-16b\", port=PORT, prompt_type=\"long\", **ALPACA)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": null,
 94 |    "id": "7a347ae4-b008-479f-a502-eb2f262776ff",
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": []
 98 |   }
 99 |  ],
100 |  "metadata": {
101 |   "kernelspec": {
102 |    "display_name": "conda_pytorch_p39",
103 |    "language": "python",
104 |    "name": "conda_pytorch_p39"
105 |   },
106 |   "language_info": {
107 |    "codemirror_mode": {
108 |     "name": "ipython",
109 |     "version": 3
110 |    },
111 |    "file_extension": ".py",
112 |    "mimetype": "text/x-python",
113 |    "name": "python",
114 |    "nbconvert_exporter": "python",
115 |    "pygments_lexer": "ipython3",
116 |    "version": "3.9.15"
117 |   }
118 |  },
119 |  "nbformat": 4,
120 |  "nbformat_minor": 5
121 | }
122 | 


--------------------------------------------------------------------------------
/5_LaunchRunpod.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "5eba32bb-854a-4230-9cd1-2f509fa17bbd",
  7 |    "metadata": {
  8 |     "tags": []
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "from launch_pod import launch_webui, stop_and_terminate_pod, remove_safetensors, pod_benchmark, killall_pods\n",
 13 |     "from templates import *\n",
 14 |     "from multiprocessing import Process\n",
 15 |     "import os, time"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": null,
 21 |    "id": "55cd2e0f-3cdf-417c-bf11-e7bdf09c5161",
 22 |    "metadata": {
 23 |     "tags": []
 24 |    },
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "API_KEY = 'YOUR KEY HERE'\n",
 28 |     "file_tag = \"whatever_you_want\""
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "id": "e84085ea-ad32-41c5-aa04-fa2672583845",
 35 |    "metadata": {
 36 |     "tags": []
 37 |    },
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "models = {\n",
 41 |     "    \"TheBloke/wizardLM-7B-GPTQ\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n",
 42 |     "    \"TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n",
 43 |     "    \"TheBloke/vicuna-7B-GPTQ-4bit-128g\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 44 |     "    \"TheBloke/guanaco-7B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n",
 45 |     "    \"TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 46 |     "    \"TheBloke/wizardLM-13B-1.0-GPTQ\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 47 |     "    \"TheBloke/guanaco-13B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 48 |     "    \"TheBloke/Manticore-13B-GPTQ\": {\"template\": \"ALPACA/VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 49 |     "    \"mindrage/Manticore-13B-Chat-Pyg-Guanaco-GPTQ-4bit-128g.no-act-order.safetensors\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 50 |     "    \"TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 51 |     "    \"TheBloke/VicUnlocked-30B-LoRA-GPTQ\": {\"template\": \"ALPACA/VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 52 |     "    \"TheBloke/WizardLM-30B-Uncensored-GPTQ\": {\"template\": \"ALPACA/VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 53 |     "    \"TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 54 |     "    \"TheBloke/guanaco-33B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 55 |     "    \"TheBloke/guanaco-65B-GPTQ\": {\"template\": \"ALPACA/SHORT\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA A100-SXM4-80GB\", \"additional_cmd\": \"\"},\n",
 56 |     "    \"Aeala/VicUnlocked-alpaca-65b-4bit\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"2xNVIDIA A100-SXM4-80GB\", \"additional_cmd\": \"\"},\n",
 57 |     "}\n",
 58 |     "additional_models = {\n",
 59 |     "    \"togethercomputer/RedPajama-INCITE-Instruct-3B-v1\": {\"template\": \"Q_A\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n",
 60 |     "    \"togethercomputer/RedPajama-INCITE-7B-Instruct\": {\"template\": \"Q_A\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n",
 61 |     "    \"TheBloke/falcon-7b-instruct-GPTQ\": {\"template\": \"FALCON\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A4000\", \"additional_cmd\": \"\"},\n",
 62 |     "    \"anon8231489123/gpt4-x-alpaca-13b-native-4bit-128g\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 63 |     "    \"TheBloke/Nous-Hermes-13B-GPTQ\": {\"template\": \"ALPACA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 64 |     "    \"TheBloke/wizard-vicuna-13B-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 65 |     "    \"GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct\": {\"template\": \"ALPACA_INPUT\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 66 |     "    \"sahil2801/instruct-codegen-16B\": {\"template\": \"ALPACA_PREFIX1\", \"groupsize\": \"\", \"wbits\": \"\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 67 |     "    \"TheBloke/OpenAssistant-SFT-7-Llama-30B-GPTQ\": {\"template\": \"OPENA\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 68 |     "    \"TheBloke/WizardLM-30B-GPTQ\": {\"template\": \"VICUNA\", \"groupsize\": \"\", \"wbits\": \"4\", \"GPU\": \"NVIDIA RTX A6000\", \"additional_cmd\": \"\"},\n",
 69 |     "    \"TheBloke/falcon-40b-instruct-GPTQ\": {\"template\": \"FALCON\", \"groupsize\": \"128\", \"wbits\": \"4\", \"GPU\": \"NVIDIA A100-SXM4-80GB\", \"additional_cmd\": \"\"},\n",
 70 |     "}\n",
 71 |     "\n",
 72 |     "combined_models = {**models, **additional_models}\n",
 73 |     "\n",
 74 |     "template_map = {\"ALPACA\" : ALPACA_TEMPLATE, \n",
 75 |     "                \"VICUNA\" : VICUNA_TEMPLATE,\n",
 76 |     "                \"ALPACA_INPUT\" : ALPACA_INPUT_TEMPLATE,\n",
 77 |     "                \"OPENA\" : OPENASSISTANT_TEMPLATE,\n",
 78 |     "                \"FALCON\" : FALCON_TEMPLATE,\n",
 79 |     "                \"Q_A\" : QA_TEMPLATE,\n",
 80 |     "                \"ALPACA_PREFIX1\" : ALPACA_PREFIX_TEMPLATE,\n",
 81 |     "                \"ALPACA_MEDIUM\" : ALPACA_TEMPLATE_MEDIUM, \n",
 82 |     "                \"ALPACA_INPUT_MEDIUM\" : ALPACA_INPUT_TEMPLATE_MEDIUM,\n",
 83 |     "                \"ALPACA_PREFIX1_MEDIUM\" : ALPACA_PREFIX_TEMPLATE_MEDIUM,\n",
 84 |     "                \"SHORT\" : SHORT_TEMPLATE,\n",
 85 |     "                \"VERYSHORT\" : VERYSHORT_TEMPLATE\n",
 86 |     "               }"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": null,
 92 |    "id": "dbd10d9a-303c-4f18-a3c4-c66a252ec215",
 93 |    "metadata": {
 94 |     "tags": []
 95 |    },
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "import os\n",
 99 |     "import sys\n",
100 |     "import contextlib\n",
101 |     "\n",
102 |     "def process_model(model, file_tag=\"\"):\n",
103 |     "    \"\"\"Launch a pod for `model`, benchmark it, then terminate the pod; stdout/stderr go to logs/log_<model_file>_<file_tag>.txt.\"\"\"\n",
104 |     "    model_file = model.replace(\"/\", \"_\")\n",
105 |     "    pod_tag = model_file\n",
106 |     "    # exist_ok avoids a FileExistsError when several pool workers create the directory at once\n",
107 |     "    os.makedirs(\"logs\", exist_ok=True)\n",
108 |     "\n",
109 |     "    log_filename = f\"logs/log_{model_file}_{file_tag}.txt\"  # Define your log file name here\n",
110 |     "    print(\"Starting\", model)\n",
111 |     "    # Redirect standard output and error to the log file\n",
112 |     "    with open(log_filename, 'w') as f, contextlib.redirect_stdout(f), contextlib.redirect_stderr(f):\n",
113 |     "        gpu_type = combined_models[model][\"GPU\"]\n",
114 |     "        groupsize = combined_models[model][\"groupsize\"]\n",
115 |     "        wbits = combined_models[model][\"wbits\"]\n",
116 |     "        gptq_params = \"\"\n",
117 |     "\n",
118 |     "        if groupsize:\n",
119 |     "            gptq_params += f\"--groupsize {groupsize} \"\n",
120 |     "        if wbits:\n",
121 |     "            gptq_params += f\"--wbits {wbits} \"\n",
122 |     "\n",
123 |     "        templ = combined_models[model][\"template\"]\n",
124 |     "        if \"/\" in templ:\n",
125 |     "            templ = templ.split(\"/\")[0]\n",
126 |     "        prompt = template_map[templ]\n",
127 |     "        print(pod_tag, gpu_type, gptq_params,\"*** PROMPT ***\", prompt,\"*** *** ***\", sep=\"\\n\")\n",
128 |     "\n",
129 |     "        api_url, model_file, pod_id = launch_webui(api_key=API_KEY, \n",
130 |     "                               model=model, \n",
131 |     "                               gptq_params=gptq_params, \n",
132 |     "                               gpu_type=gpu_type,\n",
133 |     "                               pod_num=pod_tag)\n",
134 |     "\n",
135 |     "\n",
136 |     "        print(\"starting benchmark...\")\n",
137 |     "        try:\n",
138 |     "            pod_benchmark(model_file + \"_\" + templ + \"_\" + file_tag, prompt, assistant_tag=prompt[-15:], pod_id=pod_id, api_key=API_KEY, start_from=0, host=api_url, port=443, insert_func_stub=True)\n",
139 |     "        finally:\n",
140 |     "            # Always tear the pod down, even when the benchmark raised\n",
141 |     "            stop_and_terminate_pod(pod_id, API_KEY)\n",
142 |     "        # No manual sys.stdout/sys.stderr close here: leaving the with-block\n",
143 |     "        # restores both streams and closes the log file.\n",
144 |     "    print(\"Finished\", model)\n"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": null,
150 |    "id": "8434f398-497c-4234-aa79-09807866c986",
151 |    "metadata": {
152 |     "tags": []
153 |    },
154 |    "outputs": [],
155 |    "source": [
156 |     "# to_run = [f for f in sorted(combined_models.keys()) if \"7B\" in f][:4]\n",
157 |     "to_run = combined_models.keys()"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "code",
162 |    "execution_count": null,
163 |    "id": "6dd713b7-81ed-4c80-9d1c-759d1e539bd9",
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": [
167 |     "import concurrent.futures\n",
168 |     "import contextlib\n",
169 |     "import os\n",
170 |     "\n",
171 |     "MAX_WORKERS = 8\n",
172 |     "\n",
173 |     "from multiprocessing import Pool\n",
174 |     "\n",
175 |     "def worker(model):\n",
176 |     "    return process_model(model, file_tag)\n",
177 |     "\n",
178 |     "pool = Pool(processes=MAX_WORKERS)\n",
179 |     "\n",
180 |     "for model in to_run:\n",
181 |     "    pool.apply_async(worker, (model,))\n",
182 |     "\n",
183 |     "pool.close()\n",
184 |     "pool.join()"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "id": "7ff5c221-7250-4ead-b17e-3f7a4262b3c9",
191 |    "metadata": {
192 |     "tags": []
193 |    },
194 |    "outputs": [],
195 |    "source": [
196 |     "killall_pods(API_KEY)"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": null,
202 |    "id": "a5f79168-309e-420d-bcad-2c49197d443b",
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": []
206 |   }
207 |  ],
208 |  "metadata": {
209 |   "kernelspec": {
210 |    "display_name": "conda_pytorch_p310",
211 |    "language": "python",
212 |    "name": "conda_pytorch_p310"
213 |   },
214 |   "language_info": {
215 |    "codemirror_mode": {
216 |     "name": "ipython",
217 |     "version": 3
218 |    },
219 |    "file_extension": ".py",
220 |    "mimetype": "text/x-python",
221 |    "name": "python",
222 |    "nbconvert_exporter": "python",
223 |    "pygments_lexer": "ipython3",
224 |    "version": "3.10.10"
225 |   }
226 |  },
227 |  "nbformat": 4,
228 |  "nbformat_minor": 5
229 | }
230 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 my-other-github-account
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Proprietary Model Evaluations/ClaudeReg Eval.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "id": "9cd96fcb-08b3-4845-9083-306e825f6b98",
 7 |    "metadata": {
 8 |     "tags": []
 9 |    },
10 |    "outputs": [],
11 |    "source": [
12 |     "# !pip install poe-api"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "code",
17 |    "execution_count": 2,
18 |    "id": "b8c9a7b1-76e5-4dba-be3c-65a7f0776fd5",
19 |    "metadata": {
20 |     "tags": []
21 |    },
22 |    "outputs": [],
23 |    "source": [
24 |     "import poe\n",
25 |     "client = poe.Client(\"<KEY>\")"
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "code",
30 |    "execution_count": null,
31 |    "id": "80e1e50a-866a-444f-a8c4-95214ce292ab",
32 |    "metadata": {
33 |     "scrolled": true,
34 |     "tags": []
35 |    },
36 |    "outputs": [],
37 |    "source": [
38 |     "import os, time\n",
39 |     "from benchmark_utils import run_benchmark, run, extract_code\n",
40 |     "\n",
41 |     "model_name = \"Claude\"\n",
42 |     "\n",
43 |     "def my_completion_cplus(code, **kwargs):  # custom_completion hook: asks the Poe bot \"a2\" to finish `code`\n",
44 |     "    global client  # rebound on reconnect below; without this, `client` is local and its first use raises UnboundLocalError\n",
45 |     "    prompt = \"Please complete the following code:\\n```python\\n%s```\" % code\n",
46 |     "    while True:\n",
47 |     "        try:\n",
48 |     "            for chunk in client.send_message(\"a2\", prompt, with_chat_break = True):\n",
49 |     "                pass\n",
50 |     "            answer = chunk[\"text\"]\n",
51 |     "            break\n",
52 |     "        except Exception:  # not a bare except, so Ctrl-C can still stop the retry loop\n",
53 |     "            print(\"trying again in 30s!\")\n",
54 |     "            time.sleep(30)\n",
55 |     "            try:\n",
56 |     "                client = poe.Client(\"<KEY>\")\n",
57 |     "            except Exception:\n",
58 |     "                pass  # best effort: keep the old client and retry\n",
59 |     "            print(\"trying now!\")\n",
60 |     "    results = extract_code(answer, \"\")\n",
61 |     "    print(results)\n",
62 |     "    return results\n",
63 |     "\n",
64 |     "run_benchmark(model_name, custom_completion=my_completion_cplus, prompt_type = \"custom\")"
65 |    ]
66 |   },
67 |   {
68 |    "cell_type": "code",
69 |    "execution_count": null,
70 |    "id": "f4db244a-2a26-49fd-9377-3776889366dd",
71 |    "metadata": {},
72 |    "outputs": [],
73 |    "source": []
74 |   }
75 |  ],
76 |  "metadata": {
77 |   "kernelspec": {
78 |    "display_name": "conda_pytorch_p310",
79 |    "language": "python",
80 |    "name": "conda_pytorch_p310"
81 |   },
82 |   "language_info": {
83 |    "codemirror_mode": {
84 |     "name": "ipython",
85 |     "version": 3
86 |    },
87 |    "file_extension": ".py",
88 |    "mimetype": "text/x-python",
89 |    "name": "python",
90 |    "nbconvert_exporter": "python",
91 |    "pygments_lexer": "ipython3",
92 |    "version": "3.10.10"
93 |   }
94 |  },
95 |  "nbformat": 4,
96 |  "nbformat_minor": 5
97 | }
98 | 


--------------------------------------------------------------------------------
/Proprietary Model Evaluations/GPT Eval.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "72d8974d-c534-4c07-b780-cda0d0a06d56",
  7 |    "metadata": {
  8 |     "tags": []
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "!pip install openai"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": null,
 18 |    "id": "2155ec61-375b-47a4-9e36-4bdfb1d1551f",
 19 |    "metadata": {
 20 |     "tags": []
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import openai\n",
 25 |     "\n",
 26 |     "openai.api_key = \"<KEY>\""
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "id": "08b758f0-44aa-41ac-80f9-e271fdebedc4",
 33 |    "metadata": {
 34 |     "tags": []
 35 |    },
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "import requests\n",
 39 |     "from evalplus.data import get_human_eval_plus, write_jsonl\n",
 40 |     "\n",
 41 |     "problems = get_human_eval_plus()\n",
 42 |     "\n",
 43 |     "num_samples_per_task = 1\n",
 44 |     "len(list(problems.keys()))"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "id": "9de9eabb-5a58-4202-8730-82c5325fda99",
 51 |    "metadata": {
 52 |     "tags": []
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "import time\n",
 57 |     "def run(prompt, seed, port = 5000):\n",
 58 |     "    \"\"\"Send `prompt` to gpt-3.5-turbo and return the reply text, retrying every second until a call succeeds (`seed` and `port` are unused).\"\"\"\n",
 59 |     "    while True:  # Keep retrying until a request succeeds\n",
 60 |     "        try:\n",
 61 |     "            result = openai.ChatCompletion.create(\n",
 62 |     "                model=\"gpt-3.5-turbo\",\n",
 63 |     "                messages=[\n",
 64 |     "                    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
 65 |     "                    {\"role\": \"user\", \"content\": prompt},\n",
 66 |     "                ],\n",
 67 |     "                temperature=0.0\n",
 68 |     "            )\n",
 69 |     "            return result['choices'][0][\"message\"][\"content\"]\n",
 70 |     "        except Exception as e:  # any failure lands here, not only rate limits\n",
 71 |     "            print(e)  # surface the error so a permanent failure is not a silent infinite loop\n",
 72 |     "            time.sleep(1)  # Wait for a second before retrying"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "id": "50b6101a-3aeb-4cdf-a95b-a08044f6c6e1",
 79 |    "metadata": {
 80 |     "tags": []
 81 |    },
 82 |    "outputs": [],
 83 |    "source": [
 84 |     "def get_function_body(code):\n",
 85 |     "    lines = code.splitlines()\n",
 86 |     "    function_lines = []\n",
 87 |     "    for line in lines:\n",
 88 |     "        # Include lines that start with a whitespace character or are empty\n",
 89 |     "        if line.startswith((' ', '\\t')) or line.strip() == '':\n",
 90 |     "            function_lines.append(line)\n",
 91 |     "        # Stop including lines when we hit a line that contains text but does not start with a whitespace character\n",
 92 |     "        elif line.strip() != '' and not line.startswith((' ', '\\t')):\n",
 93 |     "            break\n",
 94 |     "    return '\\n'.join(function_lines)\n",
 95 |     "\n",
 96 |     "def generate_one_completion(prompt_code, seed = -1, port = 5000, long_prompt = False, user_tag = \"HUMAN:\", assistant_tag = \"AI MODEL:\", system_prefix = \"\"):\n",
 97 |     "    print(seed)\n",
 98 |     "    suffix = 'def'+prompt_code.split(\"def\")[1].split(\"(\")[0]+\"(\"\n",
 99 |     "    \n",
100 |     "    if long_prompt:\n",
101 |     "        prompt = \"\"\"%s\n",
102 |     "%s\n",
103 |     "Complete the following Python code: \n",
104 |     "Notes: respond with the entire complete function definition\n",
105 |     "do not add any comments, be as concise in your code as possible\n",
106 |     "use only built-in libraries, assume no additional imports other than those provided (if any)\n",
107 |     "\n",
108 |     "code:\n",
109 |     "%s\n",
110 |     "\n",
111 |     "%s\n",
112 |     "```python\n",
113 |     "\n",
114 |     "            %s\"\"\" % (system_prefix, user_tag, prompt_code, assistant_tag, suffix)\n",
115 |     "    else:\n",
116 |     "        prompt = \"\"\"```python\n",
117 |     "%s\"\"\" % prompt_code\n",
118 |     "    \n",
119 |     "    code_result = run(prompt, seed = seed, port = port)\n",
120 |     "    result = \"\\n\".join(code_result.split(\"def\")[-1].split(\"\\n\")[1:]).split(\"```\")[0]\n",
121 |     "    result = get_function_body(result)\n",
122 |     "    print(\"####\", prompt, \"####\")\n",
123 |     "    print(result)\n",
124 |     "    return result\n",
125 |     "\n",
126 |     "import itertools\n",
127 |     "\n",
128 |     "def run_benchmark(filename, maxnum=-1, port=5000, long_prompt = False, user_tag = \"HUMAN:\", assistant_tag = \"AI MODEL:\", system_prefix = \"\"):\n",
129 |     "    \"\"\"Generate completions for the first `maxnum` problems (all of them if maxnum is negative), rewriting `filename` after every task.\"\"\"\n",
130 |     "    iterc = itertools.count()\n",
131 |     "    all_keys = list(problems)\n",
132 |     "    # maxnum=-1 means \"no limit\"; the former [:maxnum] slice dropped the last problem, which was then only ever written as 'pass'\n",
133 |     "    problem_keys = all_keys if maxnum < 0 else all_keys[:maxnum]\n",
134 |     "    all_samples = []\n",
135 |     "\n",
136 |     "    for idx, task_id in enumerate(problem_keys):\n",
137 |     "        # Generate real completions\n",
138 |     "        for _ in range(num_samples_per_task):\n",
139 |     "            completion = generate_one_completion(problems[task_id][\"prompt\"], seed=next(iterc), port=port, long_prompt = long_prompt, user_tag = user_tag, assistant_tag = assistant_tag, system_prefix = system_prefix)\n",
140 |     "            all_samples.append(dict(task_id=task_id, completion=completion))\n",
141 |     "\n",
142 |     "        # Pad a copy with 'pass' completions for every task not generated yet\n",
143 |     "        temp_samples = all_samples.copy()\n",
144 |     "        for remaining_task_id in all_keys[idx+1:]:\n",
145 |     "            for _ in range(num_samples_per_task):\n",
146 |     "                temp_samples.append(dict(task_id=remaining_task_id, completion=\"    pass\"))\n",
147 |     "        # Write all samples to the file, overwriting it completely\n",
148 |     "        write_jsonl(filename, temp_samples)\n"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": null,
154 |    "id": "8f2412b0-788f-4aba-9f73-cccc7a9db504",
155 |    "metadata": {
156 |     "scrolled": true,
157 |     "tags": []
158 |    },
159 |    "outputs": [],
160 |    "source": [
161 |     "run_benchmark(\"gpt_verylowtemp_long.jsonl\", maxnum=-1, port=5000, long_prompt = True, user_tag = \"HUMAN:\", assistant_tag = \"\", system_prefix = \"\")"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "code",
166 |    "execution_count": null,
167 |    "id": "d58c9606-0951-4eeb-8179-9abfebdfe771",
168 |    "metadata": {
169 |     "tags": []
170 |    },
171 |    "outputs": [],
172 |    "source": [
173 |     "print(\"done\")"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": null,
179 |    "id": "32d6b1ea-276c-4a3a-be77-ea23f807ff61",
180 |    "metadata": {},
181 |    "outputs": [],
182 |    "source": []
183 |   }
184 |  ],
185 |  "metadata": {
186 |   "kernelspec": {
187 |    "display_name": "conda_amazonei_pytorch_latest_p37",
188 |    "language": "python",
189 |    "name": "conda_amazonei_pytorch_latest_p37"
190 |   },
191 |   "language_info": {
192 |    "codemirror_mode": {
193 |     "name": "ipython",
194 |     "version": 3
195 |    },
196 |    "file_extension": ".py",
197 |    "mimetype": "text/x-python",
198 |    "name": "python",
199 |    "nbconvert_exporter": "python",
200 |    "pygments_lexer": "ipython3",
201 |    "version": "3.7.10"
202 |   }
203 |  },
204 |  "nbformat": 4,
205 |  "nbformat_minor": 5
206 | }
207 | 


--------------------------------------------------------------------------------
/Proprietary Model Evaluations/GPT4 Eval.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "72d8974d-c534-4c07-b780-cda0d0a06d56",
  7 |    "metadata": {
  8 |     "tags": []
  9 |    },
 10 |    "outputs": [],
 11 |    "source": [
 12 |     "!pip install openai"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": null,
 18 |    "id": "2155ec61-375b-47a4-9e36-4bdfb1d1551f",
 19 |    "metadata": {
 20 |     "tags": []
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import openai\n",
 25 |     "\n",
 26 |     "openai.api_key = \"<KEY>\""
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": null,
 32 |    "id": "08b758f0-44aa-41ac-80f9-e271fdebedc4",
 33 |    "metadata": {
 34 |     "tags": []
 35 |    },
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "import requests\n",
 39 |     "from evalplus.data import get_human_eval_plus, write_jsonl\n",
 40 |     "\n",
 41 |     "problems = get_human_eval_plus()\n",
 42 |     "\n",
 43 |     "num_samples_per_task = 1\n",
 44 |     "len(list(problems.keys()))"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": null,
 50 |    "id": "9de9eabb-5a58-4202-8730-82c5325fda99",
 51 |    "metadata": {
 52 |     "tags": []
 53 |    },
 54 |    "outputs": [],
 55 |    "source": [
 56 |     "import time\n",
 57 |     "def run(prompt, seed, port = 5000):\n",
 58 |     "    while True:  # Keep trying until we break out\n",
 59 |     "        try:\n",
 60 |     "            result = openai.ChatCompletion.create(\n",
 61 |     "                model=\"gpt-4\",\n",
 62 |     "                # model=\"gpt-4\",\n",
 63 |     "                messages=[\n",
 64 |     "                    {\"role\": \"system\", \"content\": \"You are a helpful assistant. Please complete the following code snippet.\"},\n",
 65 |     "                    {\"role\": \"user\", \"content\": prompt},\n",
 66 |     "                ],\n",
 67 |     "                temperature=0.0,\n",
 68 |     "                max_tokens=500,\n",
 69 |     "                n=1\n",
 70 |     "            )\n",
 71 |     "\n",
 72 |     "            response = result['choices'][0][\"message\"][\"content\"]\n",
 73 |     "            return response\n",
 74 |     "        except Exception as e:  # If we hit the rate limit\n",
 75 |     "            print(e)\n",
 76 |     "            time.sleep(1)  # Wait for a second before retrying"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "id": "50b6101a-3aeb-4cdf-a95b-a08044f6c6e1",
 83 |    "metadata": {
 84 |     "tags": []
 85 |    },
 86 |    "outputs": [],
 87 |    "source": [
 88 |     "def get_function_body(code):\n",
 89 |     "    lines = code.splitlines()\n",
 90 |     "    function_lines = []\n",
 91 |     "    found_def = False\n",
 92 |     "\n",
 93 |     "    for line in lines:\n",
 94 |     "        # If 'def ' is found in a line, mark that we've entered the function\n",
 95 |     "        if 'def ' in line:\n",
 96 |     "            found_def = True\n",
 97 |     "            function_lines.append(line)\n",
 98 |     "            continue\n",
 99 |     "\n",
100 |     "        # If we've entered the function, stop including lines when we hit a line that contains text but does not start with a whitespace character\n",
101 |     "        if found_def and line.strip() != '' and not line.startswith((' ', '\\t')):\n",
102 |     "            break\n",
103 |     "\n",
104 |     "        # Always include the line in the function lines\n",
105 |     "        function_lines.append(line)\n",
106 |     "\n",
107 |     "    return '\\n'.join(function_lines)\n",
108 |     "\n",
109 |     "def cut_off_prefix(s):\n",
110 |     "    idx_from = s.find('from ')\n",
111 |     "    idx_def = s.find('def ')\n",
112 |     "    idx_import = s.find('import ')\n",
113 |     "\n",
114 |     "    # Check if none of the keywords were found\n",
115 |     "    if idx_from == -1 and idx_def == -1 and idx_import == -1:\n",
116 |     "        return s\n",
117 |     "\n",
118 |     "    # Prepare a list of found indices, excluding those where the keyword was not found\n",
119 |     "    indices = [idx for idx in [idx_from, idx_def, idx_import] if idx != -1]\n",
120 |     "\n",
121 |     "    # Return the string starting from the earliest found keyword\n",
122 |     "    return s[min(indices):]\n",
123 |     "    \n",
124 |     "def generate_one_completion(prompt_code, seed = -1, port = 5000, long_prompt = False, user_tag = \"HUMAN:\", assistant_tag = \"AI MODEL:\", system_prefix = \"\"):\n",
125 |     "    print(seed)\n",
126 |     "    # suffix = 'def'+prompt_code.split(\"def\")[1].split(\"(\")[0]+\"(\"\n",
127 |     "    suffix = \"\"\n",
128 |     "    if long_prompt:\n",
129 |     "        prompt = \"\"\"%s\n",
130 |     "%s\n",
131 |     "Complete the following Python code: \n",
132 |     "Notes: respond with the entire complete function definition\n",
133 |     "do not add any comments, be as concise in your code as possible\n",
134 |     "use only built-in libraries, assume no additional imports other than those provided (if any)\n",
135 |     "\n",
136 |     "code:\n",
137 |     "%s\n",
138 |     "\"\"\" % (system_prefix, user_tag, prompt_code)# , assistant_tag, suffix)\n",
139 |     "    else:\n",
140 |     "        prompt = \"\"\"```\n",
141 |     "%s\n",
142 |     "```\"\"\" % prompt_code\n",
143 |     "    \n",
144 |     "    code_result = run(prompt, seed = seed, port = port)\n",
145 |     "    # result = \"\\n\".join(code_result.split(\"def\")[-1].split(\"\\n\")[1:]).split(\"```\")[0]\n",
146 |     "    result = cut_off_prefix(code_result)\n",
147 |     "    result = get_function_body(result)\n",
148 |     "    print(\"####\", prompt, \"####\")\n",
149 |     "    print(\"***\", result, \"***\")\n",
150 |     "    return result\n",
151 |     "\n",
152 |     "import itertools\n",
153 |     "\n",
154 |     "def run_benchmark(filename, maxnum=-1, port=5000, long_prompt = False, user_tag = \"\", assistant_tag = \"\", system_prefix = \"\"):\n",
155 |     "    \"\"\"Generate completions for the first `maxnum` problems (all of them if maxnum is negative), rewriting `filename` after every task.\"\"\"\n",
156 |     "    iterc = itertools.count()\n",
157 |     "    all_keys = list(problems)\n",
158 |     "    # maxnum=-1 means \"no limit\"; the former [:maxnum] slice dropped the last problem, which was then only ever written as 'pass'\n",
159 |     "    problem_keys = all_keys if maxnum < 0 else all_keys[:maxnum]\n",
160 |     "    all_samples = []\n",
161 |     "\n",
162 |     "    for idx, task_id in enumerate(problem_keys):\n",
163 |     "        # Generate real completions\n",
164 |     "        for _ in range(num_samples_per_task):\n",
165 |     "            completion = generate_one_completion(problems[task_id][\"prompt\"], seed=next(iterc), port=port, long_prompt = long_prompt, user_tag = user_tag, assistant_tag = assistant_tag, system_prefix = system_prefix)\n",
166 |     "            all_samples.append(dict(task_id=task_id, completion=completion))\n",
167 |     "\n",
168 |     "        # Pad a copy with 'pass' completions for every task not generated yet\n",
169 |     "        temp_samples = all_samples.copy()\n",
170 |     "        for remaining_task_id in all_keys[idx+1:]:\n",
171 |     "            for _ in range(num_samples_per_task):\n",
172 |     "                temp_samples.append(dict(task_id=remaining_task_id, completion=\"    pass\"))\n",
173 |     "        # Write all samples to the file, overwriting it completely\n",
174 |     "        write_jsonl(filename, temp_samples)\n"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": null,
180 |    "id": "8f2412b0-788f-4aba-9f73-cccc7a9db504",
181 |    "metadata": {
182 |     "scrolled": true,
183 |     "tags": []
184 |    },
185 |    "outputs": [],
186 |    "source": [
187 |     "run_benchmark(\"gpt4_final_500c_long.jsonl\", maxnum=-1, port=5000, long_prompt = True, user_tag = \"\", assistant_tag = \"\", system_prefix = \"\")"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": null,
193 |    "id": "d58c9606-0951-4eeb-8179-9abfebdfe771",
194 |    "metadata": {
195 |     "tags": []
196 |    },
197 |    "outputs": [],
198 |    "source": [
199 |     "print(\"done\")"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": null,
205 |    "id": "0c487464-c6f4-43b7-98af-2ad4aa2ec24c",
206 |    "metadata": {},
207 |    "outputs": [],
208 |    "source": []
209 |   }
210 |  ],
211 |  "metadata": {
212 |   "kernelspec": {
213 |    "display_name": "conda_pytorch_p310",
214 |    "language": "python",
215 |    "name": "conda_pytorch_p310"
216 |   },
217 |   "language_info": {
218 |    "codemirror_mode": {
219 |     "name": "ipython",
220 |     "version": 3
221 |    },
222 |    "file_extension": ".py",
223 |    "mimetype": "text/x-python",
224 |    "name": "python",
225 |    "nbconvert_exporter": "python",
226 |    "pygments_lexer": "ipython3",
227 |    "version": "3.10.10"
228 |   }
229 |  },
230 |  "nbformat": 4,
231 |  "nbformat_minor": 5
232 | }
233 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # OSS Language Model Programming Evaluations Using the HumanEval+ Python Benchmark
 2 | 
 3 | This repo is designed to evaluate OSS language models pulled from the HuggingFace Model Hub by running them against the standardized Python coding benchmark HumanEval+. The tool interacts with these models via the oobabooga text-generation-webui API, which serves as the interface for model interaction and generation of Python code completions.
 4 | 
 5 | ## Results:
 6 | 
 7 | ![HumanEval_v3-4](https://github.com/my-other-github-account/llm-humaneval-benchmarks/assets/82987814/a8f6e32c-5688-4eea-a7ca-3c8c48683cda)
 8 | 
 9 | 
10 | ## Functionality
11 | 
12 | - **Interfacing with HuggingFace Models:** The program leverages the text-generation-webui API to interact with language models pulled from the HuggingFace Model Hub. These models are used for generating Python code completions.
13 | 
14 | - **Code Completion:** Provides functionality for completing given Python code. Different prompt formatting options (short, medium, long) are provided to facilitate various completion contexts. The completion results are then returned.
15 | 
16 | - **Benchmarking with HumanEval+:** This tool is designed to run benchmarks using the HumanEval+ standardized Python coding tasks. The `run_benchmark` function generates multiple code completions for different tasks and logs the results in a JSONL file. This enables comprehensive evaluation of a model's performance across various coding tasks.
17 | 
18 | - **Server Management:** The tool includes functionality for starting a separate server process, which runs an AI model. This process serves as the backend that the script interacts with to generate code completions. The server's output is logged for debugging or record-keeping purposes.
19 | 
20 | ## Usage
21 | 
22 | You'll need to install evalplus for this (on top of having text-generation-webui installed):
23 | `pip install evalplus`
24 | 
25 | The notebooks show how to get this running on AWS SageMaker, run a benchmark, and evaluate benchmark results using the Eval+ evaluation CLI.
26 | 
27 | Basic usage looks like this (the model must already be downloaded from Hugging Face, which you can do easily in the usual text-generation-webui GUI):
28 | 
29 | ```python
30 | from benchmark_manager import run_benchmark_workflow
31 | 
32 | # Vicuna prompt style:
33 | run_benchmark_workflow("TheBloke_vicuna-7B-1.1-GPTQ-4bit-128g", 6666, group_size=128,
34 |                            prompt_type="long", user_tag="USER:", 
35 |                            assistant_tag="ASSISTANT:", system_prefix="A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.", experiment_tag="vicuna")
36 |                            
37 | # Alpaca prompt style:
38 | run_benchmark_workflow("TheBloke_wizardLM-7B-GPTQ", 6666, group_size=128,
39 |                            prompt_type="long", user_tag="USER:", 
40 |                            assistant_tag="ASSISTANT:", system_prefix="", experiment_tag="alpaca")
41 | ```
42 | 
43 | More advanced usage, if you want finer control over the prompt, looks like this:
44 | 
45 | ```python
46 | import os, signal
47 | from benchmark_utils import run_benchmark, run, extract_code
48 | from benchmark_manager import start_server
49 | 
50 | model_name = "TheBloke_wizardLM-7B-GPTQ"
51 | portnum = 6666
52 | group_size=128
53 | 
54 | server_process = start_server(model_name, portnum, group_size=group_size, 
55 |                               working_directory='text-generation-webui') # Make sure server.py is in working_directory
56 | 
57 | def my_completion(code, **kwargs):
58 |     prompt = "Complete this code:\n%s\nASSISTANT:" % code
59 |     results = extract_code(run(prompt, port=kwargs["port"]))
60 |     print(results)
61 |     return results
62 | 
63 | run_benchmark(model_name, port=portnum, custom_completion=my_completion, prompt_type = "custom")
64 | 
65 | os.kill(server_process.pid, signal.SIGTERM)
66 | ```
67 | 
 68 | To run evalplus against your results (more advanced analysis is in `2_Parse_Results.ipynb`):
69 | 
70 | ```python
71 | import subprocess
72 | 
73 | filename = "results/TheBloke_wizardLM-7B-GPTQ_custom.jsonl"
74 | 
75 | result = subprocess.run(["sudo", "/home/ec2-user/anaconda3/envs/pytorch_p39/bin/evalplus.evaluate",
76 |                 "--dataset", "humaneval", "--samples", filename, "--i-just-wanna-run"], 
77 |                         text=True, capture_output=True, check=False)
78 | 
79 | print(result.stdout, "\n", result.stderr)
80 | ```
81 | 
82 | ## References:
83 | 
84 | https://github.com/evalplus/evalplus
85 | 
86 | https://github.com/openai/human-eval
87 | 
88 | https://arxiv.org/abs/2107.03374
89 | 
90 | https://github.com/oobabooga/text-generation-webui
91 | 
92 | ---
93 | 


--------------------------------------------------------------------------------
/benchmark_manager.py:
--------------------------------------------------------------------------------
  1 | import subprocess
  2 | import os
  3 | import signal
  4 | import threading
  5 | from benchmark_utils import generate_one_completion, run_benchmark
  6 | 
  7 | def print_server_output(process):
  8 |     """
  9 |     This function prints the output of a subprocess in real-time.
 10 |     
 11 |     :param process: The subprocess whose output should be printed.
 12 |     """
 13 |     for line in process.stdout:
 14 |         print(line, end='')
 15 | 
 16 | 
 17 | def start_server(model_name, portnum, group_size=None, wbits=None, working_directory='text-generation-webui', public = False):
 18 |     """
 19 |     This function starts a server in a new process.
 20 |     
 21 |     :param model_name: The name of the model to be used by the server.
 22 |     :param portnum: The base port number to be used by the server.
 23 |     :param group_size: The group size to be used by the server.
 24 |     :param working_directory: The working directory in which the server script resides.
 25 |     :return: The subprocess in which the server is running.
 26 |     """
 27 |     # Create the base command list
 28 |     command = [
 29 |         'python', 'server.py', 
 30 |         '--api', 
 31 |         '--api-blocking-port', str(portnum), 
 32 |         '--api-streaming-port', str(portnum+1),  
 33 |         '--listen-port', str(portnum+2), 
 34 |         '--model_type', 'llama',
 35 |         '--trust-remote-code'
 36 |     ]
 37 |     
 38 |     if model_name is not None:
 39 |         command += ['--model', model_name]
 40 |     
 41 |     if wbits is not None:
 42 |         command += ['--wbits', str(wbits),]
 43 |         
 44 |     if public:
 45 |         command += ['--share']
 46 | 
 47 |     # If group_size is provided, append it to the command list
 48 |     if group_size is not None:
 49 |         command += ['--groupsize', str(group_size)]
 50 | 
 51 |     print("Starting server...")
 52 |     
 53 |     # Start the server in a new process
 54 |     server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, cwd=working_directory)
 55 | 
 56 |     # Wait for "Starting API" to be printed before proceeding
 57 |     for line in iter(server_process.stdout.readline, ''):
 58 |         print(line, end='')  # Optionally print the server output
 59 |         if "Starting API" in line:
 60 |             break
 61 | 
 62 |     print("Server started!")
 63 |     
 64 |     return server_process
 65 | 
 66 | def block_log_server(server_process):
 67 |     for line in iter(server_process.stdout.readline, ''):
 68 |         print(line, end='')
 69 |         
 70 | def run_benchmark_workflow(model_name, portnum, group_size=None, wbits=4,
 71 |                            maxnum=-1, start_from=0, 
 72 |                            prompt_type="long", user_tag="### Instruction:", 
 73 |                            assistant_tag="### Response:", system_prefix="", experiment_tag="", 
 74 |                            working_directory='text-generation-webui', public=False):
 75 |     """
 76 |     This function manages the complete benchmark workflow, including starting the server, running the benchmark, and stopping the server.
 77 |     
 78 |     :param model_name: The name of the model to be used.
 79 |     :param portnum: The base port number to be used by the server.
 80 |     :param group_size: The group size to be used by the server.
 81 |     :param maxnum: The maximum number of items to process during the benchmark.
 82 |     :param prompt_type: The type of prompt to use during the benchmark.
 83 |     :param user_tag: The user tag to use during the benchmark.
 84 |     :param assistant_tag: The assistant tag to use during the benchmark.
 85 |     :param system_prefix: The system prefix to use during the benchmark.
 86 |     :param experiment_tag: The experiment tag to use during the benchmark.
 87 |     :param working_directory: The working directory in which the server script resides.
 88 |     """
 89 |     # Start the server
 90 |     server_process = start_server(model_name, portnum, wbits = wbits, group_size=group_size, working_directory=working_directory, public=public)
 91 | 
 92 |     # Create a separate thread to print the server output
 93 |     print_thread = threading.Thread(target=print_server_output, args=(server_process,))
 94 |     print_thread.start()
 95 | 
 96 |     # Run the benchmark
 97 |     run_benchmark(model_name, maxnum=maxnum, start_from=start_from,
 98 |               port=portnum, prompt_type=prompt_type, user_tag=user_tag,
 99 |               assistant_tag=assistant_tag, experiment_tag=experiment_tag, system_prefix=system_prefix)
100 | 
101 |     # Once the benchmark has finished running, terminate the server process
102 |     os.kill(server_process.pid, signal.SIGTERM)
103 |     
104 |     # Wait for the print_thread to finish
105 |     print_thread.join()
106 | 


--------------------------------------------------------------------------------
/benchmark_utils.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import itertools
  3 | from evalplus.data import get_human_eval_plus, write_jsonl
  4 | import os
  5 | import json
  6 | import sys
  7 | import asyncio
  8 | import websockets
  9 | 
# Load the problem data
# The HumanEval+ problem set is loaded once at import time; run_benchmark
# iterates over its keys (task ids) and reads each entry's "prompt" field.
problems = get_human_eval_plus()
# Number of completions run_benchmark generates for each task.
num_samples_per_task = 1

# Module-level endpoint defaults.
# NOTE(review): URI carries no port, and run() / run_async() build their own
# local URI from their host/port arguments, so these two names appear unused
# within this file -- confirm whether other modules import them.
HOST = 'localhost'
URI = f'http://{HOST}/api/v1/generate'

# Configure the request parameters
# NOTE(review): run() and run_async() each construct a local `request` dict
# that shadows this one, so these defaults do not appear to be sent by the
# functions in this file -- confirm whether other modules read them.
request = {
    'prompt': '',
    'max_new_tokens': 500,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.1,
    'typical_p': 1,
    'epsilon_cutoff': 0,  # In units of 1e-4
    'eta_cutoff': 0,  # In units of 1e-4
    'repetition_penalty': 1.18,
    'top_k': 40,
    'min_length': 0,
    'no_repeat_ngram_size': 0,
    'num_beams': 1,
    'penalty_alpha': 0,
    'length_penalty': 1,
    'early_stopping': False,
    'mirostat_mode': 0,
    'mirostat_tau': 5,
    'mirostat_eta': 0.1,
    'seed': '',
    'add_bos_token': True,
    'truncation_length': 2048,
    'ban_eos_token': False,
    'skip_special_tokens': True,
    'stopping_strings': []
}
 45 | 
 46 | import requests
 47 | from time import sleep
 48 | 
 49 | async def run_async(prompt, seed=-1, port = 443, deterministic = True, host='localhost'):
 50 |     # Set the prompt and seed for the current request
 51 |     request = {
 52 |         'prompt': prompt,
 53 |         'seed': seed,
 54 |         'max_new_tokens': 250,
 55 |         'truncation_length': 2048,
 56 |         'skip_special_tokens': True
 57 |     }
 58 | 
 59 |     if deterministic:
 60 |         request['do_sample'] = False
 61 |         request['temperature'] = 1
 62 |         request['top_p'] = 1
 63 |         request['top_k'] = 0
 64 |         request['repetition_penalty'] = 1
 65 |         request['num_beams'] = 1
 66 |         request['early_stopping'] = False
 67 | 
 68 |     URI = f'ws://{host}:{port}/api/v1/stream'
 69 | 
 70 |     async with websockets.connect(URI, ping_interval=None) as websocket:
 71 |         await websocket.send(json.dumps(request))
 72 | 
 73 |         response_text = prompt
 74 | 
 75 |         while True:
 76 |             incoming_data = await websocket.recv()
 77 |             incoming_data = json.loads(incoming_data)
 78 | 
 79 |             match incoming_data['event']:
 80 |                 case 'text_stream':
 81 |                     response_text += incoming_data['text']
 82 |                 case 'stream_end':
 83 |                     return response_text
 84 | 
 85 | def run_sync(prompt, seed=-1, port=443, deterministic=True, host='localhost'):
 86 |     return asyncio.run(run_async(prompt, seed, port, deterministic, host))
 87 | 
 88 | def run(prompt, seed=-1, port = 443, deterministic = True, host='localhost'):
 89 |     # Set the prompt and seed for the current request
 90 |     request = {}
 91 |     request['prompt'] = prompt
 92 |     request['seed'] = seed
 93 |     if deterministic:
 94 |         request['do_sample'] = False
 95 |         request['temperature'] = 1
 96 |         request['top_p'] = 1
 97 |         request['top_k'] = 0
 98 |         request['repetition_penalty'] = 1
 99 | 
100 |     # Set the URI for the request
101 |     URI = f'{host}:{port}/api/v1/generate'
102 |     
103 |     # Set up retry mechanism
104 |     retries = 2
105 |     backoff_factor = 0.1
106 | 
107 |     for i in range(retries):
108 |         try:
109 |             # Send the request and return the response
110 |             response = requests.post(URI, json=request, timeout=420)
111 |             response.raise_for_status()
112 |             return prompt + response.json()['results'][0]['text']
113 |         except Exception as err:
114 |             print(f"Attempt {i+1} failed. Error: {err}")
115 |             sleep(backoff_factor * (2 ** i))  # Exponential backoff
116 |         except requests.exceptions.RequestException as e:
117 |             # For any other request exception, raise immediately
118 |             raise e
119 |     raise Exception("All attempts failed")
120 | 
def get_function_body(code):
    """
    Pull the function definitions out of a block of generated text.

    Lines before the first ``def`` are dropped. From the first ``def`` onward
    lines are kept until either a non-blank, unindented line that is not a
    ``def`` is reached, or a seventh ``def`` starts (at most six are kept).

    :param code: Text that may contain one or more function definitions.
    :return: The kept lines joined with newlines ('' when no ``def`` is found).
    """
    kept = []
    defs_seen = 0

    for text in code.splitlines():
        stripped = text.strip()
        if stripped.startswith('def '):
            defs_seen += 1
            if defs_seen > 6:  # a seventh definition ends the extraction
                break
        else:
            unindented_statement = stripped != '' and not text.startswith((' ', '\t'))
            # Top-level code after the function(s) marks the end of the body.
            if unindented_statement and defs_seen > 0:
                break
        if defs_seen > 0:
            kept.append(text)

    return '\n'.join(kept)
140 | 
def get_function_body_old(code):
    """
    Legacy extractor: keep lines up to the first top-level statement after a ``def``.

    Unlike ``get_function_body``, lines that come before the first ``def`` are
    kept, and any line containing ``'def '`` anywhere counts as a definition.

    :param code: Text that may contain a function definition.
    :return: The kept lines joined with newlines.
    """
    collected = []
    inside_function = False

    for text in code.splitlines():
        if 'def ' in text:
            # Definition lines are always kept and never end the extraction.
            inside_function = True
        elif inside_function and text.strip() != '' and text[:1] not in (' ', '\t'):
            # Non-blank, unindented text after a def: the function is over.
            break
        collected.append(text)

    return '\n'.join(collected)
161 | 
def cut_off_prefix_old(s):
    """
    Legacy helper: drop everything before the first 'from ', 'def ' or 'import '.

    :param s: The text to trim.
    :return: ``s`` starting at the earliest keyword, or ``s`` unchanged when
             none of the keywords occurs.
    """
    earliest = None
    for keyword in ('from ', 'def ', 'import '):
        position = s.find(keyword)
        if position == -1:
            continue  # keyword absent
        if earliest is None or position < earliest:
            earliest = position

    if earliest is None:
        return s
    return s[earliest:]
176 | 
def extract_code_old(code):
    """
    Legacy extraction pipeline.

    Takes the text after the last "```python" marker (or the whole text when
    there is none), trims it with ``cut_off_prefix`` and passes the result to
    ``get_function_body``.

    :param code: Raw model output.
    :return: The extracted function text.
    """
    after_last_fence = code.split("```python")[-1]
    trimmed = cut_off_prefix(after_last_fence)
    return get_function_body(trimmed)
181 | 
182 |     
def cut_off_prefix(s):
    """
    Drop everything before the first 'from ', 'def ' or 'import ' in ``s``.

    :param s: The text to trim.
    :return: ``s`` starting at the earliest keyword, or ``s`` unchanged when
             none of the keywords occurs.
    """
    positions = map(s.find, ('from ', 'def ', 'import '))
    # str.find reports a missing keyword as -1; those are ignored.
    first = min((pos for pos in positions if pos != -1), default=None)
    return s if first is None else s[first:]
187 | 
def extract_code(code, assistant_tag="", use_old_parser = False):
    """
    Extract the generated function(s) from raw model output.

    When ``assistant_tag`` is non-empty and occurs in ``code``, only the text
    after its first occurrence is considered. If that text contains a
    "```python" fence, the text after the first fence is trimmed with
    ``cut_off_prefix`` and passed to ``get_function_body``. Without a fence,
    the reply is passed to ``get_function_body`` directly (tag found) or after
    ``cut_off_prefix`` (no tag in use).

    If ``assistant_tag`` is given but does not occur in ``code``, the whole
    text is handled as if no tag had been given, instead of raising IndexError.

    :param code: Raw model output (typically the prompt plus the completion).
    :param assistant_tag: Marker that precedes the model's reply; '' to disable.
    :param use_old_parser: When true, delegate to ``extract_code_old``.
    :return: The extracted function text.
    """
    if use_old_parser:
        return extract_code_old(code)

    tag_found = bool(assistant_tag) and assistant_tag in code
    reply = code.split(assistant_tag)[1] if tag_found else code

    fenced_parts = reply.split("```python")
    if len(fenced_parts) > 1:
        return get_function_body(cut_off_prefix(fenced_parts[1]))

    # No fenced block: with a tag the reply is used as-is; without one the
    # leading prose is trimmed first.
    if tag_found:
        return get_function_body(reply)
    return get_function_body(cut_off_prefix(reply))
202 | 
def generate_one_completion(prompt_code, seed=-1, port=5000, prompt_template="", user_tag="HUMAN:", 
                            assistant_tag="AI MODEL:", host="localhost", insert_func_stub=False, 
                            deterministic=True, use_old_parser = False, use_async = False, **kwargs):
    """
    Build the prompt for one problem, query the model and extract the code.

    ``prompt_template`` is formatted with ``PROMPT=prompt_code``. When
    ``insert_func_stub`` is set, ``def <name>(`` (taken from the text between
    the first "def" and the following "(" in ``prompt_code``) is appended.
    The request goes through ``run_sync`` when ``use_async`` is set and through
    ``run`` otherwise; the response is passed to ``extract_code``, printed and
    returned.

    :raises Exception: When the backend returns exactly the prompt.
    """
    # Optional stub that nudges the model to continue the target function
    stub = 'def' + prompt_code.split("def")[1].split("(")[0] + "(" if insert_func_stub else ""
    prompt = prompt_template.format(PROMPT=prompt_code) + stub

    backend = run_sync if use_async else run
    raw_output = backend(prompt, seed=seed, port=port, deterministic=deterministic, host=host)

    # Nothing was generated beyond the prompt itself
    if raw_output == prompt:
        raise Exception("Model doesn't appear to be loaded. Quitting.")

    completion = extract_code(raw_output, assistant_tag=assistant_tag, use_old_parser = use_old_parser)
    print(completion)
    return completion
223 | 
def run_benchmark(filename, prompt_template, maxnum=-1, start_from=0, port=5000, user_tag="", 
                  assistant_tag="", host="localhost", insert_func_stub=False, 
                  custom_completion=generate_one_completion, use_async = False, deterministic=True, use_old_parser = False, **kwargs):
    """
    Generate completions for the loaded problems and write them to a JSONL file.

    After every task the whole file ``results/{filename}.jsonl`` is rewritten
    with all completions so far plus a ``"    pass"`` placeholder for each task
    that is still pending, so the file always has one entry per sample.

    :param filename: Base name of the results file (without directory or extension).
    :param prompt_template: Template forwarded to ``custom_completion``.
    :param maxnum: Number of problems to use from the start of the set; -1 for all.
    :param start_from: When greater than 0, the first ``start_from`` tasks' samples
                       are reloaded from the existing results file and generation
                       resumes after the last reloaded task.
    :param port: Forwarded to ``custom_completion``.
    :param user_tag: Forwarded to ``custom_completion``.
    :param assistant_tag: Forwarded to ``custom_completion``.
    :param host: Forwarded to ``custom_completion``.
    :param insert_func_stub: Forwarded to ``custom_completion``.
    :param custom_completion: Callable that receives the problem prompt plus the
                              keyword arguments above and returns the completion.
    :param use_async: Forwarded to ``custom_completion``.
    :param deterministic: Forwarded to ``custom_completion``.
    :param use_old_parser: Forwarded to ``custom_completion``.
    :param kwargs: Extra keyword arguments forwarded to ``custom_completion``.
    """
    filepath = f"results/{filename}.jsonl"
    print("Results will be written to:", filepath)
    problem_keys = list(problems) if maxnum == -1 else list(problems)[:maxnum]

    all_samples, iterc = [], itertools.count()

    os.makedirs("results", exist_ok=True)

    # Number of leading tasks that are already done and must be skipped.
    resume_offset = 0

    # If start_from is greater than 0, load existing data
    if start_from > 0:
        with open(filepath, 'r') as file:
            existing_data = [json.loads(line) for line in file]
        all_samples = existing_data[:start_from*num_samples_per_task]
        last_task_id = all_samples[-1]['task_id'] if all_samples else None
        resume_offset = problem_keys.index(last_task_id) + 1 if last_task_id else 0

    pending_keys = problem_keys[resume_offset:]

    # `position` indexes into pending_keys; the displayed task number adds the
    # resume offset back. Keeping the two apart ensures the placeholder slice
    # below covers exactly the tasks that have not been generated yet.
    for position, task_id in enumerate(pending_keys):
        print("Processing Task", resume_offset + position, "of", len(list(problems)))
        for _ in range(num_samples_per_task):
            # Prepare parameters for custom completion
            params = {
                'task_id': task_id,
                'completion': custom_completion(
                    problems[task_id]["prompt"],
                    seed=next(iterc),
                    port=port,
                    prompt_template=prompt_template,
                    user_tag=user_tag,
                    assistant_tag=assistant_tag,
                    insert_func_stub=insert_func_stub,
                    deterministic=deterministic,
                    host=host,
                    use_old_parser = use_old_parser, 
                    use_async = use_async,
                    **kwargs
                )
            }
            all_samples.append(params)

        # Always add placeholders for remaining problems
        remaining_keys = pending_keys[position+1:]
        placeholders = [dict(task_id=remaining_task_id, completion="    pass") 
                        for remaining_task_id in remaining_keys
                        for _ in range(num_samples_per_task)]
        temp_samples = all_samples + placeholders

        # Write to the file, overwriting previous data
        with open(filepath, 'w') as file:
            for item in temp_samples:
                file.write(json.dumps(item) + '\n')
        sys.stdout.flush()
        sys.stderr.flush()

    print("Done writing to", filepath)
284 | 
285 | 


--------------------------------------------------------------------------------
/jsonl_examples/GeorgiaTechResearchInstitute_starcoder-gpteacher-code-instruct_ALPACA_INPUT_LONG_FIXED.jsonl:
--------------------------------------------------------------------------------
  1 | {"task_id": "HumanEval/0", "completion": "def has_close_elements(numbers: List[float], threshold: float) -> bool:\n    return len(set(numbers)) < len(numbers)"}
  2 | {"task_id": "HumanEval/1", "completion": "def separate_paren_groups(paren_string):\n    result = []\n    current_group = ''\n    for char in paren_string:\n        if char == '(':\n            current_group += char\n        elif char == ')':\n            current_group += char\n            result.append(current_group)\n            current_group = ''\n        else:\n            current_group += char\n    return result"}
  3 | {"task_id": "HumanEval/2", "completion": "def truncate_number(number: float) -> float:\n    integer_part = int(number)\n    decimal_part = number - integer_part\n    return decimal_part"}
  4 | {"task_id": "HumanEval/3", "completion": "def below_zero(operations: List[int]) -> bool:\n    current_balance = 0\n    for operation in operations:\n        current_balance += operation\n        if current_balance < 0:\n            return True\n    return False"}
  5 | {"task_id": "HumanEval/4", "completion": "def mean_absolute_deviation(numbers: List[float]) -> float:\n    \"\"\" For a given list of input numbers, calculate Mean Absolute Deviation\n    around the mean of this dataset.\n    Mean Absolute Deviation is the average absolute difference between each\n    element and a centerpoint (mean in this case):\n    MAD = average | x - x_mean |\n    >>> mean_absolute_deviation([1.0, 2.0, 3.0, 4.0])\n    1.0\n    \"\"\"\n    mean = sum(numbers) / len(numbers)\n    absolute_deviations = [abs(x - mean) for x in numbers]\n    return sum(absolute_deviations) / len(absolute_deviations)"}
  6 | {"task_id": "HumanEval/5", "completion": "def intersperse(numbers: List[int], delimeter: int) -> List[int]:\n    result = []\n    for num in numbers:\n        result.append(num)\n        result.append(delimeter)\n    if result:\n        result.pop()  # remove the last delimeter\n    return result"}
  7 | {"task_id": "HumanEval/6", "completion": "def parse_nested_parens(paren_string: str) -> List[int]:\n    max_nesting = []\n    for group in paren_string.split():\n        nesting = 0\n        for char in group:\n            if char == '(':\n                nesting += 1\n            elif char == ')':\n                nesting -= 1\n        max_nesting.append(nesting)\n    return max_nesting\n"}
  8 | {"task_id": "HumanEval/7", "completion": "def filter_by_substring(strings: List[str], substring: str) -> List[str]:\n    return [s for s in strings if substring in s]"}
  9 | {"task_id": "HumanEval/8", "completion": "def sum_product(numbers: List[int]) -> Tuple[int, int]:\n    total_sum = sum(numbers)\n    total_product = 1\n    for num in numbers:\n        total_product *= num\n    return (total_sum, total_product)"}
 10 | {"task_id": "HumanEval/9", "completion": "def rolling_max(numbers: List[int]) -> List[int]:\n    max_list = [numbers[0]]\n    for num in numbers[1:]:\n        max_list.append(max(num, max_list[-1]))\n    return max_list"}
 11 | {"task_id": "HumanEval/10", "completion": "def is_palindrome(string: str) -> bool:\n    \"\"\" Test if given string is a palindrome \"\"\"\n    return string == string[::-1]\n\n\ndef make_palindrome(string: str) -> str:\n    \"\"\" Find the shortest palindrome that begins with a supplied string.\n    Algorithm idea is simple:\n    - Find the longest postfix of supplied string that is a palindrome.\n    - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.\n    >>> make_palindrome('')\n    ''\n    >>> make_palindrome('cat')\n    'catac'\n    >>> make_palindrome('cata')\n    'catac'\n    \"\"\"\n    longest_palindrome = ''\n    for i in range(len(string)):\n        if is_palindrome(string[i:]):\n            longest_palindrome = string[i:]\n    return string + string[::-1][len(longest_palindrome):]"}
 12 | {"task_id": "HumanEval/11", "completion": "def string_xor(a: str, b: str) -> str:\n    result = ''\n    for i, j in zip(a, b):\n        if i == j:\n            result += '0'\n        else:\n            result += '1'\n    return result"}
 13 | {"task_id": "HumanEval/12", "completion": "def longest(strings: List[str]) -> Optional[str]:\n    if not strings:\n        return None\n    longest_string = strings[0]\n    for string in strings:\n        if len(string) > len(longest_string):\n            longest_string = string\n    return longest_string"}
 14 | {"task_id": "HumanEval/13", "completion": "def greatest_common_divisor(a: int, b: int) -> int:\n    if b == 0:\n        return a\n    else:\n        return greatest_common_divisor(b, a % b)"}
 15 | {"task_id": "HumanEval/14", "completion": "def all_prefixes(string):\n    return [string[:i] for i in range(1, len(string) + 1)]"}
 16 | {"task_id": "HumanEval/15", "completion": "def string_sequence(n: int) -> str:\n    return''.join(map(str, range(n + 1)))"}
 17 | {"task_id": "HumanEval/16", "completion": "def count_distinct_characters(string):\n    return len(set(string.lower()))\n"}
 18 | {"task_id": "HumanEval/17", "completion": "def parse_music(music_string):\n    notes = music_string.split()\n    beats = [4, 2, 1]\n    result = []\n\n    for note in notes:\n        if note == 'o':\n            result.append(beats[0])\n        elif note == 'o|':\n            result.append(beats[1])\n        elif note == '.|':\n            result.append(beats[2])\n\n    return result\n"}
 19 | {"task_id": "HumanEval/18", "completion": "def how_many_times(string, substring):\n    return string.count(substring) - 1 if substring in string else 0"}
 20 | {"task_id": "HumanEval/19", "completion": "def sort_numbers(numbers: str) -> str:\n    number_list = numbers.split()\n    number_list.sort()\n    return''.join(number_list)\n"}
 21 | {"task_id": "HumanEval/20", "completion": "def find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n    \"\"\" From a supplied list of numbers (of length at least two) select and return two that are the closest to each\n    other and return them in order (smaller number, larger number).\n    >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.2])\n    (2.0, 2.2)\n    >>> find_closest_elements([1.0, 2.0, 3.0, 4.0, 5.0, 2.0])\n    (2.0, 2.0)\n    \"\"\"\n    return sorted(set(numbers))[0:2]"}
 22 | {"task_id": "HumanEval/21", "completion": "def rescale_to_unit(numbers: List[float]) -> List[float]:\n    min_val = min(numbers)\n    max_val = max(numbers)\n    return [((val - min_val) / (max_val - min_val)) for val in numbers]"}
 23 | {"task_id": "HumanEval/22", "completion": "def filter_integers(values: List[Any]) -> List[int]:\n    return [value for value in values if isinstance(value, int)]"}
 24 | {"task_id": "HumanEval/23", "completion": "def strlen(string: str) -> int:\n    return len(string)"}
 25 | {"task_id": "HumanEval/24", "completion": "def largest_divisor(n):\n    for i in range(n - 1, 1, -1):\n        if n % i == 0:\n            return i\n\n"}
 26 | {"task_id": "HumanEval/25", "completion": "def factorize(n: int) -> List[int]:\n    factors = []\n    while n % 2 == 0:\n        factors.append(2)\n        n //= 2\n    for i in range(3, int(n**0.5)+1, 2):\n        while n % i == 0:\n            factors.append(i)\n            n //= i\n    if n > 2:\n        factors.append(n)\n    return factors"}
 27 | {"task_id": "HumanEval/26", "completion": "def remove_duplicates(numbers: List[int]) -> List[int]:\n    return list(set(numbers))"}
 28 | {"task_id": "HumanEval/27", "completion": "def flip_case(string):\n    return ''.join([c.upper() if c.islower() else c.lower() for c in string])"}
 29 | {"task_id": "HumanEval/28", "completion": "def concatenate(strings: List[str]) -> str:\n    return ''.join(strings)"}
 30 | {"task_id": "HumanEval/29", "completion": "def filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n    return [s for s in strings if s.startswith(prefix)]"}
 31 | {"task_id": "HumanEval/30", "completion": "def get_positive(l):\n    return [num for num in l if num > 0]"}
 32 | {"task_id": "HumanEval/31", "completion": "def is_prime(n):\n    if n <= 1:\n        return False\n    for i in range(2, int(n**0.5)+1):\n        if n % i == 0:\n            return False\n    return True"}
 33 | {"task_id": "HumanEval/32", "completion": "def poly(xs, x):\n    return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs):\n    if len(xs) % 2!= 0:\n        raise ValueError('find_zero only takes list xs having even number of coefficients')\n    largest_coeff = max(xs)\n    if largest_coeff == 0:\n        raise ValueError('find_zero only takes list xs having largest non zero coefficient')\n    xs = xs.copy()\n    xs.reverse()\n    roots = []\n    for i in range(len(xs) // 2):\n        root = -xs[i] / xs[i + 1]\n        roots.append(root)\n    return roots"}
 34 | {"task_id": "HumanEval/33", "completion": "def sort_third(l):\n    result = l.copy()\n    for i in range(0, len(l), 3):\n        result[i:i+3] = sorted(result[i:i+3])\n    return result"}
 35 | {"task_id": "HumanEval/34", "completion": "def unique(l):\n    return sorted(set(l))"}
 36 | {"task_id": "HumanEval/35", "completion": "def max_element(l):\n    return max(l)"}
 37 | {"task_id": "HumanEval/36", "completion": "def fizz_buzz(n: int):\n    count = 0\n    for i in range(1, n):\n        if i % 11 == 0 or i % 13 == 0:\n            count += 1\n    return count"}
 38 | {"task_id": "HumanEval/37", "completion": "def sort_even(l):\n    even_sorted = sorted(l[::2])\n    return [val for pair in zip(l, even_sorted) for val in pair]\n"}
 39 | {"task_id": "HumanEval/38", "completion": "def encode_cyclic(s: str):\n    \"\"\"\n    returns encoded string by cycling groups of three characters.\n    \"\"\"\n    # split string to groups. Each of length 3.\n    groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n    # cycle elements in each group. Unless group has fewer elements than 3.\n    groups = [(group[1:] + group[0]) if len(group) == 3 else group for group in groups]\n    return \"\".join(groups)\n\n\ndef decode_cyclic(s: str):\n    \"\"\"\n    takes as input string encoded with encode_cyclic function. Returns decoded string.\n    \"\"\"\n    # split string to groups. Each of length 3.\n    groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n    # reverse elements in each group. Unless group has fewer elements than 3.\n    groups = [(group[::-1]) if len(group) == 3 else group for group in groups]\n    return \"\".join(groups)"}
 40 | {"task_id": "HumanEval/39", "completion": "def prime_fib(n: int):\n    primes = [True] * (n + 1)\n    primes[0], primes[1] = False, False\n    i = 2\n    while i * i <= n:\n        if primes[i]:\n            for j in range(i * i, n + 1, i):\n                primes[j] = False\n        i += 1\n    fib_numbers = [i for i in range(n + 1) if primes[i]]\n    return fib_numbers[n]"}
 41 | {"task_id": "HumanEval/40", "completion": "def triples_sum_to_zero(l):\n    return len(set(filter(lambda x: x == 0, l))) == 3"}
 42 | {"task_id": "HumanEval/41", "completion": "def car_race_collision(n: int):\n    return 0"}
 43 | {"task_id": "HumanEval/42", "completion": "def incr_list(l):\n    return [x + 1 for x in l]\n"}
 44 | {"task_id": "HumanEval/43", "completion": "def pairs_sum_to_zero(l):\n    return len(set(filter(lambda x: x == 0, l))) >= 2"}
 45 | {"task_id": "HumanEval/44", "completion": "def change_base(x: int, base: int):\n    return str(x).replace('0', '').zfill(base)"}
 46 | {"task_id": "HumanEval/45", "completion": "def triangle_area(a, h):\n    return 0.5 * a * h"}
 47 | {"task_id": "HumanEval/46", "completion": "def fib4(n: int):\n    sequence = [0, 0, 2, 0]\n    for i in range(4, n):\n        sequence.append(sequence[i-1] + sequence[i-2] + sequence[i-3] + sequence[i-4])\n    return sequence[n]"}
 48 | {"task_id": "HumanEval/47", "completion": "def median(l: list):\n    l.sort()\n    length = len(l)\n    if length % 2 == 0:\n        middle1 = l[length // 2 - 1]\n        middle2 = l[length // 2]\n        median_value = (middle1 + middle2) / 2\n    else:\n        median_value = l[length // 2]\n    return median_value"}
 49 | {"task_id": "HumanEval/48", "completion": "def is_palindrome(text: str):\n    return text == text[::-1]"}
 50 | {"task_id": "HumanEval/49", "completion": "def modp(n: int, p: int):\n    return 2**n % p"}
 51 | {"task_id": "HumanEval/50", "completion": "def encode_shift(s: str):\n    \"\"\"\n    returns encoded string by shifting every character by 5 in the alphabet.\n    \"\"\"\n    return \"\".join([chr(((ord(ch) + 5 - ord(\"a\")) % 26) + ord(\"a\") for ch in s])\n\ndef decode_shift(s: str):\n    \"\"\"\n    takes as input string encoded with encode_shift function. Returns decoded string.\n    \"\"\"\n    return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\") for ch in s])"}
 52 | {"task_id": "HumanEval/51", "completion": "def remove_vowels(text):\n    vowels = 'aeiouAEIOU'\n    return ''.join([char for char in text if char not in vowels])"}
 53 | {"task_id": "HumanEval/52", "completion": "def below_threshold(l: list, t: int):\n    return all(num < t for num in l)"}
 54 | {"task_id": "HumanEval/53", "completion": "def add(x, y):\n    return x + y\n"}
 55 | {"task_id": "HumanEval/54", "completion": "def same_chars(s0: str, s1: str):\n    return sorted(s0.lower()) == sorted(s1.lower())"}
 56 | {"task_id": "HumanEval/55", "completion": "def fib(n: int):\n    if n <= 1:\n        return n\n    else:\n        return fib(n - 1) + fib(n - 2)"}
 57 | {"task_id": "HumanEval/56", "completion": "def correct_bracketing(brackets):\n    stack = []\n    for char in brackets:\n        if char == \"<\":\n            stack.append(char)\n        elif char == \">\":\n            if not stack:\n                return False\n            stack.pop()\n    return not stack"}
 58 | {"task_id": "HumanEval/57", "completion": "def monotonic(l):\n    return all(x <= y for x, y in zip(l, l[1:])) or all(x >= y for x, y in zip(l, l[1:]))"}
 59 | {"task_id": "HumanEval/58", "completion": "def common(l1, l2):\n    return sorted(set(l1) & set(l2))\n"}
 60 | {"task_id": "HumanEval/59", "completion": "def largest_prime_factor(n: int):\n    i = 2\n    while i * i <= n:\n        if n % i:\n            i += 1\n        else:\n            n //= i\n    return n"}
 61 | {"task_id": "HumanEval/60", "completion": "def sum_to_n(n: int):\n    return (n * (n + 1)) // 2\n"}
 62 | {"task_id": "HumanEval/61", "completion": "def correct_bracketing(brackets):\n    stack = []\n    for char in brackets:\n        if char == '(':\n            stack.append('(')\n        elif char == ')':\n            if not stack:\n                return False\n            stack.pop()\n    return not stack"}
 63 | {"task_id": "HumanEval/62", "completion": "def derivative(xs):\n    return [x * i for i, x in enumerate(xs[1:], 1)]"}
 64 | {"task_id": "HumanEval/63", "completion": "def fibfib(n: int):\n    if n == 0:\n        return 0\n    elif n == 1:\n        return 0\n    elif n == 2:\n        return 1\n    else:\n        return fibfib(n-1) + fibfib(n-2) + fibfib(n-3)\n"}
 65 | {"task_id": "HumanEval/64", "completion": "def vowels_count(s):\n    vowels = 'aeiouAEIOU'\n    count = 0\n    for char in s:\n        if char in vowels:\n            count += 1\n    if s[-1] == 'y':\n        count += 1\n    return count"}
 66 | {"task_id": "HumanEval/65", "completion": "def circular_shift(x, shift):\n    num_digits = len(str(x))\n    if shift > num_digits:\n        return str(x)[::-1]\n    else:\n        return str(x)[num_digits - shift:]"}
 67 | {"task_id": "HumanEval/66", "completion": "def digitSum(s):\n    return sum(c.isupper() for c in s)"}
 68 | {"task_id": "HumanEval/67", "completion": "def fruit_distribution(s,n):\n    apples, oranges = s.split()\n    apples = int(apples)\n    oranges = int(oranges)\n    mangoes = n - apples - oranges\n    return mangoes\n"}
 69 | {"task_id": "HumanEval/68", "completion": "def pluck(arr):\n    if not arr:\n        return []\n    even_values = [node for node in arr if node % 2 == 0]\n    if not even_values:\n        return []\n    return [min(even_values), even_values.index(min(even_values))]"}
 70 | {"task_id": "HumanEval/69", "completion": "def search(lst):\n    freq_dict = {}\n    for num in lst:\n        freq_dict[num] = freq_dict.get(num, 0) + 1\n\n    for num in freq_dict:\n        if freq_dict[num] > num:\n            return num\n\n    return -1"}
 71 | {"task_id": "HumanEval/70", "completion": "def strange_sort_list(lst):\n    if not lst:\n        return lst\n\n    min_val = min(lst)\n    max_val = max(lst)\n    result = [min_val]\n\n    while result[-1]!= max_val:\n        result.append(max_val)\n        if max_val in lst:\n            lst.remove(max_val)\n\n    while result[-1]!= min_val:\n        result.append(min_val)\n        if min_val in lst:\n            lst.remove(min_val)\n\n    return result"}
 72 | {"task_id": "HumanEval/71", "completion": "def triangle_area(a, b, c):\n    '''\n    Given the lengths of the three sides of a triangle. Return the area of\n    the triangle rounded to 2 decimal points if the three sides form a valid triangle. \n    Otherwise return -1\n    Three sides make a valid triangle when the sum of any two sides is greater \n    than the third side.\n    Example:\n    triangle_area(3, 4, 5) == 6.00\n    triangle_area(1, 2, 10) == -1\n    '''\n    s = (a + b + c) / 2\n    area = (s * (s - a) * (s - b) * (s - c)) ** 0.5\n    return round(area, 2)"}
 73 | {"task_id": "HumanEval/72", "completion": "def will_it_fly(q, w):\n    return q == q[::-1] and sum(q) <= w"}
 74 | {"task_id": "HumanEval/73", "completion": "def smallest_change(arr):\n    n = len(arr)\n    i = 0\n    j = n - 1\n    count = 0\n\n    while i < j:\n        if arr[i]!= arr[j]:\n            arr[i], arr[j] = arr[j], arr[i]\n            count += 1\n        i += 1\n        j -= 1\n\n    return count"}
 75 | {"task_id": "HumanEval/74", "completion": "def total_match(lst1, lst2):\n    if len(lst1) < len(lst2):\n        return lst1\n    elif len(lst1) > len(lst2):\n        return lst2\n    else:\n        return lst1"}
 76 | {"task_id": "HumanEval/75", "completion": "def is_multiply_prime(a):\n    if a < 100:\n        primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]\n        for p in primes:\n            if a % p!= 0:\n                return False\n        return True\n    else:\n        return False"}
 77 | {"task_id": "HumanEval/76", "completion": "def is_simple_power(x, n):\n    return n**int(log(x, n)) == x"}
 78 | {"task_id": "HumanEval/77", "completion": "def iscube(a):\n    return a ** 3 >= 0"}
 79 | {"task_id": "HumanEval/78", "completion": "def hex_key(num):\n    primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n    count = 0\n    for char in num:\n        if char in '0123456789ABCDEF':\n            if int(char, 16) in primes:\n                count += 1\n    return count"}
 80 | {"task_id": "HumanEval/79", "completion": "def decimal_to_binary(decimal):\n    binary_string = '0' if decimal == 0 else ''\n    while decimal > 0:\n        binary_string = '1' + binary_string\n        decimal //= 2\n    return 'db' + binary_string + 'db'\n"}
 81 | {"task_id": "HumanEval/80", "completion": "def is_happy(s):\n    return len(s) >= 3 and len(s) % 3!= 0 and len(set(s[i:i+3] for i in range(0, len(s), 3))) == len(s) / 3"}
 82 | {"task_id": "HumanEval/81", "completion": "def numerical_letter_grade(grades):\n    gpa_to_letter_grade = {4.0: 'A+', 3.7: 'A', 3.3: 'A-', 3.0: 'B+', 2.7: 'B', 2.3: 'B-', 2.0: 'C+', 1.7: 'C', 1.3: 'C-', 1.0: 'D+', 0.7: 'D', 0.0: 'D-'}\n    return [gpa_to_letter_grade[gpa] for gpa in grades]\n"}
 83 | {"task_id": "HumanEval/82", "completion": "def prime_length(string):\n    if not string:\n        return False\n    n = len(string)\n    if n == 1:\n        return True\n    if n <= 3:\n        return False\n    if n % 2 == 0 or n % 3 == 0:\n        return False\n    i = 5\n    while i * i <= n:\n        if n % i == 0 or n % (i + 2) == 0:\n            return False\n        i += 6\n    return True"}
 84 | {"task_id": "HumanEval/83", "completion": "def starts_one_ends(n):\n    count = 0\n    for i in range(1, 10 ** n):\n        if str(i)[0] == '1' or str(i)[-1] == '1':\n            count += 1\n    return count"}
 85 | {"task_id": "HumanEval/84", "completion": "def solve(N):\n    total_sum = 0\n    while N > 0:\n        digit = N % 10\n        total_sum += digit\n        N //= 10\n    return bin(total_sum)\n"}
 86 | {"task_id": "HumanEval/85", "completion": "def add(lst):\n    even_sum = 0\n    for i, num in enumerate(lst):\n        if i % 2 == 0:\n            even_sum += num\n    return even_sum\n"}
 87 | {"task_id": "HumanEval/86", "completion": "def anti_shuffle(s):\n    words = s.split()\n    sorted_words =''.join(sorted(words))\n    return sorted_words\n"}
 88 | {"task_id": "HumanEval/87", "completion": "def get_row(lst, x):\n    result = []\n    for row_index, row in enumerate(lst):\n        for col_index, value in enumerate(row):\n            if value == x:\n                result.append((row_index, col_index))\n    result.sort()\n    return result"}
 89 | {"task_id": "HumanEval/88", "completion": "def sort_array(array):\n    if sum(array[0], array[-1]) % 2 == 0:\n        array.sort(reverse=True)\n    else:\n        array.sort()\n    return array"}
 90 | {"task_id": "HumanEval/89", "completion": "def encrypt(s):\n    alphabet = 'abcdefghijklmnopqrstuvwxyz'\n    encrypted = ''\n    for char in s:\n        if char in alphabet:\n            index = alphabet.index(char)\n            encrypted += alphabet[index + 2]\n        else:\n            encrypted += char\n    return encrypted"}
 91 | {"task_id": "HumanEval/90", "completion": "def next_smallest(lst):\n    if len(lst) < 2:\n        return None\n    return sorted(set(lst))[1]\n"}
 92 | {"task_id": "HumanEval/91", "completion": "def is_bored(S):\n    boredom_count = 0\n    for sentence in S.split('.'):\n        if sentence.startswith('I'):\n            boredom_count += 1\n    return boredom_count"}
 93 | {"task_id": "HumanEval/92", "completion": "def any_int(x, y, z):\n    return (x == (y + z)) and (isinstance(x, int) and isinstance(y, int) and isinstance(z, int))"}
 94 | {"task_id": "HumanEval/93", "completion": "def encode(message):\n    vowels = 'aeiouAEIOU'\n    result = ''\n    for char in message:\n        if char in vowels:\n            result += vowels[(vowels.index(char) + 2) % 26]\n        else:\n            result += char\n    return result"}
 95 | {"task_id": "HumanEval/94", "completion": "def skjkasdkd(lst):\n    primes = [x for x in lst if x > 1]\n    largest_prime = max(primes)\n    sum_of_digits = sum(int(x) for x in str(largest_prime))\n    return sum_of_digits\n"}
 96 | {"task_id": "HumanEval/95", "completion": "def check_dict_case(dict):\n    \"\"\"\n    Given a dictionary, return True if all keys are strings in lower \n    case or all keys are strings in upper case, else return False.\n    The function should return False is the given dictionary is empty.\n    Examples:\n    check_dict_case({\"a\":\"apple\", \"b\":\"banana\"}) should return True.\n    check_dict_case({\"a\":\"apple\", \"A\":\"banana\", \"B\":\"banana\"}) should return False.\n    check_dict_case({\"a\":\"apple\", 8:\"banana\", \"a\":\"apple\"}) should return False.\n    check_dict_case({\"Name\":\"John\", \"Age\":\"36\", \"City\":\"Houston\"}) should return False.\n    check_dict_case({\"STATE\":\"NC\", \"ZIP\":\"12345\" }) should return True.\n    \"\"\"\n    if not dict:\n        return False\n    lower_case = all(isinstance(key, str) and key.islower() for key in dict)\n    upper_case = all(isinstance(key, str) and key.isupper() for key in dict)\n    return lower_case or upper_case"}
 97 | {"task_id": "HumanEval/96", "completion": "def count_up_to(n):\n    primes = []\n    num = 2\n    while len(primes) < n:\n        if is_prime(num):\n            primes.append(num)\n        num += 1\n    return primes\n\ndef is_prime(num):\n    if num < 2:\n        return False\n    for i in range(2, num):\n        if num % i == 0:\n            return False\n    return True"}
 98 | {"task_id": "HumanEval/97", "completion": "def multiply(a, b):\n    return int(str(a)[-1]) * int(str(b)[-1])"}
 99 | {"task_id": "HumanEval/98", "completion": "def count_upper(s):\n    vowels = 'aeiouAEIOU'\n    count = 0\n    for i, char in enumerate(s):\n        if i % 2 == 0 and char in vowels:\n            count += 1\n    return count"}
100 | {"task_id": "HumanEval/99", "completion": "def closest_integer(value):\n    num = float(value)\n    if num < 0:\n        return int(num - 0.5)\n    else:\n        return int(num + 0.5)"}
101 | {"task_id": "HumanEval/100", "completion": "def make_a_pile(n):\n    pile = [n]\n    while n > 1:\n        n = n + 1 if n % 2 == 0 else n + 2\n        pile.append(n)\n    return pile"}
102 | {"task_id": "HumanEval/101", "completion": "def words_string(s):\n    words = s.split(', ')\n    return words\n"}
103 | {"task_id": "HumanEval/102", "completion": "def choose_num(x, y):\n    if x % 2!= 0 or y % 2!= 0:\n        return -1\n    return max(x, y)"}
104 | {"task_id": "HumanEval/103", "completion": "def rounded_avg(n, m):\n    if n > m:\n        return -1\n    total = 0\n    for i in range(n, m + 1):\n        total += i\n    avg = total / (m - n + 1)\n    return f\"0b{bin(int(avg))[2:]:0>4}\""}
105 | {"task_id": "HumanEval/104", "completion": "def unique_digits(x):\n    result = [num for num in x if not any(digit % 2 == 0 for digit in num)]\n    result.sort()\n    return result"}
106 | {"task_id": "HumanEval/105", "completion": "def by_length(arr):\n    \"\"\"\n    Given an array of integers, sort the integers that are between 1 and 9 inclusive,\n    reverse the resulting array, and then replace each digit by its corresponding name from\n    \"One\", \"Two\", \"Three\", \"Four\", \"Five\", \"Six\", \"Seven\", \"Eight\", \"Nine\".\n\n    For example:\n      arr = [2, 1, 1, 4, 5, 8, 2, 3]   \n            -> sort arr -> [1, 1, 2, 2, 3, 4, 5, 8] \n            -> reverse arr -> [8, 5, 4, 3, 2, 2, 1, 1]\n      return [\"Eight\", \"Five\", \"Four\", \"Three\", \"Two\", \"Two\", \"One\", \"One\"]\n    \n      If the array is empty, return an empty array:\n      arr = []\n      return []\n    \n      If the array has any strange number ignore it:\n      arr = [1, -1, 55] \n            -> sort arr -> [-1, 1, 55]\n            -> reverse arr -> [55, 1, -1]\n      return = ['One']\n    ```"}
107 | {"task_id": "HumanEval/106", "completion": "def f(n):\n    result = []\n    for i in range(1, n + 1):\n        if i % 2 == 0:\n            result.append(factorial(i))\n        else:\n            result.append(sum(range(1, i + 1)))\n    return result"}
108 | {"task_id": "HumanEval/107", "completion": "def even_odd_palindrome(n):\n    even = 0\n    odd = 0\n    for i in range(1, n + 1):\n        s = str(i)\n        if s == s[::-1]:\n            if i % 2 == 0:\n                even += 1\n            else:\n                odd += 1\n    return (even, odd)"}
109 | {"task_id": "HumanEval/108", "completion": "def count_nums(arr):\n    return sum(1 for num in arr if sum(abs(digit) for digit in num) > 0)"}
110 | {"task_id": "HumanEval/109", "completion": "def move_one_ball(arr):\n    if not arr:\n        return True\n    for i in range(len(arr) - 1):\n        if arr[i] > arr[i + 1]:\n            return False\n    return True"}
111 | {"task_id": "HumanEval/110", "completion": "def exchange(lst1, lst2):\n    even_list = [num for num in lst1 if num % 2 == 0]\n    if len(even_list) == len(lst1):\n        return 'YES'\n    else:\n        return 'NO'\n"}
112 | {"task_id": "HumanEval/111", "completion": "def histogram(test):\n    letter_count = {}\n    for letter in test.lower():\n        if letter in letter_count:\n            letter_count[letter] += 1\n        else:\n            letter_count[letter] = 1\n\n    max_count = max(letter_count.values())\n    result = {key: value for key, value in letter_count.items() if value == max_count}\n    return result"}
113 | {"task_id": "HumanEval/112", "completion": "def reverse_delete(s,c):\n    result = ''.join([char for char in s if char not in c])\n    return (result, result == result[::-1])"}
114 | {"task_id": "HumanEval/113", "completion": "def odd_count(lst):\n    output = []\n    for num in lst:\n        count = 0\n        for digit in num:\n            if int(digit) % 2!= 0:\n                count += 1\n        output.append(f\"the number of odd elements {count}n the string {num} of the input.\")\n    return output\n"}
115 | {"task_id": "HumanEval/114", "completion": "def minSubArraySum(nums):\n    if not nums:\n        return 0\n    min_sum = math.inf\n    cur_sum = 0\n    for num in nums:\n        cur_sum += num\n        min_sum = min(min_sum, cur_sum)\n    return min_sum"}
116 | {"task_id": "HumanEval/115", "completion": "def max_fill(grid, capacity):\n    import math\n    num_rows = len(grid)\n    num_cols = len(grid[0])\n    num_buckets = num_rows * num_cols // capacity\n    filled_buckets = sum(1 for row in grid for cell in row if cell)\n    return max(0, num_buckets - filled_buckets)"}
117 | {"task_id": "HumanEval/116", "completion": "def sort_array(arr):\n    return sorted(arr, key=lambda x: (x.bit_count(), x))"}
118 | {"task_id": "HumanEval/117", "completion": "def select_words(s, n):\n    words = s.split()\n    result = []\n    vowels = 'aeiouAEIOU'\n\n    for word in words:\n        consonants = 0\n        for char in word:\n            if char not in vowels:\n                consonants += 1\n        if consonants == n:\n            result.append(word)\n\n    return result"}
119 | {"task_id": "HumanEval/118", "completion": "def get_closest_vowel(word):\n    vowels = 'aeiouAEIOU'\n    for i in range(len(word) - 1, -1, -1):\n        if word[i] in vowels:\n            return word[i]\n    return ''"}
120 | {"task_id": "HumanEval/119", "completion": "def match_parens(lst):\n    stack = []\n    for char in lst[0]:\n        if char == '(':\n            stack.append('(')\n        elif not stack:\n            return 'No'\n        else:\n            stack.pop()\n    if stack:\n        return 'No'\n    return 'Yes'\n"}
121 | {"task_id": "HumanEval/120", "completion": "def maximum(arr, k):\n    return sorted(set(arr))[-k:]"}
122 | {"task_id": "HumanEval/121", "completion": "def solution(lst):\n    return sum(num for i, num in enumerate(lst) if i % 2 == 0 and num % 2!= 0)"}
123 | {"task_id": "HumanEval/122", "completion": "def add_elements(arr, k):\n    return sum(arr[:k])\n"}
124 | {"task_id": "HumanEval/123", "completion": "def get_odd_collatz(n):\n    sequence = [n]\n    while sequence[-1]!= 1:\n        if sequence[-1] % 2 == 0:\n            sequence.append(sequence[-1] // 2)\n        else:\n            sequence.append(3 * sequence[-1] + 1)\n    return sequence[:-1]"}
125 | {"task_id": "HumanEval/124", "completion": "def valid_date(date):\n    if not date:\n        return False\n    date_format = '%m-%d-%Y'\n    try:\n        datetime.strptime(date, date_format)\n    except ValueError:\n        return False\n    return True"}
126 | {"task_id": "HumanEval/125", "completion": "def split_words(txt):\n    words = txt.split()\n    if not words:\n        if ',' in txt:\n            words = txt.split(',')\n        else:\n            return len([c for c in txt.lower() if c.islower() and c.isalnum()]) % 2\n    return words"}
127 | {"task_id": "HumanEval/126", "completion": "def is_sorted(lst):\n    if len(lst) <= 1:\n        return True\n    for i in range(1, len(lst)):\n        if lst[i] < lst[i - 1]:\n            return False\n    return True"}
128 | {"task_id": "HumanEval/127", "completion": "def intersection(interval1, interval2):\n    start1, end1 = interval1\n    start2, end2 = interval2\n\n    # Find the intersection of the two intervals\n    start = max(start1, start2)\n    end = min(end1, end2)\n\n    # If the intersection is empty, return \"NO\"\n    if start > end:\n        return \"NO\"\n\n    # Find the length of the intersection\n    length = end - start + 1\n\n    # Check if the length is a prime number\n    if is_prime(length):\n        return \"YES\"\n    else:\n        return \"NO\"\n\ndef is_prime(n):\n    if n <= 1:\n        return False\n    for i in range(2, int(n**0.5)+1):\n        if n % i == 0:\n            return False\n    return True"}
129 | {"task_id": "HumanEval/128", "completion": "def prod_signs(arr):\n    if not arr:\n        return None\n    product = 1\n    sum_of_magnitudes = 0\n    for num in arr:\n        if num > 0:\n            product *= 1\n        elif num < 0:\n            product *= -1\n        else:\n            continue\n        sum_of_magnitudes += abs(num)\n    return sum_of_magnitudes * product"}
130 | {"task_id": "HumanEval/129", "completion": "def minPath(grid, k):\n    n = len(grid)\n    m = len(grid[0])\n    visited = [[False] * m for _ in range(n)]\n    min_path = []\n\n    def dfs(i, j):\n        if i < 0 or i >= n or j < 0 or j >= m:\n            return False\n        if visited[i][j]:\n            return False\n        visited[i][j] = True\n        min_path.append(grid[i][j])\n        if len(min_path) == k:\n            return True\n        return any(dfs(i + x, j + y) for x, y in [(1, 0), (-1, 0), (0, 1), (0, -1)])\n\n    dfs(0, 0)\n    return min_path"}
131 | {"task_id": "HumanEval/130", "completion": "def tri(n):\n    sequence = [0, 1, 1]\n    for i in range(3, n + 1):\n        sequence.append(sequence[i - 1] + sequence[i - 2] + sequence[i - 3])\n    return sequence[:n + 1]"}
132 | {"task_id": "HumanEval/131", "completion": "def digits(n):\n    product = 1\n    while n > 0:\n        digit = n % 10\n        if digit % 2!= 0:\n            product *= digit\n        n //= 10\n    return product"}
133 | {"task_id": "HumanEval/132", "completion": "def is_nested(string):\n    stack = []\n    for char in string:\n        if char == '[':\n            stack.append('(')\n        elif char == ']':\n            if not stack:\n                return False\n            if stack[-1]!= '(':\n                return False\n            stack.pop()\n    return not stack"}
134 | {"task_id": "HumanEval/133", "completion": "def sum_squares(lst):\n    return sum(int(ceil(x**2)) for x in lst)"}
135 | {"task_id": "HumanEval/134", "completion": "def check_if_last_char_is_a_letter(txt):\n    return txt and txt[-1].isalpha() and not txt[-1].isspace()"}
136 | {"task_id": "HumanEval/135", "completion": "def can_arrange(arr):\n    for i in range(1, len(arr)):\n        if arr[i] > arr[i-1]:\n            return i-1\n    return -1"}
137 | {"task_id": "HumanEval/136", "completion": "def largest_smallest_integers(lst):\n    if not lst:\n        return (None, None)\n    neg_max = max(x for x in lst if x < 0)\n    pos_min = min(x for x in lst if x > 0)\n    return (neg_max, pos_min)"}
138 | {"task_id": "HumanEval/137", "completion": "def compare_one(a, b):\n    if a == b:\n        return None\n    if isinstance(a, (int, float)) and isinstance(b, (int, float)):\n        return max(a, b)\n    if isinstance(a, str) and isinstance(b, str):\n        return max(a.replace(',', ''), b.replace(',', ''))\n    return max(a, b)"}
139 | {"task_id": "HumanEval/138", "completion": "def is_equal_to_sum_even(n):\n    return n % 2 == 0 and n % 4 == 0 and n % 6 == 0 and n % 8 == 0"}
140 | {"task_id": "HumanEval/139", "completion": "def special_factorial(n):\n    result = 1\n    for i in range(1, n + 1):\n        result *= i\n    return result"}
141 | {"task_id": "HumanEval/140", "completion": "def fix_spaces(text):\n    return text.replace(' ', '_').replace('  ', '-').strip('-')"}
142 | {"task_id": "HumanEval/141", "completion": "def file_name_check(file_name):\n    if len(file_name) > 3 or not file_name.endswith(('.txt', '.exe', '.dll')):\n        return 'No'\n    if '.' not in file_name:\n        return 'No'\n    before_dot, after_dot = file_name.split('.')\n    if not before_dot or not before_dot[0].isalpha():\n        return 'No'\n    if after_dot not in ('txt', 'exe', 'dll'):\n        return 'No'\n    return 'Yes'"}
143 | {"task_id": "HumanEval/142", "completion": "def sum_squares(lst):\n    return sum(x**2 if i % 3 == 0 else x**3 if i % 4 == 0 else x for i, x in enumerate(lst))"}
144 | {"task_id": "HumanEval/143", "completion": "def words_in_sentence(sentence):\n    words = sentence.split()\n    primes = [i for i in range(2, len(words) + 1) if is_prime(i)]\n    result =''.join([words[i] for i in primes])\n    return result\n\ndef is_prime(n):\n    if n <= 1:\n        return False\n    for i in range(2, int(n**0.5) + 1):\n        if n % i == 0:\n            return False\n    return True"}
145 | {"task_id": "HumanEval/144", "completion": "def simplify(x, n):\n    num, denom = x.split('/')\n    n_num, n_denom = n.split('/')\n    result = int(num) * int(n_denom) + int(n_num) * int(denom)\n    return result == int(num) * int(denom)"}
146 | {"task_id": "HumanEval/145", "completion": "def order_by_points(nums):\n    return sorted(nums, key=lambda x: sum(map(int, str(abs(x)))))"}
147 | {"task_id": "HumanEval/146", "completion": "def specialFilter(nums):\n    return sum(1 for num in nums if num > 10 and num % 10 in (1, 3, 5, 7, 9))\n"}
148 | {"task_id": "HumanEval/147", "completion": "def get_max_triples(n):\n    a = [i * i - i + 1 for i in range(1, n + 1)]\n    count = 0\n    for i in range(n):\n        for j in range(i + 1, n):\n            for k in range(j + 1, n):\n                if a[i] + a[j] + a[k] % 3 == 0:\n                    count += 1\n    return count\n"}
149 | {"task_id": "HumanEval/148", "completion": "def bf(planet1, planet2):\n    planets = ('Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune')\n    if planet1 not in planets or planet2 not in planets:\n        return ()\n    i1 = planets.index(planet1)\n    i2 = planets.index(planet2)\n    return tuple(planets[i] for i in range(i1, i2+1))"}
150 | {"task_id": "HumanEval/149", "completion": "def sorted_list_sum(lst):\n    result = [word for word in lst if len(word) % 2 == 0]\n    result.sort(key=len)\n    return result"}
151 | {"task_id": "HumanEval/150", "completion": "def x_or_y(n, x, y):\n    if n % 2 == 0:\n        return x\n    else:\n        return y"}
152 | {"task_id": "HumanEval/151", "completion": "def double_the_difference(lst):\n    return sum(x**2 for x in lst if x % 2!= 0) or 0"}
153 | {"task_id": "HumanEval/152", "completion": "def compare(scores, guesses):\n    result = []\n    for score, guess in zip(scores, guesses):\n        if score == guess:\n            result.append(0)\n        else:\n            result.append(abs(score - guess))\n    return result\n"}
154 | {"task_id": "HumanEval/153", "completion": "def Strongest_Extension(class_name, extensions):\n    strengths = []\n    for extension in extensions:\n        CAP = sum(1 for c in extension if c.isupper())\n        SM = sum(1 for c in extension if c.islower())\n        strengths.append((CAP - SM, extension))\n    strengths.sort(reverse=True)\n    return f'{class_name}.{strengths[0][1]}'\n"}
155 | {"task_id": "HumanEval/154", "completion": "def cycpattern_check(a, b):\n    return b in a or b in a[1:] or b in a[:-1]\n"}
156 | {"task_id": "HumanEval/155", "completion": "def even_odd_count(num):\n    count_even = 0\n    count_odd = 0\n    while num > 0:\n        digit = num % 10\n        if digit % 2 == 0:\n            count_even += 1\n        else:\n            count_odd += 1\n        num //= 10\n    return (count_even, count_odd)"}
157 | {"task_id": "HumanEval/156", "completion": "def int_to_mini_roman(num):\n    roman_numerals = ['I', 'V', 'X', 'L', 'C', 'D', 'M']\n    result = ''\n    while num > 0:\n        for i in range(len(roman_numerals) - 1, -1, -1):\n            if num >= roman_numerals[i]:\n                result += roman_numerals[i]\n                num -= roman_numerals[i]\n                break\n    return result.lower()"}
158 | {"task_id": "HumanEval/157", "completion": "def right_angle_triangle(a, b, c):\n    '''\n    Given the lengths of the three sides of a triangle. Return True if the three\n    sides form a right-angled triangle, False otherwise.\n    A right-angled triangle is a triangle in which one angle is right angle or \n    90 degree.\n    Example:\n    right_angle_triangle(3, 4, 5) == True\n    right_angle_triangle(1, 2, 3) == False\n    '''\n    return a**2 + b**2 == c**2"}
159 | {"task_id": "HumanEval/158", "completion": "def find_max(words):\n    max_word = ''\n    max_count = 0\n\n    for word in words:\n        count = len(set(word.lower()))\n        if count > max_count:\n            max_count = count\n            max_word = word\n\n    return max_word\n"}
160 | {"task_id": "HumanEval/159", "completion": "def eat(number, need, remaining):\n    if need <= remaining:\n        return [number + need, remaining - need]\n    else:\n        return [number + remaining, 0]"}
161 | {"task_id": "HumanEval/160", "completion": "def do_algebra(operator, operand):\n    expression = ''.join(map(str, operator))\n    for i, num in enumerate(operand):\n        expression = expression.replace(f'@{i}', str(num))\n    return eval(expression)\n"}
162 | {"task_id": "HumanEval/161", "completion": "def solve(s):\n    result = ''\n    for char in s:\n        if char.isalpha():\n            result += char.upper() if char.islower() else char.lower()\n        else:\n            result += char\n    return result"}
163 | {"task_id": "HumanEval/162", "completion": "def string_to_md5(text):\n    if text == '':\n        return None\n    return hashlib.md5(text.encode('utf-8')).hexdigest()"}
164 | {"task_id": "HumanEval/163", "completion": "def generate_integers(a, b):\n    even_digits = []\n    current_number = a\n    while current_number <= b:\n        if current_number % 2 == 0:\n            even_digits.append(current_number)\n        current_number += 1\n    return even_digits"}
165 | 


--------------------------------------------------------------------------------
/jsonl_examples/gpt3_final_500c_long.jsonl:
--------------------------------------------------------------------------------
  1 | {"task_id": "HumanEval/0", "completion": "from typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    for i in range(len(numbers)):\n        for j in range(i+1, len(numbers)):\n            if abs(numbers[i] - numbers[j]) < threshold:\n                return True\n    return False"}
  2 | {"task_id": "HumanEval/1", "completion": "from typing import List\n\n\ndef separate_paren_groups(paren_string: str) -> List[str]:\n    stack = []\n    result = []\n    current = ''\n    for char in paren_string:\n        if char == '(':\n            stack.append(char)\n        elif char == ')':\n            stack.pop()\n        current += char\n        if not stack:\n            result.append(current.replace(' ', ''))\n            current = ''\n    return result"}
  3 | {"task_id": "HumanEval/2", "completion": "def truncate_number(number: float) -> float:\n    return number - int(number)"}
  4 | {"task_id": "HumanEval/3", "completion": "from typing import List\n\n\ndef below_zero(operations: List[int]) -> bool:\n    balance = 0\n    for operation in operations:\n        balance += operation\n        if balance < 0:\n            return True\n    return False"}
  5 | {"task_id": "HumanEval/4", "completion": "from typing import List\n\n\ndef mean_absolute_deviation(numbers: List[float]) -> float:\n    mean = sum(numbers) / len(numbers)\n    absolute_deviations = [abs(num - mean) for num in numbers]\n    return sum(absolute_deviations) / len(absolute_deviations)"}
  6 | {"task_id": "HumanEval/5", "completion": "from typing import List\n\n\ndef intersperse(numbers: List[int], delimeter: int) -> List[int]:\n    return [x for pair in zip(numbers, [delimeter] * (len(numbers) - 1) + [0]) for x in pair]"}
  7 | {"task_id": "HumanEval/6", "completion": "from typing import List\n\n\ndef parse_nested_parens(paren_string: str) -> List[int]:\n    return [max([paren_string.count('(' , i, j) - paren_string.count(')' , i, j) for i in range(len(paren_string))]) for j in range(len(paren_string)) if paren_string[j] == ' ']"}
  8 | {"task_id": "HumanEval/7", "completion": "from typing import List\n\n\ndef filter_by_substring(strings: List[str], substring: str) -> List[str]:\n    return [s for s in strings if substring in s]"}
  9 | {"task_id": "HumanEval/8", "completion": "from typing import List, Tuple\n\n\ndef sum_product(numbers: List[int]) -> Tuple[int, int]:\n    s = sum(numbers)\n    p = 1\n    for n in numbers:\n        p *= n\n    return (s, p) if numbers else (0, 1)"}
 10 | {"task_id": "HumanEval/9", "completion": "from typing import List, Tuple\n\n\ndef rolling_max(numbers: List[int]) -> List[int]:\n    max_list = []\n    current_max = float('-inf')\n    for num in numbers:\n        current_max = max(current_max, num)\n        max_list.append(current_max)\n    return max_list"}
 11 | {"task_id": "HumanEval/10", "completion": "def is_palindrome(string: str) -> bool:\n    return string == string[::-1]\n\ndef make_palindrome(string: str) -> str:\n    for i in range(len(string), -1, -1):\n        if is_palindrome(string[:i]):\n            return string + string[:i][::-1]"}
 12 | {"task_id": "HumanEval/11", "completion": "from typing import List\n\n\ndef string_xor(a: str, b: str) -> str:\n    return ''.join(str(int(x) ^ int(y)) for x, y in zip(a, b))"}
 13 | {"task_id": "HumanEval/12", "completion": "from typing import List, Optional\n\n\ndef longest(strings: List[str]) -> Optional[str]:\n    if not strings:\n        return None\n    return max(strings, key=len)"}
 14 | {"task_id": "HumanEval/13", "completion": "def greatest_common_divisor(a: int, b: int) -> int:\n    while b:\n        a, b = b, a % b\n    return a"}
 15 | {"task_id": "HumanEval/14", "completion": "from typing import List\n\n\ndef all_prefixes(string: str) -> List[str]:\n    return [string[:i+1] for i in range(len(string))]"}
 16 | {"task_id": "HumanEval/15", "completion": "def string_sequence(n: int) -> str:\n    return ' '.join(str(i) for i in range(n+1))"}
 17 | {"task_id": "HumanEval/16", "completion": "def count_distinct_characters(string: str) -> int:\n    return len(set(string.lower()))"}
 18 | {"task_id": "HumanEval/17", "completion": "from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n    notes = {'o': 4, 'o|': 2, '.|': 1}\n    return [notes[note] for note in music_string.split()]"}
 19 | {"task_id": "HumanEval/18", "completion": "def how_many_times(string: str, substring: str) -> int:\n    return sum(1 for i in range(len(string)-len(substring)+1) if string[i:i+len(substring)] == substring)"}
 20 | {"task_id": "HumanEval/19", "completion": "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n    num_dict = {'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4, 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9}\n    num_list = numbers.split()\n    num_list.sort(key=lambda x: num_dict[x])\n    return ' '.join(num_list)"}
 21 | {"task_id": "HumanEval/20", "completion": "from typing import List, Tuple\n\n\ndef find_closest_elements(numbers: List[float]) -> Tuple[float, float]:\n    numbers.sort()\n    min_diff = float('inf')\n    closest_pair = None\n    for i in range(len(numbers)-1):\n        diff = numbers[i+1] - numbers[i]\n        if diff < min_diff:\n            min_diff = diff\n            closest_pair = (numbers[i], numbers[i+1])\n    return closest_pair"}
 22 | {"task_id": "HumanEval/21", "completion": "from typing import List\n\n\ndef rescale_to_unit(numbers: List[float]) -> List[float]:\n    min_num = min(numbers)\n    max_num = max(numbers)\n    return [(num - min_num) / (max_num - min_num) for num in numbers]"}
 23 | {"task_id": "HumanEval/22", "completion": "from typing import List, Any\n\n\ndef filter_integers(values: List[Any]) -> List[int]:\n    return [value for value in values if isinstance(value, int)]"}
 24 | {"task_id": "HumanEval/23", "completion": "def strlen(string: str) -> int:\n    return len(string)"}
 25 | {"task_id": "HumanEval/24", "completion": "def largest_divisor(n: int) -> int:\n    for i in range(n-1, 0, -1):\n        if n % i == 0:\n            return i"}
 26 | {"task_id": "HumanEval/25", "completion": "from typing import List\n\n\ndef factorize(n: int) -> List[int]:\n    factors = []\n    i = 2\n    while i * i <= n:\n        if n % i:\n            i += 1\n        else:\n            n //= i\n            factors.append(i)\n    if n > 1:\n        factors.append(n)\n    return factors"}
 27 | {"task_id": "HumanEval/26", "completion": "from typing import List\n\n\ndef remove_duplicates(numbers: List[int]) -> List[int]:\n    return list(dict.fromkeys(numbers))"}
 28 | {"task_id": "HumanEval/27", "completion": "def flip_case(string: str) -> str:\n    return string.swapcase()"}
 29 | {"task_id": "HumanEval/28", "completion": "from typing import List\n\n\ndef concatenate(strings: List[str]) -> str:\n    return ''.join(strings)"}
 30 | {"task_id": "HumanEval/29", "completion": "from typing import List\n\n\ndef filter_by_prefix(strings: List[str], prefix: str) -> List[str]:\n    return [s for s in strings if s.startswith(prefix)]"}
 31 | {"task_id": "HumanEval/30", "completion": "def get_positive(l: list):\n    return [num for num in l if num > 0]"}
 32 | {"task_id": "HumanEval/31", "completion": "def is_prime(n):\n    if n < 2:\n        return False\n    for i in range(2, int(n**0.5) + 1):\n        if n % i == 0:\n            return False\n    return True"}
 33 | {"task_id": "HumanEval/32", "completion": "import math\n\n\ndef poly(xs: list, x: float):\n    return sum([coeff * math.pow(x, i) for i, coeff in enumerate(xs)])\n\n\ndef find_zero(xs: list):\n    n = len(xs)\n    if n % 2 != 0 or xs[-1] == 0:\n        raise ValueError(\"Invalid input\")\n    a = xs[-1]\n    b = xs[-2]\n    for i in range(n - 2):\n        if i % 2 == 0:\n            a += xs[i]\n        else:\n            b += xs[i]\n    return -b / (2 * a)"}
 34 | {"task_id": "HumanEval/33", "completion": "def sort_third(l: list):\n    return [sorted(l[i:i+3]) if i%3 == 0 else l[i] for i in range(len(l))]"}
 35 | {"task_id": "HumanEval/34", "completion": "def unique(l: list):\n    return sorted(list(set(l)))"}
 36 | {"task_id": "HumanEval/35", "completion": "def max_element(l: list):\n    return max(l)"}
 37 | {"task_id": "HumanEval/36", "completion": "def fizz_buzz(n: int):\n    return sum(str(i).count('7') for i in range(n) if i % 11 == 0 or i % 13 == 0)"}
 38 | {"task_id": "HumanEval/37", "completion": "def sort_even(l: list):\n    even_indices = [i for i in range(len(l)) if i % 2 == 0]\n    even_values = [l[i] for i in even_indices]\n    sorted_even_values = sorted(even_values)\n    l_prime = l.copy()\n    for i in even_indices:\n        l_prime[i] = sorted_even_values.pop(0)\n    return l_prime"}
 39 | {"task_id": "HumanEval/38", "completion": "def decode_cyclic(s: str):\n    \"\"\"\n    takes as input string encoded with encode_cyclic function. Returns decoded string.\n    \"\"\"\n    # split string to groups. Each of length 3.\n    groups = [s[(3 * i):min((3 * i + 3), len(s))] for i in range((len(s) + 2) // 3)]\n    # cycle elements in each group. Unless group has fewer elements than 3.\n    groups = [(group[-1] + group[:-1]) if len(group) == 3 else group for group in groups]\n    return \"\".join(groups)"}
 40 | {"task_id": "HumanEval/39", "completion": "def prime_fib(n: int):\n    def is_prime(num):\n        if num < 2:\n            return False\n        for i in range(2, int(num ** 0.5) + 1):\n            if num % i == 0:\n                return False\n        return True\n\n    def fibonacci(num):\n        if num == 0:\n            return 0\n        elif num == 1:\n            return 1\n        else:\n            return fibonacci(num - 1) + fibonacci(num - 2)\n\n    count = 0\n    num = 2\n    while count < n:\n        if is_prime(fibonacci(num)):\n            count += 1\n        num += 1\n    return fibonacci(num - 1)"}
 41 | {"task_id": "HumanEval/40", "completion": "def triples_sum_to_zero(l: list):\n    return any(sum([l[i], l[j], l[k]]) == 0 for i in range(len(l)) for j in range(i+1, len(l)) for k in range(j+1, len(l)))"}
 42 | {"task_id": "HumanEval/41", "completion": "def car_race_collision(n: int):\n    return n*(n-1)//2"}
 43 | {"task_id": "HumanEval/42", "completion": "def incr_list(l: list):\n    return [x+1 for x in l]"}
 44 | {"task_id": "HumanEval/43", "completion": "def pairs_sum_to_zero(l):\n    return any(-x in l for x in l if x != 0)"}
 45 | {"task_id": "HumanEval/44", "completion": "def change_base(x: int, base: int):\n    if x == 0:\n        return '0'\n    digits = []\n    while x > 0:\n        digits.append(str(x % base))\n        x //= base\n    return ''.join(digits[::-1])"}
 46 | {"task_id": "HumanEval/45", "completion": "def triangle_area(a, h):\n    return 0.5 * a * h"}
 47 | {"task_id": "HumanEval/46", "completion": "def fib4(n: int):\n    if n == 0:\n        return 0\n    elif n == 1:\n        return 0\n    elif n == 2:\n        return 2\n    else:\n        a, b, c, d = 0, 0, 2, 0\n        for i in range(3, n+1):\n            d = a + b + c + d\n            a, b, c = b, c, d\n        return d"}
 48 | {"task_id": "HumanEval/47", "completion": "def median(l: list):\n    l.sort()\n    n = len(l)\n    if n % 2 == 0:\n        return (l[n//2-1] + l[n//2])/2\n    else:\n        return l[n//2]"}
 49 | {"task_id": "HumanEval/48", "completion": "def is_palindrome(text: str):\n    return text == text[::-1]"}
 50 | {"task_id": "HumanEval/49", "completion": "def modp(n: int, p: int):\n    return pow(2, n, p) if n else 1"}
 51 | {"task_id": "HumanEval/50", "completion": "def decode_shift(s: str):\n    \"\"\"\n    takes as input string encoded with encode_shift function. Returns decoded string.\n    \"\"\"\n    return \"\".join([chr(((ord(ch) - 5 - ord(\"a\")) % 26) + ord(\"a\")) for ch in s])"}
 52 | {"task_id": "HumanEval/51", "completion": "def remove_vowels(text):\n    vowels = \"aeiouAEIOU\"\n    return \"\".join([char for char in text if char not in vowels])"}
 53 | {"task_id": "HumanEval/52", "completion": "def below_threshold(l: list, t: int) -> bool:\n    return all(num < t for num in l)"}
 54 | {"task_id": "HumanEval/53", "completion": "def add(x: int, y: int) -> int:\n    return x + y"}
 55 | {"task_id": "HumanEval/54", "completion": "def same_chars(s0: str, s1: str):\n    return sorted(s0) == sorted(s1)"}
 56 | {"task_id": "HumanEval/55", "completion": "def fib(n: int):\n    if n <= 1:\n        return n\n    else:\n        return fib(n-1) + fib(n-2)"}
 57 | {"task_id": "HumanEval/56", "completion": "def correct_bracketing(brackets: str):\n    stack = []\n    for bracket in brackets:\n        if bracket == \"<\":\n            stack.append(bracket)\n        elif bracket == \">\":\n            if not stack:\n                return False\n            stack.pop()\n    return not stack"}
 58 | {"task_id": "HumanEval/57", "completion": "def monotonic(l: list):\n    return all(l[i] <= l[i+1] for i in range(len(l)-1)) or all(l[i] >= l[i+1] for i in range(len(l)-1))"}
 59 | {"task_id": "HumanEval/58", "completion": "def common(l1: list, l2: list):\n    return sorted(list(set(l1) & set(l2)))"}
 60 | {"task_id": "HumanEval/59", "completion": "def largest_prime_factor(n: int):\n    i = 2\n    while i * i <= n:\n        if n % i:\n            i += 1\n        else:\n            n //= i\n    return n"}
 61 | {"task_id": "HumanEval/60", "completion": "def sum_to_n(n: int):\n    return sum(range(1, n+1))"}
 62 | {"task_id": "HumanEval/61", "completion": "def correct_bracketing(brackets: str):\n    stack = []\n    for bracket in brackets:\n        if bracket == \"(\":\n            stack.append(bracket)\n        elif bracket == \")\":\n            if not stack:\n                return False\n            stack.pop()\n    return not stack"}
 63 | {"task_id": "HumanEval/62", "completion": "def derivative(xs: list):\n    return [xs[i] * i for i in range(1, len(xs))]"}
 64 | {"task_id": "HumanEval/63", "completion": "def fibfib(n: int):\n    if n == 0:\n        return 0\n    elif n == 1 or n == 2:\n        return 1\n    else:\n        a, b, c = 1, 1, 0\n        for i in range(3, n+1):\n            c = a + b + c\n            a, b = b, c\n        return c"}
 65 | {"task_id": "HumanEval/64", "completion": "def vowels_count(s):\n    vowels = ['a', 'e', 'i', 'o', 'u']\n    count = 0\n    s = s.lower()\n    if s[-1] == 'y':\n        vowels.append('y')\n    for char in s:\n        if char in vowels:\n            count += 1\n    return count"}
 66 | {"task_id": "HumanEval/65", "completion": "def circular_shift(x, shift):\n    x_str = str(x)\n    if shift > len(x_str):\n        return x_str[::-1]\n    else:\n        return x_str[-shift:] + x_str[:-shift]"}
 67 | {"task_id": "HumanEval/66", "completion": "def digitSum(s):\n    return sum(ord(c) for c in s if c.isupper())"}
 68 | {"task_id": "HumanEval/67", "completion": "def fruit_distribution(s, n):\n    fruits = s.split()\n    apples = int(fruits[0])\n    oranges = int(fruits[3])\n    mangoes = n - apples - oranges\n    return mangoes"}
 69 | {"task_id": "HumanEval/68", "completion": "def pluck(arr):\n    even_nodes = [node for node in arr if node % 2 == 0]\n    if not even_nodes:\n        return []\n    min_even = min(even_nodes)\n    min_even_index = arr.index(min_even)\n    return [min_even, min_even_index]"}
 70 | {"task_id": "HumanEval/69", "completion": "def search(lst):\n    for i in sorted(set(lst), reverse=True):\n        if lst.count(i) >= i > 0:\n            return i\n    return -1"}
 71 | {"task_id": "HumanEval/70", "completion": "def strange_sort_list(lst):\n    result = []\n    while lst:\n        result.append(min(lst))\n        lst.remove(min(lst))\n        if lst:\n            result.append(max(lst))\n            lst.remove(max(lst))\n    return result"}
 72 | {"task_id": "HumanEval/71", "completion": "def triangle_area(a, b, c):\n    s = (a + b + c) / 2\n    area = (s * (s - a) * (s - b) * (s - c)) ** 0.5 if a + b > c and a + c > b and b + c > a else -1\n    return round(area, 2)"}
 73 | {"task_id": "HumanEval/72", "completion": "def will_it_fly(q,w):\n    return q == q[::-1] and sum(q) <= w"}
 74 | {"task_id": "HumanEval/73", "completion": "def smallest_change(arr):\n    count = 0\n    for i in range(len(arr)//2):\n        if arr[i] != arr[-i-1]:\n            count += 1\n    return count"}
 75 | {"task_id": "HumanEval/74", "completion": "def total_match(lst1, lst2):\n    return lst1 if sum(len(s) for s in lst1) <= sum(len(s) for s in lst2) else lst2"}
 76 | {"task_id": "HumanEval/75", "completion": "def is_multiply_prime(a):\n    primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n    count = 0\n    for prime in primes:\n        if a % prime == 0:\n            count += 1\n            if count == 3:\n                return True\n    return False"}
 77 | {"task_id": "HumanEval/76", "completion": "def is_simple_power(x, n):\n    return x > 0 and n > 0 and (x == 1 or n != 1 and x % n == 0 and is_simple_power(x // n, n))"}
 78 | {"task_id": "HumanEval/77", "completion": "def iscube(a):\n    return (a**(1/3)).is_integer()"}
 79 | {"task_id": "HumanEval/78", "completion": "def hex_key(num):\n    primes = ['2', '3', '5', '7', 'B', 'D']\n    count = 0\n    for digit in num:\n        if digit in primes:\n            count += 1\n    return count"}
 80 | {"task_id": "HumanEval/79", "completion": "def decimal_to_binary(decimal):\n    binary = bin(decimal)[2:]\n    return f\"db{binary}db\""}
 81 | {"task_id": "HumanEval/80", "completion": "def is_happy(s):\n    return len(s) >= 3 and all(s[i] != s[i+1] and s[i] != s[i+2] and s[i+1] != s[i+2] for i in range(len(s)-2))"}
 82 | {"task_id": "HumanEval/81", "completion": "def numerical_letter_grade(grades):\n    return ['A+' if gpa == 4.0 else 'A' if gpa > 3.7 else 'A-' if gpa > 3.3 else 'B+' if gpa > 3.0 else 'B' if gpa > 2.7 else 'B-' if gpa > 2.3 else 'C+' if gpa > 2.0 else 'C' if gpa > 1.7 else 'C-' if gpa > 1.3 else 'D+' if gpa > 1.0 else 'D' if gpa > 0.7 else 'D-' if gpa > 0.0 else 'E' for gpa in grades]"}
 83 | {"task_id": "HumanEval/82", "completion": "import math\n\ndef prime_length(string):\n    return all(string.count(char) == 1 for char in string) and all(string.count(char) != 0 for char in string) and all(string.count(char) != 2 for char in string) and all(string.count(char) != 3 for char in string) and all(string.count(char) != 5 for char in string) and all(string.count(char) != 7 for char in string) and len(string) > 1 and all(string.count(char) != 11 for char in string) and all(string.count(char) != 13 for char in string) and all(string.count(char) != 17 for char in string) and all(string.count(char) != 19 for char in string) and all(string.count(char) != 23 for char in string) and all(string.count(char) != 29 for char in string) and all(string.count(char) != 31 for char in string) and all(string.count(char) != 37 for char in string) and all(string.count(char) != 41 for char in string) and all(string.count(char) != 43 for char in string) and all(string.count(char) != 47 for char in string) and all(string.count(char) != 53 for char in string) and all(string.count(char) != 59 for char in string) and all(string.count(char) != 61 for char in string) and all(string.count(char) != 67 for char in string) and all(string.count(char) != 71 for char in string) and all(string.count(char) != 73 for char in string) and all(string.count(char) != 79 for char in string) and all(string.count(char) != 83 for char in string) and all(string.count(char) != 89 for char in string) and all(string.count(char) != 97 for char in string) and all(string.count(char) != 101 for char in string) and all(string.count(char) != 103 for char in string) and all(string.count(char) != 107 for char in string) and all(string.count(char) != 109 for char in string) and all(string.count(char) != 113 for char in string) and all(string.count(char) != 127 for char in string) and all(string.count(char) != 131 for char in string) and all(string.count(char) != "}
 84 | {"task_id": "HumanEval/83", "completion": "def starts_one_ends(n):\n    count = 0\n    for i in range(10**(n-1), 10**n):\n        if str(i)[0] == '1' or str(i)[-1] == '1':\n            count += 1\n    return count"}
 85 | {"task_id": "HumanEval/84", "completion": "def solve(N):\n    return bin(sum(int(digit) for digit in str(N)))[2:]"}
 86 | {"task_id": "HumanEval/85", "completion": "def add(lst):\n    return sum([lst[i] for i in range(1, len(lst), 2) if lst[i] % 2 == 0])"}
 87 | {"task_id": "HumanEval/86", "completion": "def anti_shuffle(s):\n    words = s.split()\n    ordered_words = []\n    for word in words:\n        ordered_word = ''.join(sorted(word))\n        ordered_words.append(ordered_word)\n    return ' '.join(ordered_words)"}
 88 | {"task_id": "HumanEval/87", "completion": "def get_row(lst, x):\n    return [(i, j) for i in range(len(lst)) for j in sorted(range(len(lst[i])), reverse=True) if lst[i][j] == x]"}
 89 | {"task_id": "HumanEval/88", "completion": "def sort_array(array):\n    return sorted(array, reverse=(array and (array[0]+array[-1])%2==0))"}
 90 | {"task_id": "HumanEval/89", "completion": "def encrypt(s):\n    return ''.join([chr((ord(c) - 97 + 2 * 2) % 26 + 97) for c in s])"}
 91 | {"task_id": "HumanEval/90", "completion": "def next_smallest(lst):\n    if len(lst) < 2:\n        return None\n    lst = list(set(lst))\n    if len(lst) < 2:\n        return None\n    lst.sort()\n    return lst[1]"}
 92 | {"task_id": "HumanEval/91", "completion": "def is_bored(S):\n    sentences = S.split('.')\n    sentences += S.split('?')\n    sentences += S.split('!')\n    count = 0\n    for sentence in sentences:\n        if sentence.strip().startswith('I'):\n            count += 1\n    return count"}
 93 | {"task_id": "HumanEval/92", "completion": "def any_int(x, y, z):\n    return isinstance(x, int) and isinstance(y, int) and isinstance(z, int) and (x == y + z or y == x + z or z == x + y)"}
 94 | {"task_id": "HumanEval/93", "completion": "def encode(message):\n    vowels = 'aeiouAEIOU'\n    encoded = ''\n    for char in message:\n        if char.isalpha():\n            if char in vowels:\n                encoded += chr(ord(char)+2)\n            else:\n                if char.islower():\n                    encoded += char.upper()\n                else:\n                    encoded += char.lower()\n        else:\n            encoded += char\n    return encoded"}
 95 | {"task_id": "HumanEval/94", "completion": "def largest_prime_sum(lst):\n    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5)+1):\n            if n % i == 0:\n                return False\n        return True\n    \n    max_prime = 0\n    for num in lst:\n        if is_prime(num) and num > max_prime:\n            max_prime = num\n    \n    return sum(int(digit) for digit in str(max_prime)) if max_prime != 0 else 0"}
 96 | {"task_id": "HumanEval/95", "completion": "def check_dict_case(dict):\n    if not dict:\n        return False\n    keys = list(dict.keys())\n    if all(key.islower() for key in keys) or all(key.isupper() for key in keys):\n        return True\n    else:\n        return False"}
 97 | {"task_id": "HumanEval/96", "completion": "def count_up_to(n):\n    if n < 2:\n        return []\n    primes = [2]\n    i = 3\n    while i < n:\n        if all(i % p != 0 for p in primes):\n            primes.append(i)\n        i += 2\n    return primes"}
 98 | {"task_id": "HumanEval/97", "completion": "def multiply(a, b):\n    return abs(a % 10 * b % 10)"}
 99 | {"task_id": "HumanEval/98", "completion": "def count_upper(s):\n    vowels = set(['A', 'E', 'I', 'O', 'U'])\n    count = 0\n    for i in range(0, len(s), 2):\n        if s[i] in vowels and s[i].isupper():\n            count += 1\n    return count"}
100 | {"task_id": "HumanEval/99", "completion": "def closest_integer(value):\n    value = float(value)\n    if value >= 0:\n        return int(value + 0.5)\n    else:\n        return int(value - 0.5)"}
101 | {"task_id": "HumanEval/100", "completion": "def make_a_pile(n):\n    pile = []\n    stones = n\n    for i in range(n):\n        pile.append(stones)\n        if n % 2 == 0:\n            stones += 2\n        else:\n            stones += 1\n    return pile"}
102 | {"task_id": "HumanEval/101", "completion": "def words_string(s):\n    return re.findall(r'\\w+', s)"}
103 | {"task_id": "HumanEval/102", "completion": "def choose_num(x, y):\n    return max([i for i in range(x, y+1) if i%2==0]+[-1])"}
104 | {"task_id": "HumanEval/103", "completion": "def rounded_avg(n, m):\n    if n > m:\n        return -1\n    avg = round(sum(range(n, m+1)) / (m-n+1))\n    return bin(avg)"}
105 | {"task_id": "HumanEval/104", "completion": "def unique_digits(x):\n    return sorted([num for num in x if all(int(digit) % 2 != 0 for digit in str(num))])"}
106 | {"task_id": "HumanEval/105", "completion": "def by_length(arr):\n    num_names = [\"\", \"One\", \"Two\", \"Three\", \"Four\", \"Five\", \"Six\", \"Seven\", \"Eight\", \"Nine\"]\n    filtered_arr = [num for num in arr if 1 <= num <= 9]\n    sorted_arr = sorted(filtered_arr)\n    reversed_arr = sorted_arr[::-1]\n    result = [num_names[num] for num in reversed_arr]\n    return result"}
107 | {"task_id": "HumanEval/106", "completion": "def f(n):\n    def factorial(num):\n        if num == 0:\n            return 1\n        else:\n            return num * factorial(num - 1)\n\n    result = []\n    for i in range(1, n+1):\n        if i % 2 == 0:\n            result.append(factorial(i))\n        else:\n            result.append(sum(range(1, i+1)))\n    return result"}
108 | {"task_id": "HumanEval/107", "completion": "def even_odd_palindrome(n):\n    even_count = 0\n    odd_count = 0\n    for i in range(1, n+1):\n        if str(i) == str(i)[::-1]:\n            if i % 2 == 0:\n                even_count += 1\n            else:\n                odd_count += 1\n    return (even_count, odd_count)"}
109 | {"task_id": "HumanEval/108", "completion": "def count_nums(arr):\n    def sum_digits(n):\n        return sum(int(d) for d in str(abs(n)))\n    return sum(1 for num in arr if sum_digits(num) > 0)"}
110 | {"task_id": "HumanEval/109", "completion": "def move_one_ball(arr):\n    if not arr:\n        return True\n    n = len(arr)\n    for i in range(n):\n        if arr == sorted(arr):\n            return True\n        arr = [arr[-1]] + arr[:-1]\n    return False"}
111 | {"task_id": "HumanEval/110", "completion": "def exchange(lst1, lst2):\n    if sum(x % 2 == 1 for x in lst1) > sum(x % 2 == 1 for x in lst2):\n        return \"NO\"\n    return \"YES\""}
112 | {"task_id": "HumanEval/111", "completion": "def histogram(test):\n    if not test:\n        return {}\n    counts = {}\n    for letter in test.split():\n        counts[letter] = counts.get(letter, 0) + 1\n    max_count = max(counts.values())\n    return {letter: count for letter, count in counts.items() if count == max_count}"}
113 | {"task_id": "HumanEval/112", "completion": "def reverse_delete(s, c):\n    s = ''.join([char for char in s if char not in c])\n    return (s, s == s[::-1])"}
114 | {"task_id": "HumanEval/113", "completion": "def odd_count(lst):\n    return [\"the number of odd elements in the string {} of the input.\".format(i) for i in lst]"}
115 | {"task_id": "HumanEval/114", "completion": "import math\n\ndef minSubArraySum(nums):\n    min_sum = math.inf\n    curr_sum = 0\n    for num in nums:\n        curr_sum += num\n        min_sum = min(min_sum, curr_sum)\n        curr_sum = max(curr_sum, 0)\n    return min_sum"}
116 | {"task_id": "HumanEval/115", "completion": "def max_fill(grid, capacity):\n    num_buckets = 0\n    for row in grid:\n        num_buckets += math.ceil(sum(row)/capacity)\n    return num_buckets"}
117 | {"task_id": "HumanEval/116", "completion": "def sort_array(arr):\n    return sorted(arr, key=lambda x: (bin(x).count('1'), x))"}
118 | {"task_id": "HumanEval/117", "completion": "def select_words(s, n):\n    if not s:\n        return []\n    words = s.split()\n    result = []\n    for word in words:\n        consonants = sum(1 for c in word if c.isalpha() and c.lower() not in 'aeiou')\n        if consonants == n:\n            result.append(word)\n    return result"}
119 | {"task_id": "HumanEval/118", "completion": "def get_closest_vowel(word):\n    vowels = set('aeiouAEIOU')\n    consonants = set('bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ')\n    word = word.strip(consonants)\n    if len(word) < 2:\n        return \"\"\n    for i in range(len(word)-2, -1, -1):\n        if word[i] in vowels:\n            return word[i]\n    return \"\""}
120 | {"task_id": "HumanEval/119", "completion": "def match_parens(lst):\n    s = ''.join(lst)\n    stack = []\n    for c in s:\n        if c == '(':\n            stack.append(c)\n        elif not stack:\n            return 'No'\n        else:\n            stack.pop()\n    return 'Yes' if not stack else 'No'"}
121 | {"task_id": "HumanEval/120", "completion": "def maximum(arr, k):\n    return sorted(arr)[-k:]"}
122 | {"task_id": "HumanEval/121", "completion": "def solution(lst):\n    return sum(lst[i] for i in range(1, len(lst), 2) if lst[i] % 2 != 0)"}
123 | {"task_id": "HumanEval/122", "completion": "def add_elements(arr, k):\n    return sum([x for x in arr[:k] if abs(x) < 100])"}
124 | {"task_id": "HumanEval/123", "completion": "def get_odd_collatz(n):\n    collatz = [n]\n    while n != 1:\n        if n % 2 == 0:\n            n = n // 2\n        else:\n            n = 3 * n + 1\n        collatz.append(n)\n    return sorted(list(set(filter(lambda x: x % 2 == 1, collatz)) if n != 1 else [1]))"}
125 | {"task_id": "HumanEval/124", "completion": "def valid_date(date):\n    try:\n        month, day, year = date.split('-')\n        month = int(month)\n        day = int(day)\n        year = int(year)\n        if month < 1 or month > 12:\n            return False\n        if month in [1, 3, 5, 7, 8, 10, 12]:\n            if day < 1 or day > 31:\n                return False\n        elif month in [4, 6, 9, 11]:\n            if day < 1 or day > 30:\n                return False\n        elif month == 2:\n            if year % 4 == 0 and (year % 100 != 0 or year % 400 == 0):\n                if day < 1 or day > 29:\n                    return False\n            else:\n                if day < 1 or day > 28:\n                    return False\n        else:\n            return False\n        return True\n    except:\n        return False"}
126 | {"task_id": "HumanEval/125", "completion": "def split_words(txt):\n    words = txt.split()\n    if len(words) == 1:\n        words = txt.split(',')\n    if len(words) == 1:\n        count = sum(1 for c in txt if c.islower() and ord(c) % 2 == 1)\n        return count\n    return words"}
127 | {"task_id": "HumanEval/126", "completion": "def is_sorted(lst):\n    return lst == sorted(set(lst)) and lst == sorted(lst)"}
128 | {"task_id": "HumanEval/127", "completion": "def intersection(interval1, interval2):\n    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5)+1):\n            if n % i == 0:\n                return False\n        return True\n    \n    start1, end1 = interval1\n    start2, end2 = interval2\n    intersection_start = max(start1, start2)\n    intersection_end = min(end1, end2)\n    if intersection_start > intersection_end:\n        return \"NO\"\n    intersection_length = intersection_end - intersection_start + 1\n    if is_prime(intersection_length):\n        return \"YES\"\n    else:\n        return \"NO\""}
129 | {"task_id": "HumanEval/128", "completion": "def prod_signs(arr):\n    if not arr:\n        return None\n    sign_prod = 1\n    mag_sum = 0\n    for num in arr:\n        sign = 0 if num == 0 else num // abs(num)\n        sign_prod *= sign\n        mag_sum += abs(num)\n    return sign_prod * mag_sum"}
130 | {"task_id": "HumanEval/129", "completion": "def minPath(grid, k):\n    n = len(grid)\n    visited = set()\n    heap = [(grid[i][j], [(i, j)]) for i in range(n) for j in range(n)]\n    heapq.heapify(heap)\n    while heap:\n        val, path = heapq.heappop(heap)\n        if len(path) == k:\n            return [grid[i][j] for i, j in path]\n        i, j = path[-1]\n        for ni, nj in ((i+1,j), (i-1,j), (i,j+1), (i,j-1)):\n            if 0 <= ni < n and 0 <= nj < n and (ni, nj) not in visited:\n                visited.add((ni, nj))\n                heapq.heappush(heap, (grid[ni][nj], path + [(ni, nj)]))"}
131 | {"task_id": "HumanEval/130", "completion": "def tri(n):\n    seq = [3]\n    if n == 0:\n        return []\n    elif n == 1:\n        return seq\n    else:\n        seq.append(1)\n        for i in range(2, n+1):\n            if i % 2 == 0:\n                seq.append(1 + i//2)\n            else:\n                seq.append(seq[-1] + seq[-2] + seq[-3])\n        return seq"}
132 | {"task_id": "HumanEval/131", "completion": "def digits(n):\n    odd_digits = [int(d) for d in str(n) if int(d) % 2 != 0]\n    if len(odd_digits) == 0:\n        return 0\n    else:\n        product = 1\n        for digit in odd_digits:\n            product *= digit\n        return product"}
133 | {"task_id": "HumanEval/132", "completion": "def is_nested(string):\n    stack = []\n    for char in string:\n        if char == '[':\n            stack.append(char)\n        elif char == ']':\n            if stack and stack[-1] == '[':\n                stack.pop()\n            else:\n                stack.append(char)\n    return '[' in stack"}
134 | {"task_id": "HumanEval/133", "completion": "def sum_squares(lst):\n    return sum([int(x)**2 for x in lst])"}
135 | {"task_id": "HumanEval/134", "completion": "def check_if_last_char_is_a_letter(txt):\n    if len(txt) == 0:\n        return False\n    last_char = txt[-1]\n    if not last_char.isalpha():\n        return False\n    if len(txt.split()[-1]) == 1:\n        return True\n    return False"}
136 | {"task_id": "HumanEval/135", "completion": "def can_arrange(arr):\n    for i in range(len(arr)-1, 0, -1):\n        if arr[i] < arr[i-1]:\n            return i-1\n    return -1"}
137 | {"task_id": "HumanEval/136", "completion": "def largest_smallest_integers(lst):\n    neg_ints = [i for i in lst if i < 0]\n    pos_ints = [i for i in lst if i > 0]\n    if not neg_ints:\n        max_neg = None\n    else:\n        max_neg = max(neg_ints)\n    if not pos_ints:\n        min_pos = None\n    else:\n        min_pos = min(pos_ints)\n    return (max_neg, min_pos)"}
138 | {"task_id": "HumanEval/137", "completion": "def compare_one(a, b):\n    if type(a) == type(b):\n        if type(a) == int or type(a) == float:\n            if a > b:\n                return a\n            elif b > a:\n                return b\n            else:\n                return None\n        elif type(a) == str:\n            a = a.replace(\",\", \".\")\n            b = b.replace(\",\", \".\")\n            if float(a) > float(b):\n                return a\n            elif float(b) > float(a):\n                return b\n            else:\n                return None\n    else:\n        return None"}
139 | {"task_id": "HumanEval/138", "completion": "def is_equal_to_sum_even(n):\n    return n % 8 == 0 or (n - 4) % 8 == 0"}
140 | {"task_id": "HumanEval/139", "completion": "from math import factorial\n\ndef special_factorial(n):\n    result = 1\n    for i in range(n, 0, -1):\n        result *= factorial(i)\n    return result"}
141 | {"task_id": "HumanEval/140", "completion": "def fix_spaces(text):\n    text = text.replace(\" \", \"_\")\n    count = 0\n    new_text = \"\"\n    for char in text:\n        if char == \" \":\n            count += 1\n            if count > 2:\n                new_text = new_text[:-1] + \"-\"\n        else:\n            count = 0\n        new_text += char\n    return new_text"}
142 | {"task_id": "HumanEval/141", "completion": "def file_name_check(file_name):\n    import re\n    pattern = r'^[a-zA-Z][a-zA-Z0-9]*\\.[txt|exe|dll]{3}$'\n    if re.match(pattern, file_name):\n        return 'Yes'\n    else:\n        return 'No'"}
143 | {"task_id": "HumanEval/142", "completion": "def sum_squares(lst):\n    return sum([x**2 if i%3==0 else x**3 if i%4==0 else x for i,x in enumerate(lst)])"}
144 | {"task_id": "HumanEval/143", "completion": "def words_in_sentence(sentence):\n    def is_prime(n):\n        if n < 2:\n            return False\n        for i in range(2, int(n**0.5)+1):\n            if n % i == 0:\n                return False\n        return True\n\n    words = sentence.split()\n    prime_words = [word for word in words if is_prime(len(word))]\n    return ' '.join(prime_words)"}
145 | {"task_id": "HumanEval/144", "completion": "def simplify(x, n):\n    x_num, x_den = map(int, x.split('/'))\n    n_num, n_den = map(int, n.split('/'))\n    result = x_num * n_num / (x_den * n_den)\n    return result.is_integer()"}
146 | {"task_id": "HumanEval/145", "completion": "def order_by_points(nums):\n    def digit_sum(num):\n        return sum(int(digit) for digit in str(abs(num)))\n    return sorted(nums, key=lambda num: (digit_sum(num), nums.index(num)))"}
147 | {"task_id": "HumanEval/146", "completion": "def specialFilter(nums):\n    return len([num for num in nums if num > 10 and str(num)[0] in ['1', '3', '5', '7', '9'] and str(num)[-1] in ['1', '3', '5', '7', '9']])"}
148 | {"task_id": "HumanEval/147", "completion": "def get_max_triples(n):\n    a = [i*i-i+1 for i in range(1,n+1)]\n    count = 0\n    for i in range(n-2):\n        for j in range(i+1,n-1):\n            for k in range(j+1,n):\n                if (a[i]+a[j]+a[k])%3 == 0:\n                    count += 1\n    return count"}
149 | {"task_id": "HumanEval/148", "completion": "def bf(planet1, planet2):\n    planets = [\"Mercury\", \"Venus\", \"Earth\", \"Mars\", \"Jupiter\", \"Saturn\", \"Uranus\", \"Neptune\"]\n    if planet1 not in planets or planet2 not in planets:\n        return ()\n    start = planets.index(planet1)\n    end = planets.index(planet2)\n    if start > end:\n        start, end = end, start\n    return tuple(sorted(planets[start+1:end], key=lambda x: planets.index(x)))"}
150 | {"task_id": "HumanEval/149", "completion": "def sorted_list_sum(lst):\n    return sorted([word for word in lst if len(word) % 2 == 0], key=lambda x: (len(x), x))"}
151 | {"task_id": "HumanEval/150", "completion": "import math\n\ndef x_or_y(n, x, y):\n    return x if all(n % i != 0 for i in range(2, int(math.sqrt(n))+1)) and n > 1 else y"}
152 | {"task_id": "HumanEval/151", "completion": "def double_the_difference(lst):\n    return sum(x**2 for x in lst if isinstance(x, int) and x > 0 and x % 2 != 0) * 2"}
153 | {"task_id": "HumanEval/152", "completion": "def compare(game, guess):\n    return [abs(game[i]-guess[i]) if game[i]!=guess[i] else 0 for i in range(len(game))]"}
154 | {"task_id": "HumanEval/153", "completion": "def Strongest_Extension(class_name, extensions):\n    def strength(ext):\n        return ext.count(ext.upper()) - ext.count(ext.lower())\n    strongest = max(extensions, key=lambda ext: (strength(ext), extensions.index(ext)))\n    return f\"{class_name}.{strongest}\""}
155 | {"task_id": "HumanEval/154", "completion": "def cycpattern_check(a, b):\n    return any(b in a[i:] + a[:i] for i in range(len(a)))"}
156 | {"task_id": "HumanEval/155", "completion": "def even_odd_count(num):\n    even_count = 0\n    odd_count = 0\n    for digit in str(abs(num)):\n        if int(digit) % 2 == 0:\n            even_count += 1\n        else:\n            odd_count += 1\n    if num < 0:\n        return (odd_count, even_count)\n    else:\n        return (even_count, odd_count)"}
157 | {"task_id": "HumanEval/156", "completion": "def int_to_mini_roman(number):\n    roman_numerals = {\n        1000: 'm',\n        900: 'cm',\n        500: 'd',\n        400: 'cd',\n        100: 'c',\n        90: 'xc',\n        50: 'l',\n        40: 'xl',\n        10: 'x',\n        9: 'ix',\n        5: 'v',\n        4: 'iv',\n        1: 'i'\n    }\n    result = ''\n    for value, numeral in roman_numerals.items():\n        while number >= value:\n            result += numeral\n            number -= value\n    return result.lower()"}
158 | {"task_id": "HumanEval/157", "completion": "def right_angle_triangle(a, b, c):\n    sides = [a, b, c]\n    sides.sort()\n    return sides[0]**2 + sides[1]**2 == sides[2]**2"}
159 | {"task_id": "HumanEval/158", "completion": "def find_max(words):\n    def count_unique(word):\n        return len(set(word))\n\n    max_word = \"\"\n    max_count = 0\n\n    for word in words:\n        unique_count = count_unique(word)\n        if unique_count > max_count:\n            max_word = word\n            max_count = unique_count\n        elif unique_count == max_count and word < max_word:\n            max_word = word\n\n    return max_word"}
160 | {"task_id": "HumanEval/159", "completion": "def eat(number, need, remaining):\n    eaten = min(need, remaining + number)\n    remaining = max(0, remaining - (eaten - number))\n    return [eaten, remaining]"}
161 | {"task_id": "HumanEval/160", "completion": "def do_algebra(operator, operand):\n    expression = str(operand[0])\n    for i in range(len(operator)):\n        expression += operator[i] + str(operand[i+1])\n    return eval(expression)"}
162 | {"task_id": "HumanEval/161", "completion": "def solve(s):\n    return s.swapcase() if any(c.isalpha() for c in s) else s[::-1]"}
163 | {"task_id": "HumanEval/162", "completion": "import hashlib\n\ndef string_to_md5(text):\n    if text:\n        return hashlib.md5(text.encode()).hexdigest()"}
164 | {"task_id": "HumanEval/163", "completion": "    pass"}
165 | 


--------------------------------------------------------------------------------
/launch_pod.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import subprocess
  3 | import threading
  4 | import sys
  5 | import time
  6 | from benchmark_utils import run_benchmark
  7 | 
  8 | def get_output_and_retry(call_string, max_retries=5):
  9 |     for attempt in range(max_retries):
 10 |         result = get_ipython().getoutput(f'curl -s --request POST {call_string}')
 11 |         data_dict = json.loads(result[-1])
 12 |         print(data_dict)
 13 |         if "INTERNAL_SERVER_ERROR" not in str(data_dict):
 14 |             return data_dict
 15 |         else:
 16 |             print(f"ERROR - Attempt {attempt+1}, Retrying in 5s")
 17 |             time.sleep(5)
 18 |     raise Exception(f"Failed after {max_retries} attempts")
 19 | 
 20 | def launch_pod(api_key, gpu_type, pod_num):
 21 | 
 22 |     gpu_count = 1
 23 |     if gpu_type[:2] == "2x":
 24 |         gpu_type = gpu_type[2:]
 25 |         gpu_count = 2
 26 | 
 27 |     call_string = f"""--header 'content-type: application/json' \
 28 |   --url 'https://api.runpod.io/graphql?api_key={api_key}' \
 29 |   --data '{{"query": "mutation {{ podFindAndDeployOnDemand( input: {{ cloudType: SECURE, gpuCount: {gpu_count}, volumeInGb: 100, containerDiskInGb: 10, minVcpuCount: 2, minMemoryInGb: 20, gpuTypeId: \\"{gpu_type}\\", name: \\"RunPod TextGenWebUI #{pod_num}\\", imageName: \\"succulentsteve/webui:latest\\", dockerArgs: \\"\\", ports: \\"7860/http,5000/http,22/tcp,5005/tcp\\", volumeMountPath: \\"/workspace\\", env: [] }} ) {{ id imageName env machineId machine {{ podHostId }} }} }}"}}'"""
 30 | 
 31 |     result = get_ipython().getoutput(f'curl -s --request POST {call_string}')
 32 |     data_dict = get_output_and_retry(call_string, max_retries=5)
 33 |     pod_id = data_dict['data']['podFindAndDeployOnDemand']['id']
 34 |     machine_id = data_dict['data']['podFindAndDeployOnDemand']['machineId']
 35 |     host_id = machine_id = data_dict['data']['podFindAndDeployOnDemand']['machine']['podHostId']
 36 |     return pod_id, machine_id, host_id
 37 | 
 38 | def wait_for_pod(pod_id, api_key):
 39 |     curl_command = f"""curl -s --request POST \
 40 |     --header 'content-type: application/json' \
 41 |     --url 'https://api.runpod.io/graphql?api_key={api_key}' \
 42 |     --data '{{"query": "query Pod {{ pod(input: {{podId: \\"{pod_id}\\"}}) {{ id name runtime {{ uptimeInSeconds ports {{ ip isIpPublic privatePort publicPort type }} gpus {{ id gpuUtilPercent memoryUtilPercent }} container {{ cpuPercent memoryPercent }} }} }} }}"}}'"""
 43 |     while True:
 44 |         result = get_ipython().getoutput(curl_command)
 45 |         data = json.loads(result[-1])
 46 |         try:
 47 |             port_22_info = [port for port in data["data"]["pod"]["runtime"]["ports"] if port["type"] == "tcp" and port["privatePort"] == 22]
 48 |             ssh_ip = port_22_info[0]["ip"]
 49 |             ssh_port = port_22_info[0]["publicPort"]
 50 | 
 51 |             port_5005_info = [port for port in data["data"]["pod"]["runtime"]["ports"] if port["type"] == "tcp" and port["privatePort"] == 5005]
 52 |             ws_ip = port_5005_info[0]["ip"]
 53 |             ws_port = port_5005_info[0]["publicPort"]
 54 | 
 55 |             return ssh_ip, ssh_port, ws_ip, ws_port
 56 |         except:
 57 |             print("not ready yet")
 58 |             time.sleep(10)
 59 | 
 60 | def download_model(ssh_ip, ssh_port, model):
 61 |     model_file = model.replace("/", "_")
 62 |     cmd = f"'cd /root/text-generation-webui && python download-model.py --output /workspace/models {model}'"
 63 |     get_ipython().system(f'ssh -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 {cmd}')
 64 |     return model_file
 65 | 
 66 | def swap_gptq(ssh_ip, ssh_port):
 67 |     commands = [
 68 |     "cd /workspace && git clone -n https://github.com/qwopqwop200/GPTQ-for-LLaMa",
 69 |     "ln -s /workspace/GPTQ-for-LLaMa /root/text-generation-webui/repositories/GPTQ-for-LLaMa"
 70 |     ]
 71 |     for cmd in commands:
 72 |         get_ipython().system(f'ssh -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 {cmd}')
 73 | 
 74 | def remove_safetensors(ssh_ip, ssh_port, model_file, model_wget_path):
 75 |     cmd = f"'cd /workspace/models/{model_file} && rm -rf *.safetensors && wget -q {model_wget_path}'"
 76 |     get_ipython().system(f'ssh -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 {cmd}')
 77 | 
 78 | import time
 79 | 
 80 | import threading
 81 | from concurrent.futures import ThreadPoolExecutor, Future
 82 | 
 83 | def run_ssh_command(command, pod_id, api_key):
 84 | 
 85 |     process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1, universal_newlines=True)
 86 |     server_started = threading.Event()
 87 | 
 88 |     def stdout_thread(process):
 89 |         for line in iter(process.stdout.readline, ''):
 90 |             line = line.strip()
 91 |             print(line)
 92 |             sys.stdout.flush()
 93 |             if "Running on local URL" in line:
 94 |                 server_started.set()
 95 | 
 96 |     def stderr_thread(process):
 97 |         for line in iter(process.stderr.readline, ''):
 98 |             print(line.strip(), file=sys.stderr)
 99 |             sys.stderr.flush()
100 |             if "Connection to " in line:
101 |                 stop_and_terminate_pod(pod_id, api_key)
102 |                 raise Exception(line)
103 | 
104 |     threading.Thread(target=stdout_thread, args=(process,), daemon=True).start()
105 |     threading.Thread(target=stderr_thread, args=(process,), daemon=True).start()
106 | 
107 |     def check_timeout(timeout, event):
108 |         time.sleep(timeout * 60)  # Timeout in minutes
109 |         if not event.is_set():
110 |             raise Exception('Timeout exceeded')
111 | 
112 |     timeout_thread = threading.Thread(target=check_timeout, args=(5, server_started), daemon=False)
113 |     timeout_thread.start()
114 | 
115 |     while not server_started.is_set():
116 |         time.sleep(1)
117 |         if not timeout_thread.is_alive():
118 |             raise Exception('Timeout exceeded')
119 | 
120 |     print("continuing")
121 | 
122 |     sys.stdout.flush()
123 |     sys.stderr.flush()
124 | 
125 | 
126 | 
def run_pod_server(ssh_ip, ssh_port, model_file, gptq_params, pod_id, api_key, use_autoq = False, additional_cmd=""):
    """Start text-generation-webui's server.py on the pod over SSH.

    Builds the ssh command line, prints it, and hands it to
    ``run_ssh_command``. Any exception is caught and printed rather than
    propagated, so this function always returns ``None``.

    Args:
        ssh_ip, ssh_port: SSH endpoint of the pod.
        model_file: value for ``--model`` (looked up in /workspace/models).
        gptq_params: extra flags inserted verbatim after ``--model-dir``.
        pod_id, api_key: forwarded to ``run_ssh_command``.
        use_autoq: when truthy, adds ``--autogptq``.
        additional_cmd: further flags inserted verbatim.
    """
    auto = "--autogptq" if use_autoq else ""
    try:
        ssh_prefix = f"ssh -tt -o StrictHostKeyChecking=no root@{ssh_ip} -p {ssh_port} -i ~/.ssh/id_ed25519 "
        remote_cmd = f"""'cd /root/text-generation-webui && python server.py --listen --api --model {model_file} --model-dir /workspace/models {gptq_params} {additional_cmd} --trust-remote-code --auto-devices {auto} --model_type "Llama"'"""
        command = ssh_prefix + remote_cmd
        print(command)
        run_ssh_command(command, pod_id, api_key)
        print("Done")
    except Exception as e:
        print(f"An error occurred: {e}")
138 | 
def launch_webui(api_key, api_url='https://api.runpod.io/graphql', model="TheBloke/wizard-vicuna-13B-GPTQ", gptq_params="", gpu_type="NVIDIA RTX A4000", pod_num="1", url_replace_safetensors = None, use_new_gptq = False, use_autoq = False, additional_cmd="", use_async = False):
    """Deploy a pod, download a model onto it and start the webui server.

    Steps, in order: launch the pod, wait for its ports, print the WebUI/API/
    SSH access strings, optionally swap in GPTQ-for-LLaMa, download the
    model, optionally replace its safetensors file, start the server.

    Note: the ``api_url`` argument is not read; it is overwritten with the
    pod's port-5000 proxy URL, which is what gets returned.

    Returns:
        ``(api_url, model_file, pod_id)``, or
        ``(api_url, model_file, pod_id, ws_ip, ws_port)`` when ``use_async``
        is truthy.
    """
    pod_id, machine_id, host_id = launch_pod(api_key, gpu_type, pod_num)
    ssh_ip, ssh_port, ws_ip, ws_port = wait_for_pod(pod_id, api_key)

    url = f"https://{pod_id}-7860.proxy.runpod.net"
    api_url = f"https://{pod_id}-5000.proxy.runpod.net"
    ssh = f"ssh -o StrictHostKeyChecking=no {machine_id}@ssh.runpod.io -i ~/.ssh/id_ed25519"
    print("WebUI:", url, "API", api_url, "SSH", ssh, sep="\n")
    print("\n\n")

    if use_new_gptq:
        swap_gptq(ssh_ip, ssh_port)

    model_file = download_model(ssh_ip, ssh_port, model)

    if url_replace_safetensors is not None:
        print("Removing safetensors...")
        remove_safetensors(ssh_ip, ssh_port, model_file, url_replace_safetensors)

    run_pod_server(ssh_ip, ssh_port, model_file, gptq_params, pod_id, api_key, use_autoq=use_autoq, additional_cmd=additional_cmd)

    outcome = (api_url, model_file, pod_id)
    if use_async:
        # Async callers also get the endpoint mapped to private port 5005.
        outcome += (ws_ip, ws_port)
    return outcome
160 | 
def pod_benchmark(filename, prompt, assistant_tag, pod_id, api_key, start_from=0,
                      host="http://localhost", port=5000, insert_func_stub=True, use_old_parser = False, deterministic=True, use_async = False):
    """Run ``run_benchmark`` against a pod, retrying while the API answers 404.

    An exception whose first argument is a tuple with 404 as its second
    element is treated as "server not ready": the benchmark is retried after
    5 seconds. Any other exception stops and terminates the pod and is then
    re-raised unchanged.

    Args:
        pod_id, api_key: used only to terminate the pod on a fatal error.
        All other arguments are forwarded to ``run_benchmark``.
    """
    import time

    while True:
        try:
            run_benchmark(filename, prompt, start_from=start_from,
                          host=host, port=port, insert_func_stub=insert_func_stub, assistant_tag=assistant_tag, use_old_parser = use_old_parser, deterministic=deterministic, use_async = use_async)
            break  # If the function executes without raising an exception, exit the loop
        except Exception as e:
            # Inspect the exception defensively: indexing e.args[0] (or its
            # second element) on an exception raised without arguments would
            # itself raise IndexError here, skipping pod termination and
            # masking the original error.
            first_arg = e.args[0] if e.args else None
            is_404 = isinstance(first_arg, tuple) and len(first_arg) > 1 and first_arg[1] == 404
            if is_404:
                # Retry the operation after waiting for some time
                print("Caught 404 error. Retrying after 5 seconds...")
                time.sleep(5)
            else:
                # Not the specific error we handle: release the pod, re-raise
                stop_and_terminate_pod(pod_id, api_key)
                raise
180 | 
def stop_pod(pod_id, api_key):
    """Send the RunPod ``podStop`` GraphQL mutation for ``pod_id`` via curl.

    Args:
        pod_id: id of the pod to stop.
        api_key: RunPod API key, placed in the GraphQL URL.
    """
    stop_pod_command = f"""curl --request POST \
    --header 'content-type: application/json' \
    --url 'https://api.runpod.io/graphql?api_key={api_key}' \
    --data '{{"query": "mutation {{ podStop(input: {{podId: \\"{pod_id}\\"}}) {{ id desiredStatus }} }}"}}'"""
    # NOTE(review): the argument is the literal text '{stop_pod_command}', not
    # an f-string. Presumably this relies on IPython's shell expanding
    # `{name}` from the caller's local variables (as `!{name}` does in a
    # notebook) — if so, the local variable above must keep this exact name.
    # TODO confirm against the IPython version in use.
    get_ipython().system('{stop_pod_command}')
187 | 
def terminate_pod(pod_id, api_key):
    """Send the RunPod ``podTerminate`` GraphQL mutation for ``pod_id`` via curl.

    Args:
        pod_id: id of the pod to terminate.
        api_key: RunPod API key, placed in the GraphQL URL.
    """
    terminate_pod_command = f"""curl --request POST \
    --header 'content-type: application/json' \
    --url 'https://api.runpod.io/graphql?api_key={api_key}' \
    --data '{{"query": "mutation {{ podTerminate(input: {{podId: \\"{pod_id}\\"}}) }}"}}'"""
    # NOTE(review): the argument is the literal text '{terminate_pod_command}',
    # not an f-string. Presumably this relies on IPython's shell expanding
    # `{name}` from the caller's local variables (as `!{name}` does in a
    # notebook) — if so, the local variable above must keep this exact name.
    # TODO confirm against the IPython version in use.
    get_ipython().system('{terminate_pod_command}')
194 | 
def stop_and_terminate_pod(pod_id, api_key):
    """Stop the pod, then terminate it (two API calls, in that order)."""
    for action in (stop_pod, terminate_pod):
        action(pod_id, api_key)
198 |     
def killall_pods(api_key):
    """Stop and terminate every pod listed for the account owning ``api_key``.

    Queries ``myself { pods { ... } }`` via curl, collects all pod ids from
    the response, then stops and terminates each one in listing order.
    """
    list_pods_cmd = """curl -s --request POST \
  --header 'content-type: application/json' \
  --url 'https://api.runpod.io/graphql?api_key=%s' \
  --data '{"query": "query Pods { myself { pods { id name runtime { uptimeInSeconds ports { ip isIpPublic privatePort publicPort type } gpus { id gpuUtilPercent memoryUtilPercent } container { cpuPercent memoryPercent } } } } }"}'""" % api_key
    output_lines = get_ipython().getoutput(list_pods_cmd)

    # The shell returns the output as a list of lines; rejoin before decoding.
    response = json.loads("".join(output_lines))
    pods = response['data']['myself']['pods']

    # Collect every id up front so a malformed entry fails before any
    # pod has been touched.
    ids_to_kill = [entry['id'] for entry in pods]
    for one_id in ids_to_kill:
        stop_and_terminate_pod(one_id, api_key)


--------------------------------------------------------------------------------
/prompt_formats.txt:
--------------------------------------------------------------------------------
 1 | ——
 2 | 
 3 | Alpaca:
 4 | 
 5 | ——
 6 | 
 7 | ### Instruction:
 8 | Complete the following Python code: 
 9 | Notes: respond with the entire complete function definition
10 | do not add any comments, be as concise in your code as possible
11 | use only built-in libraries, assume no additional imports other than those provided (if any)
12 | 
13 | code:
14 | from typing import List
15 | 
16 | 
17 | def has_close_elements(numbers: List[float], threshold: float) -> bool:
18 |     """ Check if in given list of numbers, are any two numbers closer to each other than
19 |     given threshold.
20 |     >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
21 |     False
22 |     >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
23 |     True
24 |     """
25 | 
26 | 
27 | ### Response:
28 | ```python
29 | def has_close_elements(
30 | 
31 | 
32 | 
33 | ——
34 | 
35 | Vicuna:
36 | 
37 | ——
38 | 
39 | 
40 | A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
41 | USER:
42 | Complete the following Python code: 
43 | Notes: respond with the entire complete function definition
44 | do not add any comments, be as concise in your code as possible
45 | use only built-in libraries, assume no additional imports other than those provided (if any)
46 | 
47 | code:
48 | from typing import List
49 | 
50 | 
51 | def has_close_elements(numbers: List[float], threshold: float) -> bool:
52 |     """ Check if in given list of numbers, are any two numbers closer to each other than
53 |     given threshold.
54 |     >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
55 |     False
56 |     >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
57 |     True
58 |     """
59 | 
60 | 
61 | ASSISTANT:
62 | ```python
63 | def has_close_elements( 
64 | ——
65 | 


--------------------------------------------------------------------------------
/templates.py:
--------------------------------------------------------------------------------
# Prompt templates. Every template contains the literal placeholder
# `{PROMPT}`; the code that substitutes it is not part of this file.

# The prompt alone, with no surrounding text.
VERYSHORT_TEMPLATE = """{PROMPT}"""

# An opening ```python fence followed by the prompt (fence left unclosed).
SHORT_TEMPLATE = """```python
{PROMPT}"""

# Prompt wrapped in <|prompt|> ... <|endoftext|><|answer|> tags, ending right
# after an opening ```python fence.
H2O_TEMPLATE = """<|prompt|>{PROMPT}<|endoftext|><|answer|>```python
"""
  8 | ######
  9 | 
# "### Instruction:" / "### Response:" templates preceded by a two-sentence
# preamble. Each ends right after an opening ```python fence.

# Long instruction block ("Notes: ..."), prompt inserted unfenced under "code:".
ALPACA_PREFIX_TEMPLATE = """Below is an instruction that describes a task. 
Write a response that appropriately completes the request.

### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

### Response:
```python
"""

# Short instruction ("Please complete the following code:"), prompt unfenced.
ALPACA_PREFIX_TEMPLATE_MEDIUM = """Below is an instruction that describes a task. 
Write a response that appropriately completes the request.

### Instruction:
Please complete the following code:
{PROMPT}

### Response:
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
# NOTE(review): unlike the two templates above, the preamble here uses a `\n`
# escape followed by a space instead of a trailing space plus a real line
# break, so the rendered second line starts with a space — confirm intended.
ALPACA_PREFIX_TEMPLATE_MD = """Below is an instruction that describes a task.\n Write a response that appropriately completes the request.

### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
```python
{PROMPT}
```

### Response:
```python
"""
 53 | 
 54 | ######
 55 | 
# "Q:" / "A:" templates. Each ends right after an opening ```python fence.

# Long instruction block, prompt inserted unfenced under "code:".
QA_TEMPLATE = """Q: Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

A:
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
QA_TEMPLATE_MD = """Q: Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
```python
{PROMPT}
```

A:
```python
"""

# Short instruction ("Please complete the following code:"), prompt unfenced.
QA_TEMPLATE_MEDIUM = """Q: Please complete the following code:
{PROMPT}

A:
```python
"""
 88 | 
 89 | ######
 90 | 
# "[|Human|]" / "[|AI|]" templates. Each ends right after an opening
# ```python fence.

# Long instruction block, prompt inserted unfenced under "code:".
FALCON_TEMPLATE = """[|Human|] Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

[|AI|]
```python
"""

# Short instruction ("Please complete the following code:"), prompt unfenced.
FALCON_TEMPLATE_MEDIUM = """[|Human|] Please complete the following code:
{PROMPT}

[|AI|]
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
FALCON_TEMPLATE_MD = """[|Human|] Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
```python
{PROMPT}
```

[|AI|]
```python
"""
123 | 
124 | ######
125 | 
# "<|prompter|>" / "<|assistant|>:" templates. Each ends right after an
# opening ```python fence.

# Long instruction block, prompt inserted unfenced under "code:".
OPENASSISTANT_TEMPLATE = """<|prompter|> Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

<|assistant|>:
```python
"""

# Short instruction ("Please complete the following code:"), prompt unfenced.
OPENASSISTANT_TEMPLATE_MEDIUM = """<|prompter|> Please complete the following code:
{PROMPT}

<|assistant|>:
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
OPENASSISTANT_TEMPLATE_MD = """<|prompter|> Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
```python
{PROMPT}
```

<|assistant|>:
```python
"""
158 | 
159 | ######
160 | 
# "USER:" / "ASSISTANT:" templates preceded by a one-sentence chat preamble.
# Each ends right after an opening ```python fence.

# Long instruction block, prompt inserted unfenced under "code:".
VICUNA_TEMPLATE = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.

USER: Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

ASSISTANT:
```python
"""

# Short instruction ("Please complete the following code:"), prompt unfenced.
VICUNA_TEMPLATE_MEDIUM = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.

USER: Please complete the following code:
{PROMPT}

ASSISTANT:
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
VICUNA_TEMPLATE_MD = """A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.

USER: Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
```python
{PROMPT}
```

ASSISTANT:
```python
"""
199 | 
200 | ######
201 | 
# "### Instruction:" / "### Input:" / "### Response:" templates with a
# one-sentence preamble; the prompt goes in the "### Input:" section. Each
# ends right after an opening ```python fence.

# Long instruction block, prompt unfenced.
ALPACA_INPUT_TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

### Input:
{PROMPT}

### Response:
```python
"""

# Short instruction ("Please complete the following code."), prompt unfenced.
ALPACA_INPUT_TEMPLATE_MEDIUM = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Please complete the following code.

### Input:
{PROMPT}

### Response:
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
ALPACA_INPUT_TEMPLATE_MD = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

### Input:
```python
{PROMPT}
```

### Response:
```python
"""
245 | 
246 | ######
247 | 
# "### Instruction:" / "### Response:" templates without any preamble. Each
# ends right after an opening ```python fence.

# Long instruction block, prompt inserted unfenced under "code:".
ALPACA_TEMPLATE = """### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

### Response:
```python
"""


# Same text as ALPACA_TEMPLATE except that the string starts with a newline,
# i.e. there is an empty line before "### Instruction:".
ALPACA_TEMPLATE_BAD_NL = """
### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
{PROMPT}

### Response:
```python
"""

# Short instruction ("Please complete the following code:"), prompt unfenced.
ALPACA_TEMPLATE_MEDIUM = """### Instruction:
Please complete the following code:
{PROMPT}

### Response:
```python
"""

# Long instruction block with the prompt wrapped in its own ```python fence.
ALPACA_TEMPLATE_MD = """### Instruction:
Complete the following Python code: 
Notes: respond with the entire complete function definition
do not add any comments, be as concise in your code as possible
use only built-in libraries, assume no additional imports other than those provided (if any)

code:
```python
{PROMPT}
```

### Response:
```python
"""

# Short instruction with the prompt wrapped in its own ```python fence.
ALPACA_TEMPLATE_MD_MEDIUM = """### Instruction:
Please complete the following code:
```python
{PROMPT}
```

### Response:
```python
"""
308 | 
309 | ######


--------------------------------------------------------------------------------