├── .gitignore
├── LICENSE
├── README.md
├── babilong
    ├── __init__.py
    ├── babilong_utils.py
    ├── collect_results.py
    ├── metrics.py
    └── prompts.py
├── babilong_evals
    └── microsoft
    │   └── Phi-3-mini-128k-instruct
    │       ├── qa10_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa10_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa11_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa11_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa12_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa12_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa13_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa13_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa14_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa14_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa15_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa15_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa16_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa16_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa17_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa17_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa18_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa18_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa19_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa19_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_0k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_0k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_128k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_128k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_128k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_128k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_16k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_16k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_16k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_16k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_1k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_1k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_1k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_1k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_2k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_2k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_2k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_2k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_32k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_32k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_32k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_32k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_4k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_4k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_4k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_4k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_64k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_64k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_64k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_64k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa1_8k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa1_8k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa1_8k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa1_8k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa20_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa20_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_0k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_0k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_128k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_128k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_128k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_128k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_16k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_16k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_16k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_16k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_1k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_1k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_1k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_1k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_2k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_2k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_2k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_2k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_32k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_32k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_32k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_32k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_4k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_4k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_4k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_4k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_64k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_64k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_64k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_64k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa2_8k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa2_8k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa2_8k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa2_8k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_0k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_0k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_128k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_128k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_128k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_128k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_16k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_16k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_16k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_16k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_1k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_1k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_1k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_1k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_2k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_2k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_2k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_2k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_32k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_32k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_32k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_32k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_4k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_4k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_4k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_4k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_64k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_64k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_64k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_64k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa3_8k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa3_8k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa3_8k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa3_8k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_0k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_0k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_128k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_128k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_128k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_128k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_16k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_16k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_16k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_16k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_1k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_1k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_1k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_1k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_2k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_2k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_2k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_2k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_32k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_32k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_32k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_32k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_4k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_4k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_4k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_4k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_64k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_64k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_64k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_64k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa4_8k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa4_8k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa4_8k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa4_8k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_0k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_0k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_128k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_128k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_128k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_128k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_16k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_16k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_16k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_16k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_1k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_1k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_1k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_1k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_2k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_2k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_2k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_2k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_32k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_32k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_32k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_32k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_4k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_4k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_4k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_4k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_64k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_64k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_64k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_64k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa5_8k_instruction_no_examples_no_post_prompt_no.csv
    │       ├── qa5_8k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa5_8k_instruction_yes_examples_yes_post_prompt_yes.csv
    │       ├── qa5_8k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa6_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa6_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa7_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa7_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa8_0k_instruction_no_examples_no_post_prompt_no.json
    │       ├── qa8_0k_instruction_yes_examples_yes_post_prompt_yes.json
    │       ├── qa9_0k_instruction_no_examples_no_post_prompt_no.json
    │       └── qa9_0k_instruction_yes_examples_yes_post_prompt_yes.json
├── data
    ├── README.md
    ├── create_tasks.py
    ├── tasks_1-20_v1-2.zip
    └── test_100_idxs.json
├── images
    ├── babilong_evals_all.png
    └── babilong_scheme.png
├── notebooks
    ├── README.md
    ├── babilong_eval_openai_api_models.ipynb
    ├── babilong_evaluation_example.ipynb
    ├── babilong_usage_example.ipynb
    ├── demo_llm.ipynb
    ├── draw_evals.ipynb
    ├── eval_LLaMA-2-7B-32K.ipynb
    ├── eval_Llama-3.ipynb
    ├── eval_LongAlpaca-13B.ipynb
    ├── eval_Mistral-7B-Instruct-v0.2.ipynb
    ├── eval_Mixtral-8x7B-Instruct-v0.1.ipynb
    ├── eval_Phi-3-medium-128k-instruct.py
    ├── eval_Phi-3-mini-128k-instruct.py
    ├── eval_RAG_Llama3.py
    ├── eval_Yarn-Mistral-7b-128k.ipynb
    ├── eval_Yi-34b.py
    ├── eval_Yi-9b-200k.py
    ├── eval_activation-beacon_models.ipynb
    ├── eval_ai21labs-Jamba-v0.1.py
    ├── eval_c4ai-command-r-v01.ipynb
    ├── eval_gpt2.py
    ├── eval_longchat-7b-v1.5-32k.ipynb
    └── test_RMT.ipynb
├── requirements.txt
└── scripts
    ├── run_GigaChat-20B-A3B-instruct-v1.5-start_vllm.sh
    ├── run_LLaMA-2-7B-32K_no_instruct.sh
    ├── run_Llama-2-7B-32K-instruct.sh
    ├── run_Llama-3-8B-ProLong-512k-Instruct-start_vllm.sh
    ├── run_Llama-3.1-8B-UltraLong-1M-Instruct-start_vllm.sh
    ├── run_LongAlpaca-13B.sh
    ├── run_Meta-Llama-3-8B-Instruct-start_vllm.sh
    ├── run_Meta-Llama-3.1-70B-Instruct_vllm.sh
    ├── run_Meta-Llama-3.1-8B-Instruct_32k.sh
    ├── run_Meta-Llama-3.1-8B-Instruct_64-128k_vllm.sh
    ├── run_Meta-Llama-3.2-1B-Instruct_vllm.sh
    ├── run_Meta-Llama-3.2-3B-Instruct_vllm.sh
    ├── run_Meta-Llama-4-Scout-17B-16E-Instruct-start_vllm.sh
    ├── run_Meta-Llama-4-Scout-17B-16E-Instruct-start_vllm_64k-128k.sh
    ├── run_Mistral-7B-Instruct-v0.2_with_instruct.sh
    ├── run_Mixtral-8x22B-Instruct-v0.1_with_instruct.sh
    ├── run_Mixtral-8x7B-Instruct-v0.1.sh
    ├── run_Phi-3.5-MoE-instruct.sh
    ├── run_Phi-3.5-mini-instruct.sh
    ├── run_Phi-4-mini-instruct_start_vllm.sh
    ├── run_Qwen2-72B-Instruct.sh
    ├── run_Qwen2-7B-Instruct.sh
    ├── run_Qwen2.5-72B-Instruct.sh
    ├── run_Qwen2.5-7B-Instruct.sh
    ├── run_YandexGPT-5-Lite-8B-instruct-start_vllm.sh
    ├── run_Yarn-Mistral-7b-128k_llamacpp_no_instruct.sh
    ├── run_Yarn-Mistral-7b-128k_llamacpp_with_instruct.sh
    ├── run_Yarn-Mistral-7b-128k_with_instruct.sh
    ├── run_activation-beacon-llama2-7b-chat_no_instruct.sh
    ├── run_activation-beacon-llama2-7b-chat_with_instruct.sh
    ├── run_activation-beacon-mistral-7b_no_instruct.sh
    ├── run_activation-beacon-mistral-7b_with_instruct.sh
    ├── run_c4ai-command-r-v01_llamacpp_with_instruct.sh
    ├── run_chatglm3-6b-128k.sh
    ├── run_gemma-2-9b-it.sh
    ├── run_gemma-3-12b-it-start_vllm.sh
    ├── run_gemma-3-27b-it-start_vllm.sh
    ├── run_gemma-3-4b-it-start_vllm.sh
    ├── run_glm-4-9b-chat-1m.sh
    ├── run_longchat-7b-v1.5-32k_no_instruct.sh
    ├── run_longchat-7b-v1.5-32k_with_instruct.sh
    ├── run_model_on_babilong.py
    ├── run_recurrentgemma-9b-it.sh
    └── run_xlstm-7b.sh


/babilong/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/booydar/babilong/f09a184b43316a751d5059e13de7c557b6daca86/babilong/__init__.py


--------------------------------------------------------------------------------
/babilong/metrics.py:
--------------------------------------------------------------------------------
 1 | TASK_LABELS = {'qa1': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
 2 |                'qa2': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
 3 |                'qa3': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
 4 |                'qa4': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
 5 |                'qa5': ['Bill', 'Fred', 'Jeff', 'Mary', 'apple', 'football', 'milk'],
 6 |                'qa6': ['no', 'yes'],
 7 |                'qa7': ['none', 'one', 'three', 'two'],
 8 |                'qa8': ['apple', 'football', 'milk', 'nothing'],
 9 |                'qa9': ['no', 'yes'],
10 |                'qa10': ['maybe', 'no', 'yes'],
11 |                'qa11': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
12 |                'qa12': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
13 |                'qa13': ['bathroom', 'bedroom', 'garden', 'hallway', 'kitchen', 'office'],
14 |                'qa14': ['bedroom', 'cinema', 'kitchen', 'office', 'park', 'school'],
15 |                'qa15': ['cat', 'mouse', 'sheep', 'wolf'],
16 |                'qa16': ['gray', 'green', 'white', 'yellow'],
17 |                'qa17': ['no', 'yes'],
18 |                'qa18': ['no', 'yes'],
19 |                'qa19': ['e,e', 'e,n', 'e,s', 'n,e', 'n,n', 'n,w', 's,e', 's,s', 's,w', 'w,n', 'w,s', 'w,w'],
20 |                'qa20': ['bedroom', 'bored', 'garden', 'hungry', 'kitchen', 'thirsty', 'tired']
21 |                }
22 | 
23 | 
24 | def preprocess_output(output):
25 |     output = output.lower()
26 |     # take only the first sentence from output
27 |     output = output.split('.')[0]
28 |     # filter responses when model tries to generate examples
29 |     output = output.split('<context>')[0]
30 |     output = output.split('<example>')[0]
31 |     output = output.split('Question')[0]
32 |     return output
33 | 
34 | 
35 | def compare_answers(target, output, question, task_labels):
36 |     output = preprocess_output(output)
37 |     target = target.lower()
38 |     task_labels = {label.lower() for label in task_labels}
39 | 
40 |     # extract labels that were mentioned in the question
41 |     labels_in_question = {label for label in task_labels if label in question.lower()}
42 |     # extract labels that were mentioned in the model output
43 |     labels_in_output = {label for label in task_labels if label in output}
44 |     # filter labels in the output to exclude mentioned in the question
45 |     # mentions in questions are never targets
46 |     labels_in_output = labels_in_output - labels_in_question
47 | 
48 |     # check if the target is the only prediction
49 |     if ',' in target and len(target) > 3:
50 |         # if target contains multiple subtargets in qa8
51 |         subtargets = target.split(',')
52 |         num_subtargets = len(subtargets)
53 |         if all([t in labels_in_output for t in subtargets]) and len(labels_in_output) == num_subtargets:
54 |             return True
55 |     else:
56 |         if target in labels_in_output and len(labels_in_output) == 1:
57 |             return True
58 | 
59 |     return False
60 | 


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa10_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa10_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nBill is in the kitchen. Julie is either in the school or the cinema. Is Bill in the bedroom?\nAnswer: no\n</example>\n<example>\nFred is in the bedroom. Mary is either in the school or the cinema. Is Mary in the school?\nAnswer: maybe\n</example>\n<example>\nFred is either in the kitchen or the park. Bill moved to the cinema. Is Bill in the cinema?\nAnswer: yes\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - $yes$ or $no$ or $maybe$. Do not write anything else. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa11_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa11_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nDaniel journeyed to the hallway. After that he journeyed to the garden. Where is Daniel?\nAnswer: garden\n</example>\n<example>\nMary moved to the office. Afterwards she journeyed to the kitchen. Daniel went to the hallway. Then he journeyed to the garden. Where is Mary?\nAnswer: kitchen\n</example>\n<example>\nSandra moved to the kitchen. After that she went back to the hallway. Sandra moved to the bedroom. Then she went to the hallway. Mary moved to the bedroom. Afterwards she travelled to the bathroom. Where is Sandra\n?Answer: hallway\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa12_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa12_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nMary and Daniel travelled to the bathroom. John and Daniel travelled to the office. Where is Daniel?\nAnswer: office\n</example>\n<example>\nSandra and Mary went back to the office. Daniel and Sandra went to the bedroom. Sandra and Mary travelled to the hallway. John and Mary went to the kitchen. Where is Mary?\nAnswer: kitchen\n</example>\n<example>\nDaniel and Sandra went back to the hallway. Daniel and John moved to the office. Daniel and John moved to the garden. Daniel and Mary went back to the bathroom. Daniel and John went back to the kitchen. Daniel and Sandra went to the bathroom. Where is John\n?Answer: kitchen\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa13_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa13_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nMary and Daniel travelled to the bathroom. Then they journeyed to the hallway. Where is Daniel?\nAnswer: hallway\n</example>\n<example>\nDaniel and Sandra travelled to the kitchen. After that they journeyed to the hallway. Mary and Daniel travelled to the bedroom. After that they travelled to the hallway. Where is Sandra?\nAnswer: hallway\n</example>\n<example>\nJohn and Mary moved to the bathroom. Then they travelled to the office. John and Mary went to the kitchen. Afterwards they went to the bedroom. John and Sandra moved to the bathroom. Following that they went back to the kitchen. Where is Mary\n?Answer: bedroom\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa14_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa14_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nBill went back to the cinema yesterday. Julie went to the school this morning. Fred went to the park yesterday. Yesterday Julie went to the office. Where was Julie before the school?\nAnswer: office\n</example>\n<example>\nThis morning Fred went to the kitchen. Fred journeyed to the bedroom yesterday. Mary travelled to the bedroom this morning. Yesterday Mary went to the cinema. Where was Mary before the bedroom?\nAnswer: cinema\n</example>\n<example>\nYesterday Julie went back to the park. Julie went to the bedroom this morning. Bill journeyed to the cinema yesterday. This morning Bill went back to the park. This evening Julie went to the school. This afternoon Julie went back to the park. Where was Julie before the bedroom?\nAnswer: park\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa15_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa15_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about animals, their names and relations. The facts and a question are hidden in some random text. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMice are afraid of wolves. Gertrude is a mouse. Cats are afraid of sheep. Winona is a mouse. Sheep are afraid of wolves. Emily is a mouse. Jessica is a wolf. What is gertrude afraid of?\nAnswer: wolf\n</example>\n<example>\nMice are afraid of wolves. Gertrude is a mouse. Cats are afraid of sheep. Winona is a mouse. Sheep are afraid of wolves. Emily is a mouse. Jessica is a wolf. What is jessica afraid of?\nAnswer: cat\n</example>\n<example>\nMice are afraid of cats. Wolves are afraid of sheep. Emily is a wolf. Cats are afraid of sheep. Gertrude is a wolf. Sheep are afraid of cats. Winona is a wolf. What is emily afraid of?\nAnswer: sheep\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - an animal species. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa16_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa16_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about animals, their names and colors. The facts and a question are hidden in some random text. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nLily is a frog. Bernhard is a frog. Bernhard is green. Brian is a lion. Brian is white. Julius is a swan. Julius is green. Lily is green. Greg is a swan. What color is Greg?\nAnswer: green\n</example>\n<example>\nJulius is a lion. Lily is a rhino. Bernhard is a swan. Lily is white. Bernhard is green. Greg is a rhino. Greg is gray. Julius is white. Brian is a lion. What color is Brian?\nAnswer: white\n</example>\n<example>\nBrian is a rhino. Julius is a lion. Bernhard is a lion. Greg is a swan. Brian is gray. Greg is white. Lily is a rhino. Bernhard is yellow. Lily is gray. What color is Julius?\nAnswer: yellow\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - a color. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa17_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa17_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different figures, their location and colors, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe triangle is above the pink rectangle. The blue square is to the left of the triangle. Is the pink rectangle to the right of the blue square?\nAnswer: yes\n</example>\n<example>\nThe red sphere is to the left of the yellow square. The red sphere is below the pink rectangle. Is the pink rectangle to the left of the yellow square?\nAnswer: yes\n</example><example>\nThe red sphere is above the pink rectangle. The red sphere is to the right of the red square. Is the pink rectangle above the red square?\nAnswer: no\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - $yes$ or $no$. Do not write anything else. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa18_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa18_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different objects and their sizes, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe box of chocolates fits inside the chest. The box is bigger than the chest. The box is bigger than the suitcase. The suitcase fits inside the box. The container is bigger than the box of chocolates. Does the box fit in the box of chocolates?\nAnswer: no\n</example>\n<example>\nThe suitcase is bigger than the container. The container fits inside the box. The chest is bigger than the chocolate.The suitcase fits inside the box. The chest fits inside the box. Does the chocolate fit in the box?\nAnswer: yes\n</example><example>\nThe chocolate fits inside the box of chocolates. The suitcase fits inside the box. The chocolate fits inside the box. The box is bigger than the box of chocolates. The suitcase is bigger than the box of chocolates. Is the chocolate bigger than the box?\nAnswer: no\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - $yes$ or $no$. Do not write anything else. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa19_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa19_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different places and their locations, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe office is east of the hallway. The kitchen is north of the office. The garden is west of the bedroom. The office is west of the garden. The bathroom is north of the garden. How do you go from the kitchen to the garden?\nAnswer: s,e\n</example>\n<example>\nThe bedroom is west of the hallway. The office is east of the garden. The garden is north of the kitchen. The kitchen is north of the bathroom. The hallway is west of the garden. How do you go from the kitchen to the hallway?\nAnswer: n,w\n</example>\n<example>\nThe bedroom is south of the hallway. The bathroom is east of the office. The kitchen is west of the garden. The garden is south of the office. The office is south of the bedroom. How do you go from the garden to the bedroom?\nAnswer: n,n\n</example>\n",
 5 |         "post_prompt": "Your answer should contain only two letters, separated by a comma - ordinal directions. You can choose the letters from $n$, $s$, $e$ and $w$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_128k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_128k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_16k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_16k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that. ",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_1k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_1k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that. ",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_2k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_2k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that. ",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_32k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_32k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that. ",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_4k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_4k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that. ",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_64k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_64k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_8k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa1_8k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about positions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location to answer the question.",
 4 |         "examples": "<example>\nCharlie went to the hallway. Judith come back to the kitchen. Charlie travelled to balcony. Where is Charlie?\nAnswer: The most recent location of Charlie is balcony.\n</example>\n\n<example>\nAlan moved to the garage. Charlie went to the beach. Alan went to the shop. Rouse travelled to balcony. Where is Alan?\nAnswer: The most recent location of Alan is shop.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The most recent location of \u2019person\u2019 is \u2019location\u2019. Do not write anything else after that. ",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa20_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa20_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people, their locations and condition hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nSumit is tired. Where will sumit go?\nAnswer: bedroom\n</example>\n<example>\nYann is hungry. Yann journeyed to the kitchen. Why did yann go to the kitchen?\nAnswer: hungry\n</example>\n<example>\nAntoine is thirsty. Yann is tired. Yann went back to the bedroom. Yann picked up the pajamas there.Jason is thirsty. Antoine went back to the kitchen. Why did antoine go to the kitchen?\nAnswer: thirsty\n</example>\n<context>\n",
 5 |         "post_prompt": "Your answer should contain only one word - a person condition or a place. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_128k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_128k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_16k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_16k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_1k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_1k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_2k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_2k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_32k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_32k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_4k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_4k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_64k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_64k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_8k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa2_8k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question.You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nCharlie went to the kitchen. Charlie got a bottle. Charlie moved to the balcony. Where is the bottle?\nAnswer: The bottle is in the balcony.\n</example>\n<example>\nAlan moved to the garage. Alan got a screw driver. Alan moved to the kitchen. Where is the screw driver?\nAnswer: The screw driver is in the kitchen.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: The \u2019item\u2019 is in \u2019location\u2019. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom. Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the bedroom the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1$ the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_128k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_128k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the bedroom the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1$ the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_16k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_16k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the kitchen the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1& the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_1k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_1k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the kitchen the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1& the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_2k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_2k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the kitchen the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1& the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_32k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_32k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the kitchen the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1& the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_4k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_4k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the kitchen the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1& the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_64k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_64k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location.",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the bedroom the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1$ the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_8k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa3_8k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I give you context with the facts about locations and actions of different persons hidden in some random text and a question. You need to answer the question based only on the information from the facts.\nIf a person got an item in the first location and travelled to the second location the item is also in the second location. If a person dropped an item in the first location and moved to the second location the item remains in the first location",
 4 |         "examples": "<example>\nJohn journeyed to the bedroom.Mary grabbed the apple. Mary went back to the bathroom. Daniel journeyed to the bedroom. Daniel moved to the garden. Mary travelled to the kitchen. Where was the apple before the kitchen?\nAnswer: Before the kitchen the apple was in the bathroom.\n</example>\n<example>\nJohn went back to the bedroom. John went back to the garden. John went back to the kitchen. Sandra took the football. Sandra travelled to the garden. Sandra journeyed to the bedroom. Where was the football before the bedroom?\nAnswer: Before the kitchen the football was in the garden.\n</example>",
 5 |         "post_prompt": "Always return your answer in the following format: Before the $location_1& the $item$ was in the $location_2$. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_128k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_128k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_16k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_16k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_1k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_1k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_2k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_2k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_32k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_32k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_4k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_4k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_64k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_64k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_8k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa4_8k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about different people, their location and actions, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nThe hallway is south of the kitchen. The bedroom is north of the kitchen. What is the kitchen south of?\nAnswer: bedroom\n</example>\n<example>\nThe garden is west of the bedroom. The bedroom is west of the kitchen. What is west of the bedroom?\nAnswer: garden\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word - location. Do not write anything else after that.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_128k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_128k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_16k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_16k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_1k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_1k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_2k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_2k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_32k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_32k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_4k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_4k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_64k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_64k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_8k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa5_8k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about locations and their relations hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nMary picked up the apple there. Mary gave the apple to Fred. Mary moved to the bedroom. Bill took the milk there. Who did Mary give the apple to?\nAnswer: Fred\n</example>\n<example>\nJeff took the football there. Jeff passed the football to Fred. Jeff got the milk there. Bill travelled to the bedroom. Who gave the football?\nAnswer: Jeff\n</example>\n<example>\nFred picked up the apple there. Fred handed the apple to Bill. Bill journeyed to the bedroom. Jeff went back to the garden. What did Fred give to Bill?\nAnswer: apple\n</example>",
 5 |         "post_prompt": "Your answer should contain only one word. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa6_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa6_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nJohn travelled to the hallway. John travelled to the garden. Is John in the garden?\nAnswer: yes\n</example>\n<example>\nMary went to the office. Daniel journeyed to the hallway. Mary went to the bedroom. Sandra went to the garden. Is Mary in the office?\nAnswer: no\n</example>\n",
 5 |         "post_prompt": "Your answer should contain only one word - $yes$ or $no$. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa7_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa7_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and objects they carry, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nDaniel went to the bedroom. Daniel got the apple there. How many objects is Daniel carrying?\nAnswer: one\n</example>\n<example>\nMary grabbed the apple there. Mary gave the apple to John. How many objects is Mary carrying?\nAnswer: none\n</example>\n<example>\nSandra travelled to the hallway. Sandra picked up the milk there. Sandra took the apple there. Mary travelled to the garden. How many objects is Sandra carrying?\nAnswer: two\n</example>\n",
 5 |         "post_prompt": "Your answer should contain only one word - $none$ or $number_of_objects$. Do not write anything else after that. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa8_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa8_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and objects they carry, hidden in some random text and a question. You need to answer the question based only on the information from the facts.",
 4 |         "examples": "<example>\nSandra travelled to the garden. Mary grabbed the milk there. What is Mary carrying?\nAnswer: milk\n</example>\n<example>\nMary travelled to the kitchen. Sandra travelled to the office. John travelled to the office. Sandra discarded the milk there. What is Sandra carrying?\nAnswer: nothing\n</example>\n<example>\nDaniel grabbed the apple there. Mary went to the office. Daniel moved to the garden. Daniel grabbed the milk there. Mary went to the kitchen. What is Daniel carrying?\nAnswer: apple,milk\n</example>\n",
 5 |         "post_prompt": "Your answer should contain only one or two words: $nothing$ or $object$ or $object_1$, $object_2$. Do not write anything else. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa9_0k_instruction_no_examples_no_post_prompt_no.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "",
 4 |         "examples": "",
 5 |         "post_prompt": "",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/babilong_evals/microsoft/Phi-3-mini-128k-instruct/qa9_0k_instruction_yes_examples_yes_post_prompt_yes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "prompt": {
 3 |         "instruction": "I will give you context with the facts about people and their locations hidden in some random text and a question. You need to answer the question based only on the information from the facts. If a person was in different locations, use the latest location the person was in to answer the question.",
 4 |         "examples": "<example>\nJohn is not in the bathroom. Sandra is not in the bedroom. Is John in the bathroom?\nAnswer: no\n</example>\n<example>\nMary journeyed to the kitchen. John is in the bedroom. Sandra is not in the garden. Is Mary in the kitchen?\nAnswer: yes\n</example>\n",
 5 |         "post_prompt": "Your answer should contain only one word - $yes$ or $no$. Do not write anything else. Do not explain your answer.",
 6 |         "template": "{instruction}\n{examples}\n{post_prompt}\nContext: {context}\n\nQuestion: {question}"
 7 |     },
 8 |     "generate_kwargs": {
 9 |         "num_beams": 1,
10 |         "do_sample": false,
11 |         "temperature": null,
12 |         "top_p": null,
13 |         "top_k": null
14 |     }
15 | }


--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
 1 | ### Generating BABILong samples
 2 | 
 3 | First, uncompress the bAbI dataset.
 4 | ```bash
 5 | unzip tasks_1-20_v1-2.zip
 6 | ```
 7 | 
 8 | To generate dataset samples, run:
 9 | 
10 | ```bash
11 | python create_tasks.py "task_name1 task_name2 task_name3"
12 | ```
13 | You can choose task names from 20 bAbI tasks:
14 | 
15 | `qa1_single-supporting-fact qa2_two-supporting-facts qa3_three-supporting-facts qa4_two-arg-relations qa5_three-arg-relations qa6_yes-no-questions qa7_counting qa8_lists-sets qa9_simple-negation qa10_indefinite-knowledge qa11_basic-coreference qa12_conjunction qa13_compound-coreference qa14_time-reasoning qa15_basic-deduction qa16_basic-induction qa17_positional-reasoning qa18_size-reasoning qa19_path-finding qa20_agents-motivations`
16 | 
17 | The number of samples and the path for the results folder can be changed in the beginning of `create_tasks.py`.


--------------------------------------------------------------------------------
/data/tasks_1-20_v1-2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/booydar/babilong/f09a184b43316a751d5059e13de7c557b6daca86/data/tasks_1-20_v1-2.zip


--------------------------------------------------------------------------------
/images/babilong_evals_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/booydar/babilong/f09a184b43316a751d5059e13de7c557b6daca86/images/babilong_evals_all.png


--------------------------------------------------------------------------------
/images/babilong_scheme.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/booydar/babilong/f09a184b43316a751d5059e13de7c557b6daca86/images/babilong_scheme.png


--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | ### Code for LLM evaluation on BABILong
2 | 
3 | To reproduce the model evaluation please execute jupyter notebooks or run python scripts as follows:
4 | 
5 | ```bash
6 | python eval_gpt2.py
7 | ```
8 | 
9 | The evaluation results will be stored in the `../babilong_evals` directory. To visualize them, you can use `draw_evals.ipynb` 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==2.3.1
2 | datasets==2.19.1
3 | transformers==4.44.2
4 | nltk==3.8.1
5 | numpy==1.26.4
6 | pandas==2.2.2
7 | tqdm==4.66.4
8 | matplotlib==3.8.4
9 | seaborn==0.13.2


--------------------------------------------------------------------------------
/scripts/run_LLaMA-2-7B-32K_no_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="togethercomputer/LLaMA-2-7B-32K"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | USE_CHAT_TEMPLATE=false
11 | USE_INSTRUCTION=false
12 | USE_EXAMPLES=false
13 | USE_POST_PROMPT=false
14 | API_URL=""
15 | 
16 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
17 | 
18 | python scripts/run_model_on_babilong.py \
19 |     --results_folder "$RESULTS_FOLDER" \
20 |     --dataset_name "$DATASET_NAME" \
21 |     --model_name "$MODEL_NAME" \
22 |     --tasks "${TASKS[@]}" \
23 |     --lengths "${LENGTHS[@]}" \
24 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
25 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
26 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
27 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
28 |     --api_url "$API_URL"
29 | 


--------------------------------------------------------------------------------
/scripts/run_Llama-2-7B-32K-instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="togethercomputer/Llama-2-7B-32K-instruct"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | 
11 | USE_CHAT_TEMPLATE=false
12 | USE_INSTRUCTION=true
13 | USE_EXAMPLES=true
14 | USE_POST_PROMPT=true
15 | API_URL=""
16 | 
17 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
18 | 
19 | python scripts/run_model_on_babilong.py \
20 |     --results_folder "$RESULTS_FOLDER" \
21 |     --dataset_name "$DATASET_NAME" \
22 |     --model_name "$MODEL_NAME" \
23 |     --tasks "${TASKS[@]}" \
24 |     --lengths "${LENGTHS[@]}" \
25 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
26 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
27 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
28 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
29 |     --api_url "$API_URL"
30 | 
31 | 
32 | USE_CHAT_TEMPLATE=false
33 | USE_INSTRUCTION=false
34 | USE_EXAMPLES=false
35 | USE_POST_PROMPT=false
36 | API_URL=""
37 | 
38 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
39 | 
40 | python scripts/run_model_on_babilong.py \
41 |     --results_folder "$RESULTS_FOLDER" \
42 |     --dataset_name "$DATASET_NAME" \
43 |     --model_name "$MODEL_NAME" \
44 |     --tasks "${TASKS[@]}" \
45 |     --lengths "${LENGTHS[@]}" \
46 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
47 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
48 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
49 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
50 |     --api_url "$API_URL"
51 | 


--------------------------------------------------------------------------------
/scripts/run_LongAlpaca-13B.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="Yukang/LongAlpaca-13B"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | 
11 | 
12 | USE_CHAT_TEMPLATE=true
13 | USE_INSTRUCTION=true
14 | USE_EXAMPLES=true
15 | USE_POST_PROMPT=true
16 | API_URL=""
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | USE_CHAT_TEMPLATE=true
33 | USE_INSTRUCTION=false
34 | USE_EXAMPLES=false
35 | USE_POST_PROMPT=false
36 | API_URL=""
37 | 
38 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
39 | 
40 | python scripts/run_model_on_babilong.py \
41 |     --results_folder "$RESULTS_FOLDER" \
42 |     --dataset_name "$DATASET_NAME" \
43 |     --model_name "$MODEL_NAME" \
44 |     --tasks "${TASKS[@]}" \
45 |     --lengths "${LENGTHS[@]}" \
46 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
47 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
48 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
49 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
50 |     --api_url "$API_URL"
51 | 


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-3-8B-Instruct-start_vllm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
 3 | set -e
 4 | 
 5 | # Function to check if the API server is ready
 6 | wait_for_server() {
 7 |     echo "Waiting for vLLM server to start..."
 8 |     while true; do
 9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
10 |             echo "vLLM process failed to start!"
11 |             exit 1
12 |         fi
13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
14 |             echo "vLLM server is ready!"
15 |             return 0
16 |         fi
17 |         sleep 1
18 |     done
19 | }
20 | 
21 | # Function to kill the vLLM server
22 | cleanup() {
23 |     echo "Stopping vLLM server..."
24 |     pkill -f "vllm serve" || true
25 | }
26 | 
27 | # API configuration
28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
31 | 
32 | RESULTS_FOLDER="./babilong_evals"
33 | MODEL_NAME="meta-llama/Meta-Llama-3-8B-Instruct"
34 | MODEL_PATH="/home/jovyan/kuratov/models/Meta-Llama-3-8B-Instruct"
35 | 
36 | # Start the vLLM server in the background
37 | # Comment this section if vLLM server is already running.
38 | # 1xA100 80GB, vllm 0.8.4
39 | echo "Starting vLLM server..."
40 | VLLM_DISABLE_COMPILE_CACHE=1
41 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
42 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" --disable-log-requests &
43 | 
44 | VLLM_PID=$!
45 | echo "vLLM PID: $VLLM_PID"
46 | 
47 | # Wait for the server to be ready
48 | wait_for_server
49 | 
50 | # Set up trap to ensure cleanup on script exit
51 | trap cleanup EXIT
52 | 
53 | 
54 | DATASET_NAME="RMT-team/babilong-1k-samples"
55 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
56 | LENGTHS=("0k" "1k" "2k" "4k" "8k")
57 | 
58 | USE_CHAT_TEMPLATE=true
59 | USE_INSTRUCTION=true
60 | USE_EXAMPLES=true
61 | USE_POST_PROMPT=true
62 | 
63 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
64 | 
65 | python scripts/run_model_on_babilong.py \
66 |     --results_folder "$RESULTS_FOLDER" \
67 |     --dataset_name "$DATASET_NAME" \
68 |     --model_name "$MODEL_NAME" \
69 |     --model_path "$MODEL_PATH" \
70 |     --tasks "${TASKS[@]}" \
71 |     --lengths "${LENGTHS[@]}" \
72 |     --system_prompt "You are a helpful assistant." \
73 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
74 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
75 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
76 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
77 |     --api_url "${VLLM_API_URL}/completions"
78 | 
79 | # Cleanup will be automatically called by the trap
80 | echo Done


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-3.1-70B-Instruct_vllm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="meta-llama/Meta-Llama-3.1-70B-Instruct"
 8 | TOKENIZER="meta-llama/Meta-Llama-3.1-70B-Instruct"
 9 | 
10 | # run model with vllm, e.g.:
11 | # CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve meta-llama/Meta-Llama-3.1-70B-Instruct --enable-chunked-prefill=False\
12 | # --tensor-parallel-size 8 --enforce-eager --served-model-name meta-llama/Meta-Llama-3.1-70B-Instruct
13 | # adjust parameters to your setup (e.g, set --max_model_len 40000)
14 | 
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | USE_CHAT_TEMPLATE=true
19 | USE_INSTRUCTION=true
20 | USE_EXAMPLES=true
21 | USE_POST_PROMPT=true
22 | API_URL="http://localhost:8000/v1/completions"
23 | 
24 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
25 | 
26 | python scripts/run_model_on_babilong.py \
27 |     --results_folder "$RESULTS_FOLDER" \
28 |     --dataset_name "$DATASET_NAME" \
29 |     --model_name "$MODEL_NAME" \
30 |     --tokenizer_name "$TOKENIZER" \
31 |     --tasks "${TASKS[@]}" \
32 |     --lengths "${LENGTHS[@]}" \
33 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
34 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
35 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
36 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
37 |     --api_url "$API_URL"
38 | 
39 | DATASET_NAME="RMT-team/babilong"
40 | TASKS=("qa1" "qa2" "qa5" "qa3" "qa4")
41 | LENGTHS=("64k" "128k")
42 | 
43 | USE_CHAT_TEMPLATE=true
44 | USE_INSTRUCTION=true
45 | USE_EXAMPLES=true
46 | USE_POST_PROMPT=true
47 | API_URL="http://localhost:8000/v1/completions"
48 | 
49 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
50 | 
51 | python scripts/run_model_on_babilong.py \
52 |     --results_folder "$RESULTS_FOLDER" \
53 |     --dataset_name "$DATASET_NAME" \
54 |     --model_name "$MODEL_NAME" \
55 |     --tokenizer_name "$TOKENIZER" \
56 |     --tasks "${TASKS[@]}" \
57 |     --lengths "${LENGTHS[@]}" \
58 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
59 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
60 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
61 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
62 |     --api_url "$API_URL"
63 | 
64 | 
65 | # USE_CHAT_TEMPLATE=false
66 | # USE_INSTRUCTION=false
67 | # USE_EXAMPLES=false
68 | # USE_POST_PROMPT=false
69 | # API_URL=""
70 | 
71 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
72 | 
73 | # python scripts/run_model_on_babilong.py \
74 | #     --results_folder "$RESULTS_FOLDER" \
75 | #     --dataset_name "$DATASET_NAME" \
76 | #     --model_name "$MODEL_NAME" \
77 | #     --tasks "${TASKS[@]}" \
78 | #     --lengths "${LENGTHS[@]}" \
79 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
80 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
81 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
82 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
83 | #     --api_url "$API_URL"
84 | 


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-3.1-8B-Instruct_32k.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0,1 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="meta-llama/Meta-Llama-3.1-8B-Instruct"
 8 | 
 9 | TASKS=("qa2" "qa3" "qa4" "qa5")
10 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
11 | 
12 | USE_CHAT_TEMPLATE=true
13 | USE_INSTRUCTION=true
14 | USE_EXAMPLES=true
15 | USE_POST_PROMPT=true
16 | API_URL=""
17 | 
18 | # or set API_URL and run model with vllm serve
19 | 
20 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
21 | 
22 | python scripts/run_model_on_babilong.py \
23 |     --results_folder "$RESULTS_FOLDER" \
24 |     --dataset_name "$DATASET_NAME" \
25 |     --model_name "$MODEL_NAME" \
26 |     --tasks "${TASKS[@]}" \
27 |     --lengths "${LENGTHS[@]}" \
28 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
29 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
30 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
31 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
32 |     --api_url "$API_URL"
33 | 
34 | 
35 | # USE_CHAT_TEMPLATE=false
36 | # USE_INSTRUCTION=false
37 | # USE_EXAMPLES=false
38 | # USE_POST_PROMPT=false
39 | # API_URL=""
40 | 
41 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
42 | 
43 | # python scripts/run_model_on_babilong.py \
44 | #     --results_folder "$RESULTS_FOLDER" \
45 | #     --dataset_name "$DATASET_NAME" \
46 | #     --model_name "$MODEL_NAME" \
47 | #     --tasks "${TASKS[@]}" \
48 | #     --lengths "${LENGTHS[@]}" \
49 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
50 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
51 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
52 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
53 | #     --api_url "$API_URL"
54 | 


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-3.1-8B-Instruct_64-128k_vllm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong"
 7 | MODEL_NAME="meta-llama/Meta-Llama-3.1-8B-Instruct"
 8 | TOKENIZER="meta-llama/Meta-Llama-3.1-8B-Instruct"
 9 | 
10 | # run model with vllm (0.5.3.post1), e.g.:
11 | # CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --enable-chunked-prefill=False \
12 | # --tensor-parallel-size 4 --served-model-name meta-llama/Meta-Llama-3.1-8B-Instruct
13 | 
14 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
15 | LENGTHS=("64k" "128k")
16 | 
17 | USE_CHAT_TEMPLATE=true
18 | USE_INSTRUCTION=true
19 | USE_EXAMPLES=true
20 | USE_POST_PROMPT=true
21 | API_URL="http://localhost:8000/v1/completions"
22 | 
23 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
24 | 
25 | python scripts/run_model_on_babilong.py \
26 |     --results_folder "$RESULTS_FOLDER" \
27 |     --dataset_name "$DATASET_NAME" \
28 |     --model_name "$MODEL_NAME" \
29 |     --tokenizer_name "$TOKENIZER" \
30 |     --tasks "${TASKS[@]}" \
31 |     --lengths "${LENGTHS[@]}" \
32 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
33 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
34 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
35 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
36 |     --api_url "$API_URL"
37 | 
38 | 
39 | # USE_CHAT_TEMPLATE=false
40 | # USE_INSTRUCTION=false
41 | # USE_EXAMPLES=false
42 | # USE_POST_PROMPT=false
43 | # API_URL=""
44 | 
45 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
46 | 
47 | # python scripts/run_model_on_babilong.py \
48 | #     --results_folder "$RESULTS_FOLDER" \
49 | #     --dataset_name "$DATASET_NAME" \
50 | #     --model_name "$MODEL_NAME" \
51 | #     --tasks "${TASKS[@]}" \
52 | #     --lengths "${LENGTHS[@]}" \
53 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
54 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
55 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
56 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
57 | #     --api_url "$API_URL"
58 | 


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-3.2-1B-Instruct_vllm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong"
 7 | MODEL_NAME="meta-llama/Llama-3.2-1B-Instruct"
 8 | MODEL_PATH="/home/jovyan/kuratov/models/Llama-3.2-1B-Instruct"
 9 | 
10 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
11 | LENGTHS=("128k" "64k")
12 | 
13 | USE_CHAT_TEMPLATE=true
14 | USE_INSTRUCTION=true
15 | USE_EXAMPLES=true
16 | USE_POST_PROMPT=true
17 | API_URL="http://localhost:8000/v1/completions"
18 | 
19 | # e.g., run locally with vllm (0.5.3.post1)
20 | # CUDA_VISIBLE_DEVICES=0,1 vllm serve ./Llama-3.2-1B-Instruct --enable-chunked-prefill=False --tensor-parallel-size 2 \
21 | # --served-model-name meta-llama/Llama-3.2-1B-Instruct
22 | 
23 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
24 | 
25 | python scripts/run_model_on_babilong.py \
26 |     --results_folder "$RESULTS_FOLDER" \
27 |     --dataset_name "$DATASET_NAME" \
28 |     --model_name "$MODEL_NAME" \
29 |     --model_path "$MODEL_PATH" \
30 |     --tasks "${TASKS[@]}" \
31 |     --lengths "${LENGTHS[@]}" \
32 |     --system_prompt "You are a helpful AI assistant." \
33 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
34 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
35 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
36 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
37 |     --api_url "$API_URL"
38 | 
39 | 
40 | DATASET_NAME="RMT-team/babilong-1k-samples"
41 | 
42 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
43 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
44 | 
45 | USE_CHAT_TEMPLATE=true
46 | USE_INSTRUCTION=true
47 | USE_EXAMPLES=true
48 | USE_POST_PROMPT=true
49 | 
50 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
51 | 
52 | python scripts/run_model_on_babilong.py \
53 |     --results_folder "$RESULTS_FOLDER" \
54 |     --dataset_name "$DATASET_NAME" \
55 |     --model_name "$MODEL_NAME" \
56 |     --model_path "$MODEL_PATH" \
57 |     --tasks "${TASKS[@]}" \
58 |     --lengths "${LENGTHS[@]}" \
59 |     --system_prompt "You are a helpful AI assistant." \
60 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
61 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
62 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
63 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
64 |     --api_url "$API_URL"
65 | 


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-3.2-3B-Instruct_vllm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong"
 7 | MODEL_NAME="meta-llama/Llama-3.2-3B-Instruct"
 8 | MODEL_PATH="/home/jovyan/kuratov/models/Llama-3.2-3B-Instruct"
 9 | 
10 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
11 | LENGTHS=("128k" "64k")
12 | 
13 | USE_CHAT_TEMPLATE=true
14 | USE_INSTRUCTION=true
15 | USE_EXAMPLES=true
16 | USE_POST_PROMPT=true
17 | API_URL="http://localhost:8000/v1/completions"
18 | 
19 | # e.g., run locally with vllm (0.5.3.post1)
20 | # CUDA_VISIBLE_DEVICES=0,1 vllm serve ./Llama-3.2-3B-Instruct --enable-chunked-prefill=False --tensor-parallel-size 2 \
21 | # --served-model-name meta-llama/Llama-3.2-3B-Instruct
22 | 
23 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
24 | 
25 | python scripts/run_model_on_babilong.py \
26 |     --results_folder "$RESULTS_FOLDER" \
27 |     --dataset_name "$DATASET_NAME" \
28 |     --model_name "$MODEL_NAME" \
29 |     --model_path "$MODEL_PATH" \
30 |     --tasks "${TASKS[@]}" \
31 |     --lengths "${LENGTHS[@]}" \
32 |     --system_prompt "You are a helpful AI assistant." \
33 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
34 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
35 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
36 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
37 |     --api_url "$API_URL"
38 | 
39 | 
40 | DATASET_NAME="RMT-team/babilong-1k-samples"
41 | 
42 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
43 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
44 | 
45 | USE_CHAT_TEMPLATE=true
46 | USE_INSTRUCTION=true
47 | USE_EXAMPLES=true
48 | USE_POST_PROMPT=true
49 | 
50 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
51 | 
52 | python scripts/run_model_on_babilong.py \
53 |     --results_folder "$RESULTS_FOLDER" \
54 |     --dataset_name "$DATASET_NAME" \
55 |     --model_name "$MODEL_NAME" \
56 |     --model_path "$MODEL_PATH" \
57 |     --tasks "${TASKS[@]}" \
58 |     --lengths "${LENGTHS[@]}" \
59 |     --system_prompt "You are a helpful AI assistant." \
60 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
61 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
62 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
63 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
64 |     --api_url "$API_URL"
65 | 


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-4-Scout-17B-16E-Instruct-start_vllm.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
 3 | set -e
 4 | 
 5 | # Function to check if the API server is ready
 6 | wait_for_server() {
 7 |     echo "Waiting for vLLM server to start..."
 8 |     while true; do
 9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
10 |             echo "vLLM process failed to start!"
11 |             exit 1
12 |         fi
13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
14 |             echo "vLLM server is ready!"
15 |             return 0
16 |         fi
17 |         sleep 1
18 |     done
19 | }
20 | 
21 | # Function to kill the vLLM server
22 | cleanup() {
23 |     echo "Stopping vLLM server..."
24 |     pkill -f "vllm serve" || true
25 | }
26 | 
27 | # API configuration
28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
31 | 
32 | RESULTS_FOLDER="./babilong_evals"
33 | MODEL_NAME="meta-llama/Llama-4-Scout-17B-16E-Instruct"
34 | MODEL_PATH="/home/jovyan/kuratov/models/Llama-4-Scout-17B-16E-Instruct"
35 | 
36 | # Start the vLLM server in the background
37 | # Comment this section if vLLM server is already running.
38 | # 4xA100 80GB, vllm 0.8.4
39 | echo "Starting vLLM server..."
40 | VLLM_DISABLE_COMPILE_CACHE=1
41 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
42 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" --disable-log-requests \
43 |     --max_model_len 42000 --override-generation-config='{"attn_temperature_tuning": true}' &
44 | 
45 | VLLM_PID=$!
46 | echo "vLLM PID: $VLLM_PID"
47 | 
48 | # Wait for the server to be ready
49 | wait_for_server
50 | 
51 | # Set up trap to ensure cleanup on script exit
52 | trap cleanup EXIT
53 | 
54 | DATASET_NAME="RMT-team/babilong-1k-samples"
55 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
56 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
57 | 
58 | USE_CHAT_TEMPLATE=true
59 | USE_INSTRUCTION=true
60 | USE_EXAMPLES=true
61 | USE_POST_PROMPT=true
62 | 
63 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
64 | 
65 | python scripts/run_model_on_babilong.py \
66 |     --results_folder "$RESULTS_FOLDER" \
67 |     --dataset_name "$DATASET_NAME" \
68 |     --model_name "$MODEL_NAME" \
69 |     --model_path "$MODEL_PATH" \
70 |     --tasks "${TASKS[@]}" \
71 |     --lengths "${LENGTHS[@]}" \
72 |     --system_prompt "You are a helpful assistant." \
73 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
74 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
75 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
76 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
77 |     --api_url "${VLLM_API_URL}/completions"
78 | 
79 | # Cleanup will be automatically called by the trap
80 | echo Done


--------------------------------------------------------------------------------
/scripts/run_Meta-Llama-4-Scout-17B-16E-Instruct-start_vllm_64k-128k.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
 3 | set -e
 4 | 
 5 | # Function to check if the API server is ready
 6 | wait_for_server() {
 7 |     echo "Waiting for vLLM server to start..."
 8 |     while true; do
 9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
10 |             echo "vLLM process failed to start!"
11 |             exit 1
12 |         fi
13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
14 |             echo "vLLM server is ready!"
15 |             return 0
16 |         fi
17 |         sleep 1
18 |     done
19 | }
20 | 
21 | # Function to kill the vLLM server
22 | cleanup() {
23 |     echo "Stopping vLLM server..."
24 |     pkill -f "vllm serve" || true
25 | }
26 | 
27 | # API configuration
28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
31 | 
32 | RESULTS_FOLDER="./babilong_evals"
33 | MODEL_NAME="meta-llama/Llama-4-Scout-17B-16E-Instruct"
34 | MODEL_PATH="/home/jovyan/kuratov/models/Llama-4-Scout-17B-16E-Instruct"
35 | 
36 | # Start the vLLM server in the background
37 | # Comment this section if vLLM server is already running.
38 | # 4xA100 80GB, vllm 0.8.4
39 | echo "Starting vLLM server..."
40 | VLLM_DISABLE_COMPILE_CACHE=1
41 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
42 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" --disable-log-requests \
43 |     --max_model_len 140000 --override-generation-config='{"attn_temperature_tuning": true}' --kv-cache-dtype fp8 &
44 | 
45 | VLLM_PID=$!
46 | echo "vLLM PID: $VLLM_PID"
47 | 
48 | # Wait for the server to be ready
49 | wait_for_server
50 | 
51 | # Set up trap to ensure cleanup on script exit
52 | trap cleanup EXIT
53 | 
54 | DATASET_NAME="RMT-team/babilong"
55 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
56 | LENGTHS=("64k" "128k")
57 | 
58 | USE_CHAT_TEMPLATE=true
59 | USE_INSTRUCTION=true
60 | USE_EXAMPLES=true
61 | USE_POST_PROMPT=true
62 | 
63 | echo "Running $MODEL_NAME on ${TASKS[@]} with ${LENGTHS[@]}"
64 | 
65 | # # Run the Python script
66 | python scripts/run_model_on_babilong.py \
67 |     --results_folder "$RESULTS_FOLDER" \
68 |     --dataset_name "$DATASET_NAME" \
69 |     --model_name "$MODEL_NAME" \
70 |     --model_path "$MODEL_PATH" \
71 |     --tasks "${TASKS[@]}" \
72 |     --lengths "${LENGTHS[@]}" \
73 |     --system_prompt "You are a helpful assistant." \
74 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
75 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
76 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
77 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
78 |     --api_url "${VLLM_API_URL}/completions"
79 | 
80 | # Cleanup will be automatically called by the trap
81 | echo Done


--------------------------------------------------------------------------------
/scripts/run_Mistral-7B-Instruct-v0.2_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="mistralai/Mistral-7B-Instruct-v0.2"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | USE_CHAT_TEMPLATE=true
11 | USE_INSTRUCTION=true
12 | USE_EXAMPLES=true
13 | USE_POST_PROMPT=true
14 | API_URL=""
15 | 
16 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
17 | 
18 | python scripts/run_model_on_babilong.py \
19 |     --results_folder "$RESULTS_FOLDER" \
20 |     --dataset_name "$DATASET_NAME" \
21 |     --model_name "$MODEL_NAME" \
22 |     --tasks "${TASKS[@]}" \
23 |     --lengths "${LENGTHS[@]}" \
24 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
25 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
26 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
27 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
28 |     --api_url "$API_URL"
29 | 


--------------------------------------------------------------------------------
/scripts/run_Mixtral-8x22B-Instruct-v0.1_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="mistralai/Mixtral-8x22B-Instruct-v0.1"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL="http://localhost:8082/completion"
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | # 32k+ on 100 samples
33 | DATASET_NAME="RMT-team/babilong"
34 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
35 | LENGTHS=("64k")
36 | 
37 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
38 | 
39 | python scripts/run_model_on_babilong.py \
40 |     --results_folder "$RESULTS_FOLDER" \
41 |     --dataset_name "$DATASET_NAME" \
42 |     --model_name "$MODEL_NAME" \
43 |     --tasks "${TASKS[@]}" \
44 |     --lengths "${LENGTHS[@]}" \
45 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
46 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
47 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
48 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
49 |     --api_url "$API_URL"
50 | 


--------------------------------------------------------------------------------
/scripts/run_Mixtral-8x7B-Instruct-v0.1.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="mistralai/Mixtral-8x7B-Instruct-v0.1"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | 
11 | USE_CHAT_TEMPLATE=true
12 | USE_INSTRUCTION=true
13 | USE_EXAMPLES=true
14 | USE_POST_PROMPT=true
15 | API_URL=""
16 | 
17 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
18 | 
19 | python scripts/run_model_on_babilong.py \
20 |     --results_folder "$RESULTS_FOLDER" \
21 |     --dataset_name "$DATASET_NAME" \
22 |     --model_name "$MODEL_NAME" \
23 |     --tasks "${TASKS[@]}" \
24 |     --lengths "${LENGTHS[@]}" \
25 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
26 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
27 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
28 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
29 |     --api_url "$API_URL"
30 | 
31 | 
32 | USE_CHAT_TEMPLATE=true
33 | USE_INSTRUCTION=false
34 | USE_EXAMPLES=false
35 | USE_POST_PROMPT=false
36 | API_URL=""
37 | 
38 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
39 | 
40 | python scripts/run_model_on_babilong.py \
41 |     --results_folder "$RESULTS_FOLDER" \
42 |     --dataset_name "$DATASET_NAME" \
43 |     --model_name "$MODEL_NAME" \
44 |     --tasks "${TASKS[@]}" \
45 |     --lengths "${LENGTHS[@]}" \
46 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
47 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
48 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
49 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
50 |     --api_url "$API_URL"
51 | 


--------------------------------------------------------------------------------
/scripts/run_Phi-3.5-MoE-instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="microsoft/Phi-3.5-MoE-instruct"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     --system_prompt "You are a helpful AI assistant." \
27 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
28 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
29 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
30 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
31 |     --api_url "$API_URL"
32 | 
33 | 
34 | DATASET_NAME="RMT-team/babilong"
35 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
36 | LENGTHS=("64k" "128k")
37 | 
38 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
39 | 
40 | python scripts/run_model_on_babilong.py \
41 |     --results_folder "$RESULTS_FOLDER" \
42 |     --dataset_name "$DATASET_NAME" \
43 |     --model_name "$MODEL_NAME" \
44 |     --tasks "${TASKS[@]}" \
45 |     --lengths "${LENGTHS[@]}" \
46 |     --system_prompt "You are a helpful AI assistant." \
47 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
48 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
49 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
50 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
51 |     --api_url "$API_URL"
52 | 


--------------------------------------------------------------------------------
/scripts/run_Phi-3.5-mini-instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="microsoft/Phi-3.5-mini-instruct"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     --system_prompt "You are a helpful AI assistant." \
27 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
28 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
29 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
30 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
31 |     --api_url "$API_URL"
32 | 
33 | 
34 | DATASET_NAME="RMT-team/babilong"
35 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
36 | LENGTHS=("128k" "64k")
37 | 
38 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
39 | 
40 | python scripts/run_model_on_babilong.py \
41 |     --results_folder "$RESULTS_FOLDER" \
42 |     --dataset_name "$DATASET_NAME" \
43 |     --model_name "$MODEL_NAME" \
44 |     --tasks "${TASKS[@]}" \
45 |     --lengths "${LENGTHS[@]}" \
46 |     --system_prompt "You are a helpful AI assistant." \
47 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
48 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
49 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
50 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
51 |     --api_url "$API_URL"
52 | 


--------------------------------------------------------------------------------
/scripts/run_Phi-4-mini-instruct_start_vllm.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
  3 | set -e
  4 | 
  5 | # Function to check if the API server is ready
  6 | wait_for_server() {
  7 |     echo "Waiting for vLLM server to start..."
  8 |     while true; do
  9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
 10 |             echo "vLLM process failed to start!"
 11 |             exit 1
 12 |         fi
 13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
 14 |             echo "vLLM server is ready!"
 15 |             return 0
 16 |         fi
 17 |         sleep 1
 18 |     done
 19 | }
 20 | 
 21 | # Function to kill the vLLM server
 22 | cleanup() {
 23 |     echo "Stopping vLLM server..."
 24 |     pkill -f "vllm serve" || true
 25 | }
 26 | 
 27 | # API configuration
 28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
 29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
 30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
 31 | 
 32 | RESULTS_FOLDER="./babilong_evals"
 33 | MODEL_NAME="microsoft/Phi-4-mini-instruct"
 34 | MODEL_PATH="/home/jovyan/kuratov/models/Phi-4-mini-instruct"
 35 | 
 36 | # Start the vLLM server in the background
 37 | # Comment this section if vLLM server is already running.
 38 | echo "Starting vLLM server..."
 39 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
 40 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" --disable-log-requests &
 41 | 
 42 | VLLM_PID=$!
 43 | echo "vLLM PID: $VLLM_PID"
 44 | 
 45 | # Wait for the server to be ready
 46 | wait_for_server
 47 | 
 48 | # Set up trap to ensure cleanup on script exit
 49 | trap cleanup EXIT
 50 | 
 51 | DATASET_NAME="RMT-team/babilong"
 52 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 53 | LENGTHS=("128k" "64k")
 54 | 
 55 | USE_CHAT_TEMPLATE=true
 56 | USE_INSTRUCTION=true
 57 | USE_EXAMPLES=true
 58 | USE_POST_PROMPT=true
 59 | 
 60 | echo "Running $MODEL_NAME on ${TASKS[@]} with ${LENGTHS[@]}"
 61 | 
 62 | # Run the Python script
 63 | python scripts/run_model_on_babilong.py \
 64 |     --results_folder "$RESULTS_FOLDER" \
 65 |     --dataset_name "$DATASET_NAME" \
 66 |     --model_name "$MODEL_NAME" \
 67 |     --model_path "$MODEL_PATH" \
 68 |     --tasks "${TASKS[@]}" \
 69 |     --lengths "${LENGTHS[@]}" \
 70 |     --system_prompt "You are a helpful assistant." \
 71 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 72 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 73 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 74 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
 75 |     --api_url "${VLLM_API_URL}/completions"
 76 | 
 77 | DATASET_NAME="RMT-team/babilong-1k-samples"
 78 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 79 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
 80 | 
 81 | USE_CHAT_TEMPLATE=true
 82 | USE_INSTRUCTION=true
 83 | USE_EXAMPLES=true
 84 | USE_POST_PROMPT=true
 85 | 
 86 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
 87 | 
 88 | python scripts/run_model_on_babilong.py \
 89 |     --results_folder "$RESULTS_FOLDER" \
 90 |     --dataset_name "$DATASET_NAME" \
 91 |     --model_name "$MODEL_NAME" \
 92 |     --model_path "$MODEL_PATH" \
 93 |     --tasks "${TASKS[@]}" \
 94 |     --lengths "${LENGTHS[@]}" \
 95 |     --system_prompt "You are a helpful assistant." \
 96 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 97 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 98 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 99 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
100 |     --api_url "${VLLM_API_URL}/completions"
101 | 
102 | # Cleanup will be automatically called by the trap
103 | echo Done


--------------------------------------------------------------------------------
/scripts/run_Qwen2-72B-Instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="Qwen/Qwen2-72B-Instruct"
 8 | TOKENIZER="Qwen/Qwen2-72B-Instruct"
 9 | 
10 | # run model with vllm, e.g.:
11 | # up to 16k
12 | # CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve ./Qwen2-72B-Instruct --served-model-name Qwen/Qwen2-72B-Instruct \
13 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --gpu-memory-utilization 0.99 --max_model_len 32768 \
14 | # --trust_remote_code --enforce_eager
15 | 
16 | # 32k, 64k update model config with (as recommended in https://huggingface.co/Qwen/Qwen2-72B-Instruct)
17 | # "rope_scaling": {
18 | #             "factor": 4.0,
19 | #             "original_max_position_embeddings": 32768,
20 | #             "type": "yarn"
21 | #         }
22 | # VLLM_ENGINE_ITERATION_TIMEOUT_S=300 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve ./Qwen2-72B-Instruct \
23 | # --served-model-name Qwen/Qwen2-72B-Instruct --enable-chunked-prefill=False --tensor-parallel-size 8 \
24 | # --gpu-memory-utilization 0.99 --max_model_len 131072 --trust_remote_code --enforce_eager
25 | 
26 | # 128k update model config with
27 | # "rope_scaling": {
28 | #             "factor": 5.0,
29 | #             "original_max_position_embeddings": 32768,
30 | #             "type": "yarn"
31 | #         }
32 | # VLLM_ENGINE_ITERATION_TIMEOUT_S=3000 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve ./Qwen2-72B-Instruct
33 | # --served-model-name Qwen/Qwen2-72B-Instruct --enable-chunked-prefill=False --tensor-parallel-size 8 \
34 | # --gpu-memory-utilization 0.99 --max_model_len 163840 --trust_remote_code --enforce_eager
35 | 
36 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
37 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k") #("32k")
38 | 
39 | USE_CHAT_TEMPLATE=true
40 | USE_INSTRUCTION=true
41 | USE_EXAMPLES=true
42 | USE_POST_PROMPT=true
43 | API_URL="http://localhost:8000/v1/completions"
44 | 
45 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
46 | 
47 | python scripts/run_model_on_babilong.py \
48 |     --results_folder "$RESULTS_FOLDER" \
49 |     --dataset_name "$DATASET_NAME" \
50 |     --model_name "$MODEL_NAME" \
51 |     --tokenizer_name "$TOKENIZER" \
52 |     --tasks "${TASKS[@]}" \
53 |     --lengths "${LENGTHS[@]}" \
54 |     --system_prompt "You are a helpful assistant." \
55 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
56 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
57 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
58 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
59 |     --api_url "$API_URL"
60 | 
61 | 
62 | # TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
63 | # LENGTHS=("128k") #("64k") # 128k with rope factor 5
64 | 
65 | # DATASET_NAME="RMT-team/babilong"
66 | # USE_CHAT_TEMPLATE=true
67 | # USE_INSTRUCTION=true
68 | # USE_EXAMPLES=true
69 | # USE_POST_PROMPT=true
70 | # API_URL="http://localhost:8000/v1/completions"
71 | 
72 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
73 | 
74 | # python scripts/run_model_on_babilong.py \
75 | #     --results_folder "$RESULTS_FOLDER" \
76 | #     --dataset_name "$DATASET_NAME" \
77 | #     --model_name "$MODEL_NAME" \
78 | #     --tokenizer_name "$TOKENIZER" \
79 | #     --tasks "${TASKS[@]}" \
80 | #     --lengths "${LENGTHS[@]}" \
81 | #     --system_prompt "You are a helpful assistant." \
82 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
83 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
84 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
85 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
86 | #     --api_url "$API_URL"
87 | 


--------------------------------------------------------------------------------
/scripts/run_Qwen2-7B-Instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="Qwen/Qwen2-7B-Instruct"
 8 | TOKENIZER="Qwen/Qwen2-7B-Instruct"
 9 | 
10 | # run model with vllm (0.5.3.post1), e.g.:
11 | # up to 16k
12 | # CUDA_VISIBLE_DEVICES=0,1 vllm serve ./Qwen2-7B-Instruct --served-model-name Qwen/Qwen2-7B-Instruct \
13 | # --enable-chunked-prefill=False --tensor-parallel-size 2 --gpu-memory-utilization 0.99 --max_model_len 32768 \
14 | # --trust_remote_code --enforce_eager
15 | 
16 | # 32k, 64k update model config with (as recommended in https://huggingface.co/Qwen/Qwen2-7B-Instruct)
17 | # "rope_scaling": {
18 | #             "factor": 4.0,
19 | #             "original_max_position_embeddings": 32768,
20 | #             "type": "yarn"
21 | #         }
22 | # CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve ./Qwen2-7B-Instruct --served-model-name Qwen/Qwen2-7B-Instruct \
23 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --gpu-memory-utilization 0.99 --max_model_len 131072 \
24 | # --trust_remote_code --enforce_eager
25 | 
26 | # 128k update model config with
27 | # "rope_scaling": {
28 | #             "factor": 5.0,
29 | #             "original_max_position_embeddings": 32768,
30 | #             "type": "yarn"
31 | #         }
32 | # CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve ./Qwen2-7B-Instruct --served-model-name Qwen/Qwen2-7B-Instruct \
33 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --gpu-memory-utilization 0.99 --max_model_len 163840 \
34 | # --trust_remote_code --enforce_eager
35 | 
36 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
37 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k") #("32k")
38 | 
39 | USE_CHAT_TEMPLATE=true
40 | USE_INSTRUCTION=true
41 | USE_EXAMPLES=true
42 | USE_POST_PROMPT=true
43 | API_URL="http://localhost:8000/v1/completions"
44 | 
45 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
46 | 
47 | python scripts/run_model_on_babilong.py \
48 |     --results_folder "$RESULTS_FOLDER" \
49 |     --dataset_name "$DATASET_NAME" \
50 |     --model_name "$MODEL_NAME" \
51 |     --tokenizer_name "$TOKENIZER" \
52 |     --tasks "${TASKS[@]}" \
53 |     --lengths "${LENGTHS[@]}" \
54 |     --system_prompt "You are a helpful assistant." \
55 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
56 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
57 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
58 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
59 |     --api_url "$API_URL"
60 | 
61 | 
62 | # TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
63 | # LENGTHS=("128k") #("64k")
64 | 
65 | # DATASET_NAME="RMT-team/babilong"
66 | # USE_CHAT_TEMPLATE=true
67 | # USE_INSTRUCTION=true
68 | # USE_EXAMPLES=true
69 | # USE_POST_PROMPT=true
70 | # API_URL="http://localhost:8000/v1/completions"
71 | 
72 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
73 | 
74 | # python scripts/run_model_on_babilong.py \
75 | #     --results_folder "$RESULTS_FOLDER" \
76 | #     --dataset_name "$DATASET_NAME" \
77 | #     --model_name "$MODEL_NAME" \
78 | #     --tokenizer_name "$TOKENIZER" \
79 | #     --tasks "${TASKS[@]}" \
80 | #     --lengths "${LENGTHS[@]}" \
81 | #     --system_prompt "You are a helpful assistant." \
82 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
83 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
84 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
85 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
86 | #     --api_url "$API_URL"
87 | 


--------------------------------------------------------------------------------
/scripts/run_Qwen2.5-72B-Instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="Qwen/Qwen2.5-72B-Instruct"
 8 | TOKENIZER="Qwen/Qwen2.5-72B-Instruct"
 9 | 
10 | # run model with vllm (0.5.3.post1), e.g.:
11 | # up to 16k
12 | # CUDA_VISIBLE_DEVICES=0,1,2,3 vllm serve Qwen2.5-72B-Instruct --served-model-name Qwen/Qwen2.5-72B-Instruct \
13 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --gpu-memory-utilization 0.99 \
14 | # --max_model_len 32768 --trust_remote_code --enforce_eager
15 | 
16 | # 32k, 64k update model config with (as recommended in https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)
17 | # "rope_scaling": {
18 | #             "factor": 4.0,
19 | #             "original_max_position_embeddings": 32768,
20 | #             "type": "yarn"
21 | #         }
22 | # VLLM_ENGINE_ITERATION_TIMEOUT_S=300 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve ./Qwen2.5-72B-Instruct \
23 | # --served-model-name Qwen/Qwen2.5-72B-Instruct --enable-chunked-prefill=False --tensor-parallel-size 8 \
24 | # --gpu-memory-utilization 0.99 --max_model_len 131072 --trust_remote_code --enforce_eager
25 | 
26 | # 128k update model config with
27 | # "rope_scaling": {
28 | #             "factor": 5.0,
29 | #             "original_max_position_embeddings": 32768,
30 | #             "type": "yarn"
31 | #         }
32 | # VLLM_ENGINE_ITERATION_TIMEOUT_S=3000 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 vllm serve ./Qwen2.5-72B-Instruct
33 | # --served-model-name Qwen/Qwen2.5-72B-Instruct --enable-chunked-prefill=False --tensor-parallel-size 8 \
34 | # --gpu-memory-utilization 0.99 --max_model_len 163840 --trust_remote_code --enforce_eager
35 | 
36 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
37 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k") #("32k")
38 | 
39 | USE_CHAT_TEMPLATE=true
40 | USE_INSTRUCTION=true
41 | USE_EXAMPLES=true
42 | USE_POST_PROMPT=true
43 | API_URL="http://localhost:8000/v1/completions"
44 | 
45 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
46 | 
47 | python scripts/run_model_on_babilong.py \
48 |     --results_folder "$RESULTS_FOLDER" \
49 |     --dataset_name "$DATASET_NAME" \
50 |     --model_name "$MODEL_NAME" \
51 |     --tokenizer_name "$TOKENIZER" \
52 |     --tasks "${TASKS[@]}" \
53 |     --lengths "${LENGTHS[@]}" \
54 |     --system_prompt "You are a helpful assistant." \
55 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
56 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
57 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
58 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
59 |     --api_url "$API_URL"
60 | 
61 | 
62 | # TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
63 | # LENGTHS=("128k") #("64k") # 128k with rope factor 5
64 | 
65 | # DATASET_NAME="RMT-team/babilong"
66 | # USE_CHAT_TEMPLATE=true
67 | # USE_INSTRUCTION=true
68 | # USE_EXAMPLES=true
69 | # USE_POST_PROMPT=true
70 | # API_URL="http://localhost:8000/v1/completions"
71 | 
72 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
73 | 
74 | # python scripts/run_model_on_babilong.py \
75 | #     --results_folder "$RESULTS_FOLDER" \
76 | #     --dataset_name "$DATASET_NAME" \
77 | #     --model_name "$MODEL_NAME" \
78 | #     --tokenizer_name "$TOKENIZER" \
79 | #     --tasks "${TASKS[@]}" \
80 | #     --lengths "${LENGTHS[@]}" \
81 | #     --system_prompt "You are a helpful assistant." \
82 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
83 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
84 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
85 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
86 | #     --api_url "$API_URL"
87 | 


--------------------------------------------------------------------------------
/scripts/run_Qwen2.5-7B-Instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES="" ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="Qwen/Qwen2.5-7B-Instruct"
 8 | TOKENIZER="Qwen/Qwen2.5-7B-Instruct"
 9 | 
10 | # run model with vllm (0.5.3.post1), e.g.:
11 | # up to 16k
12 | # CUDA_VISIBLE_DEVICES=0,1 vllm serve ./Qwen2.5-7B-Instruct --served-model-name Qwen/Qwen2.5-7B-Instruct \
13 | # --enable-chunked-prefill=False --tensor-parallel-size 2 --gpu-memory-utilization 0.99 --max_model_len 32768 \
14 | # --trust_remote_code --enforce_eager
15 | 
16 | # 32k, 64k update model config with (as recommended in https://huggingface.co/Qwen/Qwen2-7B-Instruct)
17 | # "rope_scaling": {
18 | #             "factor": 4.0,
19 | #             "original_max_position_embeddings": 32768,
20 | #             "type": "yarn"
21 | #         }
22 | # CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve ./Qwen2.5-7B-Instruct --served-model-name Qwen/Qwen2.5-7B-Instruct \
23 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --gpu-memory-utilization 0.99 --max_model_len 131072 \
24 | # --trust_remote_code --enforce_eager
25 | 
26 | # 128k update model config with
27 | # "rope_scaling": {
28 | #             "factor": 5.0,
29 | #             "original_max_position_embeddings": 32768,
30 | #             "type": "yarn"
31 | #         }
32 | # CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve ./Qwen2.5-7B-Instruct --served-model-name Qwen/Qwen2.5-7B-Instruct \
33 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --gpu-memory-utilization 0.99 --max_model_len 163840 \
34 | # --trust_remote_code --enforce_eager
35 | 
36 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
37 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k") #("32k")
38 | 
39 | USE_CHAT_TEMPLATE=true
40 | USE_INSTRUCTION=true
41 | USE_EXAMPLES=true
42 | USE_POST_PROMPT=true
43 | API_URL="http://localhost:8001/v1/completions"
44 | 
45 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
46 | 
47 | python scripts/run_model_on_babilong.py \
48 |     --results_folder "$RESULTS_FOLDER" \
49 |     --dataset_name "$DATASET_NAME" \
50 |     --model_name "$MODEL_NAME" \
51 |     --tokenizer_name "$TOKENIZER" \
52 |     --tasks "${TASKS[@]}" \
53 |     --lengths "${LENGTHS[@]}" \
54 |     --system_prompt "You are a helpful assistant." \
55 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
56 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
57 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
58 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
59 |     --api_url "$API_URL"
60 | 
61 | 
62 | # TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
63 | # LENGTHS=("128k") #("64k")
64 | 
65 | # DATASET_NAME="RMT-team/babilong"
66 | # USE_CHAT_TEMPLATE=true
67 | # USE_INSTRUCTION=true
68 | # USE_EXAMPLES=true
69 | # USE_POST_PROMPT=true
70 | # API_URL="http://localhost:8001/v1/completions"
71 | 
72 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
73 | 
74 | # python scripts/run_model_on_babilong.py \
75 | #     --results_folder "$RESULTS_FOLDER" \
76 | #     --dataset_name "$DATASET_NAME" \
77 | #     --model_name "$MODEL_NAME" \
78 | #     --tokenizer_name "$TOKENIZER" \
79 | #     --tasks "${TASKS[@]}" \
80 | #     --lengths "${LENGTHS[@]}" \
81 | #     --system_prompt "You are a helpful assistant." \
82 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
83 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
84 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
85 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
86 | #     --api_url "$API_URL"
87 | 


--------------------------------------------------------------------------------
/scripts/run_YandexGPT-5-Lite-8B-instruct-start_vllm.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
  3 | set -e
  4 | 
  5 | # Function to check if the API server is ready
  6 | wait_for_server() {
  7 |     echo "Waiting for vLLM server to start..."
  8 |     while true; do
  9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
 10 |             echo "vLLM process failed to start!"
 11 |             exit 1
 12 |         fi
 13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
 14 |             echo "vLLM server is ready!"
 15 |             return 0
 16 |         fi
 17 |         sleep 1
 18 |     done
 19 | }
 20 | 
 21 | # Function to kill the vLLM server
 22 | cleanup() {
 23 |     echo "Stopping vLLM server..."
 24 |     pkill -f "vllm serve" || true
 25 | }
 26 | 
 27 | # API configuration
 28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
 29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
 30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
 31 | 
 32 | RESULTS_FOLDER="./babilong_evals"
 33 | MODEL_NAME="yandex/YandexGPT-5-Lite-8B-instruct"
 34 | MODEL_PATH="/home/jovyan/kuratov/models/YandexGPT-5-Lite-8B-instruct"
 35 | 
 36 | # Start the vLLM server in the background
 37 | # Comment this section if vLLM server is already running.
 38 | echo "Starting vLLM server..."
 39 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
 40 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" \
 41 |     --trust-remote-code --dtype bfloat16 --disable-log-requests &
 42 | 
 43 | VLLM_PID=$!
 44 | echo "vLLM PID: $VLLM_PID"
 45 | 
 46 | # Wait for the server to be ready
 47 | wait_for_server
 48 | 
 49 | # Set up trap to ensure cleanup on script exit
 50 | trap cleanup EXIT
 51 | 
 52 | USE_CHAT_TEMPLATE=true
 53 | USE_INSTRUCTION=true
 54 | USE_EXAMPLES=true
 55 | USE_POST_PROMPT=true
 56 | 
 57 | 
 58 | DATASET_NAME="RMT-team/babilong-1k-samples"
 59 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 60 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
 61 | 
 62 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
 63 | 
 64 | python scripts/run_model_on_babilong.py \
 65 |     --results_folder "$RESULTS_FOLDER" \
 66 |     --dataset_name "$DATASET_NAME" \
 67 |     --model_name "$MODEL_NAME" \
 68 |     --model_path "$MODEL_PATH" \
 69 |     --tasks "${TASKS[@]}" \
 70 |     --lengths "${LENGTHS[@]}" \
 71 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 72 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 73 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 74 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
 75 |     --api_url "${VLLM_API_URL}/completions"
 76 | 
 77 | USE_CHAT_TEMPLATE=true
 78 | USE_INSTRUCTION=false
 79 | USE_EXAMPLES=false
 80 | USE_POST_PROMPT=false
 81 | 
 82 | 
 83 | DATASET_NAME="RMT-team/babilong-1k-samples"
 84 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 85 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
 86 | 
 87 | 
 88 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
 89 | 
 90 | python scripts/run_model_on_babilong.py \
 91 |     --results_folder "$RESULTS_FOLDER" \
 92 |     --dataset_name "$DATASET_NAME" \
 93 |     --model_name "$MODEL_NAME" \
 94 |     --model_path "$MODEL_PATH" \
 95 |     --tasks "${TASKS[@]}" \
 96 |     --lengths "${LENGTHS[@]}" \
 97 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 98 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 99 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
100 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
101 |     --api_url "${VLLM_API_URL}/completions"
102 | 
103 | # Cleanup will be automatically called by the trap
104 | echo Done


--------------------------------------------------------------------------------
/scripts/run_Yarn-Mistral-7b-128k_llamacpp_no_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="NousResearch/Yarn-Mistral-7b-128k"
 7 | 
 8 | USE_CHAT_TEMPLATE=false
 9 | USE_INSTRUCTION=false
10 | USE_EXAMPLES=false
11 | USE_POST_PROMPT=false
12 | API_URL="http://localhost:8082/completion"
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | # setup llamacpp server with
19 | # server -b 2048 -ub 2048 -fa -n 15 -ngl 99 -c 131072 --port 8082 -m ~/models/Yarn-Mistral-7b-128k/Yarn-Mistral-7b-128k.Q8_0.gguf
20 | 
21 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
22 | 
23 | python scripts/run_model_on_babilong.py \
24 |     --results_folder "$RESULTS_FOLDER" \
25 |     --dataset_name "$DATASET_NAME" \
26 |     --model_name "$MODEL_NAME" \
27 |     --tasks "${TASKS[@]}" \
28 |     --lengths "${LENGTHS[@]}" \
29 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
30 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
31 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
32 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
33 |     --api_url "$API_URL"
34 | 
35 | DATASET_NAME="RMT-team/babilong"
36 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
37 | LENGTHS=("64k" "128k")
38 | 
39 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
40 | 
41 | python scripts/run_model_on_babilong.py \
42 |     --results_folder "$RESULTS_FOLDER" \
43 |     --dataset_name "$DATASET_NAME" \
44 |     --model_name "$MODEL_NAME" \
45 |     --tasks "${TASKS[@]}" \
46 |     --lengths "${LENGTHS[@]}" \
47 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
48 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
49 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
50 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
51 |     --api_url "$API_URL"
52 | 


--------------------------------------------------------------------------------
/scripts/run_Yarn-Mistral-7b-128k_llamacpp_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="NousResearch/Yarn-Mistral-7b-128k"
 7 | 
 8 | USE_CHAT_TEMPLATE=false
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL="http://localhost:8082/completion"
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | # setup llamacpp server with
19 | # server -b 2048 -ub 2048 -fa -n 15 -ngl 99 -c 131072 --port 8082 -m ~/models/Yarn-Mistral-7b-128k/Yarn-Mistral-7b-128k.Q8_0.gguf
20 | 
21 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
22 | 
23 | python scripts/run_model_on_babilong.py \
24 |     --results_folder "$RESULTS_FOLDER" \
25 |     --dataset_name "$DATASET_NAME" \
26 |     --model_name "$MODEL_NAME" \
27 |     --tasks "${TASKS[@]}" \
28 |     --lengths "${LENGTHS[@]}" \
29 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
30 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
31 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
32 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
33 |     --api_url "$API_URL"
34 | 
35 | DATASET_NAME="RMT-team/babilong"
36 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
37 | LENGTHS=("64k" "128k")
38 | 
39 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
40 | 
41 | python scripts/run_model_on_babilong.py \
42 |     --results_folder "$RESULTS_FOLDER" \
43 |     --dataset_name "$DATASET_NAME" \
44 |     --model_name "$MODEL_NAME" \
45 |     --tasks "${TASKS[@]}" \
46 |     --lengths "${LENGTHS[@]}" \
47 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
48 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
49 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
50 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
51 |     --api_url "$API_URL"
52 | 


--------------------------------------------------------------------------------
/scripts/run_Yarn-Mistral-7b-128k_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="NousResearch/Yarn-Mistral-7b-128k"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | USE_CHAT_TEMPLATE=false
11 | USE_INSTRUCTION=true
12 | USE_EXAMPLES=true
13 | USE_POST_PROMPT=true
14 | API_URL=""
15 | 
16 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
17 | 
18 | python scripts/run_model_on_babilong.py \
19 |     --results_folder "$RESULTS_FOLDER" \
20 |     --dataset_name "$DATASET_NAME" \
21 |     --model_name "$MODEL_NAME" \
22 |     --tasks "${TASKS[@]}" \
23 |     --lengths "${LENGTHS[@]}" \
24 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
25 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
26 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
27 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
28 |     --api_url "$API_URL"
29 | 


--------------------------------------------------------------------------------
/scripts/run_activation-beacon-llama2-7b-chat_no_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="namespace-Pt/activation-beacon-llama2-7b-chat"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=false
10 | USE_EXAMPLES=false
11 | USE_POST_PROMPT=false
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | DATASET_NAME="RMT-team/babilong"
33 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
34 | LENGTHS=("64k" "128k")
35 | 
36 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
37 | 
38 | python scripts/run_model_on_babilong.py \
39 |     --results_folder "$RESULTS_FOLDER" \
40 |     --dataset_name "$DATASET_NAME" \
41 |     --model_name "$MODEL_NAME" \
42 |     --tasks "${TASKS[@]}" \
43 |     --lengths "${LENGTHS[@]}" \
44 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
45 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
46 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
47 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
48 |     --api_url "$API_URL"
49 | 
50 | 


--------------------------------------------------------------------------------
/scripts/run_activation-beacon-llama2-7b-chat_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="namespace-Pt/activation-beacon-llama2-7b-chat"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | DATASET_NAME="RMT-team/babilong"
33 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
34 | LENGTHS=("64k" "128k")
35 | 
36 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
37 | 
38 | python scripts/run_model_on_babilong.py \
39 |     --results_folder "$RESULTS_FOLDER" \
40 |     --dataset_name "$DATASET_NAME" \
41 |     --model_name "$MODEL_NAME" \
42 |     --tasks "${TASKS[@]}" \
43 |     --lengths "${LENGTHS[@]}" \
44 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
45 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
46 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
47 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
48 |     --api_url "$API_URL"
49 | 


--------------------------------------------------------------------------------
/scripts/run_activation-beacon-mistral-7b_no_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="namespace-Pt/activation-beacon-mistral-7b"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=false
10 | USE_EXAMPLES=false
11 | USE_POST_PROMPT=false
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | DATASET_NAME="RMT-team/babilong"
33 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
34 | LENGTHS=("64k" "128k")
35 | 
36 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
37 | 
38 | python scripts/run_model_on_babilong.py \
39 |     --results_folder "$RESULTS_FOLDER" \
40 |     --dataset_name "$DATASET_NAME" \
41 |     --model_name "$MODEL_NAME" \
42 |     --tasks "${TASKS[@]}" \
43 |     --lengths "${LENGTHS[@]}" \
44 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
45 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
46 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
47 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
48 |     --api_url "$API_URL"
49 | 
50 | 


--------------------------------------------------------------------------------
/scripts/run_activation-beacon-mistral-7b_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="namespace-Pt/activation-beacon-mistral-7b"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | DATASET_NAME="RMT-team/babilong"
33 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
34 | LENGTHS=("64k" "128k")
35 | 
36 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
37 | 
38 | python scripts/run_model_on_babilong.py \
39 |     --results_folder "$RESULTS_FOLDER" \
40 |     --dataset_name "$DATASET_NAME" \
41 |     --model_name "$MODEL_NAME" \
42 |     --tasks "${TASKS[@]}" \
43 |     --lengths "${LENGTHS[@]}" \
44 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
45 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
46 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
47 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
48 |     --api_url "$API_URL"
49 | 
50 | 


--------------------------------------------------------------------------------
/scripts/run_c4ai-command-r-v01_llamacpp_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="CohereForAI/c4ai-command-r-v01"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | USE_CHAT_TEMPLATE=true
11 | USE_INSTRUCTION=true
12 | USE_EXAMPLES=true
13 | USE_POST_PROMPT=true
14 | API_URL="http://localhost:8081/completion"
15 | 
16 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
17 | 
18 | python scripts/run_model_on_babilong.py \
19 |     --results_folder "$RESULTS_FOLDER" \
20 |     --dataset_name "$DATASET_NAME" \
21 |     --model_name "$MODEL_NAME" \
22 |     --tasks "${TASKS[@]}" \
23 |     --lengths "${LENGTHS[@]}" \
24 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
25 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
26 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
27 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
28 |     --api_url "$API_URL"
29 | 


--------------------------------------------------------------------------------
/scripts/run_chatglm3-6b-128k.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="THUDM/chatglm3-6b-128k"
 8 | 
 9 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
10 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
11 | 
12 | USE_CHAT_TEMPLATE=false
13 | USE_INSTRUCTION=true
14 | USE_EXAMPLES=true
15 | USE_POST_PROMPT=true
16 | API_URL=""
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | 
33 | USE_CHAT_TEMPLATE=false
34 | USE_INSTRUCTION=false
35 | USE_EXAMPLES=false
36 | USE_POST_PROMPT=false
37 | API_URL=""
38 | 
39 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
40 | 
41 | python scripts/run_model_on_babilong.py \
42 |     --results_folder "$RESULTS_FOLDER" \
43 |     --dataset_name "$DATASET_NAME" \
44 |     --model_name "$MODEL_NAME" \
45 |     --tasks "${TASKS[@]}" \
46 |     --lengths "${LENGTHS[@]}" \
47 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
48 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
49 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
50 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
51 |     --api_url "$API_URL"
52 | 
53 | # 64k 128k
54 | DATASET_NAME="RMT-team/babilong"
55 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
56 | LENGTHS=("64k" "128k")
57 | 
58 | USE_CHAT_TEMPLATE=false
59 | USE_INSTRUCTION=true
60 | USE_EXAMPLES=true
61 | USE_POST_PROMPT=true
62 | API_URL=""
63 | 
64 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
65 | 
66 | python scripts/run_model_on_babilong.py \
67 |     --results_folder "$RESULTS_FOLDER" \
68 |     --dataset_name "$DATASET_NAME" \
69 |     --model_name "$MODEL_NAME" \
70 |     --tasks "${TASKS[@]}" \
71 |     --lengths "${LENGTHS[@]}" \
72 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
73 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
74 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
75 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
76 |     --api_url "$API_URL"
77 | 
78 | USE_CHAT_TEMPLATE=false
79 | USE_INSTRUCTION=false
80 | USE_EXAMPLES=false
81 | USE_POST_PROMPT=false
82 | API_URL=""
83 | 
84 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
85 | 
86 | python scripts/run_model_on_babilong.py \
87 |     --results_folder "$RESULTS_FOLDER" \
88 |     --dataset_name "$DATASET_NAME" \
89 |     --model_name "$MODEL_NAME" \
90 |     --tasks "${TASKS[@]}" \
91 |     --lengths "${LENGTHS[@]}" \
92 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
93 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
94 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
95 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
96 |     --api_url "$API_URL"


--------------------------------------------------------------------------------
/scripts/run_gemma-2-9b-it.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | MODEL_NAME="google/gemma-2-9b-it"
 7 | 
 8 | USE_CHAT_TEMPLATE=true
 9 | USE_INSTRUCTION=true
10 | USE_EXAMPLES=true
11 | USE_POST_PROMPT=true
12 | API_URL=""
13 | 
14 | DATASET_NAME="RMT-team/babilong-1k-samples"
15 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
16 | LENGTHS=("0k" "1k" "2k" "4k" "8k")
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 


--------------------------------------------------------------------------------
/scripts/run_gemma-3-12b-it-start_vllm.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
  3 | set -e
  4 | 
  5 | # Function to check if the API server is ready
  6 | wait_for_server() {
  7 |     echo "Waiting for vLLM server to start..."
  8 |     while true; do
  9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
 10 |             echo "vLLM process failed to start!"
 11 |             exit 1
 12 |         fi
 13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
 14 |             echo "vLLM server is ready!"
 15 |             return 0
 16 |         fi
 17 |         sleep 1
 18 |     done
 19 | }
 20 | 
 21 | # Function to kill the vLLM server
 22 | cleanup() {
 23 |     echo "Stopping vLLM server..."
 24 |     pkill -f "vllm serve" || true
 25 | }
 26 | 
 27 | # API configuration
 28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
 29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
 30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
 31 | 
 32 | RESULTS_FOLDER="./babilong_evals"
 33 | MODEL_NAME="google/gemma-3-12b-it"
 34 | MODEL_PATH="/home/jovyan/kuratov/models/gemma-3-12b-it"
 35 | 
 36 | # Start the vLLM server in the background
 37 | # Comment this section if vLLM server is already running.
 38 | echo "Starting vLLM server..."
 39 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
 40 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" --disable-log-requests &
 41 | 
 42 | VLLM_PID=$!
 43 | echo "vLLM PID: $VLLM_PID"
 44 | 
 45 | # Wait for the server to be ready
 46 | wait_for_server
 47 | 
 48 | # Set up trap to ensure cleanup on script exit
 49 | trap cleanup EXIT
 50 | 
 51 | DATASET_NAME="RMT-team/babilong"
 52 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 53 | LENGTHS=("64k" "128k")
 54 | 
 55 | USE_CHAT_TEMPLATE=true
 56 | USE_INSTRUCTION=true
 57 | USE_EXAMPLES=true
 58 | USE_POST_PROMPT=true
 59 | 
 60 | echo "Running $MODEL_NAME on ${TASKS[@]} with ${LENGTHS[@]}"
 61 | 
 62 | # Run the Python script
 63 | python scripts/run_model_on_babilong.py \
 64 |     --results_folder "$RESULTS_FOLDER" \
 65 |     --dataset_name "$DATASET_NAME" \
 66 |     --model_name "$MODEL_NAME" \
 67 |     --model_path "$MODEL_PATH" \
 68 |     --tasks "${TASKS[@]}" \
 69 |     --lengths "${LENGTHS[@]}" \
 70 |     --system_prompt "You are a helpful assistant." \
 71 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 72 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 73 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 74 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
 75 |     --api_url "${VLLM_API_URL}/completions"
 76 | 
 77 | DATASET_NAME="RMT-team/babilong-1k-samples"
 78 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 79 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
 80 | 
 81 | USE_CHAT_TEMPLATE=true
 82 | USE_INSTRUCTION=true
 83 | USE_EXAMPLES=true
 84 | USE_POST_PROMPT=true
 85 | 
 86 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
 87 | 
 88 | python scripts/run_model_on_babilong.py \
 89 |     --results_folder "$RESULTS_FOLDER" \
 90 |     --dataset_name "$DATASET_NAME" \
 91 |     --model_name "$MODEL_NAME" \
 92 |     --model_path "$MODEL_PATH" \
 93 |     --tasks "${TASKS[@]}" \
 94 |     --lengths "${LENGTHS[@]}" \
 95 |     --system_prompt "You are a helpful assistant." \
 96 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 97 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 98 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 99 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
100 |     --api_url "${VLLM_API_URL}/completions"
101 | 
102 | # Cleanup will be automatically called by the trap
103 | echo Done


--------------------------------------------------------------------------------
/scripts/run_gemma-3-4b-it-start_vllm.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | # CUDA_VISIBLE_DEVICES=1,2 TP=2 ./script_name.sh
  3 | set -e
  4 | 
  5 | # Function to check if the API server is ready
  6 | wait_for_server() {
  7 |     echo "Waiting for vLLM server to start..."
  8 |     while true; do
  9 |         if ! kill -0 $VLLM_PID 2>/dev/null; then
 10 |             echo "vLLM process failed to start!"
 11 |             exit 1
 12 |         fi
 13 |         if curl -s "${VLLM_API_URL}/completions" &>/dev/null; then
 14 |             echo "vLLM server is ready!"
 15 |             return 0
 16 |         fi
 17 |         sleep 1
 18 |     done
 19 | }
 20 | 
 21 | # Function to kill the vLLM server
 22 | cleanup() {
 23 |     echo "Stopping vLLM server..."
 24 |     pkill -f "vllm serve" || true
 25 | }
 26 | 
 27 | # API configuration
 28 | VLLM_API_HOST="${VLLM_API_HOST:-localhost}"
 29 | VLLM_API_PORT="${VLLM_API_PORT:-8000}"
 30 | VLLM_API_URL="${VLLM_API_URL:-http://${VLLM_API_HOST}:${VLLM_API_PORT}/v1}"
 31 | 
 32 | RESULTS_FOLDER="./babilong_evals"
 33 | MODEL_NAME="google/gemma-3-4b-it"
 34 | MODEL_PATH="/home/jovyan/kuratov/models/gemma-3-4b-it"
 35 | 
 36 | # Start the vLLM server in the background
 37 | # Comment this section if vLLM server is already running.
 38 | echo "Starting vLLM server..."
 39 | vllm serve "$MODEL_PATH" --enable-chunked-prefill=False --tensor-parallel-size $TP \
 40 |     --served-model-name "$MODEL_NAME" --host "${VLLM_API_HOST}" --port "${VLLM_API_PORT}" --disable-log-requests &
 41 | 
 42 | VLLM_PID=$!
 43 | echo "vLLM PID: $VLLM_PID"
 44 | 
 45 | # Wait for the server to be ready
 46 | wait_for_server
 47 | 
 48 | # Set up trap to ensure cleanup on script exit
 49 | trap cleanup EXIT
 50 | 
 51 | DATASET_NAME="RMT-team/babilong"
 52 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 53 | LENGTHS=("64k" "128k")
 54 | 
 55 | USE_CHAT_TEMPLATE=true
 56 | USE_INSTRUCTION=true
 57 | USE_EXAMPLES=true
 58 | USE_POST_PROMPT=true
 59 | 
 60 | echo "Running $MODEL_NAME on ${TASKS[@]} with ${LENGTHS[@]}"
 61 | 
 62 | # Run the Python script
 63 | python scripts/run_model_on_babilong.py \
 64 |     --results_folder "$RESULTS_FOLDER" \
 65 |     --dataset_name "$DATASET_NAME" \
 66 |     --model_name "$MODEL_NAME" \
 67 |     --model_path "$MODEL_PATH" \
 68 |     --tasks "${TASKS[@]}" \
 69 |     --lengths "${LENGTHS[@]}" \
 70 |     --system_prompt "You are a helpful assistant." \
 71 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 72 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 73 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 74 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
 75 |     --api_url "${VLLM_API_URL}/completions"
 76 | 
 77 | DATASET_NAME="RMT-team/babilong-1k-samples"
 78 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 79 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
 80 | 
 81 | USE_CHAT_TEMPLATE=true
 82 | USE_INSTRUCTION=true
 83 | USE_EXAMPLES=true
 84 | USE_POST_PROMPT=true
 85 | 
 86 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
 87 | 
 88 | python scripts/run_model_on_babilong.py \
 89 |     --results_folder "$RESULTS_FOLDER" \
 90 |     --dataset_name "$DATASET_NAME" \
 91 |     --model_name "$MODEL_NAME" \
 92 |     --model_path "$MODEL_PATH" \
 93 |     --tasks "${TASKS[@]}" \
 94 |     --lengths "${LENGTHS[@]}" \
 95 |     --system_prompt "You are a helpful assistant." \
 96 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
 97 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
 98 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
 99 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
100 |     --api_url "${VLLM_API_URL}/completions"
101 | 
102 | # Cleanup will be automatically called by the trap
103 | echo Done


--------------------------------------------------------------------------------
/scripts/run_glm-4-9b-chat-1m.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong"
 7 | MODEL_NAME="THUDM/glm-4-9b-chat-1m"
 8 | TOKENIZER="THUDM/glm-4-9b-chat-1m"
 9 | 
10 | # run model with vllm, e.g.:
11 | # VLLM_ENGINE_ITERATION_TIMEOUT_S=300 # for 512k
12 | # CUDA_VISIBLE_DEVICES=4,5,6,7 vllm serve ./glm-4-9b-chat-1m --served-model-name THUDM/glm-4-9b-chat-1m \
13 | # --enable-chunked-prefill=False --tensor-parallel-size 4 --max_model_len 770192 --gpu-memory-utilization 0.99 \
14 | # --trust_remote_code --enforce_eager
15 | 
16 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
17 | LENGTHS=("512k" "128k" "64k")
18 | 
19 | USE_CHAT_TEMPLATE=true
20 | USE_INSTRUCTION=true
21 | USE_EXAMPLES=true
22 | USE_POST_PROMPT=true
23 | API_URL="http://localhost:8000/v1/completions"
24 | 
25 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
26 | 
27 | python scripts/run_model_on_babilong.py \
28 |     --results_folder "$RESULTS_FOLDER" \
29 |     --dataset_name "$DATASET_NAME" \
30 |     --model_name "$MODEL_NAME" \
31 |     --tokenizer_name "$TOKENIZER" \
32 |     --tasks "${TASKS[@]}" \
33 |     --lengths "${LENGTHS[@]}" \
34 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
35 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
36 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
37 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
38 |     --api_url "$API_URL"
39 | 
40 | 
41 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
42 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
43 | 
44 | DATASET_NAME="RMT-team/babilong-1k-samples"
45 | USE_CHAT_TEMPLATE=true
46 | USE_INSTRUCTION=true
47 | USE_EXAMPLES=true
48 | USE_POST_PROMPT=true
49 | API_URL="http://localhost:8000/v1/completions"
50 | 
51 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
52 | 
53 | python scripts/run_model_on_babilong.py \
54 |     --results_folder "$RESULTS_FOLDER" \
55 |     --dataset_name "$DATASET_NAME" \
56 |     --model_name "$MODEL_NAME" \
57 |     --tokenizer_name "$TOKENIZER" \
58 |     --tasks "${TASKS[@]}" \
59 |     --lengths "${LENGTHS[@]}" \
60 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
61 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
62 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
63 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
64 |     --api_url "$API_URL"
65 | 


--------------------------------------------------------------------------------
/scripts/run_longchat-7b-v1.5-32k_no_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="lmsys/longchat-7b-v1.5-32k"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | USE_CHAT_TEMPLATE=false
11 | USE_INSTRUCTION=false
12 | USE_EXAMPLES=false
13 | USE_POST_PROMPT=false
14 | API_URL=""
15 | 
16 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
17 | 
18 | python scripts/run_model_on_babilong.py \
19 |     --results_folder "$RESULTS_FOLDER" \
20 |     --dataset_name "$DATASET_NAME" \
21 |     --model_name "$MODEL_NAME" \
22 |     --tasks "${TASKS[@]}" \
23 |     --lengths "${LENGTHS[@]}" \
24 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
25 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
26 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
27 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
28 |     --api_url "$API_URL"
29 | 


--------------------------------------------------------------------------------
/scripts/run_longchat-7b-v1.5-32k_with_instruct.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="lmsys/longchat-7b-v1.5-32k"
 8 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
 9 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k" "32k")
10 | USE_CHAT_TEMPLATE=false
11 | USE_INSTRUCTION=true
12 | USE_EXAMPLES=true
13 | USE_POST_PROMPT=true
14 | API_URL=""
15 | 
16 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
17 | 
18 | python scripts/run_model_on_babilong.py \
19 |     --results_folder "$RESULTS_FOLDER" \
20 |     --dataset_name "$DATASET_NAME" \
21 |     --model_name "$MODEL_NAME" \
22 |     --tasks "${TASKS[@]}" \
23 |     --lengths "${LENGTHS[@]}" \
24 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
25 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
26 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
27 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
28 |     --api_url "$API_URL"
29 | 


--------------------------------------------------------------------------------
/scripts/run_recurrentgemma-9b-it.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # CUDA_VISIBLE_DEVICES=0 ./script_name.sh
 3 | set -e
 4 | 
 5 | RESULTS_FOLDER="./babilong_evals"
 6 | DATASET_NAME="RMT-team/babilong-1k-samples"
 7 | MODEL_NAME="google/recurrentgemma-9b-it"
 8 | 
 9 | TASKS=("qa1" "qa2" "qa3" "qa4" "qa5")
10 | LENGTHS=("0k" "1k" "2k" "4k" "8k" "16k")
11 | 
12 | USE_CHAT_TEMPLATE=true
13 | USE_INSTRUCTION=true
14 | USE_EXAMPLES=true
15 | USE_POST_PROMPT=true
16 | API_URL=""
17 | 
18 | echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
19 | 
20 | python scripts/run_model_on_babilong.py \
21 |     --results_folder "$RESULTS_FOLDER" \
22 |     --dataset_name "$DATASET_NAME" \
23 |     --model_name "$MODEL_NAME" \
24 |     --tasks "${TASKS[@]}" \
25 |     --lengths "${LENGTHS[@]}" \
26 |     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
27 |     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
28 |     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
29 |     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
30 |     --api_url "$API_URL"
31 | 
32 | 
33 | # USE_CHAT_TEMPLATE=false
34 | # USE_INSTRUCTION=false
35 | # USE_EXAMPLES=false
36 | # USE_POST_PROMPT=false
37 | # API_URL=""
38 | 
39 | # echo running $MODEL_NAME on "${TASKS[@]}" with "${LENGTHS[@]}"
40 | 
41 | # python scripts/run_model_on_babilong.py \
42 | #     --results_folder "$RESULTS_FOLDER" \
43 | #     --dataset_name "$DATASET_NAME" \
44 | #     --model_name "$MODEL_NAME" \
45 | #     --tasks "${TASKS[@]}" \
46 | #     --lengths "${LENGTHS[@]}" \
47 | #     $( [ "$USE_CHAT_TEMPLATE" == true ] && echo "--use_chat_template" ) \
48 | #     $( [ "$USE_INSTRUCTION" == true ] && echo "--use_instruction" ) \
49 | #     $( [ "$USE_EXAMPLES" == true ] && echo "--use_examples" ) \
50 | #     $( [ "$USE_POST_PROMPT" == true ] && echo "--use_post_prompt" ) \
51 | #     --api_url "$API_URL"
52 | 


--------------------------------------------------------------------------------