├── source ├── requirements.txt ├── util.py ├── train.py ├── test.py ├── prompts.py ├── models.py └── datasets.py ├── requirements.txt ├── artifacts ├── config │ ├── arithmetic │ │ ├── base-10 │ │ │ ├── gpt-4_0-shot_cot.yaml │ │ │ ├── gemini-pro_0-shot_cot.yaml │ │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot.yaml │ │ │ └── gpt-3.5_5-shot_cot.yaml │ │ ├── base-11 │ │ │ ├── gpt-4_0-shot_cot.yaml │ │ │ ├── gemini-pro_0-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot.yaml │ │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot.yaml │ │ │ ├── gpt-3.5_5-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot_htt.yaml │ │ │ ├── gemini-pro_5-shot_cot_htt.yaml │ │ │ └── gpt-3.5_5-shot_cot_htt.yaml │ │ ├── base-16 │ │ │ ├── gpt-4_0-shot_cot.yaml │ │ │ ├── gemini-pro_0-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot.yaml │ │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot.yaml │ │ │ ├── gpt-3.5_5-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot_htt.yaml │ │ │ ├── gpt-4_5-shot_cot_htt.yaml │ │ │ └── gpt-3.5_5-shot_cot_htt.yaml │ │ └── base-9 │ │ │ ├── gpt-4_0-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot.yaml │ │ │ ├── gemini-pro_0-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot.yaml │ │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ │ ├── gpt-3.5_5-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot_htt.yaml │ │ │ ├── gemini-pro_5-shot_cot_htt.yaml │ │ │ └── gpt-3.5_5-shot_cot_htt.yaml │ ├── clutrr │ │ ├── symbolic │ │ │ ├── gpt-4_0-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot.yaml │ │ │ ├── gemini-pro_0-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot.yaml │ │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ │ ├── gpt-3.5_5-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot_htt.yaml │ │ │ ├── gemini-pro_5-shot_cot_htt.yaml │ │ │ └── gpt-3.5_5-shot_cot_htt.yaml │ │ └── textual │ │ │ ├── gpt-4_0-shot_cot.yaml │ │ │ ├── gpt-4_5-shot_cot.yaml │ │ │ ├── gemini-pro_0-shot_cot.yaml │ │ │ ├── gemini-pro_5-shot_cot.yaml │ │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ │ ├── gpt-3.5_5-shot_cot.yaml │ │ │ ├── 
gpt-4_5-shot_cot_htt.yaml │ │ │ ├── gpt-3.5_5-shot_cot_htt.yaml │ │ │ └── gemini-pro_5-shot_cot_htt.yaml │ └── list_functions │ │ ├── gpt-4_0-shot_cot.yaml │ │ ├── gpt-4_4-shot_cot.yaml │ │ ├── gemini-pro_0-shot_cot.yaml │ │ ├── gemini-pro_4-shot_cot.yaml │ │ ├── gpt-3.5_0-shot_cot.yaml │ │ ├── gpt-3.5_4-shot_cot.yaml │ │ ├── gpt-4_4-shot_cot_htt.yaml │ │ ├── gemini-pro_4-shot_cot_htt.yaml │ │ └── gpt-3.5_4-shot_cot_htt.yaml ├── prompt │ ├── arithmetic │ │ ├── 0-shot_cot.yaml │ │ ├── base-10 │ │ │ └── 5-shot_cot.yaml │ │ ├── base-11 │ │ │ ├── 5-shot_cot.yaml │ │ │ ├── 5-shot_cot_htt_train.yaml │ │ │ └── 5-shot_cot_htt_test.yaml │ │ ├── base-16 │ │ │ ├── 5-shot_cot.yaml │ │ │ ├── 5-shot_cot_htt_train.yaml │ │ │ └── 5-shot_cot_htt_test.yaml │ │ └── base-9 │ │ │ ├── 5-shot_cot.yaml │ │ │ ├── 5-shot_cot_htt_train.yaml │ │ │ └── 5-shot_cot_htt_test.yaml │ ├── clutrr │ │ ├── textual │ │ │ ├── 0-shot_cot.yaml │ │ │ ├── 5-shot_cot.yaml │ │ │ └── 5-shot_cot_htt_test.yaml │ │ └── symbolic │ │ │ ├── 0-shot_cot.yaml │ │ │ ├── 5-shot_cot.yaml │ │ │ ├── 5-shot_cot_htt_train.yaml │ │ │ └── 5-shot_cot_htt_test.yaml │ └── list_functions │ │ ├── 0-shot_cot.yaml │ │ ├── 4-shot_cot.yaml │ │ ├── 4-shot_cot_htt_train.yaml │ │ └── 4-shot_cot_htt_test.yaml ├── dataset │ └── download.sh └── checkpoint │ ├── arithmetic │ ├── base-9 │ │ ├── gpt-4_5-shot_cot_htt_2000.yaml │ │ ├── gpt-3.5_5-shot_cot_htt_2000.yaml │ │ └── gemini-pro_5-shot_cot_htt_2000.yaml │ └── base-11 │ │ ├── gpt-3.5_5-shot_cot_htt_2000.yaml │ │ └── gpt-4_5-shot_cot_htt_2000.yaml │ └── clutrr │ └── symbolic │ ├── gpt-4_5-shot_cot_htt_2000.yaml │ ├── gpt-3.5_5-shot_cot_htt_2000.yaml │ └── gemini-pro_5-shot_cot_htt_2000.yaml ├── .gitignore ├── CONTRIBUTING.md ├── README.md └── LICENSE /source/requirements.txt: -------------------------------------------------------------------------------- 1 | nltk 2 | tqdm 3 | numpy 4 | jinja2 5 | pyyaml 6 | openai>=1.0 7 | tenacity 8 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | nltk 2 | tqdm 3 | numpy 4 | jinja2 5 | pyyaml 6 | openai 7 | tenacity 8 | tiktoken 9 | google-generativeai 10 | -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-10/gpt-4_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-10 3 | model: gpt-4 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gpt-4_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-11 3 | model: gpt-4 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gpt-4_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-16 3 | model: gpt-4 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gpt-4_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-9 3 | model: gpt-4 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gpt-4_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-4 4 | prompt: prompt/clutrr/symbolic/0-shot_cot.yaml -------------------------------------------------------------------------------- 
/artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-4 4 | prompt: prompt/clutrr/symbolic/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gpt-4_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-4 4 | prompt: prompt/clutrr/textual/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gpt-4_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-4 4 | prompt: prompt/clutrr/textual/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-9 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-9/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-10/gemini-pro_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-10 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-10/gpt-3.5_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-10 3 | model: gpt-3.5-turbo 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-10/gpt-4_5-shot_cot.yaml: 
-------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-10 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-10/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gemini-pro_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-11 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-11 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-11/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gemini-pro_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-16 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-16 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-16/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gemini-pro_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-9 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gemini-pro_0-shot_cot.yaml: 
-------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gemini-pro 4 | prompt: prompt/clutrr/symbolic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gemini-pro_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gemini-pro 4 | prompt: prompt/clutrr/symbolic/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gemini-pro_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gemini-pro 4 | prompt: prompt/clutrr/textual/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gemini-pro_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gemini-pro 4 | prompt: prompt/clutrr/textual/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gpt-3.5_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-3.5-turbo 4 | prompt: prompt/clutrr/textual/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gpt-3.5_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-3.5-turbo 4 | prompt: prompt/clutrr/textual/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/list_functions/gpt-4_0-shot_cot.yaml: 
-------------------------------------------------------------------------------- 1 | test: 2 | dataset: list_functions 3 | model: gpt-4 4 | prompt: prompt/list_functions/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/list_functions/gpt-4_4-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: list_functions 3 | model: gpt-4 4 | prompt: prompt/list_functions/4-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gpt-3.5_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-11 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gpt-3.5_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-16 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gemini-pro_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-9 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-9/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gpt-3.5_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-9 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gpt-3.5_0-shot_cot.yaml: 
-------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/clutrr/symbolic/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gpt-3.5_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/clutrr/symbolic/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-10/gemini-pro_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-10 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-10/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-10/gpt-3.5_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-10 3 | model: gpt-3.5-turbo 4 | prompt: prompt/arithmetic/base-10/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gemini-pro_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-11 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-11/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gemini-pro_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-16 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-16/5-shot_cot.yaml -------------------------------------------------------------------------------- 
/artifacts/config/arithmetic/base-9/gpt-3.5_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-9 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/base-9/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/list_functions/gemini-pro_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: list_functions 3 | model: gemini-pro 4 | prompt: prompt/list_functions/0-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/list_functions/gemini-pro_4-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: list_functions 3 | model: gemini-pro 4 | prompt: prompt/list_functions/4-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gpt-3.5_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-11 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/base-11/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gpt-3.5_5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: base-16 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/base-16/5-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/config/list_functions/gpt-3.5_0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: list_functions 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/list_functions/0-shot_cot.yaml 
-------------------------------------------------------------------------------- /artifacts/config/list_functions/gpt-3.5_4-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: list_functions 3 | model: gpt-3.5-turbo-16k-0613 4 | prompt: prompt/list_functions/4-shot_cot.yaml -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-{{ base }}, what is {{ query[0] }} + {{ query[1] }}? 3 | Answer: 4 | Let's think step by step. 5 | 6 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/clutrr/textual/0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Document: {{ document }} 3 | Question: {{ query[1] }} is {{ query[0] }}'s what? 4 | Answer: 5 | Let's think step by step. 
6 | 7 | return_last: yes -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gpt-4_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-4 4 | prompt: prompt/clutrr/textual/5-shot_cot_htt_test.yaml 5 | library: checkpoint/clutrr/symbolic/gpt-4_5-shot_cot_htt_2000.yaml 6 | min_coverage: 2 7 | min_confidence: 0.7 -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gpt-3.5_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gpt-3.5-turbo 4 | prompt: prompt/clutrr/textual/5-shot_cot_htt_test.yaml 5 | library: checkpoint/clutrr/symbolic/gpt-3.5_5-shot_cot_htt_2000.yaml 6 | min_coverage: 2 7 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/prompt/clutrr/symbolic/0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}. 3 | Question: {{ query[1] }} is {{ query[0] }}'s what? 4 | Answer: 5 | Let's think step by step. 
6 | 7 | return_last: yes -------------------------------------------------------------------------------- /artifacts/config/clutrr/textual/gemini-pro_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | test: 2 | dataset: clutrr 3 | model: gemini-pro 4 | prompt: prompt/clutrr/textual/5-shot_cot_htt_test.yaml 5 | library: checkpoint/clutrr/symbolic/gemini-pro_5-shot_cot_htt_2000.yaml 6 | min_coverage: 2 7 | min_confidence: 0.3 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | share/python-wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | MANIFEST 25 | -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: clutrr 3 | model: gpt-4 4 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: clutrr 8 | model: gpt-4 9 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml 10 | library: checkpoint/clutrr/symbolic/gpt-4_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.7 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-9 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-9 8 | model: gpt-4 9 | prompt: 
prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-9/gpt-4_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-11 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-11 8 | model: gpt-4 9 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-11/gpt-4_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/config/list_functions/gpt-4_4-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: list_functions 3 | model: gpt-4 4 | prompt: prompt/list_functions/4-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: list_functions 8 | model: gpt-4 9 | prompt: prompt/list_functions/4-shot_cot_htt_test.yaml 10 | library: checkpoint/list_functions/gpt-4_4-shot_cot_htt_5000.yaml 11 | min_coverage: 1 12 | min_confidence: 0.1 -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gemini-pro_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: clutrr 3 | model: gemini-pro 4 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: clutrr 8 | model: gemini-pro 9 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml 10 | library: checkpoint/clutrr/symbolic/gemini-pro_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- 
/artifacts/config/arithmetic/base-9/gemini-pro_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-9 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-9 8 | model: gemini-pro 9 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-9/gemini-pro_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.5 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gemini-pro_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-11 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-11 8 | model: gemini-pro 9 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-11/gemini-pro_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.5 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gemini-pro_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-16 3 | model: gemini-pro 4 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-16 8 | model: gemini-pro 9 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-16/gemini-pro_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/config/clutrr/symbolic/gpt-3.5_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: clutrr 3 | model: gpt-3.5-turbo-0613 4 | prompt: 
prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: clutrr 8 | model: gpt-3.5-turbo-0613 9 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml 10 | library: checkpoint/clutrr/symbolic/gpt-3.5_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/config/list_functions/gemini-pro_4-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: list_functions 3 | model: gemini-pro 4 | prompt: prompt/list_functions/4-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: list_functions 8 | model: gemini-pro 9 | prompt: prompt/list_functions/4-shot_cot_htt_test.yaml 10 | library: checkpoint/list_functions/gemini-pro_4-shot_cot_htt_5000.yaml 11 | min_coverage: 1 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-16 3 | model: gpt-4 4 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-16 8 | model: gpt-4 9 | max_tokens: 1000 10 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml 11 | library: checkpoint/arithmetic/base-16/gpt-4_5-shot_cot_htt_2000.yaml 12 | min_coverage: 2 13 | min_confidence: 0.5 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-9/gpt-3.5_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-9 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-9 8 | model: gpt-3.5-turbo-16k-0613 9 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml 10 | library: 
checkpoint/arithmetic/base-9/gpt-3.5_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.5 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-11/gpt-3.5_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-11 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-11 8 | model: gpt-3.5-turbo-16k-0613 9 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-11/gpt-3.5_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.5 -------------------------------------------------------------------------------- /artifacts/config/arithmetic/base-16/gpt-3.5_5-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: base-16 3 | model: gpt-3.5-turbo-0613 4 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: base-16 8 | model: gpt-3.5-turbo-16k-0613 9 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml 10 | library: checkpoint/arithmetic/base-16/gpt-3.5_5-shot_cot_htt_2000.yaml 11 | min_coverage: 2 12 | min_confidence: 0.3 -------------------------------------------------------------------------------- /artifacts/config/list_functions/gpt-3.5_4-shot_cot_htt.yaml: -------------------------------------------------------------------------------- 1 | train: 2 | dataset: list_functions 3 | model: gpt-3.5-turbo-16k-0613 4 | prompt: prompt/list_functions/4-shot_cot_htt_train.yaml 5 | 6 | test: 7 | dataset: list_functions 8 | model: gpt-3.5-turbo-16k-0613 9 | prompt: prompt/list_functions/4-shot_cot_htt_test.yaml 10 | library: checkpoint/list_functions/gpt-3.5_4-shot_cot_htt_5000.yaml 11 | min_coverage: 1 12 | min_confidence: 0.1 
-------------------------------------------------------------------------------- /artifacts/prompt/list_functions/0-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | system: | 2 | Instruction: Infer the function behind the examples. Use the function to answer the questions. 3 | 4 | prompt: | 5 | Examples: 6 | {%- for i in range(train_queries | length) %} 7 | {{ train_queries[i] }} -> {{ train_answers[i] }} 8 | {%- endfor %} 9 | Questions: 10 | {%- for query in queries %} 11 | {{ query }} -> ? 12 | {%- endfor %} 13 | Answers: 14 | Let's think step by step. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | ## Contributor License Agreement 4 | 5 | Contributions to this project must be accompanied by a Contributor License 6 | Agreement. You (or your employer) retain the copyright to your contribution; 7 | this simply gives us permission to use and redistribute your contributions as 8 | part of the project. Head over to <https://cla.developers.google.com/> to see 9 | your current agreements on file or to sign a new one. 10 | 11 | You generally only need to submit a CLA once, so if you've already submitted one 12 | (even if it was for a different project), you probably don't need to do it 13 | again. 14 | 15 | ## Code reviews 16 | 17 | All submissions, including submissions by project members, require review. We 18 | use GitHub pull requests for this purpose. Consult 19 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 20 | information on using pull requests. 21 | 22 | ## Community Guidelines 23 | 24 | This project follows [Google's Open Source Community 25 | Guidelines](https://opensource.google/conduct/). 
26 | -------------------------------------------------------------------------------- /artifacts/dataset/download.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | #!/usr/bin/env bash 17 | 18 | # CLUTRR 19 | wget "https://drive.google.com/u/2/uc?id=1SEq_e1IVCDDzsBIBhoUQ5pOVH5kxRoZF&export=download" -O clutrr.zip 20 | unzip -po clutrr.zip data_emnlp_final/data_089907f8.zip > data_089907f8.zip 21 | unzip -o data_089907f8.zip -d clutrr 22 | rm clutrr.zip data_089907f8.zip 23 | 24 | # Arithmetic 25 | URL=https://raw.githubusercontent.com/ZhaofengWu/counterfactual-evaluation/master/arithmetic/data 26 | for BASE in 9 10 11 16 27 | do 28 | mkdir -p arithmetic/base-${BASE} 29 | wget ${URL}/0shot/base${BASE}.txt -O - | head -n 900 > arithmetic/base-${BASE}/2_train.txt 30 | wget ${URL}/0shot/base${BASE}.txt -O - | tail -n 100 > arithmetic/base-${BASE}/2_test.txt 31 | wget ${URL}/0shot_3digits/base${BASE}.txt -O - | tail -n 100 > arithmetic/base-${BASE}/3_test.txt 32 | wget ${URL}/0shot_4digits/base${BASE}.txt -O - | tail -n 100 > arithmetic/base-${BASE}/4_test.txt 33 | done 34 | 35 | # List Functions 36 | URL=https://raw.githubusercontent.com/google/BIG-bench/main/bigbench/benchmark_tasks/list_functions/ 37 | mkdir 
list_functions 38 | for i in {1..250} 39 | do 40 | id=$(printf "%03d" ${i}) 41 | wget ${URL}/c${id}/task.json -O list_functions/c${id}.json 42 | done -------------------------------------------------------------------------------- /artifacts/prompt/clutrr/symbolic/5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Context: The relations on the path from Alan to Anthony are daughter, uncle, son. 3 | Question: Anthony is Alan's what? 4 | Answer: 5 | For daughter's uncle, we have daughter's uncle is brother. So the relations are reduced to brother, son. 6 | For brother's son, we have brother's son is nephew. So the relations are reduced to nephew. 7 | Therefore, the answer is nephew. 8 | 9 | Context: The relations on the path from Annie to Carlos are brother, mother, son. 10 | Question: Carlos is Annie's what? 11 | Answer: 12 | For brother's mother, we have brother's mother is mother. So the relations are reduced to mother, son. 13 | For mother's son, we have mother's son is brother. So the relations are reduced to brother. 14 | Therefore, the answer is brother. 15 | 16 | Context: The relations on the path from Beverly to Michelle are father, daughter, aunt. 17 | Question: Michelle is Beverly's what? 18 | Answer: 19 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, aunt. 20 | For sister's aunt, we have sister's aunt is aunt. So the relations are reduced to aunt. 21 | Therefore, the answer is aunt. 22 | 23 | Context: The relations on the path from Lee to Jeanna are father, daughter, sister. 24 | Question: Jeanna is Lee's what? 25 | Answer: 26 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, sister. 27 | For sister's sister, we have sister's sister is sister. So the relations are reduced to sister. 28 | Therefore, the answer is sister. 
29 | 30 | Context: The relations on the path from Craig to Molly are sister, father, mother. 31 | Question: Molly is Craig's what? 32 | Answer: 33 | For sister's father, we have sister's father is father. So the relations are reduced to father, mother. 34 | For father's mother, we have father's mother is grandmother. So the relations are reduced to grandmother. 35 | Therefore, the answer is grandmother. 36 | 37 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}. 38 | Question: {{ query[1] }} is {{ query[0] }}'s what? 39 | Answer: 40 | 41 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Context: The relations on the path from Alan to Anthony are daughter, uncle, son. 3 | Question: Anthony is Alan's what? 4 | Answer: 5 | For daughter's uncle, we have daughter's uncle is brother. So the relations are reduced to brother, son. 6 | For brother's son, we have brother's son is nephew. So the relations are reduced to nephew. 7 | Therefore, the answer is nephew. 8 | 9 | Context: The relations on the path from Annie to Carlos are brother, mother, son. 10 | Question: Carlos is Annie's what? 11 | Answer: 12 | For brother's mother, we have brother's mother is mother. So the relations are reduced to mother, son. 13 | For mother's son, we have mother's son is brother. So the relations are reduced to brother. 14 | Therefore, the answer is brother. 15 | 16 | Context: The relations on the path from Beverly to Michelle are father, daughter, aunt. 17 | Question: Michelle is Beverly's what? 18 | Answer: 19 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, aunt. 20 | For sister's aunt, we have sister's aunt is aunt. So the relations are reduced to aunt. 
21 | Therefore, the answer is aunt. 22 | 23 | Context: The relations on the path from Lee to Jeanna are father, daughter, sister. 24 | Question: Jeanna is Lee's what? 25 | Answer: 26 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, sister. 27 | For sister's sister, we have sister's sister is sister. So the relations are reduced to sister. 28 | Therefore, the answer is sister. 29 | 30 | Context: The relations on the path from Craig to Molly are sister, father, mother. 31 | Question: Molly is Craig's what? 32 | Answer: 33 | For sister's father, we have sister's father is father. So the relations are reduced to father, mother. 34 | For father's mother, we have father's mother is grandmother. So the relations are reduced to grandmother. 35 | Therefore, the answer is grandmother. 36 | 37 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}. 38 | Question: {{ query[1] }} is {{ query[0] }}'s what? 39 | Answer: 40 | 41 | pattern: '[a-z\-]+''s [a-z\-]+ is [a-z\-]+\.' 42 | 43 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-10/5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-10, what is 76 + 14? 3 | Answer: 4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 5 | There is no carry. 6 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0. 6 | The carry is 1. 7 + 1 + 1 = 9. 9 is 0, 9. So we clear the carry. Prepend 9 to the answer. So far the answer has 2 digits: 9, 0. 7 | There is no carry. So far the answer has 2 digits: 9, 0. 8 | Therefore, the answer is 90. 9 | 10 | Question: In base-10, what is 97 + 74? 11 | Answer: 12 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7. 13 | There is no carry. 7 + 4 = 11. 11 is 1, 1. So we set the carry to 1. 
Prepend 1 to the answer. So far the answer has 1 digit: 1. 14 | The carry is 1. 9 + 7 + 1 = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 1. 15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 1. 16 | Therefore, the answer is 171. 17 | 18 | Question: In base-10, what is 85 + 48? 19 | Answer: 20 | 85 is 8, 5. 48 is 4, 8. So the steps are 5 + 8, 8 + 4. 21 | There is no carry. 5 + 8 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3. 22 | The carry is 1. 8 + 4 + 1 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 3. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 3. 24 | Therefore, the answer is 133. 25 | 26 | Question: In base-10, what is 34 + 31? 27 | Answer: 28 | 34 is 3, 4. 31 is 3, 1. So the steps are 4 + 1, 3 + 3. 29 | There is no carry. 4 + 1 = 5. 5 is 0, 5. So we clear the carry. Prepend 5 to the answer. So far the answer has 1 digit: 5. 30 | There is no carry. 3 + 3 = 6. 6 is 0, 6. So we clear the carry. Prepend 6 to the answer. So far the answer has 2 digits: 6, 5. 31 | There is no carry. So far the answer has 2 digits: 6, 5. 32 | Therefore, the answer is 65. 33 | 34 | Question: In base-10, what is 58 + 34? 35 | Answer: 36 | 58 is 5, 8. 34 is 3, 4. So the steps are 8 + 4, 5 + 3. 37 | There is no carry. 8 + 4 = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 1 digit: 2. 38 | The carry is 1. 5 + 3 + 1 = 9. 9 is 0, 9. So we clear the carry. Prepend 9 to the answer. So far the answer has 2 digits: 9, 2. 39 | There is no carry. So far the answer has 2 digits: 9, 2. 40 | Therefore, the answer is 92. 41 | 42 | Question: In base-10, what is {{ query[0] }} + {{ query[1] }}? 
43 | Answer: 44 | 45 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-11/5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-11, what is 76 + 14? 3 | Answer: 4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 5 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A. 6 | There is no carry. 7 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, A. 7 | There is no carry. So far the answer has 2 digits: 8, A. 8 | Therefore, the answer is 8A. 9 | 10 | Question: In base-11, what is 97 + 74? 11 | Answer: 12 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7. 13 | There is no carry. 7 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0. 14 | The carry is 1. 9 + 7 + 1 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 2 digits: 6, 0. 15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 6, 0. 16 | Therefore, the answer is 160. 17 | 18 | Question: In base-11, what is 85 + A3? 19 | Answer: 20 | 85 is 8, 5. A3 is A, 3. So the steps are 5 + 3, 8 + A. 21 | There is no carry. 5 + 3 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 1 digit: 8. 22 | There is no carry. 8 + A = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 8. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 8. 24 | Therefore, the answer is 178. 25 | 26 | Question: In base-11, what is 92 + 52? 27 | Answer: 28 | 92 is 9, 2. 52 is 5, 2. So the steps are 2 + 2, 9 + 5. 29 | There is no carry. 2 + 2 = 4. 4 is 0, 4. So we clear the carry. Prepend 4 to the answer. 
So far the answer has 1 digit: 4. 30 | There is no carry. 9 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 4. 31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 4. 32 | Therefore, the answer is 134. 33 | 34 | Question: In base-11, what is 29 + 58? 35 | Answer: 36 | 29 is 2, 9. 58 is 5, 8. So the steps are 9 + 8, 2 + 5. 37 | There is no carry. 9 + 8 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 1 digit: 6. 38 | The carry is 1. 2 + 5 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, 6. 39 | There is no carry. So far the answer has 2 digits: 8, 6. 40 | Therefore, the answer is 86. 41 | 42 | Question: In base-11, what is {{ query[0] }} + {{ query[1] }}? 43 | Answer: 44 | 45 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-16/5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-16, what is EC + DD? 3 | Answer: 4 | EC is E, C. DD is D, D. So the steps are C + D, E + D. 5 | There is no carry. C + D = 19. 19 is 1, 9. So we set the carry to 1. Prepend 9 to the answer. So far the answer has 1 digit: 9. 6 | The carry is 1. E + D + 1 = 1C. 1C is 1, C. So we set the carry to 1. Prepend C to the answer. So far the answer has 2 digits: C, 9. 7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, C, 9. 8 | Therefore, the answer is 1C9. 9 | 10 | Question: In base-16, what is 18 + 9F? 11 | Answer: 12 | 18 is 1, 8. 9F is 9, F. So the steps are 8 + F, 1 + 9. 13 | There is no carry. 8 + F = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 1 digit: 7. 14 | The carry is 1. 1 + 9 + 1 = B. B is 0, B. So we clear the carry. Prepend B to the answer. 
So far the answer has 2 digits: B, 7. 15 | There is no carry. So far the answer has 2 digits: B, 7. 16 | Therefore, the answer is B7. 17 | 18 | Question: In base-16, what is 79 + 8B? 19 | Answer: 20 | 79 is 7, 9. 8B is 8, B. So the steps are 9 + B, 7 + 8. 21 | There is no carry. 9 + B = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4. 22 | The carry is 1. 7 + 8 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 4. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 4. 24 | Therefore, the answer is 104. 25 | 26 | Question: In base-16, what is A6 + 94? 27 | Answer: 28 | A6 is A, 6. 94 is 9, 4. So the steps are 6 + 4, A + 9. 29 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A. 30 | There is no carry. A + 9 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, A. 31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, A. 32 | Therefore, the answer is 13A. 33 | 34 | Question: In base-16, what is 54 + D3? 35 | Answer: 36 | 54 is 5, 4. D3 is D, 3. So the steps are 4 + 3, 5 + D. 37 | There is no carry. 4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7. 38 | There is no carry. 5 + D = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 2 digits: 2, 7. 39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 2, 7. 40 | Therefore, the answer is 127. 41 | 42 | Question: In base-16, what is {{ query[0] }} + {{ query[1] }}? 
43 | Answer: 44 | 45 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-9/5-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-9, what is 76 + 14? 3 | Answer: 4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 5 | There is no carry. 6 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1. 6 | The carry is 1. 7 + 1 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 1. 7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 1. 8 | Therefore, the answer is 101. 9 | 10 | Question: In base-9, what is 86 + 57? 11 | Answer: 12 | 86 is 8, 6. 57 is 5, 7. So the steps are 6 + 7, 8 + 5. 13 | There is no carry. 6 + 7 = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4. 14 | The carry is 1. 8 + 5 + 1 = 15. 15 is 1, 5. So we set the carry to 1. Prepend 5 to the answer. So far the answer has 2 digits: 5, 4. 15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 5, 4. 16 | Therefore, the answer is 154. 17 | 18 | Question: In base-9, what is 63 + 34? 19 | Answer: 20 | 63 is 6, 3. 34 is 3, 4. So the steps are 3 + 4, 6 + 3. 21 | There is no carry. 3 + 4 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7. 22 | There is no carry. 6 + 3 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 7. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 7. 24 | Therefore, the answer is 107. 25 | 26 | Question: In base-9, what is 31 + 58? 27 | Answer: 28 | 31 is 3, 1. 58 is 5, 8. So the steps are 1 + 8, 3 + 5. 29 | There is no carry. 1 + 8 = 10. 10 is 1, 0. So we set the carry to 1. 
Prepend 0 to the answer. So far the answer has 1 digit: 0. 30 | The carry is 1. 3 + 5 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 0. 31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 0. 32 | Therefore, the answer is 100. 33 | 34 | Question: In base-9, what is 67 + 25? 35 | Answer: 36 | 67 is 6, 7. 25 is 2, 5. So the steps are 7 + 5, 6 + 2. 37 | There is no carry. 7 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3. 38 | The carry is 1. 6 + 2 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 3. 39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 3. 40 | Therefore, the answer is 103. 41 | 42 | Question: In base-9, what is {{ query[0] }} + {{ query[1] }}? 43 | Answer: 44 | 45 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-11, what is 76 + 14? 3 | Answer: 4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 5 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A. 6 | There is no carry. 7 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, A. 7 | There is no carry. So far the answer has 2 digits: 8, A. 8 | Therefore, the answer is 8A. 9 | 10 | Question: In base-11, what is 97 + 74? 11 | Answer: 12 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7. 13 | There is no carry. 7 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0. 14 | The carry is 1. 9 + 7 + 1 = 16. 16 is 1, 6. So we set the carry to 1. 
Prepend 6 to the answer. So far the answer has 2 digits: 6, 0. 15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 6, 0. 16 | Therefore, the answer is 160. 17 | 18 | Question: In base-11, what is 85 + A3? 19 | Answer: 20 | 85 is 8, 5. A3 is A, 3. So the steps are 5 + 3, 8 + A. 21 | There is no carry. 5 + 3 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 1 digit: 8. 22 | There is no carry. 8 + A = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 8. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 8. 24 | Therefore, the answer is 178. 25 | 26 | Question: In base-11, what is 92 + 52? 27 | Answer: 28 | 92 is 9, 2. 52 is 5, 2. So the steps are 2 + 2, 9 + 5. 29 | There is no carry. 2 + 2 = 4. 4 is 0, 4. So we clear the carry. Prepend 4 to the answer. So far the answer has 1 digit: 4. 30 | There is no carry. 9 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 4. 31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 4. 32 | Therefore, the answer is 134. 33 | 34 | Question: In base-11, what is 29 + 58? 35 | Answer: 36 | 29 is 2, 9. 58 is 5, 8. So the steps are 9 + 8, 2 + 5. 37 | There is no carry. 9 + 8 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 1 digit: 6. 38 | The carry is 1. 2 + 5 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, 6. 39 | There is no carry. So far the answer has 2 digits: 8, 6. 40 | Therefore, the answer is 86. 41 | 42 | Question: In base-11, what is {{ query[0] }} + {{ query[1] }}? 43 | Answer: 44 | 45 | pattern: '[A-Z0-9]+(?: \+ [A-Z0-9]+)+ = [A-Z0-9]+\.' 
46 | 47 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-16, what is EC + DD? 3 | Answer: 4 | EC is E, C. DD is D, D. So the steps are C + D, E + D. 5 | There is no carry. C + D = 19. 19 is 1, 9. So we set the carry to 1. Prepend 9 to the answer. So far the answer has 1 digit: 9. 6 | The carry is 1. E + D + 1 = 1C. 1C is 1, C. So we set the carry to 1. Prepend C to the answer. So far the answer has 2 digits: C, 9. 7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, C, 9. 8 | Therefore, the answer is 1C9. 9 | 10 | Question: In base-16, what is 18 + 9F? 11 | Answer: 12 | 18 is 1, 8. 9F is 9, F. So the steps are 8 + F, 1 + 9. 13 | There is no carry. 8 + F = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 1 digit: 7. 14 | The carry is 1. 1 + 9 + 1 = B. B is 0, B. So we clear the carry. Prepend B to the answer. So far the answer has 2 digits: B, 7. 15 | There is no carry. So far the answer has 2 digits: B, 7. 16 | Therefore, the answer is B7. 17 | 18 | Question: In base-16, what is 79 + 8B? 19 | Answer: 20 | 79 is 7, 9. 8B is 8, B. So the steps are 9 + B, 7 + 8. 21 | There is no carry. 9 + B = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4. 22 | The carry is 1. 7 + 8 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 4. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 4. 24 | Therefore, the answer is 104. 25 | 26 | Question: In base-16, what is A6 + 94? 27 | Answer: 28 | A6 is A, 6. 94 is 9, 4. So the steps are 6 + 4, A + 9. 29 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. 
So far the answer has 1 digit: A. 30 | There is no carry. A + 9 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, A. 31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, A. 32 | Therefore, the answer is 13A. 33 | 34 | Question: In base-16, what is 54 + D3? 35 | Answer: 36 | 54 is 5, 4. D3 is D, 3. So the steps are 4 + 3, 5 + D. 37 | There is no carry. 4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7. 38 | There is no carry. 5 + D = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 2 digits: 2, 7. 39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 2, 7. 40 | Therefore, the answer is 127. 41 | 42 | Question: In base-16, what is {{ query[0] }} + {{ query[1] }}? 43 | Answer: 44 | 45 | pattern: '[A-Z0-9]+(?: \+ [A-Z0-9]+)+ = [A-Z0-9]+\.' 46 | 47 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml: -------------------------------------------------------------------------------- 1 | prompt: | 2 | Question: In base-9, what is 76 + 14? 3 | Answer: 4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 5 | There is no carry. 6 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1. 6 | The carry is 1. 7 + 1 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 1. 7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 1. 8 | Therefore, the answer is 101. 9 | 10 | Question: In base-9, what is 86 + 57? 11 | Answer: 12 | 86 is 8, 6. 57 is 5, 7. So the steps are 6 + 7, 8 + 5. 13 | There is no carry. 6 + 7 = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4. 14 | The carry is 1. 
8 + 5 + 1 = 15. 15 is 1, 5. So we set the carry to 1. Prepend 5 to the answer. So far the answer has 2 digits: 5, 4. 15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 5, 4. 16 | Therefore, the answer is 154. 17 | 18 | Question: In base-9, what is 63 + 34? 19 | Answer: 20 | 63 is 6, 3. 34 is 3, 4. So the steps are 3 + 4, 6 + 3. 21 | There is no carry. 3 + 4 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7. 22 | There is no carry. 6 + 3 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 7. 23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 7. 24 | Therefore, the answer is 107. 25 | 26 | Question: In base-9, what is 31 + 58? 27 | Answer: 28 | 31 is 3, 1. 58 is 5, 8. So the steps are 1 + 8, 3 + 5. 29 | There is no carry. 1 + 8 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0. 30 | The carry is 1. 3 + 5 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 0. 31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 0. 32 | Therefore, the answer is 100. 33 | 34 | Question: In base-9, what is 67 + 25? 35 | Answer: 36 | 67 is 6, 7. 25 is 2, 5. So the steps are 7 + 5, 6 + 2. 37 | There is no carry. 7 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3. 38 | The carry is 1. 6 + 2 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 3. 39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 3. 40 | Therefore, the answer is 103. 41 | 42 | Question: In base-9, what is {{ query[0] }} + {{ query[1] }}? 43 | Answer: 44 | 45 | pattern: '[A-Z0-9]+(?: \+ [A-Z0-9]+)+ = [A-Z0-9]+\.' 
# Copyright 2024 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Utilities for training and testing."""

import argparse
from collections.abc import Mapping
from collections.abc import Sequence
import logging
import os
import sys
import time
from typing import Any
from typing import Optional


class _DebugHook(object):
  """Exception hook that lazily builds an IPython traceback formatter.

  The IPython import is deferred until the first uncaught exception so that
  importing this module stays cheap. If IPython cannot be imported, the hook
  falls back to the interpreter's default `sys.__excepthook__` instead of
  raising a second error from inside the exception hook.
  """
  # The wrapped hook; created on first call and reused afterwards.
  instance = None

  def __call__(self, *args, **kwargs):
    """Forwards the `(type, value, traceback)` triple to the wrapped hook."""
    if self.instance is None:
      try:
        from IPython.core import ultratb  # pylint: disable=g-import-not-at-top
      except ImportError:
        # IPython may not be installed; an ImportError raised here would hide
        # the original traceback, so use the built-in hook instead.
        self.instance = sys.__excepthook__
      else:
        self.instance = ultratb.FormattedTB(
            mode="Plain", color_scheme="Linux", call_pdb=1)
    return self.instance(*args, **kwargs)


sys.excepthook = _DebugHook()


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
  """Parses command line arguments.

  Args:
    argv: argument strings to parse. When None (the default), argparse reads
      `sys.argv[1:]`, which is the behavior callers passing no argument get.
  Returns:
    namespace with `artifacts`, `config`, `split`, `num_iteration` and
    `output_dir` attributes
  """

  parser = argparse.ArgumentParser("")
  parser.add_argument(
      "-a", "--artifacts", default="artifacts",
      help="folder for all the artifacts", required=False)
  parser.add_argument("-c", "--config",
                      help="yaml configuration file", required=True)
  parser.add_argument("-s", "--split",
                      help="data split to train / test on", default=None)
  parser.add_argument("-n", "--num-iteration",
                      help="number of training iterations",
                      type=int, default=2000)
  parser.add_argument("-o", "--output-dir",
                      help="directory to store logs and checkpoints",
                      default="experiment/")
  return parser.parse_args(argv)


def create_working_directory(
    args: argparse.Namespace,
    cfg: Mapping[str, Any],
) -> str:
  """Creates a working directory.

  The directory is
  `<args.output_dir>/<cfg["dataset"]>/<config stem>_<args.split>_<timestamp>`.

  Args:
    args: args; `config`, `split` and `output_dir` are read
    cfg: config dict; `dataset` is read
  Returns:
    working directory
  """
  config = os.path.splitext(os.path.basename(args.config))[0]
  time_str = time.strftime("%Y-%m-%d-%H-%M-%S")
  working_dir = os.path.join(args.output_dir, cfg["dataset"],
                             f"{config}_{args.split}_{time_str}")
  # The timestamp has one-second resolution, so two runs started within the
  # same second map to the same path; tolerate that instead of crashing.
  os.makedirs(working_dir, exist_ok=True)
  return working_dir


def create_logger(working_dir: str) -> logging.Logger:
  """Creates a logger with both stream and file handlers.

  The root logger is configured at INFO level. Calling this function again
  with the same working directory does not attach duplicate handlers, so each
  record is written once per destination.

  Args:
    working_dir: working directory; records are written to `log.txt` inside it
  Returns:
    logger
  """
  logger = logging.getLogger("")
  logger.setLevel(logging.INFO)
  # FileHandler stores an absolute path in `baseFilename`; compare like-for-like.
  log_file = os.path.abspath(os.path.join(working_dir, "log.txt"))

  has_stream = False
  has_file = False
  for handler in logger.handlers:
    # FileHandler subclasses StreamHandler, so it must be tested first.
    if isinstance(handler, logging.FileHandler):
      has_file = has_file or handler.baseFilename == log_file
    elif isinstance(handler, logging.StreamHandler):
      # Only a handler on the current stderr counts as "ours"; handlers that
      # other tools attach to different streams must not suppress console logs.
      has_stream = has_stream or getattr(handler, "stream", None) is sys.stderr

  if not has_stream:
    logger.addHandler(logging.StreamHandler())
  if not has_file:
    logger.addHandler(logging.FileHandler(log_file))
  return logger
3 | Knowledge: 4 | {%- set key_rules = {} -%} 5 | {%- for rule in rules -%} 6 | {%- set tokens = rule.split(" ") -%} 7 | {%- set _ = key_rules.update({(tokens[0].split("'")[0], tokens[1]): rule}) -%} 8 | {%- endfor -%} 9 | {%- set global = namespace(old =["", ""]) -%} 10 | {%- for key, rule in key_rules | dictsort -%} 11 | {%- if global.old[1] and global.old != key -%} 12 | 13 | {%- endif -%} 14 | {%- if global.old[0] and global.old[0] != key[0] -%} 15 | 16 | {%- endif -%} 17 | {%- if global.old[0] != key[0] %} 18 | <{{ key[0] }}> 19 | {%- endif -%} 20 | {%- if global.old != key -%} 21 | <{{ key[1] }}> 22 | {%- endif -%} 23 | {{- rule -}} 24 | {%- set global.old = key -%} 25 | {%- endfor -%} 26 | 27 | 28 | prompt: | 29 | Context: The relations on the path from Alan to Anthony are daughter, uncle, son. 30 | Question: Anthony is Alan's what? 31 | Answer: 32 | For daughter's uncle, we retrieve daughter's uncle is brother. So the relations are reduced to brother, son. 33 | For brother's son, we retrieve brother's son is nephew. So the relations are reduced to nephew. 34 | Therefore, the answer is nephew. 35 | 36 | Context: The relations on the path from Annie to Carlos are brother, mother, son. 37 | Question: Carlos is Annie's what? 38 | Answer: 39 | For brother's mother, we retrieve brother's mother is mother. So the relations are reduced to mother, son. 40 | For mother's son, we retrieve mother's son is brother. So the relations are reduced to brother. 41 | Therefore, the answer is brother. 42 | 43 | Context: The relations on the path from Beverly to Michelle are father, daughter, aunt. 44 | Question: Michelle is Beverly's what? 45 | Answer: 46 | For father's daughter, we retrieve father's daughter is sister. So the relations are reduced to sister, aunt. 47 | For sister's aunt, we retrieve sister's aunt is aunt. So the relations are reduced to aunt. 48 | Therefore, the answer is aunt. 
49 | 50 | Context: The relations on the path from Lee to Jeanna are father, daughter, sister. 51 | Question: Jeanna is Lee's what? 52 | Answer: 53 | For father's daughter, we retrieve father's daughter is sister. So the relations are reduced to sister, sister. 54 | For sister's sister, we retrieve sister's sister is sister. So the relations are reduced to sister. 55 | Therefore, the answer is sister. 56 | 57 | Context: The relations on the path from Craig to Molly are sister, father, mother. 58 | Question: Molly is Craig's what? 59 | Answer: 60 | For sister's father, we retrieve sister's father is father. So the relations are reduced to father, mother. 61 | For father's mother, we retrieve father's mother is grandmother. So the relations are reduced to grandmother. 62 | Therefore, the answer is grandmother. 63 | 64 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}. 65 | Question: {{ query[1] }} is {{ query[0] }}'s what? 66 | Answer: 67 | 68 | return_last: yes -------------------------------------------------------------------------------- /source/train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def main():
  """Learn a rule library by prompting an LLM over the training split.

  Reads the split-specific section of the YAML config given on the command
  line, builds the dataset, model and prompt function it names, and then
  feeds `args.num_iteration` samples through the prompt function. The rules
  returned for every sample are added to a `prompts.RuleLibrary` together
  with that sample's accuracy. The library is written to the working
  directory every 100 iterations and once more after the last iteration.

  Raises:
    ValueError: If the config names an unknown dataset or model.
  """
  random.seed(0)
  args = util.parse_args()
  args.split = args.split or "train"

  with open(args.config, "r") as fin:
    cfg = yaml.safe_load(fin.read())
  # The config file holds one section per split; keep only the active one.
  cfg = cfg[args.split]

  working_dir = util.create_working_directory(args, cfg)
  logger = util.create_logger(working_dir)
  logger.warning(pprint.pformat(vars(args)))
  logger.warning(pprint.pformat(cfg))

  if cfg["dataset"] == "clutrr":
    dataset = datasets.CLUTRR(os.path.join(args.artifacts, "dataset/clutrr"))
  elif cfg["dataset"].startswith("base-"):
    # Dataset names look like "base-9"; the suffix is the numeral base.
    base = int(cfg["dataset"][5:])
    dataset = datasets.Arithmetic(
        os.path.join(args.artifacts, "dataset/arithmetic"), base=base)
  elif cfg["dataset"] == "list_functions":
    dataset = datasets.ListFunctions(
        os.path.join(args.artifacts, "dataset/list_functions"))
  else:
    # The config key is "dataset" (singular). Looking up "datasets" here
    # would raise KeyError and hide the intended error message.
    raise ValueError(f"Unknown dataset `{cfg['dataset']}`")

  train_set = dataset.get_split(args.split)
  max_tokens = cfg.get("max_tokens", 2000)
  if cfg["model"].startswith("gpt"):
    model = models.GPT(cfg["model"], max_tokens=max_tokens)
  elif cfg["model"].startswith("gemini"):
    model = models.Gemini(cfg["model"], max_tokens=max_tokens)
  else:
    raise ValueError(f"Unknown model `{cfg['model']}`")
  function = prompts.PromptFunction.from_yaml(
      os.path.join(args.artifacts, cfg["prompt"]))
  library = prompts.RuleLibrary()

  # Repeat the training set for as many whole epochs as fit, then top it up
  # with a random sample so that exactly `args.num_iteration` samples are
  # visited.
  num_epoch = args.num_iteration // len(train_set)
  train_set = train_set * num_epoch + random.sample(
      train_set, args.num_iteration % len(train_set))

  total_cost = 0
  num_iteration = 0
  for sample in tqdm.tqdm(train_set):
    truth = sample["answer"]
    pred, cost, rules = function(model, sample)
    logger.warning("rules:")
    for rule in rules:
      logger.warning(rule)
    acc = dataset.evaluate(truth, pred)

    # When the sample carries a concept, prefix every rule with it before
    # the rule is stored in the library.
    if "concept" in sample:
      concept = sample["concept"]
      rules = [f"[{concept}] {rule}" for rule in rules]
    library.update(rules, acc)
    total_cost += cost
    logger.warning("truth: %s, pred: %s, accuracy: %s", truth, pred, acc)
    logger.warning("total cost: %s", total_cost)

    num_iteration += 1
    # Checkpoint periodically and always after the final iteration.
    if num_iteration % 100 == 0 or num_iteration == args.num_iteration:
      save_file = os.path.join(working_dir, f"library_{num_iteration}.yaml")
      library.save(save_file)
      logger.warning("Save the rule library to `%s`", save_file)
3 | Knowledge: 4 | {%- set key_rules = {} -%} 5 | {%- for rule in rules -%} 6 | {%- set tokens = rule.split("=")[0].split("+") -%} 7 | {%- set _ = key_rules.update({(tokens | length, tokens[0] | trim, tokens[1] | trim): rule}) -%} 8 | {%- endfor -%} 9 | {%- set global = namespace(old=["", "", ""]) -%} 10 | {%- for key, rule in key_rules | dictsort -%} 11 | {%- if key[0] > 2 -%} 12 | {%- set key = ["carry", key[1], key[2]] -%} 13 | {%- else -%} 14 | {%- set key = ["no_carry", key[1], key[2]] -%} 15 | {%- endif -%} 16 | {%- if global.old[2] and global.old != key -%} 17 | 18 | {%- endif -%} 19 | {%- if global.old[1] and global.old[:2] != key[:2] -%} 20 | 21 | {%- endif -%} 22 | {%- if global.old[0] and global.old[0] != key[0] %} 23 | 24 | {%- endif -%} 25 | {%- if global.old[0] != key[0] %} 26 | <{{ key[0] }}> 27 | {%- endif -%} 28 | {%- if global.old[:2] != key[:2] %} 29 | <{{ key[1] }}> 30 | {%- endif -%} 31 | {%- if global.old != key -%} 32 | <{{ key[2] }}> 33 | {%- endif -%} 34 | {{- rule -}} 35 | {%- set global.old = key -%} 36 | {%- endfor -%} 37 | 38 | 39 | 40 | prompt: | 41 | Question: In base-11, what is 76 + 14? 42 | Answer: 43 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 44 | There is no carry. <6><4>6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A. 45 | There is no carry. <7><1>7 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, A. 46 | There is no carry. So far the answer has 2 digits: 8, A. 47 | Therefore, the answer is 8A. 48 | 49 | Question: In base-11, what is 97 + 74? 50 | Answer: 51 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7. 52 | There is no carry. <7><4>7 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0. 53 | The carry is 1. <9><7>9 + 7 + 1 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 2 digits: 6, 0. 
54 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 6, 0. 55 | Therefore, the answer is 160. 56 | 57 | Question: In base-11, what is 85 + A3? 58 | Answer: 59 | 85 is 8, 5. A3 is A, 3. So the steps are 5 + 3, 8 + A. 60 | There is no carry. <5><3>5 + 3 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 1 digit: 8. 61 | There is no carry. <8>8 + A = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 8. 62 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 8. 63 | Therefore, the answer is 178. 64 | 65 | Question: In base-11, what is 92 + 52? 66 | Answer: 67 | 92 is 9, 2. 52 is 5, 2. So the steps are 2 + 2, 9 + 5. 68 | There is no carry. <2><2>2 + 2 = 4. 4 is 0, 4. So we clear the carry. Prepend 4 to the answer. So far the answer has 1 digit: 4. 69 | There is no carry. <9><5>9 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 4. 70 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 4. 71 | Therefore, the answer is 134. 72 | 73 | Question: In base-11, what is 29 + 58? 74 | Answer: 75 | 29 is 2, 9. 58 is 5, 8. So the steps are 9 + 8, 2 + 5. 76 | There is no carry. <9><8>9 + 8 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 1 digit: 6. 77 | The carry is 1. <2><5>2 + 5 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, 6. 78 | There is no carry. So far the answer has 2 digits: 8, 6. 79 | Therefore, the answer is 86. 80 | 81 | Question: In base-11, what is {{ query[0] }} + {{ query[1] }}? 
82 | Answer: 83 | 84 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/list_functions/4-shot_cot.yaml: -------------------------------------------------------------------------------- 1 | system: | 2 | Instruction: Infer the function behind the examples. Use the function to answer the questions. 3 | 4 | prompt: | 5 | Examples: 6 | [0, 8, 5, 2, 7, 1, 4, 6, 9, 3] -> [3, 8, 5, 2, 7, 1, 4, 6, 9, 3] 7 | [4, 0, 1] -> [1, 0, 1] 8 | [6, 1, 7, 5, 3, 2, 8, 4, 9] -> [9, 1, 7, 5, 3, 2, 8, 4, 9] 9 | [6, 2, 1, 9, 4] -> [4, 2, 1, 9, 4] 10 | [2, 9, 7, 5, 3, 8, 1, 4] -> [4, 9, 7, 5, 3, 8, 1, 4] 11 | [5, 1, 7, 8, 9, 4, 0, 3, 2] -> [2, 1, 7, 8, 9, 4, 0, 3, 2] 12 | Questions: 13 | [5, 8, 6, 1, 0, 9, 7] -> ? 14 | [3, 8, 6, 0] -> ? 15 | [8, 3] -> ? 16 | [3, 2, 0, 1, 6, 8, 7, 5] -> ? 17 | [5, 2, 0, 8, 9, 6] -> ? 18 | [8, 5, 7, 4, 2, 3, 6] -> ? 19 | Answers: 20 | From the examples, we infer the function is to replace the first element with the last element. 21 | Using this function, the answers to the questions are: 22 | [5, 8, 6, 1, 0, 9, 7] -> [7, 8, 6, 1, 0, 9, 7] 23 | [3, 8, 6, 0] -> [0, 8, 6, 0] 24 | [8, 3] -> [3, 3] 25 | [3, 2, 0, 1, 6, 8, 7, 5] -> [5, 2, 0, 1, 6, 8, 7, 5] 26 | [5, 2, 0, 8, 9, 6] -> [6, 2, 0, 8, 9, 6] 27 | [8, 5, 7, 4, 2, 3, 6] -> [6, 5, 7, 4, 2, 3, 6] 28 | 29 | Examples: 30 | [2] -> [2] 31 | [4, 3, 0, 1, 7, 8] -> [4, 3, 0, 1, 7, 8, 3] 32 | [5, 0, 2, 9] -> [5, 0, 2, 9, 9] 33 | [7, 0, 2, 5] -> [7, 0, 2, 5] 34 | [3, 4, 7, 6, 0] -> [3, 4, 7, 6, 0, 3] 35 | [8, 1, 2, 3, 7] -> [8, 1, 2, 3, 7, 3] 36 | Questions: 37 | [9, 1] -> ? 38 | [6] -> ? 39 | [1, 9, 5, 0] -> ? 40 | [4, 6, 9, 0, 7, 8, 1, 2] -> ? 41 | [4, 2, 8] -> ? 42 | [6, 2, 0, 3, 1, 8, 7] -> ? 43 | Answers: 44 | From the examples, we infer the function is to append 3 if the list contains a 3, else append 9 if the list contains a 9. 
45 | Using this function, the answers to the questions are: 46 | [9, 1] -> [9, 1, 9] 47 | [6] -> [6] 48 | [1, 9, 5, 0] -> [1, 9, 5, 0, 9] 49 | [4, 6, 9, 0, 7, 8, 1, 2] -> [4, 6, 9, 0, 7, 8, 1, 2, 9] 50 | [4, 2, 8] -> [4, 2, 8] 51 | [6, 2, 0, 3, 1, 8, 7] -> [6, 2, 0, 3, 1, 8, 7, 3] 52 | 53 | Examples: 54 | [1, 0, 9, 7, 4, 2, 5, 3, 6, 8] -> [9, 0, 1, 4, 4, 5] 55 | [3, 8, 4, 6, 1, 5, 7, 0] -> [4, 8, 3, 4, 1, 7] 56 | [5, 4, 7, 2, 9, 3, 8, 1] -> [7, 4, 5, 4, 9, 8] 57 | [3, 9, 2, 0, 6, 8, 5, 1, 7] -> [2, 9, 3, 4, 6, 5] 58 | [9, 2, 1, 3, 4, 7, 6, 8, 5, 0] -> [1, 2, 9, 4, 4, 6] 59 | [0, 7, 9, 3, 1, 5, 8, 2, 6] -> [9, 7, 0, 4, 1, 8] 60 | Questions: 61 | [3, 9, 7, 6, 0, 5, 1] -> ? 62 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> ? 63 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> ? 64 | [8, 4, 9, 1, 3, 2, 7] -> ? 65 | [8, 3, 7, 0, 4, 2, 5] -> ? 66 | [6, 2, 1, 0, 9, 8, 5] -> ? 67 | Answers: 68 | From the examples, we infer the function is to generate a list of elements 3, 2, 1, the number 4, then elements 5 and 7. 69 | Using this function, the answers to the questions are: 70 | [3, 9, 7, 6, 0, 5, 1] -> [7, 9, 3, 4, 0, 1] 71 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> [9, 5, 2, 4, 8, 0] 72 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> [7, 0, 9, 4, 4, 3] 73 | [8, 4, 9, 1, 3, 2, 7] -> [9, 4, 8, 4, 3, 7] 74 | [8, 3, 7, 0, 4, 2, 5] -> [7, 3, 8, 4, 4, 5] 75 | [6, 2, 1, 0, 9, 8, 5] -> [1, 2, 6, 4, 9, 5] 76 | 77 | Examples: 78 | [] -> [] 79 | [1, 5, 6, 2, 8, 3, 7] -> [7, 3, 8, 2, 6, 5, 1] 80 | [2, 1, 9, 6, 3, 5, 4, 8] -> [8, 4, 5, 3, 6, 9, 1, 2] 81 | [9, 1, 2, 8, 0] -> [0, 8, 2, 1, 9] 82 | [1, 0, 7, 3, 9, 2] -> [2, 9, 3, 7, 0, 1] 83 | [7, 6, 3, 0, 4, 1, 5, 2] -> [2, 5, 1, 4, 0, 3, 6, 7] 84 | Questions: 85 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> ? 86 | [6, 4, 0] -> ? 87 | [3, 6, 1, 7, 0, 4] -> ? 88 | [5, 4, 2, 7] -> ? 89 | [5, 7, 6, 2, 3] -> ? 90 | [7, 9] -> ? 91 | Answers: 92 | From the examples, we infer the function is to reverse the elements. 
93 | Using this function, the answers to the questions are: 94 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> [9, 1, 3, 4, 0, 8, 7, 5, 6, 2] 95 | [6, 4, 0] -> [0, 4, 6] 96 | [3, 6, 1, 7, 0, 4] -> [4, 0, 7, 1, 6, 3] 97 | [5, 4, 2, 7] -> [7, 2, 4, 5] 98 | [5, 7, 6, 2, 3] -> [3, 2, 6, 7, 5] 99 | [7, 9] -> [9, 7] 100 | 101 | Examples: 102 | {%- for i in range(train_queries | length) %} 103 | {{ train_queries[i] }} -> {{ train_answers[i] }} 104 | {%- endfor %} 105 | Questions: 106 | {%- for query in queries %} 107 | {{ query }} -> ? 108 | {%- endfor %} 109 | Answers: -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml: -------------------------------------------------------------------------------- 1 | system: | 2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary. 3 | Knowledge: 4 | {%- set key_rules = {} -%} 5 | {%- for rule in rules -%} 6 | {%- set tokens = rule.split("=")[0].split("+") -%} 7 | {%- set _ = key_rules.update({(tokens | length, tokens[0] | trim, tokens[1] | trim): rule}) -%} 8 | {%- endfor -%} 9 | {%- set global = namespace(old=["", "", ""]) -%} 10 | {%- for key, rule in key_rules | dictsort -%} 11 | {%- if key[0] > 2 -%} 12 | {%- set key = ["carry", key[1], key[2]] -%} 13 | {%- else -%} 14 | {%- set key = ["no_carry", key[1], key[2]] -%} 15 | {%- endif -%} 16 | {%- if global.old[2] and global.old != key -%} 17 | 18 | {%- endif -%} 19 | {%- if global.old[1] and global.old[:2] != key[:2] -%} 20 | 21 | {%- endif -%} 22 | {%- if global.old[0] and global.old[0] != key[0] %} 23 | 24 | {%- endif -%} 25 | {%- if global.old[0] != key[0] %} 26 | <{{ key[0] }}> 27 | {%- endif -%} 28 | {%- if global.old[:2] != key[:2] %} 29 | <{{ key[1] }}> 30 | {%- endif -%} 31 | {%- if global.old != key -%} 32 | <{{ key[2] }}> 33 | {%- endif -%} 34 | {{- rule -}} 35 | {%- set global.old = key -%} 
36 | {%- endfor -%} 37 | 38 | 39 | 40 | prompt: | 41 | Question: In base-16, what is EC + DD? 42 | Answer: 43 | EC is E, C. DD is D, D. So the steps are C + D, E + D. 44 | There is no carry. C + D = 19. 19 is 1, 9. So we set the carry to 1. Prepend 9 to the answer. So far the answer has 1 digit: 9. 45 | The carry is 1. E + D + 1 = 1C. 1C is 1, C. So we set the carry to 1. Prepend C to the answer. So far the answer has 2 digits: C, 9. 46 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, C, 9. 47 | Therefore, the answer is 1C9. 48 | 49 | Question: In base-16, what is 18 + 9F? 50 | Answer: 51 | 18 is 1, 8. 9F is 9, F. So the steps are 8 + F, 1 + 9. 52 | There is no carry. <8>8 + F = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 1 digit: 7. 53 | The carry is 1. <1><9>1 + 9 + 1 = B. B is 0, B. So we clear the carry. Prepend B to the answer. So far the answer has 2 digits: B, 7. 54 | There is no carry. So far the answer has 2 digits: B, 7. 55 | Therefore, the answer is B7. 56 | 57 | Question: In base-16, what is 79 + 8B? 58 | Answer: 59 | 79 is 7, 9. 8B is 8, B. So the steps are 9 + B, 7 + 8. 60 | There is no carry. <9>9 + B = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4. 61 | The carry is 1. <7><8>7 + 8 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 4. 62 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 4. 63 | Therefore, the answer is 104. 64 | 65 | Question: In base-16, what is A6 + 94? 66 | Answer: 67 | A6 is A, 6. 94 is 9, 4. So the steps are 6 + 4, A + 9. 68 | There is no carry. <6><4>6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A. 69 | There is no carry. <9>A + 9 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, A. 70 | The carry is 1. 
Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, A. 71 | Therefore, the answer is 13A. 72 | 73 | Question: In base-16, what is 54 + D3? 74 | Answer: 75 | 54 is 5, 4. D3 is D, 3. So the steps are 4 + 3, 5 + D. 76 | There is no carry. <4><3>4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7. 77 | There is no carry. <5>5 + D = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 2 digits: 2, 7. 78 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 2, 7. 79 | Therefore, the answer is 127. 80 | 81 | Question: In base-16, what is {{ query[0] }} + {{ query[1] }}? 82 | Answer: 83 | 84 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml: -------------------------------------------------------------------------------- 1 | system: | 2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary. 
3 | Knowledge: 4 | {%- set key_rules = {} -%} 5 | {%- for rule in rules -%} 6 | {%- set tokens = rule.split("=")[0].split("+") -%} 7 | {%- set _ = key_rules.update({(tokens | length, tokens[0] | trim, tokens[1] | trim): rule}) -%} 8 | {%- endfor -%} 9 | {%- set global = namespace(old=["", "", ""]) -%} 10 | {%- for key, rule in key_rules | dictsort -%} 11 | {%- if key[0] > 2 -%} 12 | {%- set key = ["carry", key[1], key[2]] -%} 13 | {%- else -%} 14 | {%- set key = ["no_carry", key[1], key[2]] -%} 15 | {%- endif -%} 16 | {%- if global.old[2] and global.old != key -%} 17 | 18 | {%- endif -%} 19 | {%- if global.old[1] and global.old[:2] != key[:2] -%} 20 | 21 | {%- endif -%} 22 | {%- if global.old[0] and global.old[0] != key[0] %} 23 | 24 | {%- endif -%} 25 | {%- if global.old[0] != key[0] %} 26 | <{{ key[0] }}> 27 | {%- endif -%} 28 | {%- if global.old[:2] != key[:2] %} 29 | <{{ key[1] }}> 30 | {%- endif -%} 31 | {%- if global.old != key -%} 32 | <{{ key[2] }}> 33 | {%- endif -%} 34 | {{- rule -}} 35 | {%- set global.old = key -%} 36 | {%- endfor -%} 37 | 38 | 39 | 40 | prompt: | 41 | Question: In base-9, what is 76 + 14? 42 | Answer: 43 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1. 44 | There is no carry. <6><4>6 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1. 45 | The carry is 1. <7><1>7 + 1 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 1. 46 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 1. 47 | Therefore, the answer is 101. 48 | 49 | Question: In base-9, what is 86 + 57? 50 | Answer: 51 | 86 is 8, 6. 57 is 5, 7. So the steps are 6 + 7, 8 + 5. 52 | There is no carry. <6><7>6 + 7 = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4. 53 | The carry is 1. <8><5>8 + 5 + 1 = 15. 15 is 1, 5. So we set the carry to 1. Prepend 5 to the answer. 
So far the answer has 2 digits: 5, 4. 54 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 5, 4. 55 | Therefore, the answer is 154. 56 | 57 | Question: In base-9, what is 63 + 34? 58 | Answer: 59 | 63 is 6, 3. 34 is 3, 4. So the steps are 3 + 4, 6 + 3. 60 | There is no carry. <4><3>4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7. 61 | There is no carry. <6><3>6 + 3 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 7. 62 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 7. 63 | Therefore, the answer is 107. 64 | 65 | Question: In base-9, what is 31 + 58? 66 | Answer: 67 | 31 is 3, 1. 58 is 5, 8. So the steps are 1 + 8, 3 + 5. 68 | There is no carry. <1><8>1 + 8 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0. 69 | The carry is 1. <3><5>3 + 5 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 0. 70 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 0. 71 | Therefore, the answer is 100. 72 | 73 | Question: In base-9, what is 67 + 25? 74 | Answer: 75 | 67 is 6, 7. 25 is 2, 5. So the steps are 7 + 5, 6 + 2. 76 | There is no carry. <7><5>7 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3. 77 | The carry is 1. <6><2>6 + 2 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 3. 78 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 3. 79 | Therefore, the answer is 103. 80 | 81 | Question: In base-9, what is {{ query[0] }} + {{ query[1] }}? 
82 | Answer: 83 | 84 | return_last: yes -------------------------------------------------------------------------------- /artifacts/prompt/list_functions/4-shot_cot_htt_train.yaml: -------------------------------------------------------------------------------- 1 | system: | 2 | Instruction: Infer the function behind the examples. Use the function to answer the questions. 3 | 4 | prompt: | 5 | Examples: 6 | [0, 8, 5, 2, 7, 1, 4, 6, 9, 3] -> [3, 8, 5, 2, 7, 1, 4, 6, 9, 3] 7 | [4, 0, 1] -> [1, 0, 1] 8 | [6, 1, 7, 5, 3, 2, 8, 4, 9] -> [9, 1, 7, 5, 3, 2, 8, 4, 9] 9 | [6, 2, 1, 9, 4] -> [4, 2, 1, 9, 4] 10 | [2, 9, 7, 5, 3, 8, 1, 4] -> [4, 9, 7, 5, 3, 8, 1, 4] 11 | [5, 1, 7, 8, 9, 4, 0, 3, 2] -> [2, 1, 7, 8, 9, 4, 0, 3, 2] 12 | Questions: 13 | [5, 8, 6, 1, 0, 9, 7] -> ? 14 | [3, 8, 6, 0] -> ? 15 | [8, 3] -> ? 16 | [3, 2, 0, 1, 6, 8, 7, 5] -> ? 17 | [5, 2, 0, 8, 9, 6] -> ? 18 | [8, 5, 7, 4, 2, 3, 6] -> ? 19 | Answers: 20 | From the examples, we infer the function is to replace the first element with the last element. 21 | Using this function, the answers to the questions are: 22 | [5, 8, 6, 1, 0, 9, 7] -> [7, 8, 6, 1, 0, 9, 7] 23 | [3, 8, 6, 0] -> [0, 8, 6, 0] 24 | [8, 3] -> [3, 3] 25 | [3, 2, 0, 1, 6, 8, 7, 5] -> [5, 2, 0, 1, 6, 8, 7, 5] 26 | [5, 2, 0, 8, 9, 6] -> [6, 2, 0, 8, 9, 6] 27 | [8, 5, 7, 4, 2, 3, 6] -> [6, 5, 7, 4, 2, 3, 6] 28 | 29 | Examples: 30 | [2] -> [2] 31 | [4, 3, 0, 1, 7, 8] -> [4, 3, 0, 1, 7, 8, 3] 32 | [5, 0, 2, 9] -> [5, 0, 2, 9, 9] 33 | [7, 0, 2, 5] -> [7, 0, 2, 5] 34 | [3, 4, 7, 6, 0] -> [3, 4, 7, 6, 0, 3] 35 | [8, 1, 2, 3, 7] -> [8, 1, 2, 3, 7, 3] 36 | Questions: 37 | [9, 1] -> ? 38 | [6] -> ? 39 | [1, 9, 5, 0] -> ? 40 | [4, 6, 9, 0, 7, 8, 1, 2] -> ? 41 | [4, 2, 8] -> ? 42 | [6, 2, 0, 3, 1, 8, 7] -> ? 43 | Answers: 44 | From the examples, we infer the function is to append 3 if the list contains a 3, else append 9 if the list contains a 9. 
45 | Using this function, the answers to the questions are: 46 | [9, 1] -> [9, 1, 9] 47 | [6] -> [6] 48 | [1, 9, 5, 0] -> [1, 9, 5, 0, 9] 49 | [4, 6, 9, 0, 7, 8, 1, 2] -> [4, 6, 9, 0, 7, 8, 1, 2, 9] 50 | [4, 2, 8] -> [4, 2, 8] 51 | [6, 2, 0, 3, 1, 8, 7] -> [6, 2, 0, 3, 1, 8, 7, 3] 52 | 53 | Examples: 54 | [1, 0, 9, 7, 4, 2, 5, 3, 6, 8] -> [9, 0, 1, 4, 4, 5] 55 | [3, 8, 4, 6, 1, 5, 7, 0] -> [4, 8, 3, 4, 1, 7] 56 | [5, 4, 7, 2, 9, 3, 8, 1] -> [7, 4, 5, 4, 9, 8] 57 | [3, 9, 2, 0, 6, 8, 5, 1, 7] -> [2, 9, 3, 4, 6, 5] 58 | [9, 2, 1, 3, 4, 7, 6, 8, 5, 0] -> [1, 2, 9, 4, 4, 6] 59 | [0, 7, 9, 3, 1, 5, 8, 2, 6] -> [9, 7, 0, 4, 1, 8] 60 | Questions: 61 | [3, 9, 7, 6, 0, 5, 1] -> ? 62 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> ? 63 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> ? 64 | [8, 4, 9, 1, 3, 2, 7] -> ? 65 | [8, 3, 7, 0, 4, 2, 5] -> ? 66 | [6, 2, 1, 0, 9, 8, 5] -> ? 67 | Answers: 68 | From the examples, we infer the function is to generate a list of elements 3, 2, 1, the number 4, then elements 5 and 7. 69 | Using this function, the answers to the questions are: 70 | [3, 9, 7, 6, 0, 5, 1] -> [7, 9, 3, 4, 0, 1] 71 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> [9, 5, 2, 4, 8, 0] 72 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> [7, 0, 9, 4, 4, 3] 73 | [8, 4, 9, 1, 3, 2, 7] -> [9, 4, 8, 4, 3, 7] 74 | [8, 3, 7, 0, 4, 2, 5] -> [7, 3, 8, 4, 4, 5] 75 | [6, 2, 1, 0, 9, 8, 5] -> [1, 2, 6, 4, 9, 5] 76 | 77 | Examples: 78 | [] -> [] 79 | [1, 5, 6, 2, 8, 3, 7] -> [7, 3, 8, 2, 6, 5, 1] 80 | [2, 1, 9, 6, 3, 5, 4, 8] -> [8, 4, 5, 3, 6, 9, 1, 2] 81 | [9, 1, 2, 8, 0] -> [0, 8, 2, 1, 9] 82 | [1, 0, 7, 3, 9, 2] -> [2, 9, 3, 7, 0, 1] 83 | [7, 6, 3, 0, 4, 1, 5, 2] -> [2, 5, 1, 4, 0, 3, 6, 7] 84 | Questions: 85 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> ? 86 | [6, 4, 0] -> ? 87 | [3, 6, 1, 7, 0, 4] -> ? 88 | [5, 4, 2, 7] -> ? 89 | [5, 7, 6, 2, 3] -> ? 90 | [7, 9] -> ? 91 | Answers: 92 | From the examples, we infer the function is to reverse the elements. 
93 | Using this function, the answers to the questions are: 94 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> [9, 1, 3, 4, 0, 8, 7, 5, 6, 2] 95 | [6, 4, 0] -> [0, 4, 6] 96 | [3, 6, 1, 7, 0, 4] -> [4, 0, 7, 1, 6, 3] 97 | [5, 4, 2, 7] -> [7, 2, 4, 5] 98 | [5, 7, 6, 2, 3] -> [3, 2, 6, 7, 5] 99 | [7, 9] -> [9, 7] 100 | 101 | Examples: 102 | {%- for i in range(train_queries | length) %} 103 | {{ train_queries[i] }} -> {{ train_answers[i] }} 104 | {%- endfor %} 105 | Questions: 106 | {%- for query in queries %} 107 | {{ query }} -> ? 108 | {%- endfor %} 109 | Answers: 110 | 111 | pattern: '(?<=the function is to )[^.]+\.' -------------------------------------------------------------------------------- /source/test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def main():
  """Evaluate an LLM prompt function on the test split and log accuracy.

  Reads the split-specific section of the YAML config given on the command
  line and builds the dataset, model and prompt function it names. When the
  config has a `library` entry, a `prompts.RuleLibrary` is loaded from that
  file and its rules, filtered by `min_coverage` and `min_confidence`, are
  passed to the prompt function. Accuracy is collected per `sample["level"]`
  and logged per level and as an average over levels. When accuracies are
  floats, a "task accuracy" (the fraction of samples scoring above 0.999) is
  logged as well.

  Raises:
    ValueError: If the config names an unknown dataset or model.
  """
  random.seed(0)
  args = util.parse_args()
  args.split = args.split or "test"

  with open(args.config, "r") as fin:
    cfg = yaml.safe_load(fin.read())
  # The config file holds one section per split; keep only the active one.
  cfg = cfg[args.split]

  working_dir = util.create_working_directory(args, cfg)
  logger = util.create_logger(working_dir)
  logger.warning(pprint.pformat(vars(args)))
  logger.warning(pprint.pformat(cfg))

  if cfg["dataset"] == "clutrr":
    dataset = datasets.CLUTRR(os.path.join(args.artifacts, "dataset/clutrr"))
  elif cfg["dataset"].startswith("base-"):
    # Dataset names look like "base-9"; the suffix is the numeral base.
    base = int(cfg["dataset"][5:])
    dataset = datasets.Arithmetic(
        os.path.join(args.artifacts, "dataset/arithmetic"), base=base)
  elif cfg["dataset"] == "list_functions":
    dataset = datasets.ListFunctions(
        os.path.join(args.artifacts, "dataset/list_functions"))
  else:
    # The config key is "dataset" (singular). Looking up "datasets" here
    # would raise KeyError and hide the intended error message.
    raise ValueError(f"Unknown dataset `{cfg['dataset']}`")

  test_set = dataset.get_split(args.split)
  max_tokens = cfg.get("max_tokens", 2000)
  if cfg["model"].startswith("gpt"):
    model = models.GPT(cfg["model"], max_tokens=max_tokens)
  elif cfg["model"].startswith("gemini"):
    model = models.Gemini(cfg["model"], max_tokens=max_tokens)
  else:
    raise ValueError(f"Unknown model `{cfg['model']}`")
  if "library" in cfg:
    # Rule-augmented evaluation: render the learned rules into the prompt.
    library = prompts.RuleLibrary()
    library.load(os.path.join(args.artifacts, cfg["library"]))
    rules = library.to_prompt(cfg["min_coverage"], cfg["min_confidence"])
    function = prompts.PromptFunction.from_yaml(
        os.path.join(args.artifacts, cfg["prompt"]), rules=rules)
    logger.warning("Load the rule library from `%s`", cfg["library"])
    logger.warning("min coverage: %d, min confidence: %s, #rules: %d",
                   cfg["min_coverage"], cfg["min_confidence"], len(rules))
  else:
    function = prompts.PromptFunction.from_yaml(
        os.path.join(args.artifacts, cfg["prompt"]))

  level2accs = collections.defaultdict(list)
  total_cost = 0
  for sample in tqdm.tqdm(test_set):
    truth = sample["answer"]
    level = sample["level"]
    pred, cost = function(model, sample)
    acc = dataset.evaluate(truth, pred)

    level2accs[level].append(acc)
    total_cost += cost
    logger.warning("truth: %s, pred: %s, accuracy: %s", truth, pred, acc)
    logger.warning("total cost: %s", total_cost)

  accs = []
  task_accs = []
  for level, level_accs in sorted(level2accs.items()):
    acc = sum(level_accs) / len(level_accs)
    accs.append(acc)
    if isinstance(level_accs[0], float):
      # Float accuracies are partial scores; count a sample as solved only
      # when its score is above 0.999.
      task_acc = sum(x > 0.999 for x in level_accs) / len(level_accs)
      task_accs.append(task_acc)
      logger.warning("[%s] #sample: %d, raw accuracy: %s, task accuracy: %s",
                     level, len(level_accs), acc, task_acc)
    else:
      logger.warning("[%s] #sample: %d, accuracy: %s",
                     level, len(level_accs), acc)
  if task_accs:
    logger.warning("average raw accuracy: %s, average task accuracy: %s",
                   sum(accs) / len(accs), sum(task_accs) / len(task_accs))
  else:
    logger.warning("average accuracy: %s", sum(accs) / len(accs))
Annie took her dad out to a sports bar, and they had a great time watching football and drinking beer there. 3 | Question: Anthony is Alan's what? 4 | Answer: We first extract all triplets from the document. We then find the path from Alan to Anthony. Finally, we reduce the relations on the path to get the answer. 5 | The triplets include (Anthony, father, James), (Annie, uncle, James), (Alan, daughter, Annie). 6 | The path from Alan to Anthony is (Alan, daughter, Annie), (Annie, uncle, James), (James, son, Anthony). 7 | The relations on the path are daughter, uncle, son. 8 | Daughter's uncle is brother. So the relations are reduced to brother, son. 9 | Brother's son is nephew. So the relations are reduced to nephew. 10 | Therefore, Anthony is Alan's nephew. 11 | 12 | Document: Valerie's biggest accomplishment is raising her son Carlos. Annie does n't like having to babysit her younger brother, Emmanuel. Valerie and her son Emmanuel had lunch together at a local Chinese restaurant. 13 | Question: Carlos is Annie's what? 14 | Answer: We first extract all triplets from the document. We then find the path from Annie to Carlos. Finally, we reduce the relations on the path to get the answer. 15 | The triplets include (Valerie, son, Carlos), (Annie, brother, Emmanuel), (Valerie, son, Emmanuel). 16 | The path from Annie to Carlos is (Annie, brother, Emmanuel), (Emmanuel, mother, Valerie), (Valerie, son, Carlos). 17 | The relations on the path are brother, mother, son. 18 | Brother's mother is mother. So the relations are reduced to mother, son. 19 | Mother's son is brother. So the relations are reduced to brother. 20 | Therefore, Carlos is Annie's brother. 21 | 22 | Document: James likes to take his daughter Jeanna fishing. James loves cooking with his daughter. Her name is Beverly. Jeanna loves visiting with her aunt Michelle. 23 | Question: Michelle is Beverly's what? 24 | Answer: We first extract all triplets from the document. 
We then find the path from Beverly to Michelle. Finally, we reduce the relations on the path to get the answer. 25 | The triplets include (James, daughter, Jeanna), (James, daughter, Beverly), (Jeanna, aunt, Michelle). 26 | The path from Beverly to Michelle is (Beverly, father, James), (James, daughter, Jeanna), (Jeanna, aunt, Michelle). 27 | The relations on the path are father, daughter, aunt. 28 | Father's daughter is sister. So the relations are reduced to sister, aunt. 29 | Sister's aunt is aunt. So the relations are reduced to aunt. 30 | Therefore, Michelle is Beverly's aunt. 31 | 32 | Document: Lee was finally coming of age and it was time for him and his father to go on a coming of age camping trip. Beverly, James's younger daughter, decided she wanted to go on the trip despite being several years younger. Jeanna took her younger sister Beverly to the carnival last weekend. 33 | Question: Jeanna is Lee's what? 34 | Answer: We first extract all triplets from the document. We then find the path from Lee to Jeanna. Finally, we reduce the relations on the path to get the answer. 35 | The triplets include (Lee, father, James), (James, daughter, Beverly), (Jeanna, sister, Beverly). 36 | The path from Lee to Jeanna is (Lee, father, James), (James, daughter, Beverly), (Beverly, sister, Jeanna). 37 | The relations on the path are father, daughter, sister. 38 | Father's daughter is sister. So the relations are reduced to sister, sister. 39 | Sister's sister is sister. So the relations are reduced to sister. 40 | Therefore, Jeanna is Lee's sister. 41 | 42 | Document: Craig's sister, Rosie, bought movie tickets at a discount rate. Rosie and her father Elliott love to go skiing. Often, Elliott will invite his mother Molly to join them. 43 | Question: Molly is Craig's what? 44 | Answer: We first extract all triplets from the document. We then find the path from Craig to Molly. Finally, we reduce the relations on the path to get the answer. 
45 | The triplets include (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly). 46 | The path from Craig to Molly is (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly). 47 | The relations on the path are sister, father, mother. 48 | Sister's father is father. So the relations are reduced to father, mother. 49 | Father's mother is grandmother. So the relations are reduced to grandmother. 50 | Therefore, Molly is Craig's grandmother. 51 | 52 | Document: {{ document }} 53 | Question: {{ query[1] }} is {{ query[0] }}'s what? 54 | Answer: 55 | 56 | return_last: yes -------------------------------------------------------------------------------- /artifacts/checkpoint/arithmetic/base-9/gpt-4_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | 0 + 0 = 0.: [17, 14.0] 2 | 0 + 1 = 1.: [14, 11.0] 3 | 0 + 2 = 2.: [15, 13.0] 4 | 0 + 3 = 3.: [22, 20.0] 5 | 0 + 4 = 4.: [17, 17.0] 6 | 0 + 5 = 5.: [27, 24.0] 7 | 0 + 6 = 6.: [23, 23.0] 8 | 0 + 7 = 7.: [15, 13.0] 9 | 0 + 8 = 8.: [9, 9.0] 10 | 1 + 0 = 1.: [7, 7.0] 11 | 1 + 1 + 1 = 3.: [1, 1.0] 12 | 1 + 1 + 1 = 4.: [10, 0.0] 13 | 1 + 1 = 2.: [24, 23.0] 14 | 1 + 2 + 1 = 4.: [1, 0.0] 15 | 1 + 2 + 1 = 5.: [18, 0.0] 16 | 1 + 2 = 3.: [25, 19.0] 17 | 1 + 3 + 1 = 5.: [4, 2.0] 18 | 1 + 3 + 1 = 6.: [12, 0.0] 19 | 1 + 3 = 4.: [18, 15.0] 20 | 1 + 4 + 1 = 7.: [23, 0.0] 21 | 1 + 4 = 5.: [17, 16.0] 22 | 1 + 5 + 1 = 8.: [11, 0.0] 23 | 1 + 5 = 6.: [26, 23.0] 24 | 1 + 6 + 1 = 9.: [14, 0.0] 25 | 1 + 6 = 7.: [11, 8.0] 26 | 1 + 7 + 1 = 10.: [27, 24.0] 27 | 1 + 7 = 8.: [10, 9.0] 28 | 1 + 8 + 1 = 11.: [12, 11.0] 29 | 1 + 8 = 10.: [59, 43.0] 30 | 2 + 0 = 2.: [24, 23.0] 31 | 2 + 1 + 1 = 5.: [16, 0.0] 32 | 2 + 1 = 3.: [26, 22.0] 33 | 2 + 2 + 1 = 6.: [21, 0.0] 34 | 2 + 2 = 4.: [15, 13.0] 35 | 2 + 3 + 1 = 7.: [29, 0.0] 36 | 2 + 3 = 5.: [32, 27.0] 37 | 2 + 4 + 1 = 8.: [14, 0.0] 38 | 2 + 4 = 6.: [13, 10.0] 39 | 2 + 5 + 1 = 9.: [17, 0.0] 40 | 2 + 5 = 7.: [6, 5.0] 
41 | 2 + 5 = 8.: [6, 0.0] 42 | 2 + 6 + 1 = 10.: [21, 19.0] 43 | 2 + 6 = 11.: [1, 0.0] 44 | 2 + 6 = 8.: [23, 17.0] 45 | 2 + 7 + 1 = 11.: [21, 18.0] 46 | 2 + 7 = 10.: [56, 36.0] 47 | 2 + 8 + 1 = 12.: [9, 8.0] 48 | 2 + 8 = 10.: [2, 0.0] 49 | 2 + 8 = 11.: [48, 26.0] 50 | 3 + 0 = 3.: [17, 15.0] 51 | 3 + 1 + 1 = 5.: [3, 2.0] 52 | 3 + 1 + 1 = 6.: [23, 0.0] 53 | 3 + 1 = 4.: [22, 17.0] 54 | 3 + 2 + 1 = 7.: [15, 0.0] 55 | 3 + 2 = 5.: [7, 5.0] 56 | 3 + 3 + 1 = 8.: [24, 0.0] 57 | 3 + 3 = 6.: [16, 15.0] 58 | 3 + 4 + 1 = 9.: [24, 0.0] 59 | 3 + 4 = 7.: [25, 22.0] 60 | 3 + 4 = 8.: [1, 0.0] 61 | 3 + 5 + 1 = 10.: [26, 21.0] 62 | 3 + 5 = 8.: [17, 13.0] 63 | 3 + 6 + 1 = 11.: [20, 17.0] 64 | 3 + 6 = 10.: [60, 44.0] 65 | 3 + 7 + 1 = 12.: [4, 3.0] 66 | 3 + 7 = 10.: [1, 0.0] 67 | 3 + 7 = 11.: [48, 25.0] 68 | 3 + 8 + 1 = 13.: [13, 10.0] 69 | 3 + 8 = 12.: [56, 39.0] 70 | 4 + 0 = 4.: [20, 19.0] 71 | 4 + 1 + 1 = 7.: [17, 0.0] 72 | 4 + 1 = 5.: [16, 14.0] 73 | 4 + 2 + 1 = 8.: [17, 0.0] 74 | 4 + 2 = 6.: [26, 22.0] 75 | 4 + 3 + 1 = 8.: [1, 1.0] 76 | 4 + 3 + 1 = 9.: [10, 0.0] 77 | 4 + 3 = 7.: [21, 15.0] 78 | 4 + 3 = 8.: [1, 0.0] 79 | 4 + 4 + 1 = 10.: [6, 5.0] 80 | 4 + 4 = 8.: [21, 20.0] 81 | 4 + 5 + 1 = 11.: [10, 7.0] 82 | 4 + 5 = 10.: [45, 30.0] 83 | 4 + 6 + 1 = 12.: [24, 17.0] 84 | 4 + 6 = 10.: [2, 0.0] 85 | 4 + 6 = 11.: [58, 30.0] 86 | 4 + 7 + 1 = 13.: [13, 10.0] 87 | 4 + 7 = 12.: [61, 43.0] 88 | 4 + 8 + 1 = 14.: [15, 12.0] 89 | 4 + 8 = 13.: [49, 33.0] 90 | 5 + 0 = 5.: [25, 23.0] 91 | 5 + 1 + 1 = 8.: [11, 0.0] 92 | 5 + 1 = 6.: [23, 22.0] 93 | 5 + 2 + 1 = 9.: [19, 0.0] 94 | 5 + 2 = 7.: [8, 8.0] 95 | 5 + 2 = 8.: [4, 0.0] 96 | 5 + 3 + 1 = 10.: [14, 11.0] 97 | 5 + 3 = 8.: [5, 3.0] 98 | 5 + 4 + 1 = 11.: [34, 27.0] 99 | 5 + 4 = 10.: [68, 54.0] 100 | 5 + 5 + 1 = 11.: [1, 0.0] 101 | 5 + 5 + 1 = 12.: [24, 17.0] 102 | 5 + 5 = 10.: [6, 0.0] 103 | 5 + 5 = 11.: [39, 27.0] 104 | 5 + 6 + 1 = 13.: [33, 24.0] 105 | 5 + 6 = 11.: [1, 0.0] 106 | 5 + 6 = 12.: [37, 29.0] 107 | 5 + 7 + 1 = 14.: [15, 12.0] 108 | 5 + 7 
= 13.: [67, 47.0] 109 | 5 + 8 + 1 = 15.: [12, 8.0] 110 | 5 + 8 = 14.: [52, 30.0] 111 | 6 + 0 = 6.: [15, 11.0] 112 | 6 + 1 + 1 = 9.: [24, 0.0] 113 | 6 + 1 = 7.: [19, 14.0] 114 | 6 + 2 + 1 = 10.: [18, 16.0] 115 | 6 + 2 = 8.: [11, 9.0] 116 | 6 + 3 + 1 = 11.: [31, 24.0] 117 | 6 + 3 = 10.: [62, 50.0] 118 | 6 + 4 + 1 = 12.: [24, 21.0] 119 | 6 + 4 = 10.: [4, 0.0] 120 | 6 + 4 = 11.: [51, 34.0] 121 | 6 + 5 + 1 = 13.: [24, 18.0] 122 | 6 + 5 = 11.: [1, 1.0] 123 | 6 + 5 = 12.: [54, 38.0] 124 | 6 + 6 + 1 = 14.: [20, 10.0] 125 | 6 + 6 = 12.: [1, 1.0] 126 | 6 + 6 = 13.: [68, 47.0] 127 | 6 + 7 + 1 = 15.: [16, 10.0] 128 | 6 + 7 = 14.: [59, 49.0] 129 | 6 + 7 = 14.14 is 1, 4.: [1, 1.0] 130 | 6 + 8 + 1 = 16.: [14, 7.0] 131 | 6 + 8 = 15.: [39, 23.0] 132 | 7 + 0 = 7.: [25, 21.0] 133 | 7 + 1 + 1 = 10.: [23, 18.0] 134 | 7 + 1 = 8.: [13, 12.0] 135 | 7 + 2 + 0 = 10.: [1, 1.0] 136 | 7 + 2 + 1 = 11.: [22, 17.0] 137 | 7 + 2 = 10.: [55, 48.0] 138 | 7 + 3 + 1 = 12.: [18, 15.0] 139 | 7 + 3 = 10.: [1, 1.0] 140 | 7 + 3 = 11.: [55, 31.0] 141 | 7 + 4 + 1 = 13.: [25, 22.0] 142 | 7 + 4 = 12.: [59, 45.0] 143 | 7 + 5 + 1 = 14.: [27, 22.0] 144 | 7 + 5 = 13.: [43, 30.0] 145 | 7 + 6 + 1 = 15.: [14, 11.0] 146 | 7 + 6 = 14.: [75, 45.0] 147 | 7 + 7 + 1 = 16.: [4, 2.0] 148 | 7 + 7 = 15.: [61, 42.0] 149 | 7 + 7 = 16.: [1, 0.0] 150 | 7 + 8 + 1 = 17.: [20, 7.0] 151 | 7 + 8 = 16.: [59, 17.0] 152 | 8 + 0 = 8.: [19, 17.0] 153 | 8 + 1 + 1 = 11.: [15, 13.0] 154 | 8 + 1 = 10.: [65, 54.0] 155 | 8 + 2 + 1 = 12.: [15, 15.0] 156 | 8 + 2 = 11.: [63, 38.0] 157 | 8 + 3 + 1 = 13.: [23, 19.0] 158 | 8 + 3 = 11.: [1, 1.0] 159 | 8 + 3 = 12.: [42, 30.0] 160 | 8 + 4 + 1 = 14.: [4, 4.0] 161 | 8 + 4 = 13.: [34, 27.0] 162 | 8 + 5 + 1 = 15.: [20, 16.0] 163 | 8 + 5 = 14.: [46, 29.0] 164 | 8 + 6 + 1 = 16.: [14, 10.0] 165 | 8 + 6 = 14.: [1, 1.0] 166 | 8 + 6 = 15.: [50, 36.0] 167 | 8 + 7 + 1 = 17.: [15, 9.0] 168 | 8 + 7 = 16.: [72, 24.0] 169 | 8 + 8 + 1 = 18.: [21, 0.0] 170 | 8 + 8 = 16.: [14, 8.0] 171 | 8 + 8 = 17.: [30, 4.0] 172 | 
-------------------------------------------------------------------------------- /source/prompts.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Prompt function and rule library.""" 17 | 18 | from collections import defaultdict # pylint: disable=g-importing-member 19 | from collections.abc import Mapping, Sequence 20 | import logging 21 | import re 22 | from typing import Any 23 | 24 | import jinja2 25 | from nltk import tokenize 26 | import yaml 27 | 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | 32 | class RuleLibrary: 33 | """Rule library that stores learned rules and their statistics.""" 34 | 35 | def __init__(self): 36 | self.count = defaultdict(int) 37 | self.score = defaultdict(float) 38 | 39 | def update( 40 | self, 41 | rules: Sequence[str], 42 | acc: float, 43 | ) -> None: 44 | """Update the posterior of rule confidence, based on the observed acc. 45 | 46 | Args: 47 | rules: the list of proposed rules. 48 | acc: the accuracy of applying these rules. 
49 | """ 50 | for rule in rules: 51 | self.count[rule] += 1 52 | self.score[rule] += acc 53 | 54 | def save(self, file_name: str): 55 | """Save the rule library to a file.""" 56 | data = {} 57 | for rule in self.count: 58 | data[rule] = [self.count[rule], self.score[rule]] 59 | with open(file_name, "w") as fout: 60 | fout.write(yaml.dump(data, width=1000, default_flow_style=None)) 61 | 62 | def load(self, file_name: str): 63 | """Load the rule library from a file.""" 64 | with open(file_name, "r") as fin: 65 | data = yaml.safe_load(fin.read()) 66 | for rule in data: 67 | num_recall, num_correct = data[rule] 68 | self.count[rule] = num_recall 69 | self.score[rule] = num_correct 70 | 71 | def to_prompt( 72 | self, 73 | min_coverage: int = 2, 74 | min_confidence: float = 0., 75 | ) -> Sequence[str]: 76 | """Convert the rule library to a list of prompts. 77 | 78 | Args: 79 | min_coverage: threshold of minimal rule coverage 80 | min_confidence: threshold of minimal confidence 81 | Returns: 82 | List of rules. 
83 | """ 84 | rules = {} 85 | for rule in self.count: 86 | coverage = self.count[rule] 87 | confidence = self.score[rule] / self.count[rule] 88 | if coverage >= min_coverage and confidence >= min_confidence: 89 | rules[rule] = confidence 90 | return rules 91 | 92 | 93 | class PromptFunction: 94 | """An LLM-based function defined by a prompt string.""" 95 | 96 | def __init__( 97 | self, 98 | prompt: str, 99 | system: str = None, 100 | pattern: str = None, 101 | stop: str = None, 102 | return_last: bool = False, 103 | **kwargs 104 | ) -> None: 105 | self.prompt = jinja2.Template(prompt) 106 | if system is not None: 107 | self.system = jinja2.Template(system) 108 | else: 109 | self.system = None 110 | self.pattern = pattern 111 | self.stop = stop 112 | self.return_last = return_last 113 | self.kwargs = kwargs 114 | 115 | @classmethod 116 | def from_yaml(cls, yaml_file: str, **kwargs) -> "PromptFunction": 117 | with open(yaml_file, "r") as fin: 118 | config = yaml.safe_load(fin.read()) 119 | kwargs.update(config) 120 | return cls(**kwargs) 121 | 122 | def __call__(self, model, sample: Mapping[str, Any]) -> tuple[ 123 | str, float] | tuple[str, float, Sequence[str]]: 124 | """Call the model with the sample formatted by prompt. 125 | 126 | Args: 127 | model: the LLM model. 128 | sample: the input sample defined by k-v pairs. 129 | Returns: 130 | The output of the prompt function and the cost. If pattern is defined, 131 | additionally return the matches in the output. 
132 | """ 133 | logger.info("<" * 50) 134 | if self.system is not None: 135 | system = self.system.render(**sample, **self.kwargs) 136 | marker = "#" * 20 137 | logger.info("%s System %s", marker, marker) 138 | logger.info(system) 139 | logger.info("%s Prompt %s", marker, marker) 140 | else: 141 | system = None 142 | prompt = self.prompt.render(**sample, **self.kwargs) 143 | logger.info(prompt) 144 | logger.info("=" * 50) 145 | 146 | response = model(prompt, system=system, stop=self.stop) 147 | cost = model.get_cost(prompt, system=system, response=response) 148 | logger.info(response) 149 | logger.info(">" * 50) 150 | 151 | sents = [] 152 | for line in re.split(r"\n+", response): 153 | sents += tokenize.sent_tokenize(line) 154 | if self.return_last: 155 | pred = sents[-1] 156 | else: 157 | pred = response 158 | if self.pattern: 159 | matches = [] 160 | for sent in sents: 161 | matches += re.findall(self.pattern, sent) 162 | return pred, cost, matches 163 | else: 164 | return pred, cost 165 | -------------------------------------------------------------------------------- /source/models.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
class GPT:
    """GPT model object.

    Wraps the OpenAI chat completion API and estimates the price of requests.
    """

    # Price in USD per token, as (input price, output price).
    _MODEL_PRICE: Final[Mapping[str, tuple[float, float]]] = {
        "gpt-3.5-turbo": (0.0005 / 1000, 0.0015 / 1000),
        "gpt-3.5-turbo-0613": (0.0005 / 1000, 0.0015 / 1000),
        "gpt-3.5-turbo-16k-0613": (0.0005 / 1000, 0.0015 / 1000),
        "gpt-4": (0.03 / 1000, 0.06 / 1000),
    }

    def __init__(
        self,
        model: str = "gpt-3.5-turbo",
        temperature: float = 1.0,
        top_p: float = 1.0,
        max_tokens: int = 2000,
    ):
        """Create the client. Reads the API key from OPENAI_API_KEY.

        Args:
            model: name of the OpenAI model.
            temperature: sampling temperature.
            top_p: nucleus sampling threshold.
            max_tokens: maximal number of tokens to generate.
        """
        self.client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])
        self.encoding = tiktoken.encoding_for_model(model)
        self.model = model
        self.temperature = temperature
        self.top_p = top_p
        self.max_tokens = max_tokens

    @retry(wait=wait_random_exponential(min=1, max=60),
           stop=stop_after_attempt(10))
    def __call__(
        self,
        prompt: str,
        system: str | None = None,
        stop: str | None = None,
    ):
        """Query the model, retrying with random exponential backoff.

        Args:
            prompt: the user message.
            system: optional system message, sent before the user message.
            stop: optional stop sequence.
        Returns:
            The content of the first returned choice.
        """
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=self.max_tokens,
            stop=stop
        )
        return response.choices[0].message.content

    def get_cost(
        self,
        prompt: str,
        response: str | None = None,
        system: str | None = None,
    ) -> float:
        """Get the cost for this request.

        Args:
            prompt: the user message.
            response: the model output. If missing or empty, the cost is
                estimated as if max_tokens tokens were generated.
            system: optional system message, counted as part of the input.
        Returns:
            The price of the request according to _MODEL_PRICE.
        """
        if system is not None:
            prompt = system + prompt
        num_prompt_token = len(self.encoding.encode(prompt))
        if response:
            num_response_token = len(self.encoding.encode(response))
        else:
            num_response_token = self.max_tokens
        input_price, output_price = self._MODEL_PRICE[self.model]
        return (num_prompt_token * input_price
                + num_response_token * output_price)


class Gemini:
    """Wrapper for Gemini models."""

    # Price per token, as (input price, output price).
    model_price = {
        "gemini-pro": (0, 0),
    }

    # Safety settings that set the threshold of every listed category to
    # BLOCK_NONE.
    _SETTINGS: Final[Sequence[Mapping[str, str]]] = tuple(
        {
            "category": f"HARM_CATEGORY_{category}",
            "threshold": "BLOCK_NONE",
        } for category in [
            "HARASSMENT", "HATE_SPEECH", "SEXUALLY_EXPLICIT", "DANGEROUS"]
    )

    def __init__(
        self,
        model: str = "gemini-pro",
        temperature: float = 0.9,
        top_p: float = 1,
        max_tokens: int = 2000,
    ) -> None:
        """Configure the client. Reads the API key from GOOGLE_API_KEY.

        Args:
            model: name of the Gemini model.
            temperature: sampling temperature.
            top_p: nucleus sampling threshold.
            max_tokens: maximal number of tokens to generate.
        """
        genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
        self.model = model
        self.temperature = temperature
        self.top_p = top_p
        self.max_tokens = max_tokens

    @retry(wait=wait_random_exponential(min=1, max=60),
           stop=stop_after_attempt(10))
    def __call__(
        self,
        prompt: str,
        system: str | None = None,
        stop: str | None = None,
    ):
        """Query the model, retrying with random exponential backoff.

        Args:
            prompt: the user message.
            system: optional system text. No separate system message is used
                here, so it is prepended to the prompt, separated by a blank
                line.
            stop: optional stop sequence.
        Returns:
            The text of the response.
        """
        config = genai.GenerationConfig(
            temperature=self.temperature,
            top_p=self.top_p,
            max_output_tokens=self.max_tokens,
            # stop_sequences takes a collection of strings; a bare string
            # would be read as a sequence of single-character stops.
            stop_sequences=[stop] if isinstance(stop, str) else stop
        )
        model = genai.GenerativeModel(self.model, generation_config=config,
                                      safety_settings=self._SETTINGS)
        text = f"{system}\n\n{prompt}" if system else prompt
        messages = [{"role": "user", "parts": [text]}]
        # Send the assembled message rather than the bare prompt, otherwise
        # the system text never reaches the model.
        response = model.generate_content(messages)
        return response.text

    def get_cost(
        self,
        prompt: str,
        response: str | None = None,
        system: str | None = None,
    ) -> float:
        """Get the cost for this request.

        Args:
            prompt: the user message.
            response: the model output. If missing or empty, the cost is
                estimated as if max_tokens tokens were generated.
            system: optional system text, counted as part of the input.
        Returns:
            The price of the request according to model_price.
        """
        if system is not None:
            prompt = system + prompt
        model = genai.GenerativeModel(self.model)
        num_prompt_token = model.count_tokens(prompt).total_tokens
        if response:
            num_response_token = model.count_tokens(response).total_tokens
        else:
            num_response_token = self.max_tokens
        input_price, output_price = self.model_price[self.model]
        return (num_prompt_token * input_price
                + num_response_token * output_price)
30 | Question: Anthony is Alan's what? 31 | Answer: We first extract all triplets from the document. We then find the path from Alan to Anthony. Finally, we reduce the relations on the path to get the answer. 32 | The triplets include (Anthony, father, James), (Annie, uncle, James), (Alan, daughter, Annie). 33 | The path from Alan to Anthony is (Alan, daughter, Annie), (Annie, uncle, James), (James, son, Anthony). 34 | The relations on the path are daughter, uncle, son. 35 | daughter's uncle is brother. So the relations are reduced to brother, son. 36 | brother's son is nephew. So the relations are reduced to nephew. 37 | Therefore, Anthony is Alan's nephew. 38 | 39 | Document: Valerie's biggest accomplishment is raising her son Carlos. Annie does n't like having to babysit her younger brother, Emmanuel. Valerie and her son Emmanuel had lunch together at a local Chinese restaurant. 40 | Question: Carlos is Annie's what? 41 | Answer: We first extract all triplets from the document. We then find the path from Annie to Carlos. Finally, we reduce the relations on the path to get the answer. 42 | The triplets include (Valerie, son, Carlos), (Annie, brother, Emmanuel), (Valerie, son, Emmanuel). 43 | The path from Annie to Carlos is (Annie, brother, Emmanuel), (Emmanuel, mother, Valerie), (Valerie, son, Carlos). 44 | The relations on the path are brother, mother, son. 45 | brother's mother is mother. So the relations are reduced to mother, son. 46 | mother's son is brother. So the relations are reduced to brother. 47 | Therefore, Carlos is Annie's brother. 48 | 49 | Document: James likes to take his daughter Jeanna fishing. James loves cooking with his daughter. Her name is Beverly. Jeanna loves visiting with her aunt Michelle. 50 | Question: Michelle is Beverly's what? 51 | Answer: We first extract all triplets from the document. We then find the path from Beverly to Michelle. Finally, we reduce the relations on the path to get the answer. 
52 | The triplets include (James, daughter, Jeanna), (James, daughter, Beverly), (Jeanna, aunt, Michelle). 53 | The path from Beverly to Michelle is (Beverly, father, James), (James, daughter, Jeanna), (Jeanna, aunt, Michelle). 54 | The relations on the path are father, daughter, aunt. 55 | father's daughter is sister. So the relations are reduced to sister, aunt. 56 | sister's aunt is aunt. So the relations are reduced to aunt. 57 | Therefore, Michelle is Beverly's aunt. 58 | 59 | Document: Lee was finally coming of age and it was time for him and his father to go on a coming of age camping trip. Beverly, James's younger daughter, decided she wanted to go on the trip despite being several years younger. Jeanna took her younger sister Beverly to the carnival last weekend. 60 | Question: Jeanna is Lee's what? 61 | Answer: We first extract all triplets from the document. We then find the path from Lee to Jeanna. Finally, we reduce the relations on the path to get the answer. 62 | The triplets include (Lee, father, James), (James, daughter, Beverly), (Jeanna, sister, Beverly). 63 | The path from Lee to Jeanna is (Lee, father, James), (James, daughter, Beverly), (Beverly, sister, Jeanna). 64 | The relations on the path are father, daughter, sister. 65 | father's daughter is sister. So the relations are reduced to sister, sister. 66 | sister's sister is sister. So the relations are reduced to sister. 67 | Therefore, Jeanna is Lee's sister. 68 | 69 | Document: Craig's sister, Rosie, bought movie tickets at a discount rate. Rosie and her father Elliott love to go skiing. Often, Elliott will invite his mother Molly to join them. 70 | Question: Molly is Craig's what? 71 | Answer: We first extract all triplets from the document. We then find the path from Craig to Molly. Finally, we reduce the relations on the path to get the answer. 72 | The triplets include (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly). 
73 | The path from Craig to Molly is (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly). 74 | The relations on the path are sister, father, mother. 75 | sister's father is father. So the relations are reduced to father, mother. 76 | father's mother is grandmother. So the relations are reduced to grandmother. 77 | Therefore, Molly is Craig's grandmother. 78 | 79 | Document: {{ document }} 80 | Question: {{ query[1] }} is {{ query[0] }}'s what? 81 | Answer: 82 | 83 | return_last: yes -------------------------------------------------------------------------------- /artifacts/checkpoint/arithmetic/base-9/gpt-3.5_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | 0 + 0 = 0.: [17, 0.0] 2 | 0 + 1 = 1.: [16, 3.0] 3 | 0 + 2 = 2.: [17, 2.0] 4 | 0 + 3 = 3.: [21, 2.0] 5 | 0 + 4 = 4.: [18, 6.0] 6 | 0 + 5 = 5.: [28, 2.0] 7 | 0 + 6 = 6.: [24, 4.0] 8 | 0 + 7 + 7 = 14.: [1, 0.0] 9 | 0 + 7 = 7.: [15, 2.0] 10 | 0 + 8 = 8.: [8, 1.0] 11 | 1 + 0 + 1 = 2.: [1, 0.0] 12 | 1 + 0 = 1.: [7, 4.0] 13 | 1 + 1 + 1 = 3.: [11, 4.0] 14 | 1 + 1 = 2.: [26, 1.0] 15 | 1 + 2 + 1 = 4.: [19, 4.0] 16 | 1 + 2 = 3.: [24, 0.0] 17 | 1 + 3 + 1 = 5.: [16, 2.0] 18 | 1 + 3 = 4.: [18, 2.0] 19 | 1 + 4 + 1 = 6.: [25, 1.0] 20 | 1 + 4 = 5.: [18, 1.0] 21 | 1 + 5 + 1 = 7.: [12, 2.0] 22 | 1 + 5 = 6.: [28, 7.0] 23 | 1 + 6 + 1 = 8.: [14, 3.0] 24 | 1 + 6 = 7.: [11, 0.0] 25 | 1 + 7 + 1 = 9.: [30, 5.0] 26 | 1 + 7 = 8.: [10, 1.0] 27 | 1 + 8 + 0 = 9.: [4, 3.0] 28 | 1 + 8 + 1 = 10.: [16, 2.0] 29 | 1 + 8 = 10.: [1, 1.0] 30 | 1 + 8 = 9.: [51, 29.0] 31 | 2 + 0 = 2.: [22, 0.0] 32 | 2 + 1 + 1 = 4.: [16, 3.0] 33 | 2 + 1 = 3.: [27, 5.0] 34 | 2 + 2 + 1 = 5.: [21, 2.0] 35 | 2 + 2 = 4.: [16, 3.0] 36 | 2 + 3 + 1 = 6.: [30, 9.0] 37 | 2 + 3 = 5.: [33, 1.0] 38 | 2 + 4 + 1 = 7.: [14, 0.0] 39 | 2 + 4 = 6.: [13, 2.0] 40 | 2 + 5 + 1 = 8.: [17, 1.0] 41 | 2 + 5 = 7.: [12, 7.0] 42 | 2 + 6 + 1 = 9.: [21, 10.0] 43 | 2 + 6 = 8.: [25, 3.0] 44 | 2 + 7 + 0 = 9.: [3, 2.0] 45 
| 2 + 7 + 1 = 10.: [24, 1.0] 46 | 2 + 7 = 9.: [53, 24.0] 47 | 2 + 8 + 0 = 10.: [2, 0.0] 48 | 2 + 8 + 1 = 11.: [15, 4.0] 49 | 2 + 8 + 5 = 15.: [2, 0.0] 50 | 2 + 8 = 10.: [44, 0.0] 51 | 3 + 0 = 3.: [18, 1.0] 52 | 3 + 1 + 1 = 5.: [27, 4.0] 53 | 3 + 1 = 4.: [22, 3.0] 54 | 3 + 2 + 1 = 6.: [15, 0.0] 55 | 3 + 2 = 5.: [6, 1.0] 56 | 3 + 3 + 1 = 7.: [24, 7.0] 57 | 3 + 3 = 6.: [16, 4.0] 58 | 3 + 4 + 1 = 8.: [24, 5.0] 59 | 3 + 4 = 7.: [28, 9.0] 60 | 3 + 5 + 1 = 10.: [1, 1.0] 61 | 3 + 5 + 1 = 9.: [21, 5.0] 62 | 3 + 5 = 8.: [16, 6.0] 63 | 3 + 6 + 0 = 9.: [1, 0.0] 64 | 3 + 6 + 1 = 10.: [21, 1.0] 65 | 3 + 6 + 8 = 17.: [1, 0.0] 66 | 3 + 6 = 9.: [58, 27.0] 67 | 3 + 7 + 0 = 10.: [4, 0.0] 68 | 3 + 7 + 1 = 11.: [4, 2.0] 69 | 3 + 7 + 3 = 13.: [1, 0.0] 70 | 3 + 7 + 4 = 14.: [1, 0.0] 71 | 3 + 7 = 10.: [44, 0.0] 72 | 3 + 8 + 0 = 11.: [1, 0.0] 73 | 3 + 8 + 1 = 12.: [23, 2.0] 74 | 3 + 8 = 11.: [45, 0.0] 75 | 4 + 0 = 4.: [19, 1.0] 76 | 4 + 1 + 1 = 6.: [18, 3.0] 77 | 4 + 1 = 5.: [16, 3.0] 78 | 4 + 2 + 1 = 7.: [17, 4.0] 79 | 4 + 2 = 6.: [26, 6.0] 80 | 4 + 3 + 1 = 8.: [11, 1.0] 81 | 4 + 3 = 7.: [20, 9.0] 82 | 4 + 4 + 1 = 9.: [6, 4.0] 83 | 4 + 4 = 8.: [21, 8.0] 84 | 4 + 5 + 0 = 9.: [2, 2.0] 85 | 4 + 5 + 1 = 10.: [10, 0.0] 86 | 4 + 5 + 2 = 11.: [1, 0.0] 87 | 4 + 5 + 5 = 14.: [1, 0.0] 88 | 4 + 5 = 9.: [46, 25.0] 89 | 4 + 6 + 0 = 10.: [1, 0.0] 90 | 4 + 6 + 1 = 11.: [26, 10.0] 91 | 4 + 6 + 2 = 12.: [2, 0.0] 92 | 4 + 6 = 10.: [52, 0.0] 93 | 4 + 6 = 12.: [1, 0.0] 94 | 4 + 7 + 0 = 11.: [2, 0.0] 95 | 4 + 7 + 1 = 12.: [17, 2.0] 96 | 4 + 7 + 5 = 16.: [2, 0.0] 97 | 4 + 7 = 11.: [54, 1.0] 98 | 4 + 8 + 0 = 12.: [2, 0.0] 99 | 4 + 8 + 1 = 13.: [20, 3.0] 100 | 4 + 8 = 12.: [42, 0.0] 101 | 5 + 0 = 5.: [24, 1.0] 102 | 5 + 1 + 1 = 7.: [12, 2.0] 103 | 5 + 1 = 6.: [21, 1.0] 104 | 5 + 2 + 1 = 8.: [19, 3.0] 105 | 5 + 2 = 7.: [11, 5.0] 106 | 5 + 3 + 1 = 9.: [14, 0.0] 107 | 5 + 3 = 8.: [6, 2.0] 108 | 5 + 4 + 0 = 9.: [2, 1.0] 109 | 5 + 4 + 1 = 10.: [34, 0.0] 110 | 5 + 4 + 2 = 11.: [2, 0.0] 111 | 5 + 4 = 9.: [59, 32.0] 112 
| 5 + 5 + 0 = 10.: [4, 0.0] 113 | 5 + 5 + 1 = 11.: [25, 5.0] 114 | 5 + 5 = 10.: [40, 0.0] 115 | 5 + 6 + 0 = 11.: [1, 0.0] 116 | 5 + 6 + 1 = 12.: [39, 6.0] 117 | 5 + 6 = 11.: [36, 0.0] 118 | 5 + 7 + 0 = 12.: [4, 0.0] 119 | 5 + 7 + 1 = 13.: [20, 3.0] 120 | 5 + 7 + 5 = 17.: [1, 0.0] 121 | 5 + 7 = 12.: [59, 0.0] 122 | 5 + 8 + 0 = 13.: [2, 0.0] 123 | 5 + 8 + 1 = 14.: [15, 1.0] 124 | 5 + 8 = 13.: [47, 0.0] 125 | 6 + 0 = 6.: [15, 1.0] 126 | 6 + 1 + 1 = 8.: [25, 6.0] 127 | 6 + 1 = 7.: [20, 7.0] 128 | 6 + 2 + 1 = 9.: [20, 8.0] 129 | 6 + 2 = 8.: [10, 2.0] 130 | 6 + 3 + 0 = 9.: [1, 0.0] 131 | 6 + 3 + 1 = 10.: [32, 0.0] 132 | 6 + 3 + 8 = 17.: [1, 0.0] 133 | 6 + 3 = 10.: [2, 2.0] 134 | 6 + 3 = 9.: [57, 27.0] 135 | 6 + 4 + 0 = 10.: [2, 0.0] 136 | 6 + 4 + 1 = 11.: [24, 6.0] 137 | 6 + 4 + 3 = 13.: [1, 0.0] 138 | 6 + 4 + 5 = 15.: [2, 0.0] 139 | 6 + 4 = 10.: [52, 1.0] 140 | 6 + 5 + 0 = 11.: [1, 0.0] 141 | 6 + 5 + 1 = 12.: [24, 0.0] 142 | 6 + 5 + 4 = 15.: [1, 0.0] 143 | 6 + 5 + 5 = 16.: [2, 0.0] 144 | 6 + 5 = 11.: [48, 1.0] 145 | 6 + 6 + 0 = 12.: [4, 0.0] 146 | 6 + 6 + 1 = 13.: [38, 8.0] 147 | 6 + 6 + 5 = 17.: [1, 0.0] 148 | 6 + 6 + 8 = 20.: [1, 0.0] 149 | 6 + 6 = 12.: [46, 0.0] 150 | 6 + 7 + 0 = 13.: [10, 0.0] 151 | 6 + 7 + 1 = 14.: [16, 0.0] 152 | 6 + 7 + 6 = 19.: [1, 0.0] 153 | 6 + 7 = 13.: [49, 1.0] 154 | 6 + 8 + 1 = 15.: [15, 3.0] 155 | 6 + 8 = 14.: [39, 0.0] 156 | 7 + 0 = 7.: [26, 5.0] 157 | 7 + 1 + 1 = 9.: [23, 3.0] 158 | 7 + 1 = 8.: [13, 2.0] 159 | 7 + 2 + 0 = 9.: [2, 1.0] 160 | 7 + 2 + 1 = 10.: [23, 0.0] 161 | 7 + 2 = 9.: [49, 28.0] 162 | 7 + 3 + 0 = 10.: [6, 0.0] 163 | 7 + 3 + 1 = 11.: [18, 5.0] 164 | 7 + 3 = 10.: [45, 0.0] 165 | 7 + 4 + 0 = 11.: [1, 0.0] 166 | 7 + 4 + 1 = 12.: [26, 0.0] 167 | 7 + 4 = 11.: [55, 0.0] 168 | 7 + 5 + 0 = 12.: [2, 0.0] 169 | 7 + 5 + 1 = 13.: [27, 1.0] 170 | 7 + 5 = 12.: [42, 0.0] 171 | 7 + 6 + 1 = 14.: [16, 0.0] 172 | 7 + 6 + 2 = 15.: [1, 0.0] 173 | 7 + 6 + 5 = 18.: [1, 0.0] 174 | 7 + 6 + 8 = 21.: [1, 0.0] 175 | 7 + 6 = 13.: [67, 0.0] 176 | 7 + 
7 + 0 = 14.: [10, 0.0] 177 | 7 + 7 + 1 = 15.: [17, 4.0] 178 | 7 + 7 + 4 = 18.: [1, 0.0] 179 | 7 + 7 + 6 = 20.: [1, 0.0] 180 | 7 + 7 = 14.: [44, 0.0] 181 | 7 + 8 + 0 = 15.: [4, 1.0] 182 | 7 + 8 + 1 = 16.: [24, 3.0] 183 | 7 + 8 = 15.: [53, 0.0] 184 | 8 + 0 = 8.: [20, 3.0] 185 | 8 + 1 + 0 = 9.: [4, 2.0] 186 | 8 + 1 + 1 = 10.: [14, 0.0] 187 | 8 + 1 = 9.: [60, 19.0] 188 | 8 + 2 + 0 = 10.: [2, 0.0] 189 | 8 + 2 + 1 = 11.: [14, 0.0] 190 | 8 + 2 = 10.: [59, 0.0] 191 | 8 + 3 + 0 = 11.: [2, 0.0] 192 | 8 + 3 + 1 = 12.: [24, 1.0] 193 | 8 + 3 + 2 = 13.: [1, 0.0] 194 | 8 + 3 = 11.: [39, 0.0] 195 | 8 + 4 + 1 = 13.: [4, 0.0] 196 | 8 + 4 + 3 = 15.: [1, 0.0] 197 | 8 + 4 = 12.: [33, 0.0] 198 | 8 + 5 + 0 = 13.: [1, 0.0] 199 | 8 + 5 + 1 = 14.: [20, 1.0] 200 | 8 + 5 = 13.: [45, 0.0] 201 | 8 + 6 + 1 = 15.: [19, 1.0] 202 | 8 + 6 + 2 = 16.: [1, 0.0] 203 | 8 + 6 = 14.: [43, 0.0] 204 | 8 + 7 + 0 = 15.: [2, 0.0] 205 | 8 + 7 + 1 = 16.: [20, 3.0] 206 | 8 + 7 + 4 = 19.: [1, 0.0] 207 | 8 + 7 + 8 = 23.: [1, 0.0] 208 | 8 + 7 = 15.: [61, 2.0] 209 | 8 + 8 + 0 = 16.: [2, 0.0] 210 | 8 + 8 + 1 = 17.: [30, 5.0] 211 | 8 + 8 = 16.: [39, 4.0] 212 | -------------------------------------------------------------------------------- /artifacts/prompt/list_functions/4-shot_cot_htt_test.yaml: -------------------------------------------------------------------------------- 1 | system: | 2 | Instruction: Infer the function behind the examples. Use the function to answer the questions. 
3 | 4 | prompt: | 5 | {%- set concept2rules = {} -%} 6 | {%- for rule, score in rules.items() -%} 7 | {%- set pos1 = rule.find("]") -%} 8 | {%- set concept = rule[:pos1] | trim("[") -%} 9 | {%- set rule = rule[pos1 + 1:] | trim -%} 10 | {%- set concept_rules = concept2rules[concept] if concept in concept2rules else [] -%} 11 | {%- set _ = concept2rules.update({concept: concept_rules + [(rule, score)]}) -%} 12 | {%- endfor -%} 13 | {%- for concept, concept_rules in concept2rules.items() -%} 14 | {%- set _ = concept2rules.update({concept: concept_rules | sort(attribute="1", reverse=True)}) -%} 15 | {%- endfor -%} 16 | Examples: {#- c020 #} 17 | [0, 8, 5, 2, 7, 1, 4, 6, 9, 3] -> [3, 8, 5, 2, 7, 1, 4, 6, 9, 3] 18 | [4, 0, 1] -> [1, 0, 1] 19 | [6, 1, 7, 5, 3, 2, 8, 4, 9] -> [9, 1, 7, 5, 3, 2, 8, 4, 9] 20 | [6, 2, 1, 9, 4] -> [4, 2, 1, 9, 4] 21 | [2, 9, 7, 5, 3, 8, 1, 4] -> [4, 9, 7, 5, 3, 8, 1, 4] 22 | [5, 1, 7, 8, 9, 4, 0, 3, 2] -> [2, 1, 7, 8, 9, 4, 0, 3, 2] 23 | Questions: 24 | [5, 8, 6, 1, 0, 9, 7] -> ? 25 | [3, 8, 6, 0] -> ? 26 | [8, 3] -> ? 27 | [3, 2, 0, 1, 6, 8, 7, 5] -> ? 28 | [5, 2, 0, 8, 9, 6] -> ? 29 | [8, 5, 7, 4, 2, 3, 6] -> ? 30 | Potential functions and their confidence: 31 | {%- for rule, score in concept2rules["c020"] %} 32 | {{ rule }}: {{ "%0.2f" | format(score) }} 33 | {%- endfor %} 34 | {%- if concept2rules["c020"] | length == 0 %} 35 | N/A 36 | {%- endif %} 37 | Answers: 38 | Based on the examples and the potential functions, we infer the function is to replace the first element with the last element. 
39 | Using this function, the answers to the questions are: 40 | [5, 8, 6, 1, 0, 9, 7] -> [7, 8, 6, 1, 0, 9, 7] 41 | [3, 8, 6, 0] -> [0, 8, 6, 0] 42 | [8, 3] -> [3, 3] 43 | [3, 2, 0, 1, 6, 8, 7, 5] -> [5, 2, 0, 1, 6, 8, 7, 5] 44 | [5, 2, 0, 8, 9, 6] -> [6, 2, 0, 8, 9, 6] 45 | [8, 5, 7, 4, 2, 3, 6] -> [6, 5, 7, 4, 2, 3, 6] 46 | 47 | Examples: {#- c040 #} 48 | [2] -> [2] 49 | [4, 3, 0, 1, 7, 8] -> [4, 3, 0, 1, 7, 8, 3] 50 | [5, 0, 2, 9] -> [5, 0, 2, 9, 9] 51 | [7, 0, 2, 5] -> [7, 0, 2, 5] 52 | [3, 4, 7, 6, 0] -> [3, 4, 7, 6, 0, 3] 53 | [8, 1, 2, 3, 7] -> [8, 1, 2, 3, 7, 3] 54 | Questions: 55 | [9, 1] -> ? 56 | [6] -> ? 57 | [1, 9, 5, 0] -> ? 58 | [4, 6, 9, 0, 7, 8, 1, 2] -> ? 59 | [4, 2, 8] -> ? 60 | [6, 2, 0, 3, 1, 8, 7] -> ? 61 | Potential functions and their confidence: 62 | {%- for rule, score in concept2rules["c040"] %} 63 | {{ rule }}: {{ "%0.2f" | format(score) }} 64 | {%- endfor %} 65 | {%- if concept2rules["c040"] | length == 0 %} 66 | N/A 67 | {%- endif %} 68 | Answers: 69 | Based on the examples and the potential functions, we infer the function is to append 3 if the list contains a 3, else append 9 if the list contains a 9. 70 | Using this function, the answers to the questions are: 71 | [9, 1] -> [9, 1, 9] 72 | [6] -> [6] 73 | [1, 9, 5, 0] -> [1, 9, 5, 0, 9] 74 | [4, 6, 9, 0, 7, 8, 1, 2] -> [4, 6, 9, 0, 7, 8, 1, 2, 9] 75 | [4, 2, 8] -> [4, 2, 8] 76 | [6, 2, 0, 3, 1, 8, 7] -> [6, 2, 0, 3, 1, 8, 7, 3] 77 | 78 | Examples: {#- c060 #} 79 | [1, 0, 9, 7, 4, 2, 5, 3, 6, 8] -> [9, 0, 1, 4, 4, 5] 80 | [3, 8, 4, 6, 1, 5, 7, 0] -> [4, 8, 3, 4, 1, 7] 81 | [5, 4, 7, 2, 9, 3, 8, 1] -> [7, 4, 5, 4, 9, 8] 82 | [3, 9, 2, 0, 6, 8, 5, 1, 7] -> [2, 9, 3, 4, 6, 5] 83 | [9, 2, 1, 3, 4, 7, 6, 8, 5, 0] -> [1, 2, 9, 4, 4, 6] 84 | [0, 7, 9, 3, 1, 5, 8, 2, 6] -> [9, 7, 0, 4, 1, 8] 85 | Questions: 86 | [3, 9, 7, 6, 0, 5, 1] -> ? 87 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> ? 88 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> ? 89 | [8, 4, 9, 1, 3, 2, 7] -> ? 90 | [8, 3, 7, 0, 4, 2, 5] -> ? 
91 | [6, 2, 1, 0, 9, 8, 5] -> ? 92 | Potential functions and their confidence: 93 | {%- for rule, score in concept2rules["c060"] %} 94 | {{ rule }}: {{ "%0.2f" | format(score) }} 95 | {%- endfor %} 96 | {%- if concept2rules["c060"] | length == 0 %} 97 | N/A 98 | {%- endif %} 99 | Answers: 100 | Based on the examples and the potential functions, we infer the function is to generate a list of elements 3, 2, 1, the number 4, then elements 5 and 7. 101 | Using this function, the answers to the questions are: 102 | [3, 9, 7, 6, 0, 5, 1] -> [7, 9, 3, 4, 0, 1] 103 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> [9, 5, 2, 4, 8, 0] 104 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> [7, 0, 9, 4, 4, 3] 105 | [8, 4, 9, 1, 3, 2, 7] -> [9, 4, 8, 4, 3, 7] 106 | [8, 3, 7, 0, 4, 2, 5] -> [7, 3, 8, 4, 4, 5] 107 | [6, 2, 1, 0, 9, 8, 5] -> [1, 2, 6, 4, 9, 5] 108 | 109 | Examples: {#- c080 #} 110 | [] -> [] 111 | [1, 5, 6, 2, 8, 3, 7] -> [7, 3, 8, 2, 6, 5, 1] 112 | [2, 1, 9, 6, 3, 5, 4, 8] -> [8, 4, 5, 3, 6, 9, 1, 2] 113 | [9, 1, 2, 8, 0] -> [0, 8, 2, 1, 9] 114 | [1, 0, 7, 3, 9, 2] -> [2, 9, 3, 7, 0, 1] 115 | [7, 6, 3, 0, 4, 1, 5, 2] -> [2, 5, 1, 4, 0, 3, 6, 7] 116 | Questions: 117 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> ? 118 | [6, 4, 0] -> ? 119 | [3, 6, 1, 7, 0, 4] -> ? 120 | [5, 4, 2, 7] -> ? 121 | [5, 7, 6, 2, 3] -> ? 122 | [7, 9] -> ? 123 | Potential functions and their confidence: 124 | {%- for rule, score in concept2rules["c080"] %} 125 | {{ rule }}: {{ "%0.2f" | format(score) }} 126 | {%- endfor %} 127 | {%- if concept2rules["c080"] | length == 0 %} 128 | N/A 129 | {%- endif %} 130 | Answers: 131 | Based on the examples and the potential functions, we infer the function is to reverse the elements. 
132 | Using this function, the answers to the questions are: 133 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> [9, 1, 3, 4, 0, 8, 7, 5, 6, 2] 134 | [6, 4, 0] -> [0, 4, 6] 135 | [3, 6, 1, 7, 0, 4] -> [4, 0, 7, 1, 6, 3] 136 | [5, 4, 2, 7] -> [7, 2, 4, 5] 137 | [5, 7, 6, 2, 3] -> [3, 2, 6, 7, 5] 138 | [7, 9] -> [9, 7] 139 | 140 | Examples: 141 | {%- for i in range(train_queries | length) %} 142 | {{ train_queries[i] }} -> {{ train_answers[i] }} 143 | {%- endfor %} 144 | Questions: 145 | {%- for query in queries %} 146 | {{ query }} -> ? 147 | {%- endfor %} 148 | Potential functions and their confidence: 149 | {%- for rule, score in concept2rules[concept] %} 150 | {{ rule }}: {{ "%0.2f" | format(score) }} 151 | {%- endfor %} 152 | {%- if concept2rules[concept] | length == 0 %} 153 | N/A 154 | {%- endif %} 155 | Answers: -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # llms_can_learn_rules 2 | 3 | ![HtT prompting](asset/htt.svg) 4 | 5 | A major reason for failure of chain-of-thought prompting is its tendency to 6 | hallucinate rules in multi-step reasoning. Our work, Hypotheses-to-Theories (HtT), 7 | prompts LLMs to induce rules from training samples, build a rule library, and 8 | apply it to solve reasoning problems. 9 | 10 | HtT can be viewed as a new paradigm of learning with LLMs. Instead of learning 11 | model parameters, HtT learns a rule library that transfers across different models 12 | and textual forms. It can be applied to black-box LLMs such as GPT-4 and Gemini-Pro. 13 | 14 | ## Installation 15 | 16 | First, let's create a virtual environment: 17 | 18 | Below we will use conda environment. 
If you are new to conda, you can install Miniconda by following https://docs.conda.io/projects/miniconda/en/latest/ 19 | 20 | ```bash 21 | conda create -n htt python=3.10 22 | conda activate htt 23 | ``` 24 | 25 | Install dependencies through pip. 26 | 27 | ```bash 28 | pip install -r requirements.txt 29 | ``` 30 | 31 | If it is your first time using nltk, please also download the necessary pieces: 32 | 33 | ```python 34 | >>> import nltk 35 | >>> nltk.download('punkt') 36 | >>> nltk.download('punkt_tab') 37 | ``` 38 | 39 | 40 | Export your API keys as environment variables. 41 | 42 | ```bash 43 | export OPENAI_API_KEY=your-open-ai-key 44 | export GOOGLE_API_KEY=your-google-key 45 | ``` 46 | 47 | ## 🛠️ Usage ## 48 | 49 | ### Dataset ### 50 | 51 | Please navigate to `artifacts/dataset` and run the `download.sh` script to prepare the datasets. 52 | 53 | Use `source/train.py` for training HtT and `source/test.py` for testing HtT and 54 | other methods. All the experiments will be logged in `experiment/`. 55 | 56 | ### Training (Induction Stage) ### 57 | 58 | Below are the command lines for training HtT. Note that if you train HtT with GPT-4, 59 | it will incur substantial time (2-21 hours) and cost ($20-270 USD) per dataset. 60 | If you only want to reproduce the results of HtT, we highly recommend using 61 | the provided checkpoints of rule libraries. 
62 | 63 | ```bash 64 | python source/train.py -c artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000 65 | python source/train.py -c artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000 66 | python source/train.py -c artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000 67 | python source/train.py -c artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000 68 | python source/train.py -c artifacts/config/list_functions/gpt-4_4-shot_cot_htt.yaml --num-iteration 5000 69 | ``` 70 | 71 | ### Test (Deduction Stage) ### 72 | 73 | Here are the command lines for testing HtT based on the provided rule libraries. 74 | If you want to test with your own rule libraries, modify the library path in the 75 | corresponding config files. 76 | 77 | ```bash 78 | python source/test.py -c artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot_htt.yaml 79 | python source/test.py -c artifacts/config/clutrr/textual/gpt-4_5-shot_cot_htt.yaml 80 | python source/test.py -c artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot_htt.yaml 81 | python source/test.py -c artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot_htt.yaml 82 | python source/test.py -c artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot_htt.yaml 83 | python source/test.py -c artifacts/config/list_functions/gpt-4_4-shot_cot_htt.yaml 84 | ``` 85 | 86 | To run baseline methods like 0-shot CoT and few-shot CoT, use the following commands: 87 | 88 | ```bash 89 | python source/test.py -c artifacts/config/clutrr/symbolic/gpt-4_0-shot_cot.yaml 90 | python source/test.py -c artifacts/config/clutrr/textual/gpt-4_0-shot_cot.yaml 91 | python source/test.py -c artifacts/config/arithmetic/base-16/gpt-4_0-shot_cot.yaml 92 | python source/test.py -c artifacts/config/arithmetic/base-11/gpt-4_0-shot_cot.yaml 93 | python source/test.py -c artifacts/config/arithmetic/base-9/gpt-4_0-shot_cot.yaml 94 | python source/test.py -c 
artifacts/config/list_functions/gpt-4_0-shot_cot.yaml 95 | ``` 96 | 97 | ```bash 98 | python source/test.py -c artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot.yaml 99 | python source/test.py -c artifacts/config/clutrr/textual/gpt-4_5-shot_cot.yaml 100 | python source/test.py -c artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot.yaml 101 | python source/test.py -c artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot.yaml 102 | python source/test.py -c artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot.yaml 103 | python source/test.py -c artifacts/config/list_functions/gpt-4_4-shot_cot.yaml 104 | ``` 105 | 106 | ## 📂 Code Structure ## 107 | 108 | - `artifacts/checkpoint/`: checkpoints of rule libraries 109 | - `artifacts/config/`: configuration files for experiments 110 | - `artifacts/dataset/`: benchmark datasets 111 | - `artifacts/prompt/`: prompts written in [jinja](https://jinja.palletsprojects.com/) 112 | - `source/`: source files written in python 113 | 114 | ## 🗡️ Reproducibility ## 115 | 116 | Due to the randomness in LLMs, changes in OpenAI models and reimplementation of 117 | our codebase, this repo may not reproduce the exact numbers in the paper. 118 | 119 | It is normal to observe numbers higher or lower than those in the paper, but the 120 | rankings of methods should be preserved on strong models like GPT-4: 121 | few-shot CoT + HtT > few-shot CoT > 0-shot CoT. 122 | 123 | 124 | ## Citing this work 125 | 126 | If you use HtT or this repo in your research, please cite the following paper. 
127 | 128 | 129 | ```bibtex 130 | @article{zhu2023large, 131 | title={Large Language Models can Learn Rules}, 132 | author={Zhu, Zhaocheng and Xue, Yuan and Chen, Xinyun and Zhou, Denny and Tang, Jian and Schuurmans, Dale and Dai, Hanjun}, 133 | journal={arXiv preprint arXiv:2310.07064}, 134 | year={2023} 135 | } 136 | ``` 137 | 138 | ## License and disclaimer 139 | 140 | Copyright 2023 DeepMind Technologies Limited 141 | 142 | All software is licensed under the Apache License, Version 2.0 (Apache 2.0); 143 | you may not use this file except in compliance with the Apache 2.0 license. 144 | You may obtain a copy of the Apache 2.0 license at: 145 | https://www.apache.org/licenses/LICENSE-2.0 146 | 147 | All other materials are licensed under the Creative Commons Attribution 4.0 148 | International License (CC-BY). You may obtain a copy of the CC-BY license at: 149 | https://creativecommons.org/licenses/by/4.0/legalcode 150 | 151 | Unless required by applicable law or agreed to in writing, all software and 152 | materials distributed here under the Apache 2.0 or CC-BY licenses are 153 | distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 154 | either express or implied. See the licenses for the specific language governing 155 | permissions and limitations under those licenses. 156 | 157 | This is not an official Google product. 
158 | -------------------------------------------------------------------------------- /artifacts/checkpoint/arithmetic/base-9/gemini-pro_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | 0 + 0 = 0.: [17, 15.0] 2 | 0 + 0 = 00.: [1, 1.0] 3 | 0 + 1 + 1 = 12.: [1, 0.0] 4 | 0 + 1 = 1.: [14, 12.0] 5 | 0 + 2 = 2.: [23, 17.0] 6 | 0 + 2 = 3.: [1, 0.0] 7 | 0 + 3 = 3.: [19, 19.0] 8 | 0 + 3 = 4.: [5, 0.0] 9 | 0 + 4 = 4.: [19, 15.0] 10 | 0 + 4 = 5.: [6, 0.0] 11 | 0 + 5 = 5.: [20, 17.0] 12 | 0 + 5 = 6.: [15, 0.0] 13 | 0 + 6 = 6.: [13, 12.0] 14 | 0 + 6 = 7.: [13, 0.0] 15 | 0 + 7 = 7.: [12, 10.0] 16 | 0 + 7 = 8.: [10, 0.0] 17 | 0 + 8 = 8.: [14, 14.0] 18 | 0 + 8 = 9.: [4, 0.0] 19 | 1 + 0 = 1.: [7, 5.0] 20 | 1 + 1 + 1 = 10.: [2, 0.0] 21 | 1 + 1 + 1 = 11.: [4, 0.0] 22 | 1 + 1 + 1 = 13.: [5, 0.0] 23 | 1 + 1 = 2.: [21, 20.0] 24 | 1 + 1 = 3.: [3, 0.0] 25 | 1 + 2 + 1 = 10.: [1, 0.0] 26 | 1 + 2 + 1 = 4.: [5, 5.0] 27 | 1 + 2 + 1 = 5.: [12, 0.0] 28 | 1 + 2 = 3.: [11, 11.0] 29 | 1 + 2 = 4.: [12, 0.0] 30 | 1 + 3 + 1 = 5.: [1, 1.0] 31 | 1 + 3 + 1 = 6.: [12, 0.0] 32 | 1 + 3 = 4.: [10, 9.0] 33 | 1 + 3 = 5.: [15, 1.0] 34 | 1 + 4 + 1 = 7.: [24, 0.0] 35 | 1 + 4 = 5.: [3, 3.0] 36 | 1 + 4 = 6.: [14, 0.0] 37 | 1 + 5 + 1 = 8.: [10, 0.0] 38 | 1 + 5 = 6.: [1, 0.0] 39 | 1 + 5 = 7.: [25, 1.0] 40 | 1 + 6 + 1 = 9.: [11, 0.0] 41 | 1 + 6 = 7.: [2, 2.0] 42 | 1 + 6 = 8.: [12, 1.0] 43 | 1 + 7 + 1 = 10.: [26, 26.0] 44 | 1 + 7 = 10.: [11, 0.0] 45 | 1 + 7 = 9.: [2, 0.0] 46 | 1 + 8 + 0 = 10.: [1, 1.0] 47 | 1 + 8 + 1 = 11.: [13, 10.0] 48 | 1 + 8 = 10.: [58, 37.0] 49 | 2 + 0 = 2.: [15, 15.0] 50 | 2 + 0 = 3.: [1, 0.0] 51 | 2 + 1 + 1 = 11.: [1, 0.0] 52 | 2 + 1 + 1 = 4.: [2, 2.0] 53 | 2 + 1 + 1 = 5.: [14, 0.0] 54 | 2 + 1 = 3.: [13, 10.0] 55 | 2 + 1 = 4.: [15, 0.0] 56 | 2 + 2 + 1 = 10.: [1, 0.0] 57 | 2 + 2 + 1 = 6.: [19, 0.0] 58 | 2 + 2 = 4.: [12, 10.0] 59 | 2 + 2 = 5.: [3, 1.0] 60 | 2 + 3 + 1 = 7.: [29, 0.0] 61 | 2 + 3 = 5.: [4, 2.0] 62 | 2 + 3 = 6.: [26, 0.0] 
63 | 2 + 4 + 1 = 8.: [14, 0.0] 64 | 2 + 4 = 6.: [1, 1.0] 65 | 2 + 4 = 7.: [12, 0.0] 66 | 2 + 5 + 1 = 9.: [17, 0.0] 67 | 2 + 5 = 7.: [2, 2.0] 68 | 2 + 5 = 8.: [14, 0.0] 69 | 2 + 6 + 1 = 10.: [21, 19.0] 70 | 2 + 6 = 10.: [8, 0.0] 71 | 2 + 6 = 8.: [7, 6.0] 72 | 2 + 6 = 9.: [10, 0.0] 73 | 2 + 7 + 1 = 11.: [22, 20.0] 74 | 2 + 7 = 10.: [37, 15.0] 75 | 2 + 7 = 11.: [20, 0.0] 76 | 2 + 8 + 0 = 11.: [2, 0.0] 77 | 2 + 8 + 1 = 12.: [11, 10.0] 78 | 2 + 8 = 10.: [3, 0.0] 79 | 2 + 8 = 11.: [49, 25.0] 80 | 3 + 0 = 3.: [13, 13.0] 81 | 3 + 0 = 4.: [2, 0.0] 82 | 3 + 1 + 1 = 11.: [2, 0.0] 83 | 3 + 1 + 1 = 5.: [2, 1.0] 84 | 3 + 1 + 1 = 6.: [23, 0.0] 85 | 3 + 1 = 4.: [7, 6.0] 86 | 3 + 1 = 5.: [9, 0.0] 87 | 3 + 2 + 1 = 7.: [15, 0.0] 88 | 3 + 2 = 5.: [6, 3.0] 89 | 3 + 2 = 6.: [3, 0.0] 90 | 3 + 3 + 1 = 7.: [1, 1.0] 91 | 3 + 3 + 1 = 8.: [22, 0.0] 92 | 3 + 3 = 6.: [10, 9.0] 93 | 3 + 3 = 7.: [6, 0.0] 94 | 3 + 4 + 1 = 10.: [2, 0.0] 95 | 3 + 4 + 1 = 9.: [21, 0.0] 96 | 3 + 4 = 7.: [5, 4.0] 97 | 3 + 4 = 8.: [26, 0.0] 98 | 3 + 5 + 1 = 10.: [27, 27.0] 99 | 3 + 5 = 8.: [1, 1.0] 100 | 3 + 5 = 9.: [14, 0.0] 101 | 3 + 6 + 0 = 10.: [2, 1.0] 102 | 3 + 6 + 1 = 11.: [20, 18.0] 103 | 3 + 6 = 10.: [47, 32.0] 104 | 3 + 6 = 11.: [1, 0.0] 105 | 3 + 7 + 0 = 11.: [2, 0.0] 106 | 3 + 7 + 1 = 11.: [1, 0.0] 107 | 3 + 7 + 1 = 12.: [3, 3.0] 108 | 3 + 7 = 11.: [49, 26.0] 109 | 3 + 8 + 0 = 12.: [1, 0.0] 110 | 3 + 8 + 1 = 13.: [16, 14.0] 111 | 3 + 8 = 12.: [48, 26.0] 112 | 4 + 0 + 1 = 5.: [1, 0.0] 113 | 4 + 0 = 4.: [12, 11.0] 114 | 4 + 1 + 1 = 7.: [18, 0.0] 115 | 4 + 1 = 5.: [5, 5.0] 116 | 4 + 1 = 6.: [11, 0.0] 117 | 4 + 2 + 1 = 8.: [16, 0.0] 118 | 4 + 2 = 6.: [3, 3.0] 119 | 4 + 2 = 7.: [23, 0.0] 120 | 4 + 3 + 1 = 9.: [12, 0.0] 121 | 4 + 3 = 7.: [6, 6.0] 122 | 4 + 3 = 8.: [12, 0.0] 123 | 4 + 4 + 0 = 9.: [1, 0.0] 124 | 4 + 4 + 1 = 10.: [7, 4.0] 125 | 4 + 4 = 10.: [6, 0.0] 126 | 4 + 4 = 8.: [3, 2.0] 127 | 4 + 4 = 9.: [13, 0.0] 128 | 4 + 5 + 0 = 10.: [1, 0.0] 129 | 4 + 5 + 1 = 11.: [9, 9.0] 130 | 4 + 5 = 10.: [49, 24.0] 131 
| 4 + 6 + 0 = 11.: [2, 1.0] 132 | 4 + 6 + 1 = 12.: [24, 23.0] 133 | 4 + 6 = 11.: [46, 24.0] 134 | 4 + 7 + 0 = 12.: [2, 0.0] 135 | 4 + 7 + 1 = 13.: [13, 13.0] 136 | 4 + 7 = 11.: [1, 0.0] 137 | 4 + 7 = 12.: [62, 39.0] 138 | 4 + 8 + 0 = 13.: [1, 1.0] 139 | 4 + 8 + 1 = 14.: [15, 15.0] 140 | 4 + 8 = 12.: [1, 0.0] 141 | 4 + 8 = 13.: [43, 18.0] 142 | 5 + 0 = 5.: [16, 12.0] 143 | 5 + 0 = 6.: [2, 0.0] 144 | 5 + 1 + 1 = 8.: [11, 0.0] 145 | 5 + 1 = 6.: [7, 6.0] 146 | 5 + 1 = 7.: [18, 0.0] 147 | 5 + 2 + 1 = 10.: [1, 0.0] 148 | 5 + 2 + 1 = 9.: [18, 0.0] 149 | 5 + 2 = 7.: [1, 1.0] 150 | 5 + 2 = 8.: [7, 0.0] 151 | 5 + 3 + 1 = 10.: [13, 13.0] 152 | 5 + 3 = 8.: [2, 2.0] 153 | 5 + 3 = 9.: [4, 0.0] 154 | 5 + 4 + 1 = 11.: [33, 31.0] 155 | 5 + 4 = 10.: [62, 35.0] 156 | 5 + 5 + 1 = 11.: [2, 0.0] 157 | 5 + 5 + 1 = 12.: [24, 23.0] 158 | 5 + 5 = 11.: [42, 22.0] 159 | 5 + 6 + 1 = 13.: [28, 24.0] 160 | 5 + 6 = 12.: [32, 19.0] 161 | 5 + 7 + 1 = 14.: [16, 14.0] 162 | 5 + 7 = 13.: [61, 28.0] 163 | 5 + 8 + 0 = 14.: [2, 0.0] 164 | 5 + 8 + 1 = 15.: [12, 12.0] 165 | 5 + 8 = 14.: [47, 27.0] 166 | 6 + 0 = 6.: [10, 10.0] 167 | 6 + 0 = 7.: [2, 0.0] 168 | 6 + 1 + 1 = 8.: [1, 1.0] 169 | 6 + 1 + 1 = 9.: [25, 0.0] 170 | 6 + 1 = 7.: [4, 4.0] 171 | 6 + 1 = 8.: [14, 0.0] 172 | 6 + 2 + 1 = 10.: [20, 20.0] 173 | 6 + 2 = 8.: [3, 3.0] 174 | 6 + 2 = 9.: [7, 0.0] 175 | 6 + 3 + 0 = 10.: [2, 0.0] 176 | 6 + 3 + 1 = 10.: [1, 0.0] 177 | 6 + 3 + 1 = 11.: [34, 33.0] 178 | 6 + 3 = 10.: [66, 31.0] 179 | 6 + 3 = 9.: [1, 0.0] 180 | 6 + 4 + 1 = 11.: [1, 0.0] 181 | 6 + 4 + 1 = 12.: [29, 21.0] 182 | 6 + 4 = 11.: [62, 33.0] 183 | 6 + 5 + 1 = 13.: [28, 28.0] 184 | 6 + 5 = 11.: [1, 0.0] 185 | 6 + 5 = 12.: [59, 37.0] 186 | 6 + 6 + 0 = 13.: [3, 1.0] 187 | 6 + 6 + 1 = 14.: [25, 16.0] 188 | 6 + 6 = 13.: [64, 23.0] 189 | 6 + 6 = 15.: [1, 0.0] 190 | 6 + 7 + 0 = 14.: [3, 2.0] 191 | 6 + 7 + 1 = 15.: [12, 10.0] 192 | 6 + 7 = 14.: [66, 46.0] 193 | 6 + 8 + 0 = 15.: [1, 1.0] 194 | 6 + 8 + 1 = 16.: [14, 13.0] 195 | 6 + 8 = 15.: [38, 29.0] 196 | 
7 + 0 = 7.: [13, 12.0] 197 | 7 + 0 = 8.: [3, 0.0] 198 | 7 + 1 + 1 = 10.: [26, 26.0] 199 | 7 + 1 = 10.: [10, 0.0] 200 | 7 + 1 = 9.: [2, 0.0] 201 | 7 + 2 + 1 = 11.: [25, 21.0] 202 | 7 + 2 = 10.: [46, 31.0] 203 | 7 + 2 = 11.: [5, 0.0] 204 | 7 + 3 + 0 = 11.: [1, 0.0] 205 | 7 + 3 + 1 = 11.: [3, 0.0] 206 | 7 + 3 + 1 = 12.: [16, 14.0] 207 | 7 + 3 = 11.: [51, 27.0] 208 | 7 + 4 + 0 = 12.: [1, 0.0] 209 | 7 + 4 + 1 = 13.: [25, 23.0] 210 | 7 + 4 = 12.: [55, 38.0] 211 | 7 + 5 + 1 = 14.: [26, 24.0] 212 | 7 + 5 = 13.: [51, 27.0] 213 | 7 + 6 + 0 = 14.: [1, 1.0] 214 | 7 + 6 + 1 = 15.: [12, 12.0] 215 | 7 + 6 = 14.: [66, 36.0] 216 | 7 + 7 + 0 = 15.: [2, 1.0] 217 | 7 + 7 + 1 = 16.: [8, 4.0] 218 | 7 + 7 = 14.: [1, 0.0] 219 | 7 + 7 = 15.: [55, 25.0] 220 | 7 + 7 = 16.: [3, 0.0] 221 | 7 + 8 + 0 = 16.: [4, 2.0] 222 | 7 + 8 + 1 = 17.: [20, 19.0] 223 | 7 + 8 = 16.: [57, 37.0] 224 | 8 + 0 = 8.: [8, 8.0] 225 | 8 + 0 = 9.: [1, 0.0] 226 | 8 + 1 + 0 = 10.: [1, 1.0] 227 | 8 + 1 + 1 = 11.: [17, 15.0] 228 | 8 + 1 = 10.: [59, 45.0] 229 | 8 + 1 = 9.: [2, 0.0] 230 | 8 + 2 + 1 = 12.: [17, 14.0] 231 | 8 + 2 = 10.: [1, 0.0] 232 | 8 + 2 = 11.: [52, 33.0] 233 | 8 + 3 + 0 = 11.: [1, 0.0] 234 | 8 + 3 + 0 = 12.: [1, 1.0] 235 | 8 + 3 + 1 = 13.: [23, 22.0] 236 | 8 + 3 = 12.: [45, 26.0] 237 | 8 + 4 + 1 = 14.: [5, 4.0] 238 | 8 + 4 = 13.: [37, 23.0] 239 | 8 + 5 + 0 = 14.: [1, 0.0] 240 | 8 + 5 + 1 = 15.: [19, 18.0] 241 | 8 + 5 = 14.: [49, 30.0] 242 | 8 + 6 + 1 = 16.: [18, 14.0] 243 | 8 + 6 = 15.: [47, 30.0] 244 | 8 + 7 + 0 = 16.: [4, 0.0] 245 | 8 + 7 + 1 = 16.: [1, 0.0] 246 | 8 + 7 + 1 = 17.: [17, 15.0] 247 | 8 + 7 = 16.: [64, 40.0] 248 | 8 + 8 + 0 = 17.: [2, 0.0] 249 | 8 + 8 + 1 = 18.: [19, 7.0] 250 | 8 + 8 = 16.: [1, 0.0] 251 | 8 + 8 = 17.: [41, 27.0] 252 | 8 + 8 = 18.: [1, 0.0] 253 | -------------------------------------------------------------------------------- /artifacts/checkpoint/clutrr/symbolic/gpt-4_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 
| aunt's brother is uncle.: [8, 3.0] 2 | aunt's daughter is cousin.: [3, 0.0] 3 | aunt's father is grandfather.: [6, 4.0] 4 | aunt's mother is grandmother.: [7, 4.0] 5 | aunt's sister is aunt.: [4, 3.0] 6 | aunt's son is cousin.: [2, 0.0] 7 | brother's aunt is aunt.: [5, 5.0] 8 | brother's brother is brother.: [46, 43.0] 9 | brother's brother is uncle.: [2, 0.0] 10 | brother's daughter is niece.: [1, 1.0] 11 | brother's father is father.: [42, 40.0] 12 | brother's grandfather is grandfather.: [22, 22.0] 13 | brother's grandfather is great-grandfather.: [5, 0.0] 14 | brother's grandmother is grandmother.: [16, 16.0] 15 | brother's grandmother is great-grandmother.: [4, 0.0] 16 | brother's mother is grandmother.: [10, 0.0] 17 | brother's mother is mother.: [43, 42.0] 18 | brother's sister is sister.: [50, 46.0] 19 | brother's son is nephew.: [54, 47.0] 20 | brother's uncle is uncle.: [2, 2.0] 21 | brother's wife is sister-in-law.: [8, 8.0] 22 | brother-in-law's daughter is niece.: [4, 4.0] 23 | brother-in-law's father is father-in-law.: [6, 6.0] 24 | brother-in-law's mother is mother-in-law.: [6, 6.0] 25 | brother-in-law's son is nephew.: [3, 3.0] 26 | daughter's aunt is aunt.: [17, 0.0] 27 | daughter's aunt is cousin.: [8, 0.0] 28 | daughter's aunt is sister.: [17, 17.0] 29 | daughter's brother is son.: [50, 36.0] 30 | daughter's daughter is granddaughter.: [27, 19.0] 31 | daughter's father is father.: [17, 0.0] 32 | daughter's father is husband.: [3, 1.0] 33 | daughter's father is self.: [1, 1.0] 34 | daughter's grandfather is father.: [24, 20.0] 35 | daughter's grandfather is grandfather.: [3, 0.0] 36 | daughter's grandmother is grandmother.: [3, 1.0] 37 | daughter's grandmother is mother.: [28, 24.0] 38 | daughter's husband is son-in-law.: [23, 23.0] 39 | daughter's mother is mother.: [2, 1.0] 40 | daughter's mother is self.: [1, 1.0] 41 | daughter's mother is wife.: [15, 5.0] 42 | daughter's sister is daughter.: [23, 20.0] 43 | daughter's sister is niece.: [1, 
0.0] 44 | daughter's sister is sister.: [30, 1.0] 45 | daughter's son is grandson.: [49, 38.0] 46 | daughter's uncle is brother.: [29, 26.0] 47 | daughter's uncle is cousin.: [2, 0.0] 48 | daughter's uncle is uncle.: [6, 0.0] 49 | daughter-in-law's daughter is granddaughter.: [5, 5.0] 50 | daughter-in-law's son is grandson.: [4, 4.0] 51 | father's brother is uncle.: [42, 38.0] 52 | father's daughter is daughter.: [4, 0.0] 53 | father's daughter is sister.: [56, 54.0] 54 | father's father is grandfather.: [36, 30.0] 55 | father's mother is grandmother.: [31, 28.0] 56 | father's sister is aunt.: [32, 30.0] 57 | father's son is brother.: [47, 41.0] 58 | father's son is son.: [21, 3.0] 59 | father's wife is mother.: [5, 4.0] 60 | grand-daughter's brother is grandson.: [1, 1.0] 61 | granddaughter's aunt is aunt.: [2, 0.0] 62 | granddaughter's brother is brother.: [1, 0.0] 63 | granddaughter's brother is grandson.: [6, 6.0] 64 | granddaughter's brother is great-grandson.: [9, 0.0] 65 | granddaughter's brother is great-uncle.: [3, 0.0] 66 | granddaughter's brother is nephew.: [4, 0.0] 67 | granddaughter's brother is son.: [3, 0.0] 68 | granddaughter's father is son.: [9, 9.0] 69 | granddaughter's mother is daughter.: [10, 10.0] 70 | granddaughter's sister is granddaughter.: [15, 15.0] 71 | granddaughter's sister is niece.: [6, 0.0] 72 | granddaughter's sister is sister.: [2, 0.0] 73 | granddaughter's uncle is cousin.: [1, 0.0] 74 | granddaughter's uncle is son.: [2, 2.0] 75 | grandfather's daughter is aunt.: [6, 6.0] 76 | grandfather's son is uncle.: [8, 7.0] 77 | grandmother's daughter is aunt.: [7, 7.0] 78 | grandmother's daughter is mother.: [3, 0.0] 79 | grandmother's son is uncle.: [8, 8.0] 80 | grandson's aunt is cousin.: [1, 0.0] 81 | grandson's aunt is niece.: [2, 0.0] 82 | grandson's brother is brother.: [1, 0.0] 83 | grandson's brother is grandson.: [11, 11.0] 84 | grandson's brother is nephew.: [7, 0.0] 85 | grandson's brother is son.: [1, 0.0] 86 | grandson's 
father is son.: [6, 6.0] 87 | grandson's mother is daughter.: [6, 6.0] 88 | grandson's sister is aunt.: [1, 0.0] 89 | grandson's sister is daughter.: [1, 0.0] 90 | grandson's sister is grand-daughter.: [1, 1.0] 91 | grandson's sister is granddaughter.: [16, 16.0] 92 | grandson's sister is niece.: [12, 0.0] 93 | grandson's sister is sister.: [4, 0.0] 94 | grandson's uncle is cousin.: [2, 0.0] 95 | grandson's uncle is son.: [3, 3.0] 96 | grandson's uncle is uncle.: [2, 0.0] 97 | great-granddaughter's brother is great-grandson.: [3, 0.0] 98 | great-granddaughter's sister is great-granddaughter.: [4, 0.0] 99 | great-grandson's brother is great-grandson.: [2, 0.0] 100 | himself's son is son.: [1, 1.0] 101 | husband's brother is brother-in-law.: [10, 6.0] 102 | husband's daughter is daughter.: [21, 19.0] 103 | husband's daughter is step-daughter.: [22, 3.0] 104 | husband's daughter is stepdaughter.: [14, 1.0] 105 | husband's father is father-in-law.: [12, 12.0] 106 | husband's granddaughter is granddaughter.: [18, 18.0] 107 | husband's granddaughter is great-granddaughter.: [6, 0.0] 108 | husband's grandson is grandchild.: [2, 0.0] 109 | husband's grandson is grandson.: [18, 18.0] 110 | husband's mother is mother-in-law.: [23, 18.0] 111 | husband's sister is sister-in-law.: [5, 2.0] 112 | husband's son is son.: [31, 26.0] 113 | husband's son is step-son.: [2, 0.0] 114 | husband's son is stepson.: [6, 0.0] 115 | mother's brother is uncle.: [32, 29.0] 116 | mother's daughter is daughter.: [11, 2.0] 117 | mother's daughter is granddaughter.: [1, 0.0] 118 | mother's daughter is sister.: [49, 48.0] 119 | mother's father is grandfather.: [29, 29.0] 120 | mother's husband is father.: [8, 5.0] 121 | mother's mother is grandmother.: [29, 26.0] 122 | mother's sister is aunt.: [31, 30.0] 123 | mother's son is brother.: [25, 23.0] 124 | mother's son is son.: [25, 3.0] 125 | nephew's aunt is mother.: [5, 0.0] 126 | nephew's aunt is sister.: [1, 1.0] 127 | nephew's brother is nephew.: 
[8, 4.0] 128 | nephew's grandfather is father.: [3, 3.0] 129 | nephew's grandfather is grandfather.: [1, 0.0] 130 | nephew's grandmother is grandmother.: [1, 0.0] 131 | nephew's grandmother is mother.: [6, 6.0] 132 | nephew's sister is niece.: [10, 10.0] 133 | nephew's uncle is father.: [1, 0.0] 134 | niece's aunt is cousin.: [2, 0.0] 135 | niece's aunt is sister.: [1, 1.0] 136 | niece's brother is nephew.: [8, 8.0] 137 | niece's grandfather is father.: [1, 1.0] 138 | niece's grandfather is grandfather.: [2, 0.0] 139 | niece's grandmother is grandmother.: [1, 0.0] 140 | niece's grandmother is mother.: [1, 1.0] 141 | niece's sister is niece.: [9, 6.0] 142 | niece's uncle is brother.: [5, 5.0] 143 | niece's uncle is uncle.: [2, 0.0] 144 | self's brother is brother.: [2, 2.0] 145 | sister's aunt is aunt.: [6, 3.0] 146 | sister's brother is brother.: [78, 64.0] 147 | sister's daughter is niece.: [58, 56.0] 148 | sister's father is father.: [47, 47.0] 149 | sister's father is grandfather.: [2, 0.0] 150 | sister's grandfather is grandfather.: [29, 25.0] 151 | sister's grandfather is great-grandfather.: [1, 0.0] 152 | sister's grandmother is grandmother.: [31, 26.0] 153 | sister's grandmother is great-grandmother.: [1, 0.0] 154 | sister's husband is brother-in-law.: [9, 7.0] 155 | sister's mother is grandmother.: [1, 0.0] 156 | sister's mother is mother.: [43, 43.0] 157 | sister's sister is sister.: [55, 55.0] 158 | sister's son is nephew.: [54, 44.0] 159 | sister's uncle is uncle.: [10, 6.0] 160 | sister-in-law's daughter is niece.: [2, 2.0] 161 | sister-in-law's father is father-in-law.: [3, 3.0] 162 | sister-in-law's mother is mother-in-law.: [7, 7.0] 163 | sister-in-law's son is nephew.: [6, 6.0] 164 | son's aunt is aunt.: [2, 0.0] 165 | son's aunt is cousin.: [13, 0.0] 166 | son's aunt is sister.: [31, 30.0] 167 | son's brother is brother.: [2, 1.0] 168 | son's brother is nephew.: [1, 0.0] 169 | son's brother is son.: [44, 40.0] 170 | son's daughter is 
granddaughter.: [43, 34.0] 171 | son's father is husband.: [15, 5.0] 172 | son's grandfather is father.: [23, 21.0] 173 | son's grandfather is grandfather.: [3, 0.0] 174 | son's grandmother is grandmother.: [8, 2.0] 175 | son's grandmother is mother.: [21, 17.0] 176 | son's mother is grandmother.: [1, 0.0] 177 | son's mother is mother.: [7, 0.0] 178 | son's mother is wife.: [10, 4.0] 179 | son's sister is daughter.: [48, 39.0] 180 | son's sister is sister.: [3, 0.0] 181 | son's son is grandson.: [45, 36.0] 182 | son's uncle is brother.: [23, 20.0] 183 | son's uncle is cousin.: [20, 0.0] 184 | son's uncle is uncle.: [6, 1.0] 185 | son's wife is daughter-in-law.: [31, 31.0] 186 | son-in-law's son is grandson.: [2, 2.0] 187 | step-daughter's aunt is step-sister.: [1, 0.0] 188 | step-daughter's daughter is step-granddaughter.: [3, 0.0] 189 | step-daughter's grandmother is mother.: [2, 2.0] 190 | step-daughter's sister is step-daughter.: [1, 0.0] 191 | step-daughter's uncle is brother.: [1, 1.0] 192 | step-daughter's uncle is step-brother.: [3, 0.0] 193 | stepdaughter's brother is stepson.: [1, 0.0] 194 | stepdaughter's grandfather is father.: [1, 1.0] 195 | stepdaughter's grandmother is mother.: [2, 0.0] 196 | stepdaughter's sister is stepdaughter.: [2, 0.0] 197 | stepson's aunt is step-aunt.: [1, 0.0] 198 | stepson's aunt is step-sister.: [1, 0.0] 199 | stepson's aunt is stepsister.: [1, 0.0] 200 | uncle's brother is uncle.: [5, 3.0] 201 | uncle's daughter is cousin.: [1, 0.0] 202 | uncle's father is grandfather.: [7, 3.0] 203 | uncle's mother is grandmother.: [8, 5.0] 204 | uncle's sister is aunt.: [7, 5.0] 205 | wife's brother is brother-in-law.: [7, 6.0] 206 | wife's daughter is daughter.: [37, 27.0] 207 | wife's daughter is step-daughter.: [6, 0.0] 208 | wife's daughter is stepdaughter.: [9, 1.0] 209 | wife's father is father-in-law.: [18, 13.0] 210 | wife's granddaughter is granddaughter.: [9, 7.0] 211 | wife's granddaughter is great-granddaughter.: [8, 0.0] 212 
| wife's grandson is grandson.: [18, 17.0] 213 | wife's mother is mother-in-law.: [18, 14.0] 214 | wife's sister is sister-in-law.: [13, 8.0] 215 | wife's son is son.: [36, 32.0] 216 | wife's son is step-son.: [1, 0.0] 217 | wife's son is stepson.: [11, 0.0] 218 | -------------------------------------------------------------------------------- /artifacts/checkpoint/arithmetic/base-11/gpt-3.5_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | 0 + 0 = 0.: [13, 1.0] 2 | 0 + 1 = 1.: [8, 0.0] 3 | 0 + 2 = 2.: [12, 0.0] 4 | 0 + 3 = 3.: [22, 1.0] 5 | 0 + 4 = 4.: [7, 2.0] 6 | 0 + 5 = 5.: [8, 0.0] 7 | 0 + 6 = 6.: [4, 0.0] 8 | 0 + 7 = 7.: [7, 1.0] 9 | 0 + 8 = 8.: [7, 1.0] 10 | 0 + 9 = 9.: [15, 1.0] 11 | 0 + A = A.: [29, 4.0] 12 | 1 + 0 = 1.: [14, 0.0] 13 | 1 + 1 + 1 = 3.: [9, 0.0] 14 | 1 + 1 = 2.: [19, 1.0] 15 | 1 + 10 + 1 = 12.: [1, 0.0] 16 | 1 + 10 = 11.: [1, 0.0] 17 | 1 + 2 + 1 = 4.: [8, 0.0] 18 | 1 + 2 = 3.: [15, 4.0] 19 | 1 + 3 + 1 = 5.: [7, 1.0] 20 | 1 + 3 = 4.: [20, 0.0] 21 | 1 + 4 + 1 = 6.: [7, 0.0] 22 | 1 + 4 = 5.: [17, 3.0] 23 | 1 + 5 + 1 = 7.: [9, 0.0] 24 | 1 + 5 = 6.: [20, 2.0] 25 | 1 + 6 + 1 = 8.: [4, 1.0] 26 | 1 + 6 = 7.: [30, 3.0] 27 | 1 + 7 + 1 = 9.: [13, 0.0] 28 | 1 + 7 = 8.: [23, 9.0] 29 | 1 + 8 + 1 = 10.: [4, 0.0] 30 | 1 + 8 + 1 = A.: [2, 0.0] 31 | 1 + 8 = 9.: [12, 1.0] 32 | 1 + 9 + 1 = 11.: [6, 1.0] 33 | 1 + 9 + 1 = B.: [1, 0.0] 34 | 1 + 9 = 10.: [16, 0.0] 35 | 1 + 9 = A.: [11, 4.0] 36 | 1 + A + 1 = 12.: [1, 0.0] 37 | 1 + A + 1 = B.: [1, 0.0] 38 | 1 + A + 1 = C.: [6, 0.0] 39 | 1 + A + 6 = 17.: [1, 0.0] 40 | 1 + A = 11.: [13, 7.0] 41 | 1 + A = B.: [26, 0.0] 42 | 10 + 1 = 11.: [1, 0.0] 43 | 10 + 2 + 1 = 13.: [1, 0.0] 44 | 10 + 2 = 12.: [1, 0.0] 45 | 10 + 3 = 13.: [1, 0.0] 46 | 10 + 8 + 1 = 19.: [1, 0.0] 47 | 10 + 8 = 18.: [1, 0.0] 48 | 10 + 9 + 1 = 20.: [1, 0.0] 49 | 2 + 0 = 2.: [18, 2.0] 50 | 2 + 1 + 1 = 4.: [2, 0.0] 51 | 2 + 1 = 3.: [16, 1.0] 52 | 2 + 10 + 1 = 13.: [1, 0.0] 53 | 2 + 10 = 12.: [1, 
0.0] 54 | 2 + 2 + 1 = 5.: [12, 3.0] 55 | 2 + 2 = 4.: [16, 3.0] 56 | 2 + 3 + 1 = 6.: [5, 1.0] 57 | 2 + 3 = 5.: [22, 2.0] 58 | 2 + 4 + 1 = 7.: [7, 0.0] 59 | 2 + 4 = 6.: [9, 0.0] 60 | 2 + 5 + 1 = 8.: [11, 1.0] 61 | 2 + 5 = 7.: [12, 3.0] 62 | 2 + 6 + 1 = 9.: [13, 0.0] 63 | 2 + 6 = 8.: [16, 2.0] 64 | 2 + 7 + 1 = 10.: [11, 0.0] 65 | 2 + 7 + 1 = A.: [1, 0.0] 66 | 2 + 7 = 9.: [16, 4.0] 67 | 2 + 8 + 1 = 11.: [6, 0.0] 68 | 2 + 8 = 10.: [18, 0.0] 69 | 2 + 8 = A.: [28, 17.0] 70 | 2 + 9 + 1 = 12.: [14, 0.0] 71 | 2 + 9 + 1 = C.: [1, 0.0] 72 | 2 + 9 = 11.: [28, 5.0] 73 | 2 + 9 = B.: [5, 0.0] 74 | 2 + A + 1 = C.: [4, 0.0] 75 | 2 + A = 10.: [1, 0.0] 76 | 2 + A = 11.: [2, 1.0] 77 | 2 + A = 12.: [7, 0.0] 78 | 2 + A = 13.: [1, 0.0] 79 | 2 + A = C.: [25, 0.0] 80 | 3 + 0 = 3.: [8, 2.0] 81 | 3 + 1 + 1 = 5.: [14, 0.0] 82 | 3 + 1 = 4.: [22, 4.0] 83 | 3 + 10 + 1 = 14.: [1, 1.0] 84 | 3 + 10 = 13.: [1, 0.0] 85 | 3 + 2 + 1 = 6.: [8, 1.0] 86 | 3 + 2 = 5.: [20, 2.0] 87 | 3 + 3 + 1 = 7.: [6, 0.0] 88 | 3 + 3 = 6.: [8, 0.0] 89 | 3 + 4 + 1 = 8.: [24, 1.0] 90 | 3 + 4 = 7.: [21, 1.0] 91 | 3 + 5 + 1 = 9.: [8, 0.0] 92 | 3 + 5 = 8.: [23, 0.0] 93 | 3 + 6 + 1 = 10.: [4, 0.0] 94 | 3 + 6 + 1 = A.: [3, 0.0] 95 | 3 + 6 = 9.: [23, 3.0] 96 | 3 + 7 + 1 = 11.: [7, 0.0] 97 | 3 + 7 = 10.: [43, 0.0] 98 | 3 + 7 = A.: [11, 3.0] 99 | 3 + 8 + 1 = 12.: [4, 0.0] 100 | 3 + 8 + 1 = C.: [4, 0.0] 101 | 3 + 8 = 11.: [16, 7.0] 102 | 3 + 8 = B.: [20, 0.0] 103 | 3 + 9 + 1 = 13.: [11, 0.0] 104 | 3 + 9 = 12.: [16, 0.0] 105 | 3 + 9 = C.: [7, 0.0] 106 | 3 + A + 1 = 12.: [1, 0.0] 107 | 3 + A + 1 = 14.: [1, 0.0] 108 | 3 + A + 1 = 15.: [1, 0.0] 109 | 3 + A + 1 = E.: [4, 0.0] 110 | 3 + A = 13.: [12, 0.0] 111 | 3 + A = 14.: [4, 0.0] 112 | 3 + A = D.: [22, 0.0] 113 | 4 + 0 = 4.: [6, 0.0] 114 | 4 + 1 + 1 = 6.: [2, 0.0] 115 | 4 + 1 = 5.: [26, 3.0] 116 | 4 + 10 + 1 = 15.: [1, 0.0] 117 | 4 + 2 + 1 = 7.: [5, 0.0] 118 | 4 + 2 = 6.: [13, 2.0] 119 | 4 + 3 + 1 = 8.: [5, 0.0] 120 | 4 + 3 = 7.: [17, 6.0] 121 | 4 + 4 + 1 = 9.: [7, 0.0] 122 | 4 + 4 = 
8.: [13, 3.0] 123 | 4 + 5 + 1 = 10.: [4, 0.0] 124 | 4 + 5 + 1 = A.: [2, 0.0] 125 | 4 + 5 = 9.: [7, 1.0] 126 | 4 + 6 + 1 = 11.: [4, 0.0] 127 | 4 + 6 + 1 = B.: [1, 0.0] 128 | 4 + 6 + 3 = 13.: [1, 0.0] 129 | 4 + 6 = 10.: [7, 0.0] 130 | 4 + 6 = A.: [38, 12.0] 131 | 4 + 7 + 1 = 12.: [4, 0.0] 132 | 4 + 7 = 11.: [12, 6.0] 133 | 4 + 7 = B.: [17, 0.0] 134 | 4 + 8 + 1 = 13.: [9, 0.0] 135 | 4 + 8 = 12.: [17, 0.0] 136 | 4 + 8 = C.: [23, 0.0] 137 | 4 + 9 + 1 = 14.: [7, 0.0] 138 | 4 + 9 = 13.: [34, 0.0] 139 | 4 + 9 = D.: [4, 0.0] 140 | 4 + A + 1 = 15.: [2, 0.0] 141 | 4 + A + 1 = F.: [1, 0.0] 142 | 4 + A = 14.: [2, 0.0] 143 | 4 + A = 15.: [1, 0.0] 144 | 4 + A = E.: [17, 0.0] 145 | 5 + 0 = 5.: [13, 0.0] 146 | 5 + 1 + 1 = 7.: [6, 0.0] 147 | 5 + 1 = 6.: [19, 5.0] 148 | 5 + 10 + 1 = 16.: [1, 0.0] 149 | 5 + 10 = 15.: [1, 1.0] 150 | 5 + 2 + 1 = 8.: [5, 2.0] 151 | 5 + 2 = 7.: [16, 0.0] 152 | 5 + 3 + 1 = 9.: [13, 0.0] 153 | 5 + 3 = 8.: [19, 5.0] 154 | 5 + 4 + 1 = 10.: [2, 0.0] 155 | 5 + 4 = 9.: [32, 5.0] 156 | 5 + 5 + 1 = 11.: [8, 1.0] 157 | 5 + 5 = 10.: [9, 0.0] 158 | 5 + 5 = A.: [28, 14.0] 159 | 5 + 6 + 1 = 12.: [14, 0.0] 160 | 5 + 6 + 1 = C.: [1, 0.0] 161 | 5 + 6 = 11.: [5, 2.0] 162 | 5 + 6 = B.: [22, 0.0] 163 | 5 + 7 + 1 = 13.: [9, 0.0] 164 | 5 + 7 + 1 = D.: [2, 0.0] 165 | 5 + 7 = 12.: [8, 0.0] 166 | 5 + 7 = C.: [21, 0.0] 167 | 5 + 8 + 1 = 14.: [11, 0.0] 168 | 5 + 8 + 1 = E.: [3, 0.0] 169 | 5 + 8 = 13.: [1, 0.0] 170 | 5 + 8 = D.: [15, 0.0] 171 | 5 + 9 + 1 = 15.: [17, 0.0] 172 | 5 + 9 + 1 = F.: [2, 0.0] 173 | 5 + 9 = 14.: [30, 0.0] 174 | 5 + 9 = E.: [1, 0.0] 175 | 5 + A + 1 = 10.: [1, 0.0] 176 | 5 + A + 1 = 16.: [2, 0.0] 177 | 5 + A + 1 = F.: [1, 0.0] 178 | 5 + A = 14.: [2, 1.0] 179 | 5 + A = 15.: [19, 0.0] 180 | 5 + A = F.: [27, 0.0] 181 | 6 + 0 = 6.: [14, 0.0] 182 | 6 + 1 + 1 = 8.: [3, 0.0] 183 | 6 + 1 + A = 11.: [1, 0.0] 184 | 6 + 1 = 7.: [19, 1.0] 185 | 6 + 10 = 16.: [3, 0.0] 186 | 6 + 2 + 1 = 9.: [10, 0.0] 187 | 6 + 2 = 8.: [14, 7.0] 188 | 6 + 3 + 1 = 10.: [7, 0.0] 189 | 6 + 3 + 
1 = A.: [6, 0.0] 190 | 6 + 3 = 9.: [13, 0.0] 191 | 6 + 4 + 1 = 11.: [7, 0.0] 192 | 6 + 4 + 1 = B.: [2, 0.0] 193 | 6 + 4 + 5 = 15.: [1, 0.0] 194 | 6 + 4 = 10.: [1, 0.0] 195 | 6 + 4 = A.: [51, 15.0] 196 | 6 + 5 + 1 = 12.: [5, 1.0] 197 | 6 + 5 + 9 = 20.: [1, 0.0] 198 | 6 + 5 = 11.: [13, 5.0] 199 | 6 + 5 = B.: [24, 0.0] 200 | 6 + 6 + 1 = 13.: [10, 0.0] 201 | 6 + 6 + 1 = D.: [1, 0.0] 202 | 6 + 6 = 10.: [1, 0.0] 203 | 6 + 6 = 12.: [7, 0.0] 204 | 6 + 6 = C.: [24, 0.0] 205 | 6 + 7 + 1 = 14.: [6, 0.0] 206 | 6 + 7 = 13.: [22, 0.0] 207 | 6 + 7 = D.: [23, 0.0] 208 | 6 + 8 + 1 = 15.: [5, 0.0] 209 | 6 + 8 + 1 = F.: [1, 0.0] 210 | 6 + 8 = 14.: [24, 0.0] 211 | 6 + 8 = E.: [5, 0.0] 212 | 6 + 9 + 1 = 16.: [9, 0.0] 213 | 6 + 9 = 15.: [28, 0.0] 214 | 6 + 9 = F.: [17, 0.0] 215 | 6 + A + 1 = 11.: [1, 0.0] 216 | 6 + A + 1 = 17.: [3, 0.0] 217 | 6 + A = 10.: [23, 0.0] 218 | 6 + A = 16.: [9, 0.0] 219 | 6 + A = 17.: [1, 0.0] 220 | 6 + A = 20.: [1, 0.0] 221 | 7 + 0 = 7.: [18, 2.0] 222 | 7 + 1 + 1 = 9.: [10, 0.0] 223 | 7 + 1 = 8.: [27, 5.0] 224 | 7 + 2 + 1 = 10.: [6, 0.0] 225 | 7 + 2 + 1 = A.: [3, 0.0] 226 | 7 + 2 = 9.: [14, 3.0] 227 | 7 + 3 + 1 = 11.: [3, 0.0] 228 | 7 + 3 = 10.: [8, 0.0] 229 | 7 + 3 = A.: [15, 5.0] 230 | 7 + 4 + 1 = 12.: [9, 2.0] 231 | 7 + 4 + 1 = C.: [2, 0.0] 232 | 7 + 4 = 11.: [15, 9.0] 233 | 7 + 4 = A.: [1, 0.0] 234 | 7 + 4 = B.: [33, 0.0] 235 | 7 + 5 + 1 = 13.: [10, 0.0] 236 | 7 + 5 = 12.: [10, 0.0] 237 | 7 + 5 = C.: [39, 0.0] 238 | 7 + 6 + 1 = 14.: [4, 0.0] 239 | 7 + 6 = 13.: [20, 0.0] 240 | 7 + 6 = D.: [17, 0.0] 241 | 7 + 7 + 1 = 15.: [9, 0.0] 242 | 7 + 7 + 1 = F.: [2, 0.0] 243 | 7 + 7 = 14.: [39, 0.0] 244 | 7 + 7 = E.: [1, 0.0] 245 | 7 + 8 + 1 = 16.: [3, 0.0] 246 | 7 + 8 = 15.: [29, 0.0] 247 | 7 + 9 + 1 = 17.: [19, 0.0] 248 | 7 + 9 = 10.: [1, 0.0] 249 | 7 + 9 = 16.: [42, 0.0] 250 | 7 + A + 1 = 12.: [1, 0.0] 251 | 7 + A + 1 = 18.: [1, 0.0] 252 | 7 + A = 11.: [15, 0.0] 253 | 7 + A = 17.: [20, 0.0] 254 | 8 + 0 = 8.: [14, 2.0] 255 | 8 + 1 + 1 = A.: [2, 0.0] 256 | 8 + 1 = 
9.: [27, 5.0] 257 | 8 + 2 + 1 = 11.: [6, 0.0] 258 | 8 + 2 + 1 = B.: [2, 0.0] 259 | 8 + 2 = 10.: [2, 0.0] 260 | 8 + 2 = A.: [46, 16.0] 261 | 8 + 3 + 1 = 12.: [14, 0.0] 262 | 8 + 3 + 1 = C.: [3, 0.0] 263 | 8 + 3 = 11.: [9, 4.0] 264 | 8 + 3 = B.: [31, 0.0] 265 | 8 + 4 + 1 = 13.: [7, 0.0] 266 | 8 + 4 = 12.: [8, 0.0] 267 | 8 + 4 = C.: [19, 0.0] 268 | 8 + 5 + 1 = 14.: [3, 0.0] 269 | 8 + 5 + 1 = E.: [2, 0.0] 270 | 8 + 5 = 13.: [16, 0.0] 271 | 8 + 5 = D.: [24, 0.0] 272 | 8 + 6 + 1 = 15.: [10, 0.0] 273 | 8 + 6 + 1 = F.: [1, 0.0] 274 | 8 + 6 + 5 = 19.: [2, 0.0] 275 | 8 + 6 = 14.: [23, 0.0] 276 | 8 + 6 = E.: [10, 0.0] 277 | 8 + 7 + 1 = 16.: [7, 0.0] 278 | 8 + 7 = 15.: [28, 0.0] 279 | 8 + 7 = F.: [10, 0.0] 280 | 8 + 8 + 1 = 17.: [15, 0.0] 281 | 8 + 8 + 5 = 21.: [1, 0.0] 282 | 8 + 8 = 10.: [4, 0.0] 283 | 8 + 8 = 16.: [23, 0.0] 284 | 8 + 9 + 1 = 18.: [7, 0.0] 285 | 8 + 9 = 11.: [2, 0.0] 286 | 8 + 9 = 17.: [28, 0.0] 287 | 8 + A + 1 = 19.: [2, 0.0] 288 | 8 + A + 1 = 24.: [1, 0.0] 289 | 8 + A = 11.: [1, 0.0] 290 | 8 + A = 12.: [18, 0.0] 291 | 8 + A = 13.: [4, 0.0] 292 | 8 + A = 17.: [2, 2.0] 293 | 8 + A = 18.: [15, 0.0] 294 | 8 + A = 19.: [3, 0.0] 295 | 9 + 0 = 9.: [5, 0.0] 296 | 9 + 1 + 1 = 11.: [7, 0.0] 297 | 9 + 1 = 10.: [12, 0.0] 298 | 9 + 1 = A.: [13, 4.0] 299 | 9 + 10 = 19.: [1, 0.0] 300 | 9 + 2 + 1 = 12.: [8, 0.0] 301 | 9 + 2 = 11.: [32, 9.0] 302 | 9 + 2 = B.: [15, 0.0] 303 | 9 + 3 + 1 = 13.: [4, 0.0] 304 | 9 + 3 = 12.: [22, 0.0] 305 | 9 + 3 = C.: [16, 0.0] 306 | 9 + 4 + 1 = 14.: [5, 0.0] 307 | 9 + 4 = 13.: [21, 0.0] 308 | 9 + 4 = D.: [9, 0.0] 309 | 9 + 5 + 1 = 15.: [7, 0.0] 310 | 9 + 5 = 13.: [3, 3.0] 311 | 9 + 5 = 14.: [33, 0.0] 312 | 9 + 5 = E.: [3, 0.0] 313 | 9 + 6 + 1 = 16.: [5, 0.0] 314 | 9 + 6 = 15.: [38, 0.0] 315 | 9 + 6 = F.: [2, 0.0] 316 | 9 + 7 + 1 = 17.: [6, 2.0] 317 | 9 + 7 = 10.: [1, 0.0] 318 | 9 + 7 = 16.: [19, 0.0] 319 | 9 + 8 + 1 = 18.: [2, 0.0] 320 | 9 + 8 + 5 = 22.: [2, 0.0] 321 | 9 + 8 = 17.: [35, 0.0] 322 | 9 + 9 + 1 = 19.: [27, 0.0] 323 | 9 + 9 = 18.: 
[18, 0.0] 324 | 9 + A + 1 = 15.: [1, 0.0] 325 | 9 + A = 13.: [6, 0.0] 326 | 9 + A = 14.: [1, 0.0] 327 | 9 + A = 15.: [1, 0.0] 328 | 9 + A = 19.: [25, 0.0] 329 | 9 + A = 1A.: [1, 0.0] 330 | 9 + A = 20.: [2, 0.0] 331 | A + 0 = A.: [24, 9.0] 332 | A + 1 + 1 = B.: [2, 0.0] 333 | A + 1 = B.: [24, 0.0] 334 | A + 2 = 12.: [1, 0.0] 335 | A + 2 = C.: [31, 0.0] 336 | A + 3 + 1 = 10.: [1, 0.0] 337 | A + 3 + 1 = 14.: [1, 0.0] 338 | A + 3 + 1 = 15.: [3, 0.0] 339 | A + 3 + 1 = E.: [6, 0.0] 340 | A + 3 + 1 = F.: [1, 0.0] 341 | A + 3 = D.: [27, 0.0] 342 | A + 4 + 1 = 14.: [2, 0.0] 343 | A + 4 + 1 = 15.: [2, 0.0] 344 | A + 4 = E.: [24, 0.0] 345 | A + 5 + 1 = 11.: [3, 0.0] 346 | A + 5 + 1 = 17.: [2, 0.0] 347 | A + 5 = 10.: [1, 0.0] 348 | A + 5 = 14.: [1, 1.0] 349 | A + 5 = 15.: [6, 0.0] 350 | A + 5 = F.: [22, 1.0] 351 | A + 6 + 1 = 11.: [2, 0.0] 352 | A + 6 + 1 = 12.: [1, 0.0] 353 | A + 6 + 1 = 17.: [1, 0.0] 354 | A + 6 = 10.: [34, 0.0] 355 | A + 6 = 11.: [2, 0.0] 356 | A + 6 = 16.: [8, 0.0] 357 | A + 6 = 18.: [1, 0.0] 358 | A + 7 + 1 = 12.: [1, 0.0] 359 | A + 7 + 1 = 16.: [1, 0.0] 360 | A + 7 + 1 = 18.: [3, 0.0] 361 | A + 7 + 1 = 19.: [1, 0.0] 362 | A + 7 = 10.: [1, 0.0] 363 | A + 7 = 11.: [17, 0.0] 364 | A + 7 = 12.: [3, 0.0] 365 | A + 7 = 16.: [2, 2.0] 366 | A + 7 = 17.: [10, 0.0] 367 | A + 7 = 18.: [2, 0.0] 368 | A + 8 + 1 = 14.: [2, 0.0] 369 | A + 8 + 1 = 19.: [2, 0.0] 370 | A + 8 = 10.: [1, 0.0] 371 | A + 8 = 12.: [19, 0.0] 372 | A + 8 = 13.: [9, 0.0] 373 | A + 8 = 17.: [3, 2.0] 374 | A + 8 = 18.: [6, 0.0] 375 | A + 8 = 19.: [3, 0.0] 376 | A + 8 = 1A.: [1, 0.0] 377 | A + 9 + 1 = 14.: [1, 0.0] 378 | A + 9 + 1 = 15.: [1, 0.0] 379 | A + 9 + 1 = 1A.: [5, 0.0] 380 | A + 9 + 1 = 20.: [1, 0.0] 381 | A + 9 = 13.: [19, 0.0] 382 | A + 9 = 14.: [3, 0.0] 383 | A + 9 = 15.: [2, 0.0] 384 | A + 9 = 19.: [19, 0.0] 385 | A + A + 1 = 15.: [1, 0.0] 386 | A + A + 1 = 16.: [1, 0.0] 387 | A + A + 1 = 17.: [1, 0.0] 388 | A + A + 1 = 20.: [1, 0.0] 389 | A + A + 1 = 21.: [3, 0.0] 390 | A + A = 10.: 
[2, 0.0] 391 | A + A = 12.: [1, 0.0] 392 | A + A = 14.: [35, 0.0] 393 | -------------------------------------------------------------------------------- /artifacts/checkpoint/arithmetic/base-11/gpt-4_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | 0 + 0 = 0.: [13, 4.0] 2 | 0 + 1 = 1.: [8, 7.0] 3 | 0 + 10 = 10.: [2, 0.0] 4 | 0 + 2 = 2.: [12, 8.0] 5 | 0 + 3 = 3.: [22, 15.0] 6 | 0 + 4 = 4.: [6, 4.0] 7 | 0 + 5 = 5.: [9, 6.0] 8 | 0 + 6 = 6.: [4, 3.0] 9 | 0 + 7 = 7.: [7, 5.0] 10 | 0 + 8 = 8.: [7, 6.0] 11 | 0 + 9 = 9.: [15, 7.0] 12 | 0 + A = A.: [25, 15.0] 13 | 1 + 0 = 1.: [14, 14.0] 14 | 1 + 1 + 1 = 3.: [9, 7.0] 15 | 1 + 1 = 2.: [10, 8.0] 16 | 1 + 2 + 1 = 4.: [10, 7.0] 17 | 1 + 2 = 3.: [13, 11.0] 18 | 1 + 3 + 1 = 5.: [15, 11.0] 19 | 1 + 3 = 4.: [13, 7.0] 20 | 1 + 4 + 1 = 6.: [6, 5.0] 21 | 1 + 4 + A = 14.: [1, 0.0] 22 | 1 + 4 = 5.: [15, 11.0] 23 | 1 + 5 + 1 = 7.: [9, 3.0] 24 | 1 + 5 = 6.: [18, 12.0] 25 | 1 + 6 + 1 = 8.: [9, 6.0] 26 | 1 + 6 = 7.: [24, 18.0] 27 | 1 + 7 + 1 = 8.: [5, 0.0] 28 | 1 + 7 + 1 = 9.: [14, 10.0] 29 | 1 + 7 = 8.: [17, 12.0] 30 | 1 + 8 + 1 = 10.: [5, 0.0] 31 | 1 + 8 + 1 = 9.: [2, 0.0] 32 | 1 + 8 + 1 = A.: [1, 1.0] 33 | 1 + 8 = 9.: [11, 8.0] 34 | 1 + 9 + 1 = 10.: [7, 3.0] 35 | 1 + 9 + 1 = 11.: [5, 1.0] 36 | 1 + 9 = 10.: [13, 0.0] 37 | 1 + 9 = A.: [9, 8.0] 38 | 1 + A + 1 = 11.: [10, 4.0] 39 | 1 + A + 1 = 12.: [4, 0.0] 40 | 1 + A + 1 = 21.: [1, 0.0] 41 | 1 + A = 10.: [30, 26.0] 42 | 1 + A = 11.: [1, 0.0] 43 | 1 + A = B.: [1, 0.0] 44 | 10 + 2 = 11.: [1, 0.0] 45 | 10 + 4 = 14.: [1, 1.0] 46 | 10 + 6 = 16.: [2, 0.0] 47 | 10 + 9 + 1 = 19.: [1, 0.0] 48 | 10 + 9 = 19.: [1, 1.0] 49 | 2 + 0 = 2.: [17, 12.0] 50 | 2 + 1 + 1 = 4.: [2, 2.0] 51 | 2 + 1 = 3.: [15, 5.0] 52 | 2 + 10 = 11.: [1, 1.0] 53 | 2 + 10 = 12.: [1, 0.0] 54 | 2 + 2 + 1 = 5.: [13, 10.0] 55 | 2 + 2 = 4.: [15, 11.0] 56 | 2 + 3 + 1 = 6.: [7, 6.0] 57 | 2 + 3 = 5.: [20, 18.0] 58 | 2 + 4 + 1 = 7.: [8, 8.0] 59 | 2 + 4 = 6.: [8, 7.0] 60 | 2 + 5 + 
1 = 8.: [15, 14.0] 61 | 2 + 5 = 7.: [9, 7.0] 62 | 2 + 6 + 1 = 10.: [1, 0.0] 63 | 2 + 6 + 1 = 8.: [1, 0.0] 64 | 2 + 6 + 1 = 9.: [12, 8.0] 65 | 2 + 6 = 8.: [15, 14.0] 66 | 2 + 7 + 1 = 10.: [6, 0.0] 67 | 2 + 7 + 1 = 9.: [2, 0.0] 68 | 2 + 7 + 1 = A.: [2, 2.0] 69 | 2 + 7 = 9.: [17, 12.0] 70 | 2 + 8 + 1 = 10.: [8, 5.0] 71 | 2 + 8 + 1 = 11.: [3, 1.0] 72 | 2 + 8 = 10.: [33, 0.0] 73 | 2 + 8 = A.: [8, 5.0] 74 | 2 + 9 + 1 = 11.: [11, 2.0] 75 | 2 + 9 + 1 = 12.: [1, 0.0] 76 | 2 + 9 = 10.: [30, 22.0] 77 | 2 + 9 = 11.: [6, 5.0] 78 | 2 + A + 1 = 12.: [5, 3.0] 79 | 2 + A + 1 = 13.: [1, 0.0] 80 | 2 + A + 1 = 14.: [1, 0.0] 81 | 2 + A + 1 = 16.: [1, 0.0] 82 | 2 + A = 10.: [1, 0.0] 83 | 2 + A = 11.: [26, 10.0] 84 | 2 + A = 12.: [2, 0.0] 85 | 3 + 0 = 3.: [8, 5.0] 86 | 3 + 1 + 1 = 5.: [25, 21.0] 87 | 3 + 1 = 4.: [9, 6.0] 88 | 3 + 2 + 1 = 6.: [14, 10.0] 89 | 3 + 2 = 5.: [15, 14.0] 90 | 3 + 3 + 1 = 7.: [8, 8.0] 91 | 3 + 3 = 6.: [6, 3.0] 92 | 3 + 4 + 1 = 8.: [32, 26.0] 93 | 3 + 4 = 7.: [13, 11.0] 94 | 3 + 5 + 1 = 9.: [16, 8.0] 95 | 3 + 5 = 8.: [15, 10.0] 96 | 3 + 6 + 1 = 10.: [13, 0.0] 97 | 3 + 6 = 10.: [1, 0.0] 98 | 3 + 6 = 9.: [16, 12.0] 99 | 3 + 7 + 1 = 10.: [8, 7.0] 100 | 3 + 7 + 1 = 11.: [4, 3.0] 101 | 3 + 7 = 10.: [31, 1.0] 102 | 3 + 7 = 11.: [1, 0.0] 103 | 3 + 7 = 9.: [1, 0.0] 104 | 3 + 7 = A.: [13, 9.0] 105 | 3 + 8 + 1 = 11.: [11, 2.0] 106 | 3 + 8 = 10.: [31, 25.0] 107 | 3 + 8 = 11.: [3, 2.0] 108 | 3 + 9 + 1 = 12.: [6, 5.0] 109 | 3 + 9 + 1 = 13.: [2, 0.0] 110 | 3 + 9 = 11.: [22, 12.0] 111 | 3 + 9 = 12.: [2, 0.0] 112 | 3 + A + 1 = 13.: [6, 5.0] 113 | 3 + A + 1 = 14.: [2, 0.0] 114 | 3 + A = 12.: [33, 30.0] 115 | 3 + A = 13.: [1, 0.0] 116 | 4 + 0 = 4.: [7, 6.0] 117 | 4 + 1 + 1 = 6.: [6, 6.0] 118 | 4 + 1 = 5.: [22, 10.0] 119 | 4 + 2 + 1 = 7.: [9, 8.0] 120 | 4 + 2 = 6.: [9, 5.0] 121 | 4 + 3 + 1 = 8.: [8, 6.0] 122 | 4 + 3 = 7.: [13, 8.0] 123 | 4 + 4 + 1 = 9.: [11, 7.0] 124 | 4 + 4 = 8.: [8, 7.0] 125 | 4 + 5 + 1 = 10.: [5, 0.0] 126 | 4 + 5 + 1 = A.: [2, 2.0] 127 | 4 + 5 = 9.: [6, 4.0] 128 
| 4 + 6 + 1 = 10.: [12, 8.0] 129 | 4 + 6 + 1 = 11.: [2, 0.0] 130 | 4 + 6 = 10.: [21, 0.0] 131 | 4 + 6 = 9.: [2, 0.0] 132 | 4 + 6 = A.: [14, 11.0] 133 | 4 + 7 + 1 = 11.: [11, 1.0] 134 | 4 + 7 + 1 = 12.: [1, 0.0] 135 | 4 + 7 = 10.: [18, 15.0] 136 | 4 + 7 = 11.: [1, 0.0] 137 | 4 + 7 = A.: [1, 0.0] 138 | 4 + 7 = B.: [1, 0.0] 139 | 4 + 8 + 1 = 12.: [7, 5.0] 140 | 4 + 8 + 1 = 13.: [2, 0.0] 141 | 4 + 8 = 11.: [36, 16.0] 142 | 4 + 8 = 12.: [3, 0.0] 143 | 4 + 9 + 1 = 13.: [8, 7.0] 144 | 4 + 9 = 12.: [34, 31.0] 145 | 4 + A + 1 = 14.: [2, 0.0] 146 | 4 + A = 12.: [1, 0.0] 147 | 4 + A = 13.: [16, 16.0] 148 | 4 + A = 14.: [1, 0.0] 149 | 5 + 0 = 5.: [11, 10.0] 150 | 5 + 1 + 1 = 7.: [10, 8.0] 151 | 5 + 1 = 6.: [18, 13.0] 152 | 5 + 2 + 1 = 8.: [9, 8.0] 153 | 5 + 2 = 7.: [12, 6.0] 154 | 5 + 3 + 1 = 9.: [13, 8.0] 155 | 5 + 3 = 8.: [18, 14.0] 156 | 5 + 4 + 1 = 10.: [13, 2.0] 157 | 5 + 4 + 1 = 9.: [1, 0.0] 158 | 5 + 4 + 1 = A.: [1, 1.0] 159 | 5 + 4 = 9.: [19, 11.0] 160 | 5 + 5 + 1 = 10.: [12, 7.0] 161 | 5 + 5 = 10.: [23, 0.0] 162 | 5 + 5 = 9.: [2, 0.0] 163 | 5 + 5 = A.: [8, 6.0] 164 | 5 + 6 + 1 = 11.: [16, 4.0] 165 | 5 + 6 + 1 = 12.: [1, 0.0] 166 | 5 + 6 = 10.: [22, 18.0] 167 | 5 + 7 + 1 = 12.: [19, 13.0] 168 | 5 + 7 = 11.: [18, 4.0] 169 | 5 + 7 = 12.: [1, 0.0] 170 | 5 + 8 + 1 = 13.: [13, 7.0] 171 | 5 + 8 + 1 = 14.: [3, 1.0] 172 | 5 + 8 = 12.: [12, 10.0] 173 | 5 + 8 = 13.: [2, 0.0] 174 | 5 + 9 + 1 = 14.: [20, 11.0] 175 | 5 + 9 + 1 = 15.: [1, 0.0] 176 | 5 + 9 = 13.: [25, 21.0] 177 | 5 + 9 = 14.: [2, 1.0] 178 | 5 + A + 1 = 15.: [9, 5.0] 179 | 5 + A = 14.: [40, 31.0] 180 | 5 + A = 15.: [1, 0.0] 181 | 6 + 0 = 6.: [14, 10.0] 182 | 6 + 1 + 1 = 8.: [11, 6.0] 183 | 6 + 1 = 7.: [11, 5.0] 184 | 6 + 2 + 1 = 8.: [1, 0.0] 185 | 6 + 2 + 1 = 9.: [17, 9.0] 186 | 6 + 2 = 8.: [5, 2.0] 187 | 6 + 3 + 1 = 10.: [14, 2.0] 188 | 6 + 3 + 1 = 9.: [1, 0.0] 189 | 6 + 3 + 1 = A.: [2, 2.0] 190 | 6 + 3 = 9.: [9, 7.0] 191 | 6 + 4 + 1 = 10.: [19, 17.0] 192 | 6 + 4 + 1 = 11.: [3, 1.0] 193 | 6 + 4 = 10.: [24, 0.0] 194 | 
6 + 4 = A.: [13, 10.0] 195 | 6 + 5 + 1 = 11.: [11, 1.0] 196 | 6 + 5 = 10.: [28, 26.0] 197 | 6 + 5 = 11.: [2, 1.0] 198 | 6 + 6 + 1 = 12.: [11, 7.0] 199 | 6 + 6 + 1 = 13.: [2, 0.0] 200 | 6 + 6 = 10.: [2, 0.0] 201 | 6 + 6 = 11.: [26, 13.0] 202 | 6 + 6 = 12.: [2, 2.0] 203 | 6 + 7 + 1 = 13.: [6, 6.0] 204 | 6 + 7 + 1 = 14.: [1, 1.0] 205 | 6 + 7 = 12.: [43, 34.0] 206 | 6 + 8 + 1 = 14.: [12, 3.0] 207 | 6 + 8 = 13.: [22, 19.0] 208 | 6 + 8 = 14.: [1, 1.0] 209 | 6 + 9 + 1 = 15.: [5, 5.0] 210 | 6 + 9 + 1 = 16.: [2, 0.0] 211 | 6 + 9 = 14.: [44, 25.0] 212 | 6 + A + 1 = 16.: [6, 5.0] 213 | 6 + A + 1 = 17.: [1, 0.0] 214 | 6 + A + 1 = 20.: [1, 0.0] 215 | 6 + A = 14.: [1, 0.0] 216 | 6 + A = 15.: [28, 20.0] 217 | 7 + 0 = 7.: [18, 13.0] 218 | 7 + 1 + 1 = 9.: [22, 14.0] 219 | 7 + 1 = 8.: [13, 10.0] 220 | 7 + 2 + 1 = 10.: [7, 0.0] 221 | 7 + 2 + 1 = 9.: [4, 0.0] 222 | 7 + 2 = 10.: [1, 0.0] 223 | 7 + 2 = 9.: [11, 9.0] 224 | 7 + 3 + 1 = 10.: [3, 1.0] 225 | 7 + 3 + 1 = 11.: [5, 1.0] 226 | 7 + 3 = 10.: [16, 0.0] 227 | 7 + 3 = A.: [3, 3.0] 228 | 7 + 4 + 1 = 11.: [16, 5.0] 229 | 7 + 4 + 1 = 12.: [1, 1.0] 230 | 7 + 4 = 10.: [38, 27.0] 231 | 7 + 4 = 11.: [3, 3.0] 232 | 7 + 4 = B.: [2, 0.0] 233 | 7 + 5 + 1 = 12.: [11, 5.0] 234 | 7 + 5 + 1 = 13.: [5, 0.0] 235 | 7 + 5 + 2 = 14.: [1, 0.0] 236 | 7 + 5 = 11.: [42, 16.0] 237 | 7 + 6 + 1 = 13.: [11, 4.0] 238 | 7 + 6 + 1 = 14.: [2, 2.0] 239 | 7 + 6 = 12.: [29, 25.0] 240 | 7 + 7 + 1 = 14.: [17, 7.0] 241 | 7 + 7 + 1 = 15.: [1, 0.0] 242 | 7 + 7 = 13.: [34, 25.0] 243 | 7 + 8 + 1 = 15.: [3, 2.0] 244 | 7 + 8 + 1 = 16.: [1, 0.0] 245 | 7 + 8 = 14.: [26, 7.0] 246 | 7 + 8 = 15.: [1, 0.0] 247 | 7 + 9 + 1 = 16.: [12, 10.0] 248 | 7 + 9 + 1 = 17.: [2, 0.0] 249 | 7 + 9 = 15.: [41, 33.0] 250 | 7 + A + 1 = 17.: [5, 3.0] 251 | 7 + A + 1 = 18.: [5, 0.0] 252 | 7 + A + 1 = 19.: [1, 0.0] 253 | 7 + A + 1 = 20.: [1, 0.0] 254 | 7 + A = 10.: [3, 0.0] 255 | 7 + A = 15.: [2, 0.0] 256 | 7 + A = 16.: [20, 12.0] 257 | 8 + 0 = 8.: [14, 12.0] 258 | 8 + 1 + 1 = 10.: [7, 0.0] 259 | 8 + 1 
+ 1 = 9.: [1, 0.0] 260 | 8 + 1 = 9.: [20, 17.0] 261 | 8 + 2 + 1 = 10.: [10, 9.0] 262 | 8 + 2 + 1 = 11.: [10, 5.0] 263 | 8 + 2 = 10.: [35, 0.0] 264 | 8 + 2 = 9.: [1, 0.0] 265 | 8 + 2 = A.: [1, 1.0] 266 | 8 + 3 + 1 = 11.: [24, 3.0] 267 | 8 + 3 = 10.: [24, 21.0] 268 | 8 + 3 = 11.: [7, 6.0] 269 | 8 + 4 + 1 = 12.: [12, 7.0] 270 | 8 + 4 + 1 = 13.: [1, 0.0] 271 | 8 + 4 = 11.: [21, 7.0] 272 | 8 + 5 + 1 = 13.: [9, 8.0] 273 | 8 + 5 + 1 = 14.: [1, 0.0] 274 | 8 + 5 = 12.: [32, 27.0] 275 | 8 + 5 = 13.: [2, 0.0] 276 | 8 + 6 + 1 = 14.: [12, 3.0] 277 | 8 + 6 = 13.: [33, 28.0] 278 | 8 + 7 + 1 = 15.: [12, 7.0] 279 | 8 + 7 + 1 = 16.: [1, 0.0] 280 | 8 + 7 = 14.: [32, 15.0] 281 | 8 + 8 + 1 = 16.: [19, 13.0] 282 | 8 + 8 = 14.: [2, 0.0] 283 | 8 + 8 = 15.: [17, 12.0] 284 | 8 + 8 = 16.: [3, 0.0] 285 | 8 + 9 + 1 = 17.: [7, 5.0] 286 | 8 + 9 = 16.: [29, 23.0] 287 | 8 + 9 = 17.: [1, 0.0] 288 | 8 + A + 1 = 18.: [14, 12.0] 289 | 8 + A + 1 = 19.: [2, 0.0] 290 | 8 + A + 1 = 1A.: [1, 0.0] 291 | 8 + A + 1 = 20.: [1, 0.0] 292 | 8 + A = 11.: [1, 0.0] 293 | 8 + A = 17.: [25, 23.0] 294 | 8 + A = 18.: [1, 0.0] 295 | 9 + 0 = 9.: [5, 3.0] 296 | 9 + 1 + 1 = 10.: [12, 8.0] 297 | 9 + 1 + 1 = 11.: [5, 1.0] 298 | 9 + 1 = 10.: [13, 0.0] 299 | 9 + 1 = A.: [3, 3.0] 300 | 9 + 10 + 1 = 19.: [1, 1.0] 301 | 9 + 10 = 18.: [1, 1.0] 302 | 9 + 2 + 1 = 11.: [14, 8.0] 303 | 9 + 2 = 10.: [31, 29.0] 304 | 9 + 2 = 11.: [6, 3.0] 305 | 9 + 2 = B.: [1, 0.0] 306 | 9 + 3 + 1 = 12.: [2, 2.0] 307 | 9 + 3 + 1 = 13.: [3, 0.0] 308 | 9 + 3 = 11.: [36, 18.0] 309 | 9 + 3 = 12.: [2, 0.0] 310 | 9 + 4 + 1 = 13.: [9, 4.0] 311 | 9 + 4 + 1 = 14.: [1, 0.0] 312 | 9 + 4 = 12.: [23, 19.0] 313 | 9 + 4 = 13.: [1, 0.0] 314 | 9 + 5 + 1 = 14.: [10, 6.0] 315 | 9 + 5 + 1 = 15.: [1, 0.0] 316 | 9 + 5 = 13.: [36, 29.0] 317 | 9 + 6 + 1 = 15.: [14, 5.0] 318 | 9 + 6 + 1 = 16.: [2, 0.0] 319 | 9 + 6 = 14.: [27, 11.0] 320 | 9 + 6 = 15.: [2, 1.0] 321 | 9 + 7 + 1 = 16.: [7, 6.0] 322 | 9 + 7 = 14.: [1, 0.0] 323 | 9 + 7 = 15.: [15, 13.0] 324 | 9 + 7 = 16.: [1, 0.0] 325 
| 9 + 8 + 1 = 17.: [3, 1.0] 326 | 9 + 8 = 16.: [36, 30.0] 327 | 9 + 9 + 1 = 18.: [5, 3.0] 328 | 9 + 9 + 1 = 19.: [1, 0.0] 329 | 9 + 9 = 17.: [31, 28.0] 330 | 9 + 9 = 18.: [6, 2.0] 331 | 9 + A + 1 = 19.: [9, 2.0] 332 | 9 + A = 16.: [1, 0.0] 333 | 9 + A = 17.: [3, 0.0] 334 | 9 + A = 18.: [21, 15.0] 335 | 9 + A = 19.: [1, 0.0] 336 | A + 0 = A.: [24, 22.0] 337 | A + 1 + 1 = 11.: [1, 1.0] 338 | A + 1 + 1 = 12.: [5, 0.0] 339 | A + 1 = 10.: [17, 10.0] 340 | A + 1 = 11.: [3, 2.0] 341 | A + 2 + 1 = 12.: [3, 0.0] 342 | A + 2 + 1 = 14.: [1, 0.0] 343 | A + 2 = 11.: [20, 10.0] 344 | A + 2 = 12.: [1, 0.0] 345 | A + 3 + 1 = 13.: [12, 9.0] 346 | A + 3 + 1 = 15.: [1, 0.0] 347 | A + 3 = 12.: [20, 15.0] 348 | A + 3 = 13.: [1, 0.0] 349 | A + 4 + 1 = 14.: [8, 4.0] 350 | A + 4 = 13.: [12, 6.0] 351 | A + 4 = 14.: [1, 1.0] 352 | A + 5 + 1 = 15.: [4, 2.0] 353 | A + 5 = 14.: [30, 26.0] 354 | A + 6 + 1 = 16.: [11, 4.0] 355 | A + 6 + 1 = 17.: [4, 0.0] 356 | A + 6 = 10.: [2, 0.0] 357 | A + 6 = 15.: [21, 15.0] 358 | A + 7 + 1 = 17.: [5, 3.0] 359 | A + 7 + 1 = 18.: [2, 1.0] 360 | A + 7 = 0.: [1, 0.0] 361 | A + 7 = 10.: [7, 0.0] 362 | A + 7 = 11.: [1, 0.0] 363 | A + 7 = 16.: [20, 18.0] 364 | A + 8 + 1 = 18.: [4, 3.0] 365 | A + 8 + 1 = 19.: [2, 1.0] 366 | A + 8 + 1 = 20.: [3, 0.0] 367 | A + 8 + 1 = 22.: [1, 0.0] 368 | A + 8 = 11.: [2, 0.0] 369 | A + 8 = 17.: [19, 14.0] 370 | A + 8 = 18.: [1, 0.0] 371 | A + 8 = 19.: [2, 0.0] 372 | A + 8 = 1A.: [1, 0.0] 373 | A + 8 = 20.: [1, 0.0] 374 | A + 8 = 21.: [3, 0.0] 375 | A + 9 + 1 = 19.: [4, 1.0] 376 | A + 9 + 1 = 20.: [1, 1.0] 377 | A + 9 + 1 = 21.: [2, 0.0] 378 | A + 9 + 1 = 22.: [1, 0.0] 379 | A + 9 + 1 = 23.: [2, 0.0] 380 | A + 9 = 12.: [3, 0.0] 381 | A + 9 = 16.: [1, 0.0] 382 | A + 9 = 18.: [19, 17.0] 383 | A + 9 = 19.: [6, 2.0] 384 | A + 9 = 22.: [6, 0.0] 385 | A + A + 1 = 20.: [3, 0.0] 386 | A + A + 1 = 21.: [4, 1.0] 387 | A + A + 1 = 22.: [1, 0.0] 388 | A + A = 14.: [9, 0.0] 389 | A + A = 19.: [2, 1.0] 390 | A + A = 20.: [14, 9.0] 391 | A + A = 
21.: [4, 0.0] 392 | -------------------------------------------------------------------------------- /artifacts/checkpoint/clutrr/symbolic/gpt-3.5_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | alma's son is son.: [1, 0.0] 2 | aunt's aunt is aunt.: [4, 1.0] 3 | aunt's brother is brother.: [1, 0.0] 4 | aunt's brother is uncle.: [9, 3.0] 5 | aunt's daughter is cousin.: [3, 0.0] 6 | aunt's father is grandfather.: [6, 4.0] 7 | aunt's mother is grandmother.: [4, 4.0] 8 | aunt's mother is mother.: [1, 1.0] 9 | aunt's sister is aunt.: [1, 0.0] 10 | aunt's sister is mother.: [1, 0.0] 11 | aunt's sister is sister.: [4, 1.0] 12 | aunt's son is cousin.: [4, 0.0] 13 | brother's aunt is aunt.: [10, 5.0] 14 | brother's brother is brother.: [89, 52.0] 15 | brother's daughter is niece.: [57, 30.0] 16 | brother's father is father.: [45, 43.0] 17 | brother's grandfather is grandfather.: [14, 11.0] 18 | brother's grandfather is great-grandfather.: [4, 0.0] 19 | brother's grandmother is grandmother.: [25, 20.0] 20 | brother's grandmother is great-grandmother.: [2, 0.0] 21 | brother's grandson is nephew.: [3, 0.0] 22 | brother's husband is husband.: [1, 0.0] 23 | brother's mother is grandmother.: [1, 0.0] 24 | brother's mother is mother.: [50, 50.0] 25 | brother's sister is sister.: [59, 45.0] 26 | brother's son is nephew.: [63, 34.0] 27 | brother's uncle is uncle.: [8, 3.0] 28 | brother's wife is sister-in-law.: [11, 6.0] 29 | brother-in-law's daughter is niece.: [4, 4.0] 30 | brother-in-law's father is father-in-law.: [3, 3.0] 31 | brother-in-law's mother is mother-in-law.: [7, 7.0] 32 | brother-in-law's son is nephew.: [2, 2.0] 33 | daughter's aunt is aunt.: [22, 1.0] 34 | daughter's aunt is sister.: [18, 17.0] 35 | daughter's brother is brother.: [39, 0.0] 36 | daughter's brother is son.: [2, 2.0] 37 | daughter's brother is uncle.: [1, 0.0] 38 | daughter's daughter is daughter.: [3, 0.0] 39 | daughter's daughter is 
granddaughter.: [51, 26.0] 40 | daughter's daughter is sister.: [1, 0.0] 41 | daughter's father is father.: [22, 4.0] 42 | daughter's father is husband.: [1, 0.0] 43 | daughter's grandfather is father.: [16, 11.0] 44 | daughter's grandfather is grandfather.: [14, 4.0] 45 | daughter's grandfather is grandfatherr.: [1, 0.0] 46 | daughter's grandmother is grandmother.: [17, 4.0] 47 | daughter's grandmother is mother.: [19, 9.0] 48 | daughter's husband is son-in-law.: [10, 10.0] 49 | daughter's mother is mother.: [21, 3.0] 50 | daughter's sister is sister.: [51, 1.0] 51 | daughter's son is grandson.: [33, 24.0] 52 | daughter's son is nephew.: [1, 0.0] 53 | daughter's son is son.: [1, 0.0] 54 | daughter's uncle is brother.: [31, 27.0] 55 | daughter's uncle is uncle.: [8, 3.0] 56 | daughter-in-law's daughter is granddaughter.: [5, 5.0] 57 | daughter-in-law's son is grandson.: [1, 1.0] 58 | father's brother is brother.: [1, 1.0] 59 | father's brother is uncle.: [44, 31.0] 60 | father's daughter is daughter.: [25, 5.0] 61 | father's daughter is sister.: [42, 33.0] 62 | father's father is grandfather.: [43, 26.0] 63 | father's granddaughter is granddaughter.: [1, 1.0] 64 | father's grandson is grandson.: [1, 1.0] 65 | father's mother is grandmother.: [22, 13.0] 66 | father's mother is mother.: [5, 0.0] 67 | father's sister is aunt.: [34, 26.0] 68 | father's son is brother.: [44, 35.0] 69 | father's son is son.: [30, 10.0] 70 | father's wife is mother.: [2, 0.0] 71 | gabrielle's brother is brother.: [2, 0.0] 72 | gabrielle's sister is sister.: [1, 0.0] 73 | granddaughter's aunt is aunt.: [3, 0.0] 74 | granddaughter's aunt is niece.: [1, 0.0] 75 | granddaughter's brother is brother.: [5, 0.0] 76 | granddaughter's brother is cousin.: [2, 0.0] 77 | granddaughter's brother is grandson.: [2, 1.0] 78 | granddaughter's brother is nephew.: [4, 0.0] 79 | granddaughter's brother is uncle.: [3, 0.0] 80 | granddaughter's father is son.: [4, 4.0] 81 | granddaughter's grandson is 
grandson.: [1, 0.0] 82 | granddaughter's mother is daughter.: [7, 7.0] 83 | granddaughter's sister is niece.: [1, 0.0] 84 | granddaughter's sister is sister.: [25, 0.0] 85 | granddaughter's uncle is great-uncle.: [1, 0.0] 86 | granddaughter's uncle is uncle.: [2, 0.0] 87 | grandfather's brother is great-uncle.: [1, 0.0] 88 | grandfather's daughter is mother.: [1, 0.0] 89 | grandfather's father is great-grandfather.: [1, 0.0] 90 | grandfather's granddaughter is granddaughter.: [1, 0.0] 91 | grandfather's grandfather is great-grandfather.: [4, 0.0] 92 | grandfather's mother is great-grandmother.: [1, 0.0] 93 | grandfather's son is father.: [6, 0.0] 94 | grandmother's daughter is mother.: [3, 0.0] 95 | grandmother's granddaughter is granddaughter.: [2, 0.0] 96 | grandmother's granddaughter is niece.: [1, 0.0] 97 | grandmother's grandmother is great-grandmother.: [1, 0.0] 98 | grandmother's mother is great-grandmother.: [1, 0.0] 99 | grandmother's sister is great-aunt.: [1, 0.0] 100 | grandson's aunt is aunt.: [2, 0.0] 101 | grandson's aunt is great-aunt.: [1, 0.0] 102 | grandson's brother is brother.: [7, 0.0] 103 | grandson's brother is grandson.: [1, 1.0] 104 | grandson's brother is nephew.: [1, 0.0] 105 | grandson's brother is uncle.: [1, 0.0] 106 | grandson's father is son-in-law.: [1, 1.0] 107 | grandson's father is son.: [2, 2.0] 108 | grandson's mother is daughter.: [2, 2.0] 109 | grandson's sister is aunt.: [4, 0.0] 110 | grandson's sister is cousin.: [1, 0.0] 111 | grandson's sister is granddaughter.: [2, 2.0] 112 | grandson's sister is great-aunt.: [1, 0.0] 113 | grandson's sister is niece.: [4, 0.0] 114 | grandson's sister is sister.: [9, 0.0] 115 | grandson's uncle is great-uncle.: [1, 0.0] 116 | grandson's uncle is uncle.: [7, 0.0] 117 | guillermina's grandmother is grandmother.: [1, 1.0] 118 | husband's brother is brother-in-law.: [6, 6.0] 119 | husband's daughter is daughter-in-law.: [3, 2.0] 120 | husband's daughter is daughter.: [32, 15.0] 121 | 
husband's daughter is step-daughter.: [1, 0.0] 122 | husband's daughter is stepdaughter.: [19, 2.0] 123 | husband's father is father-in-law.: [11, 11.0] 124 | husband's father is father.: [1, 0.0] 125 | husband's granddaughter is daughter.: [1, 0.0] 126 | husband's granddaughter is granddaughter.: [19, 12.0] 127 | husband's granddaughter is great-granddaughter.: [1, 0.0] 128 | husband's grandson is grandchild.: [1, 0.0] 129 | husband's grandson is grandson.: [19, 17.0] 130 | husband's grandson is son.: [1, 1.0] 131 | husband's husband is husband.: [1, 0.0] 132 | husband's mother is mother-in-law.: [19, 18.0] 133 | husband's mother is mother.: [1, 0.0] 134 | husband's sister is sister-in-law.: [1, 1.0] 135 | husband's sister is sister.: [1, 0.0] 136 | husband's son is son-in-law.: [4, 3.0] 137 | husband's son is son.: [28, 16.0] 138 | husband's son is stepson.: [1, 0.0] 139 | husband's wife is wife.: [3, 0.0] 140 | jason's brother is brother.: [2, 0.0] 141 | jason's sister is michelle.: [1, 0.0] 142 | jason's son is vincent.: [1, 0.0] 143 | lewis's brother is brother.: [1, 1.0] 144 | mother's brother is uncle.: [33, 23.0] 145 | mother's daughter is daughter.: [41, 12.0] 146 | mother's daughter is mother.: [1, 0.0] 147 | mother's daughter is sister.: [39, 26.0] 148 | mother's father is father.: [1, 0.0] 149 | mother's father is grandfather.: [28, 16.0] 150 | mother's grandmother is great-grandmother.: [2, 0.0] 151 | mother's husband is father.: [1, 0.0] 152 | mother's mother is grandmother.: [30, 16.0] 153 | mother's sister is aunt.: [36, 27.0] 154 | mother's son is brother.: [43, 29.0] 155 | mother's son is father.: [1, 1.0] 156 | mother's son is son.: [25, 9.0] 157 | nephew's aunt is aunt.: [6, 0.0] 158 | nephew's brother is brother.: [1, 0.0] 159 | nephew's brother is cousin.: [1, 0.0] 160 | nephew's grandfather is grandfather.: [5, 0.0] 161 | nephew's grandmother is grandmother.: [7, 0.0] 162 | nephew's sister is cousin.: [3, 0.0] 163 | nephew's sister is niece.: 
[7, 7.0] 164 | nephew's uncle is uncle.: [5, 0.0] 165 | niece's aunt is aunt.: [1, 0.0] 166 | niece's brother is cousin.: [2, 0.0] 167 | niece's grandfather is grandfather.: [3, 0.0] 168 | niece's grandmother is grandmother.: [3, 0.0] 169 | niece's sister is niece.: [1, 1.0] 170 | niece's sister is sister.: [3, 0.0] 171 | niece's uncle is uncle.: [6, 0.0] 172 | sister's aunt is aunt.: [6, 2.0] 173 | sister's brother is brother.: [58, 46.0] 174 | sister's daughter is niece.: [60, 35.0] 175 | sister's father is father.: [45, 44.0] 176 | sister's grandfather is father.: [1, 0.0] 177 | sister's grandfather is grandfather.: [16, 11.0] 178 | sister's grandfather is great-grandfather.: [1, 0.0] 179 | sister's grandmother is grandmother.: [25, 17.0] 180 | sister's grandmother is great-grandmother.: [4, 0.0] 181 | sister's husband is brother-in-law.: [10, 6.0] 182 | sister's mother is aunt.: [1, 0.0] 183 | sister's mother is grandmother.: [1, 0.0] 184 | sister's mother is mother.: [48, 44.0] 185 | sister's sister is sister.: [68, 40.0] 186 | sister's son is nephew.: [44, 29.0] 187 | sister's uncle is uncle.: [8, 3.0] 188 | sister-in-law's daughter is niece.: [1, 1.0] 189 | sister-in-law's father is father-in-law.: [1, 1.0] 190 | sister-in-law's mother is mother-in-law.: [1, 1.0] 191 | sister-in-law's son is nephew-in-law.: [1, 0.0] 192 | sister-in-law's son is nephew.: [5, 5.0] 193 | son's aunt is aunt.: [11, 2.0] 194 | son's aunt is sister.: [29, 29.0] 195 | son's brother is brother.: [43, 0.0] 196 | son's brother is uncle.: [1, 0.0] 197 | son's daughter is daughter.: [2, 0.0] 198 | son's daughter is granddaughter.: [22, 14.0] 199 | son's father is father.: [18, 2.0] 200 | son's grandfather is father.: [16, 15.0] 201 | son's grandfather is grandfather.: [10, 3.0] 202 | son's grandmother is grandmother.: [24, 5.0] 203 | son's grandmother is mother.: [2, 0.0] 204 | son's grandson is great-grandson.: [1, 0.0] 205 | son's husband is son-in-law.: [1, 0.0] 206 | son's mother is 
mother.: [22, 4.0] 207 | son's sister is daughter.: [2, 2.0] 208 | son's sister is sister.: [34, 2.0] 209 | son's son is grandson.: [48, 25.0] 210 | son's son is nephew.: [1, 0.0] 211 | son's son is son.: [2, 0.0] 212 | son's uncle is brother.: [27, 25.0] 213 | son's uncle is uncle.: [11, 3.0] 214 | son's wife is daughter-in-law.: [17, 17.0] 215 | son-in-law's aunt is aunt.: [1, 0.0] 216 | son-in-law's daughter is granddaughter.: [1, 1.0] 217 | son-in-law's son is grandson.: [4, 4.0] 218 | son-in-law's wife is daughter.: [1, 0.0] 219 | stepdaughter's brother is stepbrother.: [2, 0.0] 220 | stepdaughter's husband is stepson-in-law.: [1, 0.0] 221 | stepdaughter's sister is sister.: [1, 0.0] 222 | stepdaughter's sister is stepsister.: [1, 0.0] 223 | stepdaughter's uncle is uncle.: [2, 0.0] 224 | uncle's brother is brother.: [3, 0.0] 225 | uncle's daughter is cousin.: [3, 0.0] 226 | uncle's father is grandfather.: [2, 1.0] 227 | uncle's mother is grandmother.: [8, 5.0] 228 | uncle's sister is aunt.: [4, 4.0] 229 | uncle's sister is sister.: [1, 0.0] 230 | uncle's son is cousin.: [4, 0.0] 231 | uncle's uncle is great-uncle.: [1, 0.0] 232 | uncle's uncle is uncle.: [2, 0.0] 233 | uncle's wife is aunt.: [1, 0.0] 234 | wife's brother is brother-in-law.: [6, 4.0] 235 | wife's brother is brother.: [1, 0.0] 236 | wife's daughter is daughter-in-law.: [1, 1.0] 237 | wife's daughter is daughter.: [41, 17.0] 238 | wife's daughter is stepdaughter.: [2, 0.0] 239 | wife's father is father-in-law.: [9, 9.0] 240 | wife's granddaughter is granddaughter.: [14, 10.0] 241 | wife's grandson is grandson.: [9, 6.0] 242 | wife's grandson is son.: [4, 0.0] 243 | wife's husband is husband.: [2, 0.0] 244 | wife's mother is mother-in-law.: [15, 15.0] 245 | wife's mother is mother.: [1, 0.0] 246 | wife's sister is sister-in-law.: [3, 1.0] 247 | wife's sister is sister.: [6, 0.0] 248 | wife's son is son-in-law.: [1, 0.0] 249 | wife's son is son.: [43, 27.0] 250 | wife's son is stepson.: [1, 0.0] 
class Arithmetic:
  """Arithmetic dataset of addition problems in a given numeral base.

  Samples are read from `<path>/base-<base>/*_train.txt` and `*_test.txt`.
  Each non-empty line holds one problem whose operands are separated by `+`.
  The retained samples are stored in the order train, valid, test, which is
  what `get_split` relies on.
  """

  def __init__(
      self,
      path: str,
      base: int = 10,
      num_train: int = 900,
      num_valid: int = 100,
      num_test: int = 100,
  ):
    """Init arithmetic dataset.

    Train samples are drawn from the `*_train.txt` files; valid and test
    samples are both drawn (without overlap) from the `*_test.txt` files.
    Sampling uses the module-level `random` generator, so the result depends
    on the global random state at construction time.

    Args:
      path: path to the dataset
      base: base of the arithmetic
      num_train: number of training samples
      num_valid: number of validation samples
      num_test: number of test samples

    Raises:
      FileNotFoundError: if dataset files are not found
      ValueError: if more samples are requested than the files provide
    """

    self.path = path
    self.base = base
    self.num_train = num_train
    self.num_valid = num_valid
    self.num_test = num_test

    # glob returns files in arbitrary, filesystem-dependent order. Sort them so
    # that a fixed random seed selects the same samples on every machine.
    train_files = sorted(
        glob.glob(os.path.join(path, f"base-{base}/*_train.txt")))
    test_files = sorted(
        glob.glob(os.path.join(path, f"base-{base}/*_test.txt")))
    if not train_files or not test_files:
      raise FileNotFoundError(f"Can't find dataset files in `{path}`.")

    queries = []
    answers = []
    levels = []
    num_samples = []
    for txt_file in train_files + test_files:
      num_sample = 0
      with open(txt_file, "r", encoding="utf-8") as fin:
        for line in fin:
          line = line.strip()
          if not line:
            # Blank (e.g. trailing) lines carry no problem; parsing them would
            # fail in int("", base).
            continue
          query = line.split("+")
          answer = sum(int(x, base) for x in query)
          # Render the ground-truth sum back in the dataset's base.
          answer = np.base_repr(answer, base)
          # Difficulty level is the digit count of the first operand.
          level = f"{len(query[0])} digits"
          queries.append(query)
          answers.append(answer)
          levels.append(level)
          num_sample += 1
      num_samples.append(num_sample)

    # All train-file samples come first in the flat lists, then test-file ones.
    total_train = sum(num_samples[:len(train_files)])
    total_test = sum(num_samples[len(train_files):])
    num_eval = num_valid + num_test
    if num_train > total_train or num_eval > total_test:
      raise ValueError(
          f"Requested {num_train} train and {num_eval} valid/test samples, "
          f"but only {total_train} train and {total_test} test samples are "
          f"available in `{path}`."
      )
    train_indices = random.sample(range(total_train), num_train)
    test_indices = random.sample(
        range(total_train, total_train + total_test), num_eval)
    indices = train_indices + test_indices
    self.queries = [queries[i] for i in indices]
    self.answers = [answers[i] for i in indices]
    self.levels = [levels[i] for i in indices]

  def get_split(
      self,
      split: str = "test",
  ) -> Sequence[Mapping[str, Any]]:
    """Get dataset split.

    The first `num_train` samples are the train split, the next `num_valid`
    samples are the valid split, and the last `num_test` samples are the test
    split.

    Args:
      split: split name, one of "train", "valid" or "test"

    Returns:
      List of samples

    Raises:
      ValueError: if the split name is unknown
    """
    if split == "train":
      indices = range(self.num_train)
    elif split == "valid":
      indices = range(self.num_train, self.num_train + self.num_valid)
    elif split == "test":
      indices = range(len(self) - self.num_test, len(self))
    else:
      raise ValueError(f"Unknown split `{split}`")
    return [self[i] for i in indices]

  def evaluate(
      self,
      truth: str,
      pred: str,
  ) -> bool:
    """Evaluate truth and pred.

    Args:
      truth: ground truth answer
      pred: predicted text

    Returns:
      True if the lowercased truth is one of the word tokens of the lowercased
      prediction.
    """
    return truth.lower() in tokenize.word_tokenize(pred.lower())

  def __getitem__(
      self,
      index,
  ) -> Mapping[str, Any]:
    """Return the sample at `index` as a dict of query, answer, level, base."""
    return {
        "query": self.queries[index],
        "answer": self.answers[index],
        "level": self.levels[index],
        "base": self.base,
    }

  def __len__(self) -> int:
    """Return the total number of retained samples over all splits."""
    return len(self.queries)
176 | path = value.split("-") 177 | elif field == "query": 178 | query = ast.literal_eval(value) 179 | elif field == "target": 180 | answer = value 181 | elif field == "task_name": 182 | level = f"{value.split('.')[1]} hops" 183 | documents.append(document) 184 | paths.append(path) 185 | queries.append(query) 186 | answers.append(answer) 187 | levels.append(level) 188 | num_sample += 1 189 | num_samples.append(num_sample) 190 | 191 | total_train = sum(num_samples[:len(train_files)]) 192 | total_test = sum(num_samples[len(train_files):]) 193 | train_indices = random.sample(range(total_train), num_train) 194 | test_indices = random.sample(range(total_train, total_train + total_test), 195 | num_valid + num_test) 196 | indices = train_indices + test_indices 197 | self.documents = [documents[i] for i in indices] 198 | self.paths = [paths[i] for i in indices] 199 | self.queries = [queries[i] for i in indices] 200 | self.answers = [answers[i] for i in indices] 201 | self.levels = [levels[i] for i in indices] 202 | self.labels = set(answers) 203 | 204 | def get_split( 205 | self, 206 | split="test" 207 | ): 208 | """Get dataset split.""" 209 | if split == "train": 210 | indices = range(self.num_train) 211 | elif split == "valid": 212 | indices = range(self.num_train, self.num_train + self.num_valid) 213 | elif split == "test": 214 | indices = range(len(self) - self.num_test, len(self)) 215 | else: 216 | raise ValueError(f"Unknown split `{split}`") 217 | return [self[i] for i in indices] 218 | 219 | def evaluate( 220 | self, 221 | truth, 222 | pred 223 | ): 224 | """Evaluate truth and pred.""" 225 | truth = truth.lower() 226 | words = tokenize.word_tokenize(pred.lower()) 227 | others = self.labels - {truth} 228 | return truth in words and not any(label in words for label in others) 229 | 230 | def __getitem__( 231 | self, 232 | index 233 | ): 234 | return { 235 | "document": self.documents[index], 236 | "path": self.paths[index], 237 | "query": self.queries[index], 238 | 
"answer": self.answers[index], 239 | "level": self.levels[index] 240 | } 241 | 242 | def __len__(self): 243 | return len(self.queries) 244 | 245 | 246 | class ListFunctions: 247 | """List Functions dataset.""" 248 | 249 | def __init__( 250 | self, 251 | path: str, 252 | num_train: int = 8, 253 | num_valid: int = 8, 254 | num_test: int = 16 255 | ) -> None: 256 | self.path = path 257 | self.num_train = num_train 258 | self.num_valid = num_valid 259 | self.num_test = num_test 260 | 261 | num_sample = num_train + num_valid + num_test 262 | json_files = sorted(glob.glob(os.path.join(path, "c*.json"))) 263 | if not json_files: 264 | raise FileNotFoundError(f"Can't find dataset files in `{path}`.") 265 | 266 | levels = [] 267 | concepts = [] 268 | queries = [] 269 | answers = [] 270 | for json_file in json_files: 271 | with open(json_file, "r") as fin: 272 | obj = json.load(fin) 273 | query = [] 274 | answer = [] 275 | for example in obj["examples"]: 276 | query.append(example["input"]) 277 | answer.append(example["target"]) 278 | concept = re.search(r"(c\d+).json", json_file) 279 | assert concept is not None 280 | concept = concept.group(1) 281 | cid = int(concept[1:]) 282 | if cid <= 80: 283 | level = "P1" 284 | elif cid <= 100: 285 | level = "P2" 286 | else: 287 | level = "P3" 288 | indices = random.sample(range(len(query)), num_sample) 289 | query = [query[i] for i in indices] 290 | answer = [answer[i] for i in indices] 291 | levels.append(level) 292 | concepts.append(concept) 293 | queries.append(query) 294 | answers.append(answer) 295 | 296 | self.levels = levels 297 | self.concepts = concepts 298 | self.queries = queries 299 | self.answers = answers 300 | 301 | def get_split( 302 | self, 303 | split: str = "test" 304 | ) -> Sequence[Mapping[str, Any]]: 305 | """Get dataset split.""" 306 | if split in {"train", "valid"}: 307 | train_indices = slice(self.num_train) 308 | test_indices = slice(self.num_train, self.num_train + self.num_valid) 309 | elif split == 
"test": 310 | train_indices = slice(self.num_train + self.num_valid) 311 | test_indices = slice(self.num_train + self.num_valid, None) 312 | else: 313 | raise ValueError(f"Unknown split `{split}`") 314 | 315 | dataset = [] 316 | for sample in self: 317 | sample["train_queries"] = sample["queries"][train_indices] 318 | sample["train_answers"] = sample["answers"][train_indices] 319 | sample["queries"] = sample["queries"][test_indices] 320 | sample["answers"] = sample["answers"][test_indices] 321 | answers = [f"{q} -> {a}" 322 | for q, a in zip(sample["queries"], sample["answers"])] 323 | sample["answer"] = "\n".join(answers) 324 | dataset.append(sample) 325 | return dataset 326 | 327 | def evaluate( 328 | self, 329 | truth: str, 330 | pred: str 331 | ) -> bool: 332 | """Evaluate truth and pred.""" 333 | pattern = r"(\[[A-Z0-9, ]*\]) ?-> ?(\[[A-Z0-9, ]*\])" 334 | query2truth = dict(re.findall(pattern, truth)) 335 | query2pred = dict(re.findall(pattern, pred)) 336 | num_correct = 0 337 | for query, truth in query2truth.items(): 338 | if query in query2pred: 339 | try: 340 | truth = ast.literal_eval(truth) 341 | pred = ast.literal_eval(query2pred[query]) 342 | num_correct += int(truth == pred) 343 | except (ValueError, SyntaxError): 344 | pass 345 | return num_correct / len(query2truth) 346 | 347 | def __getitem__(self, index: int) -> Mapping[str, Any]: 348 | return { 349 | "queries": self.queries[index], 350 | "answers": self.answers[index], 351 | "level": self.levels[index], 352 | "concept": self.concepts[index], 353 | } 354 | 355 | def __len__(self) -> int: 356 | return len(self.queries) 357 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /artifacts/checkpoint/clutrr/symbolic/gemini-pro_5-shot_cot_htt_2000.yaml: -------------------------------------------------------------------------------- 1 | aunt's aunt is aunt.: [2, 0.0] 2 | aunt's brother is brother.: [2, 0.0] 3 | aunt's brother is cousin.: [2, 0.0] 4 | aunt's brother is nephew.: [1, 0.0] 5 | aunt's brother is uncle.: [1, 0.0] 6 | aunt's father is uncle.: [4, 0.0] 7 | aunt's grandfather is grandfather.: [1, 0.0] 8 | aunt's mother is grandmother.: [2, 2.0] 9 | aunt's sister is cousin.: [3, 0.0] 10 | aunt's uncle is father.: [1, 0.0] 11 | brother's aunt is aunt.: [12, 5.0] 12 | brother's aunt is cousin.: [1, 0.0] 13 | brother's brother is brother.: [77, 52.0] 14 | brother's daughter is brother.: [1, 0.0] 15 | brother's daughter is niece.: [57, 39.0] 16 | brother's father is father.: [46, 37.0] 17 | brother's father is uncle.: [1, 0.0] 18 | brother's grandfather is father.: [8, 0.0] 19 | brother's grandfather is grandfather.: [16, 14.0] 20 | brother's grandmother is aunt.: [1, 0.0] 21 | brother's grandmother is grandmother.: 
[32, 26.0] 22 | brother's grandmother is mother.: [1, 0.0] 23 | brother's grandson is grandson.: [1, 1.0] 24 | brother's grandson is nephew.: [2, 0.0] 25 | brother's mother is aunt.: [1, 0.0] 26 | brother's mother is mother.: [53, 46.0] 27 | brother's nephew is nephew.: [1, 1.0] 28 | brother's niece is niece.: [4, 0.0] 29 | brother's sister is sister.: [72, 57.0] 30 | brother's son is nephew.: [1, 0.0] 31 | brother's uncle is father.: [3, 0.0] 32 | brother's uncle is uncle.: [6, 4.0] 33 | brother's wife is sister-in-law.: [6, 2.0] 34 | brother's wife is wife.: [4, 0.0] 35 | brother-in-law's daughter is niece.: [3, 3.0] 36 | brother-in-law's father is father-in-law.: [5, 5.0] 37 | brother-in-law's mother is mother-in-law.: [5, 5.0] 38 | brother-in-law's son is nephew.: [2, 2.0] 39 | daughter's aunt is aunt.: [6, 0.0] 40 | daughter's aunt is niece.: [2, 0.0] 41 | daughter's aunt is sister.: [30, 27.0] 42 | daughter's brother is brother.: [27, 0.0] 43 | daughter's brother is nephew.: [4, 1.0] 44 | daughter's brother is son.: [5, 3.0] 45 | daughter's brother is uncle.: [14, 0.0] 46 | daughter's daughter is granddaughter.: [45, 32.0] 47 | daughter's father is brother.: [1, 1.0] 48 | daughter's father is father.: [21, 5.0] 49 | daughter's father is grandfather.: [1, 0.0] 50 | daughter's father is son.: [1, 0.0] 51 | daughter's grandfather is father.: [21, 19.0] 52 | daughter's grandfather is grandfather.: [4, 1.0] 53 | daughter's grandmother is grandmother.: [8, 0.0] 54 | daughter's grandmother is mother.: [18, 17.0] 55 | daughter's husband is father.: [1, 0.0] 56 | daughter's husband is husband.: [3, 0.0] 57 | daughter's husband is son-in-law.: [14, 14.0] 58 | daughter's mother is mother.: [19, 6.0] 59 | daughter's mother is sister.: [2, 2.0] 60 | daughter's mother is wife.: [1, 0.0] 61 | daughter's sister is aunt.: [3, 0.0] 62 | daughter's sister is daughter.: [2, 0.0] 63 | daughter's sister is niece.: [7, 0.0] 64 | daughter's sister is sister.: [37, 2.0] 65 | 
daughter's son is brother.: [1, 0.0] 66 | daughter's son is grandson.: [39, 29.0] 67 | daughter's son is nephew.: [3, 0.0] 68 | daughter's son is son.: [1, 0.0] 69 | daughter's uncle is brother.: [36, 30.0] 70 | daughter's uncle is father.: [1, 0.0] 71 | daughter's uncle is uncle.: [1, 0.0] 72 | daughter-in-law's daughter is granddaughter.: [2, 2.0] 73 | daughter-in-law's son is grandson.: [2, 2.0] 74 | father's brother is brother.: [1, 0.0] 75 | father's brother is uncle.: [44, 28.0] 76 | father's child is son.: [1, 0.0] 77 | father's daughter is daughter.: [21, 1.0] 78 | father's daughter is sister.: [41, 34.0] 79 | father's father is grandfather.: [40, 25.0] 80 | father's grandfather is grandfather.: [1, 1.0] 81 | father's grandfather is great-grandfather.: [1, 0.0] 82 | father's grandmother is grandmother.: [1, 0.0] 83 | father's grandmother is great-grandmother.: [1, 0.0] 84 | father's mother is grandmother.: [23, 15.0] 85 | father's mother is mother.: [8, 0.0] 86 | father's nephew is cousin.: [1, 0.0] 87 | father's sister is aunt.: [33, 17.0] 88 | father's sister is sister.: [3, 0.0] 89 | father's sister-in-law is sister-in-law.: [1, 0.0] 90 | father's son is brother.: [41, 35.0] 91 | father's son is grandson.: [2, 0.0] 92 | father's son is son.: [25, 3.0] 93 | father's wife is mother.: [5, 4.0] 94 | father-in-law's son is husband.: [1, 0.0] 95 | gabrielle's son is brother.: [1, 0.0] 96 | granddaughter's aunt is aunt.: [5, 0.0] 97 | granddaughter's brother is brother-in-law.: [1, 0.0] 98 | granddaughter's brother is brother.: [7, 0.0] 99 | granddaughter's brother is grandson.: [3, 3.0] 100 | granddaughter's brother is nephew.: [2, 0.0] 101 | granddaughter's brother is uncle.: [17, 0.0] 102 | granddaughter's father is daughter.: [1, 1.0] 103 | granddaughter's father is father.: [3, 0.0] 104 | granddaughter's father is grandfather.: [1, 0.0] 105 | granddaughter's father is mother.: [1, 0.0] 106 | granddaughter's father is son.: [4, 2.0] 107 | granddaughter's 
grandfather is grandfather.: [1, 0.0] 108 | granddaughter's husband is husband.: [1, 0.0] 109 | granddaughter's mother is daughter.: [10, 7.0] 110 | granddaughter's sister is aunt.: [2, 0.0] 111 | granddaughter's sister is brother.: [1, 0.0] 112 | granddaughter's sister is cousin.: [3, 0.0] 113 | granddaughter's sister is granddaughter.: [4, 4.0] 114 | granddaughter's sister is grandniece.: [1, 0.0] 115 | granddaughter's sister is great-aunt.: [1, 0.0] 116 | granddaughter's sister is mother.: [1, 0.0] 117 | granddaughter's sister is niece.: [9, 0.0] 118 | granddaughter's sister is sister-in-law.: [1, 0.0] 119 | granddaughter's sister is sister.: [6, 0.0] 120 | granddaughter's uncle is brother.: [2, 0.0] 121 | granddaughter's uncle is father.: [2, 0.0] 122 | granddaughter's uncle is uncle.: [4, 0.0] 123 | grandfather's brother is father.: [1, 0.0] 124 | grandfather's brother is uncle.: [3, 0.0] 125 | grandfather's daughter is father.: [1, 0.0] 126 | grandfather's daughter is granddaughter.: [8, 0.0] 127 | grandfather's daughter is grandmother.: [1, 0.0] 128 | grandfather's granddaughter is granddaughter.: [1, 0.0] 129 | grandfather's grandfather is ancestor.: [1, 0.0] 130 | grandfather's grandson is grandson.: [3, 0.0] 131 | grandfather's niece is granddaughter.: [1, 0.0] 132 | grandfather's sister is aunt.: [6, 0.0] 133 | grandfather's son is father.: [9, 0.0] 134 | grandfather's stepdaughter is granddaughter.: [2, 0.0] 135 | grandmother's daughter is granddaughter.: [1, 0.0] 136 | grandmother's daughter is mother.: [10, 0.0] 137 | grandmother's granddaughter is granddaughter.: [1, 0.0] 138 | grandmother's grandmother is great-grandmother.: [2, 0.0] 139 | grandmother's mother is grandmother.: [1, 0.0] 140 | grandmother's son is father.: [6, 0.0] 141 | grandson's aunt is aunt.: [2, 0.0] 142 | grandson's brother is brother.: [6, 0.0] 143 | grandson's brother is cousin.: [2, 0.0] 144 | grandson's brother is grandson.: [1, 1.0] 145 | grandson's brother is nephew.: [3, 
0.0] 146 | grandson's brother is uncle.: [5, 0.0] 147 | grandson's father is son.: [6, 6.0] 148 | grandson's grandmother is grandmother.: [2, 0.0] 149 | grandson's mother is daughter.: [5, 5.0] 150 | grandson's sister is aunt.: [7, 0.0] 151 | grandson's sister is granddaughter.: [9, 9.0] 152 | grandson's sister is grandniece.: [1, 0.0] 153 | grandson's sister is niece.: [12, 0.0] 154 | grandson's sister is sister.: [3, 0.0] 155 | grandson's uncle is father.: [3, 0.0] 156 | grandson's uncle is uncle.: [3, 0.0] 157 | husband's brother is brother-in-law.: [6, 6.0] 158 | husband's daughter is daughter-in-law.: [1, 0.0] 159 | husband's daughter is daughter.: [35, 27.0] 160 | husband's daughter is sister.: [2, 0.0] 161 | husband's daughter is step-daughter.: [1, 0.0] 162 | husband's daughter is stepdaughter.: [11, 1.0] 163 | husband's daughter is wife.: [1, 0.0] 164 | husband's father is father-in-law.: [12, 11.0] 165 | husband's granddaughter is daughter.: [2, 0.0] 166 | husband's granddaughter is granddaughter.: [21, 17.0] 167 | husband's grandson is brother.: [1, 0.0] 168 | husband's grandson is cousin.: [1, 0.0] 169 | husband's grandson is grandson.: [15, 13.0] 170 | husband's grandson is great-grandson.: [5, 0.0] 171 | husband's grandson is son.: [2, 0.0] 172 | husband's husband is boyfriend.: [1, 0.0] 173 | husband's husband is brother.: [1, 0.0] 174 | husband's mother is mother-in-law.: [12, 11.0] 175 | husband's mother is mother.: [6, 0.0] 176 | husband's sister is sister-in-law.: [1, 1.0] 177 | husband's son is brother.: [4, 0.0] 178 | husband's son is father.: [1, 0.0] 179 | husband's son is grandfather.: [1, 0.0] 180 | husband's son is grandson.: [3, 0.0] 181 | husband's son is son.: [25, 18.0] 182 | husband's son is step-son.: [1, 0.0] 183 | husband's son is stepson.: [3, 2.0] 184 | law's daughter is granddaughter.: [1, 1.0] 185 | law's daughter is niece.: [1, 1.0] 186 | law's father is brother.: [1, 0.0] 187 | law's son is grandson.: [1, 1.0] 188 | law's son 
is nephew.: [2, 2.0] 189 | mother's brother is father.: [1, 0.0] 190 | mother's brother is uncle.: [36, 21.0] 191 | mother's daughter is daughter.: [39, 8.0] 192 | mother's daughter is father.: [1, 1.0] 193 | mother's daughter is grandmother.: [1, 1.0] 194 | mother's daughter is sister.: [35, 28.0] 195 | mother's father is father.: [15, 0.0] 196 | mother's father is grandfather.: [19, 11.0] 197 | mother's husband is father.: [8, 4.0] 198 | mother's mother is grandmother.: [42, 20.0] 199 | mother's sister is aunt.: [35, 22.0] 200 | mother's sister is sister.: [2, 0.0] 201 | mother's son is brother.: [48, 37.0] 202 | mother's son is son.: [14, 1.0] 203 | neice's brother is cousin.: [1, 0.0] 204 | nephew's aunt is aunt.: [7, 0.0] 205 | nephew's brother is brother.: [2, 0.0] 206 | nephew's brother is nephew.: [1, 1.0] 207 | nephew's daughter is granddaughter.: [1, 1.0] 208 | nephew's father is brother.: [1, 0.0] 209 | nephew's grandfather is grandfather.: [1, 0.0] 210 | nephew's grandfather is great-grandfather.: [2, 0.0] 211 | nephew's grandfather is uncle.: [1, 0.0] 212 | nephew's grandmother is grandmother.: [8, 0.0] 213 | nephew's grandmother is sister.: [1, 0.0] 214 | nephew's sister is niece.: [8, 8.0] 215 | nephew's sister is sister.: [1, 0.0] 216 | nephew's son is grandnephew.: [1, 0.0] 217 | nephew's uncle is brother.: [1, 1.0] 218 | nephew's uncle is father.: [1, 0.0] 219 | nephew's uncle is uncle.: [2, 0.0] 220 | niece's aunt is aunt.: [5, 0.0] 221 | niece's brother is cousin.: [2, 0.0] 222 | niece's brother is nephew.: [1, 0.0] 223 | niece's daughter is grandniece.: [1, 0.0] 224 | niece's grandfather is brother.: [1, 0.0] 225 | niece's grandfather is grandfather.: [1, 0.0] 226 | niece's grandfather is uncle.: [1, 0.0] 227 | niece's grandmother is aunt.: [2, 0.0] 228 | niece's grandmother is grandmother.: [6, 0.0] 229 | niece's husband is cousin.: [1, 0.0] 230 | niece's uncle is brother.: [4, 4.0] 231 | niece's uncle is father.: [1, 0.0] 232 | niece's uncle 
is uncle.: [4, 0.0] 233 | self's mother is mother.: [1, 1.0] 234 | sister's aunt is aunt.: [6, 4.0] 235 | sister's brother is brother.: [82, 74.0] 236 | sister's daughter is neice.: [1, 0.0] 237 | sister's daughter is niece.: [59, 48.0] 238 | sister's daughter-in-law is sister-in-law.: [1, 0.0] 239 | sister's father is brother.: [2, 0.0] 240 | sister's father is father.: [51, 45.0] 241 | sister's granddaughter is cousin.: [1, 0.0] 242 | sister's granddaughter is niece.: [1, 0.0] 243 | sister's grandfather is father.: [7, 1.0] 244 | sister's grandfather is grandfather.: [21, 17.0] 245 | sister's grandmother is aunt.: [1, 0.0] 246 | sister's grandmother is grandmother.: [30, 26.0] 247 | sister's grandmother is mother.: [2, 0.0] 248 | sister's grandson is grandson.: [1, 0.0] 249 | sister's grandson is nephew.: [3, 0.0] 250 | sister's husband is brother-in-law.: [7, 5.0] 251 | sister's mother is aunt.: [4, 0.0] 252 | sister's mother is mother.: [41, 35.0] 253 | sister's mother-in-law is sister-in-law.: [1, 0.0] 254 | sister's niece is niece.: [11, 11.0] 255 | sister's sister is niece.: [1, 0.0] 256 | sister's sister is sister.: [76, 61.0] 257 | sister's sister-in-law is sister-in-law.: [1, 0.0] 258 | sister's son is brother.: [3, 0.0] 259 | sister's son is grandson.: [1, 0.0] 260 | sister's son is nephew.: [1, 0.0] 261 | sister's stepdaughter is stepdaughter.: [2, 0.0] 262 | sister's uncle is father.: [2, 0.0] 263 | sister's uncle is uncle.: [6, 4.0] 264 | sister-in-law's father is brother-in-law.: [1, 0.0] 265 | sister-in-law's mother is mother-in-law.: [7, 7.0] 266 | sister-in-law's son is brother-in-law.: [1, 0.0] 267 | sister-in-law's son is brother.: [1, 0.0] 268 | sister-in-law's son is nephew.: [2, 2.0] 269 | son's aunt is aunt.: [10, 0.0] 270 | son's aunt is mother.: [2, 0.0] 271 | son's aunt is sister.: [22, 22.0] 272 | son's brother is brother.: [30, 0.0] 273 | son's brother is nephew.: [3, 0.0] 274 | son's brother is uncle.: [3, 0.0] 275 | son's daughter is 
granddaughter.: [30, 15.0] 276 | son's father is father.: [5, 0.0] 277 | son's father is grandfather.: [8, 0.0] 278 | son's father is mother.: [1, 0.0] 279 | son's father is self.: [1, 1.0] 280 | son's grandfather is father.: [22, 21.0] 281 | son's grandmother is mother.: [18, 18.0] 282 | son's mother is mother.: [19, 3.0] 283 | son's sister is daughter.: [12, 10.0] 284 | son's sister is niece.: [9, 0.0] 285 | son's sister is sister.: [9, 0.0] 286 | son's son is grandson.: [40, 26.0] 287 | son's uncle is brother.: [23, 23.0] 288 | son's uncle is father.: [8, 0.0] 289 | son's uncle is grandfather.: [1, 0.0] 290 | son's uncle is nephew.: [1, 0.0] 291 | son's uncle is uncle.: [3, 0.0] 292 | son's wife is daughter-in-law.: [17, 16.0] 293 | son's wife is mother.: [1, 0.0] 294 | son's wife is wife.: [5, 0.0] 295 | step-daughter's aunt is aunt.: [2, 0.0] 296 | step-daughter's grandmother is step-grandmother.: [1, 0.0] 297 | step-daughter's son is step-grandson.: [1, 0.0] 298 | step-son's daughter is step-granddaughter.: [1, 0.0] 299 | step-son's grandfather is step-grandfather.: [1, 0.0] 300 | step-son's son is grandson.: [1, 1.0] 301 | step-son's wife is daughter-in-law.: [1, 1.0] 302 | stepdaughter's grandmother is mother-in-law.: [1, 0.0] 303 | stepdaughter's grandmother is stepmother.: [1, 0.0] 304 | stepdaughter's son is stepson.: [2, 0.0] 305 | stepson's brother is stepson.: [1, 0.0] 306 | stepson's daughter is granddaughter.: [1, 1.0] 307 | stepson's uncle is uncle.: [1, 0.0] 308 | stepson's wife is stepmother.: [1, 0.0] 309 | uncle's aunt is aunt.: [2, 0.0] 310 | uncle's brother is brother.: [1, 0.0] 311 | uncle's brother is cousin.: [7, 0.0] 312 | uncle's brother is father.: [1, 0.0] 313 | uncle's daughter is niece.: [1, 0.0] 314 | uncle's father is grandfather.: [3, 3.0] 315 | uncle's grandfather is great-grandfather.: [2, 0.0] 316 | uncle's grandson is nephew.: [1, 0.0] 317 | uncle's mother is aunt.: [4, 0.0] 318 | uncle's nephew is nephew.: [3, 0.0] 319 | 
uncle's niece is niece.: [1, 0.0] 320 | uncle's sister is aunt.: [6, 1.0] 321 | uncle's sister is cousin.: [4, 0.0] 322 | uncle's son is nephew.: [2, 0.0] 323 | uncle's stepdaughter is stepdaughter.: [1, 0.0] 324 | uncle's uncle is grandfather.: [1, 0.0] 325 | uncle's uncle is granduncle.: [1, 0.0] 326 | uncle's wife is aunt.: [1, 0.0] 327 | wife's brother is brother-in-law.: [4, 4.0] 328 | wife's brother is brother.: [1, 0.0] 329 | wife's daughter is daughter-in-law.: [1, 0.0] 330 | wife's daughter is daughter.: [35, 21.0] 331 | wife's daughter is granddaughter.: [2, 0.0] 332 | wife's daughter is step-daughter.: [4, 0.0] 333 | wife's daughter is stepdaughter.: [2, 0.0] 334 | wife's father is father-in-law.: [9, 9.0] 335 | wife's father is father.: [3, 0.0] 336 | wife's granddaughter is daughter.: [1, 0.0] 337 | wife's granddaughter is granddaughter.: [20, 16.0] 338 | wife's grandson is grandson.: [18, 11.0] 339 | wife's mother is mother-in-law.: [13, 13.0] 340 | wife's mother is mother.: [1, 0.0] 341 | wife's sister is sister-in-law.: [8, 6.0] 342 | wife's son is brother.: [8, 0.0] 343 | wife's son is son.: [30, 25.0] 344 | wife's son is step-son.: [4, 2.0] 345 | wife's son is stepson.: [4, 0.0] 346 | --------------------------------------------------------------------------------