├── source
├── requirements.txt
├── util.py
├── train.py
├── test.py
├── prompts.py
├── models.py
└── datasets.py
├── requirements.txt
├── artifacts
├── config
│ ├── arithmetic
│ │ ├── base-10
│ │ │ ├── gpt-4_0-shot_cot.yaml
│ │ │ ├── gemini-pro_0-shot_cot.yaml
│ │ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot.yaml
│ │ │ └── gpt-3.5_5-shot_cot.yaml
│ │ ├── base-11
│ │ │ ├── gpt-4_0-shot_cot.yaml
│ │ │ ├── gemini-pro_0-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_5-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot_htt.yaml
│ │ │ ├── gemini-pro_5-shot_cot_htt.yaml
│ │ │ └── gpt-3.5_5-shot_cot_htt.yaml
│ │ ├── base-16
│ │ │ ├── gpt-4_0-shot_cot.yaml
│ │ │ ├── gemini-pro_0-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_5-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot_htt.yaml
│ │ │ ├── gpt-4_5-shot_cot_htt.yaml
│ │ │ └── gpt-3.5_5-shot_cot_htt.yaml
│ │ └── base-9
│ │ │ ├── gpt-4_0-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot.yaml
│ │ │ ├── gemini-pro_0-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ │ ├── gpt-3.5_5-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot_htt.yaml
│ │ │ ├── gemini-pro_5-shot_cot_htt.yaml
│ │ │ └── gpt-3.5_5-shot_cot_htt.yaml
│ ├── clutrr
│ │ ├── symbolic
│ │ │ ├── gpt-4_0-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot.yaml
│ │ │ ├── gemini-pro_0-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ │ ├── gpt-3.5_5-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot_htt.yaml
│ │ │ ├── gemini-pro_5-shot_cot_htt.yaml
│ │ │ └── gpt-3.5_5-shot_cot_htt.yaml
│ │ └── textual
│ │ │ ├── gpt-4_0-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot.yaml
│ │ │ ├── gemini-pro_0-shot_cot.yaml
│ │ │ ├── gemini-pro_5-shot_cot.yaml
│ │ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ │ ├── gpt-3.5_5-shot_cot.yaml
│ │ │ ├── gpt-4_5-shot_cot_htt.yaml
│ │ │ ├── gpt-3.5_5-shot_cot_htt.yaml
│ │ │ └── gemini-pro_5-shot_cot_htt.yaml
│ └── list_functions
│ │ ├── gpt-4_0-shot_cot.yaml
│ │ ├── gpt-4_4-shot_cot.yaml
│ │ ├── gemini-pro_0-shot_cot.yaml
│ │ ├── gemini-pro_4-shot_cot.yaml
│ │ ├── gpt-3.5_0-shot_cot.yaml
│ │ ├── gpt-3.5_4-shot_cot.yaml
│ │ ├── gpt-4_4-shot_cot_htt.yaml
│ │ ├── gemini-pro_4-shot_cot_htt.yaml
│ │ └── gpt-3.5_4-shot_cot_htt.yaml
├── prompt
│ ├── arithmetic
│ │ ├── 0-shot_cot.yaml
│ │ ├── base-10
│ │ │ └── 5-shot_cot.yaml
│ │ ├── base-11
│ │ │ ├── 5-shot_cot.yaml
│ │ │ ├── 5-shot_cot_htt_train.yaml
│ │ │ └── 5-shot_cot_htt_test.yaml
│ │ ├── base-16
│ │ │ ├── 5-shot_cot.yaml
│ │ │ ├── 5-shot_cot_htt_train.yaml
│ │ │ └── 5-shot_cot_htt_test.yaml
│ │ └── base-9
│ │ │ ├── 5-shot_cot.yaml
│ │ │ ├── 5-shot_cot_htt_train.yaml
│ │ │ └── 5-shot_cot_htt_test.yaml
│ ├── clutrr
│ │ ├── textual
│ │ │ ├── 0-shot_cot.yaml
│ │ │ ├── 5-shot_cot.yaml
│ │ │ └── 5-shot_cot_htt_test.yaml
│ │ └── symbolic
│ │ │ ├── 0-shot_cot.yaml
│ │ │ ├── 5-shot_cot.yaml
│ │ │ ├── 5-shot_cot_htt_train.yaml
│ │ │ └── 5-shot_cot_htt_test.yaml
│ └── list_functions
│ │ ├── 0-shot_cot.yaml
│ │ ├── 4-shot_cot.yaml
│ │ ├── 4-shot_cot_htt_train.yaml
│ │ └── 4-shot_cot_htt_test.yaml
├── dataset
│ └── download.sh
└── checkpoint
│ ├── arithmetic
│ │ ├── base-9
│ │ │ ├── gpt-4_5-shot_cot_htt_2000.yaml
│ │ │ ├── gpt-3.5_5-shot_cot_htt_2000.yaml
│ │ │ └── gemini-pro_5-shot_cot_htt_2000.yaml
│ │ └── base-11
│ │ │ ├── gpt-3.5_5-shot_cot_htt_2000.yaml
│ │ │ └── gpt-4_5-shot_cot_htt_2000.yaml
│ └── clutrr
│ │ └── symbolic
│ │ │ ├── gpt-4_5-shot_cot_htt_2000.yaml
│ │ │ ├── gpt-3.5_5-shot_cot_htt_2000.yaml
│ │ │ └── gemini-pro_5-shot_cot_htt_2000.yaml
├── .gitignore
├── CONTRIBUTING.md
├── README.md
└── LICENSE
/source/requirements.txt:
--------------------------------------------------------------------------------
1 | nltk
2 | tqdm
3 | numpy
4 | jinja2
5 | pyyaml
6 | openai>=1.0
7 | tenacity
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | nltk
2 | tqdm
3 | numpy
4 | jinja2
5 | pyyaml
6 | openai
7 | tenacity
8 | tiktoken
9 | google-generativeai
10 |
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-10/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-10
3 | model: gpt-4
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-11
3 | model: gpt-4
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-16
3 | model: gpt-4
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-9
3 | model: gpt-4
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-4
4 | prompt: prompt/clutrr/symbolic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-4
4 | prompt: prompt/clutrr/symbolic/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-4
4 | prompt: prompt/clutrr/textual/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gpt-4_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-4
4 | prompt: prompt/clutrr/textual/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-9
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-9/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-10/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-10
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-10/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-10
3 | model: gpt-3.5-turbo
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-10/gpt-4_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-10
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-10/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-11
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-11
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-11/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-16
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-16
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-16/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-9
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gemini-pro
4 | prompt: prompt/clutrr/symbolic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gemini-pro_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gemini-pro
4 | prompt: prompt/clutrr/symbolic/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gemini-pro
4 | prompt: prompt/clutrr/textual/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gemini-pro_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gemini-pro
4 | prompt: prompt/clutrr/textual/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-3.5-turbo
4 | prompt: prompt/clutrr/textual/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gpt-3.5_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-3.5-turbo
4 | prompt: prompt/clutrr/textual/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gpt-4_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: list_functions
3 | model: gpt-4
4 | prompt: prompt/list_functions/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gpt-4_4-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: list_functions
3 | model: gpt-4
4 | prompt: prompt/list_functions/4-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-11
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-16
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gemini-pro_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-9
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-9/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-9
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/clutrr/symbolic/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gpt-3.5_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/clutrr/symbolic/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-10/gemini-pro_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-10
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-10/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-10/gpt-3.5_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-10
3 | model: gpt-3.5-turbo
4 | prompt: prompt/arithmetic/base-10/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gemini-pro_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-11
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-11/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gemini-pro_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-16
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-16/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gpt-3.5_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-9
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/base-9/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gemini-pro_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: list_functions
3 | model: gemini-pro
4 | prompt: prompt/list_functions/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gemini-pro_4-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: list_functions
3 | model: gemini-pro
4 | prompt: prompt/list_functions/4-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gpt-3.5_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-11
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/base-11/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gpt-3.5_5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: base-16
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/base-16/5-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gpt-3.5_0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: list_functions
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/list_functions/0-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gpt-3.5_4-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: list_functions
3 | model: gpt-3.5-turbo-16k-0613
4 | prompt: prompt/list_functions/4-shot_cot.yaml
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-{{ base }}, what is {{ query[0] }} + {{ query[1] }}?
3 | Answer:
4 | Let's think step by step.
5 |
6 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/textual/0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Document: {{ document }}
3 | Question: {{ query[1] }} is {{ query[0] }}'s what?
4 | Answer:
5 | Let's think step by step.
6 |
7 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gpt-4_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-4
4 | prompt: prompt/clutrr/textual/5-shot_cot_htt_test.yaml
5 | library: checkpoint/clutrr/symbolic/gpt-4_5-shot_cot_htt_2000.yaml
6 | min_coverage: 2
7 | min_confidence: 0.7
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gpt-3.5_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gpt-3.5-turbo
4 | prompt: prompt/clutrr/textual/5-shot_cot_htt_test.yaml
5 | library: checkpoint/clutrr/symbolic/gpt-3.5_5-shot_cot_htt_2000.yaml
6 | min_coverage: 2
7 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/symbolic/0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}.
3 | Question: {{ query[1] }} is {{ query[0] }}'s what?
4 | Answer:
5 | Let's think step by step.
6 |
7 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/config/clutrr/textual/gemini-pro_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | test:
2 | dataset: clutrr
3 | model: gemini-pro
4 | prompt: prompt/clutrr/textual/5-shot_cot_htt_test.yaml
5 | library: checkpoint/clutrr/symbolic/gemini-pro_5-shot_cot_htt_2000.yaml
6 | min_coverage: 2
7 | min_confidence: 0.3
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # Distribution / packaging
7 | .Python
8 | build/
9 | develop-eggs/
10 | dist/
11 | downloads/
12 | eggs/
13 | .eggs/
14 | lib/
15 | lib64/
16 | parts/
17 | sdist/
18 | var/
19 | wheels/
20 | share/python-wheels/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | MANIFEST
25 |
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: clutrr
3 | model: gpt-4
4 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: clutrr
8 | model: gpt-4
9 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml
10 | library: checkpoint/clutrr/symbolic/gpt-4_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.7
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-9
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-9
8 | model: gpt-4
9 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-9/gpt-4_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-11
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-11
8 | model: gpt-4
9 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-11/gpt-4_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gpt-4_4-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: list_functions
3 | model: gpt-4
4 | prompt: prompt/list_functions/4-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: list_functions
8 | model: gpt-4
9 | prompt: prompt/list_functions/4-shot_cot_htt_test.yaml
10 | library: checkpoint/list_functions/gpt-4_4-shot_cot_htt_5000.yaml
11 | min_coverage: 1
12 | min_confidence: 0.1
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gemini-pro_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: clutrr
3 | model: gemini-pro
4 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: clutrr
8 | model: gemini-pro
9 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml
10 | library: checkpoint/clutrr/symbolic/gemini-pro_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gemini-pro_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-9
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-9
8 | model: gemini-pro
9 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-9/gemini-pro_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.5
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gemini-pro_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-11
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-11
8 | model: gemini-pro
9 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-11/gemini-pro_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.5
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gemini-pro_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-16
3 | model: gemini-pro
4 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-16
8 | model: gemini-pro
9 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-16/gemini-pro_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/clutrr/symbolic/gpt-3.5_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: clutrr
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: clutrr
8 | model: gpt-3.5-turbo-0613
9 | prompt: prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml
10 | library: checkpoint/clutrr/symbolic/gpt-3.5_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gemini-pro_4-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: list_functions
3 | model: gemini-pro
4 | prompt: prompt/list_functions/4-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: list_functions
8 | model: gemini-pro
9 | prompt: prompt/list_functions/4-shot_cot_htt_test.yaml
10 | library: checkpoint/list_functions/gemini-pro_4-shot_cot_htt_5000.yaml
11 | min_coverage: 1
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-16
3 | model: gpt-4
4 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-16
8 | model: gpt-4
9 | max_tokens: 1000
10 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml
11 | library: checkpoint/arithmetic/base-16/gpt-4_5-shot_cot_htt_2000.yaml
12 | min_coverage: 2
13 | min_confidence: 0.5
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-9/gpt-3.5_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-9
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-9
8 | model: gpt-3.5-turbo-16k-0613
9 | prompt: prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-9/gpt-3.5_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.5
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-11/gpt-3.5_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-11
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-11
8 | model: gpt-3.5-turbo-16k-0613
9 | prompt: prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-11/gpt-3.5_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.5
--------------------------------------------------------------------------------
/artifacts/config/arithmetic/base-16/gpt-3.5_5-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: base-16
3 | model: gpt-3.5-turbo-0613
4 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: base-16
8 | model: gpt-3.5-turbo-16k-0613
9 | prompt: prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml
10 | library: checkpoint/arithmetic/base-16/gpt-3.5_5-shot_cot_htt_2000.yaml
11 | min_coverage: 2
12 | min_confidence: 0.3
--------------------------------------------------------------------------------
/artifacts/config/list_functions/gpt-3.5_4-shot_cot_htt.yaml:
--------------------------------------------------------------------------------
1 | train:
2 | dataset: list_functions
3 | model: gpt-3.5-turbo-16k-0613
4 | prompt: prompt/list_functions/4-shot_cot_htt_train.yaml
5 |
6 | test:
7 | dataset: list_functions
8 | model: gpt-3.5-turbo-16k-0613
9 | prompt: prompt/list_functions/4-shot_cot_htt_test.yaml
10 | library: checkpoint/list_functions/gpt-3.5_4-shot_cot_htt_5000.yaml
11 | min_coverage: 1
12 | min_confidence: 0.1
--------------------------------------------------------------------------------
/artifacts/prompt/list_functions/0-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: Infer the function behind the examples. Use the function to answer the questions.
3 |
4 | prompt: |
5 | Examples:
6 | {%- for i in range(train_queries | length) %}
7 | {{ train_queries[i] }} -> {{ train_answers[i] }}
8 | {%- endfor %}
9 | Questions:
10 | {%- for query in queries %}
11 | {{ query }} -> ?
12 | {%- endfor %}
13 | Answers:
14 | Let's think step by step.
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | ## Contributor License Agreement
4 |
5 | Contributions to this project must be accompanied by a Contributor License
6 | Agreement. You (or your employer) retain the copyright to your contribution;
7 | this simply gives us permission to use and redistribute your contributions as
8 | part of the project. Head over to <https://cla.developers.google.com/> to see
9 | your current agreements on file or to sign a new one.
10 |
11 | You generally only need to submit a CLA once, so if you've already submitted one
12 | (even if it was for a different project), you probably don't need to do it
13 | again.
14 |
15 | ## Code reviews
16 |
17 | All submissions, including submissions by project members, require review. We
18 | use GitHub pull requests for this purpose. Consult
19 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
20 | information on using pull requests.
21 |
22 | ## Community Guidelines
23 |
24 | This project follows [Google's Open Source Community
25 | Guidelines](https://opensource.google/conduct/).
26 |
--------------------------------------------------------------------------------
/artifacts/dataset/download.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Copyright 2024 DeepMind Technologies Limited
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | # ==============================================================================
16 |
17 | # Downloads the CLUTRR, Arithmetic and List Functions datasets into the
18 | # current working directory. Requires wget and unzip. Uses bash brace
19 | # expansion, so run it with bash rather than a plain POSIX sh.
20 |
21 | # CLUTRR
22 | # The outer archive contains a nested zip; stream that inner zip out with
23 | # `unzip -p` and then unpack it into ./clutrr.
24 | wget "https://drive.google.com/u/2/uc?id=1SEq_e1IVCDDzsBIBhoUQ5pOVH5kxRoZF&export=download" -O clutrr.zip
25 | unzip -po clutrr.zip data_emnlp_final/data_089907f8.zip > data_089907f8.zip
26 | unzip -o data_089907f8.zip -d clutrr
27 | rm clutrr.zip data_089907f8.zip
28 |
29 | # Arithmetic
30 | URL=https://raw.githubusercontent.com/ZhaofengWu/counterfactual-evaluation/master/arithmetic/data
31 | # Scratch file so the 2-digit data is fetched once and then split.
32 | TMP=$(mktemp)
33 | for BASE in 9 10 11 16
34 | do
35 |   DIR="arithmetic/base-${BASE}"
36 |   mkdir -p "${DIR}"
37 |   # 2-digit problems: first 900 lines are the train split, last 100 the test split.
38 |   wget "${URL}/0shot/base${BASE}.txt" -O "${TMP}"
39 |   head -n 900 "${TMP}" > "${DIR}/2_train.txt"
40 |   tail -n 100 "${TMP}" > "${DIR}/2_test.txt"
41 |   # 3- and 4-digit problems are only used for testing (last 100 lines each).
42 |   wget "${URL}/0shot_3digits/base${BASE}.txt" -O - | tail -n 100 > "${DIR}/3_test.txt"
43 |   wget "${URL}/0shot_4digits/base${BASE}.txt" -O - | tail -n 100 > "${DIR}/4_test.txt"
44 | done
45 | rm -f "${TMP}"
46 |
47 | # List Functions
48 | # No trailing slash on the base URL, so the joined paths below contain no "//".
49 | URL=https://raw.githubusercontent.com/google/BIG-bench/main/bigbench/benchmark_tasks/list_functions
50 | # -p keeps a re-run from failing when the directory already exists.
51 | mkdir -p list_functions
52 | for i in {1..250}
53 | do
54 |   # Task directories are named c001 ... c250.
55 |   id=$(printf "%03d" "${i}")
56 |   wget "${URL}/c${id}/task.json" -O "list_functions/c${id}.json"
57 | done
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/symbolic/5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Context: The relations on the path from Alan to Anthony are daughter, uncle, son.
3 | Question: Anthony is Alan's what?
4 | Answer:
5 | For daughter's uncle, we have daughter's uncle is brother. So the relations are reduced to brother, son.
6 | For brother's son, we have brother's son is nephew. So the relations are reduced to nephew.
7 | Therefore, the answer is nephew.
8 |
9 | Context: The relations on the path from Annie to Carlos are brother, mother, son.
10 | Question: Carlos is Annie's what?
11 | Answer:
12 | For brother's mother, we have brother's mother is mother. So the relations are reduced to mother, son.
13 | For mother's son, we have mother's son is brother. So the relations are reduced to brother.
14 | Therefore, the answer is brother.
15 |
16 | Context: The relations on the path from Beverly to Michelle are father, daughter, aunt.
17 | Question: Michelle is Beverly's what?
18 | Answer:
19 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, aunt.
20 | For sister's aunt, we have sister's aunt is aunt. So the relations are reduced to aunt.
21 | Therefore, the answer is aunt.
22 |
23 | Context: The relations on the path from Lee to Jeanna are father, daughter, sister.
24 | Question: Jeanna is Lee's what?
25 | Answer:
26 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, sister.
27 | For sister's sister, we have sister's sister is sister. So the relations are reduced to sister.
28 | Therefore, the answer is sister.
29 |
30 | Context: The relations on the path from Craig to Molly are sister, father, mother.
31 | Question: Molly is Craig's what?
32 | Answer:
33 | For sister's father, we have sister's father is father. So the relations are reduced to father, mother.
34 | For father's mother, we have father's mother is grandmother. So the relations are reduced to grandmother.
35 | Therefore, the answer is grandmother.
36 |
37 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}.
38 | Question: {{ query[1] }} is {{ query[0] }}'s what?
39 | Answer:
40 |
41 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/symbolic/5-shot_cot_htt_train.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Context: The relations on the path from Alan to Anthony are daughter, uncle, son.
3 | Question: Anthony is Alan's what?
4 | Answer:
5 | For daughter's uncle, we have daughter's uncle is brother. So the relations are reduced to brother, son.
6 | For brother's son, we have brother's son is nephew. So the relations are reduced to nephew.
7 | Therefore, the answer is nephew.
8 |
9 | Context: The relations on the path from Annie to Carlos are brother, mother, son.
10 | Question: Carlos is Annie's what?
11 | Answer:
12 | For brother's mother, we have brother's mother is mother. So the relations are reduced to mother, son.
13 | For mother's son, we have mother's son is brother. So the relations are reduced to brother.
14 | Therefore, the answer is brother.
15 |
16 | Context: The relations on the path from Beverly to Michelle are father, daughter, aunt.
17 | Question: Michelle is Beverly's what?
18 | Answer:
19 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, aunt.
20 | For sister's aunt, we have sister's aunt is aunt. So the relations are reduced to aunt.
21 | Therefore, the answer is aunt.
22 |
23 | Context: The relations on the path from Lee to Jeanna are father, daughter, sister.
24 | Question: Jeanna is Lee's what?
25 | Answer:
26 | For father's daughter, we have father's daughter is sister. So the relations are reduced to sister, sister.
27 | For sister's sister, we have sister's sister is sister. So the relations are reduced to sister.
28 | Therefore, the answer is sister.
29 |
30 | Context: The relations on the path from Craig to Molly are sister, father, mother.
31 | Question: Molly is Craig's what?
32 | Answer:
33 | For sister's father, we have sister's father is father. So the relations are reduced to father, mother.
34 | For father's mother, we have father's mother is grandmother. So the relations are reduced to grandmother.
35 | Therefore, the answer is grandmother.
36 |
37 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}.
38 | Question: {{ query[1] }} is {{ query[0] }}'s what?
39 | Answer:
40 |
41 | pattern: '[a-z\-]+''s [a-z\-]+ is [a-z\-]+\.'
42 |
43 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-10/5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-10, what is 76 + 14?
3 | Answer:
4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
5 | There is no carry. 6 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
6 | The carry is 1. 7 + 1 + 1 = 9. 9 is 0, 9. So we clear the carry. Prepend 9 to the answer. So far the answer has 2 digits: 9, 0.
7 | There is no carry. So far the answer has 2 digits: 9, 0.
8 | Therefore, the answer is 90.
9 |
10 | Question: In base-10, what is 97 + 74?
11 | Answer:
12 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7.
13 | There is no carry. 7 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1.
14 | The carry is 1. 9 + 7 + 1 = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 1.
15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 1.
16 | Therefore, the answer is 171.
17 |
18 | Question: In base-10, what is 85 + 48?
19 | Answer:
20 | 85 is 8, 5. 48 is 4, 8. So the steps are 5 + 8, 8 + 4.
21 | There is no carry. 5 + 8 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3.
22 | The carry is 1. 8 + 4 + 1 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 3.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 3.
24 | Therefore, the answer is 133.
25 |
26 | Question: In base-10, what is 34 + 31?
27 | Answer:
28 | 34 is 3, 4. 31 is 3, 1. So the steps are 4 + 1, 3 + 3.
29 | There is no carry. 4 + 1 = 5. 5 is 0, 5. So we clear the carry. Prepend 5 to the answer. So far the answer has 1 digit: 5.
30 | There is no carry. 3 + 3 = 6. 6 is 0, 6. So we clear the carry. Prepend 6 to the answer. So far the answer has 2 digits: 6, 5.
31 | There is no carry. So far the answer has 2 digits: 6, 5.
32 | Therefore, the answer is 65.
33 |
34 | Question: In base-10, what is 58 + 34?
35 | Answer:
36 | 58 is 5, 8. 34 is 3, 4. So the steps are 8 + 4, 5 + 3.
37 | There is no carry. 8 + 4 = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 1 digit: 2.
38 | The carry is 1. 5 + 3 + 1 = 9. 9 is 0, 9. So we clear the carry. Prepend 9 to the answer. So far the answer has 2 digits: 9, 2.
39 | There is no carry. So far the answer has 2 digits: 9, 2.
40 | Therefore, the answer is 92.
41 |
42 | Question: In base-10, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-11/5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-11, what is 76 + 14?
3 | Answer:
4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
5 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A.
6 | There is no carry. 7 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, A.
7 | There is no carry. So far the answer has 2 digits: 8, A.
8 | Therefore, the answer is 8A.
9 |
10 | Question: In base-11, what is 97 + 74?
11 | Answer:
12 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7.
13 | There is no carry. 7 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
14 | The carry is 1. 9 + 7 + 1 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 2 digits: 6, 0.
15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 6, 0.
16 | Therefore, the answer is 160.
17 |
18 | Question: In base-11, what is 85 + A3?
19 | Answer:
20 | 85 is 8, 5. A3 is A, 3. So the steps are 5 + 3, 8 + A.
21 | There is no carry. 5 + 3 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 1 digit: 8.
22 | There is no carry. 8 + A = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 8.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 8.
24 | Therefore, the answer is 178.
25 |
26 | Question: In base-11, what is 92 + 52?
27 | Answer:
28 | 92 is 9, 2. 52 is 5, 2. So the steps are 2 + 2, 9 + 5.
29 | There is no carry. 2 + 2 = 4. 4 is 0, 4. So we clear the carry. Prepend 4 to the answer. So far the answer has 1 digit: 4.
30 | There is no carry. 9 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 4.
31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 4.
32 | Therefore, the answer is 134.
33 |
34 | Question: In base-11, what is 29 + 58?
35 | Answer:
36 | 29 is 2, 9. 58 is 5, 8. So the steps are 9 + 8, 2 + 5.
37 | There is no carry. 9 + 8 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 1 digit: 6.
38 | The carry is 1. 2 + 5 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, 6.
39 | There is no carry. So far the answer has 2 digits: 8, 6.
40 | Therefore, the answer is 86.
41 |
42 | Question: In base-11, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-16/5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-16, what is EC + DD?
3 | Answer:
4 | EC is E, C. DD is D, D. So the steps are C + D, E + D.
5 | There is no carry. C + D = 19. 19 is 1, 9. So we set the carry to 1. Prepend 9 to the answer. So far the answer has 1 digit: 9.
6 | The carry is 1. E + D + 1 = 1C. 1C is 1, C. So we set the carry to 1. Prepend C to the answer. So far the answer has 2 digits: C, 9.
7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, C, 9.
8 | Therefore, the answer is 1C9.
9 |
10 | Question: In base-16, what is 18 + 9F?
11 | Answer:
12 | 18 is 1, 8. 9F is 9, F. So the steps are 8 + F, 1 + 9.
13 | There is no carry. 8 + F = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 1 digit: 7.
14 | The carry is 1. 1 + 9 + 1 = B. B is 0, B. So we clear the carry. Prepend B to the answer. So far the answer has 2 digits: B, 7.
15 | There is no carry. So far the answer has 2 digits: B, 7.
16 | Therefore, the answer is B7.
17 |
18 | Question: In base-16, what is 79 + 8B?
19 | Answer:
20 | 79 is 7, 9. 8B is 8, B. So the steps are 9 + B, 7 + 8.
21 | There is no carry. 9 + B = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4.
22 | The carry is 1. 7 + 8 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 4.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 4.
24 | Therefore, the answer is 104.
25 |
26 | Question: In base-16, what is A6 + 94?
27 | Answer:
28 | A6 is A, 6. 94 is 9, 4. So the steps are 6 + 4, A + 9.
29 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A.
30 | There is no carry. A + 9 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, A.
31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, A.
32 | Therefore, the answer is 13A.
33 |
34 | Question: In base-16, what is 54 + D3?
35 | Answer:
36 | 54 is 5, 4. D3 is D, 3. So the steps are 4 + 3, 5 + D.
37 | There is no carry. 4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7.
38 | There is no carry. 5 + D = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 2 digits: 2, 7.
39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 2, 7.
40 | Therefore, the answer is 127.
41 |
42 | Question: In base-16, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-9/5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-9, what is 76 + 14?
3 | Answer:
4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
5 | There is no carry. 6 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1.
6 | The carry is 1. 7 + 1 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 1.
7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 1.
8 | Therefore, the answer is 101.
9 |
10 | Question: In base-9, what is 86 + 57?
11 | Answer:
12 | 86 is 8, 6. 57 is 5, 7. So the steps are 6 + 7, 8 + 5.
13 | There is no carry. 6 + 7 = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4.
14 | The carry is 1. 8 + 5 + 1 = 15. 15 is 1, 5. So we set the carry to 1. Prepend 5 to the answer. So far the answer has 2 digits: 5, 4.
15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 5, 4.
16 | Therefore, the answer is 154.
17 |
18 | Question: In base-9, what is 63 + 34?
19 | Answer:
20 | 63 is 6, 3. 34 is 3, 4. So the steps are 3 + 4, 6 + 3.
21 | There is no carry. 3 + 4 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7.
22 | There is no carry. 6 + 3 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 7.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 7.
24 | Therefore, the answer is 107.
25 |
26 | Question: In base-9, what is 31 + 58?
27 | Answer:
28 | 31 is 3, 1. 58 is 5, 8. So the steps are 1 + 8, 3 + 5.
29 | There is no carry. 1 + 8 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
30 | The carry is 1. 3 + 5 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 0.
31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 0.
32 | Therefore, the answer is 100.
33 |
34 | Question: In base-9, what is 67 + 25?
35 | Answer:
36 | 67 is 6, 7. 25 is 2, 5. So the steps are 7 + 5, 6 + 2.
37 | There is no carry. 7 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3.
38 | The carry is 1. 6 + 2 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 3.
39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 3.
40 | Therefore, the answer is 103.
41 |
42 | Question: In base-9, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-11/5-shot_cot_htt_train.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-11, what is 76 + 14?
3 | Answer:
4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
5 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A.
6 | There is no carry. 7 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, A.
7 | There is no carry. So far the answer has 2 digits: 8, A.
8 | Therefore, the answer is 8A.
9 |
10 | Question: In base-11, what is 97 + 74?
11 | Answer:
12 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7.
13 | There is no carry. 7 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
14 | The carry is 1. 9 + 7 + 1 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 2 digits: 6, 0.
15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 6, 0.
16 | Therefore, the answer is 160.
17 |
18 | Question: In base-11, what is 85 + A3?
19 | Answer:
20 | 85 is 8, 5. A3 is A, 3. So the steps are 5 + 3, 8 + A.
21 | There is no carry. 5 + 3 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 1 digit: 8.
22 | There is no carry. 8 + A = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 8.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 8.
24 | Therefore, the answer is 178.
25 |
26 | Question: In base-11, what is 92 + 52?
27 | Answer:
28 | 92 is 9, 2. 52 is 5, 2. So the steps are 2 + 2, 9 + 5.
29 | There is no carry. 2 + 2 = 4. 4 is 0, 4. So we clear the carry. Prepend 4 to the answer. So far the answer has 1 digit: 4.
30 | There is no carry. 9 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 4.
31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 4.
32 | Therefore, the answer is 134.
33 |
34 | Question: In base-11, what is 29 + 58?
35 | Answer:
36 | 29 is 2, 9. 58 is 5, 8. So the steps are 9 + 8, 2 + 5.
37 | There is no carry. 9 + 8 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 1 digit: 6.
38 | The carry is 1. 2 + 5 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, 6.
39 | There is no carry. So far the answer has 2 digits: 8, 6.
40 | Therefore, the answer is 86.
41 |
42 | Question: In base-11, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | pattern: '[A-Z0-9]+(?: \+ [A-Z0-9]+)+ = [A-Z0-9]+\.'
46 |
47 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-16/5-shot_cot_htt_train.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-16, what is EC + DD?
3 | Answer:
4 | EC is E, C. DD is D, D. So the steps are C + D, E + D.
5 | There is no carry. C + D = 19. 19 is 1, 9. So we set the carry to 1. Prepend 9 to the answer. So far the answer has 1 digit: 9.
6 | The carry is 1. E + D + 1 = 1C. 1C is 1, C. So we set the carry to 1. Prepend C to the answer. So far the answer has 2 digits: C, 9.
7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, C, 9.
8 | Therefore, the answer is 1C9.
9 |
10 | Question: In base-16, what is 18 + 9F?
11 | Answer:
12 | 18 is 1, 8. 9F is 9, F. So the steps are 8 + F, 1 + 9.
13 | There is no carry. 8 + F = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 1 digit: 7.
14 | The carry is 1. 1 + 9 + 1 = B. B is 0, B. So we clear the carry. Prepend B to the answer. So far the answer has 2 digits: B, 7.
15 | There is no carry. So far the answer has 2 digits: B, 7.
16 | Therefore, the answer is B7.
17 |
18 | Question: In base-16, what is 79 + 8B?
19 | Answer:
20 | 79 is 7, 9. 8B is 8, B. So the steps are 9 + B, 7 + 8.
21 | There is no carry. 9 + B = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4.
22 | The carry is 1. 7 + 8 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 4.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 4.
24 | Therefore, the answer is 104.
25 |
26 | Question: In base-16, what is A6 + 94?
27 | Answer:
28 | A6 is A, 6. 94 is 9, 4. So the steps are 6 + 4, A + 9.
29 | There is no carry. 6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A.
30 | There is no carry. A + 9 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, A.
31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, A.
32 | Therefore, the answer is 13A.
33 |
34 | Question: In base-16, what is 54 + D3?
35 | Answer:
36 | 54 is 5, 4. D3 is D, 3. So the steps are 4 + 3, 5 + D.
37 | There is no carry. 4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7.
38 | There is no carry. 5 + D = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 2 digits: 2, 7.
39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 2, 7.
40 | Therefore, the answer is 127.
41 |
42 | Question: In base-16, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | pattern: '[A-Z0-9]+(?: \+ [A-Z0-9]+)+ = [A-Z0-9]+\.'
46 |
47 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-9/5-shot_cot_htt_train.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Question: In base-9, what is 76 + 14?
3 | Answer:
4 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
5 | There is no carry. 6 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1.
6 | The carry is 1. 7 + 1 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 1.
7 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 1.
8 | Therefore, the answer is 101.
9 |
10 | Question: In base-9, what is 86 + 57?
11 | Answer:
12 | 86 is 8, 6. 57 is 5, 7. So the steps are 6 + 7, 8 + 5.
13 | There is no carry. 6 + 7 = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4.
14 | The carry is 1. 8 + 5 + 1 = 15. 15 is 1, 5. So we set the carry to 1. Prepend 5 to the answer. So far the answer has 2 digits: 5, 4.
15 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 5, 4.
16 | Therefore, the answer is 154.
17 |
18 | Question: In base-9, what is 63 + 34?
19 | Answer:
20 | 63 is 6, 3. 34 is 3, 4. So the steps are 3 + 4, 6 + 3.
21 | There is no carry. 3 + 4 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7.
22 | There is no carry. 6 + 3 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 7.
23 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 7.
24 | Therefore, the answer is 107.
25 |
26 | Question: In base-9, what is 31 + 58?
27 | Answer:
28 | 31 is 3, 1. 58 is 5, 8. So the steps are 1 + 8, 3 + 5.
29 | There is no carry. 1 + 8 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
30 | The carry is 1. 3 + 5 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 0.
31 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 0.
32 | Therefore, the answer is 100.
33 |
34 | Question: In base-9, what is 67 + 25?
35 | Answer:
36 | 67 is 6, 7. 25 is 2, 5. So the steps are 7 + 5, 6 + 2.
37 | There is no carry. 7 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3.
38 | The carry is 1. 6 + 2 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 3.
39 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 3.
40 | Therefore, the answer is 103.
41 |
42 | Question: In base-9, what is {{ query[0] }} + {{ query[1] }}?
43 | Answer:
44 |
45 | pattern: '[A-Z0-9]+(?: \+ [A-Z0-9]+)+ = [A-Z0-9]+\.'
46 |
47 | return_last: yes
--------------------------------------------------------------------------------
/source/util.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Utilities for training and testing."""
17 |
18 | import argparse
19 | from collections.abc import Mapping
20 | import logging
21 | import os
22 | import sys
23 | import time
24 | from typing import Any
25 |
26 |
class _DebugHook(object):
  """Callable that forwards to a lazily created IPython traceback formatter.

  The IPython import and the formatter construction are deferred to the first
  call. The formatter is then stored on the instance and reused for every
  later call.
  """

  # Formatter used for all calls; stays None until the first call.
  instance = None

  def __call__(self, *args, **kwargs):
    """Passes all arguments to the formatter and returns its result."""
    formatter = self.instance
    if formatter is None:
      # Imported here so that IPython is only needed once the hook fires.
      from IPython.core import ultratb  # pylint: disable=g-import-not-at-top
      # call_pdb=1 presumably opens the debugger after the traceback is
      # printed -- confirm against the IPython documentation.
      formatter = ultratb.FormattedTB(
          mode="Plain", color_scheme="Linux", call_pdb=1)
      self.instance = formatter
    return formatter(*args, **kwargs)
36 |
37 |
# Module-level side effect: importing this module replaces the interpreter's
# excepthook, so uncaught exceptions are handled by a _DebugHook instance.
sys.excepthook = _DebugHook()
39 |
40 |
def parse_args() -> argparse.Namespace:
  """Builds the command line parser and parses the process arguments.

  Returns:
    namespace with the attributes `artifacts`, `config`, `split`,
    `num_iteration` and `output_dir`; only `config` is mandatory
  """
  # One (flags, keyword options) entry per command line option, in the order
  # they are registered with the parser.
  option_table = (
      (("-a", "--artifacts"),
       dict(default="artifacts", help="folder for all the artifacts",
            required=False)),
      (("-c", "--config"),
       dict(help="yaml configuration file", required=True)),
      (("-s", "--split"),
       dict(help="data split to train / test on", default=None)),
      (("-n", "--num-iteration"),
       dict(help="number of training iterations", type=int, default=2000)),
      (("-o", "--output-dir"),
       dict(help="directory to store logs and checkpoints",
            default="experiment/")),
  )
  cli = argparse.ArgumentParser("")
  for flags, options in option_table:
    cli.add_argument(*flags, **options)
  return cli.parse_args()
59 |
60 |
def create_working_directory(
    args: argparse.Namespace,
    cfg: Mapping[str, Any],
) -> str:
  """Makes a new timestamped directory for the current run.

  The path is `<output_dir>/<dataset>/<config stem>_<split>_<timestamp>`,
  where the config stem is the config file name without folder or extension
  and the timestamp has one-second resolution.

  Args:
    args: parsed arguments; `config`, `split` and `output_dir` are read
    cfg: config dict; its "dataset" entry names the parent folder
  Returns:
    path of the newly created directory
  Raises:
    FileExistsError: if the directory already exists
  """
  stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
  stem, _ = os.path.splitext(os.path.basename(args.config))
  run_dir = os.path.join(
      args.output_dir, cfg["dataset"], f"{stem}_{args.split}_{stamp}")
  # No exist_ok: an already existing run directory is reported as an error.
  os.makedirs(run_dir)
  return run_dir
79 |
80 |
def create_logger(working_dir: str) -> logging.Logger:
  """Sets up the root logger to write to the console and to a log file.

  The root logger is set to INFO level and gets two additional handlers: a
  stream handler and a file handler writing to `log.txt` inside
  `working_dir`. Handlers that are already attached are left in place, so
  every call adds another pair.

  Args:
    working_dir: existing directory in which `log.txt` is written
  Returns:
    the root logger
  """
  root_logger = logging.getLogger("")
  root_logger.setLevel(logging.INFO)
  # The stream handler is attached before the file is opened, so it stays
  # attached even if opening the log file fails.
  root_logger.addHandler(logging.StreamHandler())
  root_logger.addHandler(
      logging.FileHandler(os.path.join(working_dir, "log.txt")))
  return root_logger
97 |
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/symbolic/5-shot_cot_htt_test.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary.
3 | Knowledge:
4 | {%- set key_rules = {} -%}
5 | {%- for rule in rules -%}
6 | {%- set tokens = rule.split(" ") -%}
7 | {%- set _ = key_rules.update({(tokens[0].split("'")[0], tokens[1]): rule}) -%}
8 | {%- endfor -%}
9 | {%- set global = namespace(old =["", ""]) -%}
10 | {%- for key, rule in key_rules | dictsort -%}
11 | {%- if global.old[1] and global.old != key -%}
12 | </{{ global.old[1] }}>
13 | {%- endif -%}
14 | {%- if global.old[0] and global.old[0] != key[0] -%}
15 | </{{ global.old[0] }}>
16 | {%- endif -%}
17 | {%- if global.old[0] != key[0] %}
18 | <{{ key[0] }}>
19 | {%- endif -%}
20 | {%- if global.old != key -%}
21 | <{{ key[1] }}>
22 | {%- endif -%}
23 | {{- rule -}}
24 | {%- set global.old = key -%}
25 | {%- endfor -%}
26 | </{{ global.old[1] }}></{{ global.old[0] }}>
27 |
28 | prompt: |
29 | Context: The relations on the path from Alan to Anthony are daughter, uncle, son.
30 | Question: Anthony is Alan's what?
31 | Answer:
32 | For daughter's uncle, we retrieve daughter's uncle is brother. So the relations are reduced to brother, son.
33 | For brother's son, we retrieve brother's son is nephew. So the relations are reduced to nephew.
34 | Therefore, the answer is nephew.
35 |
36 | Context: The relations on the path from Annie to Carlos are brother, mother, son.
37 | Question: Carlos is Annie's what?
38 | Answer:
39 | For brother's mother, we retrieve brother's mother is mother. So the relations are reduced to mother, son.
40 | For mother's son, we retrieve mother's son is brother. So the relations are reduced to brother.
41 | Therefore, the answer is brother.
42 |
43 | Context: The relations on the path from Beverly to Michelle are father, daughter, aunt.
44 | Question: Michelle is Beverly's what?
45 | Answer:
46 | For father's daughter, we retrieve father's daughter is sister. So the relations are reduced to sister, aunt.
47 | For sister's aunt, we retrieve sister's aunt is aunt. So the relations are reduced to aunt.
48 | Therefore, the answer is aunt.
49 |
50 | Context: The relations on the path from Lee to Jeanna are father, daughter, sister.
51 | Question: Jeanna is Lee's what?
52 | Answer:
53 | For father's daughter, we retrieve father's daughter is sister. So the relations are reduced to sister, sister.
54 | For sister's sister, we retrieve sister's sister is sister. So the relations are reduced to sister.
55 | Therefore, the answer is sister.
56 |
57 | Context: The relations on the path from Craig to Molly are sister, father, mother.
58 | Question: Molly is Craig's what?
59 | Answer:
60 | For sister's father, we retrieve sister's father is father. So the relations are reduced to father, mother.
61 | For father's mother, we retrieve father's mother is grandmother. So the relations are reduced to grandmother.
62 | Therefore, the answer is grandmother.
63 |
64 | Context: The relations on the path from {{ query[0] }} to {{ query[1] }} are {{ path | join(", ") }}.
65 | Question: {{ query[1] }} is {{ query[0] }}'s what?
66 | Answer:
67 |
68 | return_last: yes
--------------------------------------------------------------------------------
/source/train.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Training, i.e., rule learning file."""
17 |
18 | import os
19 | import pprint
20 | import random
21 |
22 | import datasets
23 | import models
24 | import prompts
25 | import tqdm
26 | import util
27 | import yaml
28 |
29 |
def main():
    """Learn a rule library by prompting a model on the training split.

    Loads the YAML config named on the command line, keeps the section for
    the active split (default "train"), and builds the dataset, model and
    prompt function that section names. Each sample is passed through the
    prompt function; the rules it returns are added to a `RuleLibrary`
    together with the accuracy of the prediction. The library is written to
    the working directory every 100 iterations and after the last one.

    Raises:
        ValueError: If the config names an unknown dataset or model.
    """
    random.seed(0)
    args = util.parse_args()
    args.split = args.split or "train"

    with open(args.config, "r") as fin:
        cfg = yaml.safe_load(fin.read())
    # The config file holds one section per split; keep only the active one.
    cfg = cfg[args.split]

    working_dir = util.create_working_directory(args, cfg)
    logger = util.create_logger(working_dir)
    logger.warning(pprint.pformat(vars(args)))
    logger.warning(pprint.pformat(cfg))

    if cfg["dataset"] == "clutrr":
        dataset = datasets.CLUTRR(
            os.path.join(args.artifacts, "dataset/clutrr"))
    elif cfg["dataset"].startswith("base-"):
        # Names look like "base-11"; everything after "base-" is the base.
        base = int(cfg["dataset"][5:])
        dataset = datasets.Arithmetic(
            os.path.join(args.artifacts, "dataset/arithmetic"), base=base)
    elif cfg["dataset"] == "list_functions":
        dataset = datasets.ListFunctions(
            os.path.join(args.artifacts, "dataset/list_functions"))
    else:
        # The config key is "dataset"; looking up "datasets" here used to
        # raise KeyError and hide the intended error message.
        raise ValueError(f"Unknown dataset `{cfg['dataset']}`")

    train_set = dataset.get_split(args.split)
    max_tokens = cfg.get("max_tokens", 2000)
    if cfg["model"].startswith("gpt"):
        model = models.GPT(cfg["model"], max_tokens=max_tokens)
    elif cfg["model"].startswith("gemini"):
        model = models.Gemini(cfg["model"], max_tokens=max_tokens)
    else:
        raise ValueError(f"Unknown model `{cfg['model']}`")
    function = prompts.PromptFunction.from_yaml(
        os.path.join(args.artifacts, cfg["prompt"]))
    library = prompts.RuleLibrary()

    # Repeat the split for as many whole epochs as fit, then top up with a
    # random sample (without replacement) so that exactly
    # `args.num_iteration` samples are processed.
    num_epoch = args.num_iteration // len(train_set)
    train_set = train_set * num_epoch + random.sample(
        train_set, args.num_iteration % len(train_set))

    total_cost = 0
    num_iteration = 0
    for sample in tqdm.tqdm(train_set):
        truth = sample["answer"]
        pred, cost, rules = function(model, sample)
        logger.warning("rules:")
        for rule in rules:
            logger.warning(rule)
        acc = dataset.evaluate(truth, pred)

        # Samples that carry a concept get each rule prefixed with it.
        if "concept" in sample:
            concept = sample["concept"]
            rules = [f"[{concept}] {rule}" for rule in rules]
        library.update(rules, acc)
        total_cost += cost
        logger.warning("truth: %s, pred: %s, accuracy: %s", truth, pred, acc)
        logger.warning("total cost: %s", total_cost)

        num_iteration += 1
        # Checkpoint every 100 iterations and once more at the very end.
        if num_iteration % 100 == 0 or num_iteration == args.num_iteration:
            save_file = os.path.join(
                working_dir, f"library_{num_iteration}.yaml")
            library.save(save_file)
            logger.warning("Save the rule library to `%s`", save_file)


if __name__ == "__main__":
    main()
99 |
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-11/5-shot_cot_htt_test.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary.
3 | Knowledge:
4 | {%- set key_rules = {} -%}
5 | {%- for rule in rules -%}
6 | {%- set tokens = rule.split("=")[0].split("+") -%}
7 | {%- set _ = key_rules.update({(tokens | length, tokens[0] | trim, tokens[1] | trim): rule}) -%}
8 | {%- endfor -%}
9 | {%- set global = namespace(old=["", "", ""]) -%}
10 | {%- for key, rule in key_rules | dictsort -%}
11 | {%- if key[0] > 2 -%}
12 | {%- set key = ["carry", key[1], key[2]] -%}
13 | {%- else -%}
14 | {%- set key = ["no_carry", key[1], key[2]] -%}
15 | {%- endif -%}
16 | {%- if global.old[2] and global.old != key -%}
17 | </{{ global.old[2] }}>
18 | {%- endif -%}
19 | {%- if global.old[1] and global.old[:2] != key[:2] -%}
20 | </{{ global.old[1] }}>
21 | {%- endif -%}
22 | {%- if global.old[0] and global.old[0] != key[0] %}
23 | </{{ global.old[0] }}>
24 | {%- endif -%}
25 | {%- if global.old[0] != key[0] %}
26 | <{{ key[0] }}>
27 | {%- endif -%}
28 | {%- if global.old[:2] != key[:2] %}
29 | <{{ key[1] }}>
30 | {%- endif -%}
31 | {%- if global.old != key -%}
32 | <{{ key[2] }}>
33 | {%- endif -%}
34 | {{- rule -}}
35 | {%- set global.old = key -%}
36 | {%- endfor -%}
37 | </{{ global.old[2] }}></{{ global.old[1] }}>
38 | </{{ global.old[0] }}>
39 |
40 | prompt: |
41 | Question: In base-11, what is 76 + 14?
42 | Answer:
43 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
44 | There is no carry. <6><4>6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A.
45 | There is no carry. <7><1>7 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, A.
46 | There is no carry. So far the answer has 2 digits: 8, A.
47 | Therefore, the answer is 8A.
48 |
49 | Question: In base-11, what is 97 + 74?
50 | Answer:
51 | 97 is 9, 7. 74 is 7, 4. So the steps are 7 + 4, 9 + 7.
52 | There is no carry. <7><4>7 + 4 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
53 | The carry is 1. <9><7>9 + 7 + 1 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 2 digits: 6, 0.
54 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 6, 0.
55 | Therefore, the answer is 160.
56 |
57 | Question: In base-11, what is 85 + A3?
58 | Answer:
59 | 85 is 8, 5. A3 is A, 3. So the steps are 5 + 3, 8 + A.
60 | There is no carry. <5><3>5 + 3 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 1 digit: 8.
61 | There is no carry. <8><A>8 + A = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 2 digits: 7, 8.
62 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 7, 8.
63 | Therefore, the answer is 178.
64 |
65 | Question: In base-11, what is 92 + 52?
66 | Answer:
67 | 92 is 9, 2. 52 is 5, 2. So the steps are 2 + 2, 9 + 5.
68 | There is no carry. <2><2>2 + 2 = 4. 4 is 0, 4. So we clear the carry. Prepend 4 to the answer. So far the answer has 1 digit: 4.
69 | There is no carry. <9><5>9 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, 4.
70 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, 4.
71 | Therefore, the answer is 134.
72 |
73 | Question: In base-11, what is 29 + 58?
74 | Answer:
75 | 29 is 2, 9. 58 is 5, 8. So the steps are 9 + 8, 2 + 5.
76 | There is no carry. <9><8>9 + 8 = 16. 16 is 1, 6. So we set the carry to 1. Prepend 6 to the answer. So far the answer has 1 digit: 6.
77 | The carry is 1. <2><5>2 + 5 + 1 = 8. 8 is 0, 8. So we clear the carry. Prepend 8 to the answer. So far the answer has 2 digits: 8, 6.
78 | There is no carry. So far the answer has 2 digits: 8, 6.
79 | Therefore, the answer is 86.
80 |
81 | Question: In base-11, what is {{ query[0] }} + {{ query[1] }}?
82 | Answer:
83 |
84 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/list_functions/4-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: Infer the function behind the examples. Use the function to answer the questions.
3 |
4 | prompt: |
5 | Examples:
6 | [0, 8, 5, 2, 7, 1, 4, 6, 9, 3] -> [3, 8, 5, 2, 7, 1, 4, 6, 9, 3]
7 | [4, 0, 1] -> [1, 0, 1]
8 | [6, 1, 7, 5, 3, 2, 8, 4, 9] -> [9, 1, 7, 5, 3, 2, 8, 4, 9]
9 | [6, 2, 1, 9, 4] -> [4, 2, 1, 9, 4]
10 | [2, 9, 7, 5, 3, 8, 1, 4] -> [4, 9, 7, 5, 3, 8, 1, 4]
11 | [5, 1, 7, 8, 9, 4, 0, 3, 2] -> [2, 1, 7, 8, 9, 4, 0, 3, 2]
12 | Questions:
13 | [5, 8, 6, 1, 0, 9, 7] -> ?
14 | [3, 8, 6, 0] -> ?
15 | [8, 3] -> ?
16 | [3, 2, 0, 1, 6, 8, 7, 5] -> ?
17 | [5, 2, 0, 8, 9, 6] -> ?
18 | [8, 5, 7, 4, 2, 3, 6] -> ?
19 | Answers:
20 | From the examples, we infer the function is to replace the first element with the last element.
21 | Using this function, the answers to the questions are:
22 | [5, 8, 6, 1, 0, 9, 7] -> [7, 8, 6, 1, 0, 9, 7]
23 | [3, 8, 6, 0] -> [0, 8, 6, 0]
24 | [8, 3] -> [3, 3]
25 | [3, 2, 0, 1, 6, 8, 7, 5] -> [5, 2, 0, 1, 6, 8, 7, 5]
26 | [5, 2, 0, 8, 9, 6] -> [6, 2, 0, 8, 9, 6]
27 | [8, 5, 7, 4, 2, 3, 6] -> [6, 5, 7, 4, 2, 3, 6]
28 |
29 | Examples:
30 | [2] -> [2]
31 | [4, 3, 0, 1, 7, 8] -> [4, 3, 0, 1, 7, 8, 3]
32 | [5, 0, 2, 9] -> [5, 0, 2, 9, 9]
33 | [7, 0, 2, 5] -> [7, 0, 2, 5]
34 | [3, 4, 7, 6, 0] -> [3, 4, 7, 6, 0, 3]
35 | [8, 1, 2, 3, 7] -> [8, 1, 2, 3, 7, 3]
36 | Questions:
37 | [9, 1] -> ?
38 | [6] -> ?
39 | [1, 9, 5, 0] -> ?
40 | [4, 6, 9, 0, 7, 8, 1, 2] -> ?
41 | [4, 2, 8] -> ?
42 | [6, 2, 0, 3, 1, 8, 7] -> ?
43 | Answers:
44 | From the examples, we infer the function is to append 3 if the list contains a 3, else append 9 if the list contains a 9.
45 | Using this function, the answers to the questions are:
46 | [9, 1] -> [9, 1, 9]
47 | [6] -> [6]
48 | [1, 9, 5, 0] -> [1, 9, 5, 0, 9]
49 | [4, 6, 9, 0, 7, 8, 1, 2] -> [4, 6, 9, 0, 7, 8, 1, 2, 9]
50 | [4, 2, 8] -> [4, 2, 8]
51 | [6, 2, 0, 3, 1, 8, 7] -> [6, 2, 0, 3, 1, 8, 7, 3]
52 |
53 | Examples:
54 | [1, 0, 9, 7, 4, 2, 5, 3, 6, 8] -> [9, 0, 1, 4, 4, 5]
55 | [3, 8, 4, 6, 1, 5, 7, 0] -> [4, 8, 3, 4, 1, 7]
56 | [5, 4, 7, 2, 9, 3, 8, 1] -> [7, 4, 5, 4, 9, 8]
57 | [3, 9, 2, 0, 6, 8, 5, 1, 7] -> [2, 9, 3, 4, 6, 5]
58 | [9, 2, 1, 3, 4, 7, 6, 8, 5, 0] -> [1, 2, 9, 4, 4, 6]
59 | [0, 7, 9, 3, 1, 5, 8, 2, 6] -> [9, 7, 0, 4, 1, 8]
60 | Questions:
61 | [3, 9, 7, 6, 0, 5, 1] -> ?
62 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> ?
63 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> ?
64 | [8, 4, 9, 1, 3, 2, 7] -> ?
65 | [8, 3, 7, 0, 4, 2, 5] -> ?
66 | [6, 2, 1, 0, 9, 8, 5] -> ?
67 | Answers:
68 | From the examples, we infer the function is to generate a list of elements 3, 2, 1, the number 4, then elements 5 and 7.
69 | Using this function, the answers to the questions are:
70 | [3, 9, 7, 6, 0, 5, 1] -> [7, 9, 3, 4, 0, 1]
71 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> [9, 5, 2, 4, 8, 0]
72 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> [7, 0, 9, 4, 4, 3]
73 | [8, 4, 9, 1, 3, 2, 7] -> [9, 4, 8, 4, 3, 7]
74 | [8, 3, 7, 0, 4, 2, 5] -> [7, 3, 8, 4, 4, 5]
75 | [6, 2, 1, 0, 9, 8, 5] -> [1, 2, 6, 4, 9, 5]
76 |
77 | Examples:
78 | [] -> []
79 | [1, 5, 6, 2, 8, 3, 7] -> [7, 3, 8, 2, 6, 5, 1]
80 | [2, 1, 9, 6, 3, 5, 4, 8] -> [8, 4, 5, 3, 6, 9, 1, 2]
81 | [9, 1, 2, 8, 0] -> [0, 8, 2, 1, 9]
82 | [1, 0, 7, 3, 9, 2] -> [2, 9, 3, 7, 0, 1]
83 | [7, 6, 3, 0, 4, 1, 5, 2] -> [2, 5, 1, 4, 0, 3, 6, 7]
84 | Questions:
85 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> ?
86 | [6, 4, 0] -> ?
87 | [3, 6, 1, 7, 0, 4] -> ?
88 | [5, 4, 2, 7] -> ?
89 | [5, 7, 6, 2, 3] -> ?
90 | [7, 9] -> ?
91 | Answers:
92 | From the examples, we infer the function is to reverse the elements.
93 | Using this function, the answers to the questions are:
94 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> [9, 1, 3, 4, 0, 8, 7, 5, 6, 2]
95 | [6, 4, 0] -> [0, 4, 6]
96 | [3, 6, 1, 7, 0, 4] -> [4, 0, 7, 1, 6, 3]
97 | [5, 4, 2, 7] -> [7, 2, 4, 5]
98 | [5, 7, 6, 2, 3] -> [3, 2, 6, 7, 5]
99 | [7, 9] -> [9, 7]
100 |
101 | Examples:
102 | {%- for i in range(train_queries | length) %}
103 | {{ train_queries[i] }} -> {{ train_answers[i] }}
104 | {%- endfor %}
105 | Questions:
106 | {%- for query in queries %}
107 | {{ query }} -> ?
108 | {%- endfor %}
109 | Answers:
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-16/5-shot_cot_htt_test.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary.
3 | Knowledge:
4 | {%- set key_rules = {} -%}
5 | {%- for rule in rules -%}
6 | {%- set tokens = rule.split("=")[0].split("+") -%}
7 | {%- set _ = key_rules.update({(tokens | length, tokens[0] | trim, tokens[1] | trim): rule}) -%}
8 | {%- endfor -%}
9 | {%- set global = namespace(old=["", "", ""]) -%}
10 | {%- for key, rule in key_rules | dictsort -%}
11 | {%- if key[0] > 2 -%}
12 | {%- set key = ["carry", key[1], key[2]] -%}
13 | {%- else -%}
14 | {%- set key = ["no_carry", key[1], key[2]] -%}
15 | {%- endif -%}
16 | {%- if global.old[2] and global.old != key -%}
17 | </{{ global.old[2] }}>
18 | {%- endif -%}
19 | {%- if global.old[1] and global.old[:2] != key[:2] -%}
20 | </{{ global.old[1] }}>
21 | {%- endif -%}
22 | {%- if global.old[0] and global.old[0] != key[0] %}
23 | </{{ global.old[0] }}>
24 | {%- endif -%}
25 | {%- if global.old[0] != key[0] %}
26 | <{{ key[0] }}>
27 | {%- endif -%}
28 | {%- if global.old[:2] != key[:2] %}
29 | <{{ key[1] }}>
30 | {%- endif -%}
31 | {%- if global.old != key -%}
32 | <{{ key[2] }}>
33 | {%- endif -%}
34 | {{- rule -}}
35 | {%- set global.old = key -%}
36 | {%- endfor -%}
37 | </{{ global.old[2] }}></{{ global.old[1] }}>
38 | </{{ global.old[0] }}>
39 |
40 | prompt: |
41 | Question: In base-16, what is EC + DD?
42 | Answer:
43 | EC is E, C. DD is D, D. So the steps are C + D, E + D.
44 | There is no carry. <C><D>C + D = 19. 19 is 1, 9. So we set the carry to 1. Prepend 9 to the answer. So far the answer has 1 digit: 9.
45 | The carry is 1. <E><D>E + D + 1 = 1C. 1C is 1, C. So we set the carry to 1. Prepend C to the answer. So far the answer has 2 digits: C, 9.
46 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, C, 9.
47 | Therefore, the answer is 1C9.
48 |
49 | Question: In base-16, what is 18 + 9F?
50 | Answer:
51 | 18 is 1, 8. 9F is 9, F. So the steps are 8 + F, 1 + 9.
52 | There is no carry. <8><F>8 + F = 17. 17 is 1, 7. So we set the carry to 1. Prepend 7 to the answer. So far the answer has 1 digit: 7.
53 | The carry is 1. <1><9>1 + 9 + 1 = B. B is 0, B. So we clear the carry. Prepend B to the answer. So far the answer has 2 digits: B, 7.
54 | There is no carry. So far the answer has 2 digits: B, 7.
55 | Therefore, the answer is B7.
56 |
57 | Question: In base-16, what is 79 + 8B?
58 | Answer:
59 | 79 is 7, 9. 8B is 8, B. So the steps are 9 + B, 7 + 8.
60 | There is no carry. <9><B>9 + B = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4.
61 | The carry is 1. <7><8>7 + 8 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 4.
62 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 4.
63 | Therefore, the answer is 104.
64 |
65 | Question: In base-16, what is A6 + 94?
66 | Answer:
67 | A6 is A, 6. 94 is 9, 4. So the steps are 6 + 4, A + 9.
68 | There is no carry. <6><4>6 + 4 = A. A is 0, A. So we clear the carry. Prepend A to the answer. So far the answer has 1 digit: A.
69 | There is no carry. <A><9>A + 9 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 2 digits: 3, A.
70 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 3, A.
71 | Therefore, the answer is 13A.
72 |
73 | Question: In base-16, what is 54 + D3?
74 | Answer:
75 | 54 is 5, 4. D3 is D, 3. So the steps are 4 + 3, 5 + D.
76 | There is no carry. <4><3>4 + 3 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7.
77 | There is no carry. <5><D>5 + D = 12. 12 is 1, 2. So we set the carry to 1. Prepend 2 to the answer. So far the answer has 2 digits: 2, 7.
78 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 2, 7.
79 | Therefore, the answer is 127.
80 |
81 | Question: In base-16, what is {{ query[0] }} + {{ query[1] }}?
82 | Answer:
83 |
84 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/arithmetic/base-9/5-shot_cot_htt_test.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary.
3 | Knowledge:
4 | {%- set key_rules = {} -%}
5 | {%- for rule in rules -%}
6 | {%- set tokens = rule.split("=")[0].split("+") -%}
7 | {%- set _ = key_rules.update({(tokens | length, tokens[0] | trim, tokens[1] | trim): rule}) -%}
8 | {%- endfor -%}
9 | {%- set global = namespace(old=["", "", ""]) -%}
10 | {%- for key, rule in key_rules | dictsort -%}
11 | {%- if key[0] > 2 -%}
12 | {%- set key = ["carry", key[1], key[2]] -%}
13 | {%- else -%}
14 | {%- set key = ["no_carry", key[1], key[2]] -%}
15 | {%- endif -%}
16 | {%- if global.old[2] and global.old != key -%}
17 | </{{ global.old[2] }}>
18 | {%- endif -%}
19 | {%- if global.old[1] and global.old[:2] != key[:2] -%}
20 | </{{ global.old[1] }}>
21 | {%- endif -%}
22 | {%- if global.old[0] and global.old[0] != key[0] %}
23 | </{{ global.old[0] }}>
24 | {%- endif -%}
25 | {%- if global.old[0] != key[0] %}
26 | <{{ key[0] }}>
27 | {%- endif -%}
28 | {%- if global.old[:2] != key[:2] %}
29 | <{{ key[1] }}>
30 | {%- endif -%}
31 | {%- if global.old != key -%}
32 | <{{ key[2] }}>
33 | {%- endif -%}
34 | {{- rule -}}
35 | {%- set global.old = key -%}
36 | {%- endfor -%}
37 | </{{ global.old[2] }}></{{ global.old[1] }}>
38 | </{{ global.old[0] }}>
39 |
40 | prompt: |
41 | Question: In base-9, what is 76 + 14?
42 | Answer:
43 | 76 is 7, 6. 14 is 1, 4. So the steps are 6 + 4, 7 + 1.
44 | There is no carry. <6><4>6 + 4 = 11. 11 is 1, 1. So we set the carry to 1. Prepend 1 to the answer. So far the answer has 1 digit: 1.
45 | The carry is 1. <7><1>7 + 1 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 1.
46 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 1.
47 | Therefore, the answer is 101.
48 |
49 | Question: In base-9, what is 86 + 57?
50 | Answer:
51 | 86 is 8, 6. 57 is 5, 7. So the steps are 6 + 7, 8 + 5.
52 | There is no carry. <6><7>6 + 7 = 14. 14 is 1, 4. So we set the carry to 1. Prepend 4 to the answer. So far the answer has 1 digit: 4.
53 | The carry is 1. <8><5>8 + 5 + 1 = 15. 15 is 1, 5. So we set the carry to 1. Prepend 5 to the answer. So far the answer has 2 digits: 5, 4.
54 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 5, 4.
55 | Therefore, the answer is 154.
56 |
57 | Question: In base-9, what is 63 + 34?
58 | Answer:
59 | 63 is 6, 3. 34 is 3, 4. So the steps are 3 + 4, 6 + 3.
60 | There is no carry. <3><4>3 + 4 = 7. 7 is 0, 7. So we clear the carry. Prepend 7 to the answer. So far the answer has 1 digit: 7.
61 | There is no carry. <6><3>6 + 3 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 7.
62 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 7.
63 | Therefore, the answer is 107.
64 |
65 | Question: In base-9, what is 31 + 58?
66 | Answer:
67 | 31 is 3, 1. 58 is 5, 8. So the steps are 1 + 8, 3 + 5.
68 | There is no carry. <1><8>1 + 8 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 1 digit: 0.
69 | The carry is 1. <3><5>3 + 5 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 0.
70 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 0.
71 | Therefore, the answer is 100.
72 |
73 | Question: In base-9, what is 67 + 25?
74 | Answer:
75 | 67 is 6, 7. 25 is 2, 5. So the steps are 7 + 5, 6 + 2.
76 | There is no carry. <7><5>7 + 5 = 13. 13 is 1, 3. So we set the carry to 1. Prepend 3 to the answer. So far the answer has 1 digit: 3.
77 | The carry is 1. <6><2>6 + 2 + 1 = 10. 10 is 1, 0. So we set the carry to 1. Prepend 0 to the answer. So far the answer has 2 digits: 0, 3.
78 | The carry is 1. Prepend 1 to the answer. So far the answer has 3 digits: 1, 0, 3.
79 | Therefore, the answer is 103.
80 |
81 | Question: In base-9, what is {{ query[0] }} + {{ query[1] }}?
82 | Answer:
83 |
84 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/prompt/list_functions/4-shot_cot_htt_train.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: Infer the function behind the examples. Use the function to answer the questions.
3 |
4 | prompt: |
5 | Examples:
6 | [0, 8, 5, 2, 7, 1, 4, 6, 9, 3] -> [3, 8, 5, 2, 7, 1, 4, 6, 9, 3]
7 | [4, 0, 1] -> [1, 0, 1]
8 | [6, 1, 7, 5, 3, 2, 8, 4, 9] -> [9, 1, 7, 5, 3, 2, 8, 4, 9]
9 | [6, 2, 1, 9, 4] -> [4, 2, 1, 9, 4]
10 | [2, 9, 7, 5, 3, 8, 1, 4] -> [4, 9, 7, 5, 3, 8, 1, 4]
11 | [5, 1, 7, 8, 9, 4, 0, 3, 2] -> [2, 1, 7, 8, 9, 4, 0, 3, 2]
12 | Questions:
13 | [5, 8, 6, 1, 0, 9, 7] -> ?
14 | [3, 8, 6, 0] -> ?
15 | [8, 3] -> ?
16 | [3, 2, 0, 1, 6, 8, 7, 5] -> ?
17 | [5, 2, 0, 8, 9, 6] -> ?
18 | [8, 5, 7, 4, 2, 3, 6] -> ?
19 | Answers:
20 | From the examples, we infer the function is to replace the first element with the last element.
21 | Using this function, the answers to the questions are:
22 | [5, 8, 6, 1, 0, 9, 7] -> [7, 8, 6, 1, 0, 9, 7]
23 | [3, 8, 6, 0] -> [0, 8, 6, 0]
24 | [8, 3] -> [3, 3]
25 | [3, 2, 0, 1, 6, 8, 7, 5] -> [5, 2, 0, 1, 6, 8, 7, 5]
26 | [5, 2, 0, 8, 9, 6] -> [6, 2, 0, 8, 9, 6]
27 | [8, 5, 7, 4, 2, 3, 6] -> [6, 5, 7, 4, 2, 3, 6]
28 |
29 | Examples:
30 | [2] -> [2]
31 | [4, 3, 0, 1, 7, 8] -> [4, 3, 0, 1, 7, 8, 3]
32 | [5, 0, 2, 9] -> [5, 0, 2, 9, 9]
33 | [7, 0, 2, 5] -> [7, 0, 2, 5]
34 | [3, 4, 7, 6, 0] -> [3, 4, 7, 6, 0, 3]
35 | [8, 1, 2, 3, 7] -> [8, 1, 2, 3, 7, 3]
36 | Questions:
37 | [9, 1] -> ?
38 | [6] -> ?
39 | [1, 9, 5, 0] -> ?
40 | [4, 6, 9, 0, 7, 8, 1, 2] -> ?
41 | [4, 2, 8] -> ?
42 | [6, 2, 0, 3, 1, 8, 7] -> ?
43 | Answers:
44 | From the examples, we infer the function is to append 3 if the list contains a 3, else append 9 if the list contains a 9.
45 | Using this function, the answers to the questions are:
46 | [9, 1] -> [9, 1, 9]
47 | [6] -> [6]
48 | [1, 9, 5, 0] -> [1, 9, 5, 0, 9]
49 | [4, 6, 9, 0, 7, 8, 1, 2] -> [4, 6, 9, 0, 7, 8, 1, 2, 9]
50 | [4, 2, 8] -> [4, 2, 8]
51 | [6, 2, 0, 3, 1, 8, 7] -> [6, 2, 0, 3, 1, 8, 7, 3]
52 |
53 | Examples:
54 | [1, 0, 9, 7, 4, 2, 5, 3, 6, 8] -> [9, 0, 1, 4, 4, 5]
55 | [3, 8, 4, 6, 1, 5, 7, 0] -> [4, 8, 3, 4, 1, 7]
56 | [5, 4, 7, 2, 9, 3, 8, 1] -> [7, 4, 5, 4, 9, 8]
57 | [3, 9, 2, 0, 6, 8, 5, 1, 7] -> [2, 9, 3, 4, 6, 5]
58 | [9, 2, 1, 3, 4, 7, 6, 8, 5, 0] -> [1, 2, 9, 4, 4, 6]
59 | [0, 7, 9, 3, 1, 5, 8, 2, 6] -> [9, 7, 0, 4, 1, 8]
60 | Questions:
61 | [3, 9, 7, 6, 0, 5, 1] -> ?
62 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> ?
63 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> ?
64 | [8, 4, 9, 1, 3, 2, 7] -> ?
65 | [8, 3, 7, 0, 4, 2, 5] -> ?
66 | [6, 2, 1, 0, 9, 8, 5] -> ?
67 | Answers:
68 | From the examples, we infer the function is to generate a list of elements 3, 2, 1, the number 4, then elements 5 and 7.
69 | Using this function, the answers to the questions are:
70 | [3, 9, 7, 6, 0, 5, 1] -> [7, 9, 3, 4, 0, 1]
71 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> [9, 5, 2, 4, 8, 0]
72 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> [7, 0, 9, 4, 4, 3]
73 | [8, 4, 9, 1, 3, 2, 7] -> [9, 4, 8, 4, 3, 7]
74 | [8, 3, 7, 0, 4, 2, 5] -> [7, 3, 8, 4, 4, 5]
75 | [6, 2, 1, 0, 9, 8, 5] -> [1, 2, 6, 4, 9, 5]
76 |
77 | Examples:
78 | [] -> []
79 | [1, 5, 6, 2, 8, 3, 7] -> [7, 3, 8, 2, 6, 5, 1]
80 | [2, 1, 9, 6, 3, 5, 4, 8] -> [8, 4, 5, 3, 6, 9, 1, 2]
81 | [9, 1, 2, 8, 0] -> [0, 8, 2, 1, 9]
82 | [1, 0, 7, 3, 9, 2] -> [2, 9, 3, 7, 0, 1]
83 | [7, 6, 3, 0, 4, 1, 5, 2] -> [2, 5, 1, 4, 0, 3, 6, 7]
84 | Questions:
85 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> ?
86 | [6, 4, 0] -> ?
87 | [3, 6, 1, 7, 0, 4] -> ?
88 | [5, 4, 2, 7] -> ?
89 | [5, 7, 6, 2, 3] -> ?
90 | [7, 9] -> ?
91 | Answers:
92 | From the examples, we infer the function is to reverse the elements.
93 | Using this function, the answers to the questions are:
94 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> [9, 1, 3, 4, 0, 8, 7, 5, 6, 2]
95 | [6, 4, 0] -> [0, 4, 6]
96 | [3, 6, 1, 7, 0, 4] -> [4, 0, 7, 1, 6, 3]
97 | [5, 4, 2, 7] -> [7, 2, 4, 5]
98 | [5, 7, 6, 2, 3] -> [3, 2, 6, 7, 5]
99 | [7, 9] -> [9, 7]
100 |
101 | Examples:
102 | {%- for i in range(train_queries | length) %}
103 | {{ train_queries[i] }} -> {{ train_answers[i] }}
104 | {%- endfor %}
105 | Questions:
106 | {%- for query in queries %}
107 | {{ query }} -> ?
108 | {%- endfor %}
109 | Answers:
110 |
111 | pattern: '(?<=the function is to )[^.]+\.'
--------------------------------------------------------------------------------
/source/test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Test the llm after learning the rules."""
17 |
18 | import collections
19 | import os
20 | import pprint
21 | import random
22 |
23 | import datasets
24 | import models
25 | import prompts
26 | import tqdm
27 | import util
28 | import yaml
29 |
30 |
def main():
    """Evaluate a model on the test split, optionally with a rule library.

    Loads the YAML config named on the command line, keeps the section for
    the active split (default "test"), and builds the dataset, model and
    prompt function that section names. When the config has a "library"
    entry, the rule library is loaded, filtered by `min_coverage` and
    `min_confidence`, and passed to the prompt function. Accuracies are
    grouped by each sample's "level" and reported per level and on average.

    Raises:
        ValueError: If the config names an unknown dataset or model.
    """
    random.seed(0)
    args = util.parse_args()
    args.split = args.split or "test"

    with open(args.config, "r") as fin:
        cfg = yaml.safe_load(fin.read())
    # The config file holds one section per split; keep only the active one.
    cfg = cfg[args.split]

    working_dir = util.create_working_directory(args, cfg)
    logger = util.create_logger(working_dir)
    logger.warning(pprint.pformat(vars(args)))
    logger.warning(pprint.pformat(cfg))

    if cfg["dataset"] == "clutrr":
        dataset = datasets.CLUTRR(
            os.path.join(args.artifacts, "dataset/clutrr"))
    elif cfg["dataset"].startswith("base-"):
        # Names look like "base-11"; everything after "base-" is the base.
        base = int(cfg["dataset"][5:])
        dataset = datasets.Arithmetic(
            os.path.join(args.artifacts, "dataset/arithmetic"), base=base)
    elif cfg["dataset"] == "list_functions":
        dataset = datasets.ListFunctions(
            os.path.join(args.artifacts, "dataset/list_functions"))
    else:
        # The config key is "dataset"; looking up "datasets" here used to
        # raise KeyError and hide the intended error message.
        raise ValueError(f"Unknown dataset `{cfg['dataset']}`")

    test_set = dataset.get_split(args.split)
    max_tokens = cfg.get("max_tokens", 2000)
    if cfg["model"].startswith("gpt"):
        model = models.GPT(cfg["model"], max_tokens=max_tokens)
    elif cfg["model"].startswith("gemini"):
        model = models.Gemini(cfg["model"], max_tokens=max_tokens)
    else:
        raise ValueError(f"Unknown model `{cfg['model']}`")
    if "library" in cfg:
        library = prompts.RuleLibrary()
        library.load(os.path.join(args.artifacts, cfg["library"]))
        rules = library.to_prompt(cfg["min_coverage"], cfg["min_confidence"])
        function = prompts.PromptFunction.from_yaml(
            os.path.join(args.artifacts, cfg["prompt"]), rules=rules)
        logger.warning("Load the rule library from `%s`", cfg["library"])
        logger.warning("min coverage: %d, min confidence: %s, #rules: %d",
                       cfg["min_coverage"], cfg["min_confidence"], len(rules))
    else:
        function = prompts.PromptFunction.from_yaml(
            os.path.join(args.artifacts, cfg["prompt"]))

    # Accuracies are bucketed by the sample's "level" field.
    level2accs = collections.defaultdict(list)
    total_cost = 0
    for sample in tqdm.tqdm(test_set):
        truth = sample["answer"]
        level = sample["level"]
        pred, cost = function(model, sample)
        acc = dataset.evaluate(truth, pred)

        level2accs[level].append(acc)
        total_cost += cost
        logger.warning("truth: %s, pred: %s, accuracy: %s", truth, pred, acc)
        logger.warning("total cost: %s", total_cost)

    accs = []
    task_accs = []
    for level, level_accs in sorted(level2accs.items()):
        acc = sum(level_accs) / len(level_accs)
        accs.append(acc)
        if isinstance(level_accs[0], float):
            # Float accuracies are per-sample fractions: report the mean as
            # "raw" accuracy and the share of samples above 0.999 as "task"
            # accuracy.
            task_acc = sum(x > 0.999 for x in level_accs) / len(level_accs)
            task_accs.append(task_acc)
            logger.warning(
                "[%s] #sample: %d, raw accuracy: %s, task accuracy: %s",
                level, len(level_accs), acc, task_acc)
        else:
            logger.warning("[%s] #sample: %d, accuracy: %s",
                           level, len(level_accs), acc)
    if task_accs:
        logger.warning("average raw accuracy: %s, average task accuracy: %s",
                       sum(accs) / len(accs), sum(task_accs) / len(task_accs))
    else:
        logger.warning("average accuracy: %s", sum(accs) / len(accs))


if __name__ == "__main__":
    main()
113 |
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/textual/5-shot_cot.yaml:
--------------------------------------------------------------------------------
1 | prompt: |
2 | Document: Anthony went to the park with his father, James. Annie took her uncle James to the grocery store. Alan and his daughter Annie spent Father's Day together. Annie took her dad out to a sports bar, and they had a great time watching football and drinking beer there.
3 | Question: Anthony is Alan's what?
4 | Answer: We first extract all triplets from the document. We then find the path from Alan to Anthony. Finally, we reduce the relations on the path to get the answer.
5 | The triplets include (Anthony, father, James), (Annie, uncle, James), (Alan, daughter, Annie).
6 | The path from Alan to Anthony is (Alan, daughter, Annie), (Annie, uncle, James), (James, son, Anthony).
7 | The relations on the path are daughter, uncle, son.
8 | Daughter's uncle is brother. So the relations are reduced to brother, son.
9 | Brother's son is nephew. So the relations are reduced to nephew.
10 | Therefore, Anthony is Alan's nephew.
11 |
12 | Document: Valerie's biggest accomplishment is raising her son Carlos. Annie does n't like having to babysit her younger brother, Emmanuel. Valerie and her son Emmanuel had lunch together at a local Chinese restaurant.
13 | Question: Carlos is Annie's what?
14 | Answer: We first extract all triplets from the document. We then find the path from Annie to Carlos. Finally, we reduce the relations on the path to get the answer.
15 | The triplets include (Valerie, son, Carlos), (Annie, brother, Emmanuel), (Valerie, son, Emmanuel).
16 | The path from Annie to Carlos is (Annie, brother, Emmanuel), (Emmanuel, mother, Valerie), (Valerie, son, Carlos).
17 | The relations on the path are brother, mother, son.
18 | Brother's mother is mother. So the relations are reduced to mother, son.
19 | Mother's son is brother. So the relations are reduced to brother.
20 | Therefore, Carlos is Annie's brother.
21 |
22 | Document: James likes to take his daughter Jeanna fishing. James loves cooking with his daughter. Her name is Beverly. Jeanna loves visiting with her aunt Michelle.
23 | Question: Michelle is Beverly's what?
24 | Answer: We first extract all triplets from the document. We then find the path from Beverly to Michelle. Finally, we reduce the relations on the path to get the answer.
25 | The triplets include (James, daughter, Jeanna), (James, daughter, Beverly), (Jeanna, aunt, Michelle).
26 | The path from Beverly to Michelle is (Beverly, father, James), (James, daughter, Jeanna), (Jeanna, aunt, Michelle).
27 | The relations on the path are father, daughter, aunt.
28 | Father's daughter is sister. So the relations are reduced to sister, aunt.
29 | Sister's aunt is aunt. So the relations are reduced to aunt.
30 | Therefore, Michelle is Beverly's aunt.
31 |
32 | Document: Lee was finally coming of age and it was time for him and his father to go on a coming of age camping trip. Beverly, James's younger daughter, decided she wanted to go on the trip despite being several years younger. Jeanna took her younger sister Beverly to the carnival last weekend.
33 | Question: Jeanna is Lee's what?
34 | Answer: We first extract all triplets from the document. We then find the path from Lee to Jeanna. Finally, we reduce the relations on the path to get the answer.
35 | The triplets include (Lee, father, James), (James, daughter, Beverly), (Jeanna, sister, Beverly).
36 | The path from Lee to Jeanna is (Lee, father, James), (James, daughter, Beverly), (Beverly, sister, Jeanna).
37 | The relations on the path are father, daughter, sister.
38 | Father's daughter is sister. So the relations are reduced to sister, sister.
39 | Sister's sister is sister. So the relations are reduced to sister.
40 | Therefore, Jeanna is Lee's sister.
41 |
42 | Document: Craig's sister, Rosie, bought movie tickets at a discount rate. Rosie and her father Elliott love to go skiing. Often, Elliott will invite his mother Molly to join them.
43 | Question: Molly is Craig's what?
44 | Answer: We first extract all triplets from the document. We then find the path from Craig to Molly. Finally, we reduce the relations on the path to get the answer.
45 | The triplets include (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly).
46 | The path from Craig to Molly is (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly).
47 | The relations on the path are sister, father, mother.
48 | Sister's father is father. So the relations are reduced to father, mother.
49 | Father's mother is grandmother. So the relations are reduced to grandmother.
50 | Therefore, Molly is Craig's grandmother.
51 |
52 | Document: {{ document }}
53 | Question: {{ query[1] }} is {{ query[0] }}'s what?
54 | Answer:
55 |
56 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/checkpoint/arithmetic/base-9/gpt-4_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | 0 + 0 = 0.: [17, 14.0]
2 | 0 + 1 = 1.: [14, 11.0]
3 | 0 + 2 = 2.: [15, 13.0]
4 | 0 + 3 = 3.: [22, 20.0]
5 | 0 + 4 = 4.: [17, 17.0]
6 | 0 + 5 = 5.: [27, 24.0]
7 | 0 + 6 = 6.: [23, 23.0]
8 | 0 + 7 = 7.: [15, 13.0]
9 | 0 + 8 = 8.: [9, 9.0]
10 | 1 + 0 = 1.: [7, 7.0]
11 | 1 + 1 + 1 = 3.: [1, 1.0]
12 | 1 + 1 + 1 = 4.: [10, 0.0]
13 | 1 + 1 = 2.: [24, 23.0]
14 | 1 + 2 + 1 = 4.: [1, 0.0]
15 | 1 + 2 + 1 = 5.: [18, 0.0]
16 | 1 + 2 = 3.: [25, 19.0]
17 | 1 + 3 + 1 = 5.: [4, 2.0]
18 | 1 + 3 + 1 = 6.: [12, 0.0]
19 | 1 + 3 = 4.: [18, 15.0]
20 | 1 + 4 + 1 = 7.: [23, 0.0]
21 | 1 + 4 = 5.: [17, 16.0]
22 | 1 + 5 + 1 = 8.: [11, 0.0]
23 | 1 + 5 = 6.: [26, 23.0]
24 | 1 + 6 + 1 = 9.: [14, 0.0]
25 | 1 + 6 = 7.: [11, 8.0]
26 | 1 + 7 + 1 = 10.: [27, 24.0]
27 | 1 + 7 = 8.: [10, 9.0]
28 | 1 + 8 + 1 = 11.: [12, 11.0]
29 | 1 + 8 = 10.: [59, 43.0]
30 | 2 + 0 = 2.: [24, 23.0]
31 | 2 + 1 + 1 = 5.: [16, 0.0]
32 | 2 + 1 = 3.: [26, 22.0]
33 | 2 + 2 + 1 = 6.: [21, 0.0]
34 | 2 + 2 = 4.: [15, 13.0]
35 | 2 + 3 + 1 = 7.: [29, 0.0]
36 | 2 + 3 = 5.: [32, 27.0]
37 | 2 + 4 + 1 = 8.: [14, 0.0]
38 | 2 + 4 = 6.: [13, 10.0]
39 | 2 + 5 + 1 = 9.: [17, 0.0]
40 | 2 + 5 = 7.: [6, 5.0]
41 | 2 + 5 = 8.: [6, 0.0]
42 | 2 + 6 + 1 = 10.: [21, 19.0]
43 | 2 + 6 = 11.: [1, 0.0]
44 | 2 + 6 = 8.: [23, 17.0]
45 | 2 + 7 + 1 = 11.: [21, 18.0]
46 | 2 + 7 = 10.: [56, 36.0]
47 | 2 + 8 + 1 = 12.: [9, 8.0]
48 | 2 + 8 = 10.: [2, 0.0]
49 | 2 + 8 = 11.: [48, 26.0]
50 | 3 + 0 = 3.: [17, 15.0]
51 | 3 + 1 + 1 = 5.: [3, 2.0]
52 | 3 + 1 + 1 = 6.: [23, 0.0]
53 | 3 + 1 = 4.: [22, 17.0]
54 | 3 + 2 + 1 = 7.: [15, 0.0]
55 | 3 + 2 = 5.: [7, 5.0]
56 | 3 + 3 + 1 = 8.: [24, 0.0]
57 | 3 + 3 = 6.: [16, 15.0]
58 | 3 + 4 + 1 = 9.: [24, 0.0]
59 | 3 + 4 = 7.: [25, 22.0]
60 | 3 + 4 = 8.: [1, 0.0]
61 | 3 + 5 + 1 = 10.: [26, 21.0]
62 | 3 + 5 = 8.: [17, 13.0]
63 | 3 + 6 + 1 = 11.: [20, 17.0]
64 | 3 + 6 = 10.: [60, 44.0]
65 | 3 + 7 + 1 = 12.: [4, 3.0]
66 | 3 + 7 = 10.: [1, 0.0]
67 | 3 + 7 = 11.: [48, 25.0]
68 | 3 + 8 + 1 = 13.: [13, 10.0]
69 | 3 + 8 = 12.: [56, 39.0]
70 | 4 + 0 = 4.: [20, 19.0]
71 | 4 + 1 + 1 = 7.: [17, 0.0]
72 | 4 + 1 = 5.: [16, 14.0]
73 | 4 + 2 + 1 = 8.: [17, 0.0]
74 | 4 + 2 = 6.: [26, 22.0]
75 | 4 + 3 + 1 = 8.: [1, 1.0]
76 | 4 + 3 + 1 = 9.: [10, 0.0]
77 | 4 + 3 = 7.: [21, 15.0]
78 | 4 + 3 = 8.: [1, 0.0]
79 | 4 + 4 + 1 = 10.: [6, 5.0]
80 | 4 + 4 = 8.: [21, 20.0]
81 | 4 + 5 + 1 = 11.: [10, 7.0]
82 | 4 + 5 = 10.: [45, 30.0]
83 | 4 + 6 + 1 = 12.: [24, 17.0]
84 | 4 + 6 = 10.: [2, 0.0]
85 | 4 + 6 = 11.: [58, 30.0]
86 | 4 + 7 + 1 = 13.: [13, 10.0]
87 | 4 + 7 = 12.: [61, 43.0]
88 | 4 + 8 + 1 = 14.: [15, 12.0]
89 | 4 + 8 = 13.: [49, 33.0]
90 | 5 + 0 = 5.: [25, 23.0]
91 | 5 + 1 + 1 = 8.: [11, 0.0]
92 | 5 + 1 = 6.: [23, 22.0]
93 | 5 + 2 + 1 = 9.: [19, 0.0]
94 | 5 + 2 = 7.: [8, 8.0]
95 | 5 + 2 = 8.: [4, 0.0]
96 | 5 + 3 + 1 = 10.: [14, 11.0]
97 | 5 + 3 = 8.: [5, 3.0]
98 | 5 + 4 + 1 = 11.: [34, 27.0]
99 | 5 + 4 = 10.: [68, 54.0]
100 | 5 + 5 + 1 = 11.: [1, 0.0]
101 | 5 + 5 + 1 = 12.: [24, 17.0]
102 | 5 + 5 = 10.: [6, 0.0]
103 | 5 + 5 = 11.: [39, 27.0]
104 | 5 + 6 + 1 = 13.: [33, 24.0]
105 | 5 + 6 = 11.: [1, 0.0]
106 | 5 + 6 = 12.: [37, 29.0]
107 | 5 + 7 + 1 = 14.: [15, 12.0]
108 | 5 + 7 = 13.: [67, 47.0]
109 | 5 + 8 + 1 = 15.: [12, 8.0]
110 | 5 + 8 = 14.: [52, 30.0]
111 | 6 + 0 = 6.: [15, 11.0]
112 | 6 + 1 + 1 = 9.: [24, 0.0]
113 | 6 + 1 = 7.: [19, 14.0]
114 | 6 + 2 + 1 = 10.: [18, 16.0]
115 | 6 + 2 = 8.: [11, 9.0]
116 | 6 + 3 + 1 = 11.: [31, 24.0]
117 | 6 + 3 = 10.: [62, 50.0]
118 | 6 + 4 + 1 = 12.: [24, 21.0]
119 | 6 + 4 = 10.: [4, 0.0]
120 | 6 + 4 = 11.: [51, 34.0]
121 | 6 + 5 + 1 = 13.: [24, 18.0]
122 | 6 + 5 = 11.: [1, 1.0]
123 | 6 + 5 = 12.: [54, 38.0]
124 | 6 + 6 + 1 = 14.: [20, 10.0]
125 | 6 + 6 = 12.: [1, 1.0]
126 | 6 + 6 = 13.: [68, 47.0]
127 | 6 + 7 + 1 = 15.: [16, 10.0]
128 | 6 + 7 = 14.: [59, 49.0]
129 | 6 + 7 = 14.14 is 1, 4.: [1, 1.0]
130 | 6 + 8 + 1 = 16.: [14, 7.0]
131 | 6 + 8 = 15.: [39, 23.0]
132 | 7 + 0 = 7.: [25, 21.0]
133 | 7 + 1 + 1 = 10.: [23, 18.0]
134 | 7 + 1 = 8.: [13, 12.0]
135 | 7 + 2 + 0 = 10.: [1, 1.0]
136 | 7 + 2 + 1 = 11.: [22, 17.0]
137 | 7 + 2 = 10.: [55, 48.0]
138 | 7 + 3 + 1 = 12.: [18, 15.0]
139 | 7 + 3 = 10.: [1, 1.0]
140 | 7 + 3 = 11.: [55, 31.0]
141 | 7 + 4 + 1 = 13.: [25, 22.0]
142 | 7 + 4 = 12.: [59, 45.0]
143 | 7 + 5 + 1 = 14.: [27, 22.0]
144 | 7 + 5 = 13.: [43, 30.0]
145 | 7 + 6 + 1 = 15.: [14, 11.0]
146 | 7 + 6 = 14.: [75, 45.0]
147 | 7 + 7 + 1 = 16.: [4, 2.0]
148 | 7 + 7 = 15.: [61, 42.0]
149 | 7 + 7 = 16.: [1, 0.0]
150 | 7 + 8 + 1 = 17.: [20, 7.0]
151 | 7 + 8 = 16.: [59, 17.0]
152 | 8 + 0 = 8.: [19, 17.0]
153 | 8 + 1 + 1 = 11.: [15, 13.0]
154 | 8 + 1 = 10.: [65, 54.0]
155 | 8 + 2 + 1 = 12.: [15, 15.0]
156 | 8 + 2 = 11.: [63, 38.0]
157 | 8 + 3 + 1 = 13.: [23, 19.0]
158 | 8 + 3 = 11.: [1, 1.0]
159 | 8 + 3 = 12.: [42, 30.0]
160 | 8 + 4 + 1 = 14.: [4, 4.0]
161 | 8 + 4 = 13.: [34, 27.0]
162 | 8 + 5 + 1 = 15.: [20, 16.0]
163 | 8 + 5 = 14.: [46, 29.0]
164 | 8 + 6 + 1 = 16.: [14, 10.0]
165 | 8 + 6 = 14.: [1, 1.0]
166 | 8 + 6 = 15.: [50, 36.0]
167 | 8 + 7 + 1 = 17.: [15, 9.0]
168 | 8 + 7 = 16.: [72, 24.0]
169 | 8 + 8 + 1 = 18.: [21, 0.0]
170 | 8 + 8 = 16.: [14, 8.0]
171 | 8 + 8 = 17.: [30, 4.0]
172 |
--------------------------------------------------------------------------------
/source/prompts.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Prompt function and rule library."""
17 |
18 | from collections import defaultdict # pylint: disable=g-importing-member
19 | from collections.abc import Mapping, Sequence
20 | import logging
21 | import re
22 | from typing import Any
23 |
24 | import jinja2
25 | from nltk import tokenize
26 | import yaml
27 |
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 |
class RuleLibrary:
  """Rule library that stores learned rules and their statistics.

  For every rule string the library keeps two running totals: how many times
  the rule was proposed (`count`) and the sum of the accuracies observed when
  it was proposed (`score`). The ratio of the two is used as the confidence
  of the rule.
  """

  def __init__(self):
    # rule -> number of times the rule has been proposed.
    self.count = defaultdict(int)
    # rule -> sum of the accuracies observed alongside the rule.
    self.score = defaultdict(float)

  def update(
      self,
      rules: Sequence[str],
      acc: float,
  ) -> None:
    """Update the posterior of rule confidence, based on the observed acc.

    Args:
      rules: the list of proposed rules.
      acc: the accuracy of applying these rules.
    """
    for rule in rules:
      self.count[rule] += 1
      self.score[rule] += acc

  def save(self, file_name: str):
    """Save the rule library to a YAML file as `rule: [count, score]`."""
    data = {rule: [count, self.score[rule]]
            for rule, count in self.count.items()}
    with open(file_name, "w") as fout:
      fout.write(yaml.dump(data, width=1000, default_flow_style=None))

  def load(self, file_name: str):
    """Load the rule library from a YAML file written by `save`.

    Entries found in the file overwrite the statistics of rules with the
    same name; rules not mentioned in the file are left untouched.
    """
    with open(file_name, "r") as fin:
      data = yaml.safe_load(fin.read())
    # An empty YAML document parses to None; treat it as an empty library
    # instead of failing when iterating over it.
    for rule, (num_recall, num_correct) in (data or {}).items():
      self.count[rule] = num_recall
      self.score[rule] = num_correct

  def to_prompt(
      self,
      min_coverage: int = 2,
      min_confidence: float = 0.,
  ) -> Mapping[str, float]:
    """Select the rules that are reliable enough to be put in a prompt.

    Args:
      min_coverage: threshold of minimal rule coverage
      min_confidence: threshold of minimal confidence
    Returns:
      Mapping from each selected rule to its confidence. Iterating over the
      mapping yields the rule strings.
    """
    rules = {}
    for rule, coverage in self.count.items():
      # A rule that was never proposed has no defined confidence. Such
      # entries can exist because a plain lookup on the defaultdict creates
      # them, so skip them before dividing.
      if coverage <= 0 or coverage < min_coverage:
        continue
      confidence = self.score[rule] / coverage
      if confidence >= min_confidence:
        rules[rule] = confidence
    return rules
91 |
92 |
class PromptFunction:
  """An LLM-based function defined by a prompt string.

  The prompt (and the optional system message) are jinja2 templates that are
  rendered with the fields of a sample plus any extra keyword arguments given
  at construction time.
  """

  def __init__(
      self,
      prompt: str,
      system: str | None = None,
      pattern: str | None = None,
      stop: str | None = None,
      return_last: bool = False,
      **kwargs
  ) -> None:
    """Build the prompt function.

    Args:
      prompt: jinja2 template source of the user prompt.
      system: optional jinja2 template source of the system message.
      pattern: optional regular expression searched in every output sentence.
      stop: optional stop string forwarded to the model.
      return_last: if true, return only the last sentence of the response.
      **kwargs: extra variables made available to both templates.
    """
    self.prompt = jinja2.Template(prompt)
    self.system = jinja2.Template(system) if system is not None else None
    self.pattern = pattern
    self.stop = stop
    self.return_last = return_last
    self.kwargs = kwargs

  @classmethod
  def from_yaml(cls, yaml_file: str, **kwargs) -> "PromptFunction":
    """Create a prompt function from a YAML file.

    Keys in the file take precedence over the keyword arguments passed here.
    """
    with open(yaml_file, "r") as fin:
      config = yaml.safe_load(fin.read())
    kwargs.update(config)
    return cls(**kwargs)

  def __call__(self, model, sample: Mapping[str, Any]) -> tuple[
      str, float] | tuple[str, float, Sequence[str]]:
    """Call the model with the sample formatted by prompt.

    Args:
      model: the LLM model. It is called as
        `model(prompt, system=..., stop=...)` and must provide
        `get_cost(prompt, system=..., response=...)`.
      sample: the input sample defined by k-v pairs.
    Returns:
      The output of the prompt function and the cost. If pattern is defined,
      additionally return the matches in the output.
    """
    logger.info("<" * 50)
    if self.system is not None:
      system = self.system.render(**sample, **self.kwargs)
      marker = "#" * 20
      logger.info("%s System %s", marker, marker)
      logger.info(system)
      logger.info("%s Prompt %s", marker, marker)
    else:
      system = None
    prompt = self.prompt.render(**sample, **self.kwargs)
    logger.info(prompt)
    logger.info("=" * 50)

    response = model(prompt, system=system, stop=self.stop)
    cost = model.get_cost(prompt, system=system, response=response)
    logger.info(response)
    logger.info(">" * 50)

    # A model may hand back no text at all; parse an empty string in that
    # case rather than failing inside re.split.
    text = response if response is not None else ""
    sents = []
    for line in re.split(r"\n+", text):
      sents += tokenize.sent_tokenize(line)
    if self.return_last and sents:
      pred = sents[-1]
    else:
      # Either the full response was requested, or the response contained no
      # sentence to pick from; fall back to the whole (possibly empty) text.
      pred = text
    if self.pattern:
      matches = []
      for sent in sents:
        matches += re.findall(self.pattern, sent)
      return pred, cost, matches
    else:
      return pred, cost
165 |
--------------------------------------------------------------------------------
/source/models.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """GPT model wrapper."""
17 |
18 | from collections.abc import Mapping, Sequence
19 | import os
20 | from typing import Final
21 | import google.generativeai as genai
22 | import openai
23 | # pylint: disable=g-importing-member
24 | from tenacity import retry
25 | from tenacity import stop_after_attempt
26 | from tenacity import wait_random_exponential
27 | import tiktoken
28 |
29 |
class GPT:
  """Thin wrapper around the OpenAI chat completion API."""

  # model name -> (price per prompt token, price per response token).
  _MODEL_PRICE: Final[Mapping[str, tuple[float, float]]] = {
      "gpt-3.5-turbo": (0.0005 / 1000, 0.0015 / 1000),
      "gpt-3.5-turbo-0613": (0.0005 / 1000, 0.0015 / 1000),
      "gpt-3.5-turbo-16k-0613": (0.0005 / 1000, 0.0015 / 1000),
      "gpt-4": (0.03 / 1000, 0.06 / 1000),
  }

  def __init__(
      self,
      model: str = "gpt-3.5-turbo",
      temperature: float = 1.0,
      top_p: float = 1.0,
      max_tokens: int = 2000,
  ):
    """Create the API client and the tokenizer matching `model`.

    The API key is read from the OPENAI_API_KEY environment variable.
    """
    self.client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])
    self.encoding = tiktoken.encoding_for_model(model)
    self.model = model
    self.temperature = temperature
    self.top_p = top_p
    self.max_tokens = max_tokens

  @retry(wait=wait_random_exponential(min=1, max=60),
         stop=stop_after_attempt(10))
  def __call__(
      self,
      prompt: str,
      system: str | None = None,
      stop: str | None = None,
  ):
    """Send one chat request and return the content of the first choice.

    The call is retried with random exponential backoff, up to 10 attempts.

    Args:
      prompt: the user message.
      system: optional system message, sent first when non-empty.
      stop: optional stop string forwarded to the API.
    """
    user_message = {"role": "user", "content": prompt}
    if system:
      messages = [{"role": "system", "content": system}, user_message]
    else:
      messages = [user_message]
    completion = self.client.chat.completions.create(
        model=self.model,
        messages=messages,
        temperature=self.temperature,
        top_p=self.top_p,
        max_tokens=self.max_tokens,
        stop=stop
    )
    return completion.choices[0].message.content

  def get_cost(
      self,
      prompt: str,
      response: str | None = None,
      system: str | None = None,
  ) -> float:
    """Get the cost for this request.

    Tokens are counted with the tokenizer of the configured model. When no
    response is given, the response is charged as `max_tokens` tokens.
    """
    billed_prompt = prompt if system is None else system + prompt
    prompt_tokens = len(self.encoding.encode(billed_prompt))
    response_tokens = (
        len(self.encoding.encode(response)) if response else self.max_tokens)
    input_price, output_price = self._MODEL_PRICE[self.model]
    return prompt_tokens * input_price + response_tokens * output_price
92 |
93 |
class Gemini:
  """Wrapper for Gemini models."""

  # model name -> (price per prompt token, price per response token).
  model_price = {
      "gemini-pro": (0, 0),
  }

  # Safety settings sent with every request: nothing is blocked in any of
  # the listed harm categories.
  _SETTINGS: Final[Sequence[Mapping[str, str]]] = tuple(
      {
          "category": f"HARM_CATEGORY_{category}",
          "threshold": "BLOCK_NONE",
      } for category in [
          "HARASSMENT", "HATE_SPEECH", "SEXUALLY_EXPLICIT", "DANGEROUS"]
  )

  def __init__(
      self,
      model: str = "gemini-pro",
      temperature: float = 0.9,
      top_p: float = 1,
      max_tokens: int = 2000,
  ) -> None:
    """Configure the API and store the generation parameters.

    The API key is read from the GOOGLE_API_KEY environment variable.
    """
    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    self.model = model
    self.temperature = temperature
    self.top_p = top_p
    self.max_tokens = max_tokens

  @retry(wait=wait_random_exponential(min=1, max=60),
         stop=stop_after_attempt(10))
  def __call__(
      self,
      prompt: str,
      system: str | None = None,
      stop: str | None = None,
  ):
    """Send one request and return the text of the response.

    The call is retried with random exponential backoff, up to 10 attempts.

    Args:
      prompt: the user prompt.
      system: optional system instruction. It is prepended to the prompt in
        the same user message, separated by a blank line.
      stop: optional stop string.
    """
    config = genai.GenerationConfig(
        temperature=self.temperature,
        top_p=self.top_p,
        max_output_tokens=self.max_tokens,
        # stop_sequences takes a collection of strings; wrap a single stop
        # string so that it is not treated as a sequence of characters.
        stop_sequences=[stop] if isinstance(stop, str) else stop
    )
    model = genai.GenerativeModel(self.model, generation_config=config,
                                  safety_settings=self._SETTINGS)
    text = f"{system}\n\n{prompt}" if system else prompt
    messages = [{"role": "user", "parts": [text]}]
    # Send the assembled message, not the bare prompt; otherwise the system
    # instruction never reaches the model.
    response = model.generate_content(messages)
    return response.text

  def get_cost(
      self,
      prompt: str,
      response: str | None = None,
      system: str | None = None,
  ) -> float:
    """Get the cost for this request.

    Tokens are counted through the model's `count_tokens` call. When no
    response is given, the response is charged as `max_tokens` tokens.
    """
    if system is not None:
      prompt = system + prompt
    model = genai.GenerativeModel(self.model)
    num_prompt_token = model.count_tokens(prompt).total_tokens
    if response:
      num_response_token = model.count_tokens(response).total_tokens
    else:
      num_response_token = self.max_tokens
    input_price, output_price = self.model_price[self.model]
    return num_prompt_token * input_price + num_response_token * output_price
163 |
--------------------------------------------------------------------------------
/artifacts/prompt/clutrr/textual/5-shot_cot_htt_test.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: When you answer the questions, try to use the provided knowledge whenever possible. Try not to invent knowledge by yourself unless necessary.
3 | Knowledge:
4 | {%- set key_rules = {} -%}
5 | {%- for rule in rules -%}
6 | {%- set tokens = rule.split(" ") -%}
7 | {%- set _ = key_rules.update({(tokens[0].split("'")[0], tokens[1]): rule}) -%}
8 | {%- endfor -%}
9 | {%- set global = namespace(old =["", ""]) -%}
10 | {%- for key, rule in key_rules | dictsort -%}
11 | {%- if global.old[1] and global.old != key -%}
12 | </{{ global.old[1] }}>
13 | {%- endif -%}
14 | {%- if global.old[0] and global.old[0] != key[0] -%}
15 | </{{ global.old[0] }}>
16 | {%- endif -%}
17 | {%- if global.old[0] != key[0] %}
18 | <{{ key[0] }}>
19 | {%- endif -%}
20 | {%- if global.old != key -%}
21 | <{{ key[1] }}>
22 | {%- endif -%}
23 | {{- rule -}}
24 | {%- set global.old = key -%}
25 | {%- endfor -%}
26 | </{{ global.old[1] }}></{{ global.old[0] }}>
27 |
28 | prompt: |
29 | Document: Anthony went to the park with his father, James. Annie took her uncle James to the grocery store. Alan and his daughter Annie spent Father's Day together. Annie took her dad out to a sports bar, and they had a great time watching football and drinking beer there.
30 | Question: Anthony is Alan's what?
31 | Answer: We first extract all triplets from the document. We then find the path from Alan to Anthony. Finally, we reduce the relations on the path to get the answer.
32 | The triplets include (Anthony, father, James), (Annie, uncle, James), (Alan, daughter, Annie).
33 | The path from Alan to Anthony is (Alan, daughter, Annie), (Annie, uncle, James), (James, son, Anthony).
34 | The relations on the path are daughter, uncle, son.
35 | daughter's uncle is brother. So the relations are reduced to brother, son.
36 | brother's son is nephew. So the relations are reduced to nephew.
37 | Therefore, Anthony is Alan's nephew.
38 |
39 | Document: Valerie's biggest accomplishment is raising her son Carlos. Annie does n't like having to babysit her younger brother, Emmanuel. Valerie and her son Emmanuel had lunch together at a local Chinese restaurant.
40 | Question: Carlos is Annie's what?
41 | Answer: We first extract all triplets from the document. We then find the path from Annie to Carlos. Finally, we reduce the relations on the path to get the answer.
42 | The triplets include (Valerie, son, Carlos), (Annie, brother, Emmanuel), (Valerie, son, Emmanuel).
43 | The path from Annie to Carlos is (Annie, brother, Emmanuel), (Emmanuel, mother, Valerie), (Valerie, son, Carlos).
44 | The relations on the path are brother, mother, son.
45 | brother's mother is mother. So the relations are reduced to mother, son.
46 | mother's son is brother. So the relations are reduced to brother.
47 | Therefore, Carlos is Annie's brother.
48 |
49 | Document: James likes to take his daughter Jeanna fishing. James loves cooking with his daughter. Her name is Beverly. Jeanna loves visiting with her aunt Michelle.
50 | Question: Michelle is Beverly's what?
51 | Answer: We first extract all triplets from the document. We then find the path from Beverly to Michelle. Finally, we reduce the relations on the path to get the answer.
52 | The triplets include (James, daughter, Jeanna), (James, daughter, Beverly), (Jeanna, aunt, Michelle).
53 | The path from Beverly to Michelle is (Beverly, father, James), (James, daughter, Jeanna), (Jeanna, aunt, Michelle).
54 | The relations on the path are father, daughter, aunt.
55 | father's daughter is sister. So the relations are reduced to sister, aunt.
56 | sister's aunt is aunt. So the relations are reduced to aunt.
57 | Therefore, Michelle is Beverly's aunt.
58 |
59 | Document: Lee was finally coming of age and it was time for him and his father to go on a coming of age camping trip. Beverly, James's younger daughter, decided she wanted to go on the trip despite being several years younger. Jeanna took her younger sister Beverly to the carnival last weekend.
60 | Question: Jeanna is Lee's what?
61 | Answer: We first extract all triplets from the document. We then find the path from Lee to Jeanna. Finally, we reduce the relations on the path to get the answer.
62 | The triplets include (Lee, father, James), (James, daughter, Beverly), (Jeanna, sister, Beverly).
63 | The path from Lee to Jeanna is (Lee, father, James), (James, daughter, Beverly), (Beverly, sister, Jeanna).
64 | The relations on the path are father, daughter, sister.
65 | father's daughter is sister. So the relations are reduced to sister, sister.
66 | sister's sister is sister. So the relations are reduced to sister.
67 | Therefore, Jeanna is Lee's sister.
68 |
69 | Document: Craig's sister, Rosie, bought movie tickets at a discount rate. Rosie and her father Elliott love to go skiing. Often, Elliott will invite his mother Molly to join them.
70 | Question: Molly is Craig's what?
71 | Answer: We first extract all triplets from the document. We then find the path from Craig to Molly. Finally, we reduce the relations on the path to get the answer.
72 | The triplets include (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly).
73 | The path from Craig to Molly is (Craig, sister, Rosie), (Rosie, father, Elliott), (Elliott, mother, Molly).
74 | The relations on the path are sister, father, mother.
75 | sister's father is father. So the relations are reduced to father, mother.
76 | father's mother is grandmother. So the relations are reduced to grandmother.
77 | Therefore, Molly is Craig's grandmother.
78 |
79 | Document: {{ document }}
80 | Question: {{ query[1] }} is {{ query[0] }}'s what?
81 | Answer:
82 |
83 | return_last: yes
--------------------------------------------------------------------------------
/artifacts/checkpoint/arithmetic/base-9/gpt-3.5_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | 0 + 0 = 0.: [17, 0.0]
2 | 0 + 1 = 1.: [16, 3.0]
3 | 0 + 2 = 2.: [17, 2.0]
4 | 0 + 3 = 3.: [21, 2.0]
5 | 0 + 4 = 4.: [18, 6.0]
6 | 0 + 5 = 5.: [28, 2.0]
7 | 0 + 6 = 6.: [24, 4.0]
8 | 0 + 7 + 7 = 14.: [1, 0.0]
9 | 0 + 7 = 7.: [15, 2.0]
10 | 0 + 8 = 8.: [8, 1.0]
11 | 1 + 0 + 1 = 2.: [1, 0.0]
12 | 1 + 0 = 1.: [7, 4.0]
13 | 1 + 1 + 1 = 3.: [11, 4.0]
14 | 1 + 1 = 2.: [26, 1.0]
15 | 1 + 2 + 1 = 4.: [19, 4.0]
16 | 1 + 2 = 3.: [24, 0.0]
17 | 1 + 3 + 1 = 5.: [16, 2.0]
18 | 1 + 3 = 4.: [18, 2.0]
19 | 1 + 4 + 1 = 6.: [25, 1.0]
20 | 1 + 4 = 5.: [18, 1.0]
21 | 1 + 5 + 1 = 7.: [12, 2.0]
22 | 1 + 5 = 6.: [28, 7.0]
23 | 1 + 6 + 1 = 8.: [14, 3.0]
24 | 1 + 6 = 7.: [11, 0.0]
25 | 1 + 7 + 1 = 9.: [30, 5.0]
26 | 1 + 7 = 8.: [10, 1.0]
27 | 1 + 8 + 0 = 9.: [4, 3.0]
28 | 1 + 8 + 1 = 10.: [16, 2.0]
29 | 1 + 8 = 10.: [1, 1.0]
30 | 1 + 8 = 9.: [51, 29.0]
31 | 2 + 0 = 2.: [22, 0.0]
32 | 2 + 1 + 1 = 4.: [16, 3.0]
33 | 2 + 1 = 3.: [27, 5.0]
34 | 2 + 2 + 1 = 5.: [21, 2.0]
35 | 2 + 2 = 4.: [16, 3.0]
36 | 2 + 3 + 1 = 6.: [30, 9.0]
37 | 2 + 3 = 5.: [33, 1.0]
38 | 2 + 4 + 1 = 7.: [14, 0.0]
39 | 2 + 4 = 6.: [13, 2.0]
40 | 2 + 5 + 1 = 8.: [17, 1.0]
41 | 2 + 5 = 7.: [12, 7.0]
42 | 2 + 6 + 1 = 9.: [21, 10.0]
43 | 2 + 6 = 8.: [25, 3.0]
44 | 2 + 7 + 0 = 9.: [3, 2.0]
45 | 2 + 7 + 1 = 10.: [24, 1.0]
46 | 2 + 7 = 9.: [53, 24.0]
47 | 2 + 8 + 0 = 10.: [2, 0.0]
48 | 2 + 8 + 1 = 11.: [15, 4.0]
49 | 2 + 8 + 5 = 15.: [2, 0.0]
50 | 2 + 8 = 10.: [44, 0.0]
51 | 3 + 0 = 3.: [18, 1.0]
52 | 3 + 1 + 1 = 5.: [27, 4.0]
53 | 3 + 1 = 4.: [22, 3.0]
54 | 3 + 2 + 1 = 6.: [15, 0.0]
55 | 3 + 2 = 5.: [6, 1.0]
56 | 3 + 3 + 1 = 7.: [24, 7.0]
57 | 3 + 3 = 6.: [16, 4.0]
58 | 3 + 4 + 1 = 8.: [24, 5.0]
59 | 3 + 4 = 7.: [28, 9.0]
60 | 3 + 5 + 1 = 10.: [1, 1.0]
61 | 3 + 5 + 1 = 9.: [21, 5.0]
62 | 3 + 5 = 8.: [16, 6.0]
63 | 3 + 6 + 0 = 9.: [1, 0.0]
64 | 3 + 6 + 1 = 10.: [21, 1.0]
65 | 3 + 6 + 8 = 17.: [1, 0.0]
66 | 3 + 6 = 9.: [58, 27.0]
67 | 3 + 7 + 0 = 10.: [4, 0.0]
68 | 3 + 7 + 1 = 11.: [4, 2.0]
69 | 3 + 7 + 3 = 13.: [1, 0.0]
70 | 3 + 7 + 4 = 14.: [1, 0.0]
71 | 3 + 7 = 10.: [44, 0.0]
72 | 3 + 8 + 0 = 11.: [1, 0.0]
73 | 3 + 8 + 1 = 12.: [23, 2.0]
74 | 3 + 8 = 11.: [45, 0.0]
75 | 4 + 0 = 4.: [19, 1.0]
76 | 4 + 1 + 1 = 6.: [18, 3.0]
77 | 4 + 1 = 5.: [16, 3.0]
78 | 4 + 2 + 1 = 7.: [17, 4.0]
79 | 4 + 2 = 6.: [26, 6.0]
80 | 4 + 3 + 1 = 8.: [11, 1.0]
81 | 4 + 3 = 7.: [20, 9.0]
82 | 4 + 4 + 1 = 9.: [6, 4.0]
83 | 4 + 4 = 8.: [21, 8.0]
84 | 4 + 5 + 0 = 9.: [2, 2.0]
85 | 4 + 5 + 1 = 10.: [10, 0.0]
86 | 4 + 5 + 2 = 11.: [1, 0.0]
87 | 4 + 5 + 5 = 14.: [1, 0.0]
88 | 4 + 5 = 9.: [46, 25.0]
89 | 4 + 6 + 0 = 10.: [1, 0.0]
90 | 4 + 6 + 1 = 11.: [26, 10.0]
91 | 4 + 6 + 2 = 12.: [2, 0.0]
92 | 4 + 6 = 10.: [52, 0.0]
93 | 4 + 6 = 12.: [1, 0.0]
94 | 4 + 7 + 0 = 11.: [2, 0.0]
95 | 4 + 7 + 1 = 12.: [17, 2.0]
96 | 4 + 7 + 5 = 16.: [2, 0.0]
97 | 4 + 7 = 11.: [54, 1.0]
98 | 4 + 8 + 0 = 12.: [2, 0.0]
99 | 4 + 8 + 1 = 13.: [20, 3.0]
100 | 4 + 8 = 12.: [42, 0.0]
101 | 5 + 0 = 5.: [24, 1.0]
102 | 5 + 1 + 1 = 7.: [12, 2.0]
103 | 5 + 1 = 6.: [21, 1.0]
104 | 5 + 2 + 1 = 8.: [19, 3.0]
105 | 5 + 2 = 7.: [11, 5.0]
106 | 5 + 3 + 1 = 9.: [14, 0.0]
107 | 5 + 3 = 8.: [6, 2.0]
108 | 5 + 4 + 0 = 9.: [2, 1.0]
109 | 5 + 4 + 1 = 10.: [34, 0.0]
110 | 5 + 4 + 2 = 11.: [2, 0.0]
111 | 5 + 4 = 9.: [59, 32.0]
112 | 5 + 5 + 0 = 10.: [4, 0.0]
113 | 5 + 5 + 1 = 11.: [25, 5.0]
114 | 5 + 5 = 10.: [40, 0.0]
115 | 5 + 6 + 0 = 11.: [1, 0.0]
116 | 5 + 6 + 1 = 12.: [39, 6.0]
117 | 5 + 6 = 11.: [36, 0.0]
118 | 5 + 7 + 0 = 12.: [4, 0.0]
119 | 5 + 7 + 1 = 13.: [20, 3.0]
120 | 5 + 7 + 5 = 17.: [1, 0.0]
121 | 5 + 7 = 12.: [59, 0.0]
122 | 5 + 8 + 0 = 13.: [2, 0.0]
123 | 5 + 8 + 1 = 14.: [15, 1.0]
124 | 5 + 8 = 13.: [47, 0.0]
125 | 6 + 0 = 6.: [15, 1.0]
126 | 6 + 1 + 1 = 8.: [25, 6.0]
127 | 6 + 1 = 7.: [20, 7.0]
128 | 6 + 2 + 1 = 9.: [20, 8.0]
129 | 6 + 2 = 8.: [10, 2.0]
130 | 6 + 3 + 0 = 9.: [1, 0.0]
131 | 6 + 3 + 1 = 10.: [32, 0.0]
132 | 6 + 3 + 8 = 17.: [1, 0.0]
133 | 6 + 3 = 10.: [2, 2.0]
134 | 6 + 3 = 9.: [57, 27.0]
135 | 6 + 4 + 0 = 10.: [2, 0.0]
136 | 6 + 4 + 1 = 11.: [24, 6.0]
137 | 6 + 4 + 3 = 13.: [1, 0.0]
138 | 6 + 4 + 5 = 15.: [2, 0.0]
139 | 6 + 4 = 10.: [52, 1.0]
140 | 6 + 5 + 0 = 11.: [1, 0.0]
141 | 6 + 5 + 1 = 12.: [24, 0.0]
142 | 6 + 5 + 4 = 15.: [1, 0.0]
143 | 6 + 5 + 5 = 16.: [2, 0.0]
144 | 6 + 5 = 11.: [48, 1.0]
145 | 6 + 6 + 0 = 12.: [4, 0.0]
146 | 6 + 6 + 1 = 13.: [38, 8.0]
147 | 6 + 6 + 5 = 17.: [1, 0.0]
148 | 6 + 6 + 8 = 20.: [1, 0.0]
149 | 6 + 6 = 12.: [46, 0.0]
150 | 6 + 7 + 0 = 13.: [10, 0.0]
151 | 6 + 7 + 1 = 14.: [16, 0.0]
152 | 6 + 7 + 6 = 19.: [1, 0.0]
153 | 6 + 7 = 13.: [49, 1.0]
154 | 6 + 8 + 1 = 15.: [15, 3.0]
155 | 6 + 8 = 14.: [39, 0.0]
156 | 7 + 0 = 7.: [26, 5.0]
157 | 7 + 1 + 1 = 9.: [23, 3.0]
158 | 7 + 1 = 8.: [13, 2.0]
159 | 7 + 2 + 0 = 9.: [2, 1.0]
160 | 7 + 2 + 1 = 10.: [23, 0.0]
161 | 7 + 2 = 9.: [49, 28.0]
162 | 7 + 3 + 0 = 10.: [6, 0.0]
163 | 7 + 3 + 1 = 11.: [18, 5.0]
164 | 7 + 3 = 10.: [45, 0.0]
165 | 7 + 4 + 0 = 11.: [1, 0.0]
166 | 7 + 4 + 1 = 12.: [26, 0.0]
167 | 7 + 4 = 11.: [55, 0.0]
168 | 7 + 5 + 0 = 12.: [2, 0.0]
169 | 7 + 5 + 1 = 13.: [27, 1.0]
170 | 7 + 5 = 12.: [42, 0.0]
171 | 7 + 6 + 1 = 14.: [16, 0.0]
172 | 7 + 6 + 2 = 15.: [1, 0.0]
173 | 7 + 6 + 5 = 18.: [1, 0.0]
174 | 7 + 6 + 8 = 21.: [1, 0.0]
175 | 7 + 6 = 13.: [67, 0.0]
176 | 7 + 7 + 0 = 14.: [10, 0.0]
177 | 7 + 7 + 1 = 15.: [17, 4.0]
178 | 7 + 7 + 4 = 18.: [1, 0.0]
179 | 7 + 7 + 6 = 20.: [1, 0.0]
180 | 7 + 7 = 14.: [44, 0.0]
181 | 7 + 8 + 0 = 15.: [4, 1.0]
182 | 7 + 8 + 1 = 16.: [24, 3.0]
183 | 7 + 8 = 15.: [53, 0.0]
184 | 8 + 0 = 8.: [20, 3.0]
185 | 8 + 1 + 0 = 9.: [4, 2.0]
186 | 8 + 1 + 1 = 10.: [14, 0.0]
187 | 8 + 1 = 9.: [60, 19.0]
188 | 8 + 2 + 0 = 10.: [2, 0.0]
189 | 8 + 2 + 1 = 11.: [14, 0.0]
190 | 8 + 2 = 10.: [59, 0.0]
191 | 8 + 3 + 0 = 11.: [2, 0.0]
192 | 8 + 3 + 1 = 12.: [24, 1.0]
193 | 8 + 3 + 2 = 13.: [1, 0.0]
194 | 8 + 3 = 11.: [39, 0.0]
195 | 8 + 4 + 1 = 13.: [4, 0.0]
196 | 8 + 4 + 3 = 15.: [1, 0.0]
197 | 8 + 4 = 12.: [33, 0.0]
198 | 8 + 5 + 0 = 13.: [1, 0.0]
199 | 8 + 5 + 1 = 14.: [20, 1.0]
200 | 8 + 5 = 13.: [45, 0.0]
201 | 8 + 6 + 1 = 15.: [19, 1.0]
202 | 8 + 6 + 2 = 16.: [1, 0.0]
203 | 8 + 6 = 14.: [43, 0.0]
204 | 8 + 7 + 0 = 15.: [2, 0.0]
205 | 8 + 7 + 1 = 16.: [20, 3.0]
206 | 8 + 7 + 4 = 19.: [1, 0.0]
207 | 8 + 7 + 8 = 23.: [1, 0.0]
208 | 8 + 7 = 15.: [61, 2.0]
209 | 8 + 8 + 0 = 16.: [2, 0.0]
210 | 8 + 8 + 1 = 17.: [30, 5.0]
211 | 8 + 8 = 16.: [39, 4.0]
212 |
--------------------------------------------------------------------------------
/artifacts/prompt/list_functions/4-shot_cot_htt_test.yaml:
--------------------------------------------------------------------------------
1 | system: |
2 | Instruction: Infer the function behind the examples. Use the function to answer the questions.
3 |
4 | prompt: |
5 | {%- set concept2rules = {} -%}
6 | {%- for rule, score in rules.items() -%}
7 | {%- set pos1 = rule.find("]") -%}
8 | {%- set concept = rule[:pos1] | trim("[") -%}
9 | {%- set rule = rule[pos1 + 1:] | trim -%}
10 | {%- set concept_rules = concept2rules[concept] if concept in concept2rules else [] -%}
11 | {%- set _ = concept2rules.update({concept: concept_rules + [(rule, score)]}) -%}
12 | {%- endfor -%}
13 | {%- for concept, concept_rules in concept2rules.items() -%}
14 | {%- set _ = concept2rules.update({concept: concept_rules | sort(attribute="1", reverse=True)}) -%}
15 | {%- endfor -%}
16 | Examples: {#- c020 #}
17 | [0, 8, 5, 2, 7, 1, 4, 6, 9, 3] -> [3, 8, 5, 2, 7, 1, 4, 6, 9, 3]
18 | [4, 0, 1] -> [1, 0, 1]
19 | [6, 1, 7, 5, 3, 2, 8, 4, 9] -> [9, 1, 7, 5, 3, 2, 8, 4, 9]
20 | [6, 2, 1, 9, 4] -> [4, 2, 1, 9, 4]
21 | [2, 9, 7, 5, 3, 8, 1, 4] -> [4, 9, 7, 5, 3, 8, 1, 4]
22 | [5, 1, 7, 8, 9, 4, 0, 3, 2] -> [2, 1, 7, 8, 9, 4, 0, 3, 2]
23 | Questions:
24 | [5, 8, 6, 1, 0, 9, 7] -> ?
25 | [3, 8, 6, 0] -> ?
26 | [8, 3] -> ?
27 | [3, 2, 0, 1, 6, 8, 7, 5] -> ?
28 | [5, 2, 0, 8, 9, 6] -> ?
29 | [8, 5, 7, 4, 2, 3, 6] -> ?
30 | Potential functions and their confidence:
31 | {%- for rule, score in concept2rules["c020"] %}
32 | {{ rule }}: {{ "%0.2f" | format(score) }}
33 | {%- endfor %}
34 | {%- if concept2rules["c020"] | length == 0 %}
35 | N/A
36 | {%- endif %}
37 | Answers:
38 | Based on the examples and the potential functions, we infer the function is to replace the first element with the last element.
39 | Using this function, the answers to the questions are:
40 | [5, 8, 6, 1, 0, 9, 7] -> [7, 8, 6, 1, 0, 9, 7]
41 | [3, 8, 6, 0] -> [0, 8, 6, 0]
42 | [8, 3] -> [3, 3]
43 | [3, 2, 0, 1, 6, 8, 7, 5] -> [5, 2, 0, 1, 6, 8, 7, 5]
44 | [5, 2, 0, 8, 9, 6] -> [6, 2, 0, 8, 9, 6]
45 | [8, 5, 7, 4, 2, 3, 6] -> [6, 5, 7, 4, 2, 3, 6]
46 |
47 | Examples: {#- c040 #}
48 | [2] -> [2]
49 | [4, 3, 0, 1, 7, 8] -> [4, 3, 0, 1, 7, 8, 3]
50 | [5, 0, 2, 9] -> [5, 0, 2, 9, 9]
51 | [7, 0, 2, 5] -> [7, 0, 2, 5]
52 | [3, 4, 7, 6, 0] -> [3, 4, 7, 6, 0, 3]
53 | [8, 1, 2, 3, 7] -> [8, 1, 2, 3, 7, 3]
54 | Questions:
55 | [9, 1] -> ?
56 | [6] -> ?
57 | [1, 9, 5, 0] -> ?
58 | [4, 6, 9, 0, 7, 8, 1, 2] -> ?
59 | [4, 2, 8] -> ?
60 | [6, 2, 0, 3, 1, 8, 7] -> ?
61 | Potential functions and their confidence:
62 | {%- for rule, score in concept2rules["c040"] %}
63 | {{ rule }}: {{ "%0.2f" | format(score) }}
64 | {%- endfor %}
65 | {%- if concept2rules["c040"] | length == 0 %}
66 | N/A
67 | {%- endif %}
68 | Answers:
69 | Based on the examples and the potential functions, we infer the function is to append 3 if the list contains a 3, else append 9 if the list contains a 9.
70 | Using this function, the answers to the questions are:
71 | [9, 1] -> [9, 1, 9]
72 | [6] -> [6]
73 | [1, 9, 5, 0] -> [1, 9, 5, 0, 9]
74 | [4, 6, 9, 0, 7, 8, 1, 2] -> [4, 6, 9, 0, 7, 8, 1, 2, 9]
75 | [4, 2, 8] -> [4, 2, 8]
76 | [6, 2, 0, 3, 1, 8, 7] -> [6, 2, 0, 3, 1, 8, 7, 3]
77 |
78 | Examples: {#- c060 #}
79 | [1, 0, 9, 7, 4, 2, 5, 3, 6, 8] -> [9, 0, 1, 4, 4, 5]
80 | [3, 8, 4, 6, 1, 5, 7, 0] -> [4, 8, 3, 4, 1, 7]
81 | [5, 4, 7, 2, 9, 3, 8, 1] -> [7, 4, 5, 4, 9, 8]
82 | [3, 9, 2, 0, 6, 8, 5, 1, 7] -> [2, 9, 3, 4, 6, 5]
83 | [9, 2, 1, 3, 4, 7, 6, 8, 5, 0] -> [1, 2, 9, 4, 4, 6]
84 | [0, 7, 9, 3, 1, 5, 8, 2, 6] -> [9, 7, 0, 4, 1, 8]
85 | Questions:
86 | [3, 9, 7, 6, 0, 5, 1] -> ?
87 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> ?
88 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> ?
89 | [8, 4, 9, 1, 3, 2, 7] -> ?
90 | [8, 3, 7, 0, 4, 2, 5] -> ?
91 | [6, 2, 1, 0, 9, 8, 5] -> ?
92 | Potential functions and their confidence:
93 | {%- for rule, score in concept2rules["c060"] %}
94 | {{ rule }}: {{ "%0.2f" | format(score) }}
95 | {%- endfor %}
96 | {%- if concept2rules["c060"] | length == 0 %}
97 | N/A
98 | {%- endif %}
99 | Answers:
100 | Based on the examples and the potential functions, we infer the function is to generate a list of elements 3, 2, 1, the number 4, then elements 5 and 7.
101 | Using this function, the answers to the questions are:
102 | [3, 9, 7, 6, 0, 5, 1] -> [7, 9, 3, 4, 0, 1]
103 | [2, 5, 9, 7, 8, 1, 0, 6, 4, 3] -> [9, 5, 2, 4, 8, 0]
104 | [9, 0, 7, 2, 4, 5, 3, 1, 6] -> [7, 0, 9, 4, 4, 3]
105 | [8, 4, 9, 1, 3, 2, 7] -> [9, 4, 8, 4, 3, 7]
106 | [8, 3, 7, 0, 4, 2, 5] -> [7, 3, 8, 4, 4, 5]
107 | [6, 2, 1, 0, 9, 8, 5] -> [1, 2, 6, 4, 9, 5]
108 |
109 | Examples: {#- c080 #}
110 | [] -> []
111 | [1, 5, 6, 2, 8, 3, 7] -> [7, 3, 8, 2, 6, 5, 1]
112 | [2, 1, 9, 6, 3, 5, 4, 8] -> [8, 4, 5, 3, 6, 9, 1, 2]
113 | [9, 1, 2, 8, 0] -> [0, 8, 2, 1, 9]
114 | [1, 0, 7, 3, 9, 2] -> [2, 9, 3, 7, 0, 1]
115 | [7, 6, 3, 0, 4, 1, 5, 2] -> [2, 5, 1, 4, 0, 3, 6, 7]
116 | Questions:
117 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> ?
118 | [6, 4, 0] -> ?
119 | [3, 6, 1, 7, 0, 4] -> ?
120 | [5, 4, 2, 7] -> ?
121 | [5, 7, 6, 2, 3] -> ?
122 | [7, 9] -> ?
123 | Potential functions and their confidence:
124 | {%- for rule, score in concept2rules["c080"] %}
125 | {{ rule }}: {{ "%0.2f" | format(score) }}
126 | {%- endfor %}
127 | {%- if concept2rules["c080"] | length == 0 %}
128 | N/A
129 | {%- endif %}
130 | Answers:
131 | Based on the examples and the potential functions, we infer the function is to reverse the elements.
132 | Using this function, the answers to the questions are:
133 | [2, 6, 5, 7, 8, 0, 4, 3, 1, 9] -> [9, 1, 3, 4, 0, 8, 7, 5, 6, 2]
134 | [6, 4, 0] -> [0, 4, 6]
135 | [3, 6, 1, 7, 0, 4] -> [4, 0, 7, 1, 6, 3]
136 | [5, 4, 2, 7] -> [7, 2, 4, 5]
137 | [5, 7, 6, 2, 3] -> [3, 2, 6, 7, 5]
138 | [7, 9] -> [9, 7]
139 |
140 | Examples:
141 | {%- for i in range(train_queries | length) %}
142 | {{ train_queries[i] }} -> {{ train_answers[i] }}
143 | {%- endfor %}
144 | Questions:
145 | {%- for query in queries %}
146 | {{ query }} -> ?
147 | {%- endfor %}
148 | Potential functions and their confidence:
149 | {%- for rule, score in concept2rules[concept] %}
150 | {{ rule }}: {{ "%0.2f" | format(score) }}
151 | {%- endfor %}
152 | {%- if concept2rules[concept] | length == 0 %}
153 | N/A
154 | {%- endif %}
155 | Answers:
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # llms_can_learn_rules
2 |
3 | 
4 |
5 | A major reason for failure of chain-of-thought prompting is its tendency to
6 | hallucinate rules in multi-step reasoning. Our work, Hypotheses-to-Theories (HtT),
7 | prompts LLMs to induce rules from training samples, build a rule library, and
8 | apply it to solve reasoning problems.
9 |
10 | HtT can be viewed as a new paradigm of learning with LLMs. Instead of learning
11 | model parameters, HtT learns a rule library that transfers across different models
12 | and textual forms. It can be applied to black-box LLMs such as GPT-4 and Gemini-Pro.
13 |
14 | ## Installation
15 |
16 | First, let's create a virtual environment:
17 |
18 | Below we use a conda environment. If you have not used conda before, you can install Miniconda by following https://docs.conda.io/projects/miniconda/en/latest/
19 |
20 | ```bash
21 | conda create -n htt python=3.10
22 | conda activate htt
23 | ```
24 |
25 | Install dependencies through pip.
26 |
27 | ```bash
28 | pip install -r requirements.txt
29 | ```
30 |
31 | If it is your first time using nltk, please also download the necessary pieces:
32 |
33 | ```python
34 | >>> import nltk
35 | >>> nltk.download('punkt')
36 | >>> nltk.download('punkt_tab')
37 | ```
38 |
39 |
40 | Export your API keys as environment variables.
41 |
42 | ```bash
43 | export OPENAI_API_KEY=your-open-ai-key
44 | export GOOGLE_API_KEY=your-google-key
45 | ```
46 |
47 | ## 🛠️ Usage ##
48 |
49 | ### Dataset ###
50 |
51 | Please navigate to `artifacts/dataset` and run the `download.sh` script to prepare the datasets.
52 |
53 | Use `source/train.py` for training HtT and `source/test.py` for testing HtT and
54 | other methods. All the experiments will be logged in `experiment/`.
55 |
56 | ### Training (Induction Stage) ###
57 |
58 | Below are the command lines for training HtT. Note that training HtT with GPT-4
59 | incurs substantial time (2-21 hours) and cost ($20-270 USD) per dataset.
60 | If you only want to reproduce the results of HtT, we highly recommend using
61 | the provided checkpoints of rule libraries.
62 |
63 | ```bash
64 | python source/train.py -c artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000
65 | python source/train.py -c artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000
66 | python source/train.py -c artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000
67 | python source/train.py -c artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot_htt.yaml --num-iteration 2000
68 | python source/train.py -c artifacts/config/list_functions/gpt-4_4-shot_cot_htt.yaml --num-iteration 5000
69 | ```
70 |
71 | ### Test (Deduction Stage) ###
72 |
73 | Here are the command lines for testing HtT based on the provided rule libraries.
74 | If you want to test with your own rule libraries, modify the library path in the
75 | corresponding config files.
76 |
77 | ```bash
78 | python source/test.py -c artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot_htt.yaml
79 | python source/test.py -c artifacts/config/clutrr/textual/gpt-4_5-shot_cot_htt.yaml
80 | python source/test.py -c artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot_htt.yaml
81 | python source/test.py -c artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot_htt.yaml
82 | python source/test.py -c artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot_htt.yaml
83 | python source/test.py -c artifacts/config/list_functions/gpt-4_4-shot_cot_htt.yaml
84 | ```
85 |
86 | To run baseline methods like 0-shot CoT and 5-shot CoT, use the following commands:
87 |
88 | ```bash
89 | python source/test.py -c artifacts/config/clutrr/symbolic/gpt-4_0-shot_cot.yaml
90 | python source/test.py -c artifacts/config/clutrr/textual/gpt-4_0-shot_cot.yaml
91 | python source/test.py -c artifacts/config/arithmetic/base-16/gpt-4_0-shot_cot.yaml
92 | python source/test.py -c artifacts/config/arithmetic/base-11/gpt-4_0-shot_cot.yaml
93 | python source/test.py -c artifacts/config/arithmetic/base-9/gpt-4_0-shot_cot.yaml
94 | python source/test.py -c artifacts/config/list_functions/gpt-4_0-shot_cot.yaml
95 | ```
96 |
97 | ```bash
98 | python source/test.py -c artifacts/config/clutrr/symbolic/gpt-4_5-shot_cot.yaml
99 | python source/test.py -c artifacts/config/clutrr/textual/gpt-4_5-shot_cot.yaml
100 | python source/test.py -c artifacts/config/arithmetic/base-16/gpt-4_5-shot_cot.yaml
101 | python source/test.py -c artifacts/config/arithmetic/base-11/gpt-4_5-shot_cot.yaml
102 | python source/test.py -c artifacts/config/arithmetic/base-9/gpt-4_5-shot_cot.yaml
103 | python source/test.py -c artifacts/config/list_functions/gpt-4_4-shot_cot.yaml
104 | ```
105 |
106 | ## 📂 Code Structure ##
107 |
108 | - `artifacts/checkpoint/`: checkpoints of rule libraries
109 | - `artifacts/config/`: configuration files for experiments
110 | - `artifacts/dataset/`: benchmark datasets
111 | - `artifacts/prompt/`: prompts written in [jinja](https://jinja.palletsprojects.com/)
112 | - `source/`: source files written in python
113 |
114 | ## 🗡️ Reproducibility ##
115 |
116 | Due to the randomness in LLMs, changes in OpenAI models, and the reimplementation of
117 | our codebase, this repo may not reproduce the exact numbers in the paper.
118 |
119 | It is normal to observe numbers higher or lower than those in the paper, but the
120 | rankings of methods should be preserved on strong models like GPT-4:
121 | few-shot CoT + HtT > few-shot CoT > 0-shot CoT.
122 |
123 |
124 | ## Citing this work
125 |
126 | If you use HtT or this repo in your research, please cite the following paper.
127 |
128 |
129 | ```bibtex
130 | @article{zhu2023large,
131 | title={Large Language Models can Learn Rules},
132 | author={Zhu, Zhaocheng and Xue, Yuan and Chen, Xinyun and Zhou, Denny and Tang, Jian and Schuurmans, Dale and Dai, Hanjun},
133 | journal={arXiv preprint arXiv:2310.07064},
134 | year={2023}
135 | }
136 | ```
137 |
138 | ## License and disclaimer
139 |
140 | Copyright 2023 DeepMind Technologies Limited
141 |
142 | All software is licensed under the Apache License, Version 2.0 (Apache 2.0);
143 | you may not use this file except in compliance with the Apache 2.0 license.
144 | You may obtain a copy of the Apache 2.0 license at:
145 | https://www.apache.org/licenses/LICENSE-2.0
146 |
147 | All other materials are licensed under the Creative Commons Attribution 4.0
148 | International License (CC-BY). You may obtain a copy of the CC-BY license at:
149 | https://creativecommons.org/licenses/by/4.0/legalcode
150 |
151 | Unless required by applicable law or agreed to in writing, all software and
152 | materials distributed here under the Apache 2.0 or CC-BY licenses are
153 | distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
154 | either express or implied. See the licenses for the specific language governing
155 | permissions and limitations under those licenses.
156 |
157 | This is not an official Google product.
158 |
--------------------------------------------------------------------------------
/artifacts/checkpoint/arithmetic/base-9/gemini-pro_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | 0 + 0 = 0.: [17, 15.0]
2 | 0 + 0 = 00.: [1, 1.0]
3 | 0 + 1 + 1 = 12.: [1, 0.0]
4 | 0 + 1 = 1.: [14, 12.0]
5 | 0 + 2 = 2.: [23, 17.0]
6 | 0 + 2 = 3.: [1, 0.0]
7 | 0 + 3 = 3.: [19, 19.0]
8 | 0 + 3 = 4.: [5, 0.0]
9 | 0 + 4 = 4.: [19, 15.0]
10 | 0 + 4 = 5.: [6, 0.0]
11 | 0 + 5 = 5.: [20, 17.0]
12 | 0 + 5 = 6.: [15, 0.0]
13 | 0 + 6 = 6.: [13, 12.0]
14 | 0 + 6 = 7.: [13, 0.0]
15 | 0 + 7 = 7.: [12, 10.0]
16 | 0 + 7 = 8.: [10, 0.0]
17 | 0 + 8 = 8.: [14, 14.0]
18 | 0 + 8 = 9.: [4, 0.0]
19 | 1 + 0 = 1.: [7, 5.0]
20 | 1 + 1 + 1 = 10.: [2, 0.0]
21 | 1 + 1 + 1 = 11.: [4, 0.0]
22 | 1 + 1 + 1 = 13.: [5, 0.0]
23 | 1 + 1 = 2.: [21, 20.0]
24 | 1 + 1 = 3.: [3, 0.0]
25 | 1 + 2 + 1 = 10.: [1, 0.0]
26 | 1 + 2 + 1 = 4.: [5, 5.0]
27 | 1 + 2 + 1 = 5.: [12, 0.0]
28 | 1 + 2 = 3.: [11, 11.0]
29 | 1 + 2 = 4.: [12, 0.0]
30 | 1 + 3 + 1 = 5.: [1, 1.0]
31 | 1 + 3 + 1 = 6.: [12, 0.0]
32 | 1 + 3 = 4.: [10, 9.0]
33 | 1 + 3 = 5.: [15, 1.0]
34 | 1 + 4 + 1 = 7.: [24, 0.0]
35 | 1 + 4 = 5.: [3, 3.0]
36 | 1 + 4 = 6.: [14, 0.0]
37 | 1 + 5 + 1 = 8.: [10, 0.0]
38 | 1 + 5 = 6.: [1, 0.0]
39 | 1 + 5 = 7.: [25, 1.0]
40 | 1 + 6 + 1 = 9.: [11, 0.0]
41 | 1 + 6 = 7.: [2, 2.0]
42 | 1 + 6 = 8.: [12, 1.0]
43 | 1 + 7 + 1 = 10.: [26, 26.0]
44 | 1 + 7 = 10.: [11, 0.0]
45 | 1 + 7 = 9.: [2, 0.0]
46 | 1 + 8 + 0 = 10.: [1, 1.0]
47 | 1 + 8 + 1 = 11.: [13, 10.0]
48 | 1 + 8 = 10.: [58, 37.0]
49 | 2 + 0 = 2.: [15, 15.0]
50 | 2 + 0 = 3.: [1, 0.0]
51 | 2 + 1 + 1 = 11.: [1, 0.0]
52 | 2 + 1 + 1 = 4.: [2, 2.0]
53 | 2 + 1 + 1 = 5.: [14, 0.0]
54 | 2 + 1 = 3.: [13, 10.0]
55 | 2 + 1 = 4.: [15, 0.0]
56 | 2 + 2 + 1 = 10.: [1, 0.0]
57 | 2 + 2 + 1 = 6.: [19, 0.0]
58 | 2 + 2 = 4.: [12, 10.0]
59 | 2 + 2 = 5.: [3, 1.0]
60 | 2 + 3 + 1 = 7.: [29, 0.0]
61 | 2 + 3 = 5.: [4, 2.0]
62 | 2 + 3 = 6.: [26, 0.0]
63 | 2 + 4 + 1 = 8.: [14, 0.0]
64 | 2 + 4 = 6.: [1, 1.0]
65 | 2 + 4 = 7.: [12, 0.0]
66 | 2 + 5 + 1 = 9.: [17, 0.0]
67 | 2 + 5 = 7.: [2, 2.0]
68 | 2 + 5 = 8.: [14, 0.0]
69 | 2 + 6 + 1 = 10.: [21, 19.0]
70 | 2 + 6 = 10.: [8, 0.0]
71 | 2 + 6 = 8.: [7, 6.0]
72 | 2 + 6 = 9.: [10, 0.0]
73 | 2 + 7 + 1 = 11.: [22, 20.0]
74 | 2 + 7 = 10.: [37, 15.0]
75 | 2 + 7 = 11.: [20, 0.0]
76 | 2 + 8 + 0 = 11.: [2, 0.0]
77 | 2 + 8 + 1 = 12.: [11, 10.0]
78 | 2 + 8 = 10.: [3, 0.0]
79 | 2 + 8 = 11.: [49, 25.0]
80 | 3 + 0 = 3.: [13, 13.0]
81 | 3 + 0 = 4.: [2, 0.0]
82 | 3 + 1 + 1 = 11.: [2, 0.0]
83 | 3 + 1 + 1 = 5.: [2, 1.0]
84 | 3 + 1 + 1 = 6.: [23, 0.0]
85 | 3 + 1 = 4.: [7, 6.0]
86 | 3 + 1 = 5.: [9, 0.0]
87 | 3 + 2 + 1 = 7.: [15, 0.0]
88 | 3 + 2 = 5.: [6, 3.0]
89 | 3 + 2 = 6.: [3, 0.0]
90 | 3 + 3 + 1 = 7.: [1, 1.0]
91 | 3 + 3 + 1 = 8.: [22, 0.0]
92 | 3 + 3 = 6.: [10, 9.0]
93 | 3 + 3 = 7.: [6, 0.0]
94 | 3 + 4 + 1 = 10.: [2, 0.0]
95 | 3 + 4 + 1 = 9.: [21, 0.0]
96 | 3 + 4 = 7.: [5, 4.0]
97 | 3 + 4 = 8.: [26, 0.0]
98 | 3 + 5 + 1 = 10.: [27, 27.0]
99 | 3 + 5 = 8.: [1, 1.0]
100 | 3 + 5 = 9.: [14, 0.0]
101 | 3 + 6 + 0 = 10.: [2, 1.0]
102 | 3 + 6 + 1 = 11.: [20, 18.0]
103 | 3 + 6 = 10.: [47, 32.0]
104 | 3 + 6 = 11.: [1, 0.0]
105 | 3 + 7 + 0 = 11.: [2, 0.0]
106 | 3 + 7 + 1 = 11.: [1, 0.0]
107 | 3 + 7 + 1 = 12.: [3, 3.0]
108 | 3 + 7 = 11.: [49, 26.0]
109 | 3 + 8 + 0 = 12.: [1, 0.0]
110 | 3 + 8 + 1 = 13.: [16, 14.0]
111 | 3 + 8 = 12.: [48, 26.0]
112 | 4 + 0 + 1 = 5.: [1, 0.0]
113 | 4 + 0 = 4.: [12, 11.0]
114 | 4 + 1 + 1 = 7.: [18, 0.0]
115 | 4 + 1 = 5.: [5, 5.0]
116 | 4 + 1 = 6.: [11, 0.0]
117 | 4 + 2 + 1 = 8.: [16, 0.0]
118 | 4 + 2 = 6.: [3, 3.0]
119 | 4 + 2 = 7.: [23, 0.0]
120 | 4 + 3 + 1 = 9.: [12, 0.0]
121 | 4 + 3 = 7.: [6, 6.0]
122 | 4 + 3 = 8.: [12, 0.0]
123 | 4 + 4 + 0 = 9.: [1, 0.0]
124 | 4 + 4 + 1 = 10.: [7, 4.0]
125 | 4 + 4 = 10.: [6, 0.0]
126 | 4 + 4 = 8.: [3, 2.0]
127 | 4 + 4 = 9.: [13, 0.0]
128 | 4 + 5 + 0 = 10.: [1, 0.0]
129 | 4 + 5 + 1 = 11.: [9, 9.0]
130 | 4 + 5 = 10.: [49, 24.0]
131 | 4 + 6 + 0 = 11.: [2, 1.0]
132 | 4 + 6 + 1 = 12.: [24, 23.0]
133 | 4 + 6 = 11.: [46, 24.0]
134 | 4 + 7 + 0 = 12.: [2, 0.0]
135 | 4 + 7 + 1 = 13.: [13, 13.0]
136 | 4 + 7 = 11.: [1, 0.0]
137 | 4 + 7 = 12.: [62, 39.0]
138 | 4 + 8 + 0 = 13.: [1, 1.0]
139 | 4 + 8 + 1 = 14.: [15, 15.0]
140 | 4 + 8 = 12.: [1, 0.0]
141 | 4 + 8 = 13.: [43, 18.0]
142 | 5 + 0 = 5.: [16, 12.0]
143 | 5 + 0 = 6.: [2, 0.0]
144 | 5 + 1 + 1 = 8.: [11, 0.0]
145 | 5 + 1 = 6.: [7, 6.0]
146 | 5 + 1 = 7.: [18, 0.0]
147 | 5 + 2 + 1 = 10.: [1, 0.0]
148 | 5 + 2 + 1 = 9.: [18, 0.0]
149 | 5 + 2 = 7.: [1, 1.0]
150 | 5 + 2 = 8.: [7, 0.0]
151 | 5 + 3 + 1 = 10.: [13, 13.0]
152 | 5 + 3 = 8.: [2, 2.0]
153 | 5 + 3 = 9.: [4, 0.0]
154 | 5 + 4 + 1 = 11.: [33, 31.0]
155 | 5 + 4 = 10.: [62, 35.0]
156 | 5 + 5 + 1 = 11.: [2, 0.0]
157 | 5 + 5 + 1 = 12.: [24, 23.0]
158 | 5 + 5 = 11.: [42, 22.0]
159 | 5 + 6 + 1 = 13.: [28, 24.0]
160 | 5 + 6 = 12.: [32, 19.0]
161 | 5 + 7 + 1 = 14.: [16, 14.0]
162 | 5 + 7 = 13.: [61, 28.0]
163 | 5 + 8 + 0 = 14.: [2, 0.0]
164 | 5 + 8 + 1 = 15.: [12, 12.0]
165 | 5 + 8 = 14.: [47, 27.0]
166 | 6 + 0 = 6.: [10, 10.0]
167 | 6 + 0 = 7.: [2, 0.0]
168 | 6 + 1 + 1 = 8.: [1, 1.0]
169 | 6 + 1 + 1 = 9.: [25, 0.0]
170 | 6 + 1 = 7.: [4, 4.0]
171 | 6 + 1 = 8.: [14, 0.0]
172 | 6 + 2 + 1 = 10.: [20, 20.0]
173 | 6 + 2 = 8.: [3, 3.0]
174 | 6 + 2 = 9.: [7, 0.0]
175 | 6 + 3 + 0 = 10.: [2, 0.0]
176 | 6 + 3 + 1 = 10.: [1, 0.0]
177 | 6 + 3 + 1 = 11.: [34, 33.0]
178 | 6 + 3 = 10.: [66, 31.0]
179 | 6 + 3 = 9.: [1, 0.0]
180 | 6 + 4 + 1 = 11.: [1, 0.0]
181 | 6 + 4 + 1 = 12.: [29, 21.0]
182 | 6 + 4 = 11.: [62, 33.0]
183 | 6 + 5 + 1 = 13.: [28, 28.0]
184 | 6 + 5 = 11.: [1, 0.0]
185 | 6 + 5 = 12.: [59, 37.0]
186 | 6 + 6 + 0 = 13.: [3, 1.0]
187 | 6 + 6 + 1 = 14.: [25, 16.0]
188 | 6 + 6 = 13.: [64, 23.0]
189 | 6 + 6 = 15.: [1, 0.0]
190 | 6 + 7 + 0 = 14.: [3, 2.0]
191 | 6 + 7 + 1 = 15.: [12, 10.0]
192 | 6 + 7 = 14.: [66, 46.0]
193 | 6 + 8 + 0 = 15.: [1, 1.0]
194 | 6 + 8 + 1 = 16.: [14, 13.0]
195 | 6 + 8 = 15.: [38, 29.0]
196 | 7 + 0 = 7.: [13, 12.0]
197 | 7 + 0 = 8.: [3, 0.0]
198 | 7 + 1 + 1 = 10.: [26, 26.0]
199 | 7 + 1 = 10.: [10, 0.0]
200 | 7 + 1 = 9.: [2, 0.0]
201 | 7 + 2 + 1 = 11.: [25, 21.0]
202 | 7 + 2 = 10.: [46, 31.0]
203 | 7 + 2 = 11.: [5, 0.0]
204 | 7 + 3 + 0 = 11.: [1, 0.0]
205 | 7 + 3 + 1 = 11.: [3, 0.0]
206 | 7 + 3 + 1 = 12.: [16, 14.0]
207 | 7 + 3 = 11.: [51, 27.0]
208 | 7 + 4 + 0 = 12.: [1, 0.0]
209 | 7 + 4 + 1 = 13.: [25, 23.0]
210 | 7 + 4 = 12.: [55, 38.0]
211 | 7 + 5 + 1 = 14.: [26, 24.0]
212 | 7 + 5 = 13.: [51, 27.0]
213 | 7 + 6 + 0 = 14.: [1, 1.0]
214 | 7 + 6 + 1 = 15.: [12, 12.0]
215 | 7 + 6 = 14.: [66, 36.0]
216 | 7 + 7 + 0 = 15.: [2, 1.0]
217 | 7 + 7 + 1 = 16.: [8, 4.0]
218 | 7 + 7 = 14.: [1, 0.0]
219 | 7 + 7 = 15.: [55, 25.0]
220 | 7 + 7 = 16.: [3, 0.0]
221 | 7 + 8 + 0 = 16.: [4, 2.0]
222 | 7 + 8 + 1 = 17.: [20, 19.0]
223 | 7 + 8 = 16.: [57, 37.0]
224 | 8 + 0 = 8.: [8, 8.0]
225 | 8 + 0 = 9.: [1, 0.0]
226 | 8 + 1 + 0 = 10.: [1, 1.0]
227 | 8 + 1 + 1 = 11.: [17, 15.0]
228 | 8 + 1 = 10.: [59, 45.0]
229 | 8 + 1 = 9.: [2, 0.0]
230 | 8 + 2 + 1 = 12.: [17, 14.0]
231 | 8 + 2 = 10.: [1, 0.0]
232 | 8 + 2 = 11.: [52, 33.0]
233 | 8 + 3 + 0 = 11.: [1, 0.0]
234 | 8 + 3 + 0 = 12.: [1, 1.0]
235 | 8 + 3 + 1 = 13.: [23, 22.0]
236 | 8 + 3 = 12.: [45, 26.0]
237 | 8 + 4 + 1 = 14.: [5, 4.0]
238 | 8 + 4 = 13.: [37, 23.0]
239 | 8 + 5 + 0 = 14.: [1, 0.0]
240 | 8 + 5 + 1 = 15.: [19, 18.0]
241 | 8 + 5 = 14.: [49, 30.0]
242 | 8 + 6 + 1 = 16.: [18, 14.0]
243 | 8 + 6 = 15.: [47, 30.0]
244 | 8 + 7 + 0 = 16.: [4, 0.0]
245 | 8 + 7 + 1 = 16.: [1, 0.0]
246 | 8 + 7 + 1 = 17.: [17, 15.0]
247 | 8 + 7 = 16.: [64, 40.0]
248 | 8 + 8 + 0 = 17.: [2, 0.0]
249 | 8 + 8 + 1 = 18.: [19, 7.0]
250 | 8 + 8 = 16.: [1, 0.0]
251 | 8 + 8 = 17.: [41, 27.0]
252 | 8 + 8 = 18.: [1, 0.0]
253 |
--------------------------------------------------------------------------------
/artifacts/checkpoint/clutrr/symbolic/gpt-4_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | aunt's brother is uncle.: [8, 3.0]
2 | aunt's daughter is cousin.: [3, 0.0]
3 | aunt's father is grandfather.: [6, 4.0]
4 | aunt's mother is grandmother.: [7, 4.0]
5 | aunt's sister is aunt.: [4, 3.0]
6 | aunt's son is cousin.: [2, 0.0]
7 | brother's aunt is aunt.: [5, 5.0]
8 | brother's brother is brother.: [46, 43.0]
9 | brother's brother is uncle.: [2, 0.0]
10 | brother's daughter is niece.: [1, 1.0]
11 | brother's father is father.: [42, 40.0]
12 | brother's grandfather is grandfather.: [22, 22.0]
13 | brother's grandfather is great-grandfather.: [5, 0.0]
14 | brother's grandmother is grandmother.: [16, 16.0]
15 | brother's grandmother is great-grandmother.: [4, 0.0]
16 | brother's mother is grandmother.: [10, 0.0]
17 | brother's mother is mother.: [43, 42.0]
18 | brother's sister is sister.: [50, 46.0]
19 | brother's son is nephew.: [54, 47.0]
20 | brother's uncle is uncle.: [2, 2.0]
21 | brother's wife is sister-in-law.: [8, 8.0]
22 | brother-in-law's daughter is niece.: [4, 4.0]
23 | brother-in-law's father is father-in-law.: [6, 6.0]
24 | brother-in-law's mother is mother-in-law.: [6, 6.0]
25 | brother-in-law's son is nephew.: [3, 3.0]
26 | daughter's aunt is aunt.: [17, 0.0]
27 | daughter's aunt is cousin.: [8, 0.0]
28 | daughter's aunt is sister.: [17, 17.0]
29 | daughter's brother is son.: [50, 36.0]
30 | daughter's daughter is granddaughter.: [27, 19.0]
31 | daughter's father is father.: [17, 0.0]
32 | daughter's father is husband.: [3, 1.0]
33 | daughter's father is self.: [1, 1.0]
34 | daughter's grandfather is father.: [24, 20.0]
35 | daughter's grandfather is grandfather.: [3, 0.0]
36 | daughter's grandmother is grandmother.: [3, 1.0]
37 | daughter's grandmother is mother.: [28, 24.0]
38 | daughter's husband is son-in-law.: [23, 23.0]
39 | daughter's mother is mother.: [2, 1.0]
40 | daughter's mother is self.: [1, 1.0]
41 | daughter's mother is wife.: [15, 5.0]
42 | daughter's sister is daughter.: [23, 20.0]
43 | daughter's sister is niece.: [1, 0.0]
44 | daughter's sister is sister.: [30, 1.0]
45 | daughter's son is grandson.: [49, 38.0]
46 | daughter's uncle is brother.: [29, 26.0]
47 | daughter's uncle is cousin.: [2, 0.0]
48 | daughter's uncle is uncle.: [6, 0.0]
49 | daughter-in-law's daughter is granddaughter.: [5, 5.0]
50 | daughter-in-law's son is grandson.: [4, 4.0]
51 | father's brother is uncle.: [42, 38.0]
52 | father's daughter is daughter.: [4, 0.0]
53 | father's daughter is sister.: [56, 54.0]
54 | father's father is grandfather.: [36, 30.0]
55 | father's mother is grandmother.: [31, 28.0]
56 | father's sister is aunt.: [32, 30.0]
57 | father's son is brother.: [47, 41.0]
58 | father's son is son.: [21, 3.0]
59 | father's wife is mother.: [5, 4.0]
60 | grand-daughter's brother is grandson.: [1, 1.0]
61 | granddaughter's aunt is aunt.: [2, 0.0]
62 | granddaughter's brother is brother.: [1, 0.0]
63 | granddaughter's brother is grandson.: [6, 6.0]
64 | granddaughter's brother is great-grandson.: [9, 0.0]
65 | granddaughter's brother is great-uncle.: [3, 0.0]
66 | granddaughter's brother is nephew.: [4, 0.0]
67 | granddaughter's brother is son.: [3, 0.0]
68 | granddaughter's father is son.: [9, 9.0]
69 | granddaughter's mother is daughter.: [10, 10.0]
70 | granddaughter's sister is granddaughter.: [15, 15.0]
71 | granddaughter's sister is niece.: [6, 0.0]
72 | granddaughter's sister is sister.: [2, 0.0]
73 | granddaughter's uncle is cousin.: [1, 0.0]
74 | granddaughter's uncle is son.: [2, 2.0]
75 | grandfather's daughter is aunt.: [6, 6.0]
76 | grandfather's son is uncle.: [8, 7.0]
77 | grandmother's daughter is aunt.: [7, 7.0]
78 | grandmother's daughter is mother.: [3, 0.0]
79 | grandmother's son is uncle.: [8, 8.0]
80 | grandson's aunt is cousin.: [1, 0.0]
81 | grandson's aunt is niece.: [2, 0.0]
82 | grandson's brother is brother.: [1, 0.0]
83 | grandson's brother is grandson.: [11, 11.0]
84 | grandson's brother is nephew.: [7, 0.0]
85 | grandson's brother is son.: [1, 0.0]
86 | grandson's father is son.: [6, 6.0]
87 | grandson's mother is daughter.: [6, 6.0]
88 | grandson's sister is aunt.: [1, 0.0]
89 | grandson's sister is daughter.: [1, 0.0]
90 | grandson's sister is grand-daughter.: [1, 1.0]
91 | grandson's sister is granddaughter.: [16, 16.0]
92 | grandson's sister is niece.: [12, 0.0]
93 | grandson's sister is sister.: [4, 0.0]
94 | grandson's uncle is cousin.: [2, 0.0]
95 | grandson's uncle is son.: [3, 3.0]
96 | grandson's uncle is uncle.: [2, 0.0]
97 | great-granddaughter's brother is great-grandson.: [3, 0.0]
98 | great-granddaughter's sister is great-granddaughter.: [4, 0.0]
99 | great-grandson's brother is great-grandson.: [2, 0.0]
100 | himself's son is son.: [1, 1.0]
101 | husband's brother is brother-in-law.: [10, 6.0]
102 | husband's daughter is daughter.: [21, 19.0]
103 | husband's daughter is step-daughter.: [22, 3.0]
104 | husband's daughter is stepdaughter.: [14, 1.0]
105 | husband's father is father-in-law.: [12, 12.0]
106 | husband's granddaughter is granddaughter.: [18, 18.0]
107 | husband's granddaughter is great-granddaughter.: [6, 0.0]
108 | husband's grandson is grandchild.: [2, 0.0]
109 | husband's grandson is grandson.: [18, 18.0]
110 | husband's mother is mother-in-law.: [23, 18.0]
111 | husband's sister is sister-in-law.: [5, 2.0]
112 | husband's son is son.: [31, 26.0]
113 | husband's son is step-son.: [2, 0.0]
114 | husband's son is stepson.: [6, 0.0]
115 | mother's brother is uncle.: [32, 29.0]
116 | mother's daughter is daughter.: [11, 2.0]
117 | mother's daughter is granddaughter.: [1, 0.0]
118 | mother's daughter is sister.: [49, 48.0]
119 | mother's father is grandfather.: [29, 29.0]
120 | mother's husband is father.: [8, 5.0]
121 | mother's mother is grandmother.: [29, 26.0]
122 | mother's sister is aunt.: [31, 30.0]
123 | mother's son is brother.: [25, 23.0]
124 | mother's son is son.: [25, 3.0]
125 | nephew's aunt is mother.: [5, 0.0]
126 | nephew's aunt is sister.: [1, 1.0]
127 | nephew's brother is nephew.: [8, 4.0]
128 | nephew's grandfather is father.: [3, 3.0]
129 | nephew's grandfather is grandfather.: [1, 0.0]
130 | nephew's grandmother is grandmother.: [1, 0.0]
131 | nephew's grandmother is mother.: [6, 6.0]
132 | nephew's sister is niece.: [10, 10.0]
133 | nephew's uncle is father.: [1, 0.0]
134 | niece's aunt is cousin.: [2, 0.0]
135 | niece's aunt is sister.: [1, 1.0]
136 | niece's brother is nephew.: [8, 8.0]
137 | niece's grandfather is father.: [1, 1.0]
138 | niece's grandfather is grandfather.: [2, 0.0]
139 | niece's grandmother is grandmother.: [1, 0.0]
140 | niece's grandmother is mother.: [1, 1.0]
141 | niece's sister is niece.: [9, 6.0]
142 | niece's uncle is brother.: [5, 5.0]
143 | niece's uncle is uncle.: [2, 0.0]
144 | self's brother is brother.: [2, 2.0]
145 | sister's aunt is aunt.: [6, 3.0]
146 | sister's brother is brother.: [78, 64.0]
147 | sister's daughter is niece.: [58, 56.0]
148 | sister's father is father.: [47, 47.0]
149 | sister's father is grandfather.: [2, 0.0]
150 | sister's grandfather is grandfather.: [29, 25.0]
151 | sister's grandfather is great-grandfather.: [1, 0.0]
152 | sister's grandmother is grandmother.: [31, 26.0]
153 | sister's grandmother is great-grandmother.: [1, 0.0]
154 | sister's husband is brother-in-law.: [9, 7.0]
155 | sister's mother is grandmother.: [1, 0.0]
156 | sister's mother is mother.: [43, 43.0]
157 | sister's sister is sister.: [55, 55.0]
158 | sister's son is nephew.: [54, 44.0]
159 | sister's uncle is uncle.: [10, 6.0]
160 | sister-in-law's daughter is niece.: [2, 2.0]
161 | sister-in-law's father is father-in-law.: [3, 3.0]
162 | sister-in-law's mother is mother-in-law.: [7, 7.0]
163 | sister-in-law's son is nephew.: [6, 6.0]
164 | son's aunt is aunt.: [2, 0.0]
165 | son's aunt is cousin.: [13, 0.0]
166 | son's aunt is sister.: [31, 30.0]
167 | son's brother is brother.: [2, 1.0]
168 | son's brother is nephew.: [1, 0.0]
169 | son's brother is son.: [44, 40.0]
170 | son's daughter is granddaughter.: [43, 34.0]
171 | son's father is husband.: [15, 5.0]
172 | son's grandfather is father.: [23, 21.0]
173 | son's grandfather is grandfather.: [3, 0.0]
174 | son's grandmother is grandmother.: [8, 2.0]
175 | son's grandmother is mother.: [21, 17.0]
176 | son's mother is grandmother.: [1, 0.0]
177 | son's mother is mother.: [7, 0.0]
178 | son's mother is wife.: [10, 4.0]
179 | son's sister is daughter.: [48, 39.0]
180 | son's sister is sister.: [3, 0.0]
181 | son's son is grandson.: [45, 36.0]
182 | son's uncle is brother.: [23, 20.0]
183 | son's uncle is cousin.: [20, 0.0]
184 | son's uncle is uncle.: [6, 1.0]
185 | son's wife is daughter-in-law.: [31, 31.0]
186 | son-in-law's son is grandson.: [2, 2.0]
187 | step-daughter's aunt is step-sister.: [1, 0.0]
188 | step-daughter's daughter is step-granddaughter.: [3, 0.0]
189 | step-daughter's grandmother is mother.: [2, 2.0]
190 | step-daughter's sister is step-daughter.: [1, 0.0]
191 | step-daughter's uncle is brother.: [1, 1.0]
192 | step-daughter's uncle is step-brother.: [3, 0.0]
193 | stepdaughter's brother is stepson.: [1, 0.0]
194 | stepdaughter's grandfather is father.: [1, 1.0]
195 | stepdaughter's grandmother is mother.: [2, 0.0]
196 | stepdaughter's sister is stepdaughter.: [2, 0.0]
197 | stepson's aunt is step-aunt.: [1, 0.0]
198 | stepson's aunt is step-sister.: [1, 0.0]
199 | stepson's aunt is stepsister.: [1, 0.0]
200 | uncle's brother is uncle.: [5, 3.0]
201 | uncle's daughter is cousin.: [1, 0.0]
202 | uncle's father is grandfather.: [7, 3.0]
203 | uncle's mother is grandmother.: [8, 5.0]
204 | uncle's sister is aunt.: [7, 5.0]
205 | wife's brother is brother-in-law.: [7, 6.0]
206 | wife's daughter is daughter.: [37, 27.0]
207 | wife's daughter is step-daughter.: [6, 0.0]
208 | wife's daughter is stepdaughter.: [9, 1.0]
209 | wife's father is father-in-law.: [18, 13.0]
210 | wife's granddaughter is granddaughter.: [9, 7.0]
211 | wife's granddaughter is great-granddaughter.: [8, 0.0]
212 | wife's grandson is grandson.: [18, 17.0]
213 | wife's mother is mother-in-law.: [18, 14.0]
214 | wife's sister is sister-in-law.: [13, 8.0]
215 | wife's son is son.: [36, 32.0]
216 | wife's son is step-son.: [1, 0.0]
217 | wife's son is stepson.: [11, 0.0]
218 |
--------------------------------------------------------------------------------
/artifacts/checkpoint/arithmetic/base-11/gpt-3.5_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | 0 + 0 = 0.: [13, 1.0]
2 | 0 + 1 = 1.: [8, 0.0]
3 | 0 + 2 = 2.: [12, 0.0]
4 | 0 + 3 = 3.: [22, 1.0]
5 | 0 + 4 = 4.: [7, 2.0]
6 | 0 + 5 = 5.: [8, 0.0]
7 | 0 + 6 = 6.: [4, 0.0]
8 | 0 + 7 = 7.: [7, 1.0]
9 | 0 + 8 = 8.: [7, 1.0]
10 | 0 + 9 = 9.: [15, 1.0]
11 | 0 + A = A.: [29, 4.0]
12 | 1 + 0 = 1.: [14, 0.0]
13 | 1 + 1 + 1 = 3.: [9, 0.0]
14 | 1 + 1 = 2.: [19, 1.0]
15 | 1 + 10 + 1 = 12.: [1, 0.0]
16 | 1 + 10 = 11.: [1, 0.0]
17 | 1 + 2 + 1 = 4.: [8, 0.0]
18 | 1 + 2 = 3.: [15, 4.0]
19 | 1 + 3 + 1 = 5.: [7, 1.0]
20 | 1 + 3 = 4.: [20, 0.0]
21 | 1 + 4 + 1 = 6.: [7, 0.0]
22 | 1 + 4 = 5.: [17, 3.0]
23 | 1 + 5 + 1 = 7.: [9, 0.0]
24 | 1 + 5 = 6.: [20, 2.0]
25 | 1 + 6 + 1 = 8.: [4, 1.0]
26 | 1 + 6 = 7.: [30, 3.0]
27 | 1 + 7 + 1 = 9.: [13, 0.0]
28 | 1 + 7 = 8.: [23, 9.0]
29 | 1 + 8 + 1 = 10.: [4, 0.0]
30 | 1 + 8 + 1 = A.: [2, 0.0]
31 | 1 + 8 = 9.: [12, 1.0]
32 | 1 + 9 + 1 = 11.: [6, 1.0]
33 | 1 + 9 + 1 = B.: [1, 0.0]
34 | 1 + 9 = 10.: [16, 0.0]
35 | 1 + 9 = A.: [11, 4.0]
36 | 1 + A + 1 = 12.: [1, 0.0]
37 | 1 + A + 1 = B.: [1, 0.0]
38 | 1 + A + 1 = C.: [6, 0.0]
39 | 1 + A + 6 = 17.: [1, 0.0]
40 | 1 + A = 11.: [13, 7.0]
41 | 1 + A = B.: [26, 0.0]
42 | 10 + 1 = 11.: [1, 0.0]
43 | 10 + 2 + 1 = 13.: [1, 0.0]
44 | 10 + 2 = 12.: [1, 0.0]
45 | 10 + 3 = 13.: [1, 0.0]
46 | 10 + 8 + 1 = 19.: [1, 0.0]
47 | 10 + 8 = 18.: [1, 0.0]
48 | 10 + 9 + 1 = 20.: [1, 0.0]
49 | 2 + 0 = 2.: [18, 2.0]
50 | 2 + 1 + 1 = 4.: [2, 0.0]
51 | 2 + 1 = 3.: [16, 1.0]
52 | 2 + 10 + 1 = 13.: [1, 0.0]
53 | 2 + 10 = 12.: [1, 0.0]
54 | 2 + 2 + 1 = 5.: [12, 3.0]
55 | 2 + 2 = 4.: [16, 3.0]
56 | 2 + 3 + 1 = 6.: [5, 1.0]
57 | 2 + 3 = 5.: [22, 2.0]
58 | 2 + 4 + 1 = 7.: [7, 0.0]
59 | 2 + 4 = 6.: [9, 0.0]
60 | 2 + 5 + 1 = 8.: [11, 1.0]
61 | 2 + 5 = 7.: [12, 3.0]
62 | 2 + 6 + 1 = 9.: [13, 0.0]
63 | 2 + 6 = 8.: [16, 2.0]
64 | 2 + 7 + 1 = 10.: [11, 0.0]
65 | 2 + 7 + 1 = A.: [1, 0.0]
66 | 2 + 7 = 9.: [16, 4.0]
67 | 2 + 8 + 1 = 11.: [6, 0.0]
68 | 2 + 8 = 10.: [18, 0.0]
69 | 2 + 8 = A.: [28, 17.0]
70 | 2 + 9 + 1 = 12.: [14, 0.0]
71 | 2 + 9 + 1 = C.: [1, 0.0]
72 | 2 + 9 = 11.: [28, 5.0]
73 | 2 + 9 = B.: [5, 0.0]
74 | 2 + A + 1 = C.: [4, 0.0]
75 | 2 + A = 10.: [1, 0.0]
76 | 2 + A = 11.: [2, 1.0]
77 | 2 + A = 12.: [7, 0.0]
78 | 2 + A = 13.: [1, 0.0]
79 | 2 + A = C.: [25, 0.0]
80 | 3 + 0 = 3.: [8, 2.0]
81 | 3 + 1 + 1 = 5.: [14, 0.0]
82 | 3 + 1 = 4.: [22, 4.0]
83 | 3 + 10 + 1 = 14.: [1, 1.0]
84 | 3 + 10 = 13.: [1, 0.0]
85 | 3 + 2 + 1 = 6.: [8, 1.0]
86 | 3 + 2 = 5.: [20, 2.0]
87 | 3 + 3 + 1 = 7.: [6, 0.0]
88 | 3 + 3 = 6.: [8, 0.0]
89 | 3 + 4 + 1 = 8.: [24, 1.0]
90 | 3 + 4 = 7.: [21, 1.0]
91 | 3 + 5 + 1 = 9.: [8, 0.0]
92 | 3 + 5 = 8.: [23, 0.0]
93 | 3 + 6 + 1 = 10.: [4, 0.0]
94 | 3 + 6 + 1 = A.: [3, 0.0]
95 | 3 + 6 = 9.: [23, 3.0]
96 | 3 + 7 + 1 = 11.: [7, 0.0]
97 | 3 + 7 = 10.: [43, 0.0]
98 | 3 + 7 = A.: [11, 3.0]
99 | 3 + 8 + 1 = 12.: [4, 0.0]
100 | 3 + 8 + 1 = C.: [4, 0.0]
101 | 3 + 8 = 11.: [16, 7.0]
102 | 3 + 8 = B.: [20, 0.0]
103 | 3 + 9 + 1 = 13.: [11, 0.0]
104 | 3 + 9 = 12.: [16, 0.0]
105 | 3 + 9 = C.: [7, 0.0]
106 | 3 + A + 1 = 12.: [1, 0.0]
107 | 3 + A + 1 = 14.: [1, 0.0]
108 | 3 + A + 1 = 15.: [1, 0.0]
109 | 3 + A + 1 = E.: [4, 0.0]
110 | 3 + A = 13.: [12, 0.0]
111 | 3 + A = 14.: [4, 0.0]
112 | 3 + A = D.: [22, 0.0]
113 | 4 + 0 = 4.: [6, 0.0]
114 | 4 + 1 + 1 = 6.: [2, 0.0]
115 | 4 + 1 = 5.: [26, 3.0]
116 | 4 + 10 + 1 = 15.: [1, 0.0]
117 | 4 + 2 + 1 = 7.: [5, 0.0]
118 | 4 + 2 = 6.: [13, 2.0]
119 | 4 + 3 + 1 = 8.: [5, 0.0]
120 | 4 + 3 = 7.: [17, 6.0]
121 | 4 + 4 + 1 = 9.: [7, 0.0]
122 | 4 + 4 = 8.: [13, 3.0]
123 | 4 + 5 + 1 = 10.: [4, 0.0]
124 | 4 + 5 + 1 = A.: [2, 0.0]
125 | 4 + 5 = 9.: [7, 1.0]
126 | 4 + 6 + 1 = 11.: [4, 0.0]
127 | 4 + 6 + 1 = B.: [1, 0.0]
128 | 4 + 6 + 3 = 13.: [1, 0.0]
129 | 4 + 6 = 10.: [7, 0.0]
130 | 4 + 6 = A.: [38, 12.0]
131 | 4 + 7 + 1 = 12.: [4, 0.0]
132 | 4 + 7 = 11.: [12, 6.0]
133 | 4 + 7 = B.: [17, 0.0]
134 | 4 + 8 + 1 = 13.: [9, 0.0]
135 | 4 + 8 = 12.: [17, 0.0]
136 | 4 + 8 = C.: [23, 0.0]
137 | 4 + 9 + 1 = 14.: [7, 0.0]
138 | 4 + 9 = 13.: [34, 0.0]
139 | 4 + 9 = D.: [4, 0.0]
140 | 4 + A + 1 = 15.: [2, 0.0]
141 | 4 + A + 1 = F.: [1, 0.0]
142 | 4 + A = 14.: [2, 0.0]
143 | 4 + A = 15.: [1, 0.0]
144 | 4 + A = E.: [17, 0.0]
145 | 5 + 0 = 5.: [13, 0.0]
146 | 5 + 1 + 1 = 7.: [6, 0.0]
147 | 5 + 1 = 6.: [19, 5.0]
148 | 5 + 10 + 1 = 16.: [1, 0.0]
149 | 5 + 10 = 15.: [1, 1.0]
150 | 5 + 2 + 1 = 8.: [5, 2.0]
151 | 5 + 2 = 7.: [16, 0.0]
152 | 5 + 3 + 1 = 9.: [13, 0.0]
153 | 5 + 3 = 8.: [19, 5.0]
154 | 5 + 4 + 1 = 10.: [2, 0.0]
155 | 5 + 4 = 9.: [32, 5.0]
156 | 5 + 5 + 1 = 11.: [8, 1.0]
157 | 5 + 5 = 10.: [9, 0.0]
158 | 5 + 5 = A.: [28, 14.0]
159 | 5 + 6 + 1 = 12.: [14, 0.0]
160 | 5 + 6 + 1 = C.: [1, 0.0]
161 | 5 + 6 = 11.: [5, 2.0]
162 | 5 + 6 = B.: [22, 0.0]
163 | 5 + 7 + 1 = 13.: [9, 0.0]
164 | 5 + 7 + 1 = D.: [2, 0.0]
165 | 5 + 7 = 12.: [8, 0.0]
166 | 5 + 7 = C.: [21, 0.0]
167 | 5 + 8 + 1 = 14.: [11, 0.0]
168 | 5 + 8 + 1 = E.: [3, 0.0]
169 | 5 + 8 = 13.: [1, 0.0]
170 | 5 + 8 = D.: [15, 0.0]
171 | 5 + 9 + 1 = 15.: [17, 0.0]
172 | 5 + 9 + 1 = F.: [2, 0.0]
173 | 5 + 9 = 14.: [30, 0.0]
174 | 5 + 9 = E.: [1, 0.0]
175 | 5 + A + 1 = 10.: [1, 0.0]
176 | 5 + A + 1 = 16.: [2, 0.0]
177 | 5 + A + 1 = F.: [1, 0.0]
178 | 5 + A = 14.: [2, 1.0]
179 | 5 + A = 15.: [19, 0.0]
180 | 5 + A = F.: [27, 0.0]
181 | 6 + 0 = 6.: [14, 0.0]
182 | 6 + 1 + 1 = 8.: [3, 0.0]
183 | 6 + 1 + A = 11.: [1, 0.0]
184 | 6 + 1 = 7.: [19, 1.0]
185 | 6 + 10 = 16.: [3, 0.0]
186 | 6 + 2 + 1 = 9.: [10, 0.0]
187 | 6 + 2 = 8.: [14, 7.0]
188 | 6 + 3 + 1 = 10.: [7, 0.0]
189 | 6 + 3 + 1 = A.: [6, 0.0]
190 | 6 + 3 = 9.: [13, 0.0]
191 | 6 + 4 + 1 = 11.: [7, 0.0]
192 | 6 + 4 + 1 = B.: [2, 0.0]
193 | 6 + 4 + 5 = 15.: [1, 0.0]
194 | 6 + 4 = 10.: [1, 0.0]
195 | 6 + 4 = A.: [51, 15.0]
196 | 6 + 5 + 1 = 12.: [5, 1.0]
197 | 6 + 5 + 9 = 20.: [1, 0.0]
198 | 6 + 5 = 11.: [13, 5.0]
199 | 6 + 5 = B.: [24, 0.0]
200 | 6 + 6 + 1 = 13.: [10, 0.0]
201 | 6 + 6 + 1 = D.: [1, 0.0]
202 | 6 + 6 = 10.: [1, 0.0]
203 | 6 + 6 = 12.: [7, 0.0]
204 | 6 + 6 = C.: [24, 0.0]
205 | 6 + 7 + 1 = 14.: [6, 0.0]
206 | 6 + 7 = 13.: [22, 0.0]
207 | 6 + 7 = D.: [23, 0.0]
208 | 6 + 8 + 1 = 15.: [5, 0.0]
209 | 6 + 8 + 1 = F.: [1, 0.0]
210 | 6 + 8 = 14.: [24, 0.0]
211 | 6 + 8 = E.: [5, 0.0]
212 | 6 + 9 + 1 = 16.: [9, 0.0]
213 | 6 + 9 = 15.: [28, 0.0]
214 | 6 + 9 = F.: [17, 0.0]
215 | 6 + A + 1 = 11.: [1, 0.0]
216 | 6 + A + 1 = 17.: [3, 0.0]
217 | 6 + A = 10.: [23, 0.0]
218 | 6 + A = 16.: [9, 0.0]
219 | 6 + A = 17.: [1, 0.0]
220 | 6 + A = 20.: [1, 0.0]
221 | 7 + 0 = 7.: [18, 2.0]
222 | 7 + 1 + 1 = 9.: [10, 0.0]
223 | 7 + 1 = 8.: [27, 5.0]
224 | 7 + 2 + 1 = 10.: [6, 0.0]
225 | 7 + 2 + 1 = A.: [3, 0.0]
226 | 7 + 2 = 9.: [14, 3.0]
227 | 7 + 3 + 1 = 11.: [3, 0.0]
228 | 7 + 3 = 10.: [8, 0.0]
229 | 7 + 3 = A.: [15, 5.0]
230 | 7 + 4 + 1 = 12.: [9, 2.0]
231 | 7 + 4 + 1 = C.: [2, 0.0]
232 | 7 + 4 = 11.: [15, 9.0]
233 | 7 + 4 = A.: [1, 0.0]
234 | 7 + 4 = B.: [33, 0.0]
235 | 7 + 5 + 1 = 13.: [10, 0.0]
236 | 7 + 5 = 12.: [10, 0.0]
237 | 7 + 5 = C.: [39, 0.0]
238 | 7 + 6 + 1 = 14.: [4, 0.0]
239 | 7 + 6 = 13.: [20, 0.0]
240 | 7 + 6 = D.: [17, 0.0]
241 | 7 + 7 + 1 = 15.: [9, 0.0]
242 | 7 + 7 + 1 = F.: [2, 0.0]
243 | 7 + 7 = 14.: [39, 0.0]
244 | 7 + 7 = E.: [1, 0.0]
245 | 7 + 8 + 1 = 16.: [3, 0.0]
246 | 7 + 8 = 15.: [29, 0.0]
247 | 7 + 9 + 1 = 17.: [19, 0.0]
248 | 7 + 9 = 10.: [1, 0.0]
249 | 7 + 9 = 16.: [42, 0.0]
250 | 7 + A + 1 = 12.: [1, 0.0]
251 | 7 + A + 1 = 18.: [1, 0.0]
252 | 7 + A = 11.: [15, 0.0]
253 | 7 + A = 17.: [20, 0.0]
254 | 8 + 0 = 8.: [14, 2.0]
255 | 8 + 1 + 1 = A.: [2, 0.0]
256 | 8 + 1 = 9.: [27, 5.0]
257 | 8 + 2 + 1 = 11.: [6, 0.0]
258 | 8 + 2 + 1 = B.: [2, 0.0]
259 | 8 + 2 = 10.: [2, 0.0]
260 | 8 + 2 = A.: [46, 16.0]
261 | 8 + 3 + 1 = 12.: [14, 0.0]
262 | 8 + 3 + 1 = C.: [3, 0.0]
263 | 8 + 3 = 11.: [9, 4.0]
264 | 8 + 3 = B.: [31, 0.0]
265 | 8 + 4 + 1 = 13.: [7, 0.0]
266 | 8 + 4 = 12.: [8, 0.0]
267 | 8 + 4 = C.: [19, 0.0]
268 | 8 + 5 + 1 = 14.: [3, 0.0]
269 | 8 + 5 + 1 = E.: [2, 0.0]
270 | 8 + 5 = 13.: [16, 0.0]
271 | 8 + 5 = D.: [24, 0.0]
272 | 8 + 6 + 1 = 15.: [10, 0.0]
273 | 8 + 6 + 1 = F.: [1, 0.0]
274 | 8 + 6 + 5 = 19.: [2, 0.0]
275 | 8 + 6 = 14.: [23, 0.0]
276 | 8 + 6 = E.: [10, 0.0]
277 | 8 + 7 + 1 = 16.: [7, 0.0]
278 | 8 + 7 = 15.: [28, 0.0]
279 | 8 + 7 = F.: [10, 0.0]
280 | 8 + 8 + 1 = 17.: [15, 0.0]
281 | 8 + 8 + 5 = 21.: [1, 0.0]
282 | 8 + 8 = 10.: [4, 0.0]
283 | 8 + 8 = 16.: [23, 0.0]
284 | 8 + 9 + 1 = 18.: [7, 0.0]
285 | 8 + 9 = 11.: [2, 0.0]
286 | 8 + 9 = 17.: [28, 0.0]
287 | 8 + A + 1 = 19.: [2, 0.0]
288 | 8 + A + 1 = 24.: [1, 0.0]
289 | 8 + A = 11.: [1, 0.0]
290 | 8 + A = 12.: [18, 0.0]
291 | 8 + A = 13.: [4, 0.0]
292 | 8 + A = 17.: [2, 2.0]
293 | 8 + A = 18.: [15, 0.0]
294 | 8 + A = 19.: [3, 0.0]
295 | 9 + 0 = 9.: [5, 0.0]
296 | 9 + 1 + 1 = 11.: [7, 0.0]
297 | 9 + 1 = 10.: [12, 0.0]
298 | 9 + 1 = A.: [13, 4.0]
299 | 9 + 10 = 19.: [1, 0.0]
300 | 9 + 2 + 1 = 12.: [8, 0.0]
301 | 9 + 2 = 11.: [32, 9.0]
302 | 9 + 2 = B.: [15, 0.0]
303 | 9 + 3 + 1 = 13.: [4, 0.0]
304 | 9 + 3 = 12.: [22, 0.0]
305 | 9 + 3 = C.: [16, 0.0]
306 | 9 + 4 + 1 = 14.: [5, 0.0]
307 | 9 + 4 = 13.: [21, 0.0]
308 | 9 + 4 = D.: [9, 0.0]
309 | 9 + 5 + 1 = 15.: [7, 0.0]
310 | 9 + 5 = 13.: [3, 3.0]
311 | 9 + 5 = 14.: [33, 0.0]
312 | 9 + 5 = E.: [3, 0.0]
313 | 9 + 6 + 1 = 16.: [5, 0.0]
314 | 9 + 6 = 15.: [38, 0.0]
315 | 9 + 6 = F.: [2, 0.0]
316 | 9 + 7 + 1 = 17.: [6, 2.0]
317 | 9 + 7 = 10.: [1, 0.0]
318 | 9 + 7 = 16.: [19, 0.0]
319 | 9 + 8 + 1 = 18.: [2, 0.0]
320 | 9 + 8 + 5 = 22.: [2, 0.0]
321 | 9 + 8 = 17.: [35, 0.0]
322 | 9 + 9 + 1 = 19.: [27, 0.0]
323 | 9 + 9 = 18.: [18, 0.0]
324 | 9 + A + 1 = 15.: [1, 0.0]
325 | 9 + A = 13.: [6, 0.0]
326 | 9 + A = 14.: [1, 0.0]
327 | 9 + A = 15.: [1, 0.0]
328 | 9 + A = 19.: [25, 0.0]
329 | 9 + A = 1A.: [1, 0.0]
330 | 9 + A = 20.: [2, 0.0]
331 | A + 0 = A.: [24, 9.0]
332 | A + 1 + 1 = B.: [2, 0.0]
333 | A + 1 = B.: [24, 0.0]
334 | A + 2 = 12.: [1, 0.0]
335 | A + 2 = C.: [31, 0.0]
336 | A + 3 + 1 = 10.: [1, 0.0]
337 | A + 3 + 1 = 14.: [1, 0.0]
338 | A + 3 + 1 = 15.: [3, 0.0]
339 | A + 3 + 1 = E.: [6, 0.0]
340 | A + 3 + 1 = F.: [1, 0.0]
341 | A + 3 = D.: [27, 0.0]
342 | A + 4 + 1 = 14.: [2, 0.0]
343 | A + 4 + 1 = 15.: [2, 0.0]
344 | A + 4 = E.: [24, 0.0]
345 | A + 5 + 1 = 11.: [3, 0.0]
346 | A + 5 + 1 = 17.: [2, 0.0]
347 | A + 5 = 10.: [1, 0.0]
348 | A + 5 = 14.: [1, 1.0]
349 | A + 5 = 15.: [6, 0.0]
350 | A + 5 = F.: [22, 1.0]
351 | A + 6 + 1 = 11.: [2, 0.0]
352 | A + 6 + 1 = 12.: [1, 0.0]
353 | A + 6 + 1 = 17.: [1, 0.0]
354 | A + 6 = 10.: [34, 0.0]
355 | A + 6 = 11.: [2, 0.0]
356 | A + 6 = 16.: [8, 0.0]
357 | A + 6 = 18.: [1, 0.0]
358 | A + 7 + 1 = 12.: [1, 0.0]
359 | A + 7 + 1 = 16.: [1, 0.0]
360 | A + 7 + 1 = 18.: [3, 0.0]
361 | A + 7 + 1 = 19.: [1, 0.0]
362 | A + 7 = 10.: [1, 0.0]
363 | A + 7 = 11.: [17, 0.0]
364 | A + 7 = 12.: [3, 0.0]
365 | A + 7 = 16.: [2, 2.0]
366 | A + 7 = 17.: [10, 0.0]
367 | A + 7 = 18.: [2, 0.0]
368 | A + 8 + 1 = 14.: [2, 0.0]
369 | A + 8 + 1 = 19.: [2, 0.0]
370 | A + 8 = 10.: [1, 0.0]
371 | A + 8 = 12.: [19, 0.0]
372 | A + 8 = 13.: [9, 0.0]
373 | A + 8 = 17.: [3, 2.0]
374 | A + 8 = 18.: [6, 0.0]
375 | A + 8 = 19.: [3, 0.0]
376 | A + 8 = 1A.: [1, 0.0]
377 | A + 9 + 1 = 14.: [1, 0.0]
378 | A + 9 + 1 = 15.: [1, 0.0]
379 | A + 9 + 1 = 1A.: [5, 0.0]
380 | A + 9 + 1 = 20.: [1, 0.0]
381 | A + 9 = 13.: [19, 0.0]
382 | A + 9 = 14.: [3, 0.0]
383 | A + 9 = 15.: [2, 0.0]
384 | A + 9 = 19.: [19, 0.0]
385 | A + A + 1 = 15.: [1, 0.0]
386 | A + A + 1 = 16.: [1, 0.0]
387 | A + A + 1 = 17.: [1, 0.0]
388 | A + A + 1 = 20.: [1, 0.0]
389 | A + A + 1 = 21.: [3, 0.0]
390 | A + A = 10.: [2, 0.0]
391 | A + A = 12.: [1, 0.0]
392 | A + A = 14.: [35, 0.0]
393 |
--------------------------------------------------------------------------------
/artifacts/checkpoint/arithmetic/base-11/gpt-4_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | 0 + 0 = 0.: [13, 4.0]
2 | 0 + 1 = 1.: [8, 7.0]
3 | 0 + 10 = 10.: [2, 0.0]
4 | 0 + 2 = 2.: [12, 8.0]
5 | 0 + 3 = 3.: [22, 15.0]
6 | 0 + 4 = 4.: [6, 4.0]
7 | 0 + 5 = 5.: [9, 6.0]
8 | 0 + 6 = 6.: [4, 3.0]
9 | 0 + 7 = 7.: [7, 5.0]
10 | 0 + 8 = 8.: [7, 6.0]
11 | 0 + 9 = 9.: [15, 7.0]
12 | 0 + A = A.: [25, 15.0]
13 | 1 + 0 = 1.: [14, 14.0]
14 | 1 + 1 + 1 = 3.: [9, 7.0]
15 | 1 + 1 = 2.: [10, 8.0]
16 | 1 + 2 + 1 = 4.: [10, 7.0]
17 | 1 + 2 = 3.: [13, 11.0]
18 | 1 + 3 + 1 = 5.: [15, 11.0]
19 | 1 + 3 = 4.: [13, 7.0]
20 | 1 + 4 + 1 = 6.: [6, 5.0]
21 | 1 + 4 + A = 14.: [1, 0.0]
22 | 1 + 4 = 5.: [15, 11.0]
23 | 1 + 5 + 1 = 7.: [9, 3.0]
24 | 1 + 5 = 6.: [18, 12.0]
25 | 1 + 6 + 1 = 8.: [9, 6.0]
26 | 1 + 6 = 7.: [24, 18.0]
27 | 1 + 7 + 1 = 8.: [5, 0.0]
28 | 1 + 7 + 1 = 9.: [14, 10.0]
29 | 1 + 7 = 8.: [17, 12.0]
30 | 1 + 8 + 1 = 10.: [5, 0.0]
31 | 1 + 8 + 1 = 9.: [2, 0.0]
32 | 1 + 8 + 1 = A.: [1, 1.0]
33 | 1 + 8 = 9.: [11, 8.0]
34 | 1 + 9 + 1 = 10.: [7, 3.0]
35 | 1 + 9 + 1 = 11.: [5, 1.0]
36 | 1 + 9 = 10.: [13, 0.0]
37 | 1 + 9 = A.: [9, 8.0]
38 | 1 + A + 1 = 11.: [10, 4.0]
39 | 1 + A + 1 = 12.: [4, 0.0]
40 | 1 + A + 1 = 21.: [1, 0.0]
41 | 1 + A = 10.: [30, 26.0]
42 | 1 + A = 11.: [1, 0.0]
43 | 1 + A = B.: [1, 0.0]
44 | 10 + 2 = 11.: [1, 0.0]
45 | 10 + 4 = 14.: [1, 1.0]
46 | 10 + 6 = 16.: [2, 0.0]
47 | 10 + 9 + 1 = 19.: [1, 0.0]
48 | 10 + 9 = 19.: [1, 1.0]
49 | 2 + 0 = 2.: [17, 12.0]
50 | 2 + 1 + 1 = 4.: [2, 2.0]
51 | 2 + 1 = 3.: [15, 5.0]
52 | 2 + 10 = 11.: [1, 1.0]
53 | 2 + 10 = 12.: [1, 0.0]
54 | 2 + 2 + 1 = 5.: [13, 10.0]
55 | 2 + 2 = 4.: [15, 11.0]
56 | 2 + 3 + 1 = 6.: [7, 6.0]
57 | 2 + 3 = 5.: [20, 18.0]
58 | 2 + 4 + 1 = 7.: [8, 8.0]
59 | 2 + 4 = 6.: [8, 7.0]
60 | 2 + 5 + 1 = 8.: [15, 14.0]
61 | 2 + 5 = 7.: [9, 7.0]
62 | 2 + 6 + 1 = 10.: [1, 0.0]
63 | 2 + 6 + 1 = 8.: [1, 0.0]
64 | 2 + 6 + 1 = 9.: [12, 8.0]
65 | 2 + 6 = 8.: [15, 14.0]
66 | 2 + 7 + 1 = 10.: [6, 0.0]
67 | 2 + 7 + 1 = 9.: [2, 0.0]
68 | 2 + 7 + 1 = A.: [2, 2.0]
69 | 2 + 7 = 9.: [17, 12.0]
70 | 2 + 8 + 1 = 10.: [8, 5.0]
71 | 2 + 8 + 1 = 11.: [3, 1.0]
72 | 2 + 8 = 10.: [33, 0.0]
73 | 2 + 8 = A.: [8, 5.0]
74 | 2 + 9 + 1 = 11.: [11, 2.0]
75 | 2 + 9 + 1 = 12.: [1, 0.0]
76 | 2 + 9 = 10.: [30, 22.0]
77 | 2 + 9 = 11.: [6, 5.0]
78 | 2 + A + 1 = 12.: [5, 3.0]
79 | 2 + A + 1 = 13.: [1, 0.0]
80 | 2 + A + 1 = 14.: [1, 0.0]
81 | 2 + A + 1 = 16.: [1, 0.0]
82 | 2 + A = 10.: [1, 0.0]
83 | 2 + A = 11.: [26, 10.0]
84 | 2 + A = 12.: [2, 0.0]
85 | 3 + 0 = 3.: [8, 5.0]
86 | 3 + 1 + 1 = 5.: [25, 21.0]
87 | 3 + 1 = 4.: [9, 6.0]
88 | 3 + 2 + 1 = 6.: [14, 10.0]
89 | 3 + 2 = 5.: [15, 14.0]
90 | 3 + 3 + 1 = 7.: [8, 8.0]
91 | 3 + 3 = 6.: [6, 3.0]
92 | 3 + 4 + 1 = 8.: [32, 26.0]
93 | 3 + 4 = 7.: [13, 11.0]
94 | 3 + 5 + 1 = 9.: [16, 8.0]
95 | 3 + 5 = 8.: [15, 10.0]
96 | 3 + 6 + 1 = 10.: [13, 0.0]
97 | 3 + 6 = 10.: [1, 0.0]
98 | 3 + 6 = 9.: [16, 12.0]
99 | 3 + 7 + 1 = 10.: [8, 7.0]
100 | 3 + 7 + 1 = 11.: [4, 3.0]
101 | 3 + 7 = 10.: [31, 1.0]
102 | 3 + 7 = 11.: [1, 0.0]
103 | 3 + 7 = 9.: [1, 0.0]
104 | 3 + 7 = A.: [13, 9.0]
105 | 3 + 8 + 1 = 11.: [11, 2.0]
106 | 3 + 8 = 10.: [31, 25.0]
107 | 3 + 8 = 11.: [3, 2.0]
108 | 3 + 9 + 1 = 12.: [6, 5.0]
109 | 3 + 9 + 1 = 13.: [2, 0.0]
110 | 3 + 9 = 11.: [22, 12.0]
111 | 3 + 9 = 12.: [2, 0.0]
112 | 3 + A + 1 = 13.: [6, 5.0]
113 | 3 + A + 1 = 14.: [2, 0.0]
114 | 3 + A = 12.: [33, 30.0]
115 | 3 + A = 13.: [1, 0.0]
116 | 4 + 0 = 4.: [7, 6.0]
117 | 4 + 1 + 1 = 6.: [6, 6.0]
118 | 4 + 1 = 5.: [22, 10.0]
119 | 4 + 2 + 1 = 7.: [9, 8.0]
120 | 4 + 2 = 6.: [9, 5.0]
121 | 4 + 3 + 1 = 8.: [8, 6.0]
122 | 4 + 3 = 7.: [13, 8.0]
123 | 4 + 4 + 1 = 9.: [11, 7.0]
124 | 4 + 4 = 8.: [8, 7.0]
125 | 4 + 5 + 1 = 10.: [5, 0.0]
126 | 4 + 5 + 1 = A.: [2, 2.0]
127 | 4 + 5 = 9.: [6, 4.0]
128 | 4 + 6 + 1 = 10.: [12, 8.0]
129 | 4 + 6 + 1 = 11.: [2, 0.0]
130 | 4 + 6 = 10.: [21, 0.0]
131 | 4 + 6 = 9.: [2, 0.0]
132 | 4 + 6 = A.: [14, 11.0]
133 | 4 + 7 + 1 = 11.: [11, 1.0]
134 | 4 + 7 + 1 = 12.: [1, 0.0]
135 | 4 + 7 = 10.: [18, 15.0]
136 | 4 + 7 = 11.: [1, 0.0]
137 | 4 + 7 = A.: [1, 0.0]
138 | 4 + 7 = B.: [1, 0.0]
139 | 4 + 8 + 1 = 12.: [7, 5.0]
140 | 4 + 8 + 1 = 13.: [2, 0.0]
141 | 4 + 8 = 11.: [36, 16.0]
142 | 4 + 8 = 12.: [3, 0.0]
143 | 4 + 9 + 1 = 13.: [8, 7.0]
144 | 4 + 9 = 12.: [34, 31.0]
145 | 4 + A + 1 = 14.: [2, 0.0]
146 | 4 + A = 12.: [1, 0.0]
147 | 4 + A = 13.: [16, 16.0]
148 | 4 + A = 14.: [1, 0.0]
149 | 5 + 0 = 5.: [11, 10.0]
150 | 5 + 1 + 1 = 7.: [10, 8.0]
151 | 5 + 1 = 6.: [18, 13.0]
152 | 5 + 2 + 1 = 8.: [9, 8.0]
153 | 5 + 2 = 7.: [12, 6.0]
154 | 5 + 3 + 1 = 9.: [13, 8.0]
155 | 5 + 3 = 8.: [18, 14.0]
156 | 5 + 4 + 1 = 10.: [13, 2.0]
157 | 5 + 4 + 1 = 9.: [1, 0.0]
158 | 5 + 4 + 1 = A.: [1, 1.0]
159 | 5 + 4 = 9.: [19, 11.0]
160 | 5 + 5 + 1 = 10.: [12, 7.0]
161 | 5 + 5 = 10.: [23, 0.0]
162 | 5 + 5 = 9.: [2, 0.0]
163 | 5 + 5 = A.: [8, 6.0]
164 | 5 + 6 + 1 = 11.: [16, 4.0]
165 | 5 + 6 + 1 = 12.: [1, 0.0]
166 | 5 + 6 = 10.: [22, 18.0]
167 | 5 + 7 + 1 = 12.: [19, 13.0]
168 | 5 + 7 = 11.: [18, 4.0]
169 | 5 + 7 = 12.: [1, 0.0]
170 | 5 + 8 + 1 = 13.: [13, 7.0]
171 | 5 + 8 + 1 = 14.: [3, 1.0]
172 | 5 + 8 = 12.: [12, 10.0]
173 | 5 + 8 = 13.: [2, 0.0]
174 | 5 + 9 + 1 = 14.: [20, 11.0]
175 | 5 + 9 + 1 = 15.: [1, 0.0]
176 | 5 + 9 = 13.: [25, 21.0]
177 | 5 + 9 = 14.: [2, 1.0]
178 | 5 + A + 1 = 15.: [9, 5.0]
179 | 5 + A = 14.: [40, 31.0]
180 | 5 + A = 15.: [1, 0.0]
181 | 6 + 0 = 6.: [14, 10.0]
182 | 6 + 1 + 1 = 8.: [11, 6.0]
183 | 6 + 1 = 7.: [11, 5.0]
184 | 6 + 2 + 1 = 8.: [1, 0.0]
185 | 6 + 2 + 1 = 9.: [17, 9.0]
186 | 6 + 2 = 8.: [5, 2.0]
187 | 6 + 3 + 1 = 10.: [14, 2.0]
188 | 6 + 3 + 1 = 9.: [1, 0.0]
189 | 6 + 3 + 1 = A.: [2, 2.0]
190 | 6 + 3 = 9.: [9, 7.0]
191 | 6 + 4 + 1 = 10.: [19, 17.0]
192 | 6 + 4 + 1 = 11.: [3, 1.0]
193 | 6 + 4 = 10.: [24, 0.0]
194 | 6 + 4 = A.: [13, 10.0]
195 | 6 + 5 + 1 = 11.: [11, 1.0]
196 | 6 + 5 = 10.: [28, 26.0]
197 | 6 + 5 = 11.: [2, 1.0]
198 | 6 + 6 + 1 = 12.: [11, 7.0]
199 | 6 + 6 + 1 = 13.: [2, 0.0]
200 | 6 + 6 = 10.: [2, 0.0]
201 | 6 + 6 = 11.: [26, 13.0]
202 | 6 + 6 = 12.: [2, 2.0]
203 | 6 + 7 + 1 = 13.: [6, 6.0]
204 | 6 + 7 + 1 = 14.: [1, 1.0]
205 | 6 + 7 = 12.: [43, 34.0]
206 | 6 + 8 + 1 = 14.: [12, 3.0]
207 | 6 + 8 = 13.: [22, 19.0]
208 | 6 + 8 = 14.: [1, 1.0]
209 | 6 + 9 + 1 = 15.: [5, 5.0]
210 | 6 + 9 + 1 = 16.: [2, 0.0]
211 | 6 + 9 = 14.: [44, 25.0]
212 | 6 + A + 1 = 16.: [6, 5.0]
213 | 6 + A + 1 = 17.: [1, 0.0]
214 | 6 + A + 1 = 20.: [1, 0.0]
215 | 6 + A = 14.: [1, 0.0]
216 | 6 + A = 15.: [28, 20.0]
217 | 7 + 0 = 7.: [18, 13.0]
218 | 7 + 1 + 1 = 9.: [22, 14.0]
219 | 7 + 1 = 8.: [13, 10.0]
220 | 7 + 2 + 1 = 10.: [7, 0.0]
221 | 7 + 2 + 1 = 9.: [4, 0.0]
222 | 7 + 2 = 10.: [1, 0.0]
223 | 7 + 2 = 9.: [11, 9.0]
224 | 7 + 3 + 1 = 10.: [3, 1.0]
225 | 7 + 3 + 1 = 11.: [5, 1.0]
226 | 7 + 3 = 10.: [16, 0.0]
227 | 7 + 3 = A.: [3, 3.0]
228 | 7 + 4 + 1 = 11.: [16, 5.0]
229 | 7 + 4 + 1 = 12.: [1, 1.0]
230 | 7 + 4 = 10.: [38, 27.0]
231 | 7 + 4 = 11.: [3, 3.0]
232 | 7 + 4 = B.: [2, 0.0]
233 | 7 + 5 + 1 = 12.: [11, 5.0]
234 | 7 + 5 + 1 = 13.: [5, 0.0]
235 | 7 + 5 + 2 = 14.: [1, 0.0]
236 | 7 + 5 = 11.: [42, 16.0]
237 | 7 + 6 + 1 = 13.: [11, 4.0]
238 | 7 + 6 + 1 = 14.: [2, 2.0]
239 | 7 + 6 = 12.: [29, 25.0]
240 | 7 + 7 + 1 = 14.: [17, 7.0]
241 | 7 + 7 + 1 = 15.: [1, 0.0]
242 | 7 + 7 = 13.: [34, 25.0]
243 | 7 + 8 + 1 = 15.: [3, 2.0]
244 | 7 + 8 + 1 = 16.: [1, 0.0]
245 | 7 + 8 = 14.: [26, 7.0]
246 | 7 + 8 = 15.: [1, 0.0]
247 | 7 + 9 + 1 = 16.: [12, 10.0]
248 | 7 + 9 + 1 = 17.: [2, 0.0]
249 | 7 + 9 = 15.: [41, 33.0]
250 | 7 + A + 1 = 17.: [5, 3.0]
251 | 7 + A + 1 = 18.: [5, 0.0]
252 | 7 + A + 1 = 19.: [1, 0.0]
253 | 7 + A + 1 = 20.: [1, 0.0]
254 | 7 + A = 10.: [3, 0.0]
255 | 7 + A = 15.: [2, 0.0]
256 | 7 + A = 16.: [20, 12.0]
257 | 8 + 0 = 8.: [14, 12.0]
258 | 8 + 1 + 1 = 10.: [7, 0.0]
259 | 8 + 1 + 1 = 9.: [1, 0.0]
260 | 8 + 1 = 9.: [20, 17.0]
261 | 8 + 2 + 1 = 10.: [10, 9.0]
262 | 8 + 2 + 1 = 11.: [10, 5.0]
263 | 8 + 2 = 10.: [35, 0.0]
264 | 8 + 2 = 9.: [1, 0.0]
265 | 8 + 2 = A.: [1, 1.0]
266 | 8 + 3 + 1 = 11.: [24, 3.0]
267 | 8 + 3 = 10.: [24, 21.0]
268 | 8 + 3 = 11.: [7, 6.0]
269 | 8 + 4 + 1 = 12.: [12, 7.0]
270 | 8 + 4 + 1 = 13.: [1, 0.0]
271 | 8 + 4 = 11.: [21, 7.0]
272 | 8 + 5 + 1 = 13.: [9, 8.0]
273 | 8 + 5 + 1 = 14.: [1, 0.0]
274 | 8 + 5 = 12.: [32, 27.0]
275 | 8 + 5 = 13.: [2, 0.0]
276 | 8 + 6 + 1 = 14.: [12, 3.0]
277 | 8 + 6 = 13.: [33, 28.0]
278 | 8 + 7 + 1 = 15.: [12, 7.0]
279 | 8 + 7 + 1 = 16.: [1, 0.0]
280 | 8 + 7 = 14.: [32, 15.0]
281 | 8 + 8 + 1 = 16.: [19, 13.0]
282 | 8 + 8 = 14.: [2, 0.0]
283 | 8 + 8 = 15.: [17, 12.0]
284 | 8 + 8 = 16.: [3, 0.0]
285 | 8 + 9 + 1 = 17.: [7, 5.0]
286 | 8 + 9 = 16.: [29, 23.0]
287 | 8 + 9 = 17.: [1, 0.0]
288 | 8 + A + 1 = 18.: [14, 12.0]
289 | 8 + A + 1 = 19.: [2, 0.0]
290 | 8 + A + 1 = 1A.: [1, 0.0]
291 | 8 + A + 1 = 20.: [1, 0.0]
292 | 8 + A = 11.: [1, 0.0]
293 | 8 + A = 17.: [25, 23.0]
294 | 8 + A = 18.: [1, 0.0]
295 | 9 + 0 = 9.: [5, 3.0]
296 | 9 + 1 + 1 = 10.: [12, 8.0]
297 | 9 + 1 + 1 = 11.: [5, 1.0]
298 | 9 + 1 = 10.: [13, 0.0]
299 | 9 + 1 = A.: [3, 3.0]
300 | 9 + 10 + 1 = 19.: [1, 1.0]
301 | 9 + 10 = 18.: [1, 1.0]
302 | 9 + 2 + 1 = 11.: [14, 8.0]
303 | 9 + 2 = 10.: [31, 29.0]
304 | 9 + 2 = 11.: [6, 3.0]
305 | 9 + 2 = B.: [1, 0.0]
306 | 9 + 3 + 1 = 12.: [2, 2.0]
307 | 9 + 3 + 1 = 13.: [3, 0.0]
308 | 9 + 3 = 11.: [36, 18.0]
309 | 9 + 3 = 12.: [2, 0.0]
310 | 9 + 4 + 1 = 13.: [9, 4.0]
311 | 9 + 4 + 1 = 14.: [1, 0.0]
312 | 9 + 4 = 12.: [23, 19.0]
313 | 9 + 4 = 13.: [1, 0.0]
314 | 9 + 5 + 1 = 14.: [10, 6.0]
315 | 9 + 5 + 1 = 15.: [1, 0.0]
316 | 9 + 5 = 13.: [36, 29.0]
317 | 9 + 6 + 1 = 15.: [14, 5.0]
318 | 9 + 6 + 1 = 16.: [2, 0.0]
319 | 9 + 6 = 14.: [27, 11.0]
320 | 9 + 6 = 15.: [2, 1.0]
321 | 9 + 7 + 1 = 16.: [7, 6.0]
322 | 9 + 7 = 14.: [1, 0.0]
323 | 9 + 7 = 15.: [15, 13.0]
324 | 9 + 7 = 16.: [1, 0.0]
325 | 9 + 8 + 1 = 17.: [3, 1.0]
326 | 9 + 8 = 16.: [36, 30.0]
327 | 9 + 9 + 1 = 18.: [5, 3.0]
328 | 9 + 9 + 1 = 19.: [1, 0.0]
329 | 9 + 9 = 17.: [31, 28.0]
330 | 9 + 9 = 18.: [6, 2.0]
331 | 9 + A + 1 = 19.: [9, 2.0]
332 | 9 + A = 16.: [1, 0.0]
333 | 9 + A = 17.: [3, 0.0]
334 | 9 + A = 18.: [21, 15.0]
335 | 9 + A = 19.: [1, 0.0]
336 | A + 0 = A.: [24, 22.0]
337 | A + 1 + 1 = 11.: [1, 1.0]
338 | A + 1 + 1 = 12.: [5, 0.0]
339 | A + 1 = 10.: [17, 10.0]
340 | A + 1 = 11.: [3, 2.0]
341 | A + 2 + 1 = 12.: [3, 0.0]
342 | A + 2 + 1 = 14.: [1, 0.0]
343 | A + 2 = 11.: [20, 10.0]
344 | A + 2 = 12.: [1, 0.0]
345 | A + 3 + 1 = 13.: [12, 9.0]
346 | A + 3 + 1 = 15.: [1, 0.0]
347 | A + 3 = 12.: [20, 15.0]
348 | A + 3 = 13.: [1, 0.0]
349 | A + 4 + 1 = 14.: [8, 4.0]
350 | A + 4 = 13.: [12, 6.0]
351 | A + 4 = 14.: [1, 1.0]
352 | A + 5 + 1 = 15.: [4, 2.0]
353 | A + 5 = 14.: [30, 26.0]
354 | A + 6 + 1 = 16.: [11, 4.0]
355 | A + 6 + 1 = 17.: [4, 0.0]
356 | A + 6 = 10.: [2, 0.0]
357 | A + 6 = 15.: [21, 15.0]
358 | A + 7 + 1 = 17.: [5, 3.0]
359 | A + 7 + 1 = 18.: [2, 1.0]
360 | A + 7 = 0.: [1, 0.0]
361 | A + 7 = 10.: [7, 0.0]
362 | A + 7 = 11.: [1, 0.0]
363 | A + 7 = 16.: [20, 18.0]
364 | A + 8 + 1 = 18.: [4, 3.0]
365 | A + 8 + 1 = 19.: [2, 1.0]
366 | A + 8 + 1 = 20.: [3, 0.0]
367 | A + 8 + 1 = 22.: [1, 0.0]
368 | A + 8 = 11.: [2, 0.0]
369 | A + 8 = 17.: [19, 14.0]
370 | A + 8 = 18.: [1, 0.0]
371 | A + 8 = 19.: [2, 0.0]
372 | A + 8 = 1A.: [1, 0.0]
373 | A + 8 = 20.: [1, 0.0]
374 | A + 8 = 21.: [3, 0.0]
375 | A + 9 + 1 = 19.: [4, 1.0]
376 | A + 9 + 1 = 20.: [1, 1.0]
377 | A + 9 + 1 = 21.: [2, 0.0]
378 | A + 9 + 1 = 22.: [1, 0.0]
379 | A + 9 + 1 = 23.: [2, 0.0]
380 | A + 9 = 12.: [3, 0.0]
381 | A + 9 = 16.: [1, 0.0]
382 | A + 9 = 18.: [19, 17.0]
383 | A + 9 = 19.: [6, 2.0]
384 | A + 9 = 22.: [6, 0.0]
385 | A + A + 1 = 20.: [3, 0.0]
386 | A + A + 1 = 21.: [4, 1.0]
387 | A + A + 1 = 22.: [1, 0.0]
388 | A + A = 14.: [9, 0.0]
389 | A + A = 19.: [2, 1.0]
390 | A + A = 20.: [14, 9.0]
391 | A + A = 21.: [4, 0.0]
392 |
--------------------------------------------------------------------------------
/artifacts/checkpoint/clutrr/symbolic/gpt-3.5_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | alma's son is son.: [1, 0.0]
2 | aunt's aunt is aunt.: [4, 1.0]
3 | aunt's brother is brother.: [1, 0.0]
4 | aunt's brother is uncle.: [9, 3.0]
5 | aunt's daughter is cousin.: [3, 0.0]
6 | aunt's father is grandfather.: [6, 4.0]
7 | aunt's mother is grandmother.: [4, 4.0]
8 | aunt's mother is mother.: [1, 1.0]
9 | aunt's sister is aunt.: [1, 0.0]
10 | aunt's sister is mother.: [1, 0.0]
11 | aunt's sister is sister.: [4, 1.0]
12 | aunt's son is cousin.: [4, 0.0]
13 | brother's aunt is aunt.: [10, 5.0]
14 | brother's brother is brother.: [89, 52.0]
15 | brother's daughter is niece.: [57, 30.0]
16 | brother's father is father.: [45, 43.0]
17 | brother's grandfather is grandfather.: [14, 11.0]
18 | brother's grandfather is great-grandfather.: [4, 0.0]
19 | brother's grandmother is grandmother.: [25, 20.0]
20 | brother's grandmother is great-grandmother.: [2, 0.0]
21 | brother's grandson is nephew.: [3, 0.0]
22 | brother's husband is husband.: [1, 0.0]
23 | brother's mother is grandmother.: [1, 0.0]
24 | brother's mother is mother.: [50, 50.0]
25 | brother's sister is sister.: [59, 45.0]
26 | brother's son is nephew.: [63, 34.0]
27 | brother's uncle is uncle.: [8, 3.0]
28 | brother's wife is sister-in-law.: [11, 6.0]
29 | brother-in-law's daughter is niece.: [4, 4.0]
30 | brother-in-law's father is father-in-law.: [3, 3.0]
31 | brother-in-law's mother is mother-in-law.: [7, 7.0]
32 | brother-in-law's son is nephew.: [2, 2.0]
33 | daughter's aunt is aunt.: [22, 1.0]
34 | daughter's aunt is sister.: [18, 17.0]
35 | daughter's brother is brother.: [39, 0.0]
36 | daughter's brother is son.: [2, 2.0]
37 | daughter's brother is uncle.: [1, 0.0]
38 | daughter's daughter is daughter.: [3, 0.0]
39 | daughter's daughter is granddaughter.: [51, 26.0]
40 | daughter's daughter is sister.: [1, 0.0]
41 | daughter's father is father.: [22, 4.0]
42 | daughter's father is husband.: [1, 0.0]
43 | daughter's grandfather is father.: [16, 11.0]
44 | daughter's grandfather is grandfather.: [14, 4.0]
45 | daughter's grandfather is grandfatherr.: [1, 0.0]
46 | daughter's grandmother is grandmother.: [17, 4.0]
47 | daughter's grandmother is mother.: [19, 9.0]
48 | daughter's husband is son-in-law.: [10, 10.0]
49 | daughter's mother is mother.: [21, 3.0]
50 | daughter's sister is sister.: [51, 1.0]
51 | daughter's son is grandson.: [33, 24.0]
52 | daughter's son is nephew.: [1, 0.0]
53 | daughter's son is son.: [1, 0.0]
54 | daughter's uncle is brother.: [31, 27.0]
55 | daughter's uncle is uncle.: [8, 3.0]
56 | daughter-in-law's daughter is granddaughter.: [5, 5.0]
57 | daughter-in-law's son is grandson.: [1, 1.0]
58 | father's brother is brother.: [1, 1.0]
59 | father's brother is uncle.: [44, 31.0]
60 | father's daughter is daughter.: [25, 5.0]
61 | father's daughter is sister.: [42, 33.0]
62 | father's father is grandfather.: [43, 26.0]
63 | father's granddaughter is granddaughter.: [1, 1.0]
64 | father's grandson is grandson.: [1, 1.0]
65 | father's mother is grandmother.: [22, 13.0]
66 | father's mother is mother.: [5, 0.0]
67 | father's sister is aunt.: [34, 26.0]
68 | father's son is brother.: [44, 35.0]
69 | father's son is son.: [30, 10.0]
70 | father's wife is mother.: [2, 0.0]
71 | gabrielle's brother is brother.: [2, 0.0]
72 | gabrielle's sister is sister.: [1, 0.0]
73 | granddaughter's aunt is aunt.: [3, 0.0]
74 | granddaughter's aunt is niece.: [1, 0.0]
75 | granddaughter's brother is brother.: [5, 0.0]
76 | granddaughter's brother is cousin.: [2, 0.0]
77 | granddaughter's brother is grandson.: [2, 1.0]
78 | granddaughter's brother is nephew.: [4, 0.0]
79 | granddaughter's brother is uncle.: [3, 0.0]
80 | granddaughter's father is son.: [4, 4.0]
81 | granddaughter's grandson is grandson.: [1, 0.0]
82 | granddaughter's mother is daughter.: [7, 7.0]
83 | granddaughter's sister is niece.: [1, 0.0]
84 | granddaughter's sister is sister.: [25, 0.0]
85 | granddaughter's uncle is great-uncle.: [1, 0.0]
86 | granddaughter's uncle is uncle.: [2, 0.0]
87 | grandfather's brother is great-uncle.: [1, 0.0]
88 | grandfather's daughter is mother.: [1, 0.0]
89 | grandfather's father is great-grandfather.: [1, 0.0]
90 | grandfather's granddaughter is granddaughter.: [1, 0.0]
91 | grandfather's grandfather is great-grandfather.: [4, 0.0]
92 | grandfather's mother is great-grandmother.: [1, 0.0]
93 | grandfather's son is father.: [6, 0.0]
94 | grandmother's daughter is mother.: [3, 0.0]
95 | grandmother's granddaughter is granddaughter.: [2, 0.0]
96 | grandmother's granddaughter is niece.: [1, 0.0]
97 | grandmother's grandmother is great-grandmother.: [1, 0.0]
98 | grandmother's mother is great-grandmother.: [1, 0.0]
99 | grandmother's sister is great-aunt.: [1, 0.0]
100 | grandson's aunt is aunt.: [2, 0.0]
101 | grandson's aunt is great-aunt.: [1, 0.0]
102 | grandson's brother is brother.: [7, 0.0]
103 | grandson's brother is grandson.: [1, 1.0]
104 | grandson's brother is nephew.: [1, 0.0]
105 | grandson's brother is uncle.: [1, 0.0]
106 | grandson's father is son-in-law.: [1, 1.0]
107 | grandson's father is son.: [2, 2.0]
108 | grandson's mother is daughter.: [2, 2.0]
109 | grandson's sister is aunt.: [4, 0.0]
110 | grandson's sister is cousin.: [1, 0.0]
111 | grandson's sister is granddaughter.: [2, 2.0]
112 | grandson's sister is great-aunt.: [1, 0.0]
113 | grandson's sister is niece.: [4, 0.0]
114 | grandson's sister is sister.: [9, 0.0]
115 | grandson's uncle is great-uncle.: [1, 0.0]
116 | grandson's uncle is uncle.: [7, 0.0]
117 | guillermina's grandmother is grandmother.: [1, 1.0]
118 | husband's brother is brother-in-law.: [6, 6.0]
119 | husband's daughter is daughter-in-law.: [3, 2.0]
120 | husband's daughter is daughter.: [32, 15.0]
121 | husband's daughter is step-daughter.: [1, 0.0]
122 | husband's daughter is stepdaughter.: [19, 2.0]
123 | husband's father is father-in-law.: [11, 11.0]
124 | husband's father is father.: [1, 0.0]
125 | husband's granddaughter is daughter.: [1, 0.0]
126 | husband's granddaughter is granddaughter.: [19, 12.0]
127 | husband's granddaughter is great-granddaughter.: [1, 0.0]
128 | husband's grandson is grandchild.: [1, 0.0]
129 | husband's grandson is grandson.: [19, 17.0]
130 | husband's grandson is son.: [1, 1.0]
131 | husband's husband is husband.: [1, 0.0]
132 | husband's mother is mother-in-law.: [19, 18.0]
133 | husband's mother is mother.: [1, 0.0]
134 | husband's sister is sister-in-law.: [1, 1.0]
135 | husband's sister is sister.: [1, 0.0]
136 | husband's son is son-in-law.: [4, 3.0]
137 | husband's son is son.: [28, 16.0]
138 | husband's son is stepson.: [1, 0.0]
139 | husband's wife is wife.: [3, 0.0]
140 | jason's brother is brother.: [2, 0.0]
141 | jason's sister is michelle.: [1, 0.0]
142 | jason's son is vincent.: [1, 0.0]
143 | lewis's brother is brother.: [1, 1.0]
144 | mother's brother is uncle.: [33, 23.0]
145 | mother's daughter is daughter.: [41, 12.0]
146 | mother's daughter is mother.: [1, 0.0]
147 | mother's daughter is sister.: [39, 26.0]
148 | mother's father is father.: [1, 0.0]
149 | mother's father is grandfather.: [28, 16.0]
150 | mother's grandmother is great-grandmother.: [2, 0.0]
151 | mother's husband is father.: [1, 0.0]
152 | mother's mother is grandmother.: [30, 16.0]
153 | mother's sister is aunt.: [36, 27.0]
154 | mother's son is brother.: [43, 29.0]
155 | mother's son is father.: [1, 1.0]
156 | mother's son is son.: [25, 9.0]
157 | nephew's aunt is aunt.: [6, 0.0]
158 | nephew's brother is brother.: [1, 0.0]
159 | nephew's brother is cousin.: [1, 0.0]
160 | nephew's grandfather is grandfather.: [5, 0.0]
161 | nephew's grandmother is grandmother.: [7, 0.0]
162 | nephew's sister is cousin.: [3, 0.0]
163 | nephew's sister is niece.: [7, 7.0]
164 | nephew's uncle is uncle.: [5, 0.0]
165 | niece's aunt is aunt.: [1, 0.0]
166 | niece's brother is cousin.: [2, 0.0]
167 | niece's grandfather is grandfather.: [3, 0.0]
168 | niece's grandmother is grandmother.: [3, 0.0]
169 | niece's sister is niece.: [1, 1.0]
170 | niece's sister is sister.: [3, 0.0]
171 | niece's uncle is uncle.: [6, 0.0]
172 | sister's aunt is aunt.: [6, 2.0]
173 | sister's brother is brother.: [58, 46.0]
174 | sister's daughter is niece.: [60, 35.0]
175 | sister's father is father.: [45, 44.0]
176 | sister's grandfather is father.: [1, 0.0]
177 | sister's grandfather is grandfather.: [16, 11.0]
178 | sister's grandfather is great-grandfather.: [1, 0.0]
179 | sister's grandmother is grandmother.: [25, 17.0]
180 | sister's grandmother is great-grandmother.: [4, 0.0]
181 | sister's husband is brother-in-law.: [10, 6.0]
182 | sister's mother is aunt.: [1, 0.0]
183 | sister's mother is grandmother.: [1, 0.0]
184 | sister's mother is mother.: [48, 44.0]
185 | sister's sister is sister.: [68, 40.0]
186 | sister's son is nephew.: [44, 29.0]
187 | sister's uncle is uncle.: [8, 3.0]
188 | sister-in-law's daughter is niece.: [1, 1.0]
189 | sister-in-law's father is father-in-law.: [1, 1.0]
190 | sister-in-law's mother is mother-in-law.: [1, 1.0]
191 | sister-in-law's son is nephew-in-law.: [1, 0.0]
192 | sister-in-law's son is nephew.: [5, 5.0]
193 | son's aunt is aunt.: [11, 2.0]
194 | son's aunt is sister.: [29, 29.0]
195 | son's brother is brother.: [43, 0.0]
196 | son's brother is uncle.: [1, 0.0]
197 | son's daughter is daughter.: [2, 0.0]
198 | son's daughter is granddaughter.: [22, 14.0]
199 | son's father is father.: [18, 2.0]
200 | son's grandfather is father.: [16, 15.0]
201 | son's grandfather is grandfather.: [10, 3.0]
202 | son's grandmother is grandmother.: [24, 5.0]
203 | son's grandmother is mother.: [2, 0.0]
204 | son's grandson is great-grandson.: [1, 0.0]
205 | son's husband is son-in-law.: [1, 0.0]
206 | son's mother is mother.: [22, 4.0]
207 | son's sister is daughter.: [2, 2.0]
208 | son's sister is sister.: [34, 2.0]
209 | son's son is grandson.: [48, 25.0]
210 | son's son is nephew.: [1, 0.0]
211 | son's son is son.: [2, 0.0]
212 | son's uncle is brother.: [27, 25.0]
213 | son's uncle is uncle.: [11, 3.0]
214 | son's wife is daughter-in-law.: [17, 17.0]
215 | son-in-law's aunt is aunt.: [1, 0.0]
216 | son-in-law's daughter is granddaughter.: [1, 1.0]
217 | son-in-law's son is grandson.: [4, 4.0]
218 | son-in-law's wife is daughter.: [1, 0.0]
219 | stepdaughter's brother is stepbrother.: [2, 0.0]
220 | stepdaughter's husband is stepson-in-law.: [1, 0.0]
221 | stepdaughter's sister is sister.: [1, 0.0]
222 | stepdaughter's sister is stepsister.: [1, 0.0]
223 | stepdaughter's uncle is uncle.: [2, 0.0]
224 | uncle's brother is brother.: [3, 0.0]
225 | uncle's daughter is cousin.: [3, 0.0]
226 | uncle's father is grandfather.: [2, 1.0]
227 | uncle's mother is grandmother.: [8, 5.0]
228 | uncle's sister is aunt.: [4, 4.0]
229 | uncle's sister is sister.: [1, 0.0]
230 | uncle's son is cousin.: [4, 0.0]
231 | uncle's uncle is great-uncle.: [1, 0.0]
232 | uncle's uncle is uncle.: [2, 0.0]
233 | uncle's wife is aunt.: [1, 0.0]
234 | wife's brother is brother-in-law.: [6, 4.0]
235 | wife's brother is brother.: [1, 0.0]
236 | wife's daughter is daughter-in-law.: [1, 1.0]
237 | wife's daughter is daughter.: [41, 17.0]
238 | wife's daughter is stepdaughter.: [2, 0.0]
239 | wife's father is father-in-law.: [9, 9.0]
240 | wife's granddaughter is granddaughter.: [14, 10.0]
241 | wife's grandson is grandson.: [9, 6.0]
242 | wife's grandson is son.: [4, 0.0]
243 | wife's husband is husband.: [2, 0.0]
244 | wife's mother is mother-in-law.: [15, 15.0]
245 | wife's mother is mother.: [1, 0.0]
246 | wife's sister is sister-in-law.: [3, 1.0]
247 | wife's sister is sister.: [6, 0.0]
248 | wife's son is son-in-law.: [1, 0.0]
249 | wife's son is son.: [43, 27.0]
250 | wife's son is stepson.: [1, 0.0]
251 | william's grandmother is gabrielle.: [1, 1.0]
252 |
--------------------------------------------------------------------------------
/source/datasets.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Dataset module."""
17 |
18 | import ast
19 | from collections.abc import Mapping, Sequence
20 | import csv
21 | import glob
22 | import json
23 | import os
24 | import random
25 | import re
26 | from typing import Any
27 |
28 | from nltk import tokenize
29 | import numpy as np
30 |
31 |
class Arithmetic:
    """Arithmetic dataset.

    Every sample is an addition problem read from a text file, one problem
    per line, with the operands separated by `+` and written in `base`.
    The first `num_train` stored samples come from the `*_train.txt` files;
    the remaining `num_valid + num_test` come from the `*_test.txt` files.
    """

    def __init__(
        self,
        path,
        base=10,
        num_train=900,
        num_valid=100,
        num_test=100,
    ):
        """Init arithmetic dataset.

        Args:
          path: path to the dataset
          base: base of the arithmetic
          num_train: number of training samples
          num_valid: number of validation samples
          num_test: number of test samples

        Raises:
          FileNotFoundError: if dataset files are not found
          ValueError: if more samples are requested than the files provide
        """
        self.path = path
        self.base = base
        self.num_train = num_train
        self.num_valid = num_valid
        self.num_test = num_test

        # glob returns files in a filesystem-dependent order; sorting keeps
        # the sample indices (and thus a seeded random draw) reproducible.
        train_files = sorted(
            glob.glob(os.path.join(path, f"base-{base}/*_train.txt")))
        test_files = sorted(
            glob.glob(os.path.join(path, f"base-{base}/*_test.txt")))
        if not train_files or not test_files:
            raise FileNotFoundError(f"Can't find dataset files in `{path}`.")

        queries = []
        answers = []
        levels = []
        num_samples = []  # number of samples contributed by each file
        for txt_file in train_files + test_files:
            num_sample = 0
            with open(txt_file, "r", encoding="utf-8") as fin:
                for line in fin:
                    line = line.strip()
                    if not line:
                        # Blank lines (e.g. a trailing newline) are not
                        # problems; int("", base) would raise on them.
                        continue
                    query = line.split("+")
                    answer = sum(int(x, base) for x in query)
                    answer = np.base_repr(answer, base)
                    # The level is the digit count of the first operand.
                    level = f"{len(query[0])} digits"
                    queries.append(query)
                    answers.append(answer)
                    levels.append(level)
                    num_sample += 1
            num_samples.append(num_sample)

        total_train = sum(num_samples[:len(train_files)])
        total_test = sum(num_samples[len(train_files):])
        if num_train > total_train:
            raise ValueError(
                f"Requested {num_train} training samples, "
                f"but only {total_train} are available in `{path}`."
            )
        if num_valid + num_test > total_test:
            raise ValueError(
                f"Requested {num_valid + num_test} validation/test samples, "
                f"but only {total_test} are available in `{path}`."
            )

        # Train samples are drawn from the train files only; validation and
        # test samples are drawn (without overlap) from the test files.
        train_indices = random.sample(range(total_train), num_train)
        test_indices = random.sample(
            range(total_train, total_train + total_test),
            num_valid + num_test,
        )
        indices = train_indices + test_indices
        self.queries = [queries[i] for i in indices]
        self.answers = [answers[i] for i in indices]
        self.levels = [levels[i] for i in indices]

    def get_split(
        self,
        split: str = "test",
    ) -> Sequence[Mapping[str, Any]]:
        """Get dataset split.

        Args:
          split: split name, one of `train`, `valid` or `test`

        Returns:
          List of samples

        Raises:
          ValueError: if the split name is unknown
        """
        if split == "train":
            indices = range(self.num_train)
        elif split == "valid":
            indices = range(self.num_train, self.num_train + self.num_valid)
        elif split == "test":
            # The test samples are the last `num_test` stored samples.
            indices = range(len(self) - self.num_test, len(self))
        else:
            raise ValueError(f"Unknown split `{split}`")
        return [self[i] for i in indices]

    def evaluate(
        self,
        truth: str,
        pred: str,
    ):
        """Evaluate truth and pred.

        Args:
          truth: ground-truth answer
          pred: predicted text

        Returns:
          True if the lowercased truth is one of the word tokens of the
          lowercased prediction.
        """
        return truth.lower() in tokenize.word_tokenize(pred.lower())

    def __getitem__(
        self,
        index,
    ):
        """Return the sample at `index` as a dict."""
        return {
            "query": self.queries[index],
            "answer": self.answers[index],
            "level": self.levels[index],
            "base": self.base,
        }

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.queries)
137 |
138 |
139 | class CLUTRR:
140 | """CLUTRR dataset."""
141 |
142 | def __init__(
143 | self,
144 | path,
145 | num_train=2000,
146 | num_valid=200,
147 | num_test=200,
148 | ):
149 | self.path = path
150 | self.num_train = num_train
151 | self.num_valid = num_valid
152 | self.num_test = num_test
153 |
154 | train_files = glob.glob(os.path.join(path, "*_train.csv"))
155 | test_files = glob.glob(os.path.join(path, "*_test.csv"))
156 | if not train_files or not test_files:
157 | raise FileNotFoundError(f"Can't find dataset files in `{path}`.")
158 |
159 | documents = []
160 | paths = []
161 | queries = []
162 | answers = []
163 | levels = []
164 | num_samples = []
165 | for csv_file in train_files + test_files:
166 | num_sample = 0
167 | with open(csv_file, "r") as fin:
168 | reader = csv.reader(fin)
169 | fields = next(reader)
170 | for values in reader:
171 | document = path = query = answer = level = None
172 | for field, value in zip(fields, values, strict=True):
173 | if field == "story":
174 | document = re.sub(r"[\[\]]", "", value)
175 | elif field == "f_comb":
176 | path = value.split("-")
177 | elif field == "query":
178 | query = ast.literal_eval(value)
179 | elif field == "target":
180 | answer = value
181 | elif field == "task_name":
182 | level = f"{value.split('.')[1]} hops"
183 | documents.append(document)
184 | paths.append(path)
185 | queries.append(query)
186 | answers.append(answer)
187 | levels.append(level)
188 | num_sample += 1
189 | num_samples.append(num_sample)
190 |
191 | total_train = sum(num_samples[:len(train_files)])
192 | total_test = sum(num_samples[len(train_files):])
193 | train_indices = random.sample(range(total_train), num_train)
194 | test_indices = random.sample(range(total_train, total_train + total_test),
195 | num_valid + num_test)
196 | indices = train_indices + test_indices
197 | self.documents = [documents[i] for i in indices]
198 | self.paths = [paths[i] for i in indices]
199 | self.queries = [queries[i] for i in indices]
200 | self.answers = [answers[i] for i in indices]
201 | self.levels = [levels[i] for i in indices]
202 | self.labels = set(answers)
203 |
204 | def get_split(
205 | self,
206 | split="test"
207 | ):
208 | """Get dataset split."""
209 | if split == "train":
210 | indices = range(self.num_train)
211 | elif split == "valid":
212 | indices = range(self.num_train, self.num_train + self.num_valid)
213 | elif split == "test":
214 | indices = range(len(self) - self.num_test, len(self))
215 | else:
216 | raise ValueError(f"Unknown split `{split}`")
217 | return [self[i] for i in indices]
218 |
219 | def evaluate(
220 | self,
221 | truth,
222 | pred
223 | ):
224 | """Evaluate truth and pred."""
225 | truth = truth.lower()
226 | words = tokenize.word_tokenize(pred.lower())
227 | others = self.labels - {truth}
228 | return truth in words and not any(label in words for label in others)
229 |
230 | def __getitem__(
231 | self,
232 | index
233 | ):
234 | return {
235 | "document": self.documents[index],
236 | "path": self.paths[index],
237 | "query": self.queries[index],
238 | "answer": self.answers[index],
239 | "level": self.levels[index]
240 | }
241 |
242 | def __len__(self):
243 | return len(self.queries)
244 |
245 |
class ListFunctions:
    """List Functions dataset.

    Every `c<id>.json` file is one concept, holding a list of
    input/target examples. Each dataset item is a whole concept: a random
    draw of `num_train + num_valid + num_test` of its examples.
    """

    def __init__(
        self,
        path: str,
        num_train: int = 8,
        num_valid: int = 8,
        num_test: int = 16
    ) -> None:
        """Init list functions dataset.

        Args:
          path: path to the dataset
          num_train: number of training examples per concept
          num_valid: number of validation examples per concept
          num_test: number of test examples per concept

        Raises:
          FileNotFoundError: if dataset files are not found
          ValueError: if a file name is not of the form `c<id>.json`, or a
            concept has fewer examples than requested
        """
        self.path = path
        self.num_train = num_train
        self.num_valid = num_valid
        self.num_test = num_test

        num_sample = num_train + num_valid + num_test
        json_files = sorted(glob.glob(os.path.join(path, "c*.json")))
        if not json_files:
            raise FileNotFoundError(f"Can't find dataset files in `{path}`.")

        levels = []
        concepts = []
        queries = []
        answers = []
        for json_file in json_files:
            with open(json_file, "r", encoding="utf-8") as fin:
                obj = json.load(fin)
            query = [example["input"] for example in obj["examples"]]
            answer = [example["target"] for example in obj["examples"]]

            # Match on the file name only, so that directory names cannot
            # be mistaken for the concept id.
            match = re.search(r"(c\d+)\.json$", os.path.basename(json_file))
            if match is None:
                raise ValueError(
                    f"Can't parse a concept id from `{json_file}`.")
            concept = match.group(1)
            cid = int(concept[1:])
            if cid <= 80:
                level = "P1"
            elif cid <= 100:
                level = "P2"
            else:
                level = "P3"

            if num_sample > len(query):
                raise ValueError(
                    f"Requested {num_sample} examples, but `{json_file}` "
                    f"only has {len(query)}."
                )
            indices = random.sample(range(len(query)), num_sample)
            levels.append(level)
            concepts.append(concept)
            queries.append([query[i] for i in indices])
            answers.append([answer[i] for i in indices])

        self.levels = levels
        self.concepts = concepts
        self.queries = queries
        self.answers = answers

    def get_split(
        self,
        split: str = "test"
    ) -> Sequence[Mapping[str, Any]]:
        """Get dataset split.

        For `train` and `valid` the first `num_train` examples of each
        concept are the demonstrations and the next `num_valid` are the
        queries. For `test` the first `num_train + num_valid` examples are
        the demonstrations and the rest are the queries.

        Args:
          split: split name, one of `train`, `valid` or `test`

        Returns:
          List of samples, one per concept

        Raises:
          ValueError: if the split name is unknown
        """
        if split in {"train", "valid"}:
            train_indices = slice(self.num_train)
            test_indices = slice(
                self.num_train, self.num_train + self.num_valid)
        elif split == "test":
            train_indices = slice(self.num_train + self.num_valid)
            test_indices = slice(self.num_train + self.num_valid, None)
        else:
            raise ValueError(f"Unknown split `{split}`")

        dataset = []
        for index in range(len(self)):
            # Copy into a fresh dict so the keys can be rewritten freely.
            sample = dict(self[index])
            sample["train_queries"] = sample["queries"][train_indices]
            sample["train_answers"] = sample["answers"][train_indices]
            sample["queries"] = sample["queries"][test_indices]
            sample["answers"] = sample["answers"][test_indices]
            # The textual answer lists one `query -> answer` pair per line.
            sample["answer"] = "\n".join(
                f"{q} -> {a}"
                for q, a in zip(sample["queries"], sample["answers"])
            )
            dataset.append(sample)
        return dataset

    def evaluate(
        self,
        truth: str,
        pred: str
    ) -> float:
        """Evaluate truth and pred.

        Both texts are scanned for `[...] -> [...]` pairs. A truth pair is
        counted as correct when the prediction has a pair with the same
        query text whose list parses to the same value.

        Args:
          truth: ground-truth text
          pred: predicted text

        Returns:
          Fraction of truth pairs predicted correctly, or 0.0 when the
          truth contains no pairs.
        """
        pattern = r"(\[[A-Z0-9, ]*\]) ?-> ?(\[[A-Z0-9, ]*\])"
        query2truth = dict(re.findall(pattern, truth))
        query2pred = dict(re.findall(pattern, pred))
        if not query2truth:
            # Nothing to score; avoid dividing by zero.
            return 0.0

        num_correct = 0
        for query, expected in query2truth.items():
            predicted = query2pred.get(query)
            if predicted is None:
                continue
            try:
                if ast.literal_eval(expected) == ast.literal_eval(predicted):
                    num_correct += 1
            except (ValueError, SyntaxError):
                # Lists that are not valid Python literals (e.g. bare
                # letters) count as incorrect.
                pass
        return num_correct / len(query2truth)

    def __getitem__(self, index: int) -> Mapping[str, Any]:
        """Return the concept at `index` as a dict."""
        return {
            "queries": self.queries[index],
            "answers": self.answers[index],
            "level": self.levels[index],
            "concept": self.concepts[index],
        }

    def __len__(self) -> int:
        """Return the number of concepts."""
        return len(self.queries)
357 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/artifacts/checkpoint/clutrr/symbolic/gemini-pro_5-shot_cot_htt_2000.yaml:
--------------------------------------------------------------------------------
1 | aunt's aunt is aunt.: [2, 0.0]
2 | aunt's brother is brother.: [2, 0.0]
3 | aunt's brother is cousin.: [2, 0.0]
4 | aunt's brother is nephew.: [1, 0.0]
5 | aunt's brother is uncle.: [1, 0.0]
6 | aunt's father is uncle.: [4, 0.0]
7 | aunt's grandfather is grandfather.: [1, 0.0]
8 | aunt's mother is grandmother.: [2, 2.0]
9 | aunt's sister is cousin.: [3, 0.0]
10 | aunt's uncle is father.: [1, 0.0]
11 | brother's aunt is aunt.: [12, 5.0]
12 | brother's aunt is cousin.: [1, 0.0]
13 | brother's brother is brother.: [77, 52.0]
14 | brother's daughter is brother.: [1, 0.0]
15 | brother's daughter is niece.: [57, 39.0]
16 | brother's father is father.: [46, 37.0]
17 | brother's father is uncle.: [1, 0.0]
18 | brother's grandfather is father.: [8, 0.0]
19 | brother's grandfather is grandfather.: [16, 14.0]
20 | brother's grandmother is aunt.: [1, 0.0]
21 | brother's grandmother is grandmother.: [32, 26.0]
22 | brother's grandmother is mother.: [1, 0.0]
23 | brother's grandson is grandson.: [1, 1.0]
24 | brother's grandson is nephew.: [2, 0.0]
25 | brother's mother is aunt.: [1, 0.0]
26 | brother's mother is mother.: [53, 46.0]
27 | brother's nephew is nephew.: [1, 1.0]
28 | brother's niece is niece.: [4, 0.0]
29 | brother's sister is sister.: [72, 57.0]
30 | brother's son is nephew.: [1, 0.0]
31 | brother's uncle is father.: [3, 0.0]
32 | brother's uncle is uncle.: [6, 4.0]
33 | brother's wife is sister-in-law.: [6, 2.0]
34 | brother's wife is wife.: [4, 0.0]
35 | brother-in-law's daughter is niece.: [3, 3.0]
36 | brother-in-law's father is father-in-law.: [5, 5.0]
37 | brother-in-law's mother is mother-in-law.: [5, 5.0]
38 | brother-in-law's son is nephew.: [2, 2.0]
39 | daughter's aunt is aunt.: [6, 0.0]
40 | daughter's aunt is niece.: [2, 0.0]
41 | daughter's aunt is sister.: [30, 27.0]
42 | daughter's brother is brother.: [27, 0.0]
43 | daughter's brother is nephew.: [4, 1.0]
44 | daughter's brother is son.: [5, 3.0]
45 | daughter's brother is uncle.: [14, 0.0]
46 | daughter's daughter is granddaughter.: [45, 32.0]
47 | daughter's father is brother.: [1, 1.0]
48 | daughter's father is father.: [21, 5.0]
49 | daughter's father is grandfather.: [1, 0.0]
50 | daughter's father is son.: [1, 0.0]
51 | daughter's grandfather is father.: [21, 19.0]
52 | daughter's grandfather is grandfather.: [4, 1.0]
53 | daughter's grandmother is grandmother.: [8, 0.0]
54 | daughter's grandmother is mother.: [18, 17.0]
55 | daughter's husband is father.: [1, 0.0]
56 | daughter's husband is husband.: [3, 0.0]
57 | daughter's husband is son-in-law.: [14, 14.0]
58 | daughter's mother is mother.: [19, 6.0]
59 | daughter's mother is sister.: [2, 2.0]
60 | daughter's mother is wife.: [1, 0.0]
61 | daughter's sister is aunt.: [3, 0.0]
62 | daughter's sister is daughter.: [2, 0.0]
63 | daughter's sister is niece.: [7, 0.0]
64 | daughter's sister is sister.: [37, 2.0]
65 | daughter's son is brother.: [1, 0.0]
66 | daughter's son is grandson.: [39, 29.0]
67 | daughter's son is nephew.: [3, 0.0]
68 | daughter's son is son.: [1, 0.0]
69 | daughter's uncle is brother.: [36, 30.0]
70 | daughter's uncle is father.: [1, 0.0]
71 | daughter's uncle is uncle.: [1, 0.0]
72 | daughter-in-law's daughter is granddaughter.: [2, 2.0]
73 | daughter-in-law's son is grandson.: [2, 2.0]
74 | father's brother is brother.: [1, 0.0]
75 | father's brother is uncle.: [44, 28.0]
76 | father's child is son.: [1, 0.0]
77 | father's daughter is daughter.: [21, 1.0]
78 | father's daughter is sister.: [41, 34.0]
79 | father's father is grandfather.: [40, 25.0]
80 | father's grandfather is grandfather.: [1, 1.0]
81 | father's grandfather is great-grandfather.: [1, 0.0]
82 | father's grandmother is grandmother.: [1, 0.0]
83 | father's grandmother is great-grandmother.: [1, 0.0]
84 | father's mother is grandmother.: [23, 15.0]
85 | father's mother is mother.: [8, 0.0]
86 | father's nephew is cousin.: [1, 0.0]
87 | father's sister is aunt.: [33, 17.0]
88 | father's sister is sister.: [3, 0.0]
89 | father's sister-in-law is sister-in-law.: [1, 0.0]
90 | father's son is brother.: [41, 35.0]
91 | father's son is grandson.: [2, 0.0]
92 | father's son is son.: [25, 3.0]
93 | father's wife is mother.: [5, 4.0]
94 | father-in-law's son is husband.: [1, 0.0]
95 | gabrielle's son is brother.: [1, 0.0]
96 | granddaughter's aunt is aunt.: [5, 0.0]
97 | granddaughter's brother is brother-in-law.: [1, 0.0]
98 | granddaughter's brother is brother.: [7, 0.0]
99 | granddaughter's brother is grandson.: [3, 3.0]
100 | granddaughter's brother is nephew.: [2, 0.0]
101 | granddaughter's brother is uncle.: [17, 0.0]
102 | granddaughter's father is daughter.: [1, 1.0]
103 | granddaughter's father is father.: [3, 0.0]
104 | granddaughter's father is grandfather.: [1, 0.0]
105 | granddaughter's father is mother.: [1, 0.0]
106 | granddaughter's father is son.: [4, 2.0]
107 | granddaughter's grandfather is grandfather.: [1, 0.0]
108 | granddaughter's husband is husband.: [1, 0.0]
109 | granddaughter's mother is daughter.: [10, 7.0]
110 | granddaughter's sister is aunt.: [2, 0.0]
111 | granddaughter's sister is brother.: [1, 0.0]
112 | granddaughter's sister is cousin.: [3, 0.0]
113 | granddaughter's sister is granddaughter.: [4, 4.0]
114 | granddaughter's sister is grandniece.: [1, 0.0]
115 | granddaughter's sister is great-aunt.: [1, 0.0]
116 | granddaughter's sister is mother.: [1, 0.0]
117 | granddaughter's sister is niece.: [9, 0.0]
118 | granddaughter's sister is sister-in-law.: [1, 0.0]
119 | granddaughter's sister is sister.: [6, 0.0]
120 | granddaughter's uncle is brother.: [2, 0.0]
121 | granddaughter's uncle is father.: [2, 0.0]
122 | granddaughter's uncle is uncle.: [4, 0.0]
123 | grandfather's brother is father.: [1, 0.0]
124 | grandfather's brother is uncle.: [3, 0.0]
125 | grandfather's daughter is father.: [1, 0.0]
126 | grandfather's daughter is granddaughter.: [8, 0.0]
127 | grandfather's daughter is grandmother.: [1, 0.0]
128 | grandfather's granddaughter is granddaughter.: [1, 0.0]
129 | grandfather's grandfather is ancestor.: [1, 0.0]
130 | grandfather's grandson is grandson.: [3, 0.0]
131 | grandfather's niece is granddaughter.: [1, 0.0]
132 | grandfather's sister is aunt.: [6, 0.0]
133 | grandfather's son is father.: [9, 0.0]
134 | grandfather's stepdaughter is granddaughter.: [2, 0.0]
135 | grandmother's daughter is granddaughter.: [1, 0.0]
136 | grandmother's daughter is mother.: [10, 0.0]
137 | grandmother's granddaughter is granddaughter.: [1, 0.0]
138 | grandmother's grandmother is great-grandmother.: [2, 0.0]
139 | grandmother's mother is grandmother.: [1, 0.0]
140 | grandmother's son is father.: [6, 0.0]
141 | grandson's aunt is aunt.: [2, 0.0]
142 | grandson's brother is brother.: [6, 0.0]
143 | grandson's brother is cousin.: [2, 0.0]
144 | grandson's brother is grandson.: [1, 1.0]
145 | grandson's brother is nephew.: [3, 0.0]
146 | grandson's brother is uncle.: [5, 0.0]
147 | grandson's father is son.: [6, 6.0]
148 | grandson's grandmother is grandmother.: [2, 0.0]
149 | grandson's mother is daughter.: [5, 5.0]
150 | grandson's sister is aunt.: [7, 0.0]
151 | grandson's sister is granddaughter.: [9, 9.0]
152 | grandson's sister is grandniece.: [1, 0.0]
153 | grandson's sister is niece.: [12, 0.0]
154 | grandson's sister is sister.: [3, 0.0]
155 | grandson's uncle is father.: [3, 0.0]
156 | grandson's uncle is uncle.: [3, 0.0]
157 | husband's brother is brother-in-law.: [6, 6.0]
158 | husband's daughter is daughter-in-law.: [1, 0.0]
159 | husband's daughter is daughter.: [35, 27.0]
160 | husband's daughter is sister.: [2, 0.0]
161 | husband's daughter is step-daughter.: [1, 0.0]
162 | husband's daughter is stepdaughter.: [11, 1.0]
163 | husband's daughter is wife.: [1, 0.0]
164 | husband's father is father-in-law.: [12, 11.0]
165 | husband's granddaughter is daughter.: [2, 0.0]
166 | husband's granddaughter is granddaughter.: [21, 17.0]
167 | husband's grandson is brother.: [1, 0.0]
168 | husband's grandson is cousin.: [1, 0.0]
169 | husband's grandson is grandson.: [15, 13.0]
170 | husband's grandson is great-grandson.: [5, 0.0]
171 | husband's grandson is son.: [2, 0.0]
172 | husband's husband is boyfriend.: [1, 0.0]
173 | husband's husband is brother.: [1, 0.0]
174 | husband's mother is mother-in-law.: [12, 11.0]
175 | husband's mother is mother.: [6, 0.0]
176 | husband's sister is sister-in-law.: [1, 1.0]
177 | husband's son is brother.: [4, 0.0]
178 | husband's son is father.: [1, 0.0]
179 | husband's son is grandfather.: [1, 0.0]
180 | husband's son is grandson.: [3, 0.0]
181 | husband's son is son.: [25, 18.0]
182 | husband's son is step-son.: [1, 0.0]
183 | husband's son is stepson.: [3, 2.0]
184 | law's daughter is granddaughter.: [1, 1.0]
185 | law's daughter is niece.: [1, 1.0]
186 | law's father is brother.: [1, 0.0]
187 | law's son is grandson.: [1, 1.0]
188 | law's son is nephew.: [2, 2.0]
189 | mother's brother is father.: [1, 0.0]
190 | mother's brother is uncle.: [36, 21.0]
191 | mother's daughter is daughter.: [39, 8.0]
192 | mother's daughter is father.: [1, 1.0]
193 | mother's daughter is grandmother.: [1, 1.0]
194 | mother's daughter is sister.: [35, 28.0]
195 | mother's father is father.: [15, 0.0]
196 | mother's father is grandfather.: [19, 11.0]
197 | mother's husband is father.: [8, 4.0]
198 | mother's mother is grandmother.: [42, 20.0]
199 | mother's sister is aunt.: [35, 22.0]
200 | mother's sister is sister.: [2, 0.0]
201 | mother's son is brother.: [48, 37.0]
202 | mother's son is son.: [14, 1.0]
203 | neice's brother is cousin.: [1, 0.0]
204 | nephew's aunt is aunt.: [7, 0.0]
205 | nephew's brother is brother.: [2, 0.0]
206 | nephew's brother is nephew.: [1, 1.0]
207 | nephew's daughter is granddaughter.: [1, 1.0]
208 | nephew's father is brother.: [1, 0.0]
209 | nephew's grandfather is grandfather.: [1, 0.0]
210 | nephew's grandfather is great-grandfather.: [2, 0.0]
211 | nephew's grandfather is uncle.: [1, 0.0]
212 | nephew's grandmother is grandmother.: [8, 0.0]
213 | nephew's grandmother is sister.: [1, 0.0]
214 | nephew's sister is niece.: [8, 8.0]
215 | nephew's sister is sister.: [1, 0.0]
216 | nephew's son is grandnephew.: [1, 0.0]
217 | nephew's uncle is brother.: [1, 1.0]
218 | nephew's uncle is father.: [1, 0.0]
219 | nephew's uncle is uncle.: [2, 0.0]
220 | niece's aunt is aunt.: [5, 0.0]
221 | niece's brother is cousin.: [2, 0.0]
222 | niece's brother is nephew.: [1, 0.0]
223 | niece's daughter is grandniece.: [1, 0.0]
224 | niece's grandfather is brother.: [1, 0.0]
225 | niece's grandfather is grandfather.: [1, 0.0]
226 | niece's grandfather is uncle.: [1, 0.0]
227 | niece's grandmother is aunt.: [2, 0.0]
228 | niece's grandmother is grandmother.: [6, 0.0]
229 | niece's husband is cousin.: [1, 0.0]
230 | niece's uncle is brother.: [4, 4.0]
231 | niece's uncle is father.: [1, 0.0]
232 | niece's uncle is uncle.: [4, 0.0]
233 | self's mother is mother.: [1, 1.0]
234 | sister's aunt is aunt.: [6, 4.0]
235 | sister's brother is brother.: [82, 74.0]
236 | sister's daughter is neice.: [1, 0.0]
237 | sister's daughter is niece.: [59, 48.0]
238 | sister's daughter-in-law is sister-in-law.: [1, 0.0]
239 | sister's father is brother.: [2, 0.0]
240 | sister's father is father.: [51, 45.0]
241 | sister's granddaughter is cousin.: [1, 0.0]
242 | sister's granddaughter is niece.: [1, 0.0]
243 | sister's grandfather is father.: [7, 1.0]
244 | sister's grandfather is grandfather.: [21, 17.0]
245 | sister's grandmother is aunt.: [1, 0.0]
246 | sister's grandmother is grandmother.: [30, 26.0]
247 | sister's grandmother is mother.: [2, 0.0]
248 | sister's grandson is grandson.: [1, 0.0]
249 | sister's grandson is nephew.: [3, 0.0]
250 | sister's husband is brother-in-law.: [7, 5.0]
251 | sister's mother is aunt.: [4, 0.0]
252 | sister's mother is mother.: [41, 35.0]
253 | sister's mother-in-law is sister-in-law.: [1, 0.0]
254 | sister's niece is niece.: [11, 11.0]
255 | sister's sister is niece.: [1, 0.0]
256 | sister's sister is sister.: [76, 61.0]
257 | sister's sister-in-law is sister-in-law.: [1, 0.0]
258 | sister's son is brother.: [3, 0.0]
259 | sister's son is grandson.: [1, 0.0]
260 | sister's son is nephew.: [1, 0.0]
261 | sister's stepdaughter is stepdaughter.: [2, 0.0]
262 | sister's uncle is father.: [2, 0.0]
263 | sister's uncle is uncle.: [6, 4.0]
264 | sister-in-law's father is brother-in-law.: [1, 0.0]
265 | sister-in-law's mother is mother-in-law.: [7, 7.0]
266 | sister-in-law's son is brother-in-law.: [1, 0.0]
267 | sister-in-law's son is brother.: [1, 0.0]
268 | sister-in-law's son is nephew.: [2, 2.0]
269 | son's aunt is aunt.: [10, 0.0]
270 | son's aunt is mother.: [2, 0.0]
271 | son's aunt is sister.: [22, 22.0]
272 | son's brother is brother.: [30, 0.0]
273 | son's brother is nephew.: [3, 0.0]
274 | son's brother is uncle.: [3, 0.0]
275 | son's daughter is granddaughter.: [30, 15.0]
276 | son's father is father.: [5, 0.0]
277 | son's father is grandfather.: [8, 0.0]
278 | son's father is mother.: [1, 0.0]
279 | son's father is self.: [1, 1.0]
280 | son's grandfather is father.: [22, 21.0]
281 | son's grandmother is mother.: [18, 18.0]
282 | son's mother is mother.: [19, 3.0]
283 | son's sister is daughter.: [12, 10.0]
284 | son's sister is niece.: [9, 0.0]
285 | son's sister is sister.: [9, 0.0]
286 | son's son is grandson.: [40, 26.0]
287 | son's uncle is brother.: [23, 23.0]
288 | son's uncle is father.: [8, 0.0]
289 | son's uncle is grandfather.: [1, 0.0]
290 | son's uncle is nephew.: [1, 0.0]
291 | son's uncle is uncle.: [3, 0.0]
292 | son's wife is daughter-in-law.: [17, 16.0]
293 | son's wife is mother.: [1, 0.0]
294 | son's wife is wife.: [5, 0.0]
295 | step-daughter's aunt is aunt.: [2, 0.0]
296 | step-daughter's grandmother is step-grandmother.: [1, 0.0]
297 | step-daughter's son is step-grandson.: [1, 0.0]
298 | step-son's daughter is step-granddaughter.: [1, 0.0]
299 | step-son's grandfather is step-grandfather.: [1, 0.0]
300 | step-son's son is grandson.: [1, 1.0]
301 | step-son's wife is daughter-in-law.: [1, 1.0]
302 | stepdaughter's grandmother is mother-in-law.: [1, 0.0]
303 | stepdaughter's grandmother is stepmother.: [1, 0.0]
304 | stepdaughter's son is stepson.: [2, 0.0]
305 | stepson's brother is stepson.: [1, 0.0]
306 | stepson's daughter is granddaughter.: [1, 1.0]
307 | stepson's uncle is uncle.: [1, 0.0]
308 | stepson's wife is stepmother.: [1, 0.0]
309 | uncle's aunt is aunt.: [2, 0.0]
310 | uncle's brother is brother.: [1, 0.0]
311 | uncle's brother is cousin.: [7, 0.0]
312 | uncle's brother is father.: [1, 0.0]
313 | uncle's daughter is niece.: [1, 0.0]
314 | uncle's father is grandfather.: [3, 3.0]
315 | uncle's grandfather is great-grandfather.: [2, 0.0]
316 | uncle's grandson is nephew.: [1, 0.0]
317 | uncle's mother is aunt.: [4, 0.0]
318 | uncle's nephew is nephew.: [3, 0.0]
319 | uncle's niece is niece.: [1, 0.0]
320 | uncle's sister is aunt.: [6, 1.0]
321 | uncle's sister is cousin.: [4, 0.0]
322 | uncle's son is nephew.: [2, 0.0]
323 | uncle's stepdaughter is stepdaughter.: [1, 0.0]
324 | uncle's uncle is grandfather.: [1, 0.0]
325 | uncle's uncle is granduncle.: [1, 0.0]
326 | uncle's wife is aunt.: [1, 0.0]
327 | wife's brother is brother-in-law.: [4, 4.0]
328 | wife's brother is brother.: [1, 0.0]
329 | wife's daughter is daughter-in-law.: [1, 0.0]
330 | wife's daughter is daughter.: [35, 21.0]
331 | wife's daughter is granddaughter.: [2, 0.0]
332 | wife's daughter is step-daughter.: [4, 0.0]
333 | wife's daughter is stepdaughter.: [2, 0.0]
334 | wife's father is father-in-law.: [9, 9.0]
335 | wife's father is father.: [3, 0.0]
336 | wife's granddaughter is daughter.: [1, 0.0]
337 | wife's granddaughter is granddaughter.: [20, 16.0]
338 | wife's grandson is grandson.: [18, 11.0]
339 | wife's mother is mother-in-law.: [13, 13.0]
340 | wife's mother is mother.: [1, 0.0]
341 | wife's sister is sister-in-law.: [8, 6.0]
342 | wife's son is brother.: [8, 0.0]
343 | wife's son is son.: [30, 25.0]
344 | wife's son is step-son.: [4, 2.0]
345 | wife's son is stepson.: [4, 0.0]
346 |
--------------------------------------------------------------------------------