├── .github └── pull_request_template.md ├── GettingStarted.md ├── LICENSE ├── README.md ├── configs └── tasks │ ├── README.md │ ├── acp_gen_chat │ ├── _gen_yaml_2shot │ ├── act_reach.yaml │ ├── app.yaml │ ├── just.yaml │ ├── land.yaml │ ├── next_act.yaml │ ├── prog.yaml │ ├── reach.yaml │ └── val.yaml │ ├── boolq_cot_2shot │ ├── _boolq_cot_2shot_yaml │ ├── act_reach.yaml │ ├── app.yaml │ ├── just.yaml │ ├── land.yaml │ ├── prog.yaml │ ├── reach.yaml │ └── val.yaml │ └── mcq_cot_2shot │ ├── _mcq_cot_2shot_yaml │ ├── act_reach.yaml │ ├── app.yaml │ ├── just.yaml │ ├── land.yaml │ ├── prog.yaml │ ├── reach.yaml │ └── val.yaml ├── dataset ├── .DS_Store ├── app │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz ├── areach │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz ├── just │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz ├── land │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz ├── nexta │ ├── dev.gen.json │ └── test.gen.json.gz ├── prog │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz ├── reach │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz └── val │ ├── dev.bool.json │ ├── dev.gen.json │ ├── dev.mcq.json │ ├── test.bool.json.gz │ ├── test.gen.json.gz │ └── test.mcq.json.gz ├── requirements.txt └── src ├── evaluate_gen.py ├── evaluation_bool_mcq.py ├── evaluators ├── __init__.py ├── action_reachability.py ├── applicability.py ├── base.py ├── justification.py ├── landmarks.py ├── next_action.py ├── progression.py ├── reachability.py └── validation.py ├── grammar ├── acp_grammar.lark └── 
grammar_parser.py └── utils ├── pddl_utils.py ├── tarski_utils.py └── tarskilite.py /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Submission : [Model-name] 2 | 3 | > [!IMPORTANT] 4 | > 5 | > Please go over the checklist at the bottom after providing mandatory and optional information. 6 | 7 | 8 | > *Provide a general summary of your model* 9 | 10 | ## Mandatory information 11 | 12 | **Model ID**: 13 | > *Unique identifier for the submission* 14 | 15 | 16 | **Model Info** : 17 | 18 | > *provide information about model family, name, parameter, variant etc.* 19 | 20 | **Solution Description**: 21 | 22 | > **Describe the model and the training or prompting approach used.** 23 | 24 | **Authors and Affiliation**: 25 | 26 | 27 | > *Provide a list of authors and their affiliations.* 28 | 29 | **Any risks or ethical concerns associated with the model**: 30 | 31 | > *Mention any risks or ethical concerns associated with the model. Say N/A if not applicable.* 32 | 33 | ## Optional information 34 | 35 | 36 | **Execution Time**: 37 | 38 | **Total # input tokens:** 39 | 40 | **Total # output tokens:** 41 | 42 | **Evaluation cost:** 43 | 44 | **Link to the model:** 45 | 46 | **Link to the training data:** 47 | 48 | **Link to the article:** 49 | 50 | **Temperature:** 51 | 52 | ## Checklist: 53 | > *Go over all the following points, and put an `x` in all the boxes that apply.* 54 | > *If you're unsure about any of these, put a `?`* 55 | 56 | 57 | - [ ] I declare that the answers were not generated manually. 58 | - [ ] There was no form of human intervention involved in generating the answers. (If no, please explain in the comments section below.) 59 | - [ ] The submission adheres to the JSON format. 60 | - [ ] Is the model open sourced? 61 | - [ ] If yes, the `Link to the model` is provided. 62 | - [ ] The training data is open sourced. 63 | - [ ] If yes, the `Link to the training data` is provided. 
64 | - [ ] If not checked, an explanation is provided in the comments below. 65 | - [ ] There is a related article or paper. 66 | - [ ] If yes, the `Link to the article` is provided. 67 | - [ ] If not checked, an explanation is provided in the comments below. 68 | - [ ] JSON contains the *verbatim* response from the model. 69 | - [ ] I have provided description of risks and ethical concerns above to the best of my abilities. 70 | 71 | 72 | 73 | ## Comments 74 | 75 | > *Any additional details that you may find relevant.* 76 | 77 | 78 | Submitted By : 79 | 80 | *Name* 81 | *Affiliation* 82 | *Email Address* 83 | 84 | -------------------------------------------------------------------------------- /GettingStarted.md: -------------------------------------------------------------------------------- 1 | 2 | # ACPBench 3 | 4 |

5 | 🏠 Homepage • 6 | 📄 Paper • 7 | 🤗 Dataset 8 |

9 |

10 | 📖 README • 11 | ▶️ Recording • 12 | 📜 Citation • 13 | 🙏 Acknowledgement 14 |

15 | 16 | 17 | # 🔥 Getting Started 18 | 19 | 20 | > [!TIP] 21 | > 22 | > [ACPBench](https://ibm.github.io/ACPBench) ❤️ [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness) ❤️ [hugging-face](https://huggingface.co/datasets/ibm/ACPBench)! 23 | > 24 | > ACPBench is integrated with lm-evaluation-harness and hugging-face to facilitate quick evaluation of existing pretrained models as well as custom finetuned models. 25 | 26 | 27 | 28 | > [!IMPORTANT] 29 | > 30 | > [ACPBench Hard](https://openreview.net/forum?id=cfsVixNuJw) dataset is now available in this repo. Scroll down to see how to get started. 31 | 32 | 33 | ## ACPBench 34 | 35 | We release dev and test sets for each task in this repo. The dev set contains 40 examples with answers that can be used for validation and development purposes. Refer to the [development guide](#development-guide) below to see how to quickly estimate the performance of your model on dev or test set. 36 | 37 | ### Development Guide 38 | 39 | You can either use your model with lm-eval-harness or a custom implementation to generate outputs. We provide lm-eval-harness config files for evaluation. For a custom implementation, you can either use the ['exact_match' metric](https://huggingface.co/spaces/evaluate-metric/exact_match) from hugging face, or produce a JSON file consistent with lm-eval-harness and use the provided [evaluation_bool_mcq.py](./src/evaluation_bool_mcq.py). 40 | 41 | 42 | **Using LM-eval-harness** 43 | 44 | 45 | To evaluate your model on ACPBench test set using LM-eval-harness, use the following command. 46 | 47 | 48 | ``` 49 | lm_eval --model \ 50 | --model_args \ 51 | --tasks acp_bench \ 52 | --output \ 53 | --log_samples 54 | ``` 55 | 56 | > [!IMPORTANT] 57 | > 58 | > To evaluate your model on ACPBench test set using LM-eval-harness, update the `test_split` in the yaml file to `test`. 
59 | 60 | **Custom** 61 | 62 | To use [evaluation_bool_mcq.py](./src/evaluation_bool_mcq.py) to obtain the score, dump the generated outputs for each example in the lm-eval format shown below. Here, `doc` is the original example, `resp` is the generated response (showing 5 samples here) from the model and `filtered_resps` is the answer to the question (obtained by processing the `resp`). 63 | 64 | ```json 65 | [ { 66 | "doc_id": 0, 67 | "doc": { 68 | "id": -8342636639526456067, 69 | "group": "applicable_actions_bool", 70 | "context": "This is a ferry domain, ...", 71 | "question": "Is the following action applicable in this state: travel by sea from location l1 to location l0?", 72 | "answer": "yes" 73 | }, 74 | "resp": [["... Therefore, the answer is Yes", 75 | "... the answer is Yes", 76 | "Yes", 77 | "The answer is yes", 78 | "the action is applicable"]], 79 | "filtered_resps": [ 80 | [ 81 | "Yes", 82 | "Yes", 83 | "Yes", 84 | "Yes", 85 | "Yes" 86 | ] 87 | ], 88 | }, 89 | ... 90 | ] 91 | ``` 92 | 93 | Once the JSON file is created for a task, you can use the following command to print the score: 94 | 95 | ```bash 96 | python ./src/evaluation_bool_mcq.py --results --gt 97 | ``` 98 | 99 | 100 | ## ACPBench Hard 101 | 102 | We release dev and test sets for 8 tasks in ACPBench-Hard in this repo. The dataset is not available on hugging face just yet. To evaluate a model on ACPBench Hard, use the LM-eval-harness and custom evaluation script as shown below. 103 | 104 | 105 | ### Generate responses from LLM 106 | 107 | ```bash 108 | lm_eval --model \ 109 | --model_args \ 110 | --tasks acp_bench_hard \ 111 | --output \ 112 | --log_samples \ 113 | --include_path ./configs/tasks 114 | ``` 115 | 116 | ### Evaluate the output 117 | 118 | > :exclamation: Install the python [requirements](./requirements.txt) in your environment. 
119 | 120 | ```bash 121 | python ./src/evaluate_gen.py 122 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Community Data License Agreement - Permissive - Version 2.0 2 | 3 | This is the Community Data License Agreement - Permissive, Version 2.0 (the "agreement"). Data Provider(s) and Data Recipient(s) agree as follows: 4 | 5 | 1. Provision of the Data 6 | 7 | 1.1. A Data Recipient may use, modify, and share the Data made available by Data Provider(s) under this agreement if that Data Recipient follows the terms of this agreement. 8 | 9 | 1.2. This agreement does not impose any restriction on a Data Recipient's use, modification, or sharing of any portions of the Data that are in the public domain or that may be used, modified, or shared under any other legal exception or limitation. 10 | 11 | 2. Conditions for Sharing Data 12 | 13 | 2.1. A Data Recipient may share Data, with or without modifications, so long as the Data Recipient makes available the text of this agreement with the shared Data. 14 | 15 | 3. No Restrictions on Results 16 | 17 | 3.1. This agreement does not impose any restriction or obligations with respect to the use, modification, or sharing of Results. 18 | 19 | 4. No Warranty; Limitation of Liability 20 | 21 | 4.1. All Data Recipients receive the Data subject to the following terms: 22 | 23 | THE DATA IS PROVIDED ON AN "AS IS" BASIS, WITHOUT REPRESENTATIONS, WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
24 | 25 | NO DATA PROVIDER SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE DATA OR RESULTS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 26 | 27 | 5. Definitions 28 | 29 | 5.1. "Data" means the material received by a Data Recipient under this agreement. 30 | 31 | 5.2. "Data Provider" means any person who is the source of Data provided under this agreement and in reliance on a Data Recipient's agreement to its terms. 32 | 33 | 5.3. "Data Recipient" means any person who receives Data directly or indirectly from a Data Provider and agrees to the terms of this agreement. 34 | 35 | 5.4. "Results" means any outcome obtained by computational analysis of Data, including for example machine learning models and models' insights. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ACPBench 2 | 3 |

4 | 🏠 Homepage • 5 | 📄 Paper • 6 | 🤗 Dataset 7 |

8 |

9 | 🔥 Getting Started • 10 | ▶️ Recording • 11 | 📜 Citation • 12 | 🙏 Acknowledgement 13 |

14 | 15 | 16 | > [!IMPORTANT] 17 | > 18 | > [ACPBench Hard](https://openreview.net/forum?id=cfsVixNuJw) dataset is now available in this repo. 19 | 20 | 21 | 22 | ACPBench is a dataset to evaluate reasoning ability of LLMs about Action, Change, and Planning. It consists of 7 tasks. 23 | 24 | **1. Applicability (app)**, checks which actions are applicable in a state. 25 | 26 |
Examples 27 | 28 | 29 | #### Multiple choice questions (MCQ) 30 | Example: 31 | ``` json 32 | { 33 | "id": -6575941946410689765, 34 | "group": "applicable_actions_mc", 35 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l1, with the car c0 on board. The cars are at locations as follows: c4, c7, and c9 are at l1; c6, c3, c1, c5, c2, and c8 are at l0.", 36 | "question": "Which of the following actions will be applicable in this state? A. unload the car c7 from the ferry to location l0. B. sail from location l1 to location l0. C. load the car c1 at location l0 on to the ferry. D. load the car c2 at location l0 on to the ferry.", 37 | "choices": { 38 | "text": [ 39 | "unload the car c7 from the ferry to location l0", 40 | "sail from location l1 to location l0", 41 | "load the car c1 at location l0 on to the ferry", 42 | "load the car c2 at location l0 on to the ferry" 43 | ], 44 | "label": [ 45 | "A", 46 | "B", 47 | "C", 48 | "D" 49 | ] 50 | }, 51 | "query": "Which action will be applicable in this state?" 52 | }, 53 | ``` 54 | 55 | #### Yes-no/binary questions (Bool) 56 | Example: 57 | ``` json 58 | { 59 | "id": -8342636639526456067, 60 | "group": "applicable_actions_bool", 61 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 20 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. 
The cars are at locations as follows: c7, c11, c2, c16, c14, c19, c5, c4, c12, c17, and c1 are at l1; c13, c8, c6, c18, c0, c3, c9, c10, and c15 are at l0.", 62 | "question": "Is the following action applicable in this state: travel by sea from location l1 to location l0?" 63 | }, 64 | ``` 65 |
66 | 67 | **2. Progression (prog)**, checks what happens once an action is applied. 68 | 69 | 70 |
71 | Examples 72 | 73 | #### Multiple choice questions (MCQ) 74 | Example: 75 | ``` json 76 | { 77 | "id": -6721318970102316394, 78 | "group": "progression_mcq", 79 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l1, with the car c2 on board. The cars are at locations as follows: c0, c3, c6, c1, c8, and c9 are at l0; c7, c5, and c4 are at l1.", 80 | "question": "Which of the following facts hold after performing the action \"sail from location l1 to location l0\" in the current state? A. The ferry is at l0 location and The ferry is at l1 location. B. The ferry is at l1 location and The ferry is empty. C. The ferry is empty. D. The ferry is at l0 location.", 81 | "choices": { 82 | "text": [ 83 | "The ferry is at l0 location and The ferry is at l1 location", 84 | "The ferry is at l1 location and The ferry is empty", 85 | "The ferry is empty", 86 | "The ferry is at l0 location" 87 | ], 88 | "label": [ 89 | "A", 90 | "B", 91 | "C", 92 | "D" 93 | ] 94 | }, 95 | "query": "Which fact will hold after performing the action \"sail from location l1 to location l0\" in the current state?" 96 | }, 97 | ``` 98 | 99 | #### Yes-no/binary questions (Bool) 100 | Example: 101 | ``` json 102 | { 103 | "id": -8215166616105943671, 104 | "group": "progression_bool", 105 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 5 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. 
The cars are at locations as follows: c1, c0, c3, and c2 are at l0; c4 is at l1.", 106 | "question": "Will the fact \"Car c4 is on the ferry\" hold after performing the action \"sail from location l0 to location l1\" in the current state?" 107 | }, 108 | ``` 109 | 110 |
111 | 112 | **3. Atom Reachability (reach)**, checks which atoms are reachable from a state. 113 | 114 |
115 | Examples 116 | 117 | 118 | #### Multiple choice questions (MCQ) 119 | Example: 120 | ``` json 121 | { 122 | "id": 7931544803254567708, 123 | "group": "reachable_atom_mc", 124 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l0, with the car c3 on board. The cars are at locations as follows: c0, c1, c2, c6, c8, and c9 are at l0; c4, c7, and c5 are at l1.", 125 | "question": "Which of the following options can hold in a state that can potentially be reached? A. Ferry has car l1 on board. B. Car c8 is at location l0 and Car c8 is on board the ferry. C. The ferry is at c5 location and Car c5 is at location l1. D. The ferry is at l1 location and Car c3 is at location l1.", 126 | "choices": { 127 | "text": [ 128 | "Ferry has car l1 on board", 129 | "Car c8 is at location l0 and Car c8 is on board the ferry", 130 | "The ferry is at c5 location and Car c5 is at location l1", 131 | "The ferry is at l1 location and Car c3 is at location l1" 132 | ], 133 | "label": [ 134 | "A", 135 | "B", 136 | "C", 137 | "D" 138 | ] 139 | }, 140 | "query": "Which fact is reachable from this state?" 141 | }, 142 | ``` 143 | 144 | #### Yes-no/binary questions (Bool) 145 | 146 | 147 | Example: 148 | ``` json 149 | { 150 | "id": -2426698749034015429, 151 | "group": "reachable_atom_bool", 152 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. 
The cars are at locations as follows: c2, c7, and c5 are at l1; c3, c4, c6, c9, c1, c0, and c8 are at l0.", 153 | "question": "Is it possible to transition to a state where the following holds: Car c2 is at location c0?" 154 | }, 155 | ``` 156 | 157 |
158 | 159 | **4. Validation (val)**, checks whether a sequence of actions is applicable and achieves the goal. 160 | 161 |
162 | Examples 163 | 164 | 165 | #### Multiple choice questions (MCQ) 166 | Example: 167 | ``` json 168 | { 169 | "id": -2425816914857415723, 170 | "group": "validation_mcq", 171 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c1 and c0 are at l0. The goal is to reach a state where the following facts hold: Car c0 is at location l1 and Car c1 is at location l1.", 172 | "question": "Which of the following claims is true with regard to the following sequence of actions \"board the car c1 at location l0 on to the ferry, debark car c1 to location l0 from the ferry, board the car c0 at location l0 on to the ferry, travel by sea from location l0 to location l1, debark car c0 to location l1 from the ferry, board the car c0 at location l1 on to the ferry, debark car c0 to location l1 from the ferry, travel by sea from location l1 to location l0, board the car c1 at location l0 on to the ferry, debark car c1 to location l0 from the ferry, board the car c1 at location l0 on to the ferry, travel by sea from location l0 to location l1, debark car c1 to location l1 from the ferry, board the car c0 at location l1 on to the ferry, debark car c0 to location l1 from the ferry\" A. The sequence is not valid. B. The sequence is not applicable. C. The sequence is applicable, but does not achieve the goal. D. 
The sequence is a plan.", 173 | "choices": { 174 | "text": [ 175 | "The sequence is not valid", 176 | "The sequence is not applicable", 177 | "The sequence is applicable, but does not achieve the goal", 178 | "The sequence is a plan" 179 | ], 180 | "label": [ 181 | "A", 182 | "B", 183 | "C", 184 | "D" 185 | ] 186 | }, 187 | "query": "Is the following sequence of actions applicable in the current state: \"board the car c1 at location l0 on to the ferry debark car c1 to location l0 from the ferry board the car c0 at location l0 on to the ferry travel by sea from location l0 to location l1 debark car c0 to location l1 from the ferry board the car c0 at location l1 on to the ferry debark car c0 to location l1 from the ferry travel by sea from location l1 to location l0 board the car c1 at location l0 on to the ferry debark car c1 to location l0 from the ferry board the car c1 at location l0 on to the ferry travel by sea from location l0 to location l1 debark car c1 to location l1 from the ferry board the car c0 at location l1 on to the ferry debark car c0 to location l1 from the ferry\" and does it achieve the goal?" 188 | }, 189 | ``` 190 | 191 | #### Yes-no/binary questions (Bool) 192 | Example: 193 | ``` json 194 | { 195 | "id": -2339048290501167365, 196 | "group": "validation_bool", 197 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c0 and c1 are at l0. 
The goal is to reach a state where the following facts hold: Car c0 is at location l1 and Car c1 is at location l1.", 198 | "question": "Is the following sequence of actions \"board car c0 at location l0, debark car c0 to location l0 from the ferry, travel by sea from location l0 to location l1, travel by sea from location l1 to location l0, board car c1 at location l0, travel by sea from location l0 to location l1, debark car c1 to location l1 from the ferry, board car c1 at location l1, debark car c1 to location l1 from the ferry, travel by sea from location l1 to location l0, board car c0 at location l0, debark car c0 to location l0 from the ferry, board car c0 at location l0, travel by sea from location l0 to location l1, debark car c0 to location l1 from the ferry\" valid in this problem?" 199 | }, 200 | ``` 201 | 202 |
203 | 204 | **5. Action Reachability (areach)**, checks whether there is a reachable state where the action is applicable. 205 | 206 | 207 |
208 | Examples 209 | 210 | 211 | #### Multiple choice questions (MCQ) 212 | Example: 213 | ``` json 214 | { 215 | "id": 6622905800496884581, 216 | "group": "reachable_action_mc", 217 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l1, with the car c3 on board. The cars are at locations as follows: c9, c2, c6, c8, c0, and c1 are at l0; c7, c4, and c5 are at l1.", 218 | "question": "Which of the following actions can eventually be applied? A. sail from location c2 to location l1. B. unload the car c7 from the ferry to location l0. C. unload the car c3 from the ferry to location c7. D. unload the car c8 from the ferry to location c3.", 219 | "choices": { 220 | "text": [ 221 | "sail from location c2 to location l1", 222 | "unload the car c7 from the ferry to location l0", 223 | "unload the car c3 from the ferry to location c7", 224 | "unload the car c8 from the ferry to location c3" 225 | ], 226 | "label": [ 227 | "A", 228 | "B", 229 | "C", 230 | "D" 231 | ] 232 | }, 233 | "query": "Which action is reachable from this state?" 234 | }, 235 | ``` 236 | 237 | #### Yes-no/binary questions (Bool) 238 | Example: 239 | ``` json 240 | { 241 | "id": -1990152005808638716, 242 | "group": "reachable_action_bool", 243 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 20 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. 
The cars are at locations as follows: c12, c19, c4, c11, c5, c7, c16, and c1 are at l1; c15, c18, c14, c0, c8, c3, c2, c9, c6, c10, c13, and c17 are at l0.", 244 | "question": "Is it possible to transition to a state where the action \"board the car c19 at location l1\" can be applied?" 245 | }, 246 | ``` 247 |
248 | 249 | 250 | **6. Action Justification (just)**, checks whether the action is needed on the plan. 251 | 252 | 253 |
254 | Examples 255 | 256 | 257 | #### Multiple choice questions (MCQ) 258 | Example: 259 | ``` json 260 | { 261 | "id": 3903123391386162053, 262 | "group": "action_justification_mcq", 263 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c1 and c0 are at l0. The goal is to reach a state where the following facts hold: Car c0 is at location l1 and Car c1 is at location l1.", 264 | "question": "Given the plan: \"board the car c0 at the location l0, travel by sea from location l0 to location l1, unload the car c0 from the ferry to location l1, travel by sea from location l1 to location l0, board the car c1 at the location l0, travel by sea from location l0 to location l1, unload the car c1 from the ferry to location l1, board the car c0 at the location l1, unload the car c0 from the ferry to location l1\"; which of the following pairs of consecutive actions can be removed from this plan and still have a valid plan? A. board the car c0 at the location l0 and travel by sea from location l0 to location l1. B. unload the car c1 from the ferry to location l1 and board the car c0 at the location l1. C. travel by sea from location l0 to location l1 and unload the car c1 from the ferry to location l1. D. 
board the car c0 at the location l1 and unload the car c0 from the ferry to location l1.", 265 | "choices": { 266 | "text": [ 267 | "board the car c0 at the location l0 and travel by sea from location l0 to location l1", 268 | "unload the car c1 from the ferry to location l1 and board the car c0 at the location l1", 269 | "travel by sea from location l0 to location l1 and unload the car c1 from the ferry to location l1", 270 | "board the car c0 at the location l1 and unload the car c0 from the ferry to location l1" 271 | ], 272 | "label": [ 273 | "A", 274 | "B", 275 | "C", 276 | "D" 277 | ] 278 | }, 279 | "query": "Given the plan: \"board the car c0 at the location l0, travel by sea from location l0 to location l1, unload the car c0 from the ferry to location l1, travel by sea from location l1 to location l0, board the car c1 at the location l0, travel by sea from location l0 to location l1, unload the car c1 from the ferry to location l1, board the car c0 at the location l1, unload the car c0 from the ferry to location l1\"; which pair of consecutive actions can be removed from this plan?" 280 | }, 281 | ``` 282 | 283 | #### Yes-no/binary questions (Bool) 284 | 285 | 286 | Example: 287 | ``` json 288 | { 289 | "id": -3115201149135125328, 290 | "group": "action_justification_bool", 291 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 3 locations and 2 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c1 and c0 are at l1. 
The goal is to reach a state where the following facts hold: Car c0 is at location l0 and Car c1 is at location l2.", 292 | "question": "Given the plan: \"load the car c1 at location l1 on to the ferry, unload the car c1 from the ferry to location l1, load the car c1 at location l1 on to the ferry, sail from location l1 to location l2, unload the car c1 from the ferry to location l2, load the car c1 at location l2 on to the ferry, unload the car c1 from the ferry to location l2, sail from location l2 to location l1, load the car c0 at location l1 on to the ferry, sail from location l1 to location l0, unload the car c0 from the ferry to location l0\"; can the following action be removed from this plan and still have a valid plan: load the car c1 at location l1 on to the ferry?" 293 | }, 294 | ``` 295 | 296 |
297 | 298 | **7. Landmarks (land)**, checks whether a fact must become true sometime along every plan. 299 | 300 | 301 | 302 | 303 | 304 |
305 | Examples 306 | 307 | 308 | 309 | #### Multiple choice questions (MCQ) 310 | Example: 311 | ``` json 312 | { 313 | "id": -981962208469164703, 314 | "group": "landmarks_mcq", 315 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 20 cars, numbered consecutively. Currently, the ferry is at l0, with the car c1 on board. The cars are at locations as follows: c7, c19, c4, c12, c17, and c5 are at l1; c11, c15, c0, c13, c18, c6, c8, c2, c10, c16, c9, c3, and c14 are at l0. The goal is to reach a state where the following facts hold: Car c7 is at location l1, Car c15 is at location l0, Car c0 is at location l0, Car c1 is at location l1, Car c13 is at location l0, Car c14 is at location l1, Car c19 is at location l1, Car c18 is at location l1, Car c4 is at location l1, Car c10 is at location l0, Car c2 is at location l1, Car c8 is at location l1, Car c12 is at location l1, Car c9 is at location l0, Car c17 is at location l1, Car c16 is at location l1, Car c6 is at location l1, Car c11 is at location l1, Car c5 is at location l1, and Car c3 is at location l0.", 316 | "question": "Which of the following facts is a landmark (must hold at some point along any plan) for the current state? A. Car c6 is on board the ferry. B. Car c9 is at location l1. C. Car c13 is on the ferry. D. Ferry has car c15 on board.", 317 | "choices": { 318 | "text": [ 319 | "Car c6 is on board the ferry", 320 | "Car c9 is at location l1", 321 | "Car c13 is on the ferry", 322 | "Ferry has car c15 on board" 323 | ], 324 | "label": [ 325 | "A", 326 | "B", 327 | "C", 328 | "D" 329 | ] 330 | }, 331 | "query": "Which fact must hold at some point on any way to the goal from the current state?" 
332 | }, 333 | ``` 334 | 335 | #### Yes-no/binary questions (Bool) 336 | Example: 337 | ``` json 338 | { 339 | "id": 1263458375528833442, 340 | "group": "landmarks_bool", 341 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 20 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c14, c8, c3, c2, c10, c0, c6, c13, c11, c16, c9, c15, c18, and c17 are at l0; c7, c12, c19, c1, c4, and c5 are at l1. The goal is to reach a state where the following facts hold: Car c11 is at location l1, Car c8 is at location l1, Car c3 is at location l0, Car c7 is at location l1, Car c10 is at location l0, Car c0 is at location l0, Car c12 is at location l1, Car c19 is at location l1, Car c13 is at location l0, Car c17 is at location l1, Car c1 is at location l1, Car c9 is at location l0, Car c15 is at location l0, Car c14 is at location l1, Car c2 is at location l1, Car c4 is at location l1, Car c16 is at location l1, Car c6 is at location l1, Car c18 is at location l1, and Car c5 is at location l1.", 342 | "question": "Is the following fact a landmark (must hold at some point along any plan) for the current state? Car c12 is at location l0" 343 | }, 344 | ``` 345 | 346 | 347 | 348 |
349 | 350 | > [!IMPORTANT] 351 | > 352 | > Check out our [blog](https://ibm.github.io/ACPBench/blog.html) to get more insight on each of these tasks, and why we need yet another question-answering dataset. 353 | 354 | 355 | > [!WARNING] 356 | > 357 | > ACPBench is an evolving dataset collection. We may add different tasks and domains to this collection in time. 358 | 359 | 360 | ## 📜 Citation 361 | ``` 362 | @inproceedings{kokel2025acp, 363 | author = {Harsha Kokel and 364 | Michael Katz and 365 | Kavitha Srinivas and 366 | Shirin Sohrabi}, 367 | title = {ACPBench: Reasoning about Action, Change, and Planning}, 368 | booktitle = {{AAAI}}, 369 | publisher = {{AAAI} Press}, 370 | year = {2025} 371 | } 372 | ``` 373 | 374 | ## 🙏 Acknowledgement 375 | 376 | The authors thank Maxwell Crouse, Asim Munawar, Ramón Fernandez Astudillo, and Ibrahim Abdelaziz at IBM Research for their help in setting up the code and finetuning. 377 | -------------------------------------------------------------------------------- /configs/tasks/README.md: -------------------------------------------------------------------------------- 1 | # ACPBench 2 | 3 | ### Paper 4 | 5 | Title: ACPBench: Reasoning About Action, Change, and Planning 6 | Abstract: https://arxiv.org/pdf/2410.05669 7 | 8 | There is an increasing body of work using Large Language Models (LLMs) as agents for orchestrating workflows and making decisions in domains that require planning and multi-step reasoning. As a result, it is imperative to evaluate LLMs on core skills required for planning. ACPBench is a benchmark for evaluating the reasoning tasks in the field of planning. The benchmark consists of 7 reasoning tasks over 13 planning domains. The collection is constructed from planning domains described in a formal language. This allows the synthesized problems to have provably correct solutions across many tasks and domains. 
Further, it allows the luxury to scale without additional human effort, i.e., many additional problems can be created automatically. 9 | 10 | Homepage: https://ibm.github.io/ACPBench/ 11 | 12 | 13 | ### Citation 14 | 15 | ``` 16 | @inproceedings{kokel2025acp, 17 | author = {Harsha Kokel and 18 | Michael Katz and 19 | Kavitha Srinivas and 20 | Shirin Sohrabi}, 21 | title = {ACPBench: Reasoning about Action, Change, and Planning}, 22 | booktitle = {{AAAI}}, 23 | publisher = {{AAAI} Press}, 24 | year = {2025} 25 | } 26 | ``` 27 | 28 | ### Groups, Tags, and Tasks 29 | 30 | #### Groups 31 | 32 | * None 33 | 34 | #### Tags 35 | 36 | * `acp_bench` : Evaluates `acp_bool_cot_2shot` and `acp_mcq_cot_2shot` 37 | * `acp_bool_cot_2shot` : Evaluates `acp_areach_bool`, `acp_app_bool`, `acp_just_bool`, `acp_land_bool`, `acp_prog_bool`, `acp_reach_bool`, `acp_val_bool` with chain-of-thought and 2 shots 38 | * `acp_mcq_cot_2shot` : Evaluates `acp_areach_mcq`, `acp_app_mcq`, `acp_just_mcq`, `acp_land_mcq`, `acp_prog_mcq`, `acp_reach_mcq`, `acp_val_mcq` with chain-of-thought and 2 shots 39 | 40 | #### Tasks 41 | 42 | 7 Boolean tasks 43 | * `acp_areach_bool` 44 | * `acp_app_bool` 45 | * `acp_just_bool` 46 | * `acp_land_bool` 47 | * `acp_prog_bool` 48 | * `acp_reach_bool` 49 | * `acp_val_bool` 50 | 51 | 7 MCQ tasks 52 | * `acp_areach_mcq` 53 | * `acp_app_mcq` 54 | * `acp_just_mcq` 55 | * `acp_land_mcq` 56 | * `acp_prog_mcq` 57 | * `acp_reach_mcq` 58 | * `acp_val_mcq` 59 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/_gen_yaml_2shot: -------------------------------------------------------------------------------- 1 | tag: 2 | - acp_gen_2shot 3 | - acp_bench_hard 4 | dataset_path: json 5 | test_split: test 6 | dataset_name: null 7 | doc_to_target: "{{answer}}" 8 | output_type: generate_until 9 | num_fewshot: 2 10 | generation_kwargs: 11 | until: 12 | - "\n\n\n\n" 13 | - "**Question**:" 14 | - "**Question:**" 15 | - "Q:" 16 | 
do_sample: false 17 | max_gen_toks: 1000 18 | temperature: 0.0 19 | metric_list: 20 | - metric: "bypass" 21 | metadata: 22 | version: 1.0 23 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/act_reach.yaml: -------------------------------------------------------------------------------- 1 | task: acp_areach_gen 2 | dataset_path: json 3 | dataset_name: acp_areach_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/areach/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 0 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f2-2f and its arm is empty. All the positions are open except the following: f2-0f has shape0 shaped lock, f4-2f has shape0 shaped lock. Key key0-0 is at position f1-2f. Key key0-1 is at position f1-3f. 
The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock the place ?lockpos with key ?key of shape ?shape from the current position place ?curpos, (move ?curpos ?nextpos) - move from place ?curpos to place ?nextpos, (pickup ?curpos ?key) - retrieve the key ?key from its current position ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey from the current position ?curpos and loose the key ?oldkey which is being held, and (putdown ?curpos ?key) - put the key ?key at the current position place ?curpos." 13 | question: "What action can never become applicable, in any state reachable from the current state?" 14 | answer: "(unlock f0-3f f0-4f key0-0 shape0)" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, t1 is at l1-1, a0 is at l1-0, p0 is at l0-0, t0 is at l0-1, p2 is in a0, p1 is in t1, p3 is in t0. The available actions are: (load-truck ?obj ?truck ?loc) - load the object ?obj from location ?loc into the truck ?truck, (load-airplane ?obj ?airplane ?loc) - load object ?obj into airplane ?airplane at location ?loc, (unload-truck ?obj ?truck ?loc) - offload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - offload the object ?obj from the airplane ?airplane at location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - navigate the truck ?truck from location ?loc-from in city ?city to location ?loc-to in the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - operate the airplane ?airplane from airport ?loc-from to airport ?loc-to." 
16 | question: "What action can never become applicable, in any state reachable from the current state?" 17 | answer: "(drive-truck t0 l1-1 l0-0 c0)" 18 | doc_to_text: "{{context}} {{question}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide one action or None." 19 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/app.yaml: -------------------------------------------------------------------------------- 1 | task: acp_app_gen 2 | dataset_path: json 3 | dataset_name: acp_app_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/app/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0. Currently, the robot is at position f3-2f and its arm is empty. All the positions are open except the following: f2-0f has shape0 shaped lock, f4-2f has shape0 shaped lock. Key key0-0 is at position f2-2f. Key key0-1 is at position f1-3f. 
The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock the place ?lockpos with key ?key of shape ?shape from the current position place ?curpos, (move ?curpos ?nextpos) - travel from the current position ?curpos to the next position ?nextpos, (pickup ?curpos ?key) - pick up key ?key from place ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey at the current position place ?curpos and loose the key ?oldkey being held, and (putdown ?curpos ?key) - put down key ?key at current position place ?curpos." 13 | question: "Generate the list of all ground actions that are applicable in this state." 14 | answer: "[(move f3-2f f3-1f), (move f3-2f f2-2f), (move f3-2f f3-3f)]" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-0 and l0-1 are in c0; l1-0 and l1-1 are in c1. Currently, t1, p2, and p3 are at l1-0, a0 is at l0-0, t0 is at l0-1, p1 and p0 are in t1. 
The available actions are: (load-truck ?obj ?truck ?loc) - load object ?obj into truck ?truck at location ?loc, (load-airplane ?obj ?airplane ?loc) - load object ?obj into airplane ?airplane at location ?loc, (unload-truck ?obj ?truck ?loc) - unload object ?obj from truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - remove the object ?obj from the airplane ?airplane and place it on the location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - navigate the truck ?truck from its current location ?loc-from in city ?city to the new location ?loc-to within the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - fly airplane ?airplane from airport ?loc-from to airport ?loc-to." 16 | question: "Generate the list of all ground actions that are applicable in this state." 17 | answer: "[(drive-truck t1 l1-0 l1-0 c1), (drive-truck t0 l0-1 l0-0 c0), (load-truck p2 t1 l1-0), (unload-truck p0 t1 l1-0), (drive-truck t0 l0-1 l0-1 c0), (fly-airplane a0 l0-0 l1-0), (fly-airplane a0 l0-0 l0-0), (unload-truck p1 t1 l1-0), (drive-truck t1 l1-0 l1-1 c1), (load-truck p3 t1 l1-0)]" 18 | doc_to_text: "{{context}} {{question}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide only the actions." 19 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/just.yaml: -------------------------------------------------------------------------------- 1 | task: acp_just_gen 2 | dataset_path: json 3 | dataset_name: acp_just_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/just/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). 
The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f3-3f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock, f2-0f has shape0 shaped lock. Key key0-0 is at position f2-2f. Key key0-1 is at position f1-3f. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock place ?lockpos with key ?key of shape ?shape from current position place ?curpos, (move ?curpos ?nextpos) - move from ?curpos to ?nextpos, (pickup ?curpos ?key) - retrieve the key ?key from its current position ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up key ?newkey at current position place ?curpos and loose key ?oldkey being held, and (putdown ?curpos ?key) - put down the key ?key at the current position ?curpos. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location." 13 | question: "Simplify the plan [(move f3-3f f3-2f), (move f3-2f f2-2f), (pickup f2-2f key0-0), (move f2-2f f2-1f), (putdown f2-1f key0-0), (pickup f2-1f key0-0), (unlock f2-1f f2-0f key0-0 shape0), (move f2-1f f2-0f), (putdown f2-0f key0-0)] by removing either a single action or a pair of consecutive actions, while still maintaining a valid plan. Provide the resulting simplified plan." 14 | answer: "[(move f3-3f f3-2f), (move f3-2f f2-2f), (pickup f2-2f key0-0), (move f2-2f f2-1f), (unlock f2-1f f2-0f key0-0 shape0), (move f2-1f f2-0f), (putdown f2-0f key0-0)]" 15 | - context: "There are several cities, each containing several locations, some of which are airports. 
There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, p2, p1, and p3 are at l1-0, p0 and t1 are at l1-1, t0 is at l0-1, a0 is at l0-0. The available actions are: (load-truck ?obj ?truck ?loc) - load the object ?obj from location ?loc into the truck ?truck, (load-airplane ?obj ?airplane ?loc) - load the object ?obj from location ?loc onto the airplane ?airplane, (unload-truck ?obj ?truck ?loc) - unload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - remove the object ?obj from the airplane ?airplane and place it on the location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - drive truck ?truck from location ?loc-from in city ?city to location ?loc-to in the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - fly the airplane ?airplane from location ?loc-from to location ?loc-to. The goal is to reach a state where the following facts hold: p3 is at l0-1, p2 is at l1-0, p0 is at l0-0, and p1 is at l1-0." 16 | question: "Simplify the plan [(fly-airplane a0 l0-0 l1-0), (fly-airplane a0 l1-0 l0-0), (load-truck p0 t1 l1-1), (drive-truck t1 l1-1 l1-0 c1), (unload-truck p0 t1 l1-0), (fly-airplane a0 l0-0 l1-0), (load-airplane p0 a0 l1-0), (load-airplane p3 a0 l1-0), (fly-airplane a0 l1-0 l0-0), (unload-airplane p0 a0 l0-0), (unload-airplane p3 a0 l0-0), (drive-truck t0 l0-1 l0-0 c0), (load-truck p3 t0 l0-0), (drive-truck t0 l0-0 l0-1 c0), (unload-truck p3 t0 l0-1)] by removing either a single action or a pair of consecutive actions, while still maintaining a valid plan. Provide the resulting simplified plan." 
17 | answer: "[(load-truck p0 t1 l1-1), (drive-truck t1 l1-1 l1-0 c1), (unload-truck p0 t1 l1-0), (fly-airplane a0 l0-0 l1-0), (load-airplane p0 a0 l1-0), (load-airplane p3 a0 l1-0), (fly-airplane a0 l1-0 l0-0), (unload-airplane p0 a0 l0-0), (unload-airplane p3 a0 l0-0), (drive-truck t0 l0-1 l0-0 c0), (load-truck p3 t0 l0-0), (drive-truck t0 l0-0 l0-1 c0), (unload-truck p3 t0 l0-1)]" 18 | doc_to_text: "{{context}} {{question}}" 19 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/land.yaml: -------------------------------------------------------------------------------- 1 | task: acp_land_gen 2 | dataset_path: json 3 | dataset_name: acp_land_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/land/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0. Currently, the robot is at position f3-0f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-0 is at position f3-0f. Key key0-1 is at position f1-3f. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location. 
The available propositions are: (at ?r ?x) - Key ?r is at ?x location, (at-robot ?x) - Robot is at ?x location, (locked ?x) - Location ?x is locked, (holding ?k) - Robot is holding ?k, (open ?x) - Location ?x is open, and (arm-empty) - Robot's arm is empty." 13 | question: "Generate a non-trivial fact landmark, one that does not hold in the initial state or goal." 14 | answer: "(holding key0-0)" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-0 and l1-1 are in c1; l0-1 and l0-0 are in c0. Currently, a0 and p2 are at l1-0, t0 is at l0-0, t1 is at l1-1, p3 and p1 are in a0, p0 is in t1. The goal is to reach a state where the following facts hold: p0 is at l0-0, p2 is at l1-0, p1 is at l1-0, and p3 is at l0-1. The available propositions are: (at ?obj ?loc) - ?obj is at ?loc and (in ?obj1 ?obj2) - ?obj1 is in ?obj2." 16 | question: "Generate a non-trivial fact landmark, one that does not hold in the initial state or goal." 17 | answer: "(in p3 t0)" 18 | doc_to_text: "{{context}} {{question}} Provide only the ground proposition or None." 
-------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/next_act.yaml: -------------------------------------------------------------------------------- 1 | task: acp_nexta_gen 2 | dataset_path: json 3 | dataset_name: acp_nexta_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/nexta/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0. Currently, the robot is at position f4-0f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-0 is at position f3-0f. Key key0-1 is at position f1-3f. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock place ?lockpos with key ?key of shape ?shape from current position place ?curpos, (move ?curpos ?nextpos) - travel from the current position ?curpos to the next position ?nextpos, (pickup ?curpos ?key) - pick up key ?key from place ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey at the current position place ?curpos and loose the key ?oldkey being held, and (putdown ?curpos ?key) - put down the key ?key at the current position ?curpos." 
13 | question: "What is the next action that takes us towards the goal?" 14 | answer: "(move f4-0f f3-0f)" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-1 and l1-0 are in c1. Currently, t0 is at l0-1, a0 is at l0-0, t1 and p1 are at l1-0, p2, p0, and p3 are in t1. The goal is to reach a state where the following facts hold: p3 is at l0-1, p2 is at l1-0, p1 is at l1-0, and p0 is at l0-0. The available actions are: (load-truck ?obj ?truck ?loc) - load object ?obj into truck ?truck at location ?loc, (load-airplane ?obj ?airplane ?loc) - load the object ?obj from location ?loc onto the airplane ?airplane, (unload-truck ?obj ?truck ?loc) - unload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - unload object ?obj from airplane ?airplane at location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - drive the truck ?truck in city ?city from location ?loc-from to location ?loc-to, and (fly-airplane ?airplane ?loc-from ?loc-to) - operate the airplane ?airplane from airport ?loc-from to airport ?loc-to." 16 | question: "What is the next action that takes us towards the goal?" 17 | answer: "(drive-truck t0 l0-1 l0-0 c0)" 18 | doc_to_text: "{{context}} {{question}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide only the action." 
19 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/prog.yaml: -------------------------------------------------------------------------------- 1 | task: acp_prog_gen 2 | dataset_path: json 3 | dataset_name: acp_prog_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/prog/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. \nThe grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. \nThere are 2 keys in 0 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. \nCurrently, the robot is at position f0-1f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-1 is at position f1-3f. Key key0-0 is at position f0-1f. The available propositions are: (at ?r ?x) - Key ?r is at ?x location, (at-robot ?x) - Robot is at ?x location, (locked ?x) - Location ?x is locked, (holding ?k) - Robot is holding ?k, (open ?x) - Location ?x is open, and (arm-empty) - Robot's arm is empty." 13 | question: "Break down the outcomes of performing the action \"retrieve the key key0-0 from its current position f0-1f\" into two lists, positive effects and negative effects. Positive effects are the propositions that are false in the current state but will become true after performing the action. Negative effects are the propositions that are true in the current state and will become false after performing the action." 
14 | answer: "[(holding key0-0)] [(arm-empty), (at key0-0 f0-1f)]" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-1 and l0-0 are in c0. Currently, p2, t1, p1, p3, a0, and p0 are at l1-0, t0 is at l0-1. The available propositions are: (at ?obj ?loc) - ?obj is at ?loc and (in ?obj1 ?obj2) - ?obj1 is in ?obj2." 16 | question: "Break down the outcomes of performing the action \"load object p3 into truck t1 at location l1-0\" into two lists, positive effects and negative effects. Positive effects are the propositions that are false in the current state but will become true after performing the action. Negative effects are the propositions that are true in the current state and will become false after performing the action." 17 | answer: "[(in p3 t1)] [(at p3 l1-0)]" 18 | doc_to_text: "{{context}} {{question}} Provide only the two lists with the ground propositions." 19 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/reach.yaml: -------------------------------------------------------------------------------- 1 | task: acp_reach_gen 2 | dataset_path: json 3 | dataset_name: acp_reach_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/reach/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). 
The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 0 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0. Currently, the robot is at position f1-2f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-0 is at position f1-0f. Key key0-1 is at position f1-3f. The available propositions are: (at ?r ?x) - Key ?r is at ?x location, (at-robot ?x) - Robot is at ?x location, (locked ?x) - Location ?x is locked, (holding ?k) - Robot is holding ?k, (open ?x) - Location ?x is open, and (arm-empty) - Robot's arm is empty." 13 | question: "What proposition can never hold in any potentially reachable state?" 14 | answer: "(locked f3-1f)" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-0 and l0-1 are in c0; l1-0 and l1-1 are in c1. Currently, a0, p2, and t1 are at l1-0, p3 and p0 are at l0-0, t0 is at l0-1, p1 is in t1. The available propositions are: (at ?obj ?loc) - ?obj is at ?loc and (in ?obj1 ?obj2) - ?obj1 is in ?obj2." 16 | question: "What proposition can never hold in any potentially reachable state?" 17 | answer: "(at t0 l1-1)" 18 | doc_to_text: "{{context}} {{question}} Provide one proposition or None." 
19 | -------------------------------------------------------------------------------- /configs/tasks/acp_gen_chat/val.yaml: -------------------------------------------------------------------------------- 1 | task: acp_val_gen 2 | dataset_path: json 3 | dataset_name: acp_val_gen 4 | dataset_kwargs: 5 | data_files: 6 | test: ./dataset/val/test.gen.json.gz 7 | test_split: test 8 | include: _gen_yaml_2shot 9 | fewshot_config: 10 | sampler: first_n 11 | samples: 12 | - context: "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f3-3f and its arm is empty. All the positions are open except the following: f2-0f has shape0 shaped lock, f4-2f has shape0 shaped lock. Key key0-1 is at position f1-3f. Key key0-0 is at position f2-2f. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location. 
The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock the place ?lockpos with the key ?key of the shape ?shape from the current position place ?curpos, (move ?curpos ?nextpos) - travel from the current position ?curpos to the next position ?nextpos, (pickup ?curpos ?key) - pick up key ?key from place ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey from the current position ?curpos and loose the key ?oldkey which is being held, and (putdown ?curpos ?key) - put down key ?key at current position place ?curpos." 13 | question: "What is the first inapplicable action in the next sequence of actions: [(move f3-3f f3-2f), (move f3-2f f2-2f), (pickup f2-2f key0-0), (pickup-and-loose f4-0f key0-0 key0-1), (unlock f2-1f f2-0f key0-0 shape0), (move f2-1f f2-0f), (putdown f2-0f key0-0), (move f2-0f f2-1f)]?" 14 | answer: "3" 15 | - context: "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-1 and l1-0 are in c1. Currently, t1 and p0 are at l1-1, t0 is at l0-1, p3, p2, and p1 are at l1-0, a0 is at l0-0. The goal is to reach a state where the following facts hold: p2 is at l1-0, p3 is at l0-1, p0 is at l0-0, and p1 is at l1-0. 
The available actions are: (load-truck ?obj ?truck ?loc) - load object ?obj into truck ?truck at location ?loc, (load-airplane ?obj ?airplane ?loc) - load the object ?obj from location ?loc onto the airplane ?airplane, (unload-truck ?obj ?truck ?loc) - unload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - unload object ?obj from airplane ?airplane at location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - navigate the truck ?truck from its current location ?loc-from in city ?city to the new location ?loc-to within the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - fly the airplane ?airplane from location ?loc-from to location ?loc-to." 16 | question: "What is the first inapplicable action in the next sequence of actions: [(load-truck p0 t1 l1-1), (drive-truck t1 l1-1 l1-0 c1), (unload-truck p0 t1 l1-0), (fly-airplane a0 l0-0 l1-0), (unload-truck p3 t0 l0-1), (load-airplane p3 a0 l1-0), (fly-airplane a0 l1-0 l0-0), (unload-airplane p0 a0 l0-0), (unload-airplane p3 a0 l0-0), (drive-truck t0 l0-1 l0-0 c0), (load-truck p3 t0 l0-0), (drive-truck t0 l0-0 l0-1 c0), (unload-truck p3 t0 l0-1)]?" 17 | answer: "4" 18 | doc_to_text: "{{context}} {{question}} Provide only the index of the action." 
19 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/_boolq_cot_2shot_yaml: -------------------------------------------------------------------------------- 1 | tag: 2 | - acp_bool_cot_2shot 3 | - acp_bench 4 | output_type: generate_until 5 | dataset_path: ibm-research/acp_bench 6 | test_split: test 7 | num_fewshot: 2 8 | doc_to_target: "{{answer}}" 9 | doc_to_text: "**Question**: {{context}} {{question}} **Thoughts**:" 10 | filter_list: 11 | - name: "extract-yes-no" 12 | filter: 13 | - function: "regex" 14 | regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.)|(?<=..Final Answer..: )(.*)(?=.)|(?<=..answer..: )(.*)(?=.)|(?<=..Answer..: )(.*)(?=.)|\\b(Yes|No|yes|no)\\b)" 15 | - function: "take_first" 16 | generation_kwargs: 17 | until: 18 | - "**Question**:" 19 | - "**Question:**" 20 | - "Q:" 21 | - "\n\n\n\n" 22 | do_sample: false 23 | temperature: 0.0 24 | max_gen_toks: 1024 25 | metric_list: 26 | - metric: exact_match 27 | aggregation: mean 28 | higher_is_better: true 29 | ignore_case: true 30 | ignore_punctuation: true 31 | metadata: 32 | version: 1.0 33 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/act_reach.yaml: -------------------------------------------------------------------------------- 1 | task: acp_areach_bool 2 | dataset_name: acp_areach_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 5 cars, numbered consecutively. Currently, the ferry is at l1, with the car c4 on board. 
The cars are at locations as follows: c0 and c3 are at l1; c1 and c2 are at l0.' 8 | question: 'Is it possible to transition to a state where the action "travel by sea from location l0 to location l1" can be applied?' 9 | answer: "Let's think step by step. Step 1: Verify if there is a sequence of actions which transforms the current state into a state where the precondition of the action \"travel by sea from location l0 to location l1\" hold. Step 2: The following sequence of actions would transition to such a state: sail from location l1 to location l0, unload the car c4 from the ferry to location l0, board car c1 at location l0. **Final Answer**: Yes." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 6 locations across 2 cities. The locations are in cities as follows: l0-0, l0-1, and l0-2 are in c0; l1-1, l1-2, and l1-0 are in c1. Currently, a0 is at l1-0, t1 is at l1-1, t0 is at l0-0, p2 and p1 are in t1, p0 and p3 are in a0.' 11 | question: 'Is it possible to transition to a state where the action "offload the object p0 from the truck p0 at location p1" can be applied?' 12 | answer: "Let's think step by step. Step 1: Verify if there is a sequence of actions which transforms the current state into a state where the precondition of the action \"offload the object p0 from the truck p0 at location p1\" hold. Step 2: Action preconditions are \"p0 is in p0 and p0 is at p1\". Step 3: These facts are not reachable together, as they include mutually exclusive facts \"p0 is in p0 and p0 is at p1\". **Final Answer**: No." 
13 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/app.yaml: -------------------------------------------------------------------------------- 1 | task: acp_app_bool 2 | dataset_name: acp_app_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0, with the car c1 on board. The cars are at locations as follows: c0 is at l0.' 8 | question: 'Is the following action applicable in this state: travel by sea from location l1 to location l0?' 9 | answer: "Step 1: In order to apply the action travel by sea from location l1 to location l0, the following fact(s) must hold in this state: The ferry is at l1 location Step 2: These facts do not hold in the mentioned state: The ferry is at l1 location. So, the action is not applicable. **Final Answer**: No." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 3 trucks and 1 airplane, as well as 7 packages. There are 9 locations across 3 cities. The locations are in cities as follows: l1-2, l1-0, and l1-1 are in c1; l2-0, l2-1, and l2-2 are in c2; l0-0, l0-2, and l0-1 are in c0. Currently, p0 and p1 are at l1-2, a0 and p2 are at l0-0, t1 is at l1-0, t0 and p4 are at l0-2, t2 is at l2-0, p6, p3, and p5 are in t0.' 
11 | question: 'Is the following action applicable in this state: load the object p4 from location l0-2 into the truck t0?' 12 | answer: "Let's think step by step. Step 1: In order to apply the action \"load the object p4 from location l0-2 into the truck t0\", the following fact(s) must hold in this state: p4 is at l0-2 and t0 is at l0-2 Step 2: These facts hold in the mentioned state, so the action is applicable. **Final Answer**: Yes." 13 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/just.yaml: -------------------------------------------------------------------------------- 1 | task: acp_just_bool 2 | dataset_name: acp_just_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c0 and c1 are at l0. 
The goal is to reach a state where the following facts hold: Car c1 is at location l1 and Car c0 is at location l1' 8 | question: 'Given the plan: \"board the car c1 at the location l0, sail from location l0 to location l1, unload the car c1 from the ferry to location l1, travel by sea from location l1 to location l0, board the car c0 at location l0, travel by sea from location l0 to location l1, unload the car c0 from the ferry to location l1, embark the car c1 at location l1 on to the ferry, debark car c1 to location l1 from the ferry\"; can the following pair of consecutive actions be removed from this plan and still have a valid plan: embark the car c1 at location l1 on to the ferry and debark the car c1 to location l1 from the ferry?' 9 | answer: "Let's think step by step. Step 1: A valid plan is a sequence of actions that is executable from the current state and achieves the goal. Step 2: Consecutive actions \"embark the car c1 at location l1 on to the ferry and debark the car c1 to location l1 from the ferry\" can be removed from the plan and still have a valid plan. **Final Answer**: Yes." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-0 and l1-1 are in c1; l0-1 and l0-0 are in 11 | c0. Currently, p3, p2, and p1 are at l1-0, p0 and t1 are at l1-1, a0 is at l0-0, t0 is at l0-1. The goal is to reach a state where the following facts hold: p0 is at l0-0, p2 is at l1-0, p3 is at l0-1, and p1 is at l1-0. 
Given the plan: \"load the object p0 from location l1-1 into the truck t1, navigate the truck t1 which is in location l1-1 in city c1 to another location l1-0 in the same city, offload the object p0 from the truck t1 at location l1-0, drive truck t1 from location l1-0 in city c1 to location l1-1 in the same city, fly airplane a0 from airport l0-0 to airport l1-0, load the object p0 from location l1-0 onto the airplane a0, load the object p3 from location l1-0 onto the airplane a0, fly the airplane a0 from airport l1-0 to airport l0-0, unload the object p0 from the airplane a0 at location l0-0, remove the object p3 from the airplane a0 and place it on the location l0-0, drive the truck t0 in city c0 from location l0-1 to location l0-0, place the object p3 into the truck t0 at location l0-0, navigate the truck t0 from location l0-0 in city c0 to location l0-1 in the same city, unload object p3 from truck t0 at location l0-1\"' 12 | question: 'can the following pair of consecutive actions be removed from this plan and still have a valid plan: place the object p0 onto the airplane a0 at location l1-0 and place the object p3 onto the airplane a0 at location l1-0?' 13 | answer: "Let's think step by step. Step 1: A valid plan is a sequence of actions that is executable from the current state and achieves the goal. Step 2: Consecutive actions \"place the object p0 onto the airplane a0 at location l1-0 and place the object p3 onto the airplane a0 at location l1-0\" cannot be removed from the plan and still have a valid plan. **Final Answer**: No." 
14 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/land.yaml: -------------------------------------------------------------------------------- 1 | task: acp_land_bool 2 | dataset_name: acp_land_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 5 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c4 and c2 are at l1; c0, c1, and c3 are at l0. The goal is to reach a state where the following facts hold: Car c4 is at location l0, Car c0 is at location l1, Car c1 is at location l0, Car c3 is at location l0, and Car c2 is at location l1.' 8 | question: 'Is the following fact a landmark (must hold at some point along any plan) for the current state? The ferry is at l0 location.' 9 | answer: "Let's think step by step. Step 1: A fact is a landmark if it must hold at some point along any plan. Step 2: The fact \"The ferry is at l0 location.\" can be found by a simple procedure that traces back such atoms from the goal. **Final Answer**: Yes." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. 
Currently, t0 is at l0-0, t1, a0, p1, and p2 are at l1-0, p3 is in t1, p0 is in a0. The goal is to reach a state where the following facts hold: p2 is at l1-0, p0 is at l0-0, p3 is at l0-1, and p1 is at l1-0.' 11 | question: 'Is the following fact a landmark (must hold at some point along any plan) for the current state? p2 is in a0.' 12 | answer: "Let's think step by step. Step 1: A fact is a landmark if it must hold at some point along any plan. Step 2: The fact \"p2 is in a0.\" is not achieved 13 | along the following plan: remove the object p3 from the truck t1 and place it on the location l1-0, load the object p3 from location l1-0 into the airplane a0, load the object p2 from location l1-0 into the airplane a0, remove the object p2 from the airplane a0 and place it on the location l1-0, fly the airplane a0 from the airport l1-0 to the airport l0-0, unload the object p3 from the airplane a0 at location l0-0, remove the object p0 from the airplane a0 and place it on the location l0-0, place the object p3 into the truck t0 at location l0-0, drive the truck t0 in city c0 from location l0-0 to location l0-1, unload the object p3 from the truck t0 at location l0-1. **Final Answer**: No." 14 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/prog.yaml: -------------------------------------------------------------------------------- 1 | task: acp_prog_bool 2 | dataset_name: acp_prog_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0, with the car c0 on board. 
The cars are at locations as follows: c1 is at l0.' 8 | question: 'Will the fact \"Car c0 is at location l0\" hold after performing the action \"travel by sea from location l0 to location l1\" in the current state?' 9 | answer: "Let's think step by step. Step 1: The following fact(s) do not hold in the current state: Car c0 is at location l0. Step 2: The action adds the following fact(s): The ferry is at l1 location Step 3: Fact(s) \"Car c0 is at location l0\" are not added. **Final Answer**: No." 10 | - context: 'A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 4 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-3 is of shape shape0, Key key0-2 is of shape shape0, Key key0-0 is of shape shape0. Currently, the robot is at position f4-2f and its arm is empty. All the positions are open except the following: f0-1f has shape0 shaped lock, f4-0f has shape0 shaped lock, f4-4f has shape0 shaped lock, f0-3f has shape0 shaped lock. Key key0-3 is at position f2-1f. Key key0-0 is at position f2-2f. Key key0-1 is at position f4-3f. Key key0-2 is at position f4-2f.' 11 | question: 'Will the fact \"Robot is holding key0-2\" hold after performing the action \"retrieve the key key0-2 from its current position f4-2f\" in the current state?' 12 | answer: "Let's think step by step. Step 1: The following fact(s) do not hold in the current state: Robot is holding key0-2. Step 2: The action adds the following fact(s): Robot is holding key0-2 Step 3: Fact(s) \"Robot is holding key0-2\" are added. 
**Final Answer**: Yes." 13 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/reach.yaml: -------------------------------------------------------------------------------- 1 | task: acp_reach_bool 2 | dataset_name: acp_reach_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c0 is at l1; c1 is at l0.' 8 | question: 'Is it possible to transition to a state where the following holds: The ferry is empty and The ferry is at c1 location.' 9 | answer: "Let's think step by step. Step 1: Verify if the following fact(s) hold in current state or if there is a sequence of actions which transforms the current state into a state where they hold: The ferry is empty and The ferry is at c1 location. Step 2: These facts do not hold in the current state. Step 3: The fact There are no cars on the ferry and The ferry is at c1 location is not reachable even by a simple iterative procedure that accumulates all facts made true by applicable actions. **Final Answer**: No." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 5 trucks and 1 airplane, as well as 4 packages. There are 15 locations across 5 cities. 
The locations are in cities as follows: l4-2, l4-1, and l4-0 are in c4; l3-2, l3-0, and l3-1 are in c3; l0-2, l0-0, and l0-1 are in c0; l1-0, l1-1, and l1-2 are in c1; l2-1, l2-0, and l2-2 are in c2. Currently, p3 and t1 are at l1-1, t3 is at l3-0, p0 is at l1-0, t4 is at l4-2, p1 and t2 are at l2-0, a0 is at l4-0, t0 is at l0-2, p2 is in a0.' 11 | question: 'Is it possible to transition to a state where the following holds: l2-2 is in l3-2?' 12 | answer: "Let's think step by step. Step 1: Verify if the following fact(s) hold in current state or if there 13 | is a sequence of actions which transforms the current state into a state where they hold: l2-2 is in l3-2. Step 2: These facts do not hold in the current state. Step 3: The fact l2-2 is in l3-2 is not reachable even by a simple iterative procedure that accumulates all facts made true by applicable actions. **Final Answer**: No." 14 | -------------------------------------------------------------------------------- /configs/tasks/boolq_cot_2shot/val.yaml: -------------------------------------------------------------------------------- 1 | task: acp_val_bool 2 | dataset_name: acp_val_bool 3 | include: _boolq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c0 and c1 are at l0. The goal is to reach a state where the following facts hold: Car c1 is at location l1 and Car c0 is at location l1.' 
8 | question: 'Is the following sequence of actions \"embark the car c0 at location l0 on to the ferry travel by sea from location l0 to location l1 debark the car c0 to location l1 from the ferry sail from location l1 to location l0 board car c1 at location l0 travel by sea from location l0 to location l1 sail from location l1 to location l0\" a plan for the current state?' 9 | answer: "Let's think step by step. Step 1: For a sequence of actions to be a plan, it should be applicable in the current state, all actions should be valid, and the resulting state should entail the goal. Step 2: All the actions in the sequence are valid in this problem. Step 3: The action sequence is applicable in the current state. Step 4: However, the following goal fact(s) are not achieved: Car c1 is at location l1. **Final Answer**: No." 10 | - context: 'This is a blocksworld domain where blocks can be placed on top of each other or on the table. There is one robotic arm that can move the block. There are 3 blocks. Currently, the robotic arm is empty. The following block(s) are on the table: block_1 and block_2. The following block(s) is stacked on top of another block: block_3 is on block_2. The goal is to reach a state where the following facts hold: The block block_1 is currently situated above the block block_2 and The block block_3 is on top of block block_1.' 11 | question: 'Is the following sequence of actions "unstack the object block_3 from the object block_2 place the object block_3 on top of the object block_2 unstack object block_3 from object block_2 put down object block_3 collect the object block_3 from the table place the object block_3 on the table pick up object block_2 from the table place the object block_2 on the table pick up the object block_1 from the table stack the object block_1 on top of the object block_2 pick up object block_3 from the table stack object block_3 on top of object block_1" a plan for the current state? 
' 12 | answer: "Let's think step by step. Step 1: For a sequence of actions to be a plan, it should be applicable in the current state, all actions should be valid, and the resulting state should entail the goal. Step 2: All the actions in the sequence are valid in this problem. Step 3: The action sequence is applicable in the current state. Step 4: The action sequence achieves the goal. **Final Answer**: Yes." 13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/_mcq_cot_2shot_yaml: -------------------------------------------------------------------------------- 1 | tag: 2 | - acp_mcq_cot_2shot 3 | - acp_bench 4 | output_type: generate_until 5 | dataset_path: ibm-research/acp_bench 6 | test_split: test 7 | num_fewshot: 2 8 | doc_to_target: "{{answer}}" 9 | doc_to_text: "**Question**: {{context}} {{question}} **Thoughts**:" 10 | generation_kwargs: 11 | until: 12 | - "\n\n\n\n" 13 | - "**Question**:" 14 | - "**Question:**" 15 | - "Q:" 16 | do_sample: false 17 | temperature: 0.0 18 | max_gen_toks: 1024 19 | metric_list: 20 | - metric: exact_match 21 | ignore_case: true 22 | ignore_punctuation: true 23 | regexes_to_ignore: 24 | - "\\.$" 25 | - "," 26 | - "\\\\" 27 | - "\n" 28 | - '"' 29 | filter_list: 30 | - name: "mcq-extract" 31 | filter: 32 | - function: multi_choice_regex 33 | group_select: -1 34 | ignore_case: true 35 | ignore_punctuation: true 36 | regex_pattern: '(((?<=[answer is ])[A-D])|([A-D]\n)|([A-D]\.)|( [A-D] )|(^[A-D]$)|(\[[A-D]\])|([A-D])|(?<=..Final Answer..: )(.*)(?=.)|(?<=..answer..: )(.*)(?=.)|(?<=..Answer..: )(.*)(?=.))' 37 | - function: "take_first" 38 | metadata: 39 | version: 1.0 40 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/act_reach.yaml: -------------------------------------------------------------------------------- 1 | task: acp_areach_mcq 2 | dataset_name: acp_areach_mcq 3 | include: _mcq_cot_2shot_yaml 4 | 
fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0, with the car c1 on board. The cars are at locations as follows: c0 is at l0.' 8 | question: 'Which of the following actions can eventually be applied? A. embark the car c0 at location l0 on to the ferry. B. travel by sea from location c0 to location c1. C. fly from location l0 to location l1. D. board the car c0 at location l0 into the airplane.' 9 | answer: "Let's think step by step. Step 1: Verify if there is a sequence of actions which transforms the current state into a state where the precondition of the action \"embark the car c0 at location l0 on to the ferry\" hold. Step 2: The following sequence of actions would transition to such a state: travel by sea from location l0 to location l1, debark the car c1 from the ferry to location l1, travel by sea from location l1 to location l0. **Final Answer**: A." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, a0 is at l0-0, t0 is at l0-1, p3 and t1 are at l1-0, p0 is in t1, p2 and p1 are in a0.' 11 | question: 'Which of the following actions can eventually be applied? A. offload the object p3 from the truck p3 at location l1-1. B. 
navigate the truck c1 which is in location p0 in city l0-1 to another location t1 in the same city. C. fly the airplane a0 from airport l1-0 to airport l0-0. D. fly the airplane a0 to the airport l0-0 in city l1-0.' 12 | answer: "Let's think step by step. Step 1: Verify if there is a sequence of actions which transforms the current state into a state where the precondition of the action \"fly the airplane a0 from airport l1-0 to airport l0-0\" hold. Step 2: The following sequence of actions would transition to such a state: drive truck t0 from location l0-1 in city c0 to location l0-0 in the same city, fly the airplane a0 from location l0-0 to location l1-0, navigate the truck t1 which is in location l1-0 in city c1 to another location l1-1 in the same city. **Final Answer**: C." 13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/app.yaml: -------------------------------------------------------------------------------- 1 | task: acp_app_mcq 2 | dataset_name: acp_app_mcq 3 | include: _mcq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c1 and c0 are at l0.' 8 | question: 'Which of the following actions will be applicable in this state? A. unload the car c1 from the ferry to location l1. B. load the car c0 at location l1 on to the ferry. C. load the car c0 at location l0 on to the ferry. D. sail from location l1 to location l0.' 9 | answer: "Let's think step by step. 
Step 1: In order to apply the action \"sail from location l1 to location l0\", the following fact(s) must hold in this state: The ferry is at l1 location Step 2: These facts hold in the mentioned state, so the action \"sail from location l1 to location l0\" is applicable. **Final Answer**: D." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, a0 and t0 are at l0-0, t1 is at l1-1, p2 is at l1-0, p1 and p3 are in t1, p0 is in t0.' 11 | question: 'Which of the following actions will be applicable in this state? A. load object p1 into airplane a0 at location l1-0. B. unload the object p2 from the airplane a0 at location l1-0. C. navigate the truck t1 from location l1-1 in city c1 to location l1-1 in the same city. D. operate the airplane a0 from airport l1-0 to airport l0-0.' 12 | answer: "Let's think step by step. Step 1: In order to apply the action \"navigate the truck t1 from location l1-1 in city c1 to location l1-1 in the same city\", the following fact(s) must hold in this state: t1 is at l1-1 Step 2: These facts hold in the mentioned state, so the action \"navigate the truck t1 from location l1-1 in city c1 to location l1-1 in the same city\" is applicable. **Final Answer**: C." 
13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/just.yaml: -------------------------------------------------------------------------------- 1 | task: acp_just_mcq 2 | dataset_name: acp_just_mcq 3 | include: _mcq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c0 and c1 are at l0. The goal is to reach a state where the following facts hold: Car c1 is at location l1 and Car c0 is at location l1.' 8 | question: 'Given the plan: \"board the car c0 at the location l0, travel by sea from location l0 to location l1, unload the car c0 from the ferry to location l1, travel by sea from location l1 to location l0, board the car c1 at location l0, sail from location l0 to location l1, debark the car c1 from the ferry to location l1, travel by sea from location l1 to location l0\"; which of the following actions can be removed from this plan and still have a valid plan? A. board the car c1 at the location l0. B. debark car c1 to location l1 from the ferry. C. travel by sea from location l1 to location l0. D. sail from location l0 to location l1.' 9 | answer: "Let's think step by step. Step 1: A valid plan is a sequence of actions that is executable from the current state and achieves the goal. Step 2: The 2nd instance of \"travel by sea from location l1 to location l0\" can be removed from the plan and still have a valid plan. **Final Answer**: C." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. 
There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-0 and l0-1 are in c0; l1-0 and l1-1 are in c1. Currently, p2, p1, and p3 are at l1-0, p0 and t1 are at l1-1, a0 is at l0-0, t0 is at l0-1. The goal is to reach a state where the following facts hold: p3 is at l0-1, p0 is at l0-0, p2 is at l1-0, and p1 is at l1-0.' 11 | question: 'Given the plan: \"load object p0 into truck t1 at location l1-1, navigate the truck t1 which is in location l1-1 in city c1 to another location l1-0 in the same city, offload the object p0 from the truck t1 at location l1-0, fly the airplane a0 from location l0-0 to location l1-0, load object p0 into airplane a0 at location l1-0, place the object p3 onto the airplane a0 at location l1-0, fly the airplane a0 from location l1-0 to location l0-0, unload the object p0 from the airplane a0 at location l0-0, unload the object p3 from the airplane a0 at location l0-0, navigate the truck t0 from location l0-1 in city c0 to location l0-0 in the same city, load object p3 into truck t0 at location l0-0, drive truck t0 from location l0-0 in city c0 to location l0-1 in the same city, offload the object p3 from the truck t0 at location l0-1, drive truck t0 from location l0-1 in city c0 to location l0-0 in the same city, navigate the truck t0 from its current location l0-0 in city c0 to the new location l0-1 within the same city\"; which of the following pair of consecutive actions can be removed from this plan and still have a valid plan? A. drive the truck t0 in city c0 from location l0-1 to location l0-0 and navigate the truck t0 which is in location l0-0 in city c0 to another location l0-1 in the same city. B. 
unload object p3 from truck t0 at location l0-1 and drive truck t0 from location l0-1 in city c0 to location l0-0 in the same city. C. drive truck t0 from location l0-1 in city c0 to location l0-0 in the same city and load the object p3 from location l0-0 into the truck t0. D. unload the object p0 from the airplane a0 at location l0-0 and remove the object p3 from the airplane a0 and place it on the location l0-0.' 12 | answer: "Let's think step by step. Step 1: A valid plan is a sequence of actions that is executable from the current state and achieves the goal. Step 2: Consecutive actions \"drive the truck t0 in city c0 from location l0-1 to location l0-0 and navigate the truck t0 which is in location l0-0 in city c0 to another location l0-1 in the same city\" can be removed from the plan and still have a valid plan. **Final Answer**: A." 13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/land.yaml: -------------------------------------------------------------------------------- 1 | task: acp_land_mcq 2 | dataset_name: acp_land_mcq 3 | include: _mcq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c1 is at l0; c0 is at l1. The goal is to reach a state where the following facts hold: Car c1 is at location l1 and Car c0 is at location l1.' 8 | question: 'Which of the following facts is a landmark (must hold at some point along any plan) for the current state? A. Car c0 is on the ferry. B. Ferry has car c1 on board and Car c0 is at location l0. C.
Ferry has car c1 on board. D. Ferry has car c1 on board and Car c0 is on the ferry.' 9 | answer: "Let's think step by step. Step 1: A fact is a landmark if it must hold at some point along any plan. Step 2: The fact \"Ferry has car c1 on board\" can be found by a simple procedure that traces back such atoms from the goal. **Final Answer**: C." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-1 and l0-0 are in c0. Currently, p1 and t0 are at l0-0, a0, p0, p3, and p2 are at l1-0, t1 is at l1-1. The goal is to reach a state where the following facts hold: p0 is at l0-0, p1 is at l1-0, p3 is at l0-1, and p2 is at l1-0.' 11 | question: 'Which of the following facts is a landmark (must hold at some point along any plan) for the current state? A. p3 is at l0-0. B. p1 is at l0-1. C. p0 is in t0. D. p2 is in a0.' 12 | answer: "Let's think step by step. Step 1: A fact is a landmark if it must hold at some point along any plan. Step 2: The fact \"p3 is at l0-0\" can be found by a simple procedure that traces back such atoms from the goal. **Final Answer**: A." 13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/prog.yaml: -------------------------------------------------------------------------------- 1 | task: acp_prog_mcq 2 | dataset_name: acp_prog_mcq 3 | include: _mcq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. 
Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l1, with the car c1 on board. The cars are at locations as follows: c0 is at l1.' 8 | question: 'Which the following facts hold after performing the action \"travel by sea from location l1 to location l0\" in the current state? **Possible Answers**: A. Car c0 is at location l1 and The ferry is at l1 location. B. The ferry is at l0 location and The ferry is at l1 location. C. The ferry is at l0 location. D. The ferry is at l0 location and Car c0 is at location l1.' 9 | answer: "Let's think step by step. Step 1: The following fact(s) do not hold in the current state: The ferry is at l0 location. Step 2: The action adds the following fact(s): The ferry is at l0 location Step 3: The following fact(s) hold in the current state: Car c0 is at location l1. Step 4: The action deletes the following fact(s): The ferry is at l1 location Step 5: Fact(s) \"The ferry is at l0 location\" are added and Fact(s) \"Car c0 is at location l1\" are not deleted. **Final Answer**: D." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-1 and l0-0 are in c0. Currently, a0 is at l0-0, t1 and p0 are at l1-1, t0 is at l0-1, p1 is in t1, p2 and p3 are in a0.' 11 | question: 'Which the following facts hold after performing the action \"drive truck t0 from location l0-1 in city c0 to location l0-1 in the same city\" in the current state? A. p3 is in t1. B. 
a0 is at l0-0 and p3 is in t1. C. a0 is at l0-0. D. None of the above.' 12 | answer: "Let's think step by step. Step 1: The following fact(s) hold in the current state: a0 is at l0-0. Step 2: The action deletes the following fact(s): t0 is at l0-1 Step 3: Fact(s) \"a0 is at l0-0\" are not deleted. **Final Answer**: C." 13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/reach.yaml: -------------------------------------------------------------------------------- 1 | task: acp_reach_mcq 2 | dataset_name: acp_reach_mcq 3 | include: _mcq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. Currently, the ferry is at l0, with the car c0 on board. The cars are at locations as follows: c1 is at l1.' 8 | question: 'Which of the following options can hold in a state that can potentially be reached? A. There are no cars on the ferry and The ferry is at l1 location. B. Car l1 is at location c1. C. Ferry has car c0 on board and There are no cars on the ferry. D. The ferry is at c0 location and Car c1 is at location l1.' 9 | answer: "Let's think step by step. Step 1: Verify if the following facts hold in the current state: There are no cars on the ferry and The ferry is at l1 location. Step 2: These facts do not hold. Step 3: Verify if there is a sequence of actions which transforms the current state into a state where these facts hold. Step 4: The following sequence of actions would transition to such a state: debark the car c0 from the ferry to location l0, sail from location l0 to location l1. **Final Answer**: A." 
10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, p1 and t1 are at l1-1, a0 and p0 are at l0-0, t0 and p3 are at l0-1, p2 is at l1-0.' 11 | question: 'Which of the following options can hold in a state that can potentially be reached? A. p2 is at p0. B. t0 is at l0-0. C. p3 is in t1 and p3 is in t0. D. l1-1 is at p1.' 12 | answer: "Let's think step by step. Step 1: Verify if the following fact holds in the current state: t0 is at l0-0. Step 2: The fact does not hold. Step 3: Verify if there is a sequence of actions which transforms the current state into a state where the fact holds. Step 4: The following sequence of actions would transition to such a state: drive the truck t0 in city c0 from location l0-1 to location l0-0, navigate the truck t0 which is in location l0-0 in city c0 to another location l0-0 in the same city. **Final Answer**: B." 13 | -------------------------------------------------------------------------------- /configs/tasks/mcq_cot_2shot/val.yaml: -------------------------------------------------------------------------------- 1 | task: acp_val_mcq 2 | dataset_name: acp_val_mcq 3 | include: _mcq_cot_2shot_yaml 4 | fewshot_config: 5 | sampler: first_n 6 | samples: 7 | - context: 'This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 2 cars, numbered consecutively. 
Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c1 and c0 are at l0. The goal is to reach a state where the following facts hold: Car c0 is at location l1 and Car c1 is at location l1.' 8 | question: 'Which of the following claims is true with regard to the following sequence of actions \"board the car c0 at the location l0, travel by sea from location l0 to location l1, unload the car c0 from the ferry to location l1, travel by sea from location l1 to location l0, board the car c1 at location l0, sail from location l0 to location l1, debark the car c1 from the ferry to location l1\" and the current state? A. The sequence is not applicable. B. The sequence is a plan. C. The sequence is applicable, but does not achieve the goal. D. The sequence is not valid.' 9 | answer: "Let's think step by step. Step 1: For a sequence of actions to be a plan, all actions should be valid, applicable in sequence, and achieve the goal. Step 2: The action sequence is applicable and it achieves the goal. **Final Answer**: B." 10 | - context: 'There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 3 trucks and 1 airplane, as well as 4 packages. There are 9 locations across 3 cities. The locations are in cities as follows: l1-2, l1-0, and l1-1 are in c1; l0-0, l0-1, and l0-2 are in c0; l2-1, l2-2, and l2-0 are in c2. Currently, p2 and t1 are at l1-2, p3 is at l2-0, t0 and p0 are at l0-2, p1 is at l1-0, a0 is at l0-0, t2 is at l2-2. The goal is to reach a state where the following facts hold: p1 is at l1-0, p3 is at l2-0, p2 is at l0-1, and p0 is at l1-2.' 
11 | question: 'Which of the following claims is true with regard to the following sequence of actions \"load object p0 into truck t0 at location l0-2, sail the ship t0 into city c0 from location l0-2 in city l0-0, remove the object p0 from the truck t0 and place it on the location l0-0, load the object p0 from location l0-0 onto the airplane a0, fly the airplane a0 from the airport l0-0 to the airport l1-0, remove the object p0 from the airplane a0 and place it on the location l1-0, load object p2 into truck t1 at location l1-2, navigate the truck t1 from its current location l1-2 in city c1 to the new location l1-0 within the same city place the object p0 into the truck t1 at location l1-0 remove the object p2 from the truck t1 and place it on the location l1-0 load the object p2 from location l1-0 onto the airplane a0 fly the airplane a0 from location l1-0 to location l2-0 fly airplane a0 from airport l2-0 to airport l0-0 unload the object p2 from the airplane a0 at location l0-0 place the object p2 into the truck t0 at location l0-0 navigate the truck t0 from its current location l0-0 in city c0 to the new location l0-1 within the same city offload the object p2 from the truck t0 at location l0-1 drive the truck t1 in city c1 from location l1-0 to location l1-2 offload the object p0 from the truck t1 at location l1-2 navigate the truck t2 from its current location l2-2 in city c2 to the new location l2-1 within the same city\" and the current state? A. The sequence is not valid. B. The sequence is applicable, but does not achieve the goal. C. The sequence is a plan. D. The sequence is not applicable.' 12 | answer: "Let's think step by step. Step 1: For a sequence of actions to be a plan, all actions should be valid, applicable in sequence, and achieve the goal. Step 2: The action \"sail the ship t0 into city c0 from location l0-2 in city l0-0\" is not valid in this problem. **Final Answer**: A." 
13 | -------------------------------------------------------------------------------- /dataset/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/.DS_Store -------------------------------------------------------------------------------- /dataset/app/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/app/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/app/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/app/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/app/test.mcq.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/app/test.mcq.json.gz -------------------------------------------------------------------------------- /dataset/areach/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/areach/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/areach/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/areach/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/areach/test.mcq.json.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/areach/test.mcq.json.gz -------------------------------------------------------------------------------- /dataset/just/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/just/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/just/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/just/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/just/test.mcq.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/just/test.mcq.json.gz -------------------------------------------------------------------------------- /dataset/land/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/land/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/land/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/land/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/land/test.mcq.json.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/land/test.mcq.json.gz -------------------------------------------------------------------------------- /dataset/nexta/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/nexta/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/prog/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/prog/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/prog/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/prog/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/prog/test.mcq.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/prog/test.mcq.json.gz -------------------------------------------------------------------------------- /dataset/reach/dev.bool.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": -2426698749034015429, 4 | "group": "reachable_atom_bool", 5 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. 
Currently, the ferry is at l0 location and it is empty. The cars are at locations as follows: c2, c7, and c5 are at l1; c3, c4, c6, c9, c1, c0, and c8 are at l0.", 6 | "question": "Is it possible to transition to a state where the following holds: Car c2 is at location c0?", 7 | "answer": "no" 8 | }, 9 | { 10 | "id": -8931355586395996072, 11 | "group": "reachable_atom_bool", 12 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l0, with the car c1 on board. The cars are at locations as follows: c2, c7, and c4 are at l1; c3, c9, c6, c5, c0, and c8 are at l0.", 13 | "question": "Is it possible to transition to a state where the following holds: The ferry is at c3 location and Car c8 is at location l0?", 14 | "answer": "no" 15 | }, 16 | { 17 | "id": -7281146360401604897, 18 | "group": "reachable_atom_bool", 19 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 3 locations and 2 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c1 is at l2; c0 is at l1.", 20 | "question": "Is it possible to transition to a state where the following holds: The ferry is at l2 location?", 21 | "answer": "yes" 22 | }, 23 | { 24 | "id": -7522838240986012391, 25 | "group": "reachable_atom_bool", 26 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. 
Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 10 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c3, c9, c6, c2, c1, c0, and c8 are at l0; c7, c4, and c5 are at l1.", 27 | "question": "Is it possible to transition to a state where the following holds: The ferry is at l0 location and Car c6 is on the ferry?", 28 | "answer": "yes" 29 | }, 30 | { 31 | "id": -1202705330726218439, 32 | "group": "reachable_atom_bool", 33 | "context": "This is a ferry domain, where the task is to transport cars from their start to their goal locations, using a ferry. Each location is accessible by ferry from each other location. The cars can be debarked or boarded, and the ferry can carry only one car at a time. There are 2 locations and 20 cars, numbered consecutively. Currently, the ferry is at l1 location and it is empty. The cars are at locations as follows: c2, c13, c16, c3, c15, c8, c10, c6, c18, c0, and c9 are at l0; c17, c14, c1, c19, c5, c11, c4, c7, and c12 are at l1.", 34 | "question": "Is it possible to transition to a state where the following holds: Car l0 is on the ferry?", 35 | "answer": "no" 36 | }, 37 | { 38 | "id": -8598089088107253298, 39 | "group": "reachable_atom_bool", 40 | "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 6 locations across 2 cities. The locations are in cities as follows: l1-1, l1-0, and l1-2 are in c1; l0-0, l0-2, and l0-1 are in c0. 
Currently, t1, p1, and p2 are at l1-2, t0, a0, and p3 are at l0-0, p0 is in a0.", 41 | "question": "Is it possible to transition to a state where the following holds: t1 is at l1-0 and t0 is at l0-1?", 42 | "answer": "yes" 43 | }, 44 | { 45 | "id": -7437489527440600797, 46 | "group": "reachable_atom_bool", 47 | "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 6 locations across 2 cities. The locations are in cities as follows: l1-1, l1-0, and l1-2 are in c1; l0-0, l0-2, and l0-1 are in c0. Currently, t1 is at l1-2, t0 and a0 are at l0-0, p1 is at l1-1, p3 and p2 are in t0, p0 is in a0.", 48 | "question": "Is it possible to transition to a state where the following holds: t1 is at l1-1?", 49 | "answer": "yes" 50 | }, 51 | { 52 | "id": 873589922904707629, 53 | "group": "reachable_atom_bool", 54 | "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-0 and l1-1 are in c1. 
Currently, p3 and p1 are at l1-0, t1 is at l1-1, t0 and a0 are at l0-0, p2 is in a0, p0 is in t1.", 55 | "question": "Is it possible to transition to a state where the following holds: p0 is at c1 and p3 is at l1-0?", 56 | "answer": "no" 57 | }, 58 | { 59 | "id": -1396451365000157850, 60 | "group": "reachable_atom_bool", 61 | "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-0 and l1-1 are in c1. Currently, p2 and p1 are at l1-0, t1 is at l1-1, a0 is at l0-0, t0 is at l0-1, p0 and p3 are in a0.", 62 | "question": "Is it possible to transition to a state where the following holds: p3 is at l0-0?", 63 | "answer": "yes" 64 | }, 65 | { 66 | "id": 176148927386011603, 67 | "group": "reachable_atom_bool", 68 | "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-0 and l1-1 are in c1. 
Currently, a0, p1, and t1 are at l1-0, p0, t0, and p3 are at l0-0, p2 is in a0.", 69 | "question": "Is it possible to transition to a state where the following holds: l0-1 is at l1-1 and p1 is at l1-0?", 70 | "answer": "no" 71 | }, 72 | { 73 | "id": -2273865423357842495, 74 | "group": "reachable_atom_bool", 75 | "context": "This is a blocksworld domain where blocks can be placed on top of each other or on the table. There is one robotic arm that can move the block. There are 5 blocks. Currently, the robotic arm is holding block_3. The following block(s) is on the table: block_1. The following block(s) are stacked on top of another block: block_2 is on block_5, block_4 is on block_1, and block_5 is on block_4.", 76 | "question": "Is it possible to transition to a state where the following holds: Block block_3 is on the table and Block block_3 is clear?", 77 | "answer": "yes" 78 | }, 79 | { 80 | "id": -7926943085549188920, 81 | "group": "reachable_atom_bool", 82 | "context": "This is a blocksworld domain where blocks can be placed on top of each other or on the table. There is one robotic arm that can move the block. There are 5 blocks. Currently, the robotic arm is empty. The following block(s) are on the table: block_5 and block_1. The following block(s) are stacked on top of another block: block_3 is on block_2, block_2 is on block_4, and block_4 is on block_5.", 83 | "question": "Is it possible to transition to a state where the following holds: The block block_1 is on top of block block_3?", 84 | "answer": "yes" 85 | }, 86 | { 87 | "id": -1753703551762064220, 88 | "group": "reachable_atom_bool", 89 | "context": "This is a blocksworld domain where blocks can be placed on top of each other or on the table. There is one robotic arm that can move the block. There are 5 blocks. Currently, the robotic arm is holding block_2. The following block(s) are on the table: block_3 and block_1. 
The following block(s) are stacked on top of another block: block_4 is on block_3 and block_5 is on block_1.", 90 | "question": "Is it possible to transition to a state where the following holds: The block block_5 is currently being held by the robotic arm and The robotic arm is empty?", 91 | "answer": "no" 92 | }, 93 | { 94 | "id": -643910057256822341, 95 | "group": "reachable_atom_bool", 96 | "context": "This is a blocksworld domain where blocks can be placed on top of each other or on the table. There is one robotic arm that can move the block. There are 5 blocks. Currently, the robotic arm is empty. The following block(s) are on the table: block_3 and block_2. The following block(s) are stacked on top of another block: block_1 is on block_3, block_5 is on block_2, and block_4 is on block_5.", 97 | "question": "Is it possible to transition to a state where the following holds: The block block_4 is currently situated under the block block_1 and The robotic arm is holding block_4?", 98 | "answer": "no" 99 | }, 100 | { 101 | "id": 2632670807254420405, 102 | "group": "reachable_atom_bool", 103 | "context": "This is a blocksworld domain where blocks can be placed on top of each other or on the table. There is one robotic arm that can move the block. There are 5 blocks. Currently, the robotic arm is holding block_5. The following block(s) are on the table: block_3 and block_2. The following block(s) are stacked on top of another block: block_4 is on block_3 and block_1 is on block_4.", 104 | "question": "Is it possible to transition to a state where the following holds: Block block_5 is clear and The robotic arm is holding block_2?", 105 | "answer": "yes" 106 | }, 107 | { 108 | "id": 1313050701834406721, 109 | "group": "reachable_atom_bool", 110 | "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). 
The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f0-4f and is holding key0-0. All the positions are open except the following: f0-0f has shape0 shaped lock. Key key0-1 is at position f3-0f.", 111 | "question": "Is it possible to transition to a state where the following holds: Robot is at f2-4f location?", 112 | "answer": "yes" 113 | }, 114 | { 115 | "id": -8874972491600335569, 116 | "group": "reachable_atom_bool", 117 | "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f1-2f and its arm is empty. All the positions are open except the following: f4-0f has shape0 shaped lock, f0-3f has shape0 shaped lock. Key key0-1 is at position f1-2f. 
Key key0-0 is at position f4-4f.", 118 | "question": "Is it possible to transition to a state where the following holds: Robot is holding key0-1 and Robot is at f0-2f location?", 119 | "answer": "yes" 120 | }, 121 | { 122 | "id": -4837821160497429846, 123 | "group": "reachable_atom_bool", 124 | "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f3-2f and its arm is empty. All the positions are open except the following: f4-0f has shape0 shaped lock, f0-3f has shape0 shaped lock. Key key0-1 is at position f4-1f. Key key0-0 is at position f4-4f.", 125 | "question": "Is it possible to transition to a state where the following holds: Location key0-0 is locked?", 126 | "answer": "no" 127 | }, 128 | { 129 | "id": -4857683054103019041, 130 | "group": "reachable_atom_bool", 131 | "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. 
There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f3-1f and its arm is empty. All the positions are open except the following: f4-0f has shape0 shaped lock, f0-3f has shape0 shaped lock. Key key0-1 is at position f3-1f. Key key0-0 is at position f4-4f.", 132 | "question": "Is it possible to transition to a state where the following holds: Robot is at f4-2f location?", 133 | "answer": "yes" 134 | }, 135 | { 136 | "id": -7637773953816219776, 137 | "group": "reachable_atom_bool", 138 | "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0. Currently, the robot is at position f3-2f and its arm is empty. All the positions are open except the following: f4-0f has shape0 shaped lock. Key key0-1 is at position f3-2f. Key key0-0 is at position f4-4f.", 139 | "question": "Is it possible to transition to a state where the following holds: Robot is at f4-3f location?", 140 | "answer": "yes" 141 | }, 142 | { 143 | "id": -1100065975109890045, 144 | "group": "reachable_atom_bool", 145 | "context": "A set of robots use different colors to paint patterns in floor tiles. The robots can move around the floor tiles in four directions (up, down, left and right). Robots paint with one color at a time, but can change their spray guns to any available color. 
However, robots can only paint the tile that is in front (up) and behind (down) them, and once a tile has been painted no robot can stand on it. Robots need to paint a grid with black and white, where the cell color is alternated always. There are 9 tiles and 2 robots. The tiles locations are: tile_5 is to the right of tile_4, tile_2 is to the right of tile_1, tile_3 is to the right of tile_2, tile_9 is to the right of tile_8, tile_8 is to the right of tile_7, and tile_6 is to the right of tile_5. Further, tile_6 is down from tile_9, tile_3 is down from tile_6, tile_1 is down from tile_4, tile_5 is down from tile_8, tile_4 is down from tile_7, and tile_2 is down from tile_5 Currently, robot robot1 is at tile_7 and holding color white and robot robot2 is at tile_3 and holding color white; tile_6, tile_2, tile_1, and tile_4 are clear; tile_9 is painted black, tile_8 is painted white, and tile_5 is painted black.", 146 | "question": "Is it possible to transition to a state where the following holds: Tile tile_4 is painted in white color and Tile tile_6 is painted in white color?", 147 | "answer": "yes" 148 | }, 149 | { 150 | "id": 1252245561935876045, 151 | "group": "reachable_atom_bool", 152 | "context": "A set of robots use different colors to paint patterns in floor tiles. The robots can move around the floor tiles in four directions (up, down, left and right). Robots paint with one color at a time, but can change their spray guns to any available color. However, robots can only paint the tile that is in front (up) and behind (down) them, and once a tile has been painted no robot can stand on it. Robots need to paint a grid with black and white, where the cell color is alternated always. There are 16 tiles and 2 robots. 
The tiles locations are: tile_12 is to the right of tile_11, tile_7 is to the right of tile_6, tile_10 is to the right of tile_9, tile_2 is to the right of tile_1, tile_15 is to the right of tile_14, tile_11 is to the right of tile_10, tile_8 is to the right of tile_7, tile_3 is to the right of tile_2, tile_14 is to the right of tile_13, tile_16 is to the right of tile_15, tile_6 is to the right of tile_5, and tile_4 is to the right of tile_3. Further, tile_7 is down from tile_11, tile_11 is down from tile_15, tile_12 is down from tile_16, tile_8 is down from tile_12, tile_2 is down from tile_6, tile_10 is down from tile_14, tile_4 is down from tile_8, tile_5 is down from tile_9, tile_6 is down from tile_10, tile_9 is down from tile_13, tile_1 is down from tile_5, and tile_3 is down from tile_7 Currently, robot robot1 is at tile_5 and holding color white and robot robot2 is at tile_2 and holding color black; tile_6, tile_3, tile_1, tile_4, tile_8, tile_7, and tile_10 are clear; tile_15 is painted white, tile_16 is painted black, tile_11 is painted black, tile_14 is painted black, tile_9 is painted black, tile_13 is painted white, and tile_12 is painted white.", 153 | "question": "Is it possible to transition to a state where the following holds: Tile tile_6 is painted in black color and Robot robot1 is holding black paint?", 154 | "answer": "yes" 155 | }, 156 | { 157 | "id": 7682947827300147988, 158 | "group": "reachable_atom_bool", 159 | "context": "A set of robots use different colors to paint patterns in floor tiles. The robots can move around the floor tiles in four directions (up, down, left and right). Robots paint with one color at a time, but can change their spray guns to any available color. However, robots can only paint the tile that is in front (up) and behind (down) them, and once a tile has been painted no robot can stand on it. Robots need to paint a grid with black and white, where the cell color is alternated always. 
There are 9 tiles and 2 robots. The tiles locations are: tile_5 is to the right of tile_4, tile_2 is to the right of tile_1, tile_3 is to the right of tile_2, tile_9 is to the right of tile_8, tile_8 is to the right of tile_7, and tile_6 is to the right of tile_5. Further, tile_6 is down from tile_9, tile_3 is down from tile_6, tile_1 is down from tile_4, tile_5 is down from tile_8, tile_4 is down from tile_7, and tile_2 is down from tile_5 Currently, robot robot1 is at tile_2 and holding color white and robot robot2 is at tile_1 and holding color white; tile_6, tile_3, and tile_5 are clear; tile_4 is painted white, tile_9 is painted black, tile_8 is painted white, and tile_7 is painted black.", 160 | "question": "Is it possible to transition to a state where the following holds: Robot robot1 is holding black paint and Robot robot1 is holding white paint?", 161 | "answer": "no" 162 | }, 163 | { 164 | "id": -6015685202443099301, 165 | "group": "reachable_atom_bool", 166 | "context": "A set of robots use different colors to paint patterns in floor tiles. The robots can move around the floor tiles in four directions (up, down, left and right). Robots paint with one color at a time, but can change their spray guns to any available color. However, robots can only paint the tile that is in front (up) and behind (down) them, and once a tile has been painted no robot can stand on it. Robots need to paint a grid with black and white, where the cell color is alternated always. There are 12 tiles and 2 robots. The tiles locations are: tile_5 is to the right of tile_4, tile_12 is to the right of tile_11, tile_2 is to the right of tile_1, tile_11 is to the right of tile_10, tile_3 is to the right of tile_2, tile_9 is to the right of tile_8, tile_8 is to the right of tile_7, and tile_6 is to the right of tile_5. 
Further, tile_6 is down from tile_9, tile_3 is down from tile_6, tile_1 is down from tile_4, tile_5 is down from tile_8, tile_7 is down from tile_10, tile_9 is down from tile_12, tile_4 is down from tile_7, tile_2 is down from tile_5, and tile_8 is down from tile_11 Currently, robot robot2 is at tile_9 and holding color black and robot robot1 is at tile_2 and holding color white; tile_6, tile_3, tile_1, tile_5, and tile_8 are clear; tile_4 is painted white, tile_11 is painted black, tile_12 is painted white, tile_7 is painted black, and tile_10 is painted white.", 167 | "question": "Is it possible to transition to a state where the following holds: Robot robot2 is at tile_7 location and tile_7 is clear?", 168 | "answer": "no" 169 | }, 170 | { 171 | "id": -223050641706935078, 172 | "group": "reachable_atom_bool", 173 | "context": "A set of robots use different colors to paint patterns in floor tiles. The robots can move around the floor tiles in four directions (up, down, left and right). Robots paint with one color at a time, but can change their spray guns to any available color. However, robots can only paint the tile that is in front (up) and behind (down) them, and once a tile has been painted no robot can stand on it. Robots need to paint a grid with black and white, where the cell color is alternated always. There are 12 tiles and 2 robots. The tiles locations are: tile_5 is to the right of tile_4, tile_12 is to the right of tile_11, tile_2 is to the right of tile_1, tile_11 is to the right of tile_10, tile_3 is to the right of tile_2, tile_9 is to the right of tile_8, tile_8 is to the right of tile_7, and tile_6 is to the right of tile_5. 
Further, tile_6 is down from tile_9, tile_3 is down from tile_6, tile_1 is down from tile_4, tile_5 is down from tile_8, tile_7 is down from tile_10, tile_9 is down from tile_12, tile_4 is down from tile_7, tile_2 is down from tile_5, and tile_8 is down from tile_11 Currently, robot robot2 is at tile_1 and holding color white and robot robot1 is at tile_4 and holding color black; tile_6, tile_2, tile_3, tile_5, and tile_8 are clear; tile_11 is painted black, tile_9 is painted black, tile_12 is painted white, tile_7 is painted black, and tile_10 is painted white.", 174 | "question": "Is it possible to transition to a state where the following holds: Robot robot1 is holding white paint?", 175 | "answer": "yes" 176 | }, 177 | { 178 | "id": 4266263960413465702, 179 | "group": "reachable_atom_bool", 180 | "context": "This is a grippers domain, where there is a robot with two grippers. The robot can carry a ball in each. The goal is to take the balls from one room to another. There are 1 robot, 2 rooms, and 4 balls, numbered consecutively. Currently, the robot robot1 is at room1, left gripper is carrying the ball ball1, and right gripper is carrying the ball ball3. Additionally, ball4 is at room2, ball2 is at room1.", 181 | "question": "Is it possible to transition to a state where the following holds: Ball ball3 is in room room2 and Robot robot1 is in room room2?", 182 | "answer": "yes" 183 | }, 184 | { 185 | "id": -3431730014064061534, 186 | "group": "reachable_atom_bool", 187 | "context": "This is a grippers domain, where there is a robot with two grippers. The robot can carry a ball in each. The goal is to take the balls from one room to another. There are 1 robot, 3 rooms, and 4 balls, numbered consecutively. Currently, the robot robot1 is at room2, left gripper is carrying the ball ball2, and right gripper is carrying the ball ball1. 
Additionally, ball3 is at room2, ball4 is at room3.", 188 | "question": "Is it possible to transition to a state where the following holds: Ball right1 is at room3 location?", 189 | "answer": "no" 190 | }, 191 | { 192 | "id": 1115746769585407893, 193 | "group": "reachable_atom_bool", 194 | "context": "This is a grippers domain, where there is a robot with two grippers. The robot can carry a ball in each. The goal is to take the balls from one room to another. There are 1 robot, 7 rooms, and 4 balls, numbered consecutively. Currently, the robot robot1 is at room5, left gripper is free, and right gripper is carrying the ball ball2. Additionally, ball4 is at room6, ball1 and ball3 are at room5.", 195 | "question": "Is it possible to transition to a state where the following holds: Ball room4 is at room5 location?", 196 | "answer": "no" 197 | }, 198 | { 199 | "id": 7773717068495872850, 200 | "group": "reachable_atom_bool", 201 | "context": "This is a grippers domain, where there is a robot with two grippers. The robot can carry a ball in each. The goal is to take the balls from one room to another. There are 1 robot, 3 rooms, and 4 balls, numbered consecutively. Currently, the robot robot1 is at room3, right gripper is free, and left gripper is carrying the ball ball2. Additionally, ball3 is at room2, ball4 and ball1 are at room3.", 202 | "question": "Is it possible to transition to a state where the following holds: Ball ball2 is at room3 location and Ball ball2 is in room room2?", 203 | "answer": "no" 204 | }, 205 | { 206 | "id": -1611079026423119195, 207 | "group": "reachable_atom_bool", 208 | "context": "This is a grippers domain, where there is a robot with two grippers. The robot can carry a ball in each. The goal is to take the balls from one room to another. There are 1 robot, 3 rooms, and 4 balls, numbered consecutively. Currently, the robot robot1 is at room2, right gripper is free, and left gripper is carrying the ball ball1. 
Additionally, ball4 is at room2, ball2 is at room1, ball3 is at room3.", 209 | "question": "Is it possible to transition to a state where the following holds: Robot robot1 is in room room3?", 210 | "answer": "yes" 211 | }, 212 | { 213 | "id": 4802916355221978023, 214 | "group": "reachable_atom_bool", 215 | "context": "This is a Rovers domain where rovers must navigate between waypoints gathering data and transmitting it back to a lander. Rovers cannot navigate to all waypoints and this makes particular routes impassable to some of the rovers. Data transmission is also constrained by the visibility of the lander from the waypoints. There are 2 rovers, 3 waypoints, 2 stores, 2 cameras, 3 objectives numbered consecutively. Further, there is 1 lander and 3 modes for the camera namely colour, high resolution, and low resolution. Rover(s) rover0 is equipped for soil analysis. Rover(s) rover1 and rover0 are equipped for rock analysis. Rover(s) rover1 and rover0 are equipped for imaging. Rover rover1 has store store1. Rover rover0 has store store0. Rover rover1 has camera0 on board. Rover rover0 has camera1 on board. Camera camera0 can be calibrated on objective2. Camera camera1 can be calibrated on objective1. Camera camera0 supports low_res. Camera camera1 supports low_res and high_res. Rover rover0 can traverse from waypoint2 to waypoint1, waypoint2 to waypoint0, waypoint0 to waypoint2, waypoint1 to waypoint2. Rover rover1 can traverse from waypoint1 to waypoint2, waypoint2 to waypoint1, waypoint0 to waypoint2, waypoint2 to waypoint0. Waypoint(s) are visible from waypoint1: waypoint2 and waypoint0. Waypoint(s) are visible from waypoint0: waypoint2 and waypoint1. Waypoint(s) are visible from waypoint2: waypoint0 and waypoint1. Objective objective0 is visible from waypoint0 and waypoint1. Objective objective1 is visible from waypoint1. Objective objective2 is visible from waypoint1. Lander general is at waypoint waypoint1. Currently, Rover rover0 is at waypoint2. 
Rover rover1 is at waypoint2. Rocks can be sampled at the following location(s): waypoint0. Soil can be sampled at the following location(s): waypoint0. Rovers rover0 and rover1 are available. Soil data was communicated from waypoint waypoint2; Rover rover0 has soil analyzed in waypoint waypoint2. Rover rover1 has rock analyzed in waypoint waypoint2. Rover rover0 has image objective1 in mode low_res. Rover rover0 has image objective0 in mode low_res. Rover rover0 has image objective1 in mode high_res. Store(s) store1 and store0 are full. ", 216 | "question": "Is it possible to transition to a state where the following holds: Rover rover0 is at waypoint0 and Rover rover1 is at waypoint0?", 217 | "answer": "yes" 218 | }, 219 | { 220 | "id": -7669604320220958070, 221 | "group": "reachable_atom_bool", 222 | "context": "This is a Rovers domain where rovers must navigate between waypoints gathering data and transmitting it back to a lander. Rovers cannot navigate to all waypoints and this makes particular routes impassable to some of the rovers. Data transmission is also constrained by the visibility of the lander from the waypoints. There are 2 rovers, 3 waypoints, 2 stores, 3 cameras, 3 objectives numbered consecutively. Further, there is 1 lander and 3 modes for the camera namely colour, high resolution, and low resolution. Rover(s) rover1 and rover0 are equipped for soil analysis. Rover(s) rover0 is equipped for rock analysis. Rover(s) rover1 and rover0 are equipped for imaging. Rover rover1 has store store1. Rover rover0 has store store0. Rover rover0 has camera2 on board. Rover rover1 has camera1 and camera0 on board. Camera camera2 can be calibrated on objective2. Camera camera1 can be calibrated on objective0. Camera camera0 can be calibrated on objective0. Camera camera0 supports low_res and colour. Camera camera2 supports high_res. Camera camera1 supports high_res. 
Rover rover0 can traverse from waypoint1 to waypoint0, waypoint2 to waypoint1, waypoint0 to waypoint1, waypoint1 to waypoint2. Rover rover1 can traverse from waypoint1 to waypoint0, waypoint1 to waypoint2, waypoint2 to waypoint1, waypoint0 to waypoint1. Waypoint(s) are visible from waypoint1: waypoint2 and waypoint0. Waypoint(s) are visible from waypoint0: waypoint2 and waypoint1. Waypoint(s) are visible from waypoint2: waypoint0 and waypoint1. Objective objective1 is visible from waypoint0 and waypoint1. Objective objective2 is visible from waypoint0. Objective objective0 is visible from waypoint2. Lander general is at waypoint waypoint1. Currently, Rover rover0 is at waypoint0. Rover rover1 is at waypoint0. Rocks can be sampled at the following location(s): waypoint0. Soil can be sampled at the following location(s): waypoint0. Rovers rover0 and rover1 are available. Image objective1 was communicated in mode high_res. Rover rover1 has image objective1 in mode low_res. Rover rover1 has image objective1 in mode high_res. Store(s) store0 and store1 are empty. ", 223 | "question": "Is it possible to transition to a state where the following holds: Rover rover1 is at waypoint1 and Rover rover1 has soil analyzed in waypoint waypoint0?", 224 | "answer": "yes" 225 | }, 226 | { 227 | "id": -4030372106350146955, 228 | "group": "reachable_atom_bool", 229 | "context": "This is a Rovers domain where rovers must navigate between waypoints gathering data and transmitting it back to a lander. Rovers cannot navigate to all waypoints and this makes particular routes impassable to some of the rovers. Data transmission is also constrained by the visibility of the lander from the waypoints. There are 2 rovers, 3 waypoints, 2 stores, 3 cameras, 5 objectives numbered consecutively. Further, there is 1 lander and 3 modes for the camera namely colour, high resolution, and low resolution. Rover(s) rover1 and rover0 are equipped for soil analysis. 
Rover(s) rover0 is equipped for rock analysis. Rover(s) rover1 and rover0 are equipped for imaging. Rover rover1 has store store1. Rover rover0 has store store0. Rover rover0 has camera2 on board. Rover rover1 has camera1 and camera0 on board. Camera camera2 can be calibrated on objective4. Camera camera1 can be calibrated on objective0. Camera camera0 can be calibrated on objective0. Camera camera0 supports low_res and colour. Camera camera2 supports high_res. Camera camera1 supports high_res. Rover rover0 can traverse from waypoint1 to waypoint0, waypoint2 to waypoint1, waypoint0 to waypoint1, waypoint1 to waypoint2. Rover rover1 can traverse from waypoint1 to waypoint0, waypoint1 to waypoint2, waypoint2 to waypoint1, waypoint0 to waypoint1. Waypoint(s) are visible from waypoint1: waypoint2 and waypoint0. Waypoint(s) are visible from waypoint0: waypoint2 and waypoint1. Waypoint(s) are visible from waypoint2: waypoint0 and waypoint1. Objective objective1 is visible from waypoint0 and waypoint1. Objective objective3 is visible from waypoint0. Objective objective2 is visible from waypoint0. Objective objective4 is visible from waypoint1. Objective objective0 is visible from waypoint2. Lander general is at waypoint waypoint1. Currently, Rover rover0 is at waypoint1. Rover rover1 is at waypoint1. Rocks can be sampled at the following location(s): waypoint2. Soil can be sampled at the following location(s): waypoint2 and waypoint0. Rovers rover0 and rover1 are available. Rock data was communicated from waypoint waypoint1; Image objective0 was communicated in mode high_res. Image objective2 was communicated in mode high_res. Rover rover0 has rock analyzed in waypoint waypoint1. Rover rover0 has image objective0 in mode high_res. Rover rover0 has image objective2 in mode high_res. Rover rover0 has its camera camera2 calibrated. Store(s) store0 and store1 are empty. 
", 230 | "question": "Is it possible to transition to a state where the following holds: Rocks can be sampled at the following location(s): waypoint0 and Channel general is free?", 231 | "answer": "no" 232 | }, 233 | { 234 | "id": 6156317977928621649, 235 | "group": "reachable_atom_bool", 236 | "context": "This is a Rovers domain where rovers must navigate between waypoints gathering data and transmitting it back to a lander. Rovers cannot navigate to all waypoints and this makes particular routes impassable to some of the rovers. Data transmission is also constrained by the visibility of the lander from the waypoints. There are 2 rovers, 3 waypoints, 2 stores, 3 cameras, 3 objectives numbered consecutively. Further, there is 1 lander and 3 modes for the camera namely colour, high resolution, and low resolution. Rover(s) rover1 and rover0 are equipped for soil analysis. Rover(s) rover0 is equipped for rock analysis. Rover(s) rover1 and rover0 are equipped for imaging. Rover rover1 has store store1. Rover rover0 has store store0. Rover rover0 has camera2 on board. Rover rover1 has camera1 and camera0 on board. Camera camera2 can be calibrated on objective2. Camera camera1 can be calibrated on objective0. Camera camera0 can be calibrated on objective0. Camera camera0 supports low_res and colour. Camera camera2 supports high_res. Camera camera1 supports high_res. Rover rover0 can traverse from waypoint1 to waypoint0, waypoint2 to waypoint1, waypoint0 to waypoint1, waypoint1 to waypoint2. Rover rover1 can traverse from waypoint1 to waypoint0, waypoint1 to waypoint2, waypoint2 to waypoint1, waypoint0 to waypoint1. Waypoint(s) are visible from waypoint1: waypoint2 and waypoint0. Waypoint(s) are visible from waypoint0: waypoint2 and waypoint1. Waypoint(s) are visible from waypoint2: waypoint0 and waypoint1. Objective objective1 is visible from waypoint0 and waypoint1. Objective objective2 is visible from waypoint0. Objective objective0 is visible from waypoint2. 
Lander general is at waypoint waypoint1. Currently, Rover rover0 is at waypoint0. Rover rover1 is at waypoint1. Rocks can be sampled at the following location(s): waypoint0. Rovers rover0 and rover1 are available. Rover rover1 has soil analyzed in waypoint waypoint0. Rover rover1 has image objective1 in mode low_res. Store(s) store0 is empty. Store(s) store1 is full. ", 237 | "question": "Is it possible to transition to a state where the following holds: Rover rover0 is at waypoint1 and Rover rover1 is at waypoint2?", 238 | "answer": "yes" 239 | }, 240 | { 241 | "id": -8403308311277403202, 242 | "group": "reachable_atom_bool", 243 | "context": "This is a Rovers domain where rovers must navigate between waypoints gathering data and transmitting it back to a lander. Rovers cannot navigate to all waypoints and this makes particular routes impassable to some of the rovers. Data transmission is also constrained by the visibility of the lander from the waypoints. There are 2 rovers, 3 waypoints, 2 stores, 3 cameras, 2 objectives numbered consecutively. Further, there is 1 lander and 3 modes for the camera namely colour, high resolution, and low resolution. Rover(s) rover1 and rover0 are equipped for soil analysis. Rover(s) rover0 is equipped for rock analysis. Rover(s) rover1 and rover0 are equipped for imaging. Rover rover1 has store store1. Rover rover0 has store store0. Rover rover0 has camera2 on board. Rover rover1 has camera1 and camera0 on board. Camera camera1 can be calibrated on objective0. Camera camera2 can be calibrated on objective1. Camera camera0 can be calibrated on objective0. Camera camera0 supports low_res and colour. Camera camera2 supports high_res. Camera camera1 supports high_res. Rover rover0 can traverse from waypoint1 to waypoint0, waypoint2 to waypoint1, waypoint0 to waypoint1, waypoint1 to waypoint2. Rover rover1 can traverse from waypoint1 to waypoint0, waypoint1 to waypoint2, waypoint2 to waypoint1, waypoint0 to waypoint1. 
Waypoint(s) are visible from waypoint1: waypoint2 and waypoint0. Waypoint(s) are visible from waypoint0: waypoint2 and waypoint1. Waypoint(s) are visible from waypoint2: waypoint0 and waypoint1. Objective objective1 is visible from waypoint0 and waypoint1. Objective objective0 is visible from waypoint2. Lander general is at waypoint waypoint1. Currently, Rover rover0 is at waypoint2. Rover rover1 is at waypoint1. Rocks can be sampled at the following location(s): waypoint0 and waypoint2. Soil can be sampled at the following location(s): waypoint0 and waypoint1. Rovers rover0 and rover1 are available. Image objective0 was communicated in mode high_res. Rover rover0 has image objective0 in mode high_res. Store(s) store0 and store1 are empty. ", 244 | "question": "Is it possible to transition to a state where the following holds: Rocks can be sampled at the following location(s): waypoint1 and Image objective0 was communicated in mode high_res?", 245 | "answer": "no" 246 | }, 247 | { 248 | "id": -2600541151178733482, 249 | "group": "reachable_atom_bool", 250 | "context": "This is a visitall domain where a robot in a grid must visit all the cells or places in the grid. There are some unavailable places in the grid. The grid size is 4x4, and the location cell names are of the form loc-xi-yj (e.g., loc-x0-y2 or loc-x1-y1). The grid cells are connected to their available neighbors. The unavailable cells are loc-x0-y0, loc-x0-y3, and loc-x3-y3. Currently, the robot is in place loc-x0-y1.The following places have been visited: loc-x1-y0, loc-x2-y2, loc-x3-y0, loc-x0-y1, loc-x1-y1, loc-x2-y1, loc-x2-y0, loc-x3-y2, and loc-x3-y1.", 251 | "question": "Is it possible to transition to a state where the following holds: the robot is in place loc-x2-y1?", 252 | "answer": "yes" 253 | }, 254 | { 255 | "id": 4739079759859289365, 256 | "group": "reachable_atom_bool", 257 | "context": "This is a visitall domain where a robot in a grid must visit all the cells or places in the grid. 
There are some unavailable places in the grid. The grid size is 4x4, and the location cell names are of the form loc-xi-yj (e.g., loc-x0-y2 or loc-x1-y1). The grid cells are connected to their available neighbors. The unavailable cells are loc-x0-y0 and loc-x0-y3. Currently, the robot is in place loc-x2-y2.The following places have been visited: loc-x1-y0, loc-x2-y2, loc-x3-y0, loc-x3-y3, loc-x1-y1, loc-x2-y1, loc-x2-y0, loc-x3-y2, and loc-x3-y1.", 258 | "question": "Is it possible to transition to a state where the following holds: the robot is in place loc-x2-y3 and the robot is in place loc-x3-y3?", 259 | "answer": "no" 260 | }, 261 | { 262 | "id": -2984151980683011083, 263 | "group": "reachable_atom_bool", 264 | "context": "This is a visitall domain where a robot in a grid must visit all the cells or places in the grid. There are some unavailable places in the grid. The grid size is 4x4, and the location cell names are of the form loc-xi-yj (e.g., loc-x0-y2 or loc-x1-y1). The grid cells are connected to their available neighbors. There are no unavailable cells. Currently, the robot is in place loc-x3-y3.The following places have been visited: loc-x2-y2, loc-x0-y2, loc-x1-y3, loc-x0-y1, loc-x3-y3, loc-x1-y1, loc-x2-y1, loc-x0-y3, loc-x1-y2, and loc-x2-y3.", 265 | "question": "Is it possible to transition to a state where the following holds: the robot is in place loc-x3-y1 and Place loc-x3-y1 has been visited?", 266 | "answer": "yes" 267 | }, 268 | { 269 | "id": 2509410456201452078, 270 | "group": "reachable_atom_bool", 271 | "context": "This is a visitall domain where a robot in a grid must visit all the cells or places in the grid. There are some unavailable places in the grid. The grid size is 4x4, and the location cell names are of the form loc-xi-yj (e.g., loc-x0-y2 or loc-x1-y1). The grid cells are connected to their available neighbors. The unavailable cells are loc-x0-y0, loc-x0-y3, and loc-x3-y3. 
Currently, the robot is in place loc-x1-y3.The following places have been visited: loc-x1-y0, loc-x2-y2, loc-x0-y2, loc-x1-y3, loc-x0-y1, loc-x3-y0, loc-x1-y1, loc-x2-y1, loc-x2-y0, loc-x3-y2, loc-x1-y2, and loc-x3-y1.", 272 | "question": "Is it possible to transition to a state where the following holds: the robot is in place loc-x0-y2?", 273 | "answer": "yes" 274 | }, 275 | { 276 | "id": -8185664364293476672, 277 | "group": "reachable_atom_bool", 278 | "context": "This is a visitall domain where a robot in a grid must visit all the cells or places in the grid. There are some unavailable places in the grid. The grid size is 4x4, and the location cell names are of the form loc-xi-yj (e.g., loc-x0-y2 or loc-x1-y1). The grid cells are connected to their available neighbors. The only unavailable cell is loc-x3-y1. Currently, the robot is in place loc-x3-y3.The following places have been visited: loc-x1-y0, loc-x2-y2, loc-x0-y2, loc-x1-y3, loc-x0-y1, loc-x3-y3, loc-x1-y1, loc-x0-y3, loc-x1-y2, loc-x0-y0, loc-x2-y3, and loc-x3-y2.", 279 | "question": "Is it possible to transition to a state where the following holds: the robot is in place loc-x1-y0 and the robot is in place loc-x2-y3?", 280 | "answer": "no" 281 | } 282 | ] -------------------------------------------------------------------------------- /dataset/reach/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/reach/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/reach/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/reach/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/reach/test.mcq.json.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/reach/test.mcq.json.gz -------------------------------------------------------------------------------- /dataset/val/test.bool.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/val/test.bool.json.gz -------------------------------------------------------------------------------- /dataset/val/test.gen.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/val/test.gen.json.gz -------------------------------------------------------------------------------- /dataset/val/test.mcq.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IBM/ACPBench/5947a408d1a4999b64ceceaad97e51362b81c4d0/dataset/val/test.mcq.json.gz -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | clingo==5.6.2 2 | tarski==0.8.2 3 | kstar-planner>=1.4.1 4 | forbiditerative>=1.0.13 5 | python-dotenv 6 | tqdm 7 | numpy 8 | python-dateutil 9 | jsonlines 10 | pddl==0.4.2 11 | scipy 12 | 13 | -------------------------------------------------------------------------------- /src/evaluate_gen.py: -------------------------------------------------------------------------------- 1 | import json 2 | import jsonlines 3 | 4 | import glob 5 | import os 6 | import sys 7 | from numpy import mean 8 | from scipy.stats import sem, norm 9 | from evaluators import get_evaluator 10 | from grammar.grammar_parser import ACPGrammarParser 11 | 12 | GRAMMAR_FILE = 
os.path.join(os.path.dirname(os.path.abspath(__file__)), "grammar","acp_grammar.lark") 13 | 14 | 15 | def remove_garbage(s): 16 | while True: 17 | if s.endswith("."): 18 | s=s[:-1] 19 | elif s.endswith("\n"): 20 | s=s[:-2] 21 | else: 22 | break 23 | return s.rstrip() 24 | 25 | def compare_str(s1, s2): 26 | return remove_garbage(s1).lower() == remove_garbage(s2).lower() 27 | 28 | def compare(l1, l2): 29 | if not isinstance(l1, list): 30 | return compare_str(l1,l2) 31 | if not isinstance(l2, list): 32 | return False 33 | for i, v in enumerate (l1): 34 | if not compare(v, l2[i]): 35 | return False 36 | return True 37 | 38 | def get_type(file): 39 | return "gen" if "_gen_" in file else None 40 | 41 | def check_prog_response(resp): 42 | if "Positive Effects".lower() in resp.lower() and "Negative Effects".lower() in resp.lower(): 43 | if "[" not in resp: 44 | return True 45 | return False 46 | 47 | def get_parsed_answer(resp, task, parser): 48 | if "acp_prog_gen" in task: 49 | # Check for Positive Effects and Negative Effects instead of separation 50 | if check_prog_response(resp): 51 | # replace **Positive Effects** with "[" 52 | # replace **Negative Effects** with "] [" 53 | # append "]" to the end 54 | resp2 = resp.lower() 55 | resp2 = resp2.replace("*","") 56 | resp2 = resp2.replace("positive effects","[") 57 | resp2 = resp2.replace("negative effects","] [") 58 | resp2 = resp2 + "]" 59 | return parser.parse(resp2) 60 | if "acp_just_gen" in task: 61 | # Check for "simplified plan:" 62 | if "simplified plan:" in resp.lower(): 63 | resp2 = resp.lower() 64 | resp2 = resp2.replace("*","") 65 | resp2 = resp2.split("simplified plan:")[1] 66 | return parser.parse(resp2) 67 | return parser.parse(resp) 68 | 69 | def _task_name(file): 70 | return "acp_" + os.path.basename(file).split("_acp_")[1].replace(".jsonl","") 71 | 72 | def get_subtasks(task): 73 | if task == "acp_gen": 74 | return ["acp_just_gen", "acp_areach_gen", "acp_prog_gen", "acp_reach_gen", "acp_val_gen", 
"acp_land_gen", "acp_app_gen", "acp_nexta_gen"] 75 | if task == "acp_gen_cot": 76 | return ["acp_just_gen_cot", "acp_areach_gen_cot", "acp_prog_gen_cot", "acp_reach_gen_cot", "acp_val_gen_cot", "acp_land_gen_cot", "acp_app_gen_cot", "acp_nexta_gen_cot"] 77 | if task == "acp_gen_cot_2shot": 78 | return ["acp_just_gen_cot_2shot", "acp_areach_gen_cot_2shot", "acp_prog_gen_cot_2shot", "acp_reach_gen_cot_2shot", "acp_val_gen_cot_2shot", "acp_land_gen_cot_2shot", "acp_app_gen_cot_2shot", "acp_nexta_gen_cot_2shot"] 79 | if task == "acp_gen_2shot": 80 | return ["acp_just_gen_2shot", "acp_areach_gen_2shot", "acp_prog_gen_2shot", "acp_reach_gen_2shot", "acp_val_gen_2shot", "acp_land_gen_2shot", "acp_app_gen_2shot", "acp_nexta_gen_2shot"] 81 | print("Task " + task + " is not defined!") 82 | exit(1) 83 | 84 | 85 | def get_keys_from_results_data(data): 86 | # Getting the key (starts with "exact_match_stderr," and "exact_match,") 87 | for k in data.keys(): 88 | if k.startswith("exact_match,") or k.startswith("exact_match_mean_k,"): 89 | yield k 90 | for k in data.keys(): 91 | if k.startswith("exact_match_stderr,") or k.startswith("exact_match_mean_k_stderr,"): 92 | yield k 93 | 94 | 95 | def get_task_name(tasks, task): 96 | matches = [t for t in tasks if t.startswith(task)] 97 | if len(matches) == 1: 98 | return matches[0] 99 | if len(matches) > 1: 100 | return min(matches, key=len) 101 | return None 102 | 103 | def get_grammar_task(task): 104 | task = task.split('_')[1] 105 | # print(task) 106 | if task == 'reach': 107 | return 'act' 108 | elif task == 'prog': 109 | return 'progression_list' 110 | elif task == 'val': 111 | return 'index' 112 | elif task == 'areach': 113 | return 'act' 114 | elif task == 'just': 115 | return 'action_list' 116 | elif task == 'land': 117 | return 'act' 118 | elif task == 'nexta': 119 | return 'action_name' 120 | elif task == 'app': 121 | return 'action_list' 122 | 123 | if __name__ == "__main__": 124 | dir = sys.argv[1] 125 | scores_by_tasks = {} 
if __name__ == "__main__":
    # Usage: python evaluate_gen.py <results_dir>
    # <results_dir> holds the per-task "*.jsonl" sample files and the
    # "results*.json" summaries.
    if len(sys.argv) < 2:
        sys.exit("Usage: evaluate_gen.py <results_dir>")
    results_dir = sys.argv[1]  # was named `dir`, which shadowed the builtin
    scores_by_tasks = {}
    data = {}
    for file in glob.glob(f"{results_dir}/*.jsonl"):
        fname = os.path.basename(file)
        print("===== Begin =======")
        print(f"file: {file}")
        assert get_type(fname) == "gen", f'Task type is {get_type(fname)}'
        scores = []
        task = _task_name(file)
        assert os.path.isfile(GRAMMAR_FILE), f"The grammar file is missing at {GRAMMAR_FILE}"
        parser = ACPGrammarParser(GRAMMAR_FILE, get_grammar_task(task))

        num_parser_errors = 0
        with jsonlines.open(file) as f:
            results = {}
            for line in f.iter():
                group = line["doc"]["group"]
                evaluator = get_evaluator(group)
                ans = [get_parsed_answer(resp, task, parser) for resp in line["resps"][0]]
                if any(elem is None for elem in ans) or any(elem is None for elem in ans[0]):
                    # An unparsable response scores zero for this sample.
                    num_parser_errors += 1
                    line["filtered_resps"] = "PARSER ERROR"
                    scores.append(0)
                    results[line['doc']['id']] = 0
                    continue
                if isinstance(line["resps"][0], list):
                    ans = [ans]
                if not compare(ans, line["filtered_resps"]):
                    line["filtered_resps"] = ans

                if isinstance(line["resps"][0], list):
                    score = evaluator.get_score(ans[0], line["doc"])
                else:
                    score = evaluator.get_score(ans, line["doc"])
                line["exact_match_mean_k"] = score
                scores.append(score)
                results[line['doc']['id']] = score

        print(f"The number of parser errors is {num_parser_errors}")
        scores_by_tasks[task] = scores
        data[task] = results.copy()
        print("====== End =====\n")

    print("Computed scores by tasks:")
    json_results = {}
    json_results["scores"] = {}
    for rname in glob.glob(f"{results_dir}/results*.json"):
        with open(rname) as results_file:
            res = json.load(results_file)
        for task, results in res["results"].items():
            if task == 'acp':
                continue
            if task in ["acp_gen", "acp_gen_cot_2shot", "acp_gen_cot", "acp_gen_2shot"]:
                scores = []
                for s in get_subtasks(task):
                    # Keys of scores_by_tasks are derived from file names, so
                    # resolve them the same prefix-matching way as for the
                    # individual tasks below instead of indexing directly.
                    subtask_name = get_task_name(scores_by_tasks.keys(), s)
                    if subtask_name is not None:
                        scores.extend(scores_by_tasks[subtask_name])
            else:
                task_name = get_task_name(scores_by_tasks.keys(), task)
                if task_name is None:
                    continue
                scores = scores_by_tasks[task_name]
                json_results[task] = data[task_name]
            if not scores:
                # Nothing was evaluated for this task; mean/sem are undefined.
                print(f"No samples found for task {task}")
                continue
            print(task, mean(scores), sem(scores))
            json_results["scores"][task] = {"mean": mean(scores), "std": sem(scores)}

    with open('evaluation_results.json', 'w') as outfile:
        json.dump(json_results, outfile, indent=2)
def compute_accuracy(args):
    """Compute exact-match accuracy of bool/MCQ results against ground truth.

    Args:
        args: namespace with ``gt`` (ground-truth JSON path) and ``results``
            (results JSON path).  Both files must contain lists of equal
            length, aligned by position.

    Returns:
        The dictionary returned by the ``exact_match`` metric.

    Raises:
        AssertionError: when the two files differ in length, or an aligned
            pair differs in ``id`` or ``group``.
    """
    with open(args.gt, 'r') as gt_file:
        gt_json = json.load(gt_file)
    with open(args.results, 'r') as results_file:
        results_json = json.load(results_file)

    assert len(gt_json) == len(results_json), "Length of results and dev json file is not same"

    references, predictions = [], []
    for sample, response in zip(gt_json, results_json):
        assert sample['id'] == response['doc']['id'], "Mismatch in example ids"
        assert sample['group'] == response['doc']['group'], "Mismatch in example groups"
        # The ground-truth answer is the reference.  Append it as one item:
        # `+=` with a string would extend the list character by character.
        references.append(sample['answer'])
        filtered = response['filtered_resps']
        if isinstance(filtered, str):
            predictions.append(filtered)
        else:
            # NOTE(review): assumes one filtered response per sample; only
            # the first one is scored so both lists stay aligned - confirm.
            predictions.append(filtered[0] if filtered else "")
    exact_match = hf_evaluate.load("exact_match")
    results = exact_match.compute(references=references, predictions=predictions)
    print(f"Reference: {args.gt}")
    print(f"Prediction: {args.results}")
    print("Exact Match: ", round(results["exact_match"], 2))
    return results


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--results', type=str, required=True,
                        help='Results JSON filepath')
    parser.add_argument('--gt', type=str, required=True,
                        help='Ground truth JSON filepath')

    args = parser.parse_args()
    # The arguments were parsed but the evaluation was never started.
    compute_accuracy(args)
def get_evaluator(group):
    """Return a fresh evaluator instance for the sample group ``group``.

    Raises:
        ValueError: when ``group`` is not one of the known generative groups.
    """
    if group == "applicable_actions_gen":
        return ApplicabilityEvaluator()
    if group == "progression_gen":
        return ProgressionEvaluator()
    if group == "validation_gen":
        return ValidationEvaluator()
    if group == "reachable_atom_gen":
        return ReachabilityEvaluator()
    if group == "goal_closer_gen":
        return NextActionEvaluator()
    if group == "action_justification_gen":
        return JustificationEvaluator()
    if group == "landmarks_gen":
        return LandmarksEvaluator()
    if group == "reachable_action_gen":
        return ActionReachabilityEvaluator()
    # `assert True, ...` could never fail, so unknown groups used to return
    # None and crash later with an unrelated AttributeError.
    raise ValueError(f"Group {group} not found")
class ApplicabilityEvaluator(BaseEvaluator):
    """Scores answers to "which actions are applicable" questions."""

    def get_score(self, ans, doc):
        """Score each answer in ``ans`` by set equality with ``doc["answer"]``.

        A skewed Jaccard average is printed for information only; the
        returned value is the evaluator's running average of exact matches.
        """
        expected = [action.lower() for action in doc["answer"]]
        normalised = [[fix_action_name(a) for a in candidate] for candidate in ans]
        cleaned = [cleanup_answer(candidate) for candidate in normalised]

        # Skewed scores (not the final score)
        skewed = [skewed_jaccard_similarity(expected, candidate) for candidate in cleaned]
        average = sum(skewed) / len(skewed)
        if average > 0.0:
            print(f"Skewed score: {average}")

        exact = [set_equal(expected, candidate) for candidate in cleaned]
        self.add_scores(exact)
        return self.get_avg_score()
def fix_action_name(a):
    """Normalise the spacing of a parenthesised action, e.g. ``"( a  b )"``.

    The text between the outer parentheses is split on whitespace and joined
    back with single spaces.
    """
    assert a.startswith("(") and a.endswith(")")
    # split() without arguments drops every run of whitespace, so
    # whitespace-only tokens can no longer leave doubled spaces behind.
    return "(" + " ".join(a[1:-1].split()) + ")"


def str_remove_before_first_parentheses(s):
    """Drop everything before the first ``"("``; ``""`` when there is none."""
    start = s.find("(")
    return s[start:] if start != -1 else ""


def str_remove_after_last_parentheses(s):
    """Drop everything after the last ``")"``; ``""`` when there is none."""
    end = s.rfind(")")
    return s[:end + 1] if end != -1 else ""


def cleanup_answer(ans):
    """Turn a raw answer into a flat list of lower-cased ``"(...)"`` items.

    A string is trimmed to the span between its first ``"("`` and last
    ``")"``, lower-cased and split where one item is followed by the next
    (separated by a newline, a comma or a single space).  A list is
    processed element-wise and flattened.  Any other type yields ``None``.
    """
    if isinstance(ans, str):
        text = str_remove_before_first_parentheses(ans)
        text = str_remove_after_last_parentheses(text)
        text = text.lower()
        marker = "######"
        for separator in (")\n(", "),(", ") ("):
            text = text.replace(separator, ")" + marker + "(")
        return text.split(marker)
    if isinstance(ans, list):
        res = []
        for x in ans:
            res.extend(cleanup_answer(x))
        return res
    return None


def set_equal(ans1, ans2):
    """True when both collections hold the same set of elements."""
    return set(ans1) == set(ans2)


def jaccard_similarity(ans1, ans2):
    """Jaccard similarity of two collections; ``1.0`` when both are empty."""
    s1, s2 = set(ans1), set(ans2)
    union = s1 | s2
    if not union:
        # Two empty sets are identical; avoids a ZeroDivisionError.
        return 1.0
    return float(len(s1 & s2)) / len(union)


def skewed_jaccard_similarity(ans1, ans2):
    """Jaccard similarity that gives no credit to answers with extra items.

    Non-symmetric: ``ans1`` is assumed to be the correct answer.  When
    ``ans2`` contains anything that is not in ``ans1`` (hallucination) the
    score is ``0.0``; otherwise it is the plain Jaccard similarity
    (``1.0`` when both are empty).
    """
    s1, s2 = set(ans1), set(ans2)
    if len(s2 - s1) > 0:
        return 0.0
    union = s1 | s2
    if not union:
        return 1.0
    return float(len(s1 & s2)) / len(union)


class BaseEvaluator(ABC):
    """Base class of the task evaluators; accumulates per-answer scores."""

    def __init__(self) -> None:
        self.scores = []

    @abstractmethod
    def get_score(self, ans, doc):
        """Score the answers ``ans`` for sample ``doc``."""
        pass

    def add_scores(self, scores):
        """Append every element of ``scores`` to the accumulated scores."""
        self.scores.extend(scores)

    def get_avg_score(self):
        """Return the mean of the accumulated scores (``0.0`` when empty)."""
        if not self.scores:
            # No answers were scored: no credit rather than ZeroDivisionError.
            return 0.0
        avg_score = sum(self.scores) / len(self.scores)
        return avg_score
def is_subsequence(plan, new_plan):
    """Tell whether ``new_plan`` is a (not necessarily contiguous) subsequence of ``plan``.

    Elements must appear in ``plan`` in the same order as in ``new_plan``.
    An empty ``new_plan`` is a subsequence of every plan.
    """
    # `step in remaining` consumes the iterator up to and including the first
    # match, so each later step can only match further along in `plan`.
    remaining = iter(plan)
    return all(step in remaining for step in new_plan)


def is_subsequence_and_plan(domain, problem, plan, new_plan):
    """Check that ``new_plan`` is a strictly shorter subsequence of ``plan`` that is still a plan.

    Args:
        domain: PDDL domain text, passed on to ``is_plan``.
        problem: PDDL problem text, passed on to ``is_plan``.
        plan: the original action sequence.
        new_plan: the candidate simplified action sequence.

    Returns:
        ``False`` when ``new_plan`` is not shorter than ``plan`` or is not a
        subsequence of it; otherwise the result of
        ``is_plan(domain, problem, new_plan)``.
    """
    if len(plan) <= len(new_plan):
        return False
    if not is_subsequence(plan, new_plan):
        return False
    return is_plan(domain, problem, new_plan)
class NextActionEvaluator(BaseEvaluator):
    """Scores answers to "which action brings us closer to the goal" questions."""

    def get_score(self, ans, doc):
        """Score each proposed action in ``ans`` against ``doc["answer"]``.

        ``doc["answer"]`` provides the lists ``"yes"``, ``"no"`` and
        ``"maybe"`` and optionally ``"opt"``.  Actions listed under "maybe"
        are decided by ``is_on_optimal_plan``.
        """
        truth = doc["answer"]
        towards_goal = [a.lower() for a in truth["yes"]]
        not_towards_goal = [a.lower() for a in truth["no"]]
        undecided = [a.lower() for a in truth["maybe"]]
        opt = truth.get("opt", None)

        for candidate in ans:
            action = candidate.strip().lower()
            if action in towards_goal:
                verdict = True
            elif action in not_towards_goal:
                # applicable, not towards goal
                verdict = False
            elif action not in undecided:
                # Not applicable
                verdict = False
            else:
                # Need to run a planner
                verdict = is_on_optimal_plan(
                    doc["PDDL_domain"].lower(), doc["PDDL_problem"].lower(), action, opt
                )
            self.scores.append(verdict)

        return self.get_avg_score()
class ProgressionEvaluator(BaseEvaluator):
    """Scores answers listing the positive and negative effects of an action."""

    def get_score(self, ans, doc):
        """Score each answer in ``ans`` against ``doc["answer"]``.

        An answer is a sequence of one or two parts: the positive effects and,
        optionally, the negative effects.  It is correct only when both sets
        equal the expected ``"pos"`` and ``"neg"`` sets.
        """
        truth = doc["answer"]
        expected_pos = [atom.lower() for atom in truth["pos"]]
        expected_neg = [atom.lower() for atom in truth["neg"]]

        for candidate in ans:
            if not 1 <= len(candidate) <= 2:
                self.scores.append(False)
                continue
            positives = cleanup_answer(candidate[0])
            # Assuming the last is dropped because it is empty
            negatives = cleanup_answer(candidate[1]) if len(candidate) == 2 else []
            checks = [
                set_equal(expected_pos, positives),
                set_equal(expected_neg, negatives),
            ]
            self.scores.append(all(checks))

        return self.get_avg_score()
class ValidationEvaluator(BaseEvaluator):
    """Scores answers giving the index of the first inapplicable action."""

    def get_score(self, ans, doc):
        """Score each answer in ``ans`` against the index in ``doc["answer"]``.

        ``doc["answer"]`` must convert to an integer ``>= -1``; ``-1`` stands
        for the answer "None".  Answers are compared after stripping and
        lower-casing.  The average distance from the right index is printed
        for information only.
        """
        seq_index = int(doc["answer"])
        assert(seq_index >= -1)

        real_answer = "None" if seq_index == -1 else str(seq_index)
        answers = [x.strip().lower() for x in ans]

        # printing the diff between the predicted index and the real index
        if seq_index >= 0:
            # Only decimal answers have a distance; a non-numeric answer is
            # simply wrong and must not crash the diagnostic with ValueError.
            distances = [abs(seq_index - int(a)) for a in answers if a.isdecimal()]
            if distances:
                avg = sum(distances) / len(distances)
                if avg > 0.0:
                    print(f"Distance from the right index: {avg}")

        self.add_scores([real_answer.lower() == a for a in answers])

        return self.get_avg_score()
class ACPBench_Visitor(Visitor):
    """Tree visitor that collects the pieces of a parsed answer.

    Each method is named after a grammar rule and records what it sees in an
    instance attribute; ``ACPGrammarParser.parse`` reads the attribute that
    matches its start rule.  All attributes start as ``None``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.action_lists = None
        self.action_names = None
        self.progression_lists = None
        self.prog_lists = None
        self.indexes = None

    def action_list(self, tree):
        # Start collecting; `action_name` appends to this list from now on.
        self.action_lists = []

    def prog_list(self, tree):
        # A new prog_list begins: store the one collected so far (if any)
        # and start a fresh one.
        if self.prog_lists is not None:
            self.progression_lists.append(self.prog_lists)
        self.prog_lists = []

    def progression_list(self, tree):
        self.progression_lists = []

    def action_none(self, tree):
        self.action_names = 'None'

    def action_name(self, tree):
        # Drop the first and last child and re-wrap the rest in parentheses.
        # NOTE(review): presumably the dropped children are the LPAR/RPAR
        # tokens of the `action_name` rule - confirm against the grammar.
        act_name = '(' + ''.join(tree.children[1:-1]) + ')'
        self.action_names = act_name
        if self.action_lists is not None:
            self.action_lists.append(act_name)
        if self.prog_lists is not None:
            self.prog_lists.append(act_name)

    def index(self, tree):
        # Keep the index only when the joined children are numeric.
        self.indexes = ''.join(tree.children)
        if not self.indexes.isnumeric():
            self.indexes = None


class ACPGrammarParser(object):
    """Parses model answers with the grammar file, starting at rule ``task``."""

    def __init__(self, grammarfile, task) -> None:
        self.task = task
        with open(grammarfile) as f:
            grammar = f.read()
        self.acp_parser = Lark(grammar, start=task, parser='lalr')

    def parse(self, input, debug=False):
        """Parse ``input`` and return the value collected for ``self.task``.

        Newlines are removed and the text is stripped before parsing.
        Returns ``None`` when parsing or visiting raises, or when
        ``self.task`` is not one of the handled start rules.
        """

        def ignore_errors(e):
            # Error callback handed to the parser.  When the error is at the
            # end of input, feed every expected terminal except WS.
            # NOTE(review): presumably this auto-completes truncated answers
            # (e.g. a missing closing bracket) - confirm with lark's on_error
            # documentation.
            if hasattr(e, 'token') and e.token.type == '$END':
                for x in e.expected:
                    if x != 'WS':
                        e.interactive_parser.feed_token(Token(x, self.acp_parser.get_terminal(x).pattern.value))

            return True


        input = input.replace('\n', '')
        input = input.strip()
        try:
            tree = self.acp_parser.parse(input, on_error=ignore_errors)

            if debug:
                print(tree)
            visitor = ACPBench_Visitor()
            visitor.visit_topdown(tree)
            if self.task == 'action_list':
                return visitor.action_lists
            elif self.task == 'act':
                return visitor.action_names
            elif self.task == 'action_name':
                return visitor.action_names
            elif self.task == 'index':
                return visitor.indexes
            elif self.task == 'progression_list':
                # The visitor stores a prog_list only when the next one
                # starts, so the last one is added here.  The membership test
                # compares by value: a last list equal to an already stored
                # one is not added.
                if visitor.prog_lists not in visitor.progression_lists:
                    visitor.progression_lists.append(visitor.prog_lists)
                return visitor.progression_lists
        except Exception as e:
            if debug:
                print('exception')
                print(e)
        return None
# Used in (action) reachability
def is_unsolvable_new_goal(domain, problem, new_goal):
    """Tell whether ``problem`` becomes unsolvable when its goal is ``new_goal``.

    The ``(:goal ...)`` section of ``problem`` is replaced by ``new_goal``
    and the result is handed to :func:`is_unsolvable`.
    """
    goal = extract_goal(problem)
    new_problem = problem.replace(goal, f"(:goal {new_goal} )")
    return is_unsolvable(domain, new_problem)


def is_unsolvable(domain, problem):
    """Run the planner on the given PDDL texts and report unsolvability.

    Returns:
        The planner's ``"unsolvable"`` entry when it found no plan; ``False``
        when a plan was found, when the planner reported an error (its first
        line is printed), or when the planner returned nothing.
    """
    with tempfile.NamedTemporaryFile() as domain_temp, \
            tempfile.NamedTemporaryFile() as problem_temp:
        # The planner reads from paths, so the texts go to temporary files
        # that must stay alive until the planner call has returned.
        with open(str(domain_temp.name), 'w', encoding='utf8') as file:
            file.write(str(domain))
        with open(str(problem_temp.name), 'w', encoding='utf8') as file:
            file.write(str(problem))

        plans = kp.plan_unordered_topq(domain_file=Path(str(domain_temp.name)), problem_file=Path(str(problem_temp.name)), quality_bound=1.0, number_of_plans_bound=1, timeout=3)

    if plans is None:
        # Checked first: the old order subscripted `plans` before testing it
        # for None.  Without a result unsolvability cannot be claimed.
        return False
    if len(plans["planner_error"]) > 0:
        fl = plans["planner_error"].split("\n")[0]
        print(f'Planner error: {fl}')
        return False
    if len(plans['plans']) == 0:
        return plans["unsolvable"]
    return False


def extract_goal(prob):
    """Return the balanced ``(:goal ...)`` section of a PDDL problem text.

    Raises:
        ValueError: when the text has no ``(:goal`` section or its
            parentheses are never closed.
    """
    marker = "(:goal"
    start = prob.find(marker)
    if start == -1:
        raise ValueError("The problem has no (:goal section")

    depth = 1  # the parenthesis opened by the marker itself
    for i in range(start + len(marker), len(prob)):
        c = prob[i]
        if c == ")":
            depth -= 1
        elif c == "(":
            depth += 1
        if depth == 0:
            return prob[start:i + 1]

    raise ValueError("The (:goal section is not closed")
def get_tarski_problem(domain, problem):
    """Build a ``tl.STRIPS`` problem from PDDL texts (lower-cased first).

    Returns ``None`` (after printing the error) when construction fails.
    """
    with tempfile.NamedTemporaryFile() as domain_temp, \
            tempfile.NamedTemporaryFile() as problem_temp:

        with open(str(domain_temp.name), 'w', encoding='utf8') as file:
            file.write(domain.lower())
        with open(str(problem_temp.name), 'w', encoding='utf8') as file:
            file.write(problem.lower())

        try:
            P = tl.STRIPS(str(domain_temp.name), str(problem_temp.name))
            return P
        except Exception as e:
            print(f"||{e}||")
            return None


def create_tmp_dom_prob_replace_init(P, state, result_domain_file, result_problem_file):
    """Write the domain/problem of ``P`` with ``state`` as initial state.

    The texts come from ``P.PDDL_replace_init_pddl_parser(state)`` and are
    written to the paths given by the ``name`` attribute of the two file
    objects.  Returns the ``(domain, problem)`` pair.
    """
    d, p = P.PDDL_replace_init_pddl_parser(state)
    with open(str(result_domain_file.name), 'w', encoding='utf8') as file:
        file.write(str(d))
    with open(str(result_problem_file.name), 'w', encoding='utf8') as file:
        file.write(str(p))

    return d, p


# Used in next action
def is_on_optimal_plan(domain, problem, action, opt):
    """Tell whether applying ``action`` first keeps an optimal plan reachable.

    Args:
        domain: PDDL domain text.
        problem: PDDL problem text.
        action: parenthesised ground action, e.g. ``"(move a b)"``.
        opt: optimal plan length from the initial state, or ``None`` to have
            it computed by the planner.

    Returns:
        ``True`` when the optimal plan length after ``action`` plus one equals
        ``opt``; ``False`` otherwise, including when the problem cannot be
        built, the action is unknown, or the planner returns no plan.
    """
    with tempfile.NamedTemporaryFile() as domain_temp, \
            tempfile.NamedTemporaryFile() as problem_temp:

        with open(str(domain_temp.name), 'w', encoding='utf8') as file:
            file.write(domain.lower())
        with open(str(problem_temp.name), 'w', encoding='utf8') as file:
            file.write(problem.lower())

        # Here, we need to keep the temp files live until the end of the function
        try:
            P = tl.STRIPS(str(domain_temp.name), str(problem_temp.name))
        except Exception:
            # Unsolvable
            return False

        a = P.get_action_or_none(action[1:-1])
        if a is None:
            return False
        state = P.init
        next_state = tl.progress(state, a)
        if opt is None:
            # Get an optimal plan cost
            plans = generate_optimal_plans_for_problem_state(P, state, num_plans=1, timeout=5)
            if plans is None:
                # No plan from the initial state: same treatment as a
                # missing plan from the next state below.
                return False
            opt = len(plans[0]["actions"])
        else:
            opt = int(opt)

        # Getting an optimal plan for the next state
        next_plans = generate_optimal_plans_for_problem_state(P, next_state, num_plans=1, timeout=5)
        if next_plans is None:
            return False
        next_opt = len(next_plans[0]["actions"])
        return next_opt + 1 == opt


# Used in justification
def is_plan(domain, problem, new_plan):
    """Tell whether ``new_plan`` is applicable from the initial state and reaches the goal."""
    P = get_tarski_problem(domain, problem)
    if P is None:
        # Unsolvable
        return False

    # Check if new_plan is a plan
    current_state = P.init
    for action in new_plan:
        applicable_actions = P.get_applicable_actions(current_state)
        app_actions_list = [f'({a.name.lower()})' for a in applicable_actions]
        if action.lower() not in app_actions_list:
            return False
        a = applicable_actions[app_actions_list.index(action.lower())]
        current_state = tl.progress(current_state, a)
    return tl.entails(current_state, P.goal)


# Used in action reachability
def get_action_preconditions(domain, problem, action):
    """Return the preconditions of ``action`` as parenthesised strings.

    Returns ``None`` when the action is unknown to the problem.
    """
    P = get_tarski_problem(domain, problem)

    assert P is not None, f"Domain\n{domain}\nProblem\n{problem}\nAction: {action}"
    a = P.get_action_or_none(action[1:-1])
    if a is None:
        return a

    return [f'({f})' for f in a.pres]


def generate_top_q_plans(domain, problem, num_plans=10, quality_bound=1.0, timeout=30):
    """Run the K* planner on the given domain/problem file paths."""
    plans = kp.plan_unordered_topq(domain_file=Path(domain), problem_file=Path(problem), number_of_plans_bound=num_plans, quality_bound=quality_bound, timeout=timeout)
    return plans


def generate_optimal_plans_for_problem_state(P, state, num_plans, timeout):
    """Plan from ``state`` in problem ``P``.

    Returns the planner's list of plans, or ``None`` when there is none.
    """
    with tempfile.NamedTemporaryFile() as domain_temp, \
            tempfile.NamedTemporaryFile() as problem_temp:

        create_tmp_dom_prob_replace_init(P, state, domain_temp, problem_temp)
        plans = generate_top_q_plans(domain=str(domain_temp.name), problem=str(problem_temp.name), num_plans=num_plans, quality_bound=1.0, timeout=timeout)
        if plans is None or len(plans['plans']) == 0:
            return None
        return plans['plans']
def entails(state, partialstate):
    """Return True when every element of ``partialstate`` is also in ``state``."""
    return partialstate <= state


def progress(state, act):
    """Return the state obtained by applying ``act`` to ``state``.

    The result is ``(state - act.dels) | act.adds``.

    Raises:
        AssertionError: if ``state`` does not contain all of ``act.pres``.
    """
    # Raised explicitly (instead of ``assert``) so the check survives ``python -O``.
    if not entails(state, act.pres):
        raise AssertionError(
            "Cannot progress with inconsistent state / action precondition:\n\t Action: "
            + act.name + "\n\t State: \n\t\t" + '\n\t\t'.join(state)
        )
    return (state - act.dels) | act.adds


def regress(state, act):
    """Return the state obtained by regressing ``state`` through ``act``.

    The result is ``(state - act.adds) | act.pres``.

    Raises:
        AssertionError: if ``state`` shares any element with ``act.dels``.
    """
    if state & act.dels:
        raise AssertionError(
            "Cannot regress with inconsistent state / action delete effect:\n\t Action: "
            + act.name + "\n\t State: \n\t\t" + '\n\t\t'.join(state)
        )
    return (state - act.adds) | act.pres


def fix_name(s):
    """Normalise an action/fluent name to the space-separated form ``act param``.

    Accepted spellings are ``(act param)``, ``act(param, ...)`` and
    ``act param``. An empty string is returned unchanged.

    Raises:
        AssertionError: if the name contains ``(`` but does not end with ``)``.
    """
    # (act param) -- startswith/endswith are safe on the empty string,
    # where indexing s[0] would raise IndexError.
    if s.startswith('(') and s.endswith(')'):
        return s[1:-1]
    # make it space separated
    s = s.replace(', ', ' ').replace(',', ' ')
    # act(param)
    if '(' in s:
        if not s.endswith(')'):
            raise AssertionError(f"Broken name? {s}")
        s = s.replace('(', ' ').replace(')', '')
    # act param
    return s


class Action:
    """A grounded action: a name plus precondition, add and delete collections.

    Equality and hashing are based on ``name`` only.
    """

    def __init__(self, name, pre, add, delete):
        self.name = name
        self.pres = pre
        self.adds = add
        self.dels = delete

    @staticmethod
    def _parenthesised(facts):
        """Return each fact wrapped in parentheses, in iteration order."""
        return [f"({a})" for a in facts]

    def __str__(self):
        pres = "{" + ", ".join(self._parenthesised(self.pres)) + "}"
        adds = "{" + ", ".join(self._parenthesised(self.adds)) + "}"
        dels = "{" + ", ".join(self._parenthesised(self.dels)) + "}"
        return f"< {self.name}, {pres}, {adds}, {dels} >"

    def toJSON(self):
        """Serialise the action to an indented JSON string with sorted keys."""
        return json.dumps(
            {
                "name": self.name,
                "preconditions": self._parenthesised(self.pres),
                "add_effects": self._parenthesised(self.adds),
                "delete_effects": self._parenthesised(self.dels),
            },
            sort_keys=True,
            indent=4)

    def __repr__(self):
        return self.name

    def __eq__(self, action):
        # Objects without a ``name`` are simply "not comparable" rather than
        # an AttributeError; anything exposing ``name`` still compares by it.
        try:
            other_name = action.name
        except AttributeError:
            return NotImplemented
        return self.name == other_name

    def __hash__(self):
        return hash(self.name)
# The definitions below are methods of the STRIPS class; each receives the
# instance as ``self``.

def __str__(self):
    """Render fluents, initial state, goal and actions as a multi-line string."""
    def braced(label, facts):
        return label + " = {" + ", ".join([f"({fact})" for fact in facts]) + "}"

    fluent_part = braced("P", self.fluents)
    init_part = braced("I", self.init)
    goal_part = braced("G", self.goal)
    action_part = "A = {" + "\n ".join([str(a) for a in self.actions]) + "}"
    return fluent_part + ",\n" + init_part + "\n" + goal_part + "\n" + action_part


def toJSON(self):
    """Serialise fluents, initial state, goal and actions to a JSON string.

    Each action contributes the string produced by its own ``toJSON()``.
    """
    serialised_actions = [a.toJSON() for a in self.actions]
    payload = {
        "fluents": list(self.fluents),
        "initial_state": list(self.init),
        "goal": list(self.goal),
        "actions": serialised_actions,
    }
    return json.dumps(payload, sort_keys=True, indent=4)


def operator_to_action(self, op, check_fluents=True, check_static=False):
    """Build an ``Action`` from the operator ``op``.

    Add and delete effects are restricted to ``self.fluents``. With
    ``check_fluents`` the preconditions are restricted the same way; with
    ``check_static`` preconditions whose first token is listed in
    ``self.static_predicates`` are dropped.
    """
    def effect_names(effect_type):
        names = {fix_name(str(e.atom)) for e in op.effects if isinstance(e, effect_type)}
        return names & self.fluents

    add_effects = effect_names(iofs.AddEffect)
    del_effects = effect_names(iofs.DelEffect)

    preconditions = self.fix_pre_name(op.precondition)
    if check_fluents:
        preconditions = preconditions & self.fluents
    if check_static:
        preconditions = {
            p for p in preconditions
            if p.split()[0] not in self.static_predicates
        }
    return Action(fix_name(str(op)), preconditions, add_effects, del_effects)


def fix_pre_name(self, precondition):
    """Return the normalised names in ``precondition`` as a set.

    An atom yields a one-element set; anything else contributes one name per
    entry of ``precondition.subformulas``.
    """
    if is_atom(precondition):
        return {fix_name(str(precondition))}
    return {fix_name(str(sub)) for sub in precondition.subformulas}


def action(self, name):
    """Look up an action by name; raises ``KeyError`` when it is not in ``action_map``."""
    key = fix_name(name).lower()
    return self.action_map[key]


def get_action_or_none(self, name):
    """Look up an action by name, returning ``None`` when it is unknown.

    A name that contains ``(`` but does not end with ``)`` is rejected up
    front, before ``fix_name`` is called on it.
    """
    if '(' in name and not name.endswith(')'):
        return None
    key = fix_name(name).lower()
    return self.action_map.get(key, None)


def fluent(self, name):
    """Return ``name`` normalised by ``fix_name``."""
    return fix_name(name)


def static_symbols(self):
    """Return the grounder's ``static_symbols`` as a list."""
    return list(self.grounder.static_symbols)


def fluent_symbols(self):
    """Return the grounder's ``fluent_symbols`` as a list."""
    return list(self.grounder.fluent_symbols)
# The definitions below are methods of the STRIPS class; each receives the
# instance as ``self``.

def get_applicable_actions(self, s):
    """Return the actions in ``self.actions`` whose preconditions are all contained in ``s``."""
    applicable = []
    for candidate in self.actions:
        # Same subset test the module-level ``entails`` helper performs.
        if candidate.pres <= s:
            applicable.append(candidate)
    return applicable


def ground_problem(self, problem):
    """Ground ``problem`` with ``LPGroundingStrategy``.

    Returns:
        A tuple ``(grounded_fluents, init, goal, operators, grounder)``.
        ``init`` keeps only initial atoms that are grounded fluents. ``goal``
        is ``[problem.goal]`` when the goal is a single atom, otherwise the
        goal subformulas that are grounded fluents.
    """
    grounder = LPGroundingStrategy(problem, include_variable_inequalities=True)

    operators = []
    for schema_name, bindings in grounder.ground_actions().items():
        schema = problem.get_action(schema_name)
        operators.extend(
            ground_schema_into_plain_operator_from_grounding(schema, binding)
            for binding in bindings
        )

    grounded_fluents = {
        variable.to_atom()
        for variable in grounder.ground_state_variables().objects
    }
    init = [atom for atom in problem.init.as_atoms() if atom in grounded_fluents]

    if isinstance(problem.goal, tarski.syntax.Atom):
        goal = [problem.goal]
    else:
        goal = [sub for sub in problem.goal.subformulas if sub in grounded_fluents]

    return (grounded_fluents, init, goal, operators, grounder)


def get_static(self):
    """Return the normalised names of all grounded atoms of the static symbols."""
    grounded = []
    for symbol in self.static_symbols():
        grounded.extend(self.get_grounded_atoms(symbol))
    return {fix_name(str(atom)) for atom in grounded}
def get_atoms_pddl(d, p, atoms):
    """Turn textual atoms into ground predicates of the parsed domain ``d``.

    Each atom is lower-cased, stripped and split on single spaces into a
    predicate name followed by object names. Object names are resolved against
    ``p.objects | d.constants`` and predicate names against ``d.predicates``,
    both compared in lower case.

    Returns:
        The set of values obtained by calling each matching predicate with its
        resolved objects.

    Raises:
        AssertionError: when the number of resolved objects differs from the
            number of object names used, when the first argument list of a
            predicate does not match its arity, or when some predicate name
            is not found in ``d.predicates``.
    """
    objs = set()
    preds = defaultdict(list)
    for atom in atoms:
        pred_name, *args = atom.lower().strip().split(" ")
        preds[pred_name].append(args)
        objs.update(args)

    constants = [o for o in p.objects | d.constants if o.name.lower() in objs]
    constants_dict = {c.name.lower(): c for c in constants}
    assert len(objs) == len(constants), f"Could not identify all objects: {objs - set(constants_dict.keys())} not found, {set(constants_dict.keys()) - objs} should not be there"

    state = []
    covered_preds = set()
    for f in d.predicates:
        name = f.name.lower()
        if name not in preds:
            continue
        covered_preds.add(name)
        assert len(preds[name][0]) == f.arity, f"The arity does not match: {preds[name]} vs {f.terms}"
        # One ground predicate per argument list recorded for this name.
        for ob in preds[name]:
            state.append(f(*[constants_dict[o] for o in ob]))

    assert len(covered_preds) == len(preds.keys()), f"Covered predicates: \n{sorted(list(covered_preds))} vs \n{sorted(list(preds.keys()))}"
    return set(state)