├── .gitignore
├── LICENSE
├── README.md
├── examples
    ├── README.md
    ├── doc_merge
    │   ├── README.md
    │   ├── doc_merge.py
    │   ├── documents.csv
    │   ├── example_prompts_document_merging.md
    │   ├── plot.py
    │   └── pure_documents.json
    ├── keyword_counting
    │   ├── README.md
    │   ├── countries.csv
    │   ├── dataset_gen_countries.py
    │   ├── example_prompts_keyword_counting.md
    │   ├── keyword_counting.py
    │   └── plot.py
    ├── set_intersection
    │   ├── README.md
    │   ├── dataset_gen_intersection.py
    │   ├── example_prompts_set_intersection_032.md
    │   ├── plot.py
    │   ├── set_intersection_032.csv
    │   ├── set_intersection_032.py
    │   ├── set_intersection_064.csv
    │   ├── set_intersection_064.py
    │   ├── set_intersection_128.csv
    │   ├── set_intersection_128.py
    │   └── utils.py
    └── sorting
    │   ├── README.md
    │   ├── example_prompts_sorting_032.md
    │   ├── plot.py
    │   ├── sorting_032.csv
    │   ├── sorting_032.py
    │   ├── sorting_064.csv
    │   ├── sorting_064.py
    │   ├── sorting_128.csv
    │   ├── sorting_128.py
    │   └── utils.py
├── graph_of_thoughts
    ├── __init__.py
    ├── controller
    │   ├── README.md
    │   ├── __init__.py
    │   └── controller.py
    ├── language_models
    │   ├── README.md
    │   ├── __init__.py
    │   ├── abstract_language_model.py
    │   ├── chatgpt.py
    │   ├── config_template.json
    │   └── llamachat_hf.py
    ├── operations
    │   ├── README.md
    │   ├── __init__.py
    │   ├── graph_of_operations.py
    │   ├── operations.py
    │   └── thought.py
    ├── parser
    │   ├── __init__.py
    │   └── parser.py
    └── prompter
    │   ├── __init__.py
    │   └── prompter.py
├── paper
    ├── README.md
    ├── final_results_gpt35.tar.bz2
    ├── pics
    │   └── preview.svg
    ├── plots.py
    ├── poster.pdf
    └── poster.png
└── pyproject.toml


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | dist/
11 | build/
12 | 
13 | # Installer logs
14 | pip-log.txt
15 | 
16 | # IDEs
17 | .idea/
18 | *.vscode/
19 | *.pycproj
20 | *.user
21 | *.pyproj.user
22 | 
23 | # Data
24 | results/
25 | *.out
26 | *.err
27 | 
28 | # Environments
29 | env/
30 | 
31 | # Config File
32 | **/config.json
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2023 ETH Zurich.
 2 |                    All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 | - Redistributions of source code must retain the above copyright
 9 |   notice, this list of conditions and the following disclaimer.
10 | 
11 | - Redistributions in binary form must reproduce the above copyright
12 |   notice, this list of conditions and the following disclaimer listed
13 |   in this license in the documentation and/or other materials
14 |   provided with the distribution.
15 | 
16 | - Neither the name of the copyright holders nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | The copyright holders provide no reassurances that the source code
21 | provided does not infringe any patent, copyright, or any other
22 | intellectual property rights of third parties.  The copyright holders
23 | disclaim any liability to any recipient for claims brought against
24 | recipient by any third party for infringement of that parties
25 | intellectual property rights.
26 | 
27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 | 
39 | 
40 | Citation
41 | ========
42 | 
43 | Any published work which uses this software should include the
44 | following citation:
45 | 
46 | ----------------------------------------------------------------------
47 | Maciej Besta, Nils Blach, Ales Kubicek, Robert Gerstenberger, Lukas
48 | Gianinazzi, Joanna Gajda, Tomasz Lehmann, Michał Podstawski, Hubert
49 | Niewiadomski, Piotr Nyczyk, Torsten Hoefler (2024): Graph of Thoughts:
50 | Solving Elaborate Problems with Large Language Models. In: Proceedings
51 | of the AAAI Conference on Artificial Intelligence, 38(16),
52 | 17682-17690. https://doi.org/10.1609/aaai.v38i16.29720
53 | ----------------------------------------------------------------------
54 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Graph of Thoughts (GoT)
  2 | 
  3 | <p align="center">
  4 |   <img src="paper/pics/preview.svg">
  5 | </p>
  6 | 
  7 | This is the official implementation of [Graph of Thoughts: Solving Elaborate Problems with Large Language Models](https://arxiv.org/pdf/2308.09687.pdf).  
  8 | This framework gives you the ability to solve complex problems by modeling them as a Graph of Operations (GoO), which is automatically executed with a Large Language Model (LLM) as the engine.  
  9 | This framework is designed to be flexible and extensible, allowing you to not only solve problems using the new GoT approach, but also to implement GoOs resembling previous approaches like CoT or ToT.
 10 | 
 11 | ## Setup Guide
 12 | 
 13 | In order to use this framework, you need to have a working installation of Python 3.8 or newer.
 14 | 
 15 | ### Installing GoT
 16 | 
 17 | Before running either of the following two installation methods, make sure to activate your Python environment (if any) beforehand.  
 18 | If you are a user and you just want to use `graph_of_thoughts`, you can install it directly from PyPI:
 19 | ```bash
 20 | pip install graph_of_thoughts
 21 | ```
 22 | If you are a developer and you want to modify the code, you can install it in editable mode from source:
 23 | ```bash
 24 | git clone https://github.com/spcl/graph-of-thoughts.git
 25 | cd graph-of-thoughts
 26 | pip install -e .
 27 | ```
 28 | 
 29 | ### Configuring the LLM
 30 | 
 31 | In order to use the framework, you need to have access to an LLM.
 32 | Please follow the instructions in the [Controller README](graph_of_thoughts/controller/README.md) to configure the LLM of your choice.
 33 | 
 34 | ## Quick Start
 35 | 
 36 | The following code snippet shows how to use the framework to solve the sorting problem for a list of 32 numbers using a CoT-like approach.  
 37 | Make sure you have followed the [Setup Guide](#setup-guide) before running the code.
 38 | 
 39 | ```python
 40 | from examples.sorting.sorting_032 import SortingPrompter, SortingParser, utils
 41 | from graph_of_thoughts import controller, language_models, operations
 42 | 
 43 | # Problem input
 44 | 
 45 | to_be_sorted = "[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]"
 46 | 
 47 | # Create the Graph of Operations
 48 | gop = operations.GraphOfOperations()
 49 | gop.append_operation(operations.Generate())
 50 | gop.append_operation(operations.Score(scoring_function=utils.num_errors))
 51 | gop.append_operation(operations.GroundTruth(utils.test_sorting))
 52 | 
 53 | # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)
 54 | lm = language_models.ChatGPT("config.json", model_name="chatgpt")
 55 | 
 56 | # Create the Controller
 57 | ctrl = controller.Controller(
 58 |   lm, 
 59 |   gop, 
 60 |   SortingPrompter(), 
 61 |   SortingParser(),
 62 |   # The following dictionary is used to configure the initial thought state
 63 |   {
 64 |     "original": to_be_sorted,
 65 |     "current": "",
 66 |     "method": "cot"
 67 |   }
 68 | )
 69 | 
 70 | # Run the Controller and generate the output graph
 71 | ctrl.run()
 72 | ctrl.output_graph("output_cot.json")
 73 | ```
 74 | 
 75 | To run the more sophisticated GoT approach, you can use the following code snippet.
 76 | 
 77 | ```python
 78 | from examples.sorting.sorting_032 import SortingPrompter, SortingParser, got, utils
 79 | from graph_of_thoughts import controller, language_models, operations
 80 | 
 81 | # Problem input
 82 | 
 83 | to_be_sorted = "[0, 2, 6, 3, 8, 7, 1, 1, 6, 7, 7, 7, 7, 9, 3, 0, 1, 7, 9, 1, 3, 5, 1, 3, 6, 4, 5, 4, 7, 3, 5, 7]"
 84 | 
 85 | # Retrieve the Graph of Operations
 86 | gop = got()
 87 | 
 88 | # Configure the Language Model (Assumes config.json is in the current directory with OpenAI API key)
 89 | lm = language_models.ChatGPT("config.json", model_name="chatgpt")
 90 | 
 91 | # Create the Controller
 92 | ctrl = controller.Controller(
 93 |   lm, 
 94 |   gop, 
 95 |   SortingPrompter(), 
 96 |   SortingParser(),
 97 |   # The following dictionary is used to configure the initial thought state
 98 |   {
 99 |     "original": to_be_sorted,
100 |     "current": "",
101 |     "phase": 0,
102 |     "method": "got"
103 |   }
104 | )
105 | 
106 | # Run the Controller and generate the output graph
107 | ctrl.run()
108 | ctrl.output_graph("output_got.json")
109 | ```
110 | You can compare the two results by inspecting the output graphs `output_cot.json` and `output_got.json`.  
111 | The final thought states' scores indicate the number of errors in the sorted list.
112 | 
113 | ## Documentation
114 | The paper gives a high-level overview of the framework and its components.  
115 | In order to understand the framework in more detail, you can read the documentation of the individual modules.  
116 | Especially the [Controller](graph_of_thoughts/controller/README.md) and [Operations](graph_of_thoughts/operations/README.md) modules are important for understanding how to make the most out of the framework.  
117 | We took extra care to fully document the code, so that you can easily understand how it works and how to extend it.
118 | 
119 | ## Examples
120 | 
121 | The [examples](examples) directory contains several examples of problems that can be solved using the framework, including the ones presented in the paper.  
122 | It is a great starting point for learning how to use the framework to solve real problems.  
123 | Each example contains a `README.md` file with instructions on how to run it and play with it. The code is fully documented and should be easy to follow.
124 | You can also run the examples straight from the main directory. Note that the results will be stored in the respective examples sub-directory.
125 | 
126 | Try for instance:
127 | ```bash
128 | python -m examples.sorting.sorting_032
129 | python -m examples.keyword_counting.keyword_counting
130 | ```
131 | ## Paper Results
132 | 
133 | You can run the experiments from the paper by following the instructions in the [examples](examples) directory.  
134 | However, if you just want to inspect and replot the results, you can use the [paper](paper) directory.
135 | 
136 | ## Citations
137 | 
138 | If you find this repository valuable, please give it a star!  
139 | Got any questions or feedback? Feel free to reach out to [nils.blach@inf.ethz.ch](mailto:nils.blach@inf.ethz.ch) or open an issue.  
140 | Using this in your work? Please reference us using the provided citation:
141 | 
142 | ```bibtex
143 | @article{besta2024got,
144 |   title = {{Graph of Thoughts: Solving Elaborate Problems with Large Language Models}},
145 |   author = {Besta, Maciej and Blach, Nils and Kubicek, Ales and Gerstenberger, Robert and Gianinazzi, Lukas and Gajda, Joanna and Lehmann, Tomasz and Podstawski, Micha{\l} and Niewiadomski, Hubert and Nyczyk, Piotr and Hoefler, Torsten},
146 |   year = 2024,
147 |   month = {Mar},
148 |   journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
149 |   volume = 38,
150 |   number = 16,
151 |   pages = {17682-17690},
152 |   publisher = {AAAI Press},
153 |   doi = {10.1609/aaai.v38i16.29720},
154 |   url = {https://ojs.aaai.org/index.php/AAAI/article/view/29720}
155 | }
156 | ```
157 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 | 
3 | This directory contains scripts for running various examples using the Graph of Thoughts package. Each script is a standalone Python program that sets up and runs a particular example.
4 | 
5 | We further include prompt files for each example that can be used to test prompts manually in a console.
6 | 
7 | Please refer to the individual example directories for more information on the specific example.
8 | 


--------------------------------------------------------------------------------
/examples/doc_merge/README.md:
--------------------------------------------------------------------------------
 1 | # Document Merging
 2 | 
 3 | The use case in this directory generates a new Non-Disclosure Agreement (NDA) based on several input ones that partially overlap in terms of their contents.
 4 | We provide implementations of five different approaches:
 5 | - IO
 6 | - Chain-of-Thought (CoT)
 7 | - Tree of Thought (ToT)
 8 | - Graph of Thoughts (GoT):
 9 |   - GoT: aggregation of fully merged NDAs
10 |   - GoT2: aggregation of partially merged NDAs
11 | 
12 | ## Data
13 | 
14 | We provide an input file with 50 samples: `documents.csv`.
15 | 
16 | ## Execution
17 | 
18 | The file to execute the use case is called
19 | `doc_merge.py`. In the main body, one can
20 | select the specific samples to be run (variable samples) and the
21 | approaches (variable approaches). It is also possible to set a budget in
22 | dollars (variable budget).
23 | 
24 | The Python scripts will create the directory `results`, if it is not
25 | already present. In the `results` directory, another directory is created
26 | for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.
27 | Inside each execution-specific directory, two files (`config.json`,
28 | `log.log`) and a separate directory for each selected approach are
29 | created. `config.json` contains the configuration of the run: input data,
30 | selected approaches, name of the LLM, and the budget. `log.log` contains
31 | the prompts and responses of the LLM as well as additional debug data.
32 | The approach directories contain a separate json file for every sample
33 | and the file contains the Graph Reasoning State (GRS) for that sample.
34 | 
35 | ## Plot Data
36 | 
37 | Change the results directory passed to `get_plotting_data` at the bottom
38 | of `plot.py` and run `python3 plot.py` to plot your data.
39 | 


--------------------------------------------------------------------------------
/examples/doc_merge/plot.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | 
  9 | import json
 10 | import os
 11 | import matplotlib.pyplot as plt
 12 | 
 13 | 
 14 | def get_complete_results(base_directory):
 15 |     results_complete = {}
 16 |     for folder_name in os.listdir(base_directory):
 17 |         folder_path = os.path.join(base_directory, folder_name)
 18 |         if os.path.isdir(folder_path):
 19 |             results_complete[folder_name] = []
 20 |             for file_name in os.listdir(folder_path):
 21 |                 if file_name.endswith(".json"):
 22 |                     file_path = os.path.join(folder_path, file_name)
 23 |                     with open(file_path, "r") as f:
 24 |                         data = json.load(f)
 25 |                         results_complete[folder_name].append(
 26 |                             {"key": int(file_name.split(".")[0]), "data": data}
 27 |                         )
 28 |         for key in results_complete.keys():
 29 |             results_complete[key] = sorted(
 30 |                 results_complete[key], key=lambda x: x["key"]
 31 |             )
 32 |     return results_complete
 33 | 
 34 | 
 35 | def get_final_scores(results_complete):
 36 |     scores = {}
 37 |     for method in results_complete.keys():
 38 |         scores[method] = []
 39 |         for result in results_complete[method]:
 40 |             score = 0
 41 |             solved = False
 42 |             cost = 1
 43 |             prompt_tokens = 0
 44 |             completion_tokens = 0
 45 |             for op in reversed(result["data"]):
 46 |                 if "cost" in op:
 47 |                     cost = op["cost"]
 48 |                     prompt_tokens = op["prompt_tokens"]
 49 |                     completion_tokens = op["completion_tokens"]
 50 |                 if "operation" in op and op["operation"] == "score":
 51 |                     try:
 52 |                         score = max(op["scores"])
 53 |                         break
 54 |                     except:
 55 |                         continue
 56 |             scores[method].append(
 57 |                 [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
 58 |             )
 59 |         scores[method] = sorted(scores[method], key=lambda x: x[0])
 60 |     return scores
 61 | 
 62 | 
 63 | def get_plotting_data(base_directory):
 64 |     results_complete = get_complete_results(base_directory)
 65 |     scores = get_final_scores(results_complete)
 66 |     results_plotting = {
 67 |         method: {
 68 |             "scores": [x[1] for x in scores[method]],
 69 |             "solved": sum([1 for x in scores[method] if x[2]]),
 70 |             "costs": [x[5] for x in scores[method]],
 71 |         }
 72 |         for method in scores.keys()
 73 |     }
 74 |     return results_plotting
 75 | 
 76 | 
 77 | def plot_results(
 78 |     results,
 79 |     methods_order=["io", "cot", "tot", "got", "got2"],
 80 |     model="GPT-3.5",
 81 |     num_ndas=4,
 82 |     y_lower=0,
 83 |     y_upper=10,
 84 |     cost_upper=1.8,
 85 |     display_solved=True,
 86 |     annotation_offset=1,
 87 |     display_left_ylabel=False,
 88 |     display_right_ylabel=False,
 89 | ):
 90 |     methods_order = [method for method in methods_order if method in results]
 91 |     scores_ordered = [
 92 |         [score for score in results[method]["scores"]] for method in methods_order
 93 |     ]
 94 |     total_costs = [sum(results[method]["costs"]) for method in methods_order]
 95 | 
 96 |     # Create figure and axis
 97 |     fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))
 98 | 
 99 |     # Create boxplots
100 |     positions = range(1, len(methods_order) + 1)
101 |     ax.boxplot(scores_ordered, positions=positions)
102 | 
103 |     fig_fontsize = 12
104 | 
105 |     # Set the ticks and labels
106 |     methods_labels = ["IO", "CoT", "ToT", "GoT", "GoT2"]
107 |     ax.set_xticks(range(1, len(methods_order) + 1))
108 |     ax.set_xticks(range(1, len(methods_order) + 1))
109 |     ax.set_xticklabels(methods_labels)
110 |     # ax.set_xlabel("Approach")
111 | 
112 |     ax.set_ylim(y_lower, 12 if display_solved else 9.75)
113 |     plt.yticks(fontsize=fig_fontsize)
114 | 
115 |     if display_left_ylabel:
116 |         ax.set_ylabel(
117 |             f"Score (out of 10); the higher the better", fontsize=fig_fontsize
118 |         )
119 | 
120 |     # ax.set_title(f"Document Merging")
121 | 
122 |     ax2 = ax.twinx()
123 |     ax2.bar(
124 |         positions,
125 |         total_costs,
126 |         alpha=0.5,
127 |         color="blue",
128 |         label="Total Cost ($); the lower the better",
129 |     )
130 |     ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize)
131 |     ax2.set_ylim(0, cost_upper)
132 |     number_of_ticks = len(ax.get_yticks())
133 |     tick_interval = cost_upper / (number_of_ticks)
134 |     ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]
135 | 
136 |     # Set custom tick positions for ax2
137 |     ax2.set_yticks(ax2_ticks)
138 | 
139 |     if display_right_ylabel:
140 |         ax2.set_ylabel(
141 |             "Total Cost ($); the lower the better",
142 |             color="#1919ff",
143 |             fontsize=fig_fontsize,
144 |         )
145 | 
146 |     if display_solved:
147 |         annotation_height = y_upper + annotation_offset
148 |         count = 1
149 |         for method in methods_order:
150 |             if method not in results:
151 |                 continue
152 |             solved = results[method]["solved"]
153 |             ax.text(
154 |                 count, annotation_height, f"Solved: {solved}", ha="center", va="bottom"
155 |             )
156 |             count += 1
157 | 
158 |     model = model.replace(".", "").replace("-", "").lower()
159 |     fig.savefig(f"doc_merge_{model}_{num_ndas}.pdf", bbox_inches="tight")
160 | 
161 | 
# Plot the data found below "results/" when this file is executed.
plot_results(
    results=get_plotting_data("results/"),
    model="GPT-3.5",
    num_ndas=4,
    y_upper=10,
    cost_upper=15,
    display_solved=False,
    display_left_ylabel=True,
)
171 | 


--------------------------------------------------------------------------------
/examples/keyword_counting/README.md:
--------------------------------------------------------------------------------
 1 | # Keyword Counting
 2 | 
 3 | The use case in this directory counts how often each country occurs
 4 | in a long passage of text. We provide implementations of seven different approaches:
 5 | - IO
 6 | - Chain-of-Thought (CoT)
 7 | - Tree of Thought (ToT):
 8 |   - ToT: wider tree, meaning more branches per level
 9 |   - ToT2: tree with more levels, but fewer branches per level
10 | - Graph of Thoughts (GoT):
11 |   - GoT4: split passage into 4 sub-passages
12 |   - GoT8: split passage into 8 sub-passages
13 |   - GoTx: split by sentences
14 | 
15 | ## Data
16 | 
17 | We provide an input file with 100 samples: `countries.csv`. It is also possible to use
18 | the data generator `dataset_gen_countries.py` to generate additional or
19 | different samples (using GPT-4). The parameters can be updated on line 54 (number of samples to be generated). 
20 | Note that not every generated sample will be included in the dataset, as each sample is 
21 | additionally tested for validity (observe script output for details).
22 | 
23 | ## Execution
24 | 
25 | The file to execute the use case is called
26 | `keyword_counting.py`. In the main body, one can
27 | select the specific samples to be run (variable samples) and the
28 | approaches (variable approaches). It is also possible to set a budget in
29 | dollars (variable budget).
30 | 
31 | The Python scripts will create the directory `results`, if it is not
32 | already present. In the `results` directory, another directory is created
33 | for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.
34 | Inside each execution-specific directory, two files (`config.json`,
35 | `log.log`) and a separate directory for each selected approach are
36 | created. `config.json` contains the configuration of the run: input data,
37 | selected approaches, name of the LLM, and the budget. `log.log` contains
38 | the prompts and responses of the LLM as well as additional debug data.
39 | The approach directories contain a separate json file for every sample
40 | and the file contains the Graph Reasoning State (GRS) for that sample.
41 | 
42 | ## Plot Data
43 | 
44 | Change the results directory in line 150 of `plot.py` and run `python3
45 | plot.py` to plot your data.
46 | 


--------------------------------------------------------------------------------
/examples/keyword_counting/dataset_gen_countries.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Ales Kubicek
  8 | 
  9 | import csv
 10 | from typing import List, Tuple
 11 | from graph_of_thoughts import controller
 12 | 
 13 | 
 14 | def find_country_indices(text: str, country: str) -> List[Tuple[int, str]]:
 15 |     """
 16 |     Finds the indices of the occurences of a given country in the input text.
 17 | 
 18 |     :param text: Input text.
 19 |     :type text: str
 20 |     :param country: Country to search for.
 21 |     :type country: str
 22 |     :return: List of tuples, where each tuple consists of index and country.
 23 |     :rtype: List[Tuple[int, str]]
 24 |     """
 25 | 
 26 |     indices = []
 27 |     index = text.find(country)
 28 |     while index != -1:
 29 |         indices.append(index)
 30 |         index = text.find(country, index + 1)
 31 |     return [(index, country) for index in indices]
 32 | 
 33 | 
# Country names of the "primary" group. The list is alphabetical except for
# the last two entries ("Belgium", "Norway").
# NOTE(review): how this list is consumed is not visible in this part of the
# file - confirm before reordering or renaming entries.
primary_countries = [
    "Afghanistan",
    "Argentina",
    "Australia",
    "Brazil",
    "Canada",
    "China",
    "Colombia",
    "Cuba",
    "Egypt",
    "France",
    "Germany",
    "Greece",
    "India",
    "Indonesia",
    "Iran",
    "Iraq",
    "Ireland",
    "Israel",
    "Italy",
    "Japan",
    "Kenya",
    "Mexico",
    "Netherlands",
    "New Zealand",
    "Nigeria",
    "North Korea",
    "Pakistan",
    "Peru",
    "Philippines",
    "Poland",
    "Portugal",
    "Russia",
    "Saudi Arabia",
    "South Africa",
    "South Korea",
    "Spain",
    "Sweden",
    "Switzerland",
    "Thailand",
    "Turkey",
    "Ukraine",
    "United Arab Emirates",
    "United Kingdom",
    "United States",
    "Venezuela",
    "Vietnam",
    "Yemen",
    "Zimbabwe",
    "Belgium",
    "Norway",
]
# Adjectival forms for the primary countries. Some countries contribute more
# than one entry (e.g. "Filipino" and "Philippine"), so this list is longer
# than primary_countries and is not index-aligned with it.
# NOTE(review): "Argentine ", "New Zealander " and "Saudi " end with a space -
# presumably to avoid matching the longer entries "Argentinean" and
# "Saudi Arabian"; confirm that this is intentional.
# NOTE(review): "United Arab Emirates" is a country name, not an adjective -
# confirm that it belongs in this list.
primary_adjectives = [
    "Afghan",
    "Argentine ",
    "Argentinean",
    "Australian",
    "Brazilian",
    "Canadian",
    "Chinese",
    "Colombian",
    "Cuban",
    "Egyptian",
    "French",
    "German",
    "Greek",
    "Indian",
    "Indonesian",
    "Iranian",
    "Iraqi",
    "Irish",
    "Israeli",
    "Italian",
    "Japanese",
    "Kenyan",
    "Mexican",
    "Dutch",
    "New Zealander ",
    "Kiwi",
    "Nigerian",
    "North Korean",
    "Pakistani",
    "Peruvian",
    "Filipino",
    "Philippine",
    "Polish",
    "Portuguese",
    "Russian",
    "Saudi ",
    "Saudi Arabian",
    "South African",
    "South Korean",
    "Spanish",
    "Swedish",
    "Swiss",
    "Thai",
    "Turkish",
    "Ukrainian",
    "United Arab Emirates",
    "Emirati",
    "British",
    "American",
    "Venezuelan",
    "Vietnamese",
    "Yemeni",
    "Zimbabwean",
    "Belgian",
    "Norwegian",
]
# Further country names, kept separate from primary_countries. Alternative
# names of the same country are listed as separate entries ("Czech Republic"
# and "Czechia", "East Timor" and "Timor-Leste", "Myanmar" and "Burma").
# NOTE(review): the entries appear to pair positionally with rest_adjectives
# (the visible part lines up index by index) - confirm before reordering,
# adding or removing entries.
rest_countries = [
    "Albania",
    "Algeria",
    "Andorra",
    "Angola",
    "Antigua and Barbuda",
    "Armenia",
    "Austria",
    "Azerbaijan",
    "The Bahamas",
    "Bahrain",
    "Bangladesh",
    "Barbados",
    "Belarus",
    "Belize",
    "Benin",
    "Bhutan",
    "Bolivia",
    "Bosnia and Herzegovina",
    "Botswana",
    "Brunei",
    "Bulgaria",
    "Burkina Faso",
    "Burundi",
    "Cabo Verde",
    "Cambodia",
    "Cameroon",
    "Central African Republic",
    "Chad",
    "Chile",
    "Comoros",
    "Congo",
    "Costa Rica",
    "Côte d’Ivoire",
    "Croatia",
    "Cyprus",
    "Czech Republic",
    "Czechia",
    "Denmark",
    "Djibouti",
    "Dominica",
    "Dominican Republic",
    "East Timor",
    "Timor-Leste",
    "Ecuador",
    "El Salvador",
    "Equatorial Guinea",
    "Eritrea",
    "Estonia",
    "Eswatini",
    "Ethiopia",
    "Fiji",
    "Finland",
    "Gabon",
    "The Gambia",
    "Georgia",
    "Ghana",
    "Grenada",
    "Guatemala",
    "Guinea",
    "Guinea-Bissau",
    "Guyana",
    "Haiti",
    "Honduras",
    "Hungary",
    "Iceland",
    "Jamaica",
    "Jordan",
    "Kazakhstan",
    "Kiribati",
    "Kosovo",
    "Kuwait",
    "Kyrgyzstan",
    "Laos",
    "Latvia",
    "Lebanon",
    "Lesotho",
    "Liberia",
    "Libya",
    "Liechtenstein",
    "Lithuania",
    "Luxembourg",
    "Madagascar",
    "Malawi",
    "Malaysia",
    "Maldives",
    "Mali",
    "Malta",
    "Marshall Islands",
    "Mauritania",
    "Mauritius",
    "Micronesia",
    "Moldova",
    "Monaco",
    "Mongolia",
    "Montenegro",
    "Morocco",
    "Mozambique",
    "Myanmar",
    "Burma",
    "Namibia",
    "Nauru",
    "Nepal",
    "Nicaragua",
    "Niger",
    "North Macedonia",
    "Oman",
    "Palau",
    "Panama",
    "Papua New Guinea",
    "Paraguay",
    "Qatar",
    "Romania",
    "Rwanda",
    "Saint Kitts and Nevis",
    "Saint Lucia",
    "Saint Vincent and the Grenadines",
    "Samoa",
    "San Marino",
    "Sao Tome and Principe",
    "Senegal",
    "Serbia",
    "Seychelles",
    "Sierra Leone",
    "Singapore",
    "Slovakia",
    "Slovenia",
    "Solomon Islands",
    "Somalia",
    "Sri Lanka",
    "Sudan",
    "Suriname",
    "Syria",
    "Taiwan",
    "Tajikistan",
    "Tanzania",
    "Togo",
    "Tonga",
    "Trinidad and Tobago",
    "Tunisia",
    "Turkmenistan",
    "Tuvalu",
    "Uganda",
    "Uruguay",
    "Uzbekistan",
    "Vanuatu",
    "Vatican City",
    "Zambia",
]
# Adjectives/demonyms of the countries that are NOT permitted in a generated
# passage. The generation loop rejects a passage if any of these strings occurs
# in it as a plain substring, so entries must not carry stray whitespace
# (a trailing space would make "Trinidadian." or "Trinidadian," slip through).
# NOTE(review): repeated entries ("Czech", "Dominican") presumably mirror
# several name variants of the same country in rest_countries; they are
# harmless for the membership test and are kept as they were.
rest_adjectives = [
    "Albanian",
    "Algerian",
    "Andorran",
    "Angolan",
    "Antiguan and Barbudan",
    "Armenian",
    "Austrian",
    "Azerbaijani",
    "Bahamian",
    "Bahraini",
    "Bangladeshi",
    "Barbadian",
    "Belarusian",
    "Belizean",
    "Beninese",
    "Bhutanese",
    "Bolivian",
    "Bosnian and Herzegovinian",
    "Botswanan",
    "Bruneian",
    "Bulgarian",
    "Burkinabè",
    "Burundian",
    "Cape Verdean",
    "Cambodian",
    "Cameroonian",
    "Central African",
    "Chadian",
    "Chilean",
    "Comorian",
    "Congolese",
    "Costa Rican",
    "Ivorian",
    "Croatian",
    "Cypriot",
    "Czech",
    "Czech",
    "Danish",
    "Djiboutian",
    "Dominican",
    "Dominican",
    "East Timorese",
    "Timorese",
    "Ecuadorian",
    "Salvadoran",
    "Equatorial Guinean",
    "Eritrean",
    "Estonian",
    "Swazi",
    "Ethiopian",
    "Fijian",
    "Finnish",
    "Gabonese",
    "Gambian",
    "Georgian",
    "Ghanaian",
    "Grenadian",
    "Guatemalan",
    "Guinean",
    "Bissau-Guinean",
    "Guyanese",
    "Haitian",
    "Honduran",
    "Hungarian",
    "Icelandic",
    "Jamaican",
    "Jordanian",
    "Kazakh",
    "I-Kiribati",
    "Kosovar",
    "Kuwaiti",
    "Kyrgyz",
    "Laotian",
    "Latvian",
    "Lebanese",
    "Basotho",
    "Liberian",
    "Libyan",
    "Liechtensteiner",
    "Lithuanian",
    "Luxembourger",
    "Malagasy",
    "Malawian",
    "Malaysian",
    "Maldivian",
    "Malian",
    "Maltese",
    "Marshallese",
    "Mauritanian",
    "Mauritian",
    "Micronesian",
    "Moldovan",
    "Monégasque",
    "Mongolian",
    "Montenegrin",
    "Moroccan",
    "Mozambican",
    "Myanmarese",
    "Burmese",
    "Namibian",
    "Nauruan",
    "Nepali",
    "Nicaraguan",
    "Nigerien",
    "Macedonian",
    "Omani",
    "Palauan",
    "Panamanian",
    "Papua New Guinean",
    "Paraguayan",
    "Qatari",
    "Romanian",
    "Rwandan",
    "Kittitian",
    "Nevisian",
    "Saint Lucian",
    "Vincentian",
    "Samoan",
    "Sammarinese",
    "Santomean",
    "Senegalese",
    "Serbian",
    "Seychellois",
    "Sierra Leonean",
    "Singaporean",
    "Slovak",
    "Slovenian",
    "Solomon Islander",
    "Somali",
    "Sri Lankan",
    "Sudanese",
    "Surinamese",
    "Syrian",
    "Taiwanese",
    "Tajik",
    "Tanzanian",
    "Togolese",
    "Tongan",
    "Trinidadian",
    "Tobagonian",
    "Tunisian",
    "Turkmen",
    "Tuvaluan",
    "Ugandan",
    "Uruguayan",
    "Uzbek",
    "Ni-Vanuatu",
    "Vatican",
    "Zambian",
]
443 | 
# Language model handle used to generate the passages; it is built from a JSON
# configuration file and the "chatgpt4" model entry.
# NOTE(review): the repository tree lists config_template.json under
# graph_of_thoughts/language_models/ and no config.json under controller/ --
# confirm that this path (and the `controller.ChatGPT` namespace) is still valid.
lm = controller.ChatGPT(
    "../../graph_of_thoughts/controller/config.json", model_name="chatgpt4"
)
447 | 
# Prompt sent unchanged for every query: an instruction, the list of
# restrictions, one worked example (country list + passage) and finally the
# country list for which the model has to write a new passage.
prompt = """<Instruction> Generate a continuous passage (single paragraph) of 16 sentences following the provided restrictions precisely. </Instruction>

<Restrictions>
The following restrictions must apply to the generated text:
1. Single continuous passage of exactly 16 sentences without any paragraphs (line breaks).
2. Countries appearing in the passage must be only from the provided list. No other countries can be mentioned.
3. When a country is mentioned in the passage, it must be mentioned multiple times consecutively in the same or following sentences.
4. Passage should be creative and coherent.
5. Using adjectives of a country is NOT allowed (e.g., "Colombian coffee" should be "coffee from Colombia" instead)
</Restrictions>

<Example>
List of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]
Passage:
While exploring the ancient ruins in Greece, Sam discovered manuscripts that hinted at the hidden treasures of Egypt. It seemed these treasures were once stolen from Egypt by rogue merchants and secretly moved to Greece, only to be buried under layers of time. Intrigued, he shared the findings with his friend Maya from India, who was an expert in decoding ancient languages. She pointed out that there was a similar legend in India about treasures from China that had somehow ended up in the southern parts of India, possibly through trade or conquest. She also recounted tales from China that spoke of incredible artifacts from Indonesia, suggesting a rich tapestry of cultural exchanges throughout history. Their conversation took an interesting turn when Sam mentioned a book he'd read about the mysterious connections between Argentina and Brazil. The book detailed how both Argentina and Brazil, despite their differences, shared tales of lost civilizations and forgotten cities deep within their jungles. Maya excitedly mentioned that she'd been to the Philippines and had heard local legends about ancient ties with Indonesia and how traders from the Philippines would journey to Indonesia in search of spices and other goods. Thinking of spices, Sam fondly recalled his trip to Spain, where he had learned about the country's historical links with Portugal. Spain and Portugal, both maritime giants of their time, had extensively explored unknown lands and established trade routes. Maya, remembering her travels, said that she had been to Belgium once and was fascinated by its connections with the Netherlands. Both Belgium and the Netherlands, she explained, had rich histories of art, trade, and diplomacy that intertwined them for centuries. They both sat back, marveling at the interconnectedness of the world and how countries from Greece to the Netherlands shared tales of adventure, discovery, and mystery.
</Example>

List of countries: [Afghanistan, Argentina, Australia, Brazil, Canada, China, Colombia, Cuba, Egypt, France, Germany, Greece, India, Indonesia, Iran, Iraq, Ireland, Israel, Italy, Japan, Kenya, Mexico, Netherlands, New Zealand, Nigeria, North Korea, Pakistan, Peru, Philippines, Poland, Portugal, Russia, Saudi Arabia, South Africa, South Korea, Spain, Sweden, Switzerland, Thailand, Turkey, Ukraine, United Arab Emirates, United Kingdom, United States, Venezuela, Vietnam, Yemen, Zimbabwe, Belgium, Norway]
Passage:
"""
468 | 
import re  # whole-name matching of non-permitted country names below

# Generate passages of text that contain country names to be used as input for
# the keyword counting.
#
# Input(x)  : Number of samples
# Output(y) : Passages written to a file in the CSV format.
#             File contains the sample ID, the passage, the countries the
#             passage contains, the sentences of the passages, number of
#             characters of the passage.

num_samples = 100
sample_id = 0
# First row is the CSV header; one row is appended per accepted passage.
result = [["ID", "Text", "Countries", "Sentences", "Characters"]]

# For x batches of y responses.
# A rejected passage is not requested again, so `result` can end up with fewer
# than num_samples data rows.
for _ in range(num_samples):
    response = lm.query(prompt, 1)
    texts = lm.get_response_texts(response)
    for text in texts:
        # Clean paragraphs - single long passage
        text = text.strip().replace("\n", "")

        # Get all occurrences of all primary permissible countries
        occurrences = []
        for country in [country for country in primary_countries if country in text]:
            occurrences.extend(find_country_indices(text, country))
        # Order exactly how they appear in the text
        ordered_occurrences = [country[1] for country in sorted(occurrences)]

        # Check invalid countries and adjectives.
        # Adjectives stay a plain substring test, which is deliberately strict
        # (it also catches e.g. plural forms).
        invalid_primary_adjective = [
            adjective for adjective in primary_adjectives if adjective in text
        ]
        # Non-permitted countries must match as a whole name: a plain substring
        # test reports "Niger" for every passage that mentions the permitted
        # "Nigeria" and would therefore discard all such passages.
        invalid_rest_country = [
            country
            for country in rest_countries
            if re.search(rf"(?<!\w){re.escape(country)}(?!\w)", text)
        ]
        invalid_rest_adjective = [
            adjective for adjective in rest_adjectives if adjective in text
        ]
        invalid_count = (
            len(invalid_primary_adjective)
            + len(invalid_rest_country)
            + len(invalid_rest_adjective)
        )

        if invalid_count > 0:
            print(
                f"Invalid countries or adjectives present: {invalid_primary_adjective}, {invalid_rest_country}, {invalid_rest_adjective}"
            )
            continue

        result.append(
            [
                sample_id,
                text,
                "[{0}]".format(", ".join(map(str, ordered_occurrences))),
                # Sentence count is approximated by the number of "." characters.
                len(text.split(".")) - 1,
                len(text),
            ]
        )
        sample_id += 1
531 | 
# Writing to csv file.
# newline="" is what the csv module expects for files handed to csv.writer;
# without it every row is followed by an empty line on platforms that
# translate "\n" on write (e.g. Windows).
with open("countries_script.csv", "w", newline="") as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerows(result)
536 | 


--------------------------------------------------------------------------------
/examples/keyword_counting/plot.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | # contributions: Ales Kubicek
  9 | 
 10 | import json
 11 | import os
 12 | import matplotlib.pyplot as plt
 13 | 
 14 | 
 15 | def get_complete_results(base_directory):
 16 |     results_complete = {}
 17 |     for folder_name in os.listdir(base_directory):
 18 |         folder_path = os.path.join(base_directory, folder_name)
 19 |         if os.path.isdir(folder_path):
 20 |             results_complete[folder_name] = []
 21 |             for file_name in os.listdir(folder_path):
 22 |                 if file_name.endswith(".json"):
 23 |                     file_path = os.path.join(folder_path, file_name)
 24 |                     with open(file_path, "r") as f:
 25 |                         data = json.load(f)
 26 |                         results_complete[folder_name].append(
 27 |                             {"key": int(file_name.split(".")[0]), "data": data}
 28 |                         )
 29 |         for key in results_complete.keys():
 30 |             results_complete[key] = sorted(
 31 |                 results_complete[key], key=lambda x: x["key"]
 32 |             )
 33 |     return results_complete
 34 | 
 35 | 
 36 | def get_final_scores(results_complete):
 37 |     scores = {}
 38 |     for method in results_complete.keys():
 39 |         scores[method] = []
 40 |         for result in results_complete[method]:
 41 |             score = 100
 42 |             solved = False
 43 |             cost = 1
 44 |             prompt_tokens = 0
 45 |             completion_tokens = 0
 46 |             for op in result["data"]:
 47 |                 if "operation" in op and op["operation"] == "ground_truth_evaluator":
 48 |                     try:
 49 |                         score = min(op["scores"])
 50 |                         solved = any(op["problem_solved"])
 51 |                     except:
 52 |                         continue
 53 |                 if "cost" in op:
 54 |                     cost = op["cost"]
 55 |                     prompt_tokens = op["prompt_tokens"]
 56 |                     completion_tokens = op["completion_tokens"]
 57 |             scores[method].append(
 58 |                 [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
 59 |             )
 60 |         scores[method] = sorted(scores[method], key=lambda x: x[0])
 61 |     return scores
 62 | 
 63 | 
 64 | def get_plotting_data(base_directory):
 65 |     results_complete = get_complete_results(base_directory)
 66 |     scores = get_final_scores(results_complete)
 67 |     results_plotting = {
 68 |         method: {
 69 |             "scores": [x[1] for x in scores[method]],
 70 |             "solved": sum([1 for x in scores[method] if x[2]]),
 71 |             "costs": [x[5] for x in scores[method]],
 72 |         }
 73 |         for method in scores.keys()
 74 |     }
 75 |     return results_plotting
 76 | 
 77 | 
 78 | def plot_results(
 79 |     results,
 80 |     methods_order=["io", "cot", "tot", "tot2", "got4", "got8", "gotx"],
 81 |     model="GPT-3.5",
 82 |     y_lower=0,
 83 |     y_upper=40,
 84 |     cost_upper=1.8,
 85 |     display_solved=True,
 86 |     annotation_offset=1,
 87 |     display_left_ylabel=False,
 88 |     display_right_ylabel=False,
 89 | ):
 90 |     methods_order = [method for method in methods_order if method in results]
 91 |     # Extract scores based on the order
 92 |     scores_ordered = [
 93 |         [score for score in results[method]["scores"] if score != 100 and score != 300]
 94 |         for method in methods_order
 95 |     ]
 96 |     total_costs = [sum(results[method]["costs"]) for method in methods_order]
 97 | 
 98 |     # Create figure and axis
 99 |     fig, ax = plt.subplots(dpi=150, figsize=(3.75, 4))
100 | 
101 |     # Create boxplots
102 |     positions = range(1, len(methods_order) + 1)
103 |     ax.boxplot(scores_ordered, positions=positions)
104 | 
105 |     fig_fontsize = 12
106 | 
107 |     # Set the ticks and labels
108 |     methods_labels = ["IO", "CoT", "ToT", "ToT2", "GoT4", "GoT8", "GoTx"]
109 |     ax.set_xticks(range(1, len(methods_order) + 1))
110 |     ax.set_xticks(range(1, len(methods_order) + 1))
111 |     ax.set_xticklabels(methods_labels, fontsize=10)
112 | 
113 |     ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)
114 |     plt.yticks(fontsize=fig_fontsize)
115 |     if display_left_ylabel:
116 |         ax.set_ylabel(f"Number of errors; the lower the better", fontsize=fig_fontsize)
117 | 
118 |     ax.set_title(f"Keyword Counting")
119 | 
120 |     ax2 = ax.twinx()
121 |     ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)")
122 |     ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize)
123 |     ax2.set_ylim(0, cost_upper)
124 |     number_of_ticks = len(ax.get_yticks())
125 |     tick_interval = cost_upper / (number_of_ticks)
126 |     ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]
127 | 
128 |     ax2.set_yticks(ax2_ticks)
129 | 
130 |     if display_right_ylabel:
131 |         ax2.set_ylabel(
132 |             "Total Cost ($); the lower the better",
133 |             color="#1919ff",
134 |             fontsize=fig_fontsize,
135 |         )
136 | 
137 |     if display_solved:
138 |         annotation_height = y_upper + annotation_offset
139 |         count = 1
140 |         for method in methods_order:
141 |             if method not in results:
142 |                 continue
143 |             solved = results[method]["solved"]
144 |             ax.text(
145 |                 count,
146 |                 annotation_height,
147 |                 f"{solved}",
148 |                 ha="center",
149 |                 va="bottom",
150 |                 fontsize=fig_fontsize,
151 |             )
152 |             count += 1
153 | 
154 |     model = model.replace(".", "").replace("-", "").lower()
155 |     fig.savefig(f"keyword_counting_{model}.pdf", bbox_inches="tight")
156 | 
157 | 
# Executed whenever this module is run or imported: reads the per-method
# results below "results/" and writes the figure for model "GPT-3.5"
# (saved by plot_results as keyword_counting_gpt35.pdf).
plot_results(
    get_plotting_data("results/"),
    display_solved=True,
    annotation_offset=-0.3,
    model="GPT-3.5",
    y_upper=35,
    display_left_ylabel=True,
    display_right_ylabel=True,
    cost_upper=9,
)
168 | 


--------------------------------------------------------------------------------
/examples/set_intersection/README.md:
--------------------------------------------------------------------------------
 1 | # Set Intersection
 2 | 
 3 | The use case in this directory computes the intersection of two input
 4 | sets. We provide implementations of five different approaches for 32, 64
 5 | and 128 elements:
 6 | - IO
 7 | - Chain-of-Thought (CoT)
 8 | - Tree of Thought (ToT):
 9 |   - ToT: wider tree, meaning more branches per level
10 |   - ToT2: tree with more levels, but fewer branches per level
11 | - Graph of Thoughts (GoT)
12 | 
13 | ## Data
14 | 
15 | We provide input files with 100 precomputed samples for each set length:
16 | `set_intersection_<number of elements>.csv`. It is also possible to use
17 | the data generator `dataset_gen_intersection.py` to generate additional or
18 | different samples. The parameters can be updated at the start of
19 | the main body:
20 | - set_size = 32 # size of the generated sets
21 | - int_value_ubound = 64 # (exclusive) upper limit of generated numbers
22 | - seed = 42 # seed of the random number generator
23 | - num_sample = 100 # number of samples
24 | - filename = 'set_intersection_032.csv' # output filename
25 | 
26 | ## Execution
27 | 
28 | The files to execute the use case are called
29 | `set_intersection_<number of elements>.py`. In the main body, one can
30 | select the specific samples to be run (variable sample) and the
31 | approaches (variable approaches). It is also possible to set a budget in
32 | dollars (variable budget).
33 | The input filename for the samples is currently hardcoded to
34 | `set_intersection_<number of elements>.csv`, but can be updated in the
35 | function `run`.
36 | 
37 | The Python scripts will create the directory `result`, if it is not
38 | already present. In the `result` directory, another directory is created
39 | for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.
40 | Inside each execution specific directory two files (`config.json`,
41 | `log.log`) and a separate directory for each selected approach are
42 | created. `config.json` contains the configuration of the run: input data,
43 | selected approaches, name of the LLM, and the budget. `log.log` contains
44 | the prompts and responses of the LLM as well as additional debug data.
45 | The approach directories contain a separate json file for every sample
46 | and the file contains the Graph Reasoning State (GRS) for that sample.
47 | 
48 | ## Plot Data
49 | 
50 | Change the results directory in line 170 of `plot.py` and update the
51 | length parameter in the subsequent line and run `python3 plot.py` to
52 | plot your data.
53 | 


--------------------------------------------------------------------------------
/examples/set_intersection/dataset_gen_intersection.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 ETH Zurich.
 2 | #                    All rights reserved.
 3 | #
 4 | # Use of this source code is governed by a BSD-style license that can be
 5 | # found in the LICENSE file.
 6 | #
 7 | # main author: Robert Gerstenberger
 8 | 
 9 | import csv
10 | import numpy as np
11 | 
12 | 
def scramble(array: np.ndarray, rng: np.random.Generator) -> None:
    """
    Randomly reorder the elements of an array in place.

    One swap partner per position is drawn from the random number generator
    in a single call; afterwards every position is swapped with its partner,
    front to back.

    :param array: Array to be scrambled.
    :type array: numpy.ndarray
    :param rng: Random number generator.
    :type rng: numpy.random.Generator
    """

    length = array.shape[0]

    partners = rng.integers(0, length, length)

    for position, partner in enumerate(partners):
        array[position], array[partner] = array[partner], array[position]
31 | 
32 | 
if __name__ == "__main__":
    """
    Input(u)  : Set size.
    Input(v)  : Range of the integer numbers in the sets: 0..v (exclusive)
    Input(w)  : Seed for the random number generator.
    Input(x)  : Number of samples to be generated.
    Input(y)  : Filename for the output CSV file.
    Output(z) : Input sets and intersected set written to a file in the CSV
                format. File contains the sample ID, input set 1, input set 2,
                intersection set.
    """

    set_size = 32  # size of the generated sets
    int_value_ubound = 64  # (exclusive) upper limit of generated numbers
    seed = 42  # seed of the random number generator
    num_sample = 100  # number of samples
    filename = "set_intersection_032.csv"  # output filename

    # Both sets are cut from one permutation of 0..int_value_ubound-1, which
    # therefore has to provide at least 2 * set_size distinct numbers.
    assert 2 * set_size <= int_value_ubound

    rng = np.random.default_rng(seed)

    # One intersection size per sample, drawn from
    # [set_size / 4, 3 * set_size / 4) (upper bound exclusive).
    intersection_sizes = rng.integers(set_size // 4, 3 * set_size // 4, num_sample)

    np.set_printoptions(
        linewidth=np.inf
    )  # no wrapping in the array fields in the output file

    # newline="" is what the csv module expects for files handed to a csv
    # writer; without it every row is followed by an empty line on platforms
    # that translate "\n" on write (e.g. Windows).
    with open(filename, "w", newline="") as f:
        fieldnames = ["ID", "SET1", "SET2", "INTERSECTION"]
        writer = csv.DictWriter(f, delimiter=",", fieldnames=fieldnames)
        writer.writeheader()

        for i in range(num_sample):
            intersection_size = intersection_sizes[i]

            full_set = np.arange(0, int_value_ubound, dtype=np.int16)

            scramble(full_set, rng)

            # The first intersection_size numbers are shared by both sets.
            intersection = full_set[:intersection_size].copy()

            sorted_intersection = np.sort(intersection)

            # set1: the first set_size numbers (includes the intersection).
            # set2: the intersection plus numbers taken from behind set1, so
            #       both sets have set_size elements and share nothing else.
            set1 = full_set[:set_size].copy()
            set2 = np.concatenate(
                [intersection, full_set[set_size : 2 * set_size - intersection_size]]
            )

            # Hide the position of the shared numbers inside each set.
            scramble(set1, rng)
            scramble(set2, rng)

            writer.writerow(
                {
                    "ID": i,
                    "SET1": set1.tolist(),
                    "SET2": set2.tolist(),
                    "INTERSECTION": sorted_intersection.tolist(),
                }
            )
93 | 


--------------------------------------------------------------------------------
/examples/set_intersection/example_prompts_set_intersection_032.md:
--------------------------------------------------------------------------------
  1 | # Set Intersection for 32-Element Sets - Prompts and Examples
  2 | ## Prompt Templates
  3 | ### GENERATE: split_prompt
  4 | Replace `{input}` with the input list/set of numbers to be split.
  5 | ```
  6 | <Instruction> Split the following list of 32 numbers into 2 lists of 16 numbers each, the first list should contain the first 16 numbers and the second list the second 16 numbers.
  7 | Only output the 2 lists in the following format without any additional text or thoughts!:
  8 | {
  9 |     "List 1": [13, 16, 30, 6, 21, 7, 31, ...],
 10 |     "List 2": [25, 24, 10, 4, 27, 0, 14, ...]
 11 | } </Instruction>
 12 | 
 13 | <Example>
 14 | Input: [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22, 33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
 15 | Output:
 16 | {
 17 |     "List 1": [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22],
 18 |     "List 2": [33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
 19 | }
 20 | </Example>
 21 | 
 22 | Input: {input}
 23 | ```
 24 | 
 25 | ### GENERATE: intersect_prompt
 26 | Replace `{set1}` and `{set2}` with the sets to be intersected.
 27 | ```
 28 | <Instruction> Find the intersection of two sets of numbers. Output only the set of numbers that are present in both sets, no additional text. </Instruction>
 29 | 
 30 | <Examples>
 31 | Input Set 1: [13, 16, 30, 6, 21, 7, 31, 15, 11, 1, 24, 10, 9, 3, 20, 8]
 32 | Input Set 2: [25, 24, 10, 4, 27, 0, 14, 12, 8, 2, 29, 20, 17, 19, 26, 23]
 33 | Output: [24, 10, 20, 8]
 34 | 
 35 | Input Set 1: [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22, 33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
 36 | Input Set 2: [16, 60, 36, 48, 0, 15, 5, 19, 46, 24, 1, 6, 61, 10, 38, 53, 58, 9, 44, 14, 35, 63, 52, 20, 27, 17, 39, 47, 34, 56, 40, 59]
 37 | Output: [40, 15, 5, 24, 35, 59, 16, 63]
 38 | 
 39 | Input Set 1: [115, 61, 35, 103, 90, 117, 86, 44, 63, 45, 40, 30, 74, 33, 31, 1, 118, 48, 38, 0, 119, 51, 64, 78, 15, 121, 89, 101, 79, 69, 120, 29, 58, 50, 116, 11, 60, 12, 39, 95, 23, 2, 109, 84, 7, 43, 99, 98, 52, 70, 75, 102, 57, 19, 94, 36, 114, 88, 71, 56, 83, 6, 96, 107]
 40 | Input Set 2: [13, 35, 20, 96, 34, 18, 47, 127, 126, 9, 21, 16, 77, 22, 111, 122, 85, 73, 42, 105, 123, 15, 33, 59, 67, 57, 104, 8, 30, 89, 76, 12, 65, 84, 32, 40, 7, 100, 108, 50, 14, 28, 24, 53, 90, 17, 91, 81, 124, 63, 5, 46, 125, 93, 49, 66, 117, 37, 115, 113, 2, 106, 41, 72]
 41 | Output: [115, 35, 90, 117, 63, 40, 30, 33, 15, 89, 50, 12, 2, 84, 7, 57, 96]
 42 | </Examples>
 43 | 
 44 | Input Set 1: {set1}
 45 | Input Set 2: {set2}
 46 | ```
 47 | 
 48 | ### AGGREGATE: merge_prompt
 49 | Replace `{input1}` and `{input2}` with the lists/sets to be merged.
 50 | ```
 51 | <Instruction> Merge the following 2 lists of length 16 each, into one list of length 32 by appending the second list to the first list.
 52 | Only output the final list without any additional text or thoughts! </Instruction>
 53 | 
 54 | List 1: {input1}
 55 | List 2: {input2}
 56 | 
 57 | ```
 58 | 
 59 | ## Complete Example Prompts
 60 | ### The GoO Summarised
 61 | 
 62 | 1. Split the second input set into two sub-sets of equal size (split prompt)
 63 | 2. For each sub-set: Intersect the sub-set with the first input set (intersect prompt) five times; score each intersection attempt; keep the best
 64 | 3. Merge the resulting intersections into one full intersection set (merge prompt) 10 times; score each merge attempt; keep the best
 65 | 
 66 | ### Input
 67 | ```
 68 | [31, 11, 4, 63, 38, 58, 59, 24, 61, 14, 32, 39, 27, 46, 48, 19, 52, 57, 50, 56, 3, 2, 53, 29, 5, 37, 62, 41, 36, 12, 49, 16]
 69 | ```
 70 | 
 71 | ### Step 1 - Prompt
 72 | ```
 73 | <Instruction> Split the following list of 32 numbers into 2 lists of 16 numbers each, the first list should contain the first 16 numbers and the second list the second 16 numbers.
 74 | Only output the 2 lists in the following format without any additional text or thoughts!:
 75 | {
 76 |     "List 1": [13, 16, 30, 6, 21, 7, 31, ...],
 77 |     "List 2": [25, 24, 10, 4, 27, 0, 14, ...]
 78 | } </Instruction>
 79 | 
 80 | <Example>
 81 | Input: [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22, 33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
 82 | Output:
 83 | {
 84 |     "List 1": [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22],
 85 |     "List 2": [33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
 86 | }
 87 | </Example>
 88 | 
 89 | Input: [31, 11, 4, 63, 38, 58, 59, 24, 61, 14, 32, 39, 27, 46, 48, 19, 52, 57, 50, 56, 3, 2, 53, 29, 5, 37, 62, 41, 36, 12, 49, 16]
 90 | ```
 91 | 
 92 | ### Step 1 - Response
 93 | ```
 94 | {
 95 |     "List 1": [31, 11, 4, 63, 38, 58, 59, 24, 61, 14, 32, 39, 27, 46, 48, 19],
 96 |     "List 2": [52, 57, 50, 56, 3, 2, 53, 29, 5, 37, 62, 41, 36, 12, 49, 16]
 97 | }
 98 | ```
 99 | 
100 | ### Step 2a - Prompt
101 | ```
102 | <Instruction> Find the intersection of two sets of numbers. Output only the set of numbers that are present in both sets, no additional text. </Instruction>
103 | 
104 | <Examples>
105 | Input Set 1: [13, 16, 30, 6, 21, 7, 31, 15, 11, 1, 24, 10, 9, 3, 20, 8]
106 | Input Set 2: [25, 24, 10, 4, 27, 0, 14, 12, 8, 2, 29, 20, 17, 19, 26, 23]
107 | Output: [24, 10, 20, 8]
108 | 
109 | Input Set 1: [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22, 33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
110 | Input Set 2: [16, 60, 36, 48, 0, 15, 5, 19, 46, 24, 1, 6, 61, 10, 38, 53, 58, 9, 44, 14, 35, 63, 52, 20, 27, 17, 39, 47, 34, 56, 40, 59]
111 | Output: [40, 15, 5, 24, 35, 59, 16, 63]
112 | 
113 | Input Set 1: [115, 61, 35, 103, 90, 117, 86, 44, 63, 45, 40, 30, 74, 33, 31, 1, 118, 48, 38, 0, 119, 51, 64, 78, 15, 121, 89, 101, 79, 69, 120, 29, 58, 50, 116, 11, 60, 12, 39, 95, 23, 2, 109, 84, 7, 43, 99, 98, 52, 70, 75, 102, 57, 19, 94, 36, 114, 88, 71, 56, 83, 6, 96, 107]
114 | Input Set 2: [13, 35, 20, 96, 34, 18, 47, 127, 126, 9, 21, 16, 77, 22, 111, 122, 85, 73, 42, 105, 123, 15, 33, 59, 67, 57, 104, 8, 30, 89, 76, 12, 65, 84, 32, 40, 7, 100, 108, 50, 14, 28, 24, 53, 90, 17, 91, 81, 124, 63, 5, 46, 125, 93, 49, 66, 117, 37, 115, 113, 2, 106, 41, 72]
115 | Output: [115, 35, 90, 117, 63, 40, 30, 33, 15, 89, 50, 12, 2, 84, 7, 57, 96]
116 | </Examples>
117 | 
118 | Input Set 1: [11, 60, 1, 49, 21, 33, 14, 56, 54, 15, 23, 40, 45, 22, 7, 28, 20, 46, 51, 6, 34, 37, 3, 50, 17, 8, 25, 0, 35, 47, 18, 19]
119 | Input Set 2: [31, 11, 4, 63, 38, 58, 59, 24, 61, 14, 32, 39, 27, 46, 48, 19]
120 | ```
121 | 
122 | ### Step 2a - 5 Responses
123 | ```
124 | 1. [11, 14, 46, 14, 19]
125 | 2. Output: [11, 14, 46, 19]
126 | 3. [11, 14, 46, 14, 19]
127 | 4. [11, 14, 46, 19]
128 | 5. [11, 14, 46, 14, 19]
129 | ```
130 | 
131 | ### Step 2b - Prompt
132 | ```
133 | <Instruction> Find the intersection of two sets of numbers. Output only the set of numbers that are present in both sets, no additional text. </Instruction>
134 | 
135 | <Examples>
136 | Input Set 1: [13, 16, 30, 6, 21, 7, 31, 15, 11, 1, 24, 10, 9, 3, 20, 8]
137 | Input Set 2: [25, 24, 10, 4, 27, 0, 14, 12, 8, 2, 29, 20, 17, 19, 26, 23]
138 | Output: [24, 10, 20, 8]
139 | 
140 | Input Set 1: [26, 40, 42, 57, 15, 31, 5, 32, 11, 4, 24, 28, 51, 54, 12, 22, 33, 35, 7, 13, 2, 59, 8, 23, 43, 16, 29, 55, 25, 63, 21, 18]
141 | Input Set 2: [16, 60, 36, 48, 0, 15, 5, 19, 46, 24, 1, 6, 61, 10, 38, 53, 58, 9, 44, 14, 35, 63, 52, 20, 27, 17, 39, 47, 34, 56, 40, 59]
142 | Output: [40, 15, 5, 24, 35, 59, 16, 63]
143 | 
144 | Input Set 1: [115, 61, 35, 103, 90, 117, 86, 44, 63, 45, 40, 30, 74, 33, 31, 1, 118, 48, 38, 0, 119, 51, 64, 78, 15, 121, 89, 101, 79, 69, 120, 29, 58, 50, 116, 11, 60, 12, 39, 95, 23, 2, 109, 84, 7, 43, 99, 98, 52, 70, 75, 102, 57, 19, 94, 36, 114, 88, 71, 56, 83, 6, 96, 107]
145 | Input Set 2: [13, 35, 20, 96, 34, 18, 47, 127, 126, 9, 21, 16, 77, 22, 111, 122, 85, 73, 42, 105, 123, 15, 33, 59, 67, 57, 104, 8, 30, 89, 76, 12, 65, 84, 32, 40, 7, 100, 108, 50, 14, 28, 24, 53, 90, 17, 91, 81, 124, 63, 5, 46, 125, 93, 49, 66, 117, 37, 115, 113, 2, 106, 41, 72]
146 | Output: [115, 35, 90, 117, 63, 40, 30, 33, 15, 89, 50, 12, 2, 84, 7, 57, 96]
147 | </Examples>
148 | 
149 | Input Set 1: [11, 60, 1, 49, 21, 33, 14, 56, 54, 15, 23, 40, 45, 22, 7, 28, 20, 46, 51, 6, 34, 37, 3, 50, 17, 8, 25, 0, 35, 47, 18, 19]
150 | Input Set 2: [52, 57, 50, 56, 3, 2, 53, 29, 5, 37, 62, 41, 36, 12, 49, 16]
151 | ```
152 | 
153 | ### Step 2b - 5 Responses
154 | ```
155 | 1. [50, 56, 49]
156 | 2. [50, 56, 37, 49]
157 | 3. [56, 49, 37, 3, 50]
158 | 4. [50, 56, 37, 49]
159 | 5. [50, 56, 37, 49]
160 | ```
161 | 
162 | ### Step 3 - Prompt
163 | ```
164 | <Instruction> Merge the following 2 lists of length 16 each, into one list of length 32 by appending the second list to the first list.
165 | Only output the final list without any additional text or thoughts! </Instruction>
166 | 
167 | List 1: [11, 14, 46, 19]
168 | List 2: [56, 49, 37, 3, 50]
169 | ```
170 | 
171 | ### Step 3 - 10 Responses
172 | ```
173 | 1. [11, 14, 46, 19, 56, 49, 37, 3, 50]
174 | 2. [11, 14, 46, 19, 56, 49, 37, 3, 50]
175 | 3. [11, 14, 46, 19, 56, 49, 37, 3, 50]
176 | 4. [11, 14, 46, 19, 56, 49, 37, 3, 50]
177 | 5. [11, 14, 46, 19, 56, 49, 37, 3, 50]
178 | 6. [11, 14, 46, 19, 56, 49, 37, 3, 50]
179 | 7. [11, 14, 46, 19, 56, 49, 37, 3, 50]
180 | 8. [11, 14, 46, 19, 56, 49, 37, 3, 50]
181 | 9. [11, 14, 46, 19, 56, 49, 37, 3, 50]
182 | 10. [11, 14, 46, 19, 56, 49, 37, 3, 50]
183 | ```
184 | 
185 | ### Final Result
186 | ```
187 | [11, 14, 46, 19, 56, 49, 37, 3, 50]
188 | ```
189 | 


--------------------------------------------------------------------------------
/examples/set_intersection/plot.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | # contributions: Robert Gerstenberger
  9 | 
 10 | import json
 11 | import os
 12 | import matplotlib.pyplot as plt
 13 | 
 14 | 
 15 | def get_complete_results(base_directory):
 16 |     results_complete = {}
 17 |     for folder_name in os.listdir(base_directory):
 18 |         folder_path = os.path.join(base_directory, folder_name)
 19 |         if os.path.isdir(folder_path):
 20 |             results_complete[folder_name] = []
 21 |             for file_name in os.listdir(folder_path):
 22 |                 if file_name.endswith(".json"):
 23 |                     file_path = os.path.join(folder_path, file_name)
 24 |                     with open(file_path, "r") as f:
 25 |                         data = json.load(f)
 26 |                         results_complete[folder_name].append(
 27 |                             {"key": int(file_name.split(".")[0]), "data": data}
 28 |                         )
 29 |         for key in results_complete.keys():
 30 |             results_complete[key] = sorted(
 31 |                 results_complete[key], key=lambda x: x["key"]
 32 |             )
 33 |     return results_complete
 34 | 
 35 | 
 36 | def get_final_scores(results_complete):
 37 |     scores = {}
 38 |     for method in results_complete.keys():
 39 |         scores[method] = []
 40 |         for result in results_complete[method]:
 41 |             score = 100
 42 |             solved = False
 43 |             cost = 1
 44 |             prompt_tokens = 0
 45 |             completion_tokens = 0
 46 |             for op in result["data"]:
 47 |                 if "operation" in op and op["operation"] == "ground_truth_evaluator":
 48 |                     try:
 49 |                         score = min(op["scores"])
 50 |                         solved = any(op["problem_solved"])
 51 |                     except:
 52 |                         continue
 53 |                 if "cost" in op:
 54 |                     cost = op["cost"]
 55 |                     prompt_tokens = op["prompt_tokens"]
 56 |                     completion_tokens = op["completion_tokens"]
 57 |             scores[method].append(
 58 |                 [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
 59 |             )
 60 |         scores[method] = sorted(scores[method], key=lambda x: x[0])
 61 |     return scores
 62 | 
 63 | 
 64 | def get_plotting_data(base_directory):
 65 |     results_complete = get_complete_results(base_directory)
 66 |     scores = get_final_scores(results_complete)
 67 |     results_plotting = {
 68 |         method: {
 69 |             "scores": [x[1] for x in scores[method]],
 70 |             "solved": sum([1 for x in scores[method] if x[2]]),
 71 |             "costs": [x[5] for x in scores[method]],
 72 |         }
 73 |         for method in scores.keys()
 74 |     }
 75 |     return results_plotting
 76 | 
 77 | 
 78 | def plot_results(
 79 |     results,
 80 |     methods_order=["io", "cot", "tot", "tot2", "got"],
 81 |     model="GPT-3.5",
 82 |     length=32,
 83 |     y_lower=0,
 84 |     cost_upper=0.0,
 85 |     display_solved=True,
 86 |     annotation_offset=0,
 87 |     display_left_ylabel=False,
 88 |     display_right_ylabel=False,
 89 | ):
 90 |     methods_order = [method for method in methods_order if method in results]
 91 |     # Extract scores based on the order
 92 |     scores_ordered = [
 93 |         [score for score in results[method]["scores"] if score != 1000]
 94 |         for method in methods_order
 95 |     ]
 96 |     total_costs = [sum(results[method]["costs"]) for method in methods_order]
 97 | 
 98 |     # Create figure and axis
 99 |     fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))
100 | 
101 |     # Create boxplots
102 |     positions = range(1, len(methods_order) + 1)
103 |     ax.boxplot(scores_ordered, positions=positions)
104 | 
105 |     fig_fontsize = 12
106 | 
107 |     # Set the ticks and labels
108 |     methods_labels = ["IO", "CoT", "ToT", "ToT2", "GoT"]
109 |     plt.yticks(fontsize=fig_fontsize)
110 |     ax.set_xticks(range(1, len(methods_order) + 1))
111 |     ax.set_xticks(range(1, len(methods_order) + 1))
112 |     ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)
113 | 
114 |     y_upper = length
115 | 
116 |     range_increase = 1
117 |     if display_solved:
118 |         if length < 48:
119 |             range_increase = 2
120 |         elif length < 96:
121 |             range_increase = 4
122 |         else:
123 |             range_increase = 8
124 | 
125 |     ax.set_ylim(y_lower, y_upper + range_increase)
126 |     ax1_yticks = range(
127 |         y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)
128 |     )
129 |     ax.set_yticks(ax1_yticks)
130 |     if display_left_ylabel:
131 |         ax.set_ylabel(
132 |             f"#incorrect elements; the lower the better", fontsize=fig_fontsize
133 |         )
134 | 
135 |     ax.set_title(f"{length} elements")
136 | 
137 |     ax2 = ax.twinx()
138 |     ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)")
139 |     ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize)
140 |     if cost_upper > 0:
141 |         ax2.set_ylim(0, cost_upper)
142 |         number_of_ticks = len(ax.get_yticks())
143 |         tick_interval = cost_upper / (number_of_ticks)
144 |         ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]
145 | 
146 |         # Set custom tick positions for ax2
147 |         ax2.set_yticks(ax2_ticks)
148 | 
149 |     if display_right_ylabel:
150 |         ax2.set_ylabel(
151 |             "Total Cost ($); the lower the better",
152 |             color="#1919ff",
153 |             fontsize=fig_fontsize,
154 |         )
155 | 
156 |     if display_solved:
157 |         annotation_height = y_upper + annotation_offset
158 |         count = 1
159 |         for method in methods_order:
160 |             if method not in results:
161 |                 continue
162 |             solved = results[method]["solved"]
163 |             ax.text(
164 |                 count,
165 |                 annotation_height,
166 |                 f"{solved}",
167 |                 ha="center",
168 |                 va="bottom",
169 |                 fontsize=fig_fontsize,
170 |             )
171 |             count += 1
172 | 
173 |     model = model.replace(".", "").replace("-", "").lower()
174 |     fig.savefig(f"set_intersection_{model}_{length}.pdf", bbox_inches="tight")
175 | 
176 | 
# Script entry: plot the results read from "results/" for GPT-3.5 with
# length 32, showing the solved counts and both axis labels.
plot_results(
    get_plotting_data("results/"),
    model="GPT-3.5",
    length=32,
    display_solved=True,
    display_right_ylabel=True,
    display_left_ylabel=True,
)
185 | 


--------------------------------------------------------------------------------
/examples/set_intersection/set_intersection_032.csv:
--------------------------------------------------------------------------------
  1 | ID,SET1,SET2,INTERSECTION
  2 | 0,"[11, 60, 1, 49, 21, 33, 14, 56, 54, 15, 23, 40, 45, 22, 7, 28, 20, 46, 51, 6, 34, 37, 3, 50, 17, 8, 25, 0, 35, 47, 18, 19]","[31, 11, 4, 63, 38, 58, 59, 24, 61, 14, 32, 39, 27, 46, 48, 19, 52, 57, 50, 56, 3, 2, 53, 29, 5, 37, 62, 41, 36, 12, 49, 16]","[3, 11, 14, 19, 37, 46, 49, 50, 56]"
  3 | 1,"[28, 58, 36, 18, 37, 31, 44, 39, 34, 51, 12, 56, 21, 27, 7, 24, 46, 1, 25, 2, 41, 6, 45, 29, 49, 42, 35, 30, 54, 55, 10, 50]","[4, 16, 28, 46, 49, 21, 58, 30, 19, 1, 37, 15, 3, 59, 51, 24, 10, 12, 34, 20, 40, 44, 35, 23, 36, 0, 43, 54, 2, 31, 57, 41]","[1, 2, 10, 12, 21, 24, 28, 30, 31, 34, 35, 36, 37, 41, 44, 46, 49, 51, 54, 58]"
  4 | 2,"[32, 24, 17, 27, 51, 19, 50, 59, 8, 18, 41, 36, 43, 39, 49, 4, 5, 31, 30, 60, 48, 61, 10, 16, 47, 58, 9, 54, 40, 37, 1, 11]","[44, 27, 33, 46, 51, 17, 28, 18, 4, 23, 32, 49, 58, 1, 20, 40, 39, 52, 30, 55, 26, 19, 63, 31, 57, 13, 47, 61, 10, 6, 38, 41]","[1, 4, 10, 17, 18, 19, 27, 30, 31, 32, 39, 40, 41, 47, 49, 51, 58, 61]"
  5 | 3,"[22, 38, 13, 31, 63, 6, 59, 17, 27, 20, 21, 42, 24, 4, 25, 15, 28, 29, 19, 49, 40, 30, 34, 0, 53, 23, 12, 62, 36, 51, 39, 45]","[21, 29, 30, 28, 31, 55, 2, 22, 19, 4, 46, 32, 37, 43, 23, 15, 58, 56, 11, 50, 48, 54, 41, 45, 51, 7, 42, 38, 18, 9, 6, 1]","[4, 6, 15, 19, 21, 22, 23, 28, 29, 30, 31, 38, 42, 45, 51]"
  6 | 4,"[4, 26, 48, 7, 42, 60, 31, 36, 25, 20, 14, 50, 21, 30, 12, 10, 23, 8, 39, 13, 46, 32, 11, 34, 6, 2, 57, 0, 17, 33, 18, 24]","[1, 37, 48, 28, 39, 58, 53, 10, 6, 50, 17, 9, 51, 56, 38, 52, 8, 30, 46, 62, 5, 24, 61, 22, 57, 14, 44, 11, 47, 35, 63, 36]","[6, 8, 10, 11, 14, 17, 24, 30, 36, 39, 46, 48, 50, 57]"
  7 | 5,"[48, 50, 19, 59, 57, 63, 49, 61, 14, 11, 29, 39, 4, 17, 21, 12, 3, 35, 23, 2, 58, 60, 38, 43, 28, 47, 5, 44, 15, 55, 8, 26]","[50, 12, 30, 23, 11, 61, 17, 58, 19, 55, 62, 43, 33, 2, 37, 21, 56, 4, 15, 24, 38, 45, 1, 28, 59, 48, 35, 6, 47, 8, 27, 22]","[2, 4, 8, 11, 12, 15, 17, 19, 21, 23, 28, 35, 38, 43, 47, 48, 50, 55, 58, 59, 61]"
  8 | 6,"[34, 2, 12, 54, 13, 41, 57, 42, 21, 59, 26, 10, 6, 35, 27, 16, 1, 44, 8, 11, 14, 7, 62, 20, 30, 19, 32, 9, 43, 39, 22, 24]","[18, 41, 49, 56, 1, 3, 45, 36, 17, 34, 50, 46, 29, 30, 61, 32, 14, 52, 24, 51, 5, 25, 31, 55, 37, 0, 23, 40, 28, 20, 22, 63]","[1, 14, 20, 22, 24, 30, 32, 34, 41]"
  9 | 7,"[49, 61, 9, 23, 27, 21, 47, 37, 54, 32, 18, 14, 22, 13, 62, 46, 0, 26, 4, 11, 60, 34, 25, 50, 6, 51, 59, 8, 35, 41, 29, 17]","[33, 21, 49, 54, 6, 27, 3, 47, 4, 20, 43, 41, 8, 24, 0, 57, 28, 35, 15, 17, 46, 23, 13, 9, 45, 31, 22, 12, 5, 29, 50, 7]","[0, 4, 6, 8, 9, 13, 17, 21, 22, 23, 27, 29, 35, 41, 46, 47, 49, 50, 54]"
 10 | 8,"[55, 59, 60, 13, 41, 15, 38, 52, 35, 5, 9, 61, 57, 34, 47, 22, 21, 1, 27, 11, 36, 2, 6, 58, 62, 51, 23, 50, 8, 39, 28, 54]","[37, 14, 4, 55, 50, 17, 8, 19, 10, 44, 0, 31, 27, 53, 39, 35, 30, 5, 62, 24, 26, 29, 54, 45, 40, 3, 43, 11, 12, 25, 41, 42]","[5, 8, 11, 27, 35, 39, 41, 50, 54, 55, 62]"
 11 | 9,"[20, 12, 52, 56, 36, 51, 59, 40, 55, 33, 41, 38, 28, 45, 13, 2, 43, 27, 35, 7, 9, 0, 10, 37, 29, 15, 14, 49, 17, 63, 25, 32]","[11, 13, 19, 6, 21, 54, 36, 14, 58, 26, 2, 39, 53, 22, 50, 51, 24, 16, 5, 17, 1, 60, 57, 23, 35, 33, 42, 18, 61, 29, 47, 8]","[2, 13, 14, 17, 29, 33, 35, 36, 51]"
 12 | 10,"[24, 21, 51, 37, 54, 22, 18, 38, 14, 30, 48, 49, 28, 47, 10, 44, 58, 29, 3, 45, 26, 15, 9, 6, 33, 2, 34, 17, 20, 25, 0, 1]","[3, 0, 20, 1, 24, 45, 25, 52, 13, 43, 38, 15, 22, 53, 27, 8, 42, 40, 59, 32, 21, 60, 30, 46, 51, 47, 14, 50, 5, 62, 57, 28]","[0, 1, 3, 14, 15, 20, 21, 22, 24, 25, 28, 30, 38, 45, 47, 51]"
 13 | 11,"[58, 52, 51, 15, 42, 26, 19, 30, 28, 6, 40, 11, 25, 53, 18, 47, 21, 12, 41, 45, 43, 7, 35, 48, 27, 3, 32, 2, 4, 62, 17, 29]","[41, 4, 0, 35, 25, 29, 14, 62, 45, 2, 37, 11, 15, 8, 30, 39, 58, 19, 56, 42, 17, 51, 24, 28, 44, 27, 13, 18, 26, 47, 53, 43]","[2, 4, 11, 15, 17, 18, 19, 25, 26, 27, 28, 29, 30, 35, 41, 42, 43, 45, 47, 51, 53, 58, 62]"
 14 | 12,"[8, 17, 23, 38, 24, 47, 61, 3, 46, 43, 58, 39, 27, 21, 26, 14, 62, 1, 15, 19, 37, 53, 55, 6, 51, 57, 52, 25, 16, 22, 33, 54]","[2, 18, 15, 17, 23, 45, 14, 36, 53, 7, 27, 32, 31, 21, 8, 38, 16, 61, 6, 58, 3, 39, 49, 19, 22, 42, 0, 12, 30, 60, 26, 46]","[3, 6, 8, 14, 15, 16, 17, 19, 21, 22, 23, 26, 27, 38, 39, 46, 53, 58, 61]"
 15 | 13,"[58, 10, 28, 52, 51, 48, 49, 13, 53, 25, 55, 6, 4, 15, 30, 56, 54, 41, 38, 42, 21, 33, 44, 18, 37, 43, 31, 20, 3, 2, 50, 0]","[54, 3, 32, 44, 26, 49, 0, 9, 35, 51, 25, 33, 62, 55, 47, 2, 29, 11, 42, 18, 4, 23, 31, 41, 20, 7, 58, 52, 13, 36, 5, 53]","[0, 2, 3, 4, 13, 18, 20, 25, 31, 33, 41, 42, 44, 49, 51, 52, 53, 54, 55, 58]"
 16 | 14,"[12, 18, 22, 38, 6, 23, 54, 37, 40, 44, 19, 46, 35, 47, 7, 10, 16, 24, 4, 42, 31, 30, 0, 63, 25, 57, 39, 51, 3, 60, 49, 28]","[40, 6, 12, 18, 19, 34, 17, 47, 4, 29, 53, 3, 15, 7, 37, 1, 59, 42, 33, 35, 52, 16, 61, 13, 9, 51, 32, 60, 24, 54, 38, 46]","[3, 4, 6, 7, 12, 16, 18, 19, 24, 35, 37, 38, 40, 42, 46, 47, 51, 54, 60]"
 17 | 15,"[61, 62, 10, 53, 17, 20, 28, 54, 55, 59, 4, 1, 11, 34, 5, 35, 56, 38, 60, 29, 43, 42, 63, 50, 24, 40, 47, 52, 16, 39, 31, 19]","[60, 24, 50, 45, 23, 18, 20, 31, 29, 35, 43, 44, 59, 41, 51, 46, 16, 63, 38, 3, 42, 25, 4, 17, 5, 9, 61, 7, 27, 54, 19, 39]","[4, 5, 16, 17, 19, 20, 24, 29, 31, 35, 38, 39, 42, 43, 50, 54, 59, 60, 61, 63]"
 18 | 16,"[12, 2, 21, 59, 49, 30, 31, 14, 23, 39, 5, 15, 4, 19, 27, 43, 6, 34, 0, 22, 16, 60, 17, 48, 40, 9, 56, 1, 63, 8, 45, 51]","[40, 4, 38, 7, 1, 47, 44, 16, 58, 11, 25, 35, 3, 51, 26, 10, 41, 60, 46, 6, 34, 19, 2, 61, 21, 31, 8, 22, 12, 53, 15, 50]","[1, 2, 4, 6, 8, 12, 15, 16, 19, 21, 22, 31, 34, 40, 51, 60]"
 19 | 17,"[26, 8, 53, 37, 45, 60, 32, 44, 19, 39, 36, 16, 25, 9, 6, 50, 38, 47, 30, 63, 42, 52, 57, 10, 7, 27, 14, 1, 41, 58, 59, 62]","[28, 12, 29, 21, 63, 34, 17, 49, 56, 4, 15, 39, 1, 46, 22, 62, 9, 43, 0, 50, 23, 54, 44, 31, 5, 51, 11, 37, 8, 61, 13, 42]","[1, 8, 9, 37, 39, 42, 44, 50, 62, 63]"
 20 | 18,"[9, 58, 28, 53, 3, 18, 45, 33, 11, 5, 2, 47, 4, 59, 38, 30, 61, 15, 21, 48, 41, 25, 26, 13, 60, 23, 6, 55, 17, 36, 62, 10]","[43, 15, 16, 10, 33, 5, 13, 62, 41, 53, 0, 32, 54, 23, 26, 19, 25, 9, 37, 49, 38, 21, 28, 27, 40, 45, 4, 61, 29, 3, 55, 18]","[3, 4, 5, 9, 10, 13, 15, 18, 21, 23, 25, 26, 28, 33, 38, 41, 45, 53, 55, 61, 62]"
 21 | 19,"[26, 63, 42, 19, 6, 62, 39, 14, 24, 17, 44, 31, 8, 7, 10, 16, 1, 56, 45, 43, 50, 57, 54, 51, 58, 59, 5, 37, 3, 21, 13, 30]","[60, 3, 1, 34, 17, 55, 50, 21, 0, 46, 62, 59, 47, 36, 15, 22, 19, 5, 2, 38, 58, 12, 32, 27, 28, 18, 24, 31, 26, 63, 51, 53]","[1, 3, 5, 17, 19, 21, 24, 26, 31, 50, 51, 58, 59, 62, 63]"
 22 | 20,"[30, 31, 28, 56, 54, 29, 61, 18, 1, 46, 34, 5, 15, 63, 26, 45, 4, 47, 40, 21, 3, 9, 2, 27, 7, 6, 24, 19, 42, 11, 35, 41]","[57, 5, 54, 42, 41, 31, 12, 13, 9, 34, 21, 23, 7, 48, 22, 29, 19, 14, 0, 43, 3, 27, 26, 4, 16, 25, 53, 51, 10, 11, 52, 20]","[3, 4, 5, 7, 9, 11, 19, 21, 26, 27, 29, 31, 34, 41, 42, 54]"
 23 | 21,"[9, 12, 51, 48, 23, 6, 50, 14, 53, 15, 58, 44, 32, 59, 24, 42, 27, 41, 29, 10, 45, 34, 0, 46, 3, 36, 25, 16, 13, 63, 49, 52]","[16, 15, 51, 37, 20, 27, 47, 36, 45, 59, 0, 41, 40, 2, 28, 31, 17, 1, 21, 22, 5, 58, 18, 57, 34, 39, 54, 9, 53, 33, 60, 19]","[0, 9, 15, 16, 27, 34, 36, 41, 45, 51, 53, 58, 59]"
 24 | 22,"[57, 44, 59, 3, 16, 5, 17, 6, 52, 0, 18, 40, 47, 11, 45, 14, 1, 49, 30, 55, 27, 12, 50, 10, 7, 39, 46, 22, 23, 38, 34, 41]","[44, 48, 9, 58, 28, 40, 4, 56, 49, 19, 53, 42, 47, 35, 25, 34, 29, 43, 33, 32, 6, 55, 37, 26, 20, 11, 2, 27, 31, 62, 54, 39]","[6, 11, 27, 34, 39, 40, 44, 47, 49, 55]"
 25 | 23,"[32, 25, 2, 13, 11, 23, 54, 49, 9, 57, 59, 6, 61, 47, 37, 34, 18, 29, 46, 45, 56, 31, 14, 17, 48, 55, 7, 50, 27, 51, 63, 44]","[49, 34, 46, 32, 17, 50, 51, 54, 61, 14, 52, 21, 56, 16, 44, 33, 25, 57, 2, 28, 35, 19, 55, 11, 22, 9, 48, 20, 53, 29, 45, 18]","[2, 9, 11, 14, 17, 18, 25, 29, 32, 34, 44, 45, 46, 48, 49, 50, 51, 54, 55, 56, 57, 61]"
 26 | 24,"[23, 16, 41, 14, 10, 42, 9, 27, 35, 51, 24, 4, 59, 47, 55, 12, 18, 58, 7, 63, 28, 32, 22, 49, 37, 31, 13, 1, 21, 39, 36, 30]","[39, 6, 63, 37, 27, 19, 40, 16, 18, 54, 22, 20, 2, 30, 23, 59, 46, 52, 43, 10, 21, 41, 31, 4, 12, 24, 32, 38, 3, 13, 47, 53]","[4, 10, 12, 13, 16, 18, 21, 22, 23, 24, 27, 30, 31, 32, 37, 39, 41, 47, 59, 63]"
 27 | 25,"[20, 5, 41, 40, 24, 60, 2, 6, 59, 50, 34, 47, 10, 0, 14, 29, 35, 28, 39, 58, 13, 16, 27, 4, 18, 52, 56, 21, 45, 15, 32, 30]","[2, 23, 40, 31, 7, 52, 55, 18, 59, 56, 60, 44, 38, 54, 35, 4, 46, 1, 12, 47, 10, 3, 13, 29, 48, 43, 6, 11, 20, 24, 45, 34]","[2, 4, 6, 10, 13, 18, 20, 24, 29, 34, 35, 40, 45, 47, 52, 56, 59, 60]"
 28 | 26,"[60, 39, 4, 62, 16, 57, 49, 41, 25, 59, 55, 35, 53, 33, 17, 5, 18, 13, 46, 30, 45, 38, 1, 20, 22, 44, 11, 15, 42, 7, 21, 28]","[57, 27, 24, 15, 44, 13, 60, 35, 62, 47, 38, 59, 42, 52, 49, 43, 10, 0, 53, 6, 37, 18, 19, 36, 51, 58, 3, 34, 23, 21, 63, 40]","[13, 15, 18, 21, 35, 38, 42, 44, 49, 53, 57, 59, 60, 62]"
 29 | 27,"[9, 48, 28, 3, 61, 4, 1, 22, 62, 57, 18, 21, 34, 26, 25, 8, 12, 38, 27, 49, 46, 6, 40, 56, 10, 16, 36, 5, 45, 44, 51, 42]","[10, 36, 20, 5, 45, 34, 16, 18, 21, 51, 60, 17, 57, 50, 42, 54, 46, 63, 39, 38, 58, 3, 24, 53, 37, 26, 27, 48, 22, 44, 49, 8]","[3, 5, 8, 10, 16, 18, 21, 22, 26, 27, 34, 36, 38, 42, 44, 45, 46, 48, 49, 51, 57]"
 30 | 28,"[17, 42, 40, 38, 58, 19, 63, 22, 50, 18, 8, 16, 12, 15, 61, 10, 52, 9, 44, 13, 60, 24, 41, 56, 35, 46, 21, 36, 28, 0, 48, 6]","[6, 33, 10, 5, 23, 41, 53, 30, 58, 22, 45, 42, 61, 7, 12, 19, 56, 60, 2, 28, 62, 36, 39, 43, 27, 20, 16, 55, 37, 40, 9, 32]","[6, 9, 10, 12, 16, 19, 22, 28, 36, 40, 41, 42, 56, 58, 60, 61]"
 31 | 29,"[1, 4, 35, 57, 2, 11, 43, 56, 16, 9, 51, 8, 26, 0, 59, 31, 22, 25, 29, 50, 13, 44, 12, 32, 30, 27, 19, 36, 42, 61, 7, 38]","[59, 35, 54, 49, 7, 45, 33, 57, 32, 41, 18, 47, 55, 56, 58, 5, 4, 46, 21, 9, 63, 52, 34, 2, 62, 8, 48, 22, 51, 11, 1, 16]","[1, 2, 4, 7, 8, 9, 11, 16, 22, 32, 35, 51, 56, 57, 59]"
 32 | 30,"[54, 2, 52, 36, 21, 37, 13, 28, 39, 17, 27, 0, 42, 56, 10, 4, 60, 48, 49, 24, 20, 16, 5, 62, 14, 47, 53, 1, 7, 32, 34, 12]","[38, 41, 13, 32, 31, 63, 11, 62, 1, 18, 52, 58, 56, 51, 7, 37, 36, 22, 9, 35, 43, 0, 46, 4, 55, 44, 25, 8, 24, 14, 34, 47]","[0, 1, 4, 7, 13, 14, 24, 32, 34, 36, 37, 47, 52, 56, 62]"
 33 | 31,"[6, 32, 2, 58, 25, 19, 31, 0, 30, 28, 46, 49, 41, 48, 45, 44, 59, 51, 22, 3, 38, 36, 1, 15, 11, 26, 37, 24, 55, 9, 14, 61]","[10, 56, 39, 4, 34, 11, 29, 14, 57, 15, 60, 20, 9, 43, 53, 40, 8, 44, 50, 27, 47, 19, 36, 52, 6, 62, 45, 63, 12, 21, 51, 58]","[6, 9, 11, 14, 15, 19, 36, 44, 45, 51, 58]"
 34 | 32,"[24, 51, 50, 19, 63, 42, 29, 59, 41, 9, 38, 28, 25, 4, 18, 40, 12, 1, 58, 34, 15, 30, 54, 6, 46, 27, 39, 55, 23, 61, 5, 36]","[56, 17, 24, 11, 35, 20, 15, 10, 3, 45, 26, 33, 21, 60, 8, 32, 22, 48, 25, 49, 41, 14, 13, 18, 19, 61, 2, 43, 52, 16, 5, 27]","[5, 15, 18, 19, 24, 25, 27, 41, 61]"
 35 | 33,"[27, 37, 6, 7, 2, 32, 28, 11, 24, 43, 5, 15, 12, 18, 36, 26, 55, 35, 57, 56, 29, 54, 30, 9, 3, 20, 38, 48, 63, 25, 16, 10]","[21, 34, 3, 20, 2, 13, 56, 32, 25, 58, 61, 51, 59, 53, 6, 27, 8, 48, 50, 43, 46, 52, 63, 11, 42, 12, 19, 45, 9, 39, 55, 54]","[2, 3, 6, 9, 11, 12, 20, 25, 27, 32, 43, 48, 54, 55, 56, 63]"
 36 | 34,"[22, 52, 23, 8, 46, 5, 19, 37, 31, 51, 6, 17, 21, 13, 29, 4, 58, 0, 32, 40, 9, 33, 55, 34, 11, 30, 24, 20, 3, 39, 2, 25]","[51, 20, 56, 37, 9, 19, 38, 53, 43, 35, 39, 23, 46, 8, 11, 3, 28, 49, 13, 22, 52, 2, 42, 26, 21, 30, 58, 24, 59, 40, 5, 6]","[2, 3, 5, 6, 8, 9, 11, 13, 19, 20, 21, 22, 23, 24, 30, 37, 39, 40, 46, 51, 52, 58]"
 37 | 35,"[9, 37, 45, 60, 18, 38, 55, 14, 44, 49, 27, 19, 6, 2, 51, 26, 11, 24, 54, 32, 48, 57, 34, 3, 42, 5, 39, 31, 53, 62, 43, 41]","[61, 28, 30, 25, 33, 43, 46, 56, 58, 62, 21, 17, 16, 50, 29, 41, 13, 38, 54, 63, 40, 18, 35, 10, 36, 20, 7, 9, 8, 45, 19, 52]","[9, 18, 19, 38, 41, 43, 45, 54, 62]"
 38 | 36,"[45, 26, 14, 2, 25, 41, 59, 58, 42, 39, 56, 46, 1, 48, 30, 31, 63, 36, 33, 5, 60, 19, 21, 57, 62, 35, 17, 37, 6, 3, 38, 13]","[43, 32, 60, 56, 36, 22, 59, 6, 39, 63, 46, 8, 26, 38, 57, 31, 29, 25, 45, 18, 55, 52, 13, 10, 62, 35, 30, 7, 47, 14, 37, 21]","[6, 13, 14, 21, 25, 26, 30, 31, 35, 36, 37, 38, 39, 45, 46, 56, 57, 59, 60, 62, 63]"
 39 | 37,"[47, 52, 1, 22, 16, 56, 5, 30, 43, 54, 3, 48, 21, 17, 33, 9, 57, 11, 4, 46, 15, 26, 12, 7, 32, 44, 6, 41, 31, 14, 49, 59]","[42, 44, 49, 59, 58, 61, 57, 6, 46, 3, 7, 31, 43, 1, 34, 22, 12, 16, 4, 54, 55, 45, 27, 5, 26, 48, 13, 17, 19, 51, 39, 47]","[1, 3, 4, 5, 6, 7, 12, 16, 17, 22, 26, 31, 43, 44, 46, 47, 48, 49, 54, 57, 59]"
 40 | 38,"[28, 27, 51, 11, 2, 31, 14, 6, 33, 25, 42, 49, 59, 60, 17, 10, 56, 12, 23, 36, 37, 53, 18, 52, 26, 21, 62, 63, 38, 22, 16, 44]","[52, 53, 6, 9, 40, 47, 14, 3, 34, 29, 18, 48, 7, 58, 20, 50, 51, 22, 5, 46, 28, 37, 60, 61, 55, 27, 1, 4, 49, 45, 39, 24]","[6, 14, 18, 22, 27, 28, 37, 49, 51, 52, 53, 60]"
 41 | 39,"[41, 11, 6, 43, 60, 48, 37, 55, 31, 39, 7, 13, 58, 42, 62, 44, 21, 56, 35, 19, 49, 9, 36, 34, 14, 10, 8, 54, 30, 32, 52, 2]","[0, 36, 42, 58, 13, 38, 31, 45, 11, 61, 18, 28, 32, 60, 17, 57, 10, 2, 3, 59, 51, 35, 12, 9, 7, 50, 63, 41, 48, 34, 39, 49]","[2, 7, 9, 10, 11, 13, 31, 32, 34, 35, 36, 39, 41, 42, 48, 49, 58, 60]"
 42 | 40,"[27, 26, 20, 51, 10, 56, 49, 22, 31, 61, 44, 24, 32, 17, 50, 29, 30, 19, 3, 4, 12, 0, 43, 41, 35, 58, 33, 15, 37, 34, 57, 25]","[54, 29, 52, 49, 45, 30, 48, 7, 11, 19, 47, 46, 10, 38, 16, 36, 40, 5, 41, 1, 23, 8, 42, 33, 56, 14, 59, 2, 58, 0, 63, 62]","[0, 10, 19, 29, 30, 33, 41, 49, 56, 58]"
 43 | 41,"[44, 7, 43, 52, 38, 18, 47, 2, 49, 41, 36, 42, 57, 1, 23, 27, 39, 50, 31, 51, 16, 29, 30, 26, 40, 24, 10, 20, 6, 11, 62, 19]","[47, 21, 60, 51, 62, 55, 28, 48, 16, 2, 52, 36, 10, 18, 63, 3, 45, 32, 50, 43, 19, 44, 27, 56, 26, 1, 8, 29, 25, 41, 11, 38]","[1, 2, 10, 11, 16, 18, 19, 26, 27, 29, 36, 38, 41, 43, 44, 47, 50, 51, 52, 62]"
 44 | 42,"[11, 29, 26, 5, 13, 32, 53, 10, 25, 60, 52, 39, 35, 55, 23, 17, 49, 47, 28, 20, 4, 50, 2, 31, 19, 48, 34, 6, 0, 54, 16, 37]","[59, 30, 29, 23, 49, 57, 25, 16, 37, 27, 1, 17, 20, 42, 55, 47, 41, 7, 33, 6, 52, 48, 34, 51, 13, 3, 56, 11, 26, 50, 54, 46]","[6, 11, 13, 16, 17, 20, 23, 25, 26, 29, 34, 37, 47, 48, 49, 50, 52, 54, 55]"
 45 | 43,"[2, 21, 7, 46, 49, 10, 25, 20, 32, 23, 55, 26, 4, 34, 15, 16, 28, 50, 30, 42, 37, 45, 44, 1, 39, 5, 6, 13, 24, 11, 38, 14]","[29, 45, 42, 8, 21, 9, 1, 5, 15, 61, 62, 51, 54, 41, 38, 31, 7, 56, 43, 13, 36, 0, 60, 10, 59, 48, 25, 3, 34, 19, 23, 27]","[1, 5, 7, 10, 13, 15, 21, 23, 25, 34, 38, 42, 45]"
 46 | 44,"[53, 2, 10, 19, 21, 8, 35, 56, 49, 5, 45, 42, 39, 16, 38, 32, 20, 9, 25, 41, 15, 29, 61, 7, 1, 11, 6, 47, 57, 36, 40, 28]","[23, 59, 3, 54, 22, 50, 47, 18, 37, 24, 27, 26, 48, 6, 34, 45, 16, 28, 55, 60, 33, 30, 52, 19, 7, 17, 13, 58, 35, 46, 62, 2]","[2, 6, 7, 16, 19, 28, 35, 45, 47]"
 47 | 45,"[46, 13, 6, 58, 35, 57, 18, 2, 53, 30, 31, 28, 60, 5, 21, 56, 52, 48, 55, 39, 59, 12, 23, 45, 36, 22, 50, 0, 20, 4, 29, 32]","[45, 2, 20, 60, 13, 35, 22, 57, 24, 55, 5, 58, 51, 32, 56, 4, 0, 8, 7, 54, 41, 36, 48, 53, 23, 34, 38, 39, 28, 29, 61, 30]","[0, 2, 4, 5, 13, 20, 22, 23, 28, 29, 30, 32, 35, 36, 39, 45, 48, 53, 55, 56, 57, 58, 60]"
 48 | 46,"[49, 54, 43, 3, 33, 63, 61, 57, 53, 42, 58, 48, 11, 51, 50, 55, 39, 1, 9, 28, 30, 6, 21, 20, 7, 47, 32, 19, 15, 5, 36, 40]","[44, 18, 40, 59, 58, 54, 0, 29, 38, 30, 48, 24, 55, 17, 52, 43, 11, 60, 31, 53, 20, 45, 41, 63, 3, 36, 23, 56, 35, 19, 14, 39]","[3, 11, 19, 20, 30, 36, 39, 40, 43, 48, 53, 54, 55, 58, 63]"
 49 | 47,"[20, 13, 57, 11, 8, 58, 9, 4, 43, 10, 12, 61, 48, 40, 22, 32, 50, 2, 25, 41, 17, 21, 33, 5, 1, 37, 34, 19, 31, 47, 15, 3]","[58, 10, 51, 3, 1, 39, 41, 38, 46, 8, 42, 12, 20, 32, 43, 5, 2, 50, 25, 17, 18, 55, 27, 40, 16, 19, 30, 57, 31, 47, 48, 37]","[1, 2, 3, 5, 8, 10, 12, 17, 19, 20, 25, 31, 32, 37, 40, 41, 43, 47, 48, 50, 57, 58]"
 50 | 48,"[39, 3, 45, 11, 22, 60, 1, 19, 25, 55, 18, 40, 35, 4, 58, 28, 8, 36, 41, 44, 30, 61, 50, 9, 2, 5, 15, 48, 12, 32, 31, 56]","[26, 19, 24, 8, 39, 44, 22, 30, 18, 31, 58, 11, 12, 63, 57, 17, 49, 45, 55, 23, 2, 42, 59, 60, 47, 0, 5, 21, 1, 29, 41, 27]","[1, 2, 5, 8, 11, 12, 18, 19, 22, 30, 31, 39, 41, 44, 45, 55, 58, 60]"
 51 | 49,"[39, 40, 9, 0, 34, 57, 30, 43, 7, 35, 6, 24, 41, 63, 44, 36, 8, 13, 45, 15, 28, 55, 61, 42, 11, 48, 12, 38, 5, 27, 50, 59]","[9, 40, 0, 15, 52, 31, 45, 7, 57, 55, 28, 44, 26, 29, 11, 56, 38, 17, 3, 63, 42, 43, 5, 60, 30, 10, 58, 47, 41, 50, 54, 48]","[0, 5, 7, 9, 11, 15, 28, 30, 38, 40, 41, 42, 43, 44, 45, 48, 50, 55, 57, 63]"
 52 | 50,"[27, 33, 14, 63, 0, 59, 54, 9, 62, 31, 21, 53, 39, 19, 10, 2, 22, 16, 29, 44, 17, 24, 60, 35, 12, 56, 4, 34, 36, 57, 30, 26]","[24, 36, 50, 3, 63, 58, 17, 22, 56, 0, 60, 7, 27, 55, 4, 11, 40, 31, 62, 14, 19, 35, 39, 33, 43, 21, 10, 6, 45, 59, 8, 18]","[0, 4, 10, 14, 17, 19, 21, 22, 24, 27, 31, 33, 35, 36, 39, 56, 59, 60, 62, 63]"
 53 | 51,"[2, 27, 29, 42, 14, 34, 24, 58, 31, 4, 43, 62, 7, 1, 9, 5, 10, 53, 21, 49, 26, 23, 32, 3, 35, 51, 59, 55, 30, 39, 13, 16]","[57, 4, 11, 56, 37, 33, 63, 21, 30, 24, 55, 60, 48, 19, 46, 54, 44, 47, 7, 25, 31, 17, 18, 15, 45, 13, 10, 29, 0, 6, 61, 1]","[1, 4, 7, 10, 13, 21, 24, 29, 30, 31, 55]"
 54 | 52,"[26, 63, 60, 20, 62, 17, 52, 35, 56, 12, 51, 10, 31, 42, 50, 18, 4, 39, 11, 44, 22, 25, 33, 5, 32, 13, 45, 0, 28, 23, 48, 59]","[36, 4, 35, 53, 22, 42, 55, 7, 18, 19, 13, 54, 52, 29, 27, 40, 25, 46, 15, 31, 63, 43, 57, 33, 38, 37, 1, 39, 9, 41, 45, 16]","[4, 13, 18, 22, 25, 31, 33, 35, 39, 42, 45, 52, 63]"
 55 | 53,"[30, 10, 36, 20, 63, 2, 43, 62, 25, 33, 9, 4, 24, 60, 21, 57, 26, 40, 53, 18, 17, 45, 23, 50, 3, 48, 41, 12, 52, 47, 59, 0]","[15, 55, 6, 43, 41, 46, 32, 30, 48, 39, 9, 28, 58, 16, 8, 7, 33, 56, 20, 2, 21, 24, 34, 49, 1, 23, 17, 11, 45, 0, 44, 60]","[0, 2, 9, 17, 20, 21, 23, 24, 30, 33, 41, 43, 45, 48, 60]"
 56 | 54,"[34, 24, 6, 4, 11, 52, 45, 54, 27, 49, 63, 16, 1, 28, 44, 33, 20, 22, 2, 57, 3, 40, 23, 30, 7, 41, 8, 62, 43, 58, 17, 46]","[61, 27, 43, 59, 55, 46, 10, 24, 12, 60, 1, 22, 23, 19, 52, 29, 40, 50, 39, 37, 26, 0, 63, 49, 56, 35, 6, 17, 41, 33, 9, 25]","[1, 6, 17, 22, 23, 24, 27, 33, 40, 41, 43, 46, 49, 52, 63]"
 57 | 55,"[40, 62, 51, 34, 3, 26, 52, 29, 5, 44, 18, 23, 61, 10, 50, 24, 47, 43, 37, 7, 38, 11, 15, 59, 36, 49, 53, 42, 48, 45, 56, 58]","[4, 28, 22, 9, 35, 30, 5, 2, 54, 33, 26, 11, 13, 6, 25, 31, 20, 17, 34, 27, 36, 55, 48, 8, 32, 19, 56, 42, 46, 21, 57, 41]","[5, 11, 26, 34, 36, 42, 48, 56]"
 58 | 56,"[49, 63, 30, 15, 54, 9, 6, 42, 8, 27, 3, 12, 48, 2, 36, 1, 26, 11, 43, 16, 61, 0, 62, 25, 53, 24, 51, 7, 19, 20, 14, 41]","[37, 55, 44, 8, 35, 43, 10, 36, 17, 32, 51, 7, 9, 59, 56, 16, 33, 19, 14, 58, 15, 6, 28, 53, 60, 50, 30, 41, 2, 23, 61, 34]","[2, 6, 7, 8, 9, 14, 15, 16, 19, 30, 36, 41, 43, 51, 53, 61]"
 59 | 57,"[26, 10, 13, 42, 6, 50, 47, 34, 27, 36, 46, 3, 14, 61, 5, 44, 15, 55, 30, 41, 52, 28, 8, 2, 21, 38, 40, 11, 0, 62, 1, 16]","[19, 26, 14, 7, 13, 1, 15, 29, 4, 63, 22, 59, 47, 60, 51, 35, 23, 50, 58, 30, 57, 2, 49, 31, 62, 25, 24, 53, 20, 45, 54, 12]","[1, 2, 13, 14, 15, 26, 30, 47, 50, 62]"
 60 | 58,"[43, 24, 41, 12, 33, 52, 34, 30, 39, 51, 1, 37, 49, 14, 50, 40, 23, 42, 17, 6, 31, 7, 3, 61, 21, 38, 63, 57, 20, 45, 9, 36]","[60, 34, 57, 3, 41, 49, 50, 23, 40, 14, 42, 58, 55, 19, 54, 22, 59, 21, 15, 20, 9, 28, 63, 17, 51, 27, 37, 13, 4, 2, 33, 1]","[1, 3, 9, 14, 17, 20, 21, 23, 33, 34, 37, 40, 41, 42, 49, 50, 51, 57, 63]"
 61 | 59,"[5, 19, 20, 41, 52, 7, 49, 50, 28, 18, 1, 40, 33, 55, 53, 2, 22, 47, 27, 62, 29, 17, 58, 4, 43, 39, 14, 15, 45, 26, 10, 21]","[62, 61, 27, 26, 7, 25, 40, 4, 32, 14, 42, 50, 20, 39, 24, 3, 58, 2, 57, 5, 1, 17, 11, 9, 8, 38, 18, 46, 51, 29, 23, 33]","[1, 2, 4, 5, 7, 14, 17, 18, 20, 26, 27, 29, 33, 39, 40, 50, 58, 62]"
 62 | 60,"[33, 32, 31, 53, 61, 4, 40, 55, 1, 43, 26, 25, 18, 17, 45, 46, 59, 21, 23, 35, 10, 27, 12, 50, 38, 42, 30, 2, 13, 29, 5, 14]","[13, 26, 30, 15, 50, 61, 2, 42, 52, 46, 32, 23, 17, 14, 21, 44, 40, 53, 8, 33, 59, 31, 28, 20, 4, 5, 55, 60, 56, 41, 24, 27]","[2, 4, 5, 13, 14, 17, 21, 23, 26, 27, 30, 31, 32, 33, 40, 42, 46, 50, 53, 55, 59, 61]"
 63 | 61,"[44, 60, 25, 59, 56, 22, 42, 5, 12, 4, 31, 35, 50, 2, 43, 19, 62, 53, 24, 15, 47, 13, 37, 6, 29, 9, 34, 49, 1, 27, 55, 0]","[48, 18, 47, 60, 25, 34, 3, 13, 36, 15, 54, 35, 2, 57, 9, 40, 42, 4, 24, 43, 51, 61, 62, 26, 56, 0, 53, 11, 5, 28, 58, 19]","[0, 2, 4, 5, 9, 13, 15, 19, 24, 25, 34, 35, 42, 43, 47, 53, 56, 60, 62]"
 64 | 62,"[16, 53, 6, 40, 31, 42, 45, 61, 37, 33, 27, 17, 57, 44, 2, 28, 15, 41, 8, 35, 5, 62, 1, 0, 22, 34, 21, 4, 18, 25, 36, 3]","[19, 28, 56, 32, 47, 62, 48, 43, 39, 3, 26, 38, 52, 31, 12, 33, 24, 46, 36, 51, 61, 27, 8, 57, 2, 63, 6, 9, 29, 54, 5, 13]","[2, 3, 5, 6, 8, 27, 28, 31, 33, 36, 57, 61, 62]"
 65 | 63,"[46, 10, 18, 3, 15, 4, 23, 13, 31, 6, 30, 39, 16, 35, 56, 59, 28, 25, 27, 26, 50, 54, 34, 36, 12, 37, 33, 42, 40, 32, 0, 5]","[56, 41, 25, 26, 3, 4, 63, 30, 20, 59, 23, 35, 39, 62, 53, 47, 52, 27, 28, 6, 50, 18, 46, 55, 33, 13, 34, 32, 10, 29, 16, 0]","[0, 3, 4, 6, 10, 13, 16, 18, 23, 25, 26, 27, 28, 30, 32, 33, 34, 35, 39, 46, 50, 56, 59]"
 66 | 64,"[47, 11, 4, 10, 52, 54, 43, 50, 18, 60, 34, 39, 36, 30, 37, 23, 41, 32, 5, 15, 2, 58, 40, 28, 27, 38, 12, 0, 20, 42, 26, 19]","[22, 18, 31, 62, 33, 15, 38, 4, 49, 17, 26, 50, 41, 27, 46, 35, 48, 36, 29, 9, 16, 24, 23, 56, 19, 5, 1, 12, 34, 61, 8, 55]","[4, 5, 12, 15, 18, 19, 23, 26, 27, 34, 36, 38, 41, 50]"
 67 | 65,"[34, 4, 44, 39, 29, 25, 16, 35, 10, 51, 62, 40, 5, 20, 6, 32, 48, 42, 23, 22, 7, 33, 3, 19, 8, 27, 52, 21, 13, 53, 36, 12]","[38, 2, 46, 35, 7, 44, 1, 49, 45, 27, 57, 0, 19, 61, 59, 11, 16, 18, 51, 8, 60, 9, 12, 15, 23, 6, 40, 22, 14, 30, 50, 31]","[6, 7, 8, 12, 16, 19, 22, 23, 27, 35, 40, 44, 51]"
 68 | 66,"[28, 39, 33, 2, 21, 53, 14, 12, 18, 7, 45, 43, 37, 60, 54, 42, 11, 38, 23, 9, 50, 26, 22, 62, 15, 5, 8, 0, 20, 13, 56, 30]","[51, 18, 47, 54, 4, 60, 48, 62, 61, 56, 50, 40, 14, 28, 39, 2, 25, 9, 0, 7, 13, 32, 33, 35, 30, 11, 8, 20, 23, 49, 12, 22]","[0, 2, 7, 8, 9, 11, 12, 13, 14, 18, 20, 22, 23, 28, 30, 33, 39, 50, 54, 56, 60, 62]"
 69 | 67,"[56, 61, 32, 49, 34, 25, 16, 14, 0, 28, 10, 55, 46, 53, 27, 52, 39, 37, 63, 6, 43, 21, 19, 30, 29, 50, 41, 8, 31, 18, 33, 40]","[22, 51, 50, 8, 32, 45, 6, 38, 34, 3, 10, 43, 54, 37, 1, 13, 61, 55, 47, 5, 20, 23, 9, 11, 58, 59, 48, 27, 17, 42, 31, 25]","[6, 8, 10, 25, 27, 31, 32, 34, 37, 43, 50, 55, 61]"
 70 | 68,"[32, 56, 63, 19, 46, 60, 23, 2, 62, 44, 53, 51, 49, 54, 33, 4, 31, 6, 47, 34, 61, 21, 12, 26, 55, 5, 27, 29, 15, 45, 52, 38]","[49, 8, 24, 36, 0, 3, 33, 51, 16, 11, 43, 40, 2, 29, 41, 38, 7, 39, 17, 55, 18, 62, 25, 20, 48, 14, 10, 13, 30, 22, 19, 50]","[2, 19, 29, 33, 38, 49, 51, 55, 62]"
 71 | 69,"[10, 62, 7, 61, 52, 43, 6, 14, 42, 13, 23, 30, 11, 22, 3, 8, 21, 63, 1, 34, 35, 4, 19, 36, 27, 26, 33, 28, 29, 59, 60, 16]","[31, 63, 8, 6, 32, 52, 48, 30, 53, 49, 5, 26, 2, 14, 47, 43, 51, 18, 44, 33, 55, 4, 20, 17, 42, 7, 15, 10, 24, 3, 59, 39]","[3, 4, 6, 7, 8, 10, 14, 26, 30, 33, 42, 43, 52, 59, 63]"
 72 | 70,"[9, 22, 57, 5, 0, 10, 56, 20, 8, 23, 4, 3, 58, 50, 38, 18, 1, 14, 17, 40, 21, 11, 48, 36, 27, 31, 2, 30, 34, 37, 49, 7]","[41, 36, 45, 4, 6, 42, 30, 10, 33, 54, 44, 47, 18, 40, 27, 9, 2, 21, 49, 60, 34, 1, 5, 23, 50, 46, 57, 32, 22, 26, 7, 11]","[1, 2, 4, 5, 7, 9, 10, 11, 18, 21, 22, 23, 27, 30, 34, 36, 40, 49, 50, 57]"
 73 | 71,"[57, 29, 41, 52, 62, 54, 6, 11, 36, 0, 44, 9, 35, 59, 63, 32, 47, 49, 38, 34, 8, 53, 18, 13, 27, 60, 3, 10, 19, 61, 25, 28]","[10, 62, 39, 11, 16, 28, 14, 37, 23, 8, 17, 25, 58, 45, 7, 56, 36, 61, 48, 12, 13, 4, 33, 55, 27, 42, 20, 46, 32, 31, 43, 21]","[8, 10, 11, 13, 25, 27, 28, 32, 36, 61, 62]"
 74 | 72,"[63, 8, 51, 13, 28, 52, 10, 41, 46, 34, 14, 9, 20, 39, 11, 25, 7, 21, 59, 5, 56, 23, 43, 47, 27, 35, 54, 30, 16, 37, 57, 2]","[19, 18, 24, 37, 58, 17, 9, 13, 2, 50, 16, 63, 3, 39, 53, 4, 35, 49, 22, 42, 45, 43, 20, 14, 47, 32, 12, 52, 26, 56, 11, 29]","[2, 9, 11, 13, 14, 16, 20, 35, 37, 39, 43, 47, 52, 56, 63]"
 75 | 73,"[56, 35, 29, 20, 24, 27, 31, 12, 2, 4, 33, 11, 60, 16, 7, 8, 58, 63, 46, 49, 36, 52, 53, 41, 30, 32, 51, 47, 62, 21, 15, 0]","[51, 26, 31, 25, 47, 60, 49, 45, 9, 34, 61, 33, 38, 44, 53, 59, 3, 14, 48, 8, 22, 4, 19, 55, 6, 43, 37, 5, 50, 27, 40, 17]","[4, 8, 27, 31, 33, 47, 49, 51, 53, 60]"
 76 | 74,"[17, 33, 36, 34, 27, 54, 37, 29, 38, 57, 21, 40, 43, 20, 48, 10, 41, 28, 22, 4, 32, 30, 31, 59, 47, 23, 14, 5, 1, 42, 50, 44]","[13, 23, 10, 14, 51, 12, 21, 40, 32, 3, 29, 56, 33, 50, 37, 22, 63, 47, 24, 38, 35, 43, 5, 26, 16, 27, 31, 1, 49, 46, 8, 52]","[1, 5, 10, 14, 21, 22, 23, 27, 29, 31, 32, 33, 37, 38, 40, 43, 47, 50]"
 77 | 75,"[57, 37, 60, 46, 20, 41, 33, 42, 26, 55, 39, 4, 36, 10, 32, 31, 58, 9, 3, 59, 22, 2, 34, 17, 0, 61, 16, 53, 12, 25, 50, 23]","[54, 19, 15, 49, 33, 31, 8, 28, 17, 38, 27, 55, 14, 4, 45, 6, 9, 60, 10, 24, 61, 63, 13, 47, 46, 2, 20, 36, 48, 7, 53, 23]","[2, 4, 9, 10, 17, 20, 23, 31, 33, 36, 46, 53, 55, 60, 61]"
 78 | 76,"[20, 56, 53, 3, 34, 4, 2, 6, 27, 15, 63, 23, 55, 39, 14, 36, 42, 25, 35, 38, 43, 1, 37, 51, 16, 29, 31, 47, 26, 28, 10, 45]","[33, 19, 20, 42, 10, 11, 48, 4, 47, 26, 9, 37, 55, 30, 58, 8, 41, 46, 59, 50, 34, 43, 18, 5, 62, 49, 23, 21, 61, 57, 51, 15]","[4, 10, 15, 20, 23, 26, 34, 37, 42, 43, 47, 51, 55]"
 79 | 77,"[23, 14, 10, 28, 7, 25, 2, 0, 29, 1, 35, 8, 54, 58, 51, 24, 21, 41, 3, 38, 44, 45, 39, 5, 53, 55, 17, 48, 6, 20, 18, 34]","[2, 53, 46, 41, 62, 43, 12, 63, 10, 32, 22, 59, 51, 13, 25, 23, 14, 36, 16, 11, 58, 57, 9, 61, 42, 47, 37, 50, 33, 3, 6, 15]","[2, 3, 6, 10, 14, 23, 25, 41, 51, 53, 58]"
 80 | 78,"[41, 50, 43, 38, 1, 44, 15, 61, 23, 26, 19, 9, 6, 31, 54, 18, 56, 10, 16, 29, 42, 39, 11, 12, 60, 52, 45, 63, 0, 30, 20, 53]","[58, 44, 4, 38, 11, 39, 60, 59, 22, 46, 50, 7, 15, 13, 8, 36, 5, 26, 28, 10, 53, 1, 23, 20, 43, 61, 51, 40, 57, 9, 54, 14]","[1, 9, 10, 11, 15, 20, 23, 26, 38, 39, 43, 44, 50, 53, 54, 60, 61]"
 81 | 79,"[33, 36, 54, 26, 59, 28, 0, 10, 1, 63, 45, 11, 53, 56, 6, 4, 27, 40, 39, 61, 55, 18, 3, 48, 9, 22, 2, 34, 15, 16, 60, 42]","[54, 63, 16, 12, 53, 27, 34, 8, 1, 41, 36, 52, 58, 24, 59, 23, 30, 6, 44, 32, 37, 48, 3, 42, 9, 45, 51, 14, 22, 61, 50, 4]","[1, 3, 4, 6, 9, 16, 22, 27, 34, 36, 42, 45, 48, 53, 54, 59, 61, 63]"
 82 | 80,"[50, 47, 26, 38, 60, 19, 14, 61, 55, 2, 31, 9, 29, 11, 62, 58, 5, 24, 25, 17, 13, 53, 37, 1, 34, 6, 43, 54, 33, 12, 41, 7]","[16, 62, 31, 60, 50, 29, 10, 20, 39, 2, 42, 14, 22, 1, 13, 59, 0, 26, 11, 34, 19, 6, 7, 54, 8, 38, 33, 61, 17, 25, 58, 47]","[1, 2, 6, 7, 11, 13, 14, 17, 19, 25, 26, 29, 31, 33, 34, 38, 47, 50, 54, 58, 60, 61, 62]"
 83 | 81,"[61, 3, 10, 49, 21, 43, 46, 18, 16, 57, 4, 2, 19, 14, 34, 62, 26, 25, 1, 5, 6, 52, 63, 13, 36, 7, 45, 39, 48, 15, 54, 33]","[47, 48, 6, 16, 31, 13, 56, 17, 21, 55, 36, 40, 35, 2, 50, 3, 53, 38, 51, 23, 12, 11, 24, 34, 15, 46, 45, 8, 1, 20, 22, 39]","[1, 2, 3, 6, 13, 15, 16, 21, 34, 36, 39, 45, 46, 48]"
 84 | 82,"[30, 47, 32, 19, 28, 45, 23, 13, 37, 0, 40, 16, 14, 26, 5, 43, 12, 41, 3, 6, 53, 51, 63, 24, 7, 61, 20, 22, 39, 11, 33, 34]","[9, 38, 46, 16, 6, 21, 39, 51, 1, 54, 13, 18, 57, 58, 8, 48, 29, 10, 31, 44, 17, 28, 27, 45, 34, 47, 60, 4, 5, 42, 15, 50]","[5, 6, 13, 16, 28, 34, 39, 45, 47, 51]"
 85 | 83,"[18, 15, 28, 52, 23, 43, 45, 19, 34, 37, 21, 8, 27, 14, 2, 4, 47, 42, 16, 17, 60, 29, 39, 0, 58, 5, 46, 33, 12, 32, 55, 10]","[51, 27, 18, 40, 45, 32, 60, 0, 53, 4, 23, 28, 10, 15, 29, 17, 24, 2, 5, 34, 21, 22, 52, 11, 12, 50, 44, 9, 3, 42, 39, 48]","[0, 2, 4, 5, 10, 12, 15, 17, 18, 21, 23, 27, 28, 29, 32, 34, 39, 42, 45, 52, 60]"
 86 | 84,"[16, 49, 22, 56, 0, 53, 12, 26, 9, 51, 20, 14, 38, 29, 36, 13, 19, 40, 6, 44, 48, 43, 32, 15, 42, 62, 39, 52, 23, 7, 60, 50]","[45, 50, 38, 19, 63, 9, 26, 20, 31, 4, 62, 54, 29, 61, 47, 16, 18, 10, 52, 39, 35, 17, 23, 56, 51, 14, 27, 49, 40, 30, 58, 15]","[9, 14, 15, 16, 19, 20, 23, 26, 29, 38, 39, 40, 49, 50, 51, 52, 56, 62]"
 87 | 85,"[41, 56, 11, 28, 36, 57, 6, 23, 49, 29, 20, 5, 63, 39, 58, 51, 42, 37, 18, 0, 19, 31, 47, 10, 32, 48, 38, 7, 13, 44, 45, 8]","[29, 5, 60, 63, 11, 12, 43, 20, 42, 30, 21, 3, 6, 49, 58, 17, 36, 15, 1, 44, 24, 41, 59, 51, 53, 32, 38, 56, 37, 57, 23, 33]","[5, 6, 11, 20, 23, 29, 32, 36, 37, 38, 41, 42, 44, 49, 51, 56, 57, 58, 63]"
 88 | 86,"[19, 38, 45, 33, 6, 24, 36, 10, 29, 30, 21, 9, 59, 25, 32, 11, 4, 2, 5, 57, 20, 40, 35, 55, 15, 1, 34, 44, 3, 8, 23, 18]","[37, 42, 17, 16, 6, 55, 43, 0, 56, 53, 31, 14, 21, 44, 26, 61, 49, 25, 12, 27, 28, 18, 32, 51, 13, 50, 52, 19, 54, 58, 30, 48]","[6, 18, 19, 21, 25, 30, 32, 44, 55]"
 89 | 87,"[28, 38, 31, 55, 61, 37, 62, 60, 25, 6, 41, 15, 9, 4, 18, 7, 12, 49, 24, 0, 23, 46, 44, 3, 30, 59, 5, 8, 2, 34, 13, 11]","[22, 34, 53, 40, 60, 36, 27, 43, 1, 10, 17, 15, 25, 51, 48, 57, 13, 33, 54, 7, 30, 16, 38, 29, 26, 0, 44, 14, 12, 19, 42, 31]","[0, 7, 12, 13, 15, 25, 30, 31, 34, 38, 44, 60]"
 90 | 88,"[60, 43, 34, 28, 48, 25, 29, 15, 16, 46, 26, 54, 4, 38, 11, 61, 39, 20, 17, 50, 27, 2, 49, 14, 36, 24, 5, 53, 41, 55, 10, 3]","[56, 48, 63, 6, 54, 38, 21, 42, 39, 47, 20, 11, 49, 14, 28, 46, 53, 61, 23, 9, 24, 34, 10, 43, 0, 52, 5, 29, 44, 59, 27, 16]","[5, 10, 11, 14, 16, 20, 24, 27, 28, 29, 34, 38, 39, 43, 46, 48, 49, 53, 54, 61]"
 91 | 89,"[24, 23, 8, 45, 17, 37, 35, 10, 58, 57, 3, 61, 18, 29, 15, 4, 13, 43, 5, 19, 26, 49, 0, 1, 27, 39, 6, 20, 16, 50, 14, 34]","[38, 63, 5, 49, 47, 18, 39, 60, 29, 26, 44, 3, 0, 10, 52, 61, 14, 4, 15, 35, 59, 8, 57, 50, 54, 20, 28, 24, 16, 1, 55, 25]","[0, 1, 3, 4, 5, 8, 10, 14, 15, 16, 18, 20, 24, 26, 29, 35, 39, 49, 50, 57, 61]"
 92 | 90,"[21, 35, 6, 49, 16, 39, 20, 27, 28, 57, 13, 29, 32, 61, 33, 46, 60, 38, 54, 45, 19, 59, 43, 36, 51, 31, 24, 4, 52, 50, 9, 63]","[13, 2, 42, 49, 59, 5, 27, 53, 33, 44, 16, 25, 15, 48, 52, 11, 47, 32, 7, 56, 17, 10, 14, 43, 24, 31, 0, 37, 51, 4, 30, 19]","[4, 13, 16, 19, 24, 27, 31, 32, 33, 43, 49, 51, 52, 59]"
 93 | 91,"[38, 36, 34, 17, 18, 46, 20, 3, 15, 57, 23, 42, 4, 25, 2, 7, 55, 24, 19, 51, 52, 31, 21, 28, 16, 62, 9, 12, 27, 13, 48, 1]","[52, 28, 54, 22, 51, 18, 19, 8, 26, 43, 55, 4, 21, 35, 1, 37, 46, 9, 44, 39, 0, 23, 15, 42, 57, 25, 27, 49, 34, 17, 13, 11]","[1, 4, 9, 13, 15, 17, 18, 19, 21, 23, 25, 27, 28, 34, 42, 46, 51, 52, 55, 57]"
 94 | 92,"[59, 17, 41, 57, 60, 46, 5, 52, 20, 38, 28, 45, 50, 58, 22, 54, 36, 56, 47, 32, 27, 3, 1, 10, 53, 24, 31, 23, 49, 6, 35, 0]","[59, 41, 20, 24, 53, 19, 46, 5, 35, 31, 38, 37, 36, 0, 52, 16, 2, 45, 17, 33, 21, 57, 50, 61, 56, 11, 8, 29, 18, 54, 10, 47]","[0, 5, 10, 17, 20, 24, 31, 35, 36, 38, 41, 45, 46, 47, 50, 52, 53, 54, 56, 57, 59]"
 95 | 93,"[11, 52, 33, 43, 9, 23, 21, 61, 10, 49, 20, 15, 58, 63, 62, 16, 45, 2, 37, 28, 35, 34, 31, 24, 3, 44, 29, 30, 19, 8, 32, 38]","[49, 29, 32, 0, 20, 53, 39, 26, 44, 23, 6, 35, 10, 15, 57, 7, 61, 56, 60, 17, 4, 42, 18, 55, 46, 28, 21, 41, 24, 33, 13, 47]","[10, 15, 20, 21, 23, 24, 28, 29, 32, 33, 35, 44, 49, 61]"
 96 | 94,"[8, 3, 10, 61, 33, 57, 38, 37, 20, 0, 35, 40, 58, 19, 25, 4, 24, 22, 13, 56, 45, 16, 27, 51, 46, 29, 59, 34, 15, 31, 44, 39]","[58, 55, 44, 60, 31, 8, 40, 18, 13, 26, 7, 61, 38, 19, 37, 51, 57, 53, 29, 4, 50, 46, 35, 33, 39, 5, 45, 24, 36, 34, 3, 63]","[3, 4, 8, 13, 19, 24, 29, 31, 33, 34, 35, 37, 38, 39, 40, 44, 45, 46, 51, 57, 58, 61]"
 97 | 95,"[12, 23, 31, 47, 17, 22, 27, 29, 16, 37, 0, 7, 4, 50, 39, 45, 46, 19, 20, 3, 30, 41, 62, 60, 8, 18, 35, 56, 11, 32, 14, 24]","[16, 13, 5, 49, 42, 12, 40, 34, 23, 48, 36, 51, 18, 7, 62, 31, 19, 61, 25, 58, 53, 54, 60, 17, 46, 59, 41, 52, 10, 33, 63, 26]","[7, 12, 16, 17, 18, 19, 23, 31, 41, 46, 60, 62]"
 98 | 96,"[22, 41, 48, 14, 11, 34, 2, 16, 3, 6, 21, 56, 61, 29, 19, 52, 18, 38, 49, 15, 12, 43, 62, 50, 63, 46, 55, 28, 0, 30, 51, 27]","[5, 2, 11, 26, 30, 40, 49, 47, 62, 54, 4, 17, 60, 44, 61, 28, 18, 35, 39, 24, 37, 57, 8, 7, 20, 45, 10, 12, 25, 3, 38, 36]","[2, 3, 11, 12, 18, 28, 30, 38, 49, 61, 62]"
 99 | 97,"[2, 62, 8, 22, 15, 60, 59, 14, 0, 10, 33, 34, 50, 4, 19, 31, 58, 1, 16, 53, 17, 24, 27, 18, 32, 11, 51, 43, 54, 42, 28, 39]","[57, 54, 13, 47, 2, 15, 7, 58, 43, 16, 22, 0, 36, 35, 23, 38, 21, 55, 33, 19, 46, 8, 29, 42, 60, 27, 32, 53, 49, 31, 9, 28]","[0, 2, 8, 15, 16, 19, 22, 27, 28, 31, 32, 33, 42, 43, 53, 54, 58, 60]"
100 | 98,"[62, 54, 43, 14, 61, 0, 42, 40, 15, 12, 50, 30, 29, 47, 37, 27, 20, 33, 7, 38, 2, 5, 53, 60, 11, 16, 35, 25, 6, 21, 55, 17]","[38, 7, 54, 31, 47, 28, 34, 8, 12, 20, 3, 63, 0, 52, 36, 55, 60, 25, 17, 15, 13, 46, 33, 56, 26, 11, 14, 16, 40, 4, 37, 45]","[0, 7, 11, 12, 14, 15, 16, 17, 20, 25, 33, 37, 38, 40, 47, 54, 55, 60]"
101 | 99,"[2, 41, 3, 17, 26, 4, 9, 58, 7, 42, 49, 21, 55, 40, 53, 8, 50, 18, 29, 0, 13, 44, 63, 5, 39, 48, 24, 36, 47, 54, 19, 38]","[10, 45, 1, 18, 8, 0, 16, 15, 55, 39, 25, 6, 61, 14, 3, 34, 49, 51, 43, 46, 36, 33, 28, 57, 52, 12, 9, 35, 23, 42, 11, 31]","[0, 3, 8, 9, 18, 36, 39, 42, 49, 55]"
102 | 


--------------------------------------------------------------------------------
/examples/set_intersection/utils.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # The source code is adapted from the sorting source code written by
  8 | # Nils Blach.
  9 | #
 10 | # main author: Robert Gerstenberger
 11 | 
 12 | from typing import Dict, List, Set
 13 | 
 14 | 
 15 | def string_to_list(string: str) -> List[int]:
 16 |     """
 17 |     Helper function to convert a list encoded inside a string into a Python
 18 |     list object of integer elements.
 19 | 
 20 |     :param string: Input string containing a list.
 21 |     :type string: str
 22 |     :return: List of integer elements.
 23 |     :rtype: List[int]
 24 |     :raise AssertionError: If input string does not contain a list.
 25 |     """
 26 | 
 27 |     assert string[0] == "[" and string[-1] == "]", "String is not a list."
 28 |     return [int(num) for num in string[1:-1].split(",")]
 29 | 
 30 | 
 31 | def string_to_set(string: str) -> Set[int]:
 32 |     """
 33 |     Helper function to convert a list encoded inside a string into a Python
 34 |     set object of integer elements.
 35 | 
 36 |     :param string: Input string containing a list.
 37 |     :type string: str
 38 |     :return: Set of integer elements.
 39 |     :rtype: Set[int]
 40 |     :raise AssertionError: If input string does not contain a list.
 41 |     """
 42 | 
 43 |     assert string[0] == "[" and string[-1] == "]", "String is not a list."
 44 |     return {int(num) for num in string[1:-1].split(",")}
 45 | 
 46 | 
 47 | def test_set_intersection(state: Dict) -> bool:
 48 |     """
 49 |     Function to test whether the final solution matches ground truth.
 50 | 
 51 |     :param state: Thought state that represents the final solution.
 52 |     :type state: Dict
 53 |     :return: Returns whether the solution matches the ground truth.
 54 |     :rtype: bool
 55 |     """
 56 | 
 57 |     # convert string to list
 58 |     try:
 59 |         correct_list = string_to_list(state["result"])
 60 |         sorted_list = sorted(string_to_list(state["current"]))
 61 |         return sorted_list == correct_list
 62 |     except:
 63 |         return False
 64 | 
 65 | 
 66 | def num_errors(state: Dict) -> float:
 67 |     """
 68 |     Function to locally count the number of errors that serves as a score.
 69 | 
 70 |     :param state: Thought state to be scored.
 71 |     :type state: Dict
 72 |     :return: Number of errors.
 73 |     :rtype: float
 74 |     """
 75 | 
 76 |     try:
 77 |         set1 = string_to_set(state["set1"])
 78 |         set2 = string_to_set(state["set2"])
 79 |         if "subset" in state and state["subset"] != "" and state["subset"] is not None:
 80 |             set2 = string_to_set(state["subset"])
 81 |         common = sorted(list(set1 & set2))
 82 |         llm_solution = sorted(string_to_list(state["current"]))
 83 |         num_errors = 0
 84 |         common_idx = 0
 85 |         llm_idx = 0
 86 |         while common_idx < len(common) and llm_idx < len(llm_solution):
 87 |             if common[common_idx] == llm_solution[llm_idx]:
 88 |                 common_idx += 1
 89 |                 llm_idx += 1
 90 |             elif common[common_idx] < llm_solution[llm_idx]:
 91 |                 common_idx += 1
 92 |                 num_errors += 1
 93 |             elif common[common_idx] > llm_solution[llm_idx]:
 94 |                 llm_idx += 1
 95 |                 num_errors += 1
 96 |         num_errors += len(common) - common_idx + len(llm_solution) - llm_idx
 97 |         return num_errors
 98 |     except:
 99 |         return 1000
100 | 


--------------------------------------------------------------------------------
/examples/sorting/README.md:
--------------------------------------------------------------------------------
 1 | # Sorting
 2 | 
 3 | The use case in this directory sorts the provided list of 
 4 | numbers containing numbers from 0 to 9 (duplicates allowed). 
 5 | We provide implementations of five different approaches for 
 6 | 32, 64 and 128 elements:
 7 | - IO
 8 | - Chain-of-Thought (CoT)
 9 | - Tree of Thought (ToT):
10 |   - ToT: wider tree, meaning more branches per level
11 |   - ToT2: tree with more levels, but fewer branches per level
12 | - Graph of Thoughts (GoT):
13 |   - GoT: split into subarrays / sort / merge
14 | 
15 | ## Data
16 | 
17 | We provide input files with 100 precomputed samples for each list
18 | length: `sorting_<number of elements>.csv`.
19 | 
20 | ## Execution
21 | 
22 | The files to execute the use case are called
23 | `sorting_<number of elements>.py`. In the main body, one can select the
24 | specific samples to be run (variable sample) and the approaches
25 | (variable approaches). It is also possible to set a budget in dollars
26 | (variable budget).
27 | The input filename for the samples is currently hardcoded to
28 | `sorting_<number of elements>.csv`, but can be updated in the function
29 | `run`.
30 | 
31 | The Python scripts will create the directory `result`, if it is not
32 | already present. In the `result` directory, another directory is created
33 | for each run: `{name of LLM}_{list of approaches}_{day}_{start time}`.
34 | Inside each execution-specific directory, two files (`config.json`,
35 | `log.log`) and a separate directory for each selected approach are
36 | created. `config.json` contains the configuration of the run: input data,
37 | selected approaches, name of the LLM, and the budget. `log.log` contains
38 | the prompts and responses of the LLM as well as additional debug data.
39 | The approach directories contain a separate json file for every sample
40 | and the file contains the Graph Reasoning State (GRS) for that sample.
41 | 
42 | ## Plot Data
43 | 
44 | Change the results directory in line 171 of `plot.py` and update the
45 | length parameter in the subsequent line and run `python3 plot.py` to
46 | plot your data.
47 | 


--------------------------------------------------------------------------------
/examples/sorting/example_prompts_sorting_032.md:
--------------------------------------------------------------------------------
  1 | # Sorting 32 Numbers - Prompts and Examples
  2 | ## Prompt Templates
  3 | ### GENERATE: split_prompt
  4 | Replace `{input}` with the input list of numbers to be split.
  5 | ```
  6 | <Instruction> Split the following list of 32 numbers into 2 lists of 16 numbers each, the first list should contain the first 16 numbers and the second list the second 16 numbers.
  7 | Only output the final 2 lists in the following format without any additional text or thoughts!:
  8 | {{
  9 |     "List 1": [3, 4, 3, 5, 7, 8, 1, ...],
 10 |     "List 2": [2, 9, 2, 4, 7, 1, 5, ...]
 11 | }} </Instruction>
 12 | 
 13 | <Example>
 14 | Input: [9, 6, 7, 7, 2, 0, 2, 2, 3, 5, 0, 9, 2, 2, 4, 4, 5, 2, 5, 1, 2, 8, 3, 8, 3, 9, 6, 0, 4, 2, 2, 3]
 15 | Output:
 16 | {{
 17 |     "List 1": [9, 6, 7, 7, 2, 0, 2, 2, 3, 5, 0, 9, 2, 2, 4, 4],
 18 |     "List 2": [5, 2, 5, 1, 2, 8, 3, 8, 3, 9, 6, 0, 4, 2, 2, 3]
 19 | }}
 20 | </Example>
 21 | 
 22 | Input: {input}
 23 | ```
 24 | 
 25 | ### GENERATE: sort_prompt
 26 | Replace `{input}` with the input list of numbers to be sorted.
 27 | ```
 28 | <Instruction> Sort the following list of numbers in ascending order. Output only the sorted list of numbers, no additional text. </Instruction>
 29 | 
 30 | <Examples>
 31 | Input: [5, 1, 0, 1, 2, 0, 4, 8, 1, 9, 5, 1, 3, 3, 9, 7]
 32 | Output: [0, 0, 1, 1, 1, 1, 2, 3, 3, 4, 5, 5, 7, 8, 9, 9]
 33 | 
 34 | Input: [3, 7, 0, 2, 8, 1, 2, 2, 2, 4, 7, 8, 5, 5, 3, 9, 4, 3, 5, 6, 6, 4, 4, 5, 2, 0, 9, 3, 3, 9, 2, 1]
 35 | Output: [0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9]
 36 | 
 37 | Input: [4, 4, 9, 7, 9, 7, 0, 0, 4, 9, 1, 7, 9, 5, 8, 7, 5, 6, 3, 8, 6, 7, 5, 8, 5, 0, 6, 3, 7, 0, 5, 3, 7, 5, 2, 4, 4, 9, 0, 7, 8, 2, 7, 7, 7, 2, 1, 3, 9, 9, 7, 9, 6, 6, 4, 5, 4, 2, 0, 8, 9, 0, 2, 2]
 38 | Output: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]
 39 | </Examples>
 40 | 
 41 | Input: {input}
 42 | ```
 43 | 
 44 | ### IMPROVE: improve_prompt
 45 | Replace `{input}` with the input list of numbers to be sorted and `{incorrectly_sorted}` with the incorrectly sorted list of numbers. `{length}` is the length of the input list.
 46 | ```
 47 | <Instruction> The following two lists represent an unsorted list of numbers and a sorted variant of that list. The sorted variant is not correct. Fix the sorted variant so that it is correct.
 48 | Make sure that the output list is sorted in ascending order, has the same number of elements as the input list ({length}), and contains the same elements as the input list. </Instruction>
 49 | 
 50 | <Approach>
 51 | To fix the incorrectly sorted list follow these steps:
 52 | 1. For each number from 0 to 9, compare the frequency of that number in the incorrectly sorted list to the frequency of that number in the input list.
 53 | 2. Iterate through the incorrectly sorted list and add or remove numbers as needed to make the frequency of each number in the incorrectly sorted list match the frequency of that number in the input list.
 54 | </Approach>
 55 | 
 56 | <Examples>
 57 | Input: [3, 7, 0, 2, 8, 1, 2, 2, 2, 4, 7, 8, 5, 5, 3, 9]
 58 | Incorrectly Sorted: [0, 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 7, 7, 8, 8, 9, 9, 9, 9]
 59 | Reason: The incorrectly sorted list contains four extra 0s, two extra 4s and three extra 9s and is missing two 2s.
 60 | Output: [0, 1, 2, 2, 2, 2, 3, 3, 4, 5, 5, 7, 7, 8, 8, 9]
 61 | 
 62 | Input: [6, 4, 5, 7, 5, 6, 9, 7, 6, 9, 4, 6, 9, 8, 1, 9, 2, 4, 9, 0, 7, 6, 5, 6, 6, 2, 8, 3, 9, 5, 6, 1]
 63 | Incorrectly Sorted: [0, 1, 1, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9]
 64 | Reason: The incorrectly sorted list contains two extra 4s and is missing two 6s and one 9.
 65 | Output: [0, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9, 9]
 66 | 
 67 | Input: [4, 4, 9, 7, 9, 7, 0, 0, 4, 9, 1, 7, 9, 5, 8, 7, 5, 6, 3, 8, 6, 7, 5, 8, 5, 0, 6, 3, 7, 0, 5, 3, 7, 5, 2, 4, 4, 9, 0, 7, 8, 2, 7, 7, 7, 2, 1, 3, 9, 9, 7, 9, 6, 6, 4, 5, 4, 2, 0, 8, 9, 0, 2, 2]
 68 | Incorrectly Sorted: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9]
 69 | Reason: The incorrectly sorted list contains one extra 8 and is missing two 2s, one 3, three 4s, two 5s, one 6, six 7s and one 9.
 70 | Output: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]
 71 | </Examples>
 72 | 
 73 | Input: {input}
 74 | Incorrectly Sorted: {incorrectly_sorted}
 75 | ```
 76 | 
 77 | ### AGGREGATE: aggregate_prompt
 78 | Replace `{input_list1}` and `{input_list2}` with the input lists of numbers to be merged. `{length}` is the length of each input list and `{length_combined}` is the length of the combined list.
 79 | ```
 80 | <Instruction> Merge the following 2 sorted lists of length {length} each, into one sorted list of length {length_combined} using a merge sort style approach.
 81 | Only output the final merged list without any additional text or thoughts!:</Instruction>
 82 | 
 83 | <Approach>
 84 | To merge the two lists in a merge-sort style approach, follow these steps:
 85 | 1. Compare the first element of both lists.
 86 | 2. Append the smaller element to the merged list and move to the next element in the list from which the smaller element came.
 87 | 3. Repeat steps 1 and 2 until one of the lists is empty.
 88 | 4. Append the remaining elements of the non-empty list to the merged list.
 89 | </Approach>
 90 | 
 91 | Merge the following two lists into one sorted list:
 92 | 1: {input_list1}
 93 | 2: {input_list2}
 94 | 
 95 | Merged list:
 96 | ```
 97 | 
 98 | ## Complete Example Prompts
 99 | ### The GoO Summarised
100 | 1. Split the input list into two sub-lists of equal size (split prompt)
101 | 2. For each sub-list: Sort the sub-list (sort prompt) five times; score each sort attempt; keep the best
102 | 3. Merge the sorted sub-lists into one fully sorted list (merge prompt) 10 times; score each merge attempt; keep the best
103 | 4. Fix any potential mistakes in the sorted list (improve prompt) 10 times; score each improvement attempt; keep the best
104 | 
105 | ### Input
106 | ```
107 | [8, 7, 1, 1, 1, 1, 3, 3, 0, 9, 4, 1, 0, 2, 5, 1, 0, 5, 6, 7, 1, 4, 5, 9, 4, 6, 2, 5, 8, 6, 2, 6]
108 | ```
109 | 
110 | ### Step 1 - Prompt
111 | ```
112 | <Instruction> Split the following list of 32 numbers into 2 lists of 16 numbers each, the first list should contain the first 16 numbers and the second list the second 16 numbers.
113 | Only output the final 2 lists in the following format without any additional text or thoughts!:
114 | {
115 |     "List 1": [3, 4, 3, 5, 7, 8, 1, ...],
116 |     "List 2": [2, 9, 2, 4, 7, 1, 5, ...]
117 | } </Instruction>
118 | 
119 | <Example>
120 | Input: [9, 6, 7, 7, 2, 0, 2, 2, 3, 5, 0, 9, 2, 2, 4, 4, 5, 2, 5, 1, 2, 8, 3, 8, 3, 9, 6, 0, 4, 2, 2, 3]
121 | Output:
122 | {
123 |     "List 1": [9, 6, 7, 7, 2, 0, 2, 2, 3, 5, 0, 9, 2, 2, 4, 4],
124 |     "List 2": [5, 2, 5, 1, 2, 8, 3, 8, 3, 9, 6, 0, 4, 2, 2, 3]
125 | }
126 | </Example>
127 | 
128 | Input: [8, 7, 1, 1, 1, 1, 3, 3, 0, 9, 4, 1, 0, 2, 5, 1, 0, 5, 6, 7, 1, 4, 5, 9, 4, 6, 2, 5, 8, 6, 2, 6]
129 | ```
130 | 
131 | ### Step 1 - Response
132 | ```
133 | {
134 |     "List 1": [8, 7, 1, 1, 1, 1, 3, 3, 0, 9, 4, 1, 0, 2, 5, 1],
135 |     "List 2": [0, 5, 6, 7, 1, 4, 5, 9, 4, 6, 2, 5, 8, 6, 2, 6]
136 | }
137 | ```
138 | 
139 | ### Step 2a - Prompt
140 | ```
141 | <Instruction> Sort the following list of numbers in ascending order. Output only the sorted list of numbers, no additional text. </Instruction>
142 | 
143 | <Examples>
144 | Input: [5, 1, 0, 1, 2, 0, 4, 8, 1, 9, 5, 1, 3, 3, 9, 7]
145 | Output: [0, 0, 1, 1, 1, 1, 2, 3, 3, 4, 5, 5, 7, 8, 9, 9]
146 | 
147 | Input: [3, 7, 0, 2, 8, 1, 2, 2, 2, 4, 7, 8, 5, 5, 3, 9, 4, 3, 5, 6, 6, 4, 4, 5, 2, 0, 9, 3, 3, 9, 2, 1]
148 | Output: [0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9]
149 | 
150 | Input: [4, 4, 9, 7, 9, 7, 0, 0, 4, 9, 1, 7, 9, 5, 8, 7, 5, 6, 3, 8, 6, 7, 5, 8, 5, 0, 6, 3, 7, 0, 5, 3, 7, 5, 2, 4, 4, 9, 0, 7, 8, 2, 7, 7, 7, 2, 1, 3, 9, 9, 7, 9, 6, 6, 4, 5, 4, 2, 0, 8, 9, 0, 2, 2]
151 | Output: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]
152 | </Examples>
153 | 
154 | Input: [8, 7, 1, 1, 1, 1, 3, 3, 0, 9, 4, 1, 0, 2, 5, 1]
155 | ```
156 | 
157 | ### Step 2a - 5 Responses
158 | ```
159 | 1. [0, 0, 1, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 7, 8, 9]
160 | 2. [0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 7, 8, 9]
161 | 3. [0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 7, 8, 9]
162 | 4. [0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 7, 8, 9]
163 | 5. [0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 7, 8, 9]
164 | ```
165 | 
166 | ### Step 2b - Prompt
167 | ```
168 | <Instruction> Sort the following list of numbers in ascending order. Output only the sorted list of numbers, no additional text. </Instruction>
169 | 
170 | <Examples>
171 | Input: [5, 1, 0, 1, 2, 0, 4, 8, 1, 9, 5, 1, 3, 3, 9, 7]
172 | Output: [0, 0, 1, 1, 1, 1, 2, 3, 3, 4, 5, 5, 7, 8, 9, 9]
173 | 
174 | Input: [3, 7, 0, 2, 8, 1, 2, 2, 2, 4, 7, 8, 5, 5, 3, 9, 4, 3, 5, 6, 6, 4, 4, 5, 2, 0, 9, 3, 3, 9, 2, 1]
175 | Output: [0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 9]
176 | 
177 | Input: [4, 4, 9, 7, 9, 7, 0, 0, 4, 9, 1, 7, 9, 5, 8, 7, 5, 6, 3, 8, 6, 7, 5, 8, 5, 0, 6, 3, 7, 0, 5, 3, 7, 5, 2, 4, 4, 9, 0, 7, 8, 2, 7, 7, 7, 2, 1, 3, 9, 9, 7, 9, 6, 6, 4, 5, 4, 2, 0, 8, 9, 0, 2, 2]
178 | Output: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]
179 | </Examples>
180 | 
181 | Input: [0, 5, 6, 7, 1, 4, 5, 9, 4, 6, 2, 5, 8, 6, 2, 6]
182 | ```
183 | 
184 | ### Step 2b - 5 Responses
185 | ```
186 | 1. [0, 1, 2, 2, 4, 4, 5, 5, 5, 6, 6, 6, 7, 8, 9]
187 | 2. [0, 1, 2, 2, 4, 4, 5, 5, 5, 6, 6, 6, 7, 8, 9]
188 | 3. [0, 1, 2, 2, 4, 4, 5, 5, 5, 6, 6, 6, 7, 8, 9]
189 | 4. [0, 1, 2, 2, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 8, 9]
190 | 5. [0, 1, 2, 2, 4, 4, 5, 5, 5, 6, 6, 6, 7, 8, 9]
191 | ```
192 | 
193 | ### Step 3 - Prompt
194 | ```
195 | <Instruction> Merge the following 2 sorted lists of length 16 each, into one sorted list of length 32 using a merge sort style approach.
196 | Only output the final merged list without any additional text or thoughts!:</Instruction>
197 | 
198 | <Approach>
199 | To merge the two lists in a merge-sort style approach, follow these steps:
200 | 1. Compare the first element of both lists.
201 | 2. Append the smaller element to the merged list and move to the next element in the list from which the smaller element came.
202 | 3. Repeat steps 1 and 2 until one of the lists is empty.
203 | 4. Append the remaining elements of the non-empty list to the merged list.
204 | </Approach>
205 | 
206 | Merge the following two lists into one sorted list:
207 | 1: [0, 0, 1, 1, 1, 1, 1, 1, 2, 3, 3, 4, 5, 7, 8, 9]
208 | 2: [0, 1, 2, 2, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 8, 9]
209 | 
210 | Merged list:
211 | ```
212 | 
213 | ### Step 3 - 10 Responses
214 | ```
215 | 1. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
216 | 2. [0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
217 | 3. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
218 | 4. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 9]
219 | 5. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
220 | 6. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
221 | 7. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
222 | 8. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
223 | 9. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
224 | 10. [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
225 | ```
226 | 
227 | ### Step 4 - Prompt
228 | ```
229 | <Instruction> The following two lists represent an unsorted list of numbers and a sorted variant of that list. The sorted variant is not correct. Fix the sorted variant so that it is correct.
230 | Make sure that the output list is sorted in ascending order, has the same number of elements as the input list (32), and contains the same elements as the input list. </Instruction>
231 | 
232 | <Approach>
233 | To fix the incorrectly sorted list follow these steps:
234 | 1. For each number from 0 to 9, compare the frequency of that number in the incorrectly sorted list to the frequency of that number in the input list.
235 | 2. Iterate through the incorrectly sorted list and add or remove numbers as needed to make the frequency of each number in the incorrectly sorted list match the frequency of that number in the input list.
236 | </Approach>
237 | 
238 | <Examples>
239 | Input: [3, 7, 0, 2, 8, 1, 2, 2, 2, 4, 7, 8, 5, 5, 3, 9]
240 | Incorrectly Sorted: [0, 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 7, 7, 8, 8, 9, 9, 9, 9]
241 | Reason: The incorrectly sorted list contains four extra 0s, two extra 4s and three extra 9s and is missing two 2s.
242 | Output: [0, 1, 2, 2, 2, 2, 3, 3, 4, 5, 5, 7, 7, 8, 8, 9]
243 | 
244 | Input: [6, 4, 5, 7, 5, 6, 9, 7, 6, 9, 4, 6, 9, 8, 1, 9, 2, 4, 9, 0, 7, 6, 5, 6, 6, 2, 8, 3, 9, 5, 6, 1]
245 | Incorrectly Sorted: [0, 1, 1, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9]
246 | Reason: The incorrectly sorted list contains two extra 4s and is missing two 6s and one 9.
247 | Output: [0, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9, 9]
248 | 
249 | Input: [4, 4, 9, 7, 9, 7, 0, 0, 4, 9, 1, 7, 9, 5, 8, 7, 5, 6, 3, 8, 6, 7, 5, 8, 5, 0, 6, 3, 7, 0, 5, 3, 7, 5, 2, 4, 4, 9, 0, 7, 8, 2, 7, 7, 7, 2, 1, 3, 9, 9, 7, 9, 6, 6, 4, 5, 4, 2, 0, 8, 9, 0, 2, 2]
250 | Incorrectly Sorted: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9]
251 | Reason: The incorrectly sorted list contains one extra 8 and is missing two 2s, one 3, three 4s, two 5s, one 6, six 7s and one 9.
252 | Output: [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]
253 | </Examples>
254 | 
255 | Input: [8, 7, 1, 1, 1, 1, 3, 3, 0, 9, 4, 1, 0, 2, 5, 1, 0, 5, 6, 7, 1, 4, 5, 9, 4, 6, 2, 5, 8, 6, 2, 6]
256 | Incorrectly Sorted: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
257 | ```
258 | 
259 | ### Step 4 - 10 Responses
260 | ```
261 | 1. Reason: The incorrectly sorted list is missing two 0s, one 3, one 4, one 6 and one 9. The incorrectly sorted list also contains one extra 1, one extra 2, and one extra 5.
262 | 
263 |    Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
264 | 
265 |    Note: The output list has the same number of elements as the input list, is sorted in ascending order, and contains the same elements as the input list.
266 | 
267 | 2. Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
268 | 
269 | 3. Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9]
270 | 
271 | 4. Reason: The incorrectly sorted list is missing two 9s and one 0.
272 |    Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9, 9, 9]
273 | 
274 | 5. Reason: The incorrectly sorted list is missing one 0, one 3, one 4, four 6s, and one 9.
275 | 
276 | Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]
277 | 
278 | 6. Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]
279 | 
280 | 7. Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
281 | 
282 | 8. Reason: The incorrectly sorted list is missing three 0s, two 3s, two 4s, one 5, two 6s, and one 9.
283 |    Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 8, 8, 9]
284 | 
285 | 9. Reason: The incorrectly sorted list is missing one 0, three 2s, two 3s, two 4s, three 5s, two 6s, one 7, and one 9.
286 |    Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 8, 8, 9, 9]
287 | 
288 | 10. Reason: The incorrectly sorted list is missing three 1s, one 3, one 4, two 5s, one 6, one 7, one 8, and one 9.
289 |     Output: [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
290 | ```
291 | 
292 | ### Final Result
293 | ```
294 | [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]
295 | ```
296 | 


--------------------------------------------------------------------------------
/examples/sorting/plot.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | # contributions: Robert Gerstenberger
  9 | 
 10 | import json
 11 | import os
 12 | import matplotlib.pyplot as plt
 13 | 
 14 | 
 15 | def get_complete_results(base_directory):
 16 |     results_complete = {}
 17 |     for folder_name in os.listdir(base_directory):
 18 |         folder_path = os.path.join(base_directory, folder_name)
 19 |         if os.path.isdir(folder_path):
 20 |             results_complete[folder_name] = []
 21 |             for file_name in os.listdir(folder_path):
 22 |                 if file_name.endswith(".json"):
 23 |                     file_path = os.path.join(folder_path, file_name)
 24 |                     with open(file_path, "r") as f:
 25 |                         data = json.load(f)
 26 |                         results_complete[folder_name].append(
 27 |                             {"key": int(file_name.split(".")[0]), "data": data}
 28 |                         )
 29 |         for key in results_complete.keys():
 30 |             results_complete[key] = sorted(
 31 |                 results_complete[key], key=lambda x: x["key"]
 32 |             )
 33 |     return results_complete
 34 | 
 35 | 
 36 | def get_final_scores(results_complete):
 37 |     scores = {}
 38 |     for method in results_complete.keys():
 39 |         scores[method] = []
 40 |         for result in results_complete[method]:
 41 |             score = 100
 42 |             solved = False
 43 |             cost = 1
 44 |             prompt_tokens = 0
 45 |             completion_tokens = 0
 46 |             for op in result["data"]:
 47 |                 if "operation" in op and op["operation"] == "ground_truth_evaluator":
 48 |                     try:
 49 |                         score = min(op["scores"])
 50 |                         solved = any(op["problem_solved"])
 51 |                     except:
 52 |                         continue
 53 |                 if "cost" in op:
 54 |                     cost = op["cost"]
 55 |                     prompt_tokens = op["prompt_tokens"]
 56 |                     completion_tokens = op["completion_tokens"]
 57 |             scores[method].append(
 58 |                 [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
 59 |             )
 60 |         scores[method] = sorted(scores[method], key=lambda x: x[0])
 61 |     return scores
 62 | 
 63 | 
 64 | def get_plotting_data(base_directory):
 65 |     results_complete = get_complete_results(base_directory)
 66 |     scores = get_final_scores(results_complete)
 67 |     results_plotting = {
 68 |         method: {
 69 |             "scores": [x[1] for x in scores[method]],
 70 |             "solved": sum([1 for x in scores[method] if x[2]]),
 71 |             "costs": [x[5] for x in scores[method]],
 72 |         }
 73 |         for method in scores.keys()
 74 |     }
 75 |     return results_plotting
 76 | 
 77 | 
 78 | def plot_results(
 79 |     results,
 80 |     methods_order=["io", "cot", "tot", "tot2", "got"],
 81 |     model="GPT-3.5",
 82 |     length=32,
 83 |     y_lower=0,
 84 |     cost_upper=0.0,
 85 |     display_solved=True,
 86 |     annotation_offset=0,
 87 |     display_left_ylabel=False,
 88 |     display_right_ylabel=False,
 89 | ):
 90 |     methods_order = [method for method in methods_order if method in results]
 91 |     # Extract scores based on the order
 92 |     scores_ordered = [
 93 |         [
 94 |             min(score, length)
 95 |             for score in results[method]["scores"]
 96 |             if score != 100 and score != 300
 97 |         ]
 98 |         for method in methods_order
 99 |     ]
100 |     total_costs = [sum(results[method]["costs"]) for method in methods_order]
101 | 
102 |     # Create figure and axis
103 |     fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))
104 | 
105 |     # Create boxplots
106 |     positions = range(1, len(methods_order) + 1)
107 |     ax.boxplot(scores_ordered, positions=positions)
108 | 
109 |     fig_fontsize = 12
110 | 
111 |     # Set the ticks and labels
112 |     method_labels = ["IO", "CoT", "ToT", "ToT2", "GoT"]
113 |     plt.yticks(fontsize=fig_fontsize)
114 |     ax.set_xticks(range(1, len(methods_order) + 1))
115 |     ax.set_xticks(range(1, len(methods_order) + 1))
116 |     ax.set_xticklabels(method_labels, fontsize=fig_fontsize)
117 | 
118 |     y_upper = length
119 | 
120 |     range_increase = 1
121 |     if display_solved:
122 |         if length < 48:
123 |             range_increase = 2
124 |         elif length < 96:
125 |             range_increase = 4
126 |         else:
127 |             range_increase = 8
128 | 
129 |     ax.set_ylim(y_lower, y_upper + range_increase)
130 |     ax1_yticks = range(
131 |         y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)
132 |     )
133 |     ax.set_yticks(ax1_yticks)
134 |     if display_left_ylabel:
135 |         ax.set_ylabel(f"#incorrectly sorted elements; the lower the better")
136 | 
137 |     ax.set_title(f"{length} elements")
138 | 
139 |     ax2 = ax.twinx()
140 |     ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)")
141 |     ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize)
142 |     if cost_upper > 0:
143 |         ax2.set_ylim(0, cost_upper)
144 |         number_of_ticks = len(ax.get_yticks())
145 |         tick_interval = cost_upper / (number_of_ticks)
146 |         ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]
147 | 
148 |         # Set custom tick positions for ax2
149 |         ax2.set_yticks(ax2_ticks)
150 | 
151 |     if display_right_ylabel:
152 |         ax2.set_ylabel(
153 |             "Total Cost ($); the lower the better",
154 |             color="#1919ff",
155 |             fontsize=fig_fontsize,
156 |         )
157 | 
158 |     if display_solved:
159 |         annotation_height = y_upper + annotation_offset
160 |         count = 1
161 |         for method in methods_order:
162 |             if method not in results:
163 |                 continue
164 |             solved = results[method]["solved"]
165 |             ax.text(
166 |                 count,
167 |                 annotation_height,
168 |                 f"{solved}",
169 |                 ha="center",
170 |                 va="bottom",
171 |                 fontsize=fig_fontsize,
172 |             )
173 |             count += 1
174 | 
175 |     model = model.replace(".", "").replace("-", "").lower()
176 |     fig.savefig(f"sorting_{model}_{length}.pdf", bbox_inches="tight")
177 | 
178 | 
# Script entry: plot the 32-element GPT-3.5 results found in "results/",
# with both axis labels and the solved counts shown.
plot_results(
    results=get_plotting_data("results/"),
    model="GPT-3.5",
    length=32,
    display_solved=True,
    display_left_ylabel=True,
    display_right_ylabel=True,
)
187 | 


--------------------------------------------------------------------------------
/examples/sorting/sorting_032.csv:
--------------------------------------------------------------------------------
  1 | ID,Unsorted,Sorted
  2 | 0,"[0, 0, 5, 9, 0, 7, 9, 9, 1, 2, 6, 1, 1, 9, 0, 1, 3, 5, 2, 3, 5, 6, 0, 2, 7, 4, 6, 2, 9, 7, 5, 9]","[0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 9, 9, 9, 9, 9, 9]"
  3 | 1,"[5, 6, 2, 3, 1, 4, 9, 0, 5, 7, 0, 7, 1, 3, 2, 4, 5, 5, 6, 6, 3, 6, 4, 4, 2, 3, 7, 1, 7, 0, 2, 5]","[0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 9]"
  4 | 2,"[9, 6, 7, 0, 7, 7, 4, 7, 9, 3, 6, 5, 0, 8, 1, 8, 6, 1, 5, 3, 3, 5, 3, 4, 2, 2, 4, 5, 8, 6, 0, 3]","[0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9]"
  5 | 3,"[8, 7, 1, 1, 1, 1, 3, 3, 0, 9, 4, 1, 0, 2, 5, 1, 0, 5, 6, 7, 1, 4, 5, 9, 4, 6, 2, 5, 8, 6, 2, 6]","[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9]"
  6 | 4,"[5, 3, 9, 6, 3, 2, 9, 8, 2, 4, 2, 1, 8, 1, 8, 0, 0, 3, 7, 4, 4, 9, 9, 5, 4, 6, 5, 4, 3, 4, 1, 1]","[0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9, 9, 9, 9]"
  7 | 5,"[4, 6, 9, 5, 8, 6, 5, 4, 0, 4, 2, 6, 3, 6, 8, 5, 5, 2, 3, 5, 0, 9, 8, 6, 5, 5, 5, 8, 1, 5, 8, 7]","[0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 9, 9]"
  8 | 6,"[9, 6, 4, 1, 9, 6, 3, 1, 1, 7, 3, 9, 2, 6, 2, 9, 9, 4, 2, 7, 4, 0, 2, 3, 1, 7, 9, 8, 3, 0, 7, 3]","[0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 6, 6, 6, 7, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9]"
  9 | 7,"[9, 2, 4, 7, 4, 8, 8, 4, 7, 5, 8, 9, 0, 1, 0, 0, 0, 0, 7, 3, 4, 7, 7, 7, 9, 8, 6, 3, 2, 6, 6, 8]","[0, 0, 0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9]"
 10 | 8,"[6, 6, 9, 1, 7, 3, 5, 9, 7, 9, 1, 4, 8, 2, 5, 7, 0, 8, 8, 8, 9, 8, 1, 0, 5, 6, 5, 9, 2, 1, 7, 7]","[0, 0, 1, 1, 1, 1, 2, 2, 3, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9]"
 11 | 9,"[0, 3, 5, 9, 3, 1, 0, 2, 9, 9, 8, 1, 7, 6, 8, 1, 7, 1, 7, 1, 3, 2, 4, 2, 8, 4, 6, 8, 3, 2, 7, 0]","[0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9]"
 12 | 10,"[5, 2, 2, 3, 5, 5, 6, 2, 6, 9, 9, 9, 9, 7, 5, 1, 4, 5, 0, 4, 0, 3, 6, 6, 8, 1, 2, 2, 2, 3, 7, 2]","[0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9]"
 13 | 11,"[4, 1, 7, 1, 8, 8, 7, 5, 7, 1, 6, 1, 6, 1, 4, 4, 0, 0, 3, 3, 5, 6, 9, 3, 4, 9, 9, 2, 3, 0, 5, 5]","[0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9]"
 14 | 12,"[6, 6, 5, 3, 1, 7, 2, 8, 7, 2, 0, 5, 4, 5, 0, 5, 8, 0, 4, 8, 5, 9, 0, 0, 2, 3, 4, 3, 2, 0, 4, 4]","[0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 8, 9]"
 15 | 13,"[7, 0, 9, 2, 0, 6, 5, 9, 3, 5, 6, 2, 0, 1, 4, 7, 5, 7, 0, 9, 8, 2, 1, 3, 4, 4, 8, 4, 2, 7, 9, 7]","[0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9]"
 16 | 14,"[3, 4, 7, 1, 9, 8, 3, 0, 6, 4, 4, 1, 9, 9, 9, 2, 8, 4, 7, 4, 7, 9, 7, 6, 9, 7, 2, 1, 4, 5, 3, 1]","[0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9, 9]"
 17 | 15,"[8, 7, 0, 7, 8, 6, 3, 6, 6, 9, 8, 0, 3, 9, 3, 1, 7, 7, 4, 2, 5, 8, 2, 8, 7, 2, 1, 2, 6, 5, 5, 2]","[0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9]"
 18 | 16,"[9, 7, 1, 8, 7, 9, 5, 0, 9, 0, 6, 4, 5, 7, 0, 9, 2, 5, 8, 4, 3, 9, 6, 4, 6, 1, 7, 2, 2, 5, 3, 0]","[0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9]"
 19 | 17,"[7, 5, 1, 8, 1, 5, 9, 6, 0, 9, 8, 2, 1, 2, 7, 4, 5, 7, 3, 7, 8, 5, 1, 0, 4, 7, 8, 1, 4, 1, 4, 2]","[0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 20 | 18,"[4, 3, 6, 3, 7, 8, 5, 6, 5, 6, 8, 3, 6, 5, 5, 6, 0, 8, 4, 9, 2, 0, 4, 3, 2, 2, 5, 0, 8, 5, 2, 6]","[0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 8, 9]"
 21 | 19,"[9, 7, 8, 2, 6, 2, 9, 7, 6, 8, 1, 0, 6, 3, 5, 0, 3, 6, 2, 4, 2, 0, 6, 8, 3, 1, 9, 7, 7, 2, 6, 4]","[0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9]"
 22 | 20,"[8, 7, 6, 4, 5, 6, 4, 0, 2, 1, 8, 0, 2, 0, 8, 1, 8, 4, 4, 9, 7, 6, 0, 9, 6, 6, 1, 2, 5, 4, 7, 3]","[0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 23 | 21,"[6, 6, 4, 0, 7, 9, 5, 2, 0, 4, 2, 4, 8, 4, 0, 3, 7, 0, 4, 3, 1, 3, 0, 1, 9, 6, 7, 4, 9, 1, 3, 0]","[0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 6, 6, 6, 7, 7, 7, 8, 9, 9, 9]"
 24 | 22,"[6, 8, 0, 2, 7, 7, 9, 7, 9, 0, 6, 6, 8, 9, 1, 4, 2, 9, 6, 6, 2, 0, 0, 7, 5, 2, 5, 1, 3, 5, 2, 6]","[0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9]"
 25 | 23,"[8, 5, 0, 3, 6, 5, 6, 5, 4, 8, 0, 0, 7, 3, 9, 2, 9, 2, 0, 4, 1, 0, 5, 5, 8, 2, 8, 4, 4, 3, 5, 1]","[0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 8, 8, 8, 8, 9, 9]"
 26 | 24,"[8, 0, 5, 9, 3, 9, 2, 1, 7, 2, 1, 9, 7, 4, 8, 4, 2, 2, 9, 2, 1, 7, 3, 4, 4, 3, 3, 0, 8, 1, 8, 6]","[0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9]"
 27 | 25,"[9, 8, 8, 9, 8, 1, 9, 6, 5, 8, 0, 8, 5, 5, 5, 4, 3, 5, 8, 4, 0, 3, 3, 6, 6, 1, 4, 7, 0, 7, 2, 8]","[0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9]"
 28 | 26,"[3, 0, 9, 6, 0, 7, 0, 2, 7, 8, 8, 9, 7, 5, 6, 0, 2, 3, 4, 9, 6, 4, 8, 9, 3, 5, 6, 2, 0, 1, 5, 3]","[0, 0, 0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]"
 29 | 27,"[6, 4, 9, 0, 5, 9, 0, 8, 5, 5, 9, 2, 6, 4, 8, 1, 9, 1, 3, 9, 4, 6, 6, 9, 7, 8, 8, 8, 9, 1, 1, 4]","[0, 0, 1, 1, 1, 1, 2, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9]"
 30 | 28,"[3, 9, 6, 5, 2, 1, 2, 7, 6, 9, 6, 4, 7, 2, 9, 1, 6, 6, 1, 6, 4, 7, 1, 0, 5, 7, 6, 2, 2, 2, 4, 8]","[0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 9, 9, 9]"
 31 | 29,"[3, 9, 5, 1, 3, 9, 0, 8, 0, 1, 9, 5, 8, 7, 3, 1, 6, 6, 2, 2, 0, 2, 7, 2, 5, 5, 5, 1, 6, 6, 9, 0]","[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 9, 9, 9, 9]"
 32 | 30,"[5, 1, 2, 2, 1, 9, 1, 7, 7, 1, 4, 1, 1, 3, 7, 0, 2, 0, 7, 8, 0, 1, 9, 2, 0, 4, 5, 6, 6, 1, 8, 9]","[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9]"
 33 | 31,"[4, 2, 6, 4, 7, 2, 5, 2, 8, 4, 9, 5, 7, 7, 3, 2, 5, 5, 0, 2, 2, 8, 2, 8, 2, 6, 5, 3, 3, 5, 8, 4]","[0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9]"
 34 | 32,"[6, 6, 2, 2, 9, 8, 7, 2, 5, 3, 5, 7, 1, 3, 4, 0, 3, 6, 9, 1, 6, 1, 0, 4, 1, 2, 6, 7, 7, 6, 9, 1]","[0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 9, 9, 9]"
 35 | 33,"[6, 3, 9, 8, 1, 4, 2, 1, 0, 0, 1, 3, 7, 9, 8, 1, 3, 1, 5, 3, 9, 7, 8, 9, 3, 9, 4, 0, 2, 7, 3, 9]","[0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9]"
 36 | 34,"[2, 6, 4, 5, 8, 1, 9, 3, 6, 3, 9, 7, 3, 3, 8, 4, 6, 7, 2, 7, 3, 2, 3, 3, 5, 5, 6, 4, 1, 6, 6, 2]","[1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9]"
 37 | 35,"[6, 6, 0, 8, 0, 6, 8, 4, 9, 8, 3, 8, 8, 1, 7, 1, 5, 0, 9, 1, 9, 9, 8, 1, 5, 3, 2, 2, 7, 0, 5, 6]","[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9]"
 38 | 36,"[8, 9, 1, 5, 7, 4, 2, 6, 2, 8, 8, 9, 3, 0, 6, 7, 7, 8, 6, 5, 8, 3, 3, 0, 1, 6, 4, 0, 8, 8, 1, 7]","[0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9]"
 39 | 37,"[9, 5, 1, 1, 3, 1, 0, 4, 4, 6, 1, 9, 4, 8, 0, 9, 4, 0, 5, 1, 9, 5, 6, 7, 9, 1, 2, 7, 1, 3, 9, 6]","[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9, 9, 9]"
 40 | 38,"[4, 1, 1, 8, 1, 7, 9, 1, 5, 5, 1, 9, 0, 8, 7, 7, 9, 0, 7, 0, 4, 9, 8, 5, 9, 6, 6, 2, 6, 5, 1, 1]","[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9, 9]"
 41 | 39,"[6, 1, 6, 5, 7, 1, 6, 3, 8, 7, 6, 1, 3, 8, 6, 9, 3, 1, 1, 9, 4, 8, 3, 1, 9, 4, 5, 9, 5, 4, 1, 8]","[1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9]"
 42 | 40,"[2, 1, 6, 3, 9, 3, 8, 3, 2, 3, 9, 9, 5, 4, 3, 2, 1, 9, 3, 4, 3, 3, 5, 4, 4, 4, 9, 7, 8, 3, 0, 8]","[0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 6, 7, 8, 8, 8, 9, 9, 9, 9, 9]"
 43 | 41,"[6, 5, 6, 4, 3, 7, 3, 0, 6, 9, 1, 0, 1, 7, 0, 1, 8, 6, 5, 9, 5, 9, 4, 2, 5, 3, 8, 8, 8, 0, 9, 7]","[0, 0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9]"
 44 | 42,"[9, 2, 6, 8, 4, 0, 5, 0, 8, 6, 1, 6, 6, 6, 7, 6, 5, 2, 3, 4, 3, 9, 2, 9, 3, 2, 0, 0, 1, 4, 6, 6]","[0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 8, 8, 9, 9, 9]"
 45 | 43,"[7, 7, 7, 8, 7, 9, 4, 3, 5, 5, 1, 7, 7, 4, 0, 5, 6, 2, 7, 3, 2, 2, 3, 2, 0, 8, 6, 7, 8, 4, 6, 4]","[0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9]"
 46 | 44,"[0, 1, 6, 5, 4, 6, 9, 3, 7, 0, 0, 1, 1, 6, 7, 1, 9, 9, 4, 5, 9, 1, 2, 2, 7, 5, 6, 1, 9, 1, 2, 4]","[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 9, 9, 9, 9, 9]"
 47 | 45,"[1, 8, 6, 5, 0, 6, 2, 4, 2, 2, 0, 8, 7, 6, 1, 3, 1, 9, 7, 5, 9, 8, 1, 8, 2, 7, 7, 3, 7, 7, 8, 7]","[0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9]"
 48 | 46,"[4, 7, 0, 6, 4, 5, 0, 3, 0, 3, 4, 6, 5, 2, 6, 5, 5, 9, 7, 1, 0, 8, 0, 4, 9, 1, 3, 5, 1, 7, 2, 0]","[0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 9, 9]"
 49 | 47,"[9, 6, 0, 1, 8, 4, 9, 6, 3, 6, 0, 7, 0, 8, 9, 3, 6, 6, 9, 3, 2, 2, 2, 9, 4, 9, 3, 8, 1, 9, 6, 1]","[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9]"
 50 | 48,"[7, 3, 4, 8, 4, 2, 8, 3, 1, 0, 7, 3, 1, 9, 1, 8, 0, 1, 9, 8, 3, 2, 4, 1, 7, 3, 1, 3, 2, 0, 4, 0]","[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 51 | 49,"[2, 3, 7, 6, 5, 7, 5, 2, 8, 6, 8, 3, 7, 2, 3, 8, 6, 5, 4, 5, 3, 6, 8, 3, 0, 9, 2, 1, 7, 5, 5, 8]","[0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9]"
 52 | 50,"[4, 0, 9, 1, 1, 6, 5, 0, 6, 4, 8, 1, 6, 9, 6, 1, 8, 1, 3, 1, 1, 5, 8, 3, 2, 2, 9, 8, 4, 6, 6, 9]","[0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 9, 9, 9, 9]"
 53 | 51,"[1, 9, 8, 5, 8, 5, 2, 4, 1, 4, 8, 0, 7, 9, 0, 5, 0, 5, 6, 4, 6, 2, 2, 3, 4, 2, 8, 8, 6, 7, 8, 9]","[0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9]"
 54 | 52,"[3, 5, 2, 8, 8, 5, 4, 6, 0, 7, 2, 0, 2, 5, 2, 6, 0, 1, 8, 0, 9, 2, 8, 2, 8, 7, 3, 5, 6, 8, 0, 8]","[0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9]"
 55 | 53,"[2, 4, 1, 8, 9, 8, 2, 4, 0, 7, 9, 6, 1, 8, 1, 5, 6, 5, 8, 8, 7, 0, 4, 7, 2, 1, 9, 4, 4, 5, 5, 6]","[0, 0, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9]"
 56 | 54,"[4, 9, 4, 3, 2, 0, 9, 3, 9, 7, 7, 5, 9, 4, 3, 1, 6, 2, 5, 1, 9, 7, 2, 1, 9, 4, 6, 0, 7, 4, 9, 4]","[0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9]"
 57 | 55,"[2, 3, 6, 7, 2, 2, 1, 6, 4, 0, 0, 9, 1, 6, 9, 1, 1, 2, 5, 1, 8, 1, 7, 1, 2, 1, 6, 0, 1, 6, 4, 1]","[0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4, 4, 5, 6, 6, 6, 6, 6, 7, 7, 8, 9, 9]"
 58 | 56,"[4, 7, 7, 5, 3, 5, 9, 9, 3, 1, 4, 7, 8, 3, 4, 7, 7, 3, 3, 7, 0, 0, 2, 9, 6, 5, 3, 7, 3, 0, 1, 1]","[0, 0, 0, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 8, 9, 9, 9]"
 59 | 57,"[0, 9, 8, 9, 1, 3, 8, 0, 5, 7, 8, 5, 3, 4, 2, 5, 7, 8, 9, 6, 5, 1, 4, 5, 1, 3, 1, 8, 9, 2, 6, 9]","[0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9]"
 60 | 58,"[6, 1, 5, 1, 3, 5, 4, 0, 1, 0, 6, 8, 9, 8, 1, 7, 6, 8, 7, 4, 6, 2, 9, 0, 8, 0, 7, 2, 6, 6, 7, 7]","[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 61 | 59,"[2, 4, 0, 5, 4, 0, 6, 6, 1, 7, 8, 1, 7, 3, 4, 0, 9, 2, 8, 8, 8, 1, 1, 1, 7, 3, 1, 6, 0, 9, 1, 3]","[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 62 | 60,"[3, 8, 0, 9, 5, 6, 6, 8, 5, 2, 9, 4, 8, 2, 3, 5, 7, 6, 1, 7, 1, 4, 3, 9, 4, 5, 9, 5, 1, 9, 9, 9]","[0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9]"
 63 | 61,"[8, 8, 0, 2, 4, 3, 1, 9, 2, 9, 1, 1, 3, 7, 9, 9, 9, 1, 5, 2, 1, 3, 1, 5, 7, 7, 8, 2, 8, 8, 8, 2]","[0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 5, 5, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9]"
 64 | 62,"[5, 0, 7, 6, 2, 6, 6, 7, 0, 5, 6, 3, 9, 5, 4, 9, 0, 0, 4, 9, 0, 3, 1, 4, 7, 5, 9, 8, 6, 0, 7, 3]","[0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 9, 9, 9, 9]"
 65 | 63,"[0, 0, 4, 5, 1, 1, 1, 0, 2, 2, 5, 2, 5, 0, 0, 2, 1, 4, 6, 2, 0, 8, 8, 0, 5, 4, 3, 6, 8, 1, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 8, 8, 8]"
 66 | 64,"[3, 7, 4, 1, 6, 1, 9, 8, 1, 9, 7, 8, 8, 4, 6, 4, 3, 0, 2, 0, 0, 7, 0, 8, 1, 5, 3, 5, 2, 2, 0, 6]","[0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 67 | 65,"[3, 3, 7, 3, 2, 5, 7, 1, 4, 7, 0, 7, 1, 2, 5, 9, 6, 9, 5, 5, 0, 3, 2, 4, 1, 2, 0, 5, 5, 0, 1, 8]","[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 8, 9, 9]"
 68 | 66,"[9, 1, 9, 1, 3, 4, 4, 5, 0, 1, 9, 8, 0, 7, 1, 8, 7, 7, 6, 0, 0, 6, 1, 4, 4, 5, 8, 5, 4, 4, 4, 8]","[0, 0, 0, 0, 1, 1, 1, 1, 1, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9]"
 69 | 67,"[0, 7, 5, 5, 0, 3, 0, 7, 3, 9, 8, 3, 1, 2, 1, 5, 3, 6, 8, 7, 2, 9, 9, 6, 9, 2, 0, 1, 7, 9, 2, 0]","[0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 5, 5, 5, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 9, 9, 9]"
 70 | 68,"[1, 1, 6, 1, 1, 6, 0, 5, 0, 0, 8, 2, 5, 9, 6, 7, 1, 3, 9, 0, 9, 7, 0, 0, 3, 2, 2, 4, 0, 3, 6, 5]","[0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9]"
 71 | 69,"[3, 4, 2, 5, 5, 8, 4, 8, 5, 5, 8, 3, 4, 9, 9, 8, 4, 9, 9, 9, 9, 3, 9, 0, 5, 8, 3, 7, 8, 6, 1, 1]","[0, 1, 1, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9]"
 72 | 70,"[6, 4, 5, 0, 8, 3, 9, 7, 0, 2, 0, 2, 8, 5, 8, 2, 6, 2, 1, 9, 4, 5, 3, 3, 4, 0, 5, 7, 7, 2, 4, 0]","[0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9]"
 73 | 71,"[0, 4, 8, 5, 2, 4, 6, 7, 2, 6, 2, 8, 3, 6, 6, 2, 0, 9, 7, 7, 5, 1, 0, 8, 6, 0, 7, 7, 0, 3, 2, 6]","[0, 0, 0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 9]"
 74 | 72,"[5, 1, 7, 2, 4, 5, 0, 2, 7, 2, 2, 9, 7, 0, 5, 8, 5, 2, 2, 1, 2, 6, 9, 7, 8, 8, 4, 5, 1, 8, 2, 8]","[0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9]"
 75 | 73,"[8, 9, 8, 8, 3, 5, 5, 3, 6, 1, 3, 2, 6, 3, 1, 3, 3, 5, 1, 7, 6, 0, 9, 6, 8, 1, 1, 0, 9, 5, 5, 6]","[0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 8, 8, 8, 8, 9, 9, 9]"
 76 | 74,"[4, 3, 3, 0, 4, 7, 9, 4, 3, 5, 3, 5, 4, 2, 0, 2, 1, 5, 8, 8, 4, 4, 3, 4, 3, 0, 1, 6, 7, 3, 2, 8]","[0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, 7, 8, 8, 8, 9]"
 77 | 75,"[7, 1, 9, 4, 7, 4, 0, 5, 0, 7, 2, 0, 8, 5, 8, 4, 2, 2, 5, 9, 2, 3, 8, 2, 7, 4, 6, 6, 8, 6, 3, 3]","[0, 0, 0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 78 | 76,"[7, 3, 1, 7, 2, 0, 4, 5, 6, 5, 8, 3, 2, 8, 8, 2, 2, 5, 3, 9, 1, 3, 4, 9, 2, 7, 7, 3, 6, 8, 3, 7]","[0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9]"
 79 | 77,"[9, 4, 1, 3, 1, 3, 4, 8, 8, 8, 7, 2, 7, 9, 5, 0, 2, 6, 4, 8, 1, 3, 5, 4, 1, 8, 7, 4, 7, 7, 0, 8]","[0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9]"
 80 | 78,"[3, 5, 5, 9, 6, 6, 6, 2, 9, 0, 3, 0, 2, 1, 2, 6, 5, 8, 4, 8, 5, 9, 9, 5, 7, 0, 6, 8, 9, 3, 3, 5]","[0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 8, 8, 8, 9, 9, 9, 9, 9]"
 81 | 79,"[9, 5, 9, 0, 4, 5, 6, 3, 0, 1, 4, 3, 1, 5, 9, 9, 3, 2, 9, 5, 5, 2, 9, 9, 9, 7, 8, 9, 8, 0, 2, 8]","[0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5, 6, 7, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9]"
 82 | 80,"[9, 3, 1, 0, 5, 7, 6, 1, 8, 9, 5, 9, 1, 9, 9, 9, 9, 2, 2, 4, 3, 0, 6, 1, 9, 2, 1, 3, 5, 2, 5, 1]","[0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9]"
 83 | 81,"[8, 1, 1, 3, 0, 2, 8, 0, 9, 5, 3, 7, 7, 6, 8, 5, 2, 9, 3, 2, 6, 4, 2, 1, 7, 9, 4, 0, 1, 9, 3, 0]","[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]"
 84 | 82,"[0, 1, 4, 1, 5, 7, 4, 9, 3, 7, 5, 3, 6, 5, 1, 4, 1, 2, 2, 0, 0, 7, 5, 3, 5, 4, 6, 2, 1, 4, 1, 3]","[0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 9]"
 85 | 83,"[2, 2, 4, 0, 5, 7, 4, 5, 9, 5, 7, 5, 1, 5, 9, 5, 4, 1, 7, 9, 2, 4, 4, 1, 4, 3, 7, 9, 0, 6, 1, 1]","[0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 9, 9, 9, 9]"
 86 | 84,"[2, 5, 4, 9, 4, 5, 3, 8, 1, 5, 0, 4, 5, 5, 7, 4, 6, 6, 7, 7, 1, 9, 6, 5, 8, 6, 3, 6, 8, 9, 0, 7]","[0, 0, 1, 1, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9]"
 87 | 85,"[0, 4, 3, 0, 6, 1, 1, 9, 2, 1, 3, 6, 5, 1, 2, 3, 0, 1, 6, 7, 8, 4, 1, 0, 2, 1, 2, 3, 4, 5, 6, 1]","[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 8, 9]"
 88 | 86,"[4, 6, 0, 6, 5, 1, 7, 7, 0, 8, 0, 4, 6, 0, 8, 5, 0, 1, 8, 8, 0, 1, 0, 9, 4, 5, 5, 3, 0, 1, 9, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9]"
 89 | 87,"[6, 0, 1, 6, 6, 0, 7, 9, 2, 5, 8, 4, 5, 8, 1, 4, 8, 0, 6, 4, 0, 7, 1, 2, 3, 9, 4, 8, 7, 3, 8, 2]","[0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9]"
 90 | 88,"[5, 1, 5, 3, 7, 6, 5, 0, 8, 6, 6, 4, 6, 7, 2, 0, 1, 9, 5, 2, 4, 1, 3, 0, 4, 6, 7, 8, 0, 7, 2, 8]","[0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9]"
 91 | 89,"[5, 2, 2, 3, 8, 5, 0, 0, 6, 7, 3, 3, 7, 7, 6, 6, 8, 2, 0, 1, 6, 3, 3, 9, 9, 9, 1, 9, 5, 6, 4, 2]","[0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 9]"
 92 | 90,"[3, 0, 3, 3, 0, 9, 8, 7, 3, 9, 7, 0, 4, 0, 9, 4, 3, 7, 5, 1, 9, 5, 6, 6, 6, 3, 6, 3, 2, 8, 6, 8]","[0, 0, 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]"
 93 | 91,"[2, 4, 6, 0, 9, 4, 3, 5, 7, 4, 8, 8, 8, 1, 8, 5, 8, 0, 2, 6, 2, 9, 4, 0, 0, 8, 5, 0, 5, 1, 2, 8]","[0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9]"
 94 | 92,"[6, 5, 3, 4, 8, 2, 5, 9, 1, 3, 9, 6, 7, 4, 4, 7, 9, 7, 2, 2, 2, 8, 5, 8, 6, 3, 8, 5, 0, 8, 9, 1]","[0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9]"
 95 | 93,"[3, 8, 3, 9, 8, 4, 0, 7, 3, 9, 9, 2, 6, 0, 3, 5, 1, 3, 9, 3, 2, 0, 8, 8, 7, 3, 6, 1, 5, 4, 7, 7]","[0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9]"
 96 | 94,"[2, 7, 8, 3, 3, 8, 7, 9, 2, 8, 0, 6, 9, 5, 8, 4, 2, 8, 3, 6, 3, 4, 1, 4, 8, 5, 5, 6, 0, 0, 7, 6]","[0, 0, 0, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9]"
 97 | 95,"[8, 1, 9, 6, 1, 7, 1, 2, 6, 9, 0, 6, 0, 6, 8, 2, 3, 5, 8, 7, 9, 0, 9, 1, 7, 5, 4, 3, 7, 1, 8, 8]","[0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9]"
 98 | 96,"[3, 7, 6, 2, 0, 4, 8, 7, 1, 1, 1, 1, 8, 2, 4, 1, 4, 7, 4, 0, 8, 4, 9, 5, 0, 0, 6, 8, 3, 2, 1, 2]","[0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 5, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9]"
 99 | 97,"[3, 1, 2, 8, 6, 7, 0, 2, 3, 1, 6, 7, 9, 3, 1, 9, 7, 4, 5, 5, 0, 7, 8, 3, 3, 1, 4, 4, 2, 1, 1, 5]","[0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 7, 8, 8, 9, 9]"
100 | 98,"[5, 7, 1, 7, 2, 4, 1, 8, 4, 6, 9, 2, 0, 2, 3, 9, 3, 9, 7, 8, 3, 1, 1, 3, 9, 8, 5, 0, 2, 7, 0, 2]","[0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]"
101 | 99,"[9, 4, 4, 7, 2, 3, 9, 8, 8, 3, 9, 8, 7, 1, 7, 9, 5, 1, 6, 1, 6, 2, 0, 4, 7, 6, 4, 6, 3, 2, 6, 2]","[0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9]"


--------------------------------------------------------------------------------
/examples/sorting/utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 ETH Zurich.
 2 | #                    All rights reserved.
 3 | #
 4 | # Use of this source code is governed by a BSD-style license that can be
 5 | # found in the LICENSE file.
 6 | #
 7 | # main author: Nils Blach
 8 | 
 9 | from typing import Dict, List
10 | 
11 | 
def string_to_list(string: str) -> List[int]:
    """
    Helper function to convert a list encoded inside a string into a Python
    list object of integer elements.

    :param string: Input string containing a list, e.g. "[3, 1, 2]".
    :type string: str
    :return: List of integer elements.
    :rtype: List[int]
    :raise AssertionError: If input string does not contain a list.
    :raise ValueError: If an element between the brackets is not an integer
                       (this includes the empty list "[]").
    """

    # Explicit raise instead of an ``assert`` statement: the check also runs
    # under ``python -O`` and an empty string yields the documented
    # AssertionError rather than an IndexError.
    if not (string.startswith("[") and string.endswith("]")):
        raise AssertionError("String is not a list.")
    return [int(num) for num in string[1:-1].split(",")]
26 | 
27 | 
def test_sorting(state: Dict) -> bool:
    """
    Function to test whether the final solution matches ground truth.

    The ground truth is obtained by sorting the list stored under
    ``state["original"]``; it is compared with the list stored under
    ``state["current"]``. Both values are list-encoding strings.

    :param state: Thought state that represents the final solution.
    :type state: Dict
    :return: Returns whether the solution matches the ground truth. Any state
             that cannot be parsed counts as not matching.
    :rtype: bool
    """

    try:
        correct_list = sorted(string_to_list(state["original"]))
        sorted_list = string_to_list(state["current"])
        return sorted_list == correct_list
    except Exception:
        # Missing keys or malformed list strings mean "not solved";
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return False
44 | 
45 | 
def num_errors(state: Dict) -> float:
    """
    Function to locally count the number of errors that serves as a score.

    The score is the sum of
    - for every digit 0-9, the absolute difference between how often it occurs
      in the current list and in the correctly sorted reference list, and
    - the number of adjacent pairs in the current list that are in descending
      order.

    The reference is ``state["original"]``, unless ``state["unsorted_sublist"]``
    is present, non-empty and its string is more than five characters shorter
    than the original string; in that case the sublist is the reference.

    :param state: Thought state to be scored.
    :type state: Dict
    :return: Number of errors, or 300 if the state cannot be evaluated.
    :rtype: float
    """

    try:
        unsorted_list = state["original"]
        if (
            "unsorted_sublist" in state
            and state["unsorted_sublist"] != ""
            and state["unsorted_sublist"] is not None
            and len(state["unsorted_sublist"]) < len(unsorted_list) - 5
        ):
            unsorted_list = state["unsorted_sublist"]
        correct_list = sorted(string_to_list(unsorted_list))
        current_list = string_to_list(state["current"])
        error_count = 0
        # Only the digits 0-9 are compared; other values contribute solely
        # through the ordering check below.
        for digit in range(10):
            error_count += abs(current_list.count(digit) - correct_list.count(digit))
        # Each adjacent pair in descending order counts as one more error.
        error_count += sum(
            1 for left, right in zip(current_list, current_list[1:]) if left > right
        )
        return error_count
    except Exception:
        # Unparsable or incomplete states get a fixed penalty score;
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        return 300
79 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spcl/graph-of-thoughts/363421c61c7bc11edf32845a697ae2aaccd75463/graph_of_thoughts/__init__.py


--------------------------------------------------------------------------------
/graph_of_thoughts/controller/README.md:
--------------------------------------------------------------------------------
 1 | # Controller
 2 | 
 3 | The Controller class is responsible for traversing the Graph of Operations (GoO), which is a static structure that is constructed once, before the execution starts.
 4 | GoO prescribes the execution plan of thought operations and the Controller invokes their execution, generating the Graph Reasoning State (GRS). 
 5 | 
 6 | In order for a GoO to be executed, an instance of a Large Language Model (LLM) must be supplied to the controller (along with other required objects).
 7 | Please refer to the [Language Models](../language_models/README.md) section for more information about LLMs. 
 8 | 
 9 | The following section describes how to instantiate the Controller to run a defined GoO. 
10 | 
11 | ## Controller Instantiation
12 | - Requires custom `Prompter`, `Parser`, as well as instantiated `GraphOfOperations` and `AbstractLanguageModel` - creation of these is described separately.
13 | - Prepare initial state (thought) as dictionary - this can be used in the initial prompts by the operations.
14 | ```
15 | lm = ...create
16 | graph_of_operations = ...create
17 | 
18 | executor = controller.Controller(
19 |     lm,
20 |     graph_of_operations,
21 |     <CustomPrompter()>,
22 |     <CustomParser()>,
23 |     <initial state>,
24 | )
25 | executor.run()
26 | executor.output_graph("path/to/output.json")
27 | ```
28 | - After the run the graph is written to an output file, which contains individual operations, their thoughts, information about scores and validity and total amount of used tokens / cost.
29 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/controller/__init__.py:
--------------------------------------------------------------------------------
1 | from .controller import Controller
2 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/controller/controller.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | 
  9 | import json
 10 | import logging
 11 | from typing import List
 12 | from graph_of_thoughts.language_models import AbstractLanguageModel
 13 | from graph_of_thoughts.operations import GraphOfOperations, Thought
 14 | from graph_of_thoughts.prompter import Prompter
 15 | from graph_of_thoughts.parser import Parser
 16 | 
 17 | 
 18 | class Controller:
 19 |     """
 20 |     Controller class to manage the execution flow of the Graph of Operations,
 21 |     generating the Graph Reasoning State.
 22 |     This involves language models, graph operations, prompting, and parsing.
 23 |     """
 24 | 
 25 |     def __init__(
 26 |         self,
 27 |         lm: AbstractLanguageModel,
 28 |         graph: GraphOfOperations,
 29 |         prompter: Prompter,
 30 |         parser: Parser,
 31 |         problem_parameters: dict,
 32 |     ) -> None:
 33 |         """
 34 |         Initialize the Controller instance with the language model,
 35 |         operations graph, prompter, parser, and problem parameters.
 36 | 
 37 |         :param lm: An instance of the AbstractLanguageModel.
 38 |         :type lm: AbstractLanguageModel
 39 |         :param graph: The Graph of Operations to be executed.
 40 |         :type graph: OperationsGraph
 41 |         :param prompter: An instance of the Prompter class, used to generate prompts.
 42 |         :type prompter: Prompter
 43 |         :param parser: An instance of the Parser class, used to parse responses.
 44 |         :type parser: Parser
 45 |         :param problem_parameters: Initial parameters/state of the problem.
 46 |         :type problem_parameters: dict
 47 |         """
 48 |         self.logger = logging.getLogger(self.__class__.__module__)
 49 |         self.lm = lm
 50 |         self.graph = graph
 51 |         self.prompter = prompter
 52 |         self.parser = parser
 53 |         self.problem_parameters = problem_parameters
 54 |         self.run_executed = False
 55 | 
 56 |     def run(self) -> None:
 57 |         """
 58 |         Run the controller and execute the operations from the Graph of
 59 |         Operations based on their readiness.
 60 |         Ensures the program is in a valid state before execution.
 61 |         :raises AssertionError: If the Graph of Operation has no roots.
 62 |         :raises AssertionError: If the successor of an operation is not in the Graph of Operations.
 63 |         """
 64 |         self.logger.debug("Checking that the program is in a valid state")
 65 |         assert self.graph.roots is not None, "The operations graph has no root"
 66 |         self.logger.debug("The program is in a valid state")
 67 | 
 68 |         execution_queue = [
 69 |             operation
 70 |             for operation in self.graph.operations
 71 |             if operation.can_be_executed()
 72 |         ]
 73 | 
 74 |         while len(execution_queue) > 0:
 75 |             current_operation = execution_queue.pop(0)
 76 |             self.logger.info("Executing operation %s", current_operation.operation_type)
 77 |             current_operation.execute(
 78 |                 self.lm, self.prompter, self.parser, **self.problem_parameters
 79 |             )
 80 |             self.logger.info("Operation %s executed", current_operation.operation_type)
 81 |             for operation in current_operation.successors:
 82 |                 assert (
 83 |                     operation in self.graph.operations
 84 |                 ), "The successor of an operation is not in the operations graph"
 85 |                 if operation.can_be_executed():
 86 |                     execution_queue.append(operation)
 87 |         self.logger.info("All operations executed")
 88 |         self.run_executed = True
 89 | 
 90 |     def get_final_thoughts(self) -> List[List[Thought]]:
 91 |         """
 92 |         Retrieve the final thoughts after all operations have been executed.
 93 | 
 94 |         :return: List of thoughts for each operation in the graph's leaves.
 95 |         :rtype: List[List[Thought]]
 96 |         :raises AssertionError: If the `run` method hasn't been executed yet.
 97 |         """
 98 |         assert self.run_executed, "The run method has not been executed"
 99 |         return [operation.get_thoughts() for operation in self.graph.leaves]
100 | 
101 |     def output_graph(self, path: str) -> None:
102 |         """
103 |         Serialize the state and results of the operations graph to a JSON file.
104 | 
105 |         :param path: The path to the output file.
106 |         :type path: str
107 |         """
108 |         output = []
109 |         for operation in self.graph.operations:
110 |             operation_serialized = {
111 |                 "operation": operation.operation_type.name,
112 |                 "thoughts": [thought.state for thought in operation.get_thoughts()],
113 |             }
114 |             if any([thought.scored for thought in operation.get_thoughts()]):
115 |                 operation_serialized["scored"] = [
116 |                     thought.scored for thought in operation.get_thoughts()
117 |                 ]
118 |                 operation_serialized["scores"] = [
119 |                     thought.score for thought in operation.get_thoughts()
120 |                 ]
121 |             if any([thought.validated for thought in operation.get_thoughts()]):
122 |                 operation_serialized["validated"] = [
123 |                     thought.validated for thought in operation.get_thoughts()
124 |                 ]
125 |                 operation_serialized["validity"] = [
126 |                     thought.valid for thought in operation.get_thoughts()
127 |                 ]
128 |             if any(
129 |                 [
130 |                     thought.compared_to_ground_truth
131 |                     for thought in operation.get_thoughts()
132 |                 ]
133 |             ):
134 |                 operation_serialized["compared_to_ground_truth"] = [
135 |                     thought.compared_to_ground_truth
136 |                     for thought in operation.get_thoughts()
137 |                 ]
138 |                 operation_serialized["problem_solved"] = [
139 |                     thought.solved for thought in operation.get_thoughts()
140 |                 ]
141 |             output.append(operation_serialized)
142 | 
143 |         output.append(
144 |             {
145 |                 "prompt_tokens": self.lm.prompt_tokens,
146 |                 "completion_tokens": self.lm.completion_tokens,
147 |                 "cost": self.lm.cost,
148 |             }
149 |         )
150 | 
151 |         with open(path, "w") as file:
152 |             file.write(json.dumps(output, indent=2))
153 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/language_models/README.md:
--------------------------------------------------------------------------------
 1 | # Language Models
 2 | 
 3 | The Language Models module is responsible for managing the large language models (LLMs) used by the Controller.
 4 | 
 5 | Currently, the framework supports the following LLMs:
 6 | - GPT-4 / GPT-3.5 (Remote - OpenAI API)
 7 | - LLaMA-2 (Local - HuggingFace Transformers)
 8 | 
 9 | The following sections describe how to instantiate individual LLMs and how to add new LLMs to the framework.
10 | 
11 | ## LLM Instantiation
12 | - Create a copy of `config_template.json` named `config.json`.
13 | - Fill configuration details based on the used model (below).
14 | 
15 | ### GPT-4 / GPT-3.5
16 | - Adjust the predefined `chatgpt` or `chatgpt4` configurations or create a new configuration with a unique key.
17 | 
18 | | Key                 | Value                                                                                                                                                                                                                                                                                                                                                               |
19 | |---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
20 | | model_id            | Model name based on [OpenAI model overview](https://platform.openai.com/docs/models/overview).                                                                                                                                                                                                                                                                      |
21 | | prompt_token_cost   | Price per 1000 prompt tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance.                                                                                                                                                                                                                         |
22 | | response_token_cost | Price per 1000 response tokens based on [OpenAI pricing](https://openai.com/pricing), used for calculating cumulative price per LLM instance.                                                                                                                                                                                                                       |
23 | | temperature         | Parameter of OpenAI models that controls the randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 2.0, default is 1.0. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature).     |
24 | | max_tokens          | The maximum number of tokens to generate in the chat completion. Value depends on the maximum context size of the model specified in the [OpenAI model overview](https://platform.openai.com/docs/models/overview). More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-max_tokens). |
25 | | stop                | String or array of strings specifying sequences of characters which if detected, stops further generation of tokens. More information can be found in the [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create#chat/create-stop).                                                                                                       |
26 | | organization        | Organization to use for the API requests (may be empty).                                                                                                                                                                                                                                                                                                            |
27 | | api_key             | Personal API key that will be used to access OpenAI API.                                                                                                                                                                                                                                                                                                            |
28 | 
29 | - Instantiate the language model based on the selected configuration key (predefined / custom).
30 | ```python
31 | lm = language_models.ChatGPT(
32 |     "path/to/config.json", 
33 |     model_name=<configuration key>
34 | )
35 | ```
36 | 
37 | ### LLaMA-2
38 | - Requires local hardware to run inference and a HuggingFace account.
39 | - Adjust the predefined `llama7b-hf`, `llama13b-hf` or `llama70b-hf` configurations or create a new configuration with a unique key.
40 | 
41 | | Key                 | Value                                                                                                                                                                           |
42 | |---------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
43 | | model_id            | Specifies HuggingFace LLaMA-2 model identifier (`meta-llama/<model_id>`).                                                                                                       |
44 | | cache_dir           | Local directory where the model will be downloaded and accessed.                                                                                                                    |
45 | | prompt_token_cost   | Price per 1000 prompt tokens (currently not used - local model = no cost).                                                                                                      |
46 | | response_token_cost | Price per 1000 response tokens (currently not used - local model = no cost).                                                                                                    |
47 | | temperature         | Parameter that controls the randomness and the creativity of the responses (higher temperature = more diverse and unexpected responses). Value between 0.0 and 1.0, default is 0.6. |
48 | | top_k               | Top-K sampling method described in [Transformers tutorial](https://huggingface.co/blog/how-to-generate). Default value is set to 10.                                            |
49 | | max_tokens          | The maximum number of tokens to generate in the chat completion. More tokens require more memory.                                                                               |
50 | 
51 | - Instantiate the language model based on the selected configuration key (predefined / custom).
52 | ```python
53 | lm = language_models.Llama2HF(
54 |     "path/to/config.json", 
55 |     model_name=<configuration key>
56 | )
57 | ```
58 | - Request access to LLaMA-2 via the [Meta form](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) using the same email address as for the HuggingFace account.
59 | - After the access is granted, go to [HuggingFace LLaMA-2 model card](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), log in and accept the license (a _"You have been granted access to this model"_ message should appear).
60 | - Generate HuggingFace access token.
61 | - Log in from CLI with: `huggingface-cli login --token <your token>`.
62 | 
63 | Note: 4-bit quantization is used to reduce the model size for inference. During instantiation, the model is downloaded from HuggingFace into the cache directory specified in the `config.json`. Running queries using larger models will require multiple GPUs (splitting across many GPUs is done automatically by the Transformers library).
64 | 
65 | ## Adding LLMs
66 | More LLMs can be added by following these steps:
67 | - Create a new class as a subclass of `AbstractLanguageModel`.
68 | - Use the constructor for loading the configuration and instantiating the language model (if needed).
69 | ```python
70 | class CustomLanguageModel(AbstractLanguageModel):
71 |     def __init__(
72 |         self,
73 |         config_path: str = "",
74 |         model_name: str = "llama7b-hf",
75 |         cache: bool = False
76 |     ) -> None:
77 |         super().__init__(config_path, model_name, cache)
78 |         self.config: Dict = self.config[model_name]
79 |         
80 |         # Load data from configuration into variables if needed
81 | 
82 |         # Instantiate LLM if needed
83 | ```
84 | - Implement the `query` abstract method that is used to get a list of responses from the LLM (remote API call or local model inference).
85 | ```python
86 | def query(self, query: str, num_responses: int = 1) -> Any:
87 |     # Support caching 
88 |     # Call LLM and retrieve list of responses - based on num_responses    
89 |     # Return LLM response structure (not only raw strings)    
90 | ```
91 | - Implement the `get_response_texts` abstract method that is used to get a list of raw texts from the LLM response structure produced by `query`.
92 | ```python
93 | def get_response_texts(self, query_response: Union[List[Any], Any]) -> List[str]:
94 |     # Retrieve list of raw strings from the LLM response structure    
95 | ```
96 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/language_models/__init__.py:
--------------------------------------------------------------------------------
1 | from .abstract_language_model import AbstractLanguageModel
2 | from .chatgpt import ChatGPT
3 | from .llamachat_hf import Llama2HF
4 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/language_models/abstract_language_model.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 ETH Zurich.
 2 | #                    All rights reserved.
 3 | #
 4 | # Use of this source code is governed by a BSD-style license that can be
 5 | # found in the LICENSE file.
 6 | #
 7 | # main author: Nils Blach
 8 | 
 9 | from abc import ABC, abstractmethod
10 | from typing import List, Dict, Union, Any
11 | import json
12 | import os
13 | import logging
14 | 
15 | 
class AbstractLanguageModel(ABC):
    """
    Abstract base class that defines the interface for all language models.

    Besides the abstract `query` / `get_response_texts` interface, it loads the
    JSON configuration and holds the token / cost counters and the optional
    response cache shared by all implementations.
    """

    def __init__(
        self, config_path: str = "", model_name: str = "", cache: bool = False
    ) -> None:
        """
        Initialize the AbstractLanguageModel instance with configuration, model details, and caching options.

        :param config_path: Path to the config file. Defaults to "".
        :type config_path: str
        :param model_name: Name of the language model. Defaults to "".
        :type model_name: str
        :param cache: Flag to determine whether to cache responses. Defaults to False.
        :type cache: bool
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.config: Dict = None
        self.model_name: str = model_name
        self.cache = cache
        # Created unconditionally so that `clear_cache` is safe to call even
        # when caching is disabled; it is only consulted when `self.cache` is set.
        self.response_cache: Dict[str, List[Any]] = {}
        self.load_config(config_path)
        self.prompt_tokens: int = 0
        self.completion_tokens: int = 0
        self.cost: float = 0.0

    def load_config(self, path: str) -> None:
        """
        Load configuration from a specified path.

        :param path: Path to the config file. If an empty path provided,
                     default is `config.json` in the directory of this module.
        :type path: str
        :raises OSError: If the config file cannot be opened.
        :raises json.JSONDecodeError: If the config file is not valid JSON.
        """
        if path == "":
            current_dir = os.path.dirname(os.path.abspath(__file__))
            path = os.path.join(current_dir, "config.json")

        # JSON is read as UTF-8 regardless of the platform's default encoding.
        with open(path, "r", encoding="utf-8") as f:
            self.config = json.load(f)

        self.logger.debug(f"Loaded config from {path} for {self.model_name}")

    def clear_cache(self) -> None:
        """
        Clear the response cache.
        """
        self.response_cache.clear()

    @abstractmethod
    def query(self, query: str, num_responses: int = 1) -> Any:
        """
        Abstract method to query the language model.

        :param query: The query to be posed to the language model.
        :type query: str
        :param num_responses: The number of desired responses.
        :type num_responses: int
        :return: The language model's response(s).
        :rtype: Any
        """
        pass

    @abstractmethod
    def get_response_texts(self, query_responses: Union[List[Any], Any]) -> List[str]:
        """
        Abstract method to extract response texts from the language model's response(s).

        :param query_responses: The responses returned from the language model.
        :type query_responses: Union[List[Any], Any]
        :return: List of textual responses.
        :rtype: List[str]
        """
        pass
93 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/language_models/chatgpt.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | 
  9 | import backoff
 10 | import os
 11 | import random
 12 | import time
 13 | from typing import List, Dict, Union
 14 | from openai import OpenAI, OpenAIError
 15 | from openai.types.chat.chat_completion import ChatCompletion
 16 | 
 17 | from .abstract_language_model import AbstractLanguageModel
 18 | 
 19 | 
 20 | class ChatGPT(AbstractLanguageModel):
 21 |     """
 22 |     The ChatGPT class handles interactions with the OpenAI models using the provided configuration.
 23 | 
 24 |     Inherits from the AbstractLanguageModel and implements its abstract methods.
 25 |     """
 26 | 
 27 |     def __init__(
 28 |         self, config_path: str = "", model_name: str = "chatgpt", cache: bool = False
 29 |     ) -> None:
 30 |         """
 31 |         Initialize the ChatGPT instance with configuration, model details, and caching options.
 32 | 
 33 |         :param config_path: Path to the configuration file. Defaults to "".
 34 |         :type config_path: str
 35 |         :param model_name: Name of the model, default is 'chatgpt'. Used to select the correct configuration.
 36 |         :type model_name: str
 37 |         :param cache: Flag to determine whether to cache responses. Defaults to False.
 38 |         :type cache: bool
 39 |         """
 40 |         super().__init__(config_path, model_name, cache)
 41 |         self.config: Dict = self.config[model_name]
 42 |         # The model_id is the id of the model that is used for chatgpt, i.e. gpt-4, gpt-3.5-turbo, etc.
 43 |         self.model_id: str = self.config["model_id"]
 44 |         # The prompt_token_cost and response_token_cost are the costs for 1000 prompt tokens and 1000 response tokens respectively.
 45 |         self.prompt_token_cost: float = self.config["prompt_token_cost"]
 46 |         self.response_token_cost: float = self.config["response_token_cost"]
 47 |         # The temperature of a model is defined as the randomness of the model's output.
 48 |         self.temperature: float = self.config["temperature"]
 49 |         # The maximum number of tokens to generate in the chat completion.
 50 |         self.max_tokens: int = self.config["max_tokens"]
 51 |         # The stop sequence is a sequence of tokens that the model will stop generating at (it will not generate the stop sequence).
 52 |         self.stop: Union[str, List[str]] = self.config["stop"]
 53 |         # The account organization is the organization that is used for chatgpt.
 54 |         self.organization: str = self.config["organization"]
 55 |         if self.organization == "":
 56 |             self.logger.warning("OPENAI_ORGANIZATION is not set")
 57 |         self.api_key: str = os.getenv("OPENAI_API_KEY", self.config["api_key"])
 58 |         if self.api_key == "":
 59 |             raise ValueError("OPENAI_API_KEY is not set")
 60 |         # Initialize the OpenAI Client
 61 |         self.client = OpenAI(api_key=self.api_key, organization=self.organization)
 62 | 
 63 |     def query(
 64 |         self, query: str, num_responses: int = 1
 65 |     ) -> Union[List[ChatCompletion], ChatCompletion]:
 66 |         """
 67 |         Query the OpenAI model for responses.
 68 | 
 69 |         :param query: The query to be posed to the language model.
 70 |         :type query: str
 71 |         :param num_responses: Number of desired responses, default is 1.
 72 |         :type num_responses: int
 73 |         :return: Response(s) from the OpenAI model.
 74 |         :rtype: Dict
 75 |         """
 76 |         if self.cache and query in self.response_cache:
 77 |             return self.response_cache[query]
 78 | 
 79 |         if num_responses == 1:
 80 |             response = self.chat([{"role": "user", "content": query}], num_responses)
 81 |         else:
 82 |             response = []
 83 |             next_try = num_responses
 84 |             total_num_attempts = num_responses
 85 |             while num_responses > 0 and total_num_attempts > 0:
 86 |                 try:
 87 |                     assert next_try > 0
 88 |                     res = self.chat([{"role": "user", "content": query}], next_try)
 89 |                     response.append(res)
 90 |                     num_responses -= next_try
 91 |                     next_try = min(num_responses, next_try)
 92 |                 except Exception as e:
 93 |                     next_try = (next_try + 1) // 2
 94 |                     self.logger.warning(
 95 |                         f"Error in chatgpt: {e}, trying again with {next_try} samples"
 96 |                     )
 97 |                     time.sleep(random.randint(1, 3))
 98 |                     total_num_attempts -= 1
 99 | 
100 |         if self.cache:
101 |             self.response_cache[query] = response
102 |         return response
103 | 
104 |     @backoff.on_exception(backoff.expo, OpenAIError, max_time=10, max_tries=6)
105 |     def chat(self, messages: List[Dict], num_responses: int = 1) -> ChatCompletion:
106 |         """
107 |         Send chat messages to the OpenAI model and retrieves the model's response.
108 |         Implements backoff on OpenAI error.
109 | 
110 |         :param messages: A list of message dictionaries for the chat.
111 |         :type messages: List[Dict]
112 |         :param num_responses: Number of desired responses, default is 1.
113 |         :type num_responses: int
114 |         :return: The OpenAI model's response.
115 |         :rtype: ChatCompletion
116 |         """
117 |         response = self.client.chat.completions.create(
118 |             model=self.model_id,
119 |             messages=messages,
120 |             temperature=self.temperature,
121 |             max_tokens=self.max_tokens,
122 |             n=num_responses,
123 |             stop=self.stop,
124 |         )
125 | 
126 |         self.prompt_tokens += response.usage.prompt_tokens
127 |         self.completion_tokens += response.usage.completion_tokens
128 |         prompt_tokens_k = float(self.prompt_tokens) / 1000.0
129 |         completion_tokens_k = float(self.completion_tokens) / 1000.0
130 |         self.cost = (
131 |             self.prompt_token_cost * prompt_tokens_k
132 |             + self.response_token_cost * completion_tokens_k
133 |         )
134 |         self.logger.info(
135 |             f"This is the response from chatgpt: {response}"
136 |             f"\nThis is the cost of the response: {self.cost}"
137 |         )
138 |         return response
139 | 
140 |     def get_response_texts(
141 |         self, query_response: Union[List[ChatCompletion], ChatCompletion]
142 |     ) -> List[str]:
143 |         """
144 |         Extract the response texts from the query response.
145 | 
146 |         :param query_response: The response dictionary (or list of dictionaries) from the OpenAI model.
147 |         :type query_response: Union[List[ChatCompletion], ChatCompletion]
148 |         :return: List of response strings.
149 |         :rtype: List[str]
150 |         """
151 |         if not isinstance(query_response, List):
152 |             query_response = [query_response]
153 |         return [
154 |             choice.message.content
155 |             for response in query_response
156 |             for choice in response.choices
157 |         ]
158 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/language_models/config_template.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "chatgpt" : {
 3 |         "model_id": "gpt-3.5-turbo",
 4 |         "prompt_token_cost": 0.0015,
 5 |         "response_token_cost": 0.002,
 6 |         "temperature": 1.0,
 7 |         "max_tokens": 1536,
 8 |         "stop": null,
 9 |         "organization": "",
10 |         "api_key": ""
11 |     },
12 |     "chatgpt4" : {
13 |         "model_id": "gpt-4",
14 |         "prompt_token_cost": 0.03,
15 |         "response_token_cost": 0.06,
16 |         "temperature": 1.0,
17 |         "max_tokens": 4096,
18 |         "stop": null,
19 |         "organization": "",
20 |         "api_key": ""
21 |     },
22 |     "llama7b-hf" : {
23 |         "model_id": "Llama-2-7b-chat-hf",
24 |         "cache_dir": "/llama",
25 |         "prompt_token_cost": 0.0,
26 |         "response_token_cost": 0.0,
27 |         "temperature": 0.6,
28 |         "top_k": 10,
29 |         "max_tokens": 4096
30 |     },
31 |     "llama13b-hf" : {
32 |         "model_id": "Llama-2-13b-chat-hf",
33 |         "cache_dir": "/llama",
34 |         "prompt_token_cost": 0.0,
35 |         "response_token_cost": 0.0,
36 |         "temperature": 0.6,
37 |         "top_k": 10,
38 |         "max_tokens": 4096
39 |     },
40 |     "llama70b-hf" : {
41 |         "model_id": "Llama-2-70b-chat-hf",
42 |         "cache_dir": "/llama",
43 |         "prompt_token_cost": 0.0,
44 |         "response_token_cost": 0.0,
45 |         "temperature": 0.6,
46 |         "top_k": 10,
47 |         "max_tokens": 4096
48 |     }
49 | }
50 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/language_models/llamachat_hf.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Ales Kubicek
  8 | 
  9 | import os
 10 | import torch
 11 | from typing import List, Dict, Union
 12 | from .abstract_language_model import AbstractLanguageModel
 13 | 
 14 | 
 15 | class Llama2HF(AbstractLanguageModel):
 16 |     """
 17 |     An interface to use LLaMA 2 models through the HuggingFace library.
 18 |     """
 19 | 
 20 |     def __init__(
 21 |         self, config_path: str = "", model_name: str = "llama7b-hf", cache: bool = False
 22 |     ) -> None:
 23 |         """
 24 |         Initialize an instance of the Llama2HF class with configuration, model details, and caching options.
 25 | 
 26 |         :param config_path: Path to the configuration file. Defaults to an empty string.
 27 |         :type config_path: str
 28 |         :param model_name: Specifies the name of the LLaMA model variant. Defaults to "llama7b-hf".
 29 |                            Used to select the correct configuration.
 30 |         :type model_name: str
 31 |         :param cache: Flag to determine whether to cache responses. Defaults to False.
 32 |         :type cache: bool
 33 |         """
 34 |         super().__init__(config_path, model_name, cache)
 35 |         self.config: Dict = self.config[model_name]
 36 |         # Detailed id of the used model.
 37 |         self.model_id: str = self.config["model_id"]
 38 |         # Costs for 1000 tokens.
 39 |         self.prompt_token_cost: float = self.config["prompt_token_cost"]
 40 |         self.response_token_cost: float = self.config["response_token_cost"]
 41 |         # The temperature is defined as the randomness of the model's output.
 42 |         self.temperature: float = self.config["temperature"]
 43 |         # Top K sampling.
 44 |         self.top_k: int = self.config["top_k"]
 45 |         # The maximum number of tokens to generate in the chat completion.
 46 |         self.max_tokens: int = self.config["max_tokens"]
 47 | 
 48 |         # Important: must be done before importing transformers
 49 |         os.environ["TRANSFORMERS_CACHE"] = self.config["cache_dir"]
 50 |         import transformers
 51 | 
 52 |         hf_model_id = f"meta-llama/{self.model_id}"
 53 |         model_config = transformers.AutoConfig.from_pretrained(hf_model_id)
 54 |         bnb_config = transformers.BitsAndBytesConfig(
 55 |             load_in_4bit=True,
 56 |             bnb_4bit_quant_type="nf4",
 57 |             bnb_4bit_use_double_quant=True,
 58 |             bnb_4bit_compute_dtype=torch.bfloat16,
 59 |         )
 60 | 
 61 |         self.tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model_id)
 62 |         self.model = transformers.AutoModelForCausalLM.from_pretrained(
 63 |             hf_model_id,
 64 |             trust_remote_code=True,
 65 |             config=model_config,
 66 |             quantization_config=bnb_config,
 67 |             device_map="auto",
 68 |         )
 69 |         self.model.eval()
 70 |         torch.no_grad()
 71 | 
 72 |         self.generate_text = transformers.pipeline(
 73 |             model=self.model, tokenizer=self.tokenizer, task="text-generation"
 74 |         )
 75 | 
 76 |     def query(self, query: str, num_responses: int = 1) -> List[Dict]:
 77 |         """
 78 |         Query the LLaMA 2 model for responses.
 79 | 
 80 |         :param query: The query to be posed to the language model.
 81 |         :type query: str
 82 |         :param num_responses: Number of desired responses, default is 1.
 83 |         :type num_responses: int
 84 |         :return: Response(s) from the LLaMA 2 model.
 85 |         :rtype: List[Dict]
 86 |         """
 87 |         if self.cache and query in self.response_cache:
 88 |             return self.response_cache[query]
 89 |         sequences = []
 90 |         query = f"<s><<SYS>>You are a helpful assistant. Always follow the intstructions precisely and output the response exactly in the requested format.<</SYS>>\n\n[INST] {query} [/INST]"
 91 |         for _ in range(num_responses):
 92 |             sequences.extend(
 93 |                 self.generate_text(
 94 |                     query,
 95 |                     do_sample=True,
 96 |                     top_k=self.top_k,
 97 |                     num_return_sequences=1,
 98 |                     eos_token_id=self.tokenizer.eos_token_id,
 99 |                     max_length=self.max_tokens,
100 |                 )
101 |             )
102 |         response = [
103 |             {"generated_text": sequence["generated_text"][len(query) :].strip()}
104 |             for sequence in sequences
105 |         ]
106 |         if self.cache:
107 |             self.response_cache[query] = response
108 |         return response
109 | 
110 |     def get_response_texts(self, query_responses: List[Dict]) -> List[str]:
111 |         """
112 |         Extract the response texts from the query response.
113 | 
114 |         :param query_responses: The response list of dictionaries generated from the `query` method.
115 |         :type query_responses: List[Dict]
116 |         :return: List of response strings.
117 |         :rtype: List[str]
118 |         """
119 |         return [query_response["generated_text"] for query_response in query_responses]
120 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/operations/README.md:
--------------------------------------------------------------------------------
 1 | # Operations
 2 | 
 3 | The Operations module contains operations to manipulate and process thoughts represented by the [Thought](thought.py) class.  
 4 | Operations interface with a language model and use other helper classes like [Prompter](../prompter/prompter.py) and [Parser](../parser/parser.py) for effective communication and extraction of results from the language model.  
 5 | The [Graph of Operations](graph_of_operations.py) class is the main class of the module and is responsible for orchestrating the operations, defining their relationships and maintaining the state of the thought graph, also known as Graph Reasoning State.
 6 | 
 7 | ## Graph of Operations
 8 | The [GraphOfOperations](graph_of_operations.py) class facilitates the creation and management of a directed graph representing the sequence and interrelationships of operations on thoughts. Here’s how you can construct and work with the Graph of Operations:
 9 | 
10 | ### Initialization
11 | Creating a new instance of GraphOfOperations:
12 | 
13 | ```python
14 | from graph_of_thoughts.operations import GraphOfOperations
15 | 
16 | graph = GraphOfOperations()
17 | ```
18 | 
19 | Upon initialization, the graph will be empty with no operations, roots, or leaves.
20 | 
21 | ### Adding Operations
22 | **Append Operation:** You can append operations to the end of the graph using the append_operation method. This ensures that the operation becomes a successor to all current leaf operations in the graph.
23 | ```python
24 | from graph_of_thoughts.operations import Generate
25 | 
26 | operationA = Generate()
27 | graph.append_operation(operationA)
28 | ```
29 | **Add Operation with Relationships:** If you want to define specific relationships for an operation, use the add_operation method.
30 | ```python
31 | operationB = Generate()
32 | operationB.predecessors.append(operationA)
33 | graph.add_operation(operationB)
34 | ```
35 | Remember to set up the predecessors (and optionally successors) for your operation before adding it to the graph.
36 | 
37 | ## Available Operations
38 | The following operations are available in the module:
39 | 
40 | **Score:** Collect all thoughts from preceding operations and score them either using the LLM or a custom scoring function.
41 | - num_samples (Optional): The number of samples to use for scoring, defaults to 1.
42 | - combined_scoring (Optional): Whether to score all thoughts together in a single prompt or separately, defaults to False.
43 | - scoring_function (Optional): A function that takes in a list of thought states and returns a list of scores for each thought.
44 | 
45 | **ValidateAndImprove:** For each thought, validate it and if it is invalid, improve it.  
46 | - num_samples (Optional): The number of samples to use for validation, defaults to 1.
47 | - improve (Optional): Whether to improve the thought if it is invalid, defaults to True.
48 | - num_tries (Optional): The number of times to try improving the thought, before giving up, defaults to 3.
49 | - validate_function (Optional): A function that takes in a thought state and returns a boolean indicating whether the thought is valid.
50 | 
51 | **Generate:** Generate new thoughts from the current thoughts. If no previous thoughts are available, the thoughts are initialized with the input to the [Controller](../controller/controller.py).  
52 | - num_branches_prompt (Optional): Number of responses that each prompt should generate (passed to prompter). Defaults to 1.
53 | - num_branches_response (Optional): Number of responses the LLM should generate for each prompt. Defaults to 1.
54 | 
55 | **Improve:** Improve the current thoughts. This operation is similar to the ValidateAndImprove operation, but it does not validate the thoughts and always tries to improve them.  
56 | 
57 | **Aggregate:** Aggregate the current thoughts into a single thought. This operation is useful when you want to combine multiple thoughts into a single thought.  
58 | - num_responses (Optional): Number of responses to request from the LLM (generates multiple new thoughts). Defaults to 1.
59 | 
60 | **KeepBestN:** Keep the best N thoughts from the preceding thoughts. Assumes that the thoughts are already scored and throws an error if they are not.
61 | - n: The number of thoughts to keep in order of score.
62 | - higher_is_better (Optional): Whether higher scores are better (True) or lower scores are better (False). Defaults to True.
63 | 
64 | **KeepValid:** Keep only the valid thoughts from the preceding thoughts. Assumes that each thought has already been validated; a thought that has not been validated is considered valid.
65 | 
66 | **Selector:** Select a number of thoughts from the preceding thoughts using a selection function. This is useful if subsequent operations should only be applied to a subset of the preceding thoughts.
67 | - selector: A function that takes in a list of thoughts and returns a list of thoughts to select.
68 | 
69 | **GroundTruth:** Evaluates whether the preceding/current thoughts solve the problem and equal the ground truth. This operation is useful for terminating the graph and checking whether the final thoughts solve the problem, but it is only applicable if the ground truth is known.
70 | - ground_truth_evaluator: A function that takes in a thought state and returns a boolean indicating whether the thought solves the problem.
71 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/operations/__init__.py:
--------------------------------------------------------------------------------
 1 | from .thought import Thought
 2 | from .graph_of_operations import GraphOfOperations
 3 | from .operations import (
 4 |     Operation,
 5 |     Score,
 6 |     ValidateAndImprove,
 7 |     Generate,
 8 |     Aggregate,
 9 |     KeepBestN,
10 |     KeepValid,
11 |     Selector,
12 |     GroundTruth,
13 |     Improve,
14 | )
15 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/operations/graph_of_operations.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 ETH Zurich.
 2 | #                    All rights reserved.
 3 | #
 4 | # Use of this source code is governed by a BSD-style license that can be
 5 | # found in the LICENSE file.
 6 | #
 7 | # main author: Nils Blach
 8 | 
 9 | from __future__ import annotations
10 | from typing import List
11 | 
12 | from graph_of_thoughts.operations.operations import Operation
13 | 
14 | 
class GraphOfOperations:
    """
    The Graph of Operations: the execution plan that prescribes how thought
    operations follow one another.
    """

    def __init__(self) -> None:
        """
        Create an empty graph without operations, roots, or leaves.
        Roots are the entry points of the graph (operations with no predecessors);
        leaves are its exit points (operations with no successors).
        """
        self.operations: List[Operation] = []
        self.roots: List[Operation] = []
        self.leaves: List[Operation] = []

    def append_operation(self, operation: Operation) -> None:
        """
        Attach an operation behind every current leaf and make it the only leaf.

        :param operation: The operation to append.
        :type operation: Operation
        """
        self.operations.append(operation)

        if self.roots:
            # Chain the new operation after each existing exit point.
            for leaf in self.leaves:
                leaf.add_successor(operation)
        else:
            # Very first operation: it is also the entry point.
            self.roots = [operation]

        self.leaves = [operation]

    def add_operation(self, operation: Operation) -> None:
        """
        Insert an operation whose predecessors and successors are already set,
        and update the roots and leaves according to its position in the graph.

        :param operation: The operation to add.
        :type operation: Operation
        """
        self.operations.append(operation)

        if not self.roots:
            # Empty graph: the operation is both entry and exit point.
            self.roots = [operation]
            self.leaves = [operation]
            assert (
                len(operation.predecessors) == 0
            ), "First operation should have no predecessors"
            return

        if len(operation.predecessors) == 0:
            self.roots.append(operation)
        # A predecessor that was a leaf now has a successor and stops being one.
        for predecessor in operation.predecessors:
            if predecessor in self.leaves:
                self.leaves.remove(predecessor)
        if len(operation.successors) == 0:
            self.leaves.append(operation)


--------------------------------------------------------------------------------
/graph_of_thoughts/operations/thought.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | 
  9 | from __future__ import annotations
 10 | import logging
 11 | from typing import Iterator, Dict, Optional
 12 | import itertools
 13 | 
 14 | 
 15 | class Thought:
 16 |     """
 17 |     Represents an LLM thought with its state, constructed by the parser, and various flags.
 18 |     """
 19 | 
 20 |     _ids: Iterator[int] = itertools.count(0)
 21 | 
 22 |     def __init__(self, state: Optional[Dict] = None) -> None:
 23 |         """
 24 |         Initializes a new Thought instance with a state and various default flags.
 25 | 
 26 |         :param state: The state of the thought. Defaults to None.
 27 |         :type state: Optional[Dict]
 28 |         """
 29 |         self.logger: logging.Logger = logging.getLogger(self.__class__.__name__)
 30 |         self.id: int = next(Thought._ids)
 31 |         self.state: Dict = state
 32 |         self._score: float = 0.0
 33 |         self._valid: bool = False
 34 |         self._solved: bool = False
 35 |         self.scored: bool = False
 36 |         self.validated: bool = False
 37 |         self.compared_to_ground_truth: bool = False
 38 | 
 39 |     @staticmethod
 40 |     def from_thought(thought: Thought) -> Thought:
 41 |         """
 42 |         Creates a new thought from an existing one.
 43 | 
 44 |         :param thought: An instance of a Thought to clone.
 45 |         :return: A new Thought instance with properties copied from the input thought.
 46 |         """
 47 |         new_thought = Thought(thought.state)
 48 |         new_thought.score = thought.score
 49 |         new_thought.valid = thought.valid
 50 |         new_thought.solved = thought.solved
 51 |         new_thought.scored = thought.scored
 52 |         new_thought.validated = thought.validated
 53 |         new_thought.compared_to_ground_truth = thought.compared_to_ground_truth
 54 |         return new_thought
 55 | 
 56 |     @property
 57 |     def valid(self) -> bool:
 58 |         """
 59 |         Returns the validity of the thought.
 60 | 
 61 |         :return: The validity of the thought.
 62 |         :rtype: bool
 63 |         """
 64 |         return self._valid
 65 | 
 66 |     @valid.setter
 67 |     def valid(self, valid: bool) -> None:
 68 |         """
 69 |         Sets the validity of the thought and the validated flag.
 70 | 
 71 |         :param valid: The validity of the thought.
 72 |         :type valid: bool
 73 |         """
 74 |         self.validated = True
 75 |         self._valid = valid
 76 | 
 77 |     @property
 78 |     def score(self) -> float:
 79 |         """
 80 |         Returns the score of the thought.
 81 | 
 82 |         :return: The score of the thought.
 83 |         :rtype: float
 84 |         """
 85 |         return self._score
 86 | 
 87 |     @score.setter
 88 |     def score(self, new_score: float) -> None:
 89 |         """
 90 |         Sets the score of the thought and the scored flag.
 91 | 
 92 |         :param new_score: The score of the thought.
 93 |         :type new_score: float
 94 |         """
 95 |         self.scored = True
 96 |         self._score = new_score
 97 | 
 98 |     @property
 99 |     def solved(self) -> bool:
100 |         """
101 |         Returns the solved flag of the thought.
102 | 
103 |         :return: The solved flag of the thought.
104 |         :rtype: bool
105 |         """
106 |         return self._solved
107 | 
108 |     @solved.setter
109 |     def solved(self, solved: bool) -> None:
110 |         """
111 |         Sets the solved flag of the thought and the compared_to_ground_truth flag.
112 | 
113 |         :param solved: Whether the thought contains a solution to the problem.
114 |         :type solved: bool
115 |         """
116 |         self.compared_to_ground_truth = True
117 |         self._solved = solved
118 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/parser/__init__.py:
--------------------------------------------------------------------------------
1 | from .parser import Parser
2 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/parser/parser.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 ETH Zurich.
 2 | #                    All rights reserved.
 3 | #
 4 | # Use of this source code is governed by a BSD-style license that can be
 5 | # found in the LICENSE file.
 6 | #
 7 | # main authors: Robert Gerstenberger, Nils Blach
 8 | 
 9 | from __future__ import annotations
10 | from abc import ABC, abstractmethod
11 | from typing import Dict, List, Union
12 | 
13 | 
class Parser(ABC):
    """
    Interface that every parser has to implement.
    A parser turns the responses of a language model into thought states,
    validity flags, or scores.
    """

    @abstractmethod
    def parse_aggregation_answer(
        self, states: List[Dict], texts: List[str]
    ) -> Union[Dict, List[Dict]]:
        """
        Parse the language model's response to an aggregation prompt.

        :param states: The thought states the prompt was generated from.
        :type states: List[Dict]
        :param texts: The language model's responses to the prompt.
        :type texts: List[str]
        :return: The new thought state(s) obtained from the responses.
        :rtype: Union[Dict, List[Dict]]
        """

    @abstractmethod
    def parse_improve_answer(self, state: Dict, texts: List[str]) -> Dict:
        """
        Parse the language model's response to an improve prompt.

        :param state: The thought state the prompt was generated from.
        :type state: Dict
        :param texts: The language model's responses to the prompt.
        :type texts: List[str]
        :return: The new thought state obtained from the responses.
        :rtype: Dict
        """

    @abstractmethod
    def parse_generate_answer(self, state: Dict, texts: List[str]) -> List[Dict]:
        """
        Parse the language model's response to a generate prompt.

        :param state: The thought state the prompt was generated from.
        :type state: Dict
        :param texts: The language model's responses to the prompt.
        :type texts: List[str]
        :return: The new thought states obtained from the responses.
        :rtype: List[Dict]
        """

    @abstractmethod
    def parse_validation_answer(self, state: Dict, texts: List[str]) -> bool:
        """
        Parse the language model's response to a validation prompt.

        :param state: The thought state the prompt was generated from.
        :type state: Dict
        :param texts: The language model's responses to the prompt.
        :type texts: List[str]
        :return: Whether the thought state is valid or not.
        :rtype: bool
        """

    @abstractmethod
    def parse_score_answer(self, states: List[Dict], texts: List[str]) -> List[float]:
        """
        Parse the language model's response to a score prompt.

        :param states: The thought states the prompt was generated from.
        :type states: List[Dict]
        :param texts: The language model's responses to the prompt.
        :type texts: List[str]
        :return: The scores for the thought states.
        :rtype: List[float]
        """
91 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/prompter/__init__.py:
--------------------------------------------------------------------------------
1 | from .prompter import Prompter
2 | 


--------------------------------------------------------------------------------
/graph_of_thoughts/prompter/prompter.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2023 ETH Zurich.
 2 | #                    All rights reserved.
 3 | #
 4 | # Use of this source code is governed by a BSD-style license that can be
 5 | # found in the LICENSE file.
 6 | #
 7 | # main authors: Robert Gerstenberger, Nils Blach
 8 | 
 9 | from __future__ import annotations
10 | from abc import ABC, abstractmethod
11 | from typing import Dict, List
12 | 
13 | 
class Prompter(ABC):
    """
    Interface that every prompter has to implement.
    A prompter builds the prompts that are sent to the language model.
    """

    @abstractmethod
    def aggregation_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        """
        Build an aggregation prompt for the language model.

        :param state_dicts: The thought states to aggregate.
        :type state_dicts: List[Dict]
        :param kwargs: Additional keyword arguments.
        :return: The aggregation prompt.
        :rtype: str
        """

    @abstractmethod
    def improve_prompt(self, **kwargs) -> str:
        """
        Build an improve prompt for the language model.
        The thought state is passed unpacked, so that additional keyword arguments
        are possible and concrete implementations can name the arguments they
        require explicitly.

        :param kwargs: Additional keyword arguments.
        :return: The improve prompt.
        :rtype: str
        """

    @abstractmethod
    def generate_prompt(self, num_branches: int, **kwargs) -> str:
        """
        Build a generate prompt for the language model.
        The thought state is passed unpacked, so that additional keyword arguments
        are possible and concrete implementations can name the arguments they
        require explicitly.

        :param num_branches: How many responses the prompt should ask the LM to generate.
        :type num_branches: int
        :param kwargs: Additional keyword arguments.
        :return: The generate prompt.
        :rtype: str
        """

    @abstractmethod
    def validation_prompt(self, **kwargs) -> str:
        """
        Build a validation prompt for the language model.
        The thought state is passed unpacked, so that additional keyword arguments
        are possible and concrete implementations can name the arguments they
        require explicitly.

        :param kwargs: Additional keyword arguments.
        :return: The validation prompt.
        :rtype: str
        """

    @abstractmethod
    def score_prompt(self, state_dicts: List[Dict], **kwargs) -> str:
        """
        Build a score prompt for the language model.

        :param state_dicts: The thought states to score; if there is more than
                            one, they should be scored together.
        :type state_dicts: List[Dict]
        :param kwargs: Additional keyword arguments.
        :return: The score prompt.
        :rtype: str
        """
87 | 


--------------------------------------------------------------------------------
/paper/README.md:
--------------------------------------------------------------------------------
 1 | <p align="center">
 2 |   <img src="poster.png" width="40%">
 3 | </p>
 4 | 
 5 | ## Poster
 6 | The poster presented at the 2024 Association for the Advancement of Artificial Intelligence (AAAI'24) conference is available as [PDF](poster.pdf) and [PNG](poster.png).
 7 | 
 8 | ## Plot Data
 9 | 
10 | The data used to create the figures of the paper can be
11 | found in the `final_results_gpt35.tar.bz2` archive.  Unpack the archive
12 | and run the file `plots.py`.
13 | 


--------------------------------------------------------------------------------
/paper/final_results_gpt35.tar.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spcl/graph-of-thoughts/363421c61c7bc11edf32845a697ae2aaccd75463/paper/final_results_gpt35.tar.bz2


--------------------------------------------------------------------------------
/paper/plots.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2023 ETH Zurich.
  2 | #                    All rights reserved.
  3 | #
  4 | # Use of this source code is governed by a BSD-style license that can be
  5 | # found in the LICENSE file.
  6 | #
  7 | # main author: Nils Blach
  8 | # contributions: Robert Gerstenberger
  9 | 
 10 | import json
 11 | import os
 12 | import matplotlib.pyplot as plt
 13 | 
 14 | 
 15 | def get_complete_results(base_directory):
 16 |     results_complete = {}
 17 |     for folder_name in os.listdir(base_directory):
 18 |         folder_path = os.path.join(base_directory, folder_name)
 19 |         if os.path.isdir(folder_path):
 20 |             results_complete[folder_name] = []
 21 |             for file_name in os.listdir(folder_path):
 22 |                 if file_name.endswith(".json"):
 23 |                     file_path = os.path.join(folder_path, file_name)
 24 |                     with open(file_path, "r") as f:
 25 |                         data = json.load(f)
 26 |                         results_complete[folder_name].append(
 27 |                             {"key": int(file_name.split(".")[0]), "data": data}
 28 |                         )
 29 |         for key in results_complete.keys():
 30 |             results_complete[key] = sorted(
 31 |                 results_complete[key], key=lambda x: x["key"]
 32 |             )
 33 |     return results_complete
 34 | 
 35 | 
 36 | def get_final_scores(results_complete):
 37 |     scores = {}
 38 |     for method in results_complete.keys():
 39 |         scores[method] = []
 40 |         for result in results_complete[method]:
 41 |             score = 100
 42 |             solved = False
 43 |             cost = 1
 44 |             prompt_tokens = 0
 45 |             completion_tokens = 0
 46 |             for op in result["data"]:
 47 |                 if "operation" in op and op["operation"] == "ground_truth_evaluator":
 48 |                     try:
 49 |                         score = min(op["scores"])
 50 |                         solved = any(op["problem_solved"])
 51 |                     except:
 52 |                         continue
 53 |                 if "cost" in op:
 54 |                     cost = op["cost"]
 55 |                     prompt_tokens = op["prompt_tokens"]
 56 |                     completion_tokens = op["completion_tokens"]
 57 |             scores[method].append(
 58 |                 [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
 59 |             )
 60 |         scores[method] = sorted(scores[method], key=lambda x: x[0])
 61 |     return scores
 62 | 
 63 | 
 64 | def get_final_scores_doc_merge(results_complete):
 65 |     scores = {}
 66 |     for method in results_complete.keys():
 67 |         scores[method] = []
 68 |         for result in results_complete[method]:
 69 |             score = 0
 70 |             solved = False
 71 |             cost = 1
 72 |             prompt_tokens = 0
 73 |             completion_tokens = 0
 74 |             for op in reversed(result["data"]):
 75 |                 if "cost" in op:
 76 |                     cost = op["cost"]
 77 |                     prompt_tokens = op["prompt_tokens"]
 78 |                     completion_tokens = op["completion_tokens"]
 79 |                 if "operation" in op and op["operation"] == "score":
 80 |                     try:
 81 |                         score = max(op["scores"])
 82 |                         break
 83 |                     except:
 84 |                         continue
 85 |             scores[method].append(
 86 |                 [result["key"], score, solved, prompt_tokens, completion_tokens, cost]
 87 |             )
 88 |         scores[method] = sorted(scores[method], key=lambda x: x[0])
 89 |     return scores
 90 | 
 91 | 
 92 | def get_plotting_data(base_directory, score_method):
 93 |     results_complete = get_complete_results(base_directory)
 94 |     scores = score_method(results_complete)
 95 |     results_plotting = {
 96 |         method: {
 97 |             "scores": [x[1] for x in scores[method]],
 98 |             "solved": sum([1 for x in scores[method] if x[2]]),
 99 |             "costs": [x[5] for x in scores[method]],
100 |         }
101 |         for method in scores.keys()
102 |     }
103 |     return results_plotting
104 | 
105 | 
def plot_results(
    name,
    results,
    methods_order=["io", "cot", "tot", "tot2", "tog"],
    methods_labels=["IO", "CoT", "ToT", "ToT2", "GoT"],
    model="GPT-3.5",
    length=32,
    y_lower=0,
    y_upper=16,
    cost_upper=1.8,
    display_solved=True,
    annotation_offset=1,
    display_left_ylabel=False,
    display_right_ylabel=False,
):
    """
    Draw one figure with a score boxplot and a total-cost bar per method and
    save it as a PDF in the current working directory.

    :param name: Kind of plot; one of ``"set_intersection"``, ``"sorting"``,
        ``"keyword_counting"`` or ``"document_merging"``.
    :param results: Mapping from method name to a dict with ``scores``,
        ``solved`` and ``costs`` entries.
    :param methods_order: Method names in plotting order; methods that are
        absent from ``results`` are skipped.
    :param methods_labels: Tick labels, positionally matching ``methods_order``.
    :param model: Model name; dots and dashes are removed and the rest is
        lower-cased to form part of the output file name.
    :param length: Upper clamp for the sorting / set intersection scores; it
        also appears in the title and file name of those plots.
    :param y_lower: Lower limit of the score axis.
    :param y_upper: Upper reference of the score axis (headroom is added).
    :param cost_upper: Upper limit of the cost axis.
    :param display_solved: Whether to print the solved count above each method.
    :param annotation_offset: Vertical offset of the solved counts from ``y_upper``.
    :param display_left_ylabel: Whether to label the score axis.
    :param display_right_ylabel: Whether to label the cost axis.
    :raises ValueError: If ``name`` is not one of the supported plot kinds.
    """
    # The list defaults are only read and rebound, never mutated in place.
    # Labels are filtered together with the methods so that each remaining
    # tick keeps its own label when a method is missing from the results.
    present = [method in results for method in methods_order]
    if len(methods_labels) == len(methods_order):
        methods_labels = [
            label for label, keep in zip(methods_labels, present) if keep
        ]
    methods_order = [method for method, keep in zip(methods_order, present) if keep]

    # Extract scores based on the order. Certain fixed values are left out of
    # the boxplots; 100 is the default score of get_final_scores, the others
    # are presumably error sentinels as well (NOTE(review): confirm).
    if name == "set_intersection":
        scores_ordered = [
            [min(score, length) for score in results[method]["scores"] if score != 1000]
            for method in methods_order
        ]
    elif name == "sorting":
        scores_ordered = [
            [
                min(score, length)
                for score in results[method]["scores"]
                if score != 100 and score != 300
            ]
            for method in methods_order
        ]
    elif name == "keyword_counting":
        scores_ordered = [
            [
                score
                for score in results[method]["scores"]
                if score != 100 and score != 300
            ]
            for method in methods_order
        ]
    elif name == "document_merging":
        scores_ordered = [list(results[method]["scores"]) for method in methods_order]
    else:
        raise ValueError(f"Unsupported plot name: {name!r}")
    total_costs = [sum(results[method]["costs"]) for method in methods_order]

    # Create figure and axis
    if name == "keyword_counting" or name == "document_merging":
        fig, ax = plt.subplots(dpi=150, figsize=(3.75, 5))
    else:
        fig, ax = plt.subplots(dpi=150, figsize=(2.5, 5))

    # Create boxplots
    positions = range(1, len(methods_order) + 1)
    ax.boxplot(scores_ordered, positions=positions)

    fig_fontsize = 12

    # Set the ticks and labels
    plt.yticks(fontsize=fig_fontsize)
    ax.set_xticks(positions)
    if name == "keyword_counting":
        ax.set_xticklabels(methods_labels, fontsize=10)
    else:
        ax.set_xticklabels(methods_labels, fontsize=fig_fontsize)

    # Leave headroom above y_upper; more when solved counts are annotated.
    if name == "document_merging":
        ax.set_ylim(y_lower, 12 if display_solved else 9.75)
    else:
        ax.set_ylim(y_lower, (y_upper + 2) if display_solved else y_upper + 1)

    if name == "sorting" or name == "set_intersection":
        # Coarser tick spacing for larger problem sizes.
        ax1_yticks = range(
            y_lower, y_upper + 1, 2 if length < 48 else (4 if length < 96 else 8)
        )
        ax.set_yticks(ax1_yticks)

    if display_left_ylabel:
        if name == "keyword_counting":
            ax.set_ylabel("Number of errors; the lower the better", fontsize=fig_fontsize)
        elif name == "document_merging":
            ax.set_ylabel(
                "Score (out of 10); the higher the better", fontsize=fig_fontsize
            )
        else:
            ax.set_ylabel(
                "#incorrect elements; the lower the better", fontsize=fig_fontsize
            )

    if name == "sorting" or name == "set_intersection":
        ax.set_title(f"{length} elements")

    # Second y-axis carrying the total cost per method as bars.
    ax2 = ax.twinx()
    ax2.bar(positions, total_costs, alpha=0.5, color="blue", label="Total Cost ($)")
    ax2.yaxis.set_tick_params(colors="#1919ff", labelsize=fig_fontsize)
    ax2.set_ylim(0, cost_upper)
    # Use as many cost ticks as the score axis has, evenly spaced from zero.
    number_of_ticks = len(ax.get_yticks())
    tick_interval = cost_upper / number_of_ticks
    ax2_ticks = [tick_interval * i for i in range(number_of_ticks)]

    # Set custom tick positions for ax2
    ax2.set_yticks(ax2_ticks)

    if display_right_ylabel:
        ax2.set_ylabel(
            "Total Cost ($); the lower the better",
            color="#1919ff",
            fontsize=fig_fontsize,
        )

    if display_solved:
        annotation_height = y_upper + annotation_offset
        for position, method in enumerate(methods_order, start=1):
            ax.text(
                position,
                annotation_height,
                f"{results[method]['solved']}",
                ha="center",
                va="bottom",
                fontsize=fig_fontsize,
            )

    model = model.replace(".", "").replace("-", "").lower()
    if name == "keyword_counting" or name == "document_merging":
        fig.savefig(f"{name}_{model}.pdf", bbox_inches="tight")
    else:
        fig.savefig(f"{name}_{model}_{length}.pdf", bbox_inches="tight")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
241 | 
242 | 
# Set intersection: one figure per result directory, each with its own axis
# limits and annotation offset.
for _directory, _length, _y_upper, _cost_upper, _offset in (
    ("set_intersection_gpt35_032", 32, 19, 2, 0.5),
    ("set_intersection_gpt35_064", 64, 32, 5.4, 0.2),
    ("set_intersection_gpt35_128", 128, 94, 12, -3),
):
    plot_results(
        "set_intersection",
        get_plotting_data(_directory, get_final_scores),
        methods_order=["io", "cot", "tot", "tot2", "tog2"],
        length=_length,
        y_upper=_y_upper,
        cost_upper=_cost_upper,
        display_solved=True,
        annotation_offset=_offset,
        display_left_ylabel=True,
        display_right_ylabel=True,
    )

# Sorting: solved counts are not displayed; arguments that are not listed for
# a directory keep the defaults of plot_results.
for _directory, _overrides in (
    ("sorting_gpt35_032", {"length": 32, "annotation_offset": 0.5}),
    ("sorting_gpt35_064", {"length": 64, "y_upper": 64, "cost_upper": 5.1}),
    ("sorting_gpt35_128", {"length": 128, "y_upper": 128, "cost_upper": 17}),
):
    plot_results(
        "sorting",
        get_plotting_data(_directory, get_final_scores),
        display_solved=False,
        display_left_ylabel=True,
        display_right_ylabel=True,
        **_overrides,
    )

# Keyword counting: seven methods with their own tick labels.
_keyword_counting_results = get_plotting_data(
    "keyword_counting_gpt35", get_final_scores
)
plot_results(
    "keyword_counting",
    _keyword_counting_results,
    methods_order=["io", "cot", "tot", "tot2", "gsp4", "gsp8", "gspx"],
    methods_labels=["IO", "CoT", "ToT", "ToT2", "GoT4", "GoT8", "GoTx"],
    y_upper=35,
    cost_upper=9,
    display_solved=True,
    annotation_offset=-0.3,
    display_left_ylabel=True,
    display_right_ylabel=True,
)

# Document merging: scored with the dedicated doc-merge extraction function.
_document_merging_results = get_plotting_data(
    "document_merging_gpt35_16k", get_final_scores_doc_merge
)
plot_results(
    "document_merging",
    _document_merging_results,
    methods_order=["io", "cot", "tot", "gsp", "gsp2"],
    methods_labels=["IO", "CoT", "ToT", "GoT", "GoT2"],
    y_upper=10,
    cost_upper=15,
    display_solved=False,
    display_left_ylabel=True,
    display_right_ylabel=True,
)
338 | 


--------------------------------------------------------------------------------
/paper/poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spcl/graph-of-thoughts/363421c61c7bc11edf32845a697ae2aaccd75463/paper/poster.pdf


--------------------------------------------------------------------------------
/paper/poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spcl/graph-of-thoughts/363421c61c7bc11edf32845a697ae2aaccd75463/paper/poster.png


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["hatchling"]
 3 | build-backend = "hatchling.build"
 4 | 
 5 | [project]
 6 | name = "graph_of_thoughts"
 7 | version = "0.0.3"
 8 | authors = [
 9 |   { name="Maciej Besta", email="maciej.besta@inf.ethz.ch" },
10 |   { name="Nils Blach", email="nils.blach@inf.ethz.ch" },
11 |   { name="Ales Kubicek", email="akubicek@student.ethz.ch" },
12 |   { name="Robert Gerstenberger", email="gerstenberger.robert@gmail.com" },
13 | ]
14 | description = "Python package for Graph of Thoughts that enables solving elaborate problems with Large Language Models"
15 | readme = "README.md"
16 | license = {file = "LICENSE"}
17 | requires-python = ">=3.8"
18 | classifiers = [
19 |   "Programming Language :: Python :: 3",
20 |   "Operating System :: OS Independent",
21 | ]
22 | dependencies = [
23 |   "backoff>=2.2.1,<3.0.0",
24 |   "openai>=1.0.0,<2.0.0",
25 |   "matplotlib>=3.7.1,<4.0.0",
26 |   "numpy>=1.24.3,<2.0.0",
27 |   "pandas>=2.0.3,<3.0.0",
28 |   "sympy>=1.12,<2.0",
29 |   "torch>=2.0.1,<3.0.0",
30 |   "transformers>=4.31.0,<5.0.0",
31 |   "accelerate>=0.21.0,<1.0.0",
32 |   "bitsandbytes>=0.41.0,<1.0.0",
33 |   "scipy>=1.10.1,<2.0.0",
34 | ]
35 | 
36 | [project.urls]
37 | Homepage = "https://github.com/spcl/graph-of-thoughts"
38 | 
39 | [project.scripts]
40 | 


--------------------------------------------------------------------------------