├── .gitignore ├── README.md ├── __pycache__ └── models.cpython-310.pyc ├── asset └── teaser_figure_horizontal_png.png ├── config_template.sh ├── configs.py ├── data ├── codenames_collaborative │ └── codenames_50.jsonl ├── logic_grid_puzzle │ └── logic_grid_puzzle_200.jsonl └── trivia_creative_writing │ ├── topics_pop_culture_100_gpt4_gen_PG_rated.txt │ ├── trivia_creative_writing_100_n_10.jsonl │ └── trivia_creative_writing_100_n_5.jsonl ├── logs ├── codenames_collaborative │ ├── gpt35 │ │ ├── codenames_50.jsonl__method-cot_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end50__with_sys_mes.jsonl │ │ ├── codenames_50.jsonl__method-spp_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end50__with_sys_mes.jsonl │ │ └── codenames_50.jsonl__method-standard_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end50__with_sys_mes.jsonl │ ├── gpt4_w_sys_mes │ │ ├── codenames_50.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ ├── codenames_50.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50_num_refine-1__with_sys_mes.jsonl │ │ ├── codenames_50.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ ├── codenames_50.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ ├── codenames_50.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ └── codenames_50.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ ├── gpt4_wo_sys_mes │ │ ├── codenames_50.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ ├── codenames_50.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50_num_refine-1__without_sys_mes.jsonl │ │ ├── codenames_50.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ ├── codenames_50.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ ├── 
codenames_50.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ │ └── codenames_50.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end50.jsonl │ └── llama2-13b │ │ ├── codenames_50.jsonl__method-cot_engine-meta-llama-Llama-2-13b-chat-hf_start0-end50__without_sys_mes.jsonl │ │ ├── codenames_50.jsonl__method-spp_engine-meta-llama-Llama-2-13b-chat-hf_start0-end50__without_sys_mes.jsonl │ │ └── codenames_50.jsonl__method-standard_engine-meta-llama-Llama-2-13b-chat-hf_start0-end50__without_sys_mes.jsonl ├── logic_grid_puzzle │ ├── gpt35 │ │ ├── logic_grid_puzzle_200.jsonl__method-cot_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ │ └── logic_grid_puzzle_200.jsonl__method-standard_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ ├── gpt4_w_sys_mes │ │ ├── logic_grid_puzzle_200.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__with_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200_num_refine-1__with_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__with_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__with_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__with_sys_mes.jsonl │ │ └── logic_grid_puzzle_200.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__with_sys_mes.jsonl │ ├── gpt4_wo_sys_mes │ │ ├── logic_grid_puzzle_200.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ │ ├── 
logic_grid_puzzle_200.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200_num_refine-1__without_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ │ └── logic_grid_puzzle_200.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end200__without_sys_mes.jsonl │ └── llama2-13b │ │ ├── logic_grid_puzzle_200.jsonl__method-cot_engine-meta-llama-Llama-2-13b-chat-hf_start0-end200__without_sys_mes.jsonl │ │ ├── logic_grid_puzzle_200.jsonl__method-spp_engine-meta-llama-Llama-2-13b-chat-hf_start0-end200__without_sys_mes.jsonl │ │ └── logic_grid_puzzle_200.jsonl__method-standard_engine-meta-llama-Llama-2-13b-chat-hf_start0-end200__without_sys_mes.jsonl └── trivia_creative_writing │ ├── gpt35 │ ├── trivia_creative_writing_100_n_10.jsonl__method-cot_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end100__with_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end100__with_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-standard_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end100__with_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-cot_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end100__with_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end100__with_sys_mes.jsonl │ └── trivia_creative_writing_100_n_5.jsonl__method-standard_engine-mtutor-openai-dev_temp-0.0_topp-1.0_start0-end100__with_sys_mes.jsonl │ ├── gpt4_w_sys_mes │ ├── 
trivia_creative_writing_100_n_10.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100_num_refine-1__with_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100_num_refine-1__with_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ └── trivia_creative_writing_100_n_5.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── gpt4_wo_sys_mes │ ├── trivia_creative_writing_100_n_10.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100_num_refine-1__without_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── 
trivia_creative_writing_100_n_10.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-cot_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-self_refine_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100_num_refine-1__without_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_fixed_persona_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_profile_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ └── trivia_creative_writing_100_n_5.jsonl__method-standard_engine-devgpt4-32k_temp-0.0_topp-1.0_start0-end100.jsonl │ └── llama2-13b │ ├── trivia_creative_writing_100_n_10.jsonl__method-cot_engine-meta-llama-Llama-2-13b-chat-hf_start0-end100__without_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-spp_engine-meta-llama-Llama-2-13b-chat-hf_start0-end100__without_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_10.jsonl__method-standard_engine-meta-llama-Llama-2-13b-chat-hf_start0-end100__without_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-cot_engine-meta-llama-Llama-2-13b-chat-hf_start0-end100__without_sys_mes.jsonl │ ├── trivia_creative_writing_100_n_5.jsonl__method-spp_engine-meta-llama-Llama-2-13b-chat-hf_start0-end100__without_sys_mes.jsonl │ └── trivia_creative_writing_100_n_5.jsonl__method-standard_engine-meta-llama-Llama-2-13b-chat-hf_start0-end100__without_sys_mes.jsonl ├── models.py ├── prompts ├── codenames_collaborative.py ├── 
logic_grid_puzzle.py └── trivia_creative_writing.py ├── requirements.txt ├── run.py ├── scripts ├── codenames_collaborative.sh ├── logic_grid_puzzle.sh └── trivia_creative_writing.sh └── tasks ├── __init__.py ├── base.py ├── codenames_collaborative.py ├── logic_grid_puzzle.py └── trivia_creative_writing.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.zip 2 | *.tar 3 | *.tar.gz 4 | *.rar 5 | datasets/ 6 | openai_key* 7 | api_key* 8 | *.log 9 | config_ours.sh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Official Repo of paper [Solo Performance Prompting (SPP)](https://arxiv.org/abs/2307.05300) 2 | 3 | ![Illustration of Solo Performance Prompting](asset/teaser_figure_horizontal_png.png) 4 | 5 | ## 🔥 News 6 | - `5/8/2024`: Update GPT-3.5 and LLama2 inference code and results for Figure 6, which shows the emergent nature of cognitive synergy. 7 | - `3/15/2024`: This paper has been accepted as a main conference paper at NAACL2024! 8 | 9 | ## Setup 10 | - Install dependencies 11 | ``` 12 | pip install -r requirements.txt 13 | ``` 14 | - Set up OpenAI API configs in `config_template.sh` and run `source config_template.sh` to set up the env variables (Note that we are using the Azure API in our experiments) 15 | 16 | ## Quick Start 17 | We provide running scripts for each of the three tasks, please check out the comments in the ".sh" scripts for more information: 18 | - Trivia Creative Writing: `bash scripts/trivia_creative_writing.sh` 19 | - Codenames Collaborative: `bash scripts/codenames_collaborative.sh` 20 | - Logic Grid Puzzle: `bash scripts/logic_grid_puzzle.sh` 21 | 22 | ## Prompts 23 | All prompts can be found in the `prompts/` folder. 24 | 25 | ## Datasets 26 | All datasets can be found in the `data/` folder. 
27 | 28 | ## Paper Experiment Results 29 | Experimental results in the paper for each task can be found in the `logs/` folder. `gpt4_w_sys_mes` and `gpt4_wo_sys_mes` contain results corresponding to Table 2 in our paper. We also include gpt-3.5 and llama2-13b results corresponding to the results in Figure 6, where the hyperparameters, such as whether or not to add a system message, follow the best-performing choices in the gpt4 experiments. 30 | 31 | ### Log file formats 32 | 33 | - `"test_output_infos"`: contains evaluation metrics for each instance, e.g., # correct answers mentioned. 34 | - `"prompt"`: full input prompt for the API call. (for the Codenames task, there are two API calls for each instance) 35 | - `"*raw_responses"`: raw responses from each API call. 36 | - `"*parsing_flag"`: whether the raw response is successfully parsed. (for the Codenames task, this field is separated into "parsing_success_flag_spymaster" and "parsing_success_flag_guesser") 37 | - `"unwrapped_output"`: parsed output that will be used for computing evaluation metrics. (for the Codenames task, this field is separated into "spymaster_output" and "guesser_output"; there is an additional field named "hint_word" which is parsed from the spymaster's output and inserted into the Guesser's input; the evaluation metric is computed based on the "guesser_output") 38 | - `"task data"`: data for the current task instance, e.g., questions, answers, target words, etc. 39 | - `"usage"`: logging for the number of tokens and cost spent so far. 40 | - other self-explanatory config fields: "model", "method", "temperature", etc. 41 | 42 | ## Citations 43 | Please cite the paper and star this repo if you find this work interesting/helpful. 
44 | ``` 45 | @article{wang2023unleashing, 46 | title={Unleashing Cognitive Synergy in Large Language Models: A Task-Solving Agent through Multi-Persona Self-Collaboration}, 47 | author={Wang, Zhenhailong and Mao, Shaoguang and Wu, Wenshan and Ge, Tao and Wei, Furu and Ji, Heng}, 48 | journal={arXiv preprint arXiv:2307.05300}, 49 | year={2023} 50 | } 51 | ``` 52 | 53 | ## Acknowledgements 54 | This codebase referenced the structure of the [Tree-of-thought official repo](https://github.com/princeton-nlp/tree-of-thought-llm). We thank the authors for their open-sourcing efforts. 55 | 56 | -------------------------------------------------------------------------------- /__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikeWangWZHL/Solo-Performance-Prompting/619c8a0ff4205bfd39e33f0867647b40e1703b94/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /asset/teaser_figure_horizontal_png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MikeWangWZHL/Solo-Performance-Prompting/619c8a0ff4205bfd39e33f0867647b40e1703b94/asset/teaser_figure_horizontal_png.png -------------------------------------------------------------------------------- /config_template.sh: -------------------------------------------------------------------------------- 1 | export USE_AZURE=True 2 | export OPENAI_API_KEY=your-azure-openai-service-key 3 | export API_BASE=your-base-url-for-azure 4 | export API_VERSION=2023-03-15-preview -------------------------------------------------------------------------------- /configs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # TODO: add your custom model config here: 4 | gpt_configs = { 5 | "gpt4-32k": { 6 | "engine": "devgpt4-32k", 7 | "temperature": 0.0, 8 | 
"max_tokens": 5000, 9 | "top_p": 1.0, 10 | "frequency_penalty": 0.0, 11 | "presence_penalty": 0.0, 12 | "stop": None 13 | }, 14 | "gpt35-turbo": { 15 | "engine": "mtutor-openai-dev", 16 | "temperature": 0.0, 17 | "max_tokens": 3999, 18 | "top_p": 1.0, 19 | "frequency_penalty": 0.0, 20 | "presence_penalty": 0.0, 21 | "stop": None 22 | } 23 | } 24 | 25 | llama_configs = { 26 | "meta-llama/Llama-2-7b-chat-hf": { 27 | "task": "text-generation", 28 | "model": "meta-llama/Llama-2-7b-chat-hf", 29 | "torch_dtype": torch.float16, 30 | "device_map": "auto", 31 | "do_sample":False, 32 | }, 33 | "meta-llama/Llama-2-13b-chat-hf": { 34 | "task": "text-generation", 35 | "model": "meta-llama/Llama-2-13b-chat-hf", 36 | "torch_dtype": torch.float16, 37 | "device_map": "auto", 38 | "do_sample":False, 39 | } 40 | } 41 | 42 | default_llama_config = { 43 | "task": "text-generation", 44 | "model": None, 45 | "torch_dtype": torch.float16, 46 | "device_map": "auto", 47 | "do_sample":False, 48 | } 49 | 50 | default_gpt_config = { 51 | "engine": None, 52 | "temperature": 0.0, 53 | "max_tokens": 5000, 54 | "top_p": 1.0, 55 | "frequency_penalty": 0.0, 56 | "presence_penalty": 0.0, 57 | "stop": None 58 | } -------------------------------------------------------------------------------- /data/codenames_collaborative/codenames_50.jsonl: -------------------------------------------------------------------------------- 1 | {"idx": 0, "word_list": ["locust", "fever", "street", "sherwood", "kiss", "popcorn", "craft", "ant", "crystal", "tear", "cowboy", "government", "pine", "mountie", "soap", "genie", "king arthur", "sphinx", "director", "bride", "razor", "fog", "whistle"], "target_words": ["director", "kiss", "popcorn", "street"]} 2 | {"idx": 1, "word_list": ["razor", "fever", "ant", "soap", "bride", "government", "craft", "cowboy", "tear", "fog", "mountie", "locust", "sherwood", "pine", "whistle", "crystal", "kiss", "king arthur", "genie", "cable", "sphinx"], "target_words": ["pine", "soap"]} 3 | 
{"idx": 2, "word_list": ["cable", "ant", "genie", "razor", "tear", "fog", "cowboy", "mountie", "government", "king arthur", "whistle", "crystal", "locust", "sphinx", "craft", "fever", "sherwood"], "target_words": ["fog", "tear"]} 4 | {"idx": 3, "word_list": ["locust", "razor", "craft", "cowboy", "sphinx", "ant", "king arthur", "genie", "fever", "crystal", "whistle", "government", "sherwood", "cable", "mountie"], "target_words": ["ant", "cable", "government"]} 5 | {"idx": 4, "word_list": ["magician", "boss", "hide", "drum", "kick", "book", "gear", "sugar", "pacific", "spirit", "volume", "page", "nose", "tunnel", "brazil", "rope", "glacier", "mark", "pop", "shoulder", "ram", "anchor", "rainbow", "shorts", "sack"], "target_words": ["pop", "spirit"]} 6 | {"idx": 5, "word_list": ["rainbow", "volume", "page", "anchor", "brazil", "sack", "gear", "mark", "hide", "boss", "kick", "book", "shorts", "rope", "sugar", "drum", "nose", "tunnel", "pacific", "magician", "glacier", "ram", "shoulder"], "target_words": ["brazil", "shorts"]} 7 | {"idx": 6, "word_list": ["sack", "hide", "page", "ram", "tunnel", "book", "mark", "drum", "magician", "pacific", "shoulder", "gear", "sugar", "volume", "nose", "anchor"], "target_words": ["hide", "magician"]} 8 | {"idx": 7, "word_list": ["mark", "ram", "anchor", "drum", "gear", "kick", "book", "nose", "boss", "sack", "volume", "page", "pacific", "tunnel", "rainbow", "glacier", "shoulder", "rope", "magician"], "target_words": ["boss"]} 9 | {"idx": 8, "word_list": ["page", "anchor", "drum", "sack", "pacific", "shoulder", "mark", "glacier", "sugar", "gear", "tunnel", "book"], "target_words": ["glacier", "sugar"]} 10 | {"idx": 9, "word_list": ["shoulder", "glacier", "anchor", "magician", "tunnel", "sugar", "gear", "book", "rainbow", "ram", "nose", "pacific", "volume", "rope", "mark", "drum", "page", "sack", "kick"], "target_words": ["kick", "rope"]} 11 | {"idx": 10, "word_list": ["magician", "gear", "sugar", "pacific", "mark", "rainbow", "drum", 
"anchor", "nose", "page", "volume", "ram", "book", "tunnel", "sack", "shoulder", "glacier"], "target_words": ["ram", "volume"]} 12 | {"idx": 11, "word_list": ["drum", "nose", "book", "glacier", "rainbow", "page", "mark", "shoulder", "sack", "pacific", "gear", "tunnel", "anchor"], "target_words": ["nose"]} 13 | {"idx": 12, "word_list": ["book", "anchor", "rainbow", "shoulder", "tunnel", "sack", "drum", "pacific", "page", "mark", "gear", "glacier"], "target_words": ["glacier", "page"]} 14 | {"idx": 13, "word_list": ["bowl", "einstein", "banana", "brain", "judge", "groom", "crusader", "stamp", "peach", "halloween", "troll", "slipper", "earthquake", "tip", "sun", "quarter", "pad", "nude", "helmet", "laundry", "wedding", "mirror", "sleep", "minotaur", "wish"], "target_words": ["banana", "bowl", "peach"]} 15 | {"idx": 14, "word_list": ["laundry", "judge", "quarter", "pad", "sleep", "crusader", "tip", "earthquake", "halloween", "wish", "groom", "helmet", "stamp", "minotaur", "einstein", "sun", "troll", "wedding", "slipper", "brain", "nude", "mirror"], "target_words": ["earthquake", "mirror"]} 16 | {"idx": 15, "word_list": ["nude", "judge", "sleep", "einstein", "groom", "troll", "wish", "sun", "quarter", "halloween", "brain", "stamp", "wedding", "slipper", "minotaur", "pad", "tip", "crusader", "helmet"], "target_words": ["nude", "sleep", "slipper"]} 17 | {"idx": 16, "word_list": ["groom", "minotaur", "pad", "stamp", "wedding", "helmet", "halloween", "judge", "troll", "einstein", "laundry", "tip", "brain", "nude", "sun", "crusader", "wish", "quarter"], "target_words": ["groom"]} 18 | {"idx": 17, "word_list": ["judge", "sun", "crusader", "brain", "halloween", "einstein", "tip", "helmet", "troll", "wish", "wedding", "quarter", "pad", "minotaur", "stamp", "laundry", "nude"], "target_words": ["minotaur"]} 19 | {"idx": 18, "word_list": ["brain", "troll", "pad", "stamp", "einstein", "judge", "crusader", "laundry", "sun", "wish", "tip", "quarter", "halloween", "wedding", 
"helmet"], "target_words": ["troll"]} 20 | {"idx": 19, "word_list": ["tip", "wish", "crusader", "stamp", "wedding", "sun", "brain", "einstein", "laundry", "pad", "helmet", "halloween", "judge", "quarter"], "target_words": ["stamp"]} 21 | {"idx": 20, "word_list": ["wish", "tip", "helmet", "quarter", "einstein", "wedding", "judge", "crusader", "halloween", "laundry", "pad", "sun", "brain"], "target_words": ["crusader"]} 22 | {"idx": 21, "word_list": ["pad", "wedding", "quarter", "brain", "wish", "halloween", "einstein", "helmet", "sun", "tip", "laundry", "judge"], "target_words": ["quarter"]} 23 | {"idx": 22, "word_list": ["wish", "judge", "tip", "laundry", "pad", "brain", "sun", "helmet", "wedding", "halloween", "einstein"], "target_words": ["pad"]} 24 | {"idx": 23, "word_list": ["team", "big ben", "bubble", "pizza", "kung fu", "battle", "sled", "rat", "cane", "patient", "yellowstone", "step", "onion", "glasses", "blind", "bulb", "storm", "ranch", "spoon", "desk", "violet", "mummy", "apron", "fuel", "saddle"], "target_words": ["ranch", "saddle"]} 25 | {"idx": 24, "word_list": ["cane", "mummy", "step", "battle", "sled", "yellowstone", "fuel", "kung fu", "team", "pizza", "rat", "bubble", "patient", "spoon", "desk", "onion", "big ben", "apron", "violet", "glasses", "storm"], "target_words": ["bubble", "glasses", "violet"]} 26 | {"idx": 25, "word_list": ["apron", "patient", "blind", "storm", "mummy", "battle", "bulb", "bubble", "spoon", "fuel", "violet", "desk", "rat", "onion", "yellowstone", "pizza", "kung fu", "step", "team", "cane", "sled"], "target_words": ["mummy", "patient"]} 27 | {"idx": 26, "word_list": ["venus", "cake", "blacksmith", "goldilocks", "elephant", "wood", "door", "taste", "sail", "tank", "snap", "cherry", "wool", "silk", "joan of arc", "clock", "bench", "pearl", "garden", "pocket", "country", "oasis", "iceland", "smell"], "target_words": ["cherry", "pearl", "venus"]} 28 | {"idx": 27, "word_list": ["door", "iceland", "pocket", "cherry", "bench", 
"taste", "garden", "wool", "clock", "tank", "country", "oasis", "smell", "joan of arc", "goldilocks", "potato", "cake", "snap", "sail", "blacksmith"], "target_words": ["cherry", "garden", "potato", "wool"]} 29 | {"idx": 28, "word_list": ["tank", "wood", "cake", "taste", "clock", "oasis", "smell", "bench", "silk", "pocket", "wool", "door", "snap", "blacksmith", "elephant", "goldilocks", "iceland", "country"], "target_words": ["bench", "smell", "wood"]} 30 | {"idx": 29, "word_list": ["bench", "country", "wool", "pocket", "goldilocks", "tank", "snap", "cake", "iceland", "clock", "door", "sail", "elephant", "silk", "taste", "oasis", "blacksmith"], "target_words": ["bench", "door", "sail"]} 31 | {"idx": 30, "word_list": ["country", "clock", "blacksmith", "taste", "pocket", "door", "silk", "elephant", "tank", "iceland", "oasis", "wool", "cake", "snap", "goldilocks"], "target_words": ["blacksmith", "clock"]} 32 | {"idx": 31, "word_list": ["goldilocks", "wool", "iceland", "tank", "taste", "pocket", "oasis", "snap", "country", "door", "cake", "elephant"], "target_words": ["door", "wool"]} 33 | {"idx": 32, "word_list": ["tank", "door", "iceland", "oasis", "cake", "silk", "wool", "pocket", "country", "taste", "elephant", "goldilocks", "snap"], "target_words": ["silk"]} 34 | {"idx": 33, "word_list": ["mug", "head", "pumpkin", "ball", "ambulance", "seal", "cotton", "force", "wave", "boom", "day", "engine", "new york", "jet", "mammoth", "helicopter"], "target_words": ["seal", "wave"]} 35 | {"idx": 34, "word_list": ["helicopter", "green", "engine", "mammoth", "moscow", "mercury", "cotton", "pumpkin", "force", "head", "ball", "seal", "jet", "mug", "hospital", "board", "ambulance", "diamond", "day", "boom", "star", "phoenix", "new york", "suit"], "target_words": ["mercury", "star"]} 36 | {"idx": 35, "word_list": ["board", "mammoth", "hospital", "seal", "engine", "moscow", "boom", "ambulance", "head", "green", "suit", "new york", "force", "mug", "cotton", "day", "jet", "diamond", 
"phoenix", "pumpkin", "ball"], "target_words": ["diamond", "suit"]} 37 | {"idx": 36, "word_list": ["head", "engine", "cotton", "day", "force", "pumpkin", "board", "ball", "seal", "hospital", "phoenix", "boom", "jet", "new york", "mammoth", "green", "suit", "moscow"], "target_words": ["green", "hospital"]} 38 | {"idx": 37, "word_list": ["new york", "day", "cotton", "engine", "pumpkin", "phoenix", "mammoth", "suit", "head", "seal", "mug", "moscow", "board", "force", "boom", "green", "jet", "ball"], "target_words": ["moscow"]} 39 | {"idx": 38, "word_list": ["day", "jet", "mug", "new york", "boom", "force", "engine", "pumpkin", "phoenix", "head", "seal", "ball"], "target_words": ["jet", "phoenix"]} 40 | {"idx": 39, "word_list": ["boom", "new york", "cotton", "green", "ball", "pumpkin", "force", "suit", "board", "jet", "mug", "head", "mammoth", "seal", "day", "engine"], "target_words": ["mammoth"]} 41 | {"idx": 40, "word_list": ["ball", "cotton", "head", "suit", "seal", "green", "day", "board", "mug", "new york", "force", "engine", "boom", "pumpkin", "jet"], "target_words": ["green"]} 42 | {"idx": 41, "word_list": ["day", "ball", "suit", "cotton", "seal", "board", "pumpkin", "new york", "head", "mug", "boom", "force", "jet", "engine"], "target_words": ["cotton", "suit"]} 43 | {"idx": 42, "word_list": ["force", "mug", "seal", "boom", "head", "day", "pumpkin", "jet", "engine", "board", "new york", "ball"], "target_words": ["board"]} 44 | {"idx": 43, "word_list": ["pumpkin", "day", "force", "ball", "head", "boom", "mug", "jet", "engine", "seal", "new york"], "target_words": ["day", "head"]} 45 | {"idx": 44, "word_list": ["engine", "boom", "new york", "jet", "force", "ball", "pumpkin", "mug", "seal"], "target_words": ["jet"]} 46 | {"idx": 45, "word_list": ["jupiter", "slip", "vet", "needle", "robin", "trip", "ham", "fall", "australia", "spell", "boot", "jack", "kangaroo", "lab", "web", "lock", "centaur", "triangle", "table", "dragon", "pilot", "ruler", "battery", "jam", 
"calf"], "target_words": ["lab", "needle", "vet"]} 47 | {"idx": 46, "word_list": ["battery", "calf", "kangaroo", "spell", "ruler", "jack", "australia", "jupiter", "lock", "pilot", "centaur", "dragon", "table", "jam"], "target_words": ["jupiter", "ruler"]} 48 | {"idx": 47, "word_list": ["spell", "battery", "australia", "fall", "lock", "jam", "jack", "table", "ham", "pilot", "trip", "calf", "slip", "dragon", "robin", "kangaroo", "boot", "centaur", "web", "ruler"], "target_words": ["boot", "slip"]} 49 | {"idx": 48, "word_list": ["table", "jam", "kangaroo", "lock", "australia", "centaur", "ham", "calf", "web", "robin", "battery", "spell", "pilot", "ruler", "trip", "jack", "dragon", "fall"], "target_words": ["fall", "trip"]} 50 | {"idx": 49, "word_list": ["dragon", "kangaroo", "lock", "jam", "battery", "centaur", "pilot", "calf", "web", "australia", "jack", "spell", "ruler", "table"], "target_words": ["jam", "web"]} -------------------------------------------------------------------------------- /data/trivia_creative_writing/topics_pop_culture_100_gpt4_gen_PG_rated.txt: -------------------------------------------------------------------------------- 1 | Harry Potter 2 | Mario 3 | Elsa 4 | Pikachu 5 | Simba 6 | SpongeBob SquarePants 7 | Dora the Explorer 8 | Cinderella 9 | Spider-Man 10 | Batman 11 | Superman 12 | Wonder Woman 13 | Little Mermaid 14 | Lion King 15 | Toy Story 16 | Shrek 17 | Incredibles 18 | Finding Nemo 19 | Avengers 20 | Star Wars 21 | Hunger Games 22 | Chronicles of Narnia 23 | Lord of the Rings 24 | Hobbit 25 | Wizard of Oz 26 | Alice in Wonderland 27 | Jungle Book 28 | Grinch 29 | Lorax 30 | Cat in the Hat 31 | Polar Express 32 | Secret Life of Pets 33 | Lego Movie 34 | Peanuts Movie 35 | Emoji Movie 36 | Angry Birds Movie 37 | Smurfs 38 | Flintstones 39 | Jetsons 40 | Powerpuff Girls 41 | Fairly OddParents 42 | Magic School Bus 43 | Rugrats 44 | Wild Thornberrys 45 | Iron Giant 46 | Princess Bride 47 | NeverEnding Story 48 | Land Before Time 49 | 
import os
import openai
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

import logging

from transformers import AutoTokenizer
import transformers
import torch
import uuid


# Configure logging
logging.basicConfig(level=logging.INFO)


# Error callback function
def log_retry_error(retry_state):
    """Log the exception raised by the attempt that just failed.

    Registered as tenacity's ``before_sleep`` hook, so it runs once per
    failed attempt that is going to be retried.

    Args:
        retry_state: tenacity call state; ``outcome`` holds the failed attempt.
    """
    logging.error("Retrying due to error: %s", retry_state.outcome.exception())


def _log_retries_exhausted(retry_state):
    """Log the last exception once tenacity stops retrying.

    Registered as ``retry_error_callback``: tenacity returns this function's
    return value to the caller instead of raising ``RetryError``, so the
    wrapped call yields ``None`` when every attempt has failed.
    """
    logging.error(
        "Giving up after %d attempts: %s",
        retry_state.attempt_number,
        retry_state.outcome.exception(),
    )
    return None


DEFAULT_GPT_CONFIG = {
    "engine": "devgpt4-32k",
    "temperature": 0.0,
    "max_tokens": 5000,
    "top_p": 1.0,
    "frequency_penalty": 0.0,
    "presence_penalty": 0.0,
    "stop": None
}


class OpenAIWrapper:
    """Chat-completion client with retry, batching and token accounting."""

    def __init__(self, config=DEFAULT_GPT_CONFIG, system_message=""):
        """Configure the global ``openai`` client from environment variables.

        Args:
            config: keyword arguments forwarded to every
                ``openai.ChatCompletion.create`` call.
            system_message: default system message; "" means none.
        """
        # TODO: set up your API key with the environment variable OPENAI_API_KEY
        openai.api_key = os.environ.get("OPENAI_API_KEY")

        if os.environ.get("USE_AZURE") == "True":
            print("using azure api")
            openai.api_type = "azure"
        # NOTE(review): indentation is lost in this view, so it is unclear
        # whether the original guarded the next two settings with USE_AZURE.
        # Applying them only when the variable is defined keeps the library
        # defaults from being overwritten with None.
        api_base = os.environ.get("API_BASE")
        if api_base is not None:
            openai.api_base = api_base
        api_version = os.environ.get("API_VERSION")
        if api_version is not None:
            openai.api_version = api_version

        # Copy so later changes to this wrapper's config cannot leak into the
        # shared module-level default (or the caller's dict).
        self.config = dict(config)
        print("api config:", config, '\n')

        # count total tokens
        self.completion_tokens = 0
        self.prompt_tokens = 0

        # system message
        self.system_message = system_message  # "You are an AI assistant that helps people find information."

    # retry using tenacity
    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        before_sleep=log_retry_error,
        retry_error_callback=_log_retries_exhausted,
    )
    def completions_with_backoff(self, **kwargs):
        """Call ``openai.ChatCompletion.create`` with exponential backoff.

        Returns:
            The API response, or ``None`` when all 6 attempts failed.
        """
        return openai.ChatCompletion.create(**kwargs)

    def run(self, prompt, n=1, system_message=""):
        """Generate ``n`` completions for ``prompt``.

        Args:
            prompt: str, the user message.
            n: int, total number of generations; requested in batches of at
                most 10 per API call.
            system_message: overrides the wrapper's default system message
                when non-empty.

        Returns:
            ``(text_outputs, raw_responses)``. On any failure the error is
            printed and ``([], [])`` is returned, discarding batches that had
            already succeeded (their tokens stay counted).
        """
        try:
            # a per-call system message takes precedence over the default
            sys_m = system_message if system_message != "" else self.system_message
            messages = []
            if sys_m != "":
                messages.append({"role": "system", "content": sys_m})
            messages.append({"role": "user", "content": prompt})

            text_outputs = []
            raw_responses = []
            while n > 0:
                cnt = min(n, 10)  # number of generations per api call
                n -= cnt
                res = self.completions_with_backoff(messages=messages, n=cnt, **self.config)
                if res is None:
                    # Retries exhausted: fail with a clear message instead of
                    # a TypeError from subscripting None below.
                    raise RuntimeError("API call failed after all retry attempts")
                text_outputs.extend([choice["message"]["content"] for choice in res["choices"]])
                # add prompt to log
                res['prompt'] = prompt
                if sys_m != "":
                    res['system_message'] = sys_m
                raw_responses.append(res)
                # log completion tokens
                self.completion_tokens += res["usage"]["completion_tokens"]
                self.prompt_tokens += res["usage"]["prompt_tokens"]

            return text_outputs, raw_responses
        except Exception as e:
            print("an error occurred:", e)
            return [], []

    def compute_gpt_usage(self):
        """Return accumulated token counts and the estimated cost.

        Cost is only computed for the "devgpt4-32k" engine (0.12 per 1K
        completion tokens, 0.06 per 1K prompt tokens); otherwise it is 0.
        """
        engine = self.config["engine"]
        if engine == "devgpt4-32k":
            cost = self.completion_tokens / 1000 * 0.12 + self.prompt_tokens / 1000 * 0.06
        else:
            cost = 0  # TODO: add custom cost calculation for other engines
        return {"completion_tokens": self.completion_tokens, "prompt_tokens": self.prompt_tokens, "cost": cost}


DEFAULT_LLAMA2_CONFIG = {
    "task": "text-generation",
    "model": "meta-llama/Llama-2-7b-chat-hf",
    "torch_dtype": torch.float16,
    "device_map": "auto",
    "do_sample": False
}


class Llama2Wrapper:
    """Local text-generation wrapper exposing the same ``run`` interface."""

    def __init__(self, config=DEFAULT_LLAMA2_CONFIG):
        """Load the tokenizer and build the ``transformers`` pipeline.

        Args:
            config: keyword arguments for ``transformers.pipeline``; must
                contain "model" and "do_sample".
        """
        self.tokenizer = AutoTokenizer.from_pretrained(config["model"])
        self.pipeline = transformers.pipeline(**config)
        self.config = config

    def run(self, prompt, n=1, system_message="", max_length=3999):
        """Generate ``n`` continuations of ``prompt``.

        Args:
            prompt: str, passed to the pipeline verbatim.
            n: int, number of sequences to return.
            system_message: only recorded in the returned log objects; it is
                not fed to the model.
            max_length: generation length limit forwarded to the pipeline.

        Returns:
            ``(text_outputs, raw_responses)`` where each raw response mimics
            the layout of a GPT chat-completion response.
        """
        # NOTE(review): with do_sample=False, transformers may reject
        # num_return_sequences > 1 -- confirm before calling with n > 1.
        sequences = self.pipeline(
            prompt,
            do_sample=self.config["do_sample"],
            num_return_sequences=n,
            eos_token_id=self.tokenizer.eos_token_id,
            max_length=max_length,
        )
        # convert generation output into the same format as GPT raw response
        text_outputs = []
        raw_responses = []
        for seq in sequences:
            # remove prompt from the generated text
            gen_text = seq['generated_text'][len(prompt):]
            text_outputs.append(gen_text)
            mock_id = str(uuid.uuid4())
            mock_gpt_response_obj = {
                "id": mock_id,
                "object": "text-generation",
                # placeholder: reuses the random id, not a timestamp
                "created": mock_id,
                "model": self.config["model"],
                "choices": [
                    {
                        "index": 0,
                        "finish_reason": "stop",
                        "message": {
                            "role": "assistant",
                            "content": gen_text
                        }
                    }
                ],
                "usage": {},
                "prompt": prompt,
                "system_message": system_message
            }
            raw_responses.append(mock_gpt_response_obj)
        return text_outputs, raw_responses

    def compute_gpt_usage(self):
        """Return an empty dict; no usage is tracked for local models."""
        return {}


if __name__ == "__main__":
    llama = Llama2Wrapper()
    prompt = '''I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n'''
    text_outputs, raw_responses = llama.run(prompt)
    print(text_outputs)
    print('\n\n')
    print(raw_responses)
31 | 32 | Answer: (a single word here) 33 | ''' 34 | 35 | spp_prompt_spymaster = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 36 | 37 | Here are some examples: 38 | --- 39 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 40 | Input: 6 12 1 1 41 | 42 | Participants: AI Assistant (you); Math Expert 43 | 44 | Start collaboration! 45 | 46 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 47 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 48 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 49 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 50 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 51 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 
52 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 53 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 54 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 55 | 56 | Finish collaboration! 57 | 58 | Final answer: 6 * (1 + 1) + 12 = 24 59 | 60 | --- 61 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 62 | 63 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 64 | 65 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 66 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 67 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 68 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 69 | Computational wonder of our age, 70 | Harnessing the quantum world's strange ways, 71 | Atoms dance, entwined in dual state, 72 | Tapping secrets hidden in their haze. 73 | 74 | Grand power to solve, simulate, 75 | Profound problems that perplex the wise, 76 | Transforming our future, we await. 
77 | 78 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 79 | Computer Scientist: Everything looks good to me! 80 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 81 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 82 | Curious machine of our time, 83 | Harnessing the quantum realm's odd ways, 84 | Atoms play, two states they embrace, 85 | Taking secrets from their puzzling maze. 86 | 87 | Great power to solve and imitate, 88 | Problems that confuse the brightest minds, 89 | Transforming our future, we await. 90 | 91 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 92 | Computer Scientist: Looking good! 93 | Ten year old child: I like this version a lot! 94 | 95 | Finish collaboration! 96 | 97 | Final answer: 98 | Curious machine of our time, 99 | Harnessing the quantum realm's odd ways, 100 | Atoms play, two states they embrace, 101 | Taking secrets from their puzzling maze. 102 | 103 | Great power to solve and imitate, 104 | Problems that confuse the brightest minds, 105 | Transforming our future, we await. 106 | 107 | --- 108 | Now, identify the participants and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: (a single word here).". 109 | 110 | Task: Try to find a single word hint that can accurately represent and link the {n} given words: "{target_words}". The key is to select a hint that does not cause confusion with other words from the following list: {word_list}. 111 | ''' 112 | 113 | spp_prompt_spymaster_less_demo = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. 
Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 114 | 115 | Here is one example: 116 | --- 117 | Example Task: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 118 | Input: 6 12 1 1 119 | 120 | Participants: AI Assistant (you); Math Expert 121 | 122 | Start collaboration! 123 | 124 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 125 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 126 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 127 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 128 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 129 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 130 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 
131 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 132 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 133 | 134 | Finish collaboration! 135 | 136 | Final answer: 6 * (1 + 1) + 12 = 24 137 | 138 | --- 139 | Now, identify the participants and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: (a single word here).". 140 | 141 | Task: Try to find a single word hint that can accurately represent and link the {n} given words: "{target_words}". The key is to select a hint that does not cause confusion with other words from the following list: {word_list}. 142 | ''' 143 | 144 | 145 | 146 | 147 | 148 | spp_prompt_spymaster_fixed_persona = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Note that the participants can only be either AI Assistant (you) or Expert. Then, initiate a multi-round collaboration process until a final conclusion is reached. The Expert will give critical comments and detailed suggestions whenever necessary. 149 | 150 | Here are some examples: 151 | --- 152 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 153 | Input: 6 12 1 1 154 | 155 | Participants: AI Assistant (you); Expert 156 | 157 | Start collaboration! 158 | 159 | Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 160 | AI Assistant (you): Thanks for the hints! 
Here's one initial solution: (12 / (1 + 1)) * 6 = 24 161 | Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 162 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 163 | Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 164 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 165 | Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the “1 - 1” to “1 + 1”. 166 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 167 | Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 168 | 169 | Finish collaboration! 170 | 171 | Final answer: 6 * (1 + 1) + 12 = 24 172 | 173 | --- 174 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 175 | 176 | Participants: AI Assistant (you); Expert 177 | 178 | Expert: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. 
Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 179 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 180 | Computational wonder of our age, 181 | Harnessing the quantum world's strange ways, 182 | Atoms dance, entwined in dual state, 183 | Tapping secrets hidden in their haze. 184 | 185 | Grand power to solve, simulate, 186 | Profound problems that perplex the wise, 187 | Transforming our future, we await. 188 | 189 | Expert: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! I don't know what does perplex mean. Can you make the use of words easier to understand? 190 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 191 | Curious machine of our time, 192 | Harnessing the quantum realm's odd ways, 193 | Atoms play, two states they embrace, 194 | Taking secrets from their puzzling maze. 195 | 196 | Great power to solve and imitate, 197 | Problems that confuse the brightest minds, 198 | Transforming our future, we await. 199 | 200 | Expert: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. I like this version a lot! 201 | 202 | Finish collaboration! 203 | 204 | Final answer: 205 | Curious machine of our time, 206 | Harnessing the quantum realm's odd ways, 207 | Atoms play, two states they embrace, 208 | Taking secrets from their puzzling maze. 
209 | 210 | Great power to solve and imitate, 211 | Problems that confuse the brightest minds, 212 | Transforming our future, we await. 213 | 214 | --- 215 | 216 | Now, identify the participants and collaboratively solve the following task step by step. Note that the participants can only be either AI Assistant (you) or Expert. Remember to provide the final solution with the following format "Final answer: (a single word here).". 217 | 218 | Task: Try to find a single word hint that can accurately represent and link the {n} given words: "{target_words}". The key is to select a hint that does not cause confusion with other words from the following list: {word_list}. 219 | ''' 220 | 221 | 222 | 223 | 224 | 225 | spp_prompt_spymaster_profile = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Provide profiles of the participants, describing their expertise or needs. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 226 | 227 | Here are some examples: 228 | --- 229 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 230 | Input: 6 12 1 1 231 | 232 | Participants: AI Assistant (you); Math Expert 233 | 234 | Profiles: 235 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 236 | - Math expert: A person who is good at math games, arithmetic calculation, and long-term planning. 237 | 238 | Start collaboration! 239 | 240 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. 
Also you need to think of potential additions that can reach 24, such as 12 + 12. 241 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 242 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 243 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 244 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 245 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 246 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 247 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 248 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 249 | 250 | Finish collaboration! 251 | 252 | Final answer: 6 * (1 + 1) + 12 = 24 253 | 254 | --- 255 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 
256 | 257 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 258 | 259 | Profiles: 260 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 261 | - Poet: A person who studies and creates poetry. The poet is familiar with the rules and formats of poetry and can provide guidance on how to write a poem. 262 | - Computer Scientist: A scholar who specializes in the academic study of computer science. The computer scientist is familiar with the concept of a quantum computer and can provide guidance on how to explain it. 263 | - Ten year old child: A child with a limited English vocabulary and little knowledge about complicated concepts, such as a quantum computer. 264 | 265 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 266 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 267 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 268 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 269 | Computational wonder of our age, 270 | Harnessing the quantum world's strange ways, 271 | Atoms dance, entwined in dual state, 272 | Tapping secrets hidden in their haze. 273 | 274 | Grand power to solve, simulate, 275 | Profound problems that perplex the wise, 276 | Transforming our future, we await. 277 | 278 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! 
And the poem rhymes well. Good job! 279 | Computer Scientist: Everything looks good to me! 280 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 281 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 282 | Curious machine of our time, 283 | Harnessing the quantum realm's odd ways, 284 | Atoms play, two states they embrace, 285 | Taking secrets from their puzzling maze. 286 | 287 | Great power to solve and imitate, 288 | Problems that confuse the brightest minds, 289 | Transforming our future, we await. 290 | 291 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 292 | Computer Scientist: Looking good! 293 | Ten year old child: I like this version a lot! 294 | 295 | Finish collaboration! 296 | 297 | Final answer: 298 | Curious machine of our time, 299 | Harnessing the quantum realm's odd ways, 300 | Atoms play, two states they embrace, 301 | Taking secrets from their puzzling maze. 302 | 303 | Great power to solve and imitate, 304 | Problems that confuse the brightest minds, 305 | Transforming our future, we await. 306 | 307 | --- 308 | Now, identify the participants, provide their profiles, and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: (a single word here).". 309 | 310 | Task: Try to find a single word hint that can accurately represent and link the {n} given words: "{target_words}". The key is to select a hint that does not cause confusion with other words from the following list: {word_list}. 311 | ''' 312 | 313 | 314 | 315 | standard_prompt_guesser = '''Try to identify the {n} words best associated with the word "{hint_word}" from the following list: {word_list}. Your answer should be a comma-separated list of words. 
316 | 317 | Answer: 318 | (A list of words here). 319 | ''' 320 | 321 | cot_prompt_guesser = '''Try to identify the {n} words best associated with the word "{hint_word}" from the following list: {word_list}. Your answer should be a comma-separated list of words. 322 | 323 | Solve the task step by step. Your output should be of the following format: 324 | 325 | Steps: 326 | Your steps here. 327 | 328 | Answer: 329 | (A list of words here). 330 | ''' 331 | 332 | spp_prompt_guesser = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 333 | 334 | Here are some examples: 335 | --- 336 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 337 | Input: 6 12 1 1 338 | 339 | Participants: AI Assistant (you); Math Expert 340 | 341 | Start collaboration! 342 | 343 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 344 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 345 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 346 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 
347 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 348 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 349 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 350 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 351 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 352 | 353 | Finish collaboration! 354 | 355 | Final answer: 6 * (1 + 1) + 12 = 24 356 | 357 | --- 358 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 359 | 360 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 361 | 362 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 363 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 364 | Ten year old child: I hope the poem to be fun and easy to understanding. 
I don't want to see a lot of jargons or complicated concepts. 365 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 366 | Computational wonder of our age, 367 | Harnessing the quantum world's strange ways, 368 | Atoms dance, entwined in dual state, 369 | Tapping secrets hidden in their haze. 370 | 371 | Grand power to solve, simulate, 372 | Profound problems that perplex the wise, 373 | Transforming our future, we await. 374 | 375 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 376 | Computer Scientist: Everything looks good to me! 377 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 378 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 379 | Curious machine of our time, 380 | Harnessing the quantum realm's odd ways, 381 | Atoms play, two states they embrace, 382 | Taking secrets from their puzzling maze. 383 | 384 | Great power to solve and imitate, 385 | Problems that confuse the brightest minds, 386 | Transforming our future, we await. 387 | 388 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 389 | Computer Scientist: Looking good! 390 | Ten year old child: I like this version a lot! 391 | 392 | Finish collaboration! 393 | 394 | Final answer: 395 | Curious machine of our time, 396 | Harnessing the quantum realm's odd ways, 397 | Atoms play, two states they embrace, 398 | Taking secrets from their puzzling maze. 399 | 400 | Great power to solve and imitate, 401 | Problems that confuse the brightest minds, 402 | Transforming our future, we await. 403 | 404 | --- 405 | Now, identify the participants and collaboratively solve the following task step by step. 
Remember to provide the final solution with the following format "Final answer: (a list of words here).". 406 | 407 | Task: Try to identify the {n} words best associated with the word "{hint_word}" from the following list: {word_list}. Your answer should be a comma-separated list of words. 408 | ''' 409 | 410 | 411 | spp_prompt_guesser_less_demo = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 412 | 413 | Here is one example: 414 | --- 415 | Example Task: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 416 | Input: 6 12 1 1 417 | 418 | Participants: AI Assistant (you); Math Expert 419 | 420 | Start collaboration! 421 | 422 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 423 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 424 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 425 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 426 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. 
The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 427 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 428 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 429 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 430 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 24. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 431 | 432 | Finish collaboration! 433 | 434 | Final answer: 6 * (1 + 1) + 12 = 24 435 | 436 | --- 437 | Now, identify the participants and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: (a list of words here).". 438 | 439 | Task: Try to identify the {n} words best associated with the word "{hint_word}" from the following list: {word_list}. Your answer should be a comma-separated list of words. 440 | ''' 441 | 442 | 443 | 444 | 445 | spp_prompt_guesser_fixed_persona = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Note that the participants can only be either AI Assistant (you) or Expert. Then, initiate a multi-round collaboration process until a final conclusion is reached. The Expert will give critical comments and detailed suggestions whenever necessary. 446 | 447 | Here are some examples: 448 | --- 449 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 450 | Input: 6 12 1 1 451 | 452 | Participants: AI Assistant (you); Expert 453 | 454 | Start collaboration!
455 | 456 | Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 457 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 458 | Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 459 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 460 | Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 461 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 462 | Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the “1 - 1” to “1 + 1”. 463 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 464 | Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 24. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 465 | 466 | Finish collaboration!
467 | 468 | Final answer: 6 * (1 + 1) + 12 = 24 469 | 470 | --- 471 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 472 | 473 | Participants: AI Assistant (you); Expert 474 | 475 | Expert: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 476 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 477 | Computational wonder of our age, 478 | Harnessing the quantum world's strange ways, 479 | Atoms dance, entwined in dual state, 480 | Tapping secrets hidden in their haze. 481 | 482 | Grand power to solve, simulate, 483 | Profound problems that perplex the wise, 484 | Transforming our future, we await. 485 | 486 | Expert: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! I don't know what does perplex mean. Can you make the use of words easier to understand? 487 | AI Assistant (you): Sure, let me revise the poem by using more common words. 
Check out the revised version: 488 | Curious machine of our time, 489 | Harnessing the quantum realm's odd ways, 490 | Atoms play, two states they embrace, 491 | Taking secrets from their puzzling maze. 492 | 493 | Great power to solve and imitate, 494 | Problems that confuse the brightest minds, 495 | Transforming our future, we await. 496 | 497 | Expert: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. I like this version a lot! 498 | 499 | Finish collaboration! 500 | 501 | Final answer: 502 | Curious machine of our time, 503 | Harnessing the quantum realm's odd ways, 504 | Atoms play, two states they embrace, 505 | Taking secrets from their puzzling maze. 506 | 507 | Great power to solve and imitate, 508 | Problems that confuse the brightest minds, 509 | Transforming our future, we await. 510 | 511 | --- 512 | 513 | Now, identify the participants and collaboratively solve the following task step by step. Note that the participants can only be either AI Assistant (you) or Expert. Remember to provide the final solution with the following format "Final answer: (a list of words here).". 514 | 515 | Task: Try to identify the {n} words best associated with the word "{hint_word}" from the following list: {word_list}. Your answer should be a comma-separated list of words. 516 | ''' 517 | 518 | 519 | spp_prompt_guesser_profile = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Provide profiles of the participants, describing their expertise or needs. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 520 | 521 | Here are some examples: 522 | --- 523 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. 
You need to use all numbers, and each number can only be used once. 524 | Input: 6 12 1 1 525 | 526 | Participants: AI Assistant (you); Math Expert 527 | 528 | Profiles: 529 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 530 | - Math expert: A person who is good at math games, arithmetic calculation, and long-term planning. 531 | 532 | Start collaboration! 533 | 534 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 535 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 536 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 537 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 538 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 539 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 540 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 
541 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 542 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 24. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 543 | 544 | Finish collaboration! 545 | 546 | Final answer: 6 * (1 + 1) + 12 = 24 547 | 548 | --- 549 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 550 | 551 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 552 | 553 | Profiles: 554 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 555 | - Poet: A person who studies and creates poetry. The poet is familiar with the rules and formats of poetry and can provide guidance on how to write a poem. 556 | - Computer Scientist: A scholar who specializes in the academic study of computer science. The computer scientist is familiar with the concept of a quantum computer and can provide guidance on how to explain it. 557 | - Ten year old child: A child with a limited English vocabulary and little knowledge about complicated concepts, such as a quantum computer. 558 | 559 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 560 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem.
561 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 562 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 563 | Computational wonder of our age, 564 | Harnessing the quantum world's strange ways, 565 | Atoms dance, entwined in dual state, 566 | Tapping secrets hidden in their haze. 567 | 568 | Grand power to solve, simulate, 569 | Profound problems that perplex the wise, 570 | Transforming our future, we await. 571 | 572 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 573 | Computer Scientist: Everything looks good to me! 574 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 575 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 576 | Curious machine of our time, 577 | Harnessing the quantum realm's odd ways, 578 | Atoms play, two states they embrace, 579 | Taking secrets from their puzzling maze. 580 | 581 | Great power to solve and imitate, 582 | Problems that confuse the brightest minds, 583 | Transforming our future, we await. 584 | 585 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 586 | Computer Scientist: Looking good! 587 | Ten year old child: I like this version a lot! 588 | 589 | Finish collaboration! 590 | 591 | Final answer: 592 | Curious machine of our time, 593 | Harnessing the quantum realm's odd ways, 594 | Atoms play, two states they embrace, 595 | Taking secrets from their puzzling maze. 596 | 597 | Great power to solve and imitate, 598 | Problems that confuse the brightest minds, 599 | Transforming our future, we await. 
600 | 601 | --- 602 | Now, identify the participants, provide their profiles, and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: (a list of words here).". 603 | 604 | Task: Try to identify the {n} words best associated with the word "{hint_word}" from the following list: {word_list}. Your answer should be a comma-separated list of words. 605 | ''' 606 | -------------------------------------------------------------------------------- /prompts/logic_grid_puzzle.py: -------------------------------------------------------------------------------- 1 | standard_prompt = '''{input} 2 | 3 | Your output should be of the following format: 4 | 5 | Answer: 6 | The house number here. 7 | ''' 8 | 9 | ## prompts for self-refinement ## 10 | self_refine_feedback_prompt = '''{question_answer} 11 | --- 12 | Analyze the correctness of the answer. If it is not correct, provide critique to improve the answer. Your feedback: 13 | ''' 14 | 15 | self_refine_refinement_prompt = '''{question_answer} 16 | --- 17 | Feedback: {feedback} 18 | --- 19 | Based on your initial answer and the subsequent feedback, revise the answer. Your revised answer: 20 | The house number here. (Follow the original format. DO NOT add anything after the answer.) 21 | ''' 22 | ################################# 23 | 24 | cot_prompt = '''{input} 25 | 26 | Solve the task step by step. Your output should be of the following format: 27 | 28 | Steps: 29 | Your steps here. 30 | 31 | Answer: 32 | The house number here. 33 | ''' 34 | 35 | spp_prompt = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary.
36 | 37 | Here are some examples: 38 | --- 39 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 40 | Input: 6 12 1 1 41 | 42 | Participants: AI Assistant (you); Math Expert 43 | 44 | Start collaboration! 45 | 46 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 47 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 48 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 49 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 50 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 51 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 52 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 53 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 54 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 24. You used 1 1 6 12 which is identical to the input 6 12 1 1.
Everything looks good! 55 | 56 | Finish collaboration! 57 | 58 | Final answer: 6 * (1 + 1) + 12 = 24 59 | 60 | --- 61 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 62 | 63 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 64 | 65 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 66 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 67 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 68 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 69 | Computational wonder of our age, 70 | Harnessing the quantum world's strange ways, 71 | Atoms dance, entwined in dual state, 72 | Tapping secrets hidden in their haze. 73 | 74 | Grand power to solve, simulate, 75 | Profound problems that perplex the wise, 76 | Transforming our future, we await. 77 | 78 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 79 | Computer Scientist: Everything looks good to me! 80 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 81 | AI Assistant (you): Sure, let me revise the poem by using more common words. 
Check out the revised version: 82 | Curious machine of our time, 83 | Harnessing the quantum realm's odd ways, 84 | Atoms play, two states they embrace, 85 | Taking secrets from their puzzling maze. 86 | 87 | Great power to solve and imitate, 88 | Problems that confuse the brightest minds, 89 | Transforming our future, we await. 90 | 91 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 92 | Computer Scientist: Looking good! 93 | Ten year old child: I like this version a lot! 94 | 95 | Finish collaboration! 96 | 97 | Final answer: 98 | Curious machine of our time, 99 | Harnessing the quantum realm's odd ways, 100 | Atoms play, two states they embrace, 101 | Taking secrets from their puzzling maze. 102 | 103 | Great power to solve and imitate, 104 | Problems that confuse the brightest minds, 105 | Transforming our future, we await. 106 | 107 | --- 108 | Now, identify the participants and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: The house number here.". 109 | 110 | Task: {input} 111 | ''' 112 | 113 | 114 | spp_prompt_profile = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Provide profiles of the participants, describing their expertise or needs. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 115 | 116 | Here are some examples: 117 | --- 118 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 
119 | Input: 6 12 1 1 120 | 121 | Participants: AI Assistant (you); Math Expert 122 | 123 | Profiles: 124 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 125 | - Math expert: A person who is good at math games, arithmetic calculation, and long-term planning. 126 | 127 | Start collaboration! 128 | 129 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 130 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 131 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 132 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 133 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 134 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 135 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 136 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 137 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 24.
You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 138 | 139 | Finish collaboration! 140 | 141 | Final answer: 6 * (1 + 1) + 12 = 24 142 | 143 | --- 144 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 145 | 146 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 147 | 148 | Profiles: 149 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 150 | - Poet: A person who studies and creates poetry. The poet is familiar with the rules and formats of poetry and can provide guidance on how to write a poem. 151 | - Computer Scientist: A scholar who specializes in the academic study of computer science. The computer scientist is familiar with the concept of a quantum computer and can provide guidance on how to explain it. 152 | - Ten year old child: A child with a limited English vocabulary and little knowledge about complicated concepts, such as a quantum computer. 153 | 154 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 155 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 156 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 
157 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 158 | Computational wonder of our age, 159 | Harnessing the quantum world's strange ways, 160 | Atoms dance, entwined in dual state, 161 | Tapping secrets hidden in their haze. 162 | 163 | Grand power to solve, simulate, 164 | Profound problems that perplex the wise, 165 | Transforming our future, we await. 166 | 167 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 168 | Computer Scientist: Everything looks good to me! 169 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 170 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 171 | Curious machine of our time, 172 | Harnessing the quantum realm's odd ways, 173 | Atoms play, two states they embrace, 174 | Taking secrets from their puzzling maze. 175 | 176 | Great power to solve and imitate, 177 | Problems that confuse the brightest minds, 178 | Transforming our future, we await. 179 | 180 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 181 | Computer Scientist: Looking good! 182 | Ten year old child: I like this version a lot! 183 | 184 | Finish collaboration! 185 | 186 | Final answer: 187 | Curious machine of our time, 188 | Harnessing the quantum realm's odd ways, 189 | Atoms play, two states they embrace, 190 | Taking secrets from their puzzling maze. 191 | 192 | Great power to solve and imitate, 193 | Problems that confuse the brightest minds, 194 | Transforming our future, we await. 195 | 196 | --- 197 | Now, identify the participants, provide their profiles, and collaboratively solve the following task step by step. 
Remember to provide the final solution with the following format "Final answer: The house number here.". 198 | 199 | Task: {input} 200 | ''' 201 | 202 | 203 | 204 | spp_prompt_fixed_persona = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Note that the participants can only be either AI Assistant (you) or Expert. Then, initiate a multi-round collaboration process until a final conclusion is reached. The Expert will give critical comments and detailed suggestions whenever necessary. 205 | 206 | Here are some examples: 207 | --- 208 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 209 | Input: 6 12 1 1 210 | 211 | Participants: AI Assistant (you); Expert 212 | 213 | Start collaboration! 214 | 215 | Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 216 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 217 | Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 218 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 219 | Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 
220 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 221 | Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the “1 - 1” to “1 + 1”. 222 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 223 | Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 24. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 224 | 225 | Finish collaboration! 226 | 227 | Final answer: 6 * (1 + 1) + 12 = 24 228 | 229 | --- 230 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 231 | 232 | Participants: AI Assistant (you); Expert 233 | 234 | Expert: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 235 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 236 | Computational wonder of our age, 237 | Harnessing the quantum world's strange ways, 238 | Atoms dance, entwined in dual state, 239 | Tapping secrets hidden in their haze.
240 | 241 | Grand power to solve, simulate, 242 | Profound problems that perplex the wise, 243 | Transforming our future, we await. 244 | 245 | Expert: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! I don't know what does perplex mean. Can you make the use of words easier to understand? 246 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 247 | Curious machine of our time, 248 | Harnessing the quantum realm's odd ways, 249 | Atoms play, two states they embrace, 250 | Taking secrets from their puzzling maze. 251 | 252 | Great power to solve and imitate, 253 | Problems that confuse the brightest minds, 254 | Transforming our future, we await. 255 | 256 | Expert: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. I like this version a lot! 257 | 258 | Finish collaboration! 259 | 260 | Final answer: 261 | Curious machine of our time, 262 | Harnessing the quantum realm's odd ways, 263 | Atoms play, two states they embrace, 264 | Taking secrets from their puzzling maze. 265 | 266 | Great power to solve and imitate, 267 | Problems that confuse the brightest minds, 268 | Transforming our future, we await. 269 | 270 | --- 271 | 272 | Now, identify the participants and collaboratively solve the following task step by step. Note that the participants can only be either AI Assistant (you) or Expert. Remember to provide the final solution with the following format "Final answer: The house number here.". 273 | 274 | Task: {input} 275 | ''' 276 | 277 | 278 | spp_prompt_less_demo = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. 
The participants will give critical comments and detailed suggestions whenever necessary. 279 | 280 | Here is one example: 281 | --- 282 | Example Task: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 283 | Input: 6 12 1 1 284 | 285 | Participants: AI Assistant (you); Math Expert 286 | 287 | Start collaboration! 288 | 289 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 290 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 291 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 292 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 293 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 294 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 295 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 
296 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 297 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 298 | 299 | Finish collaboration! 300 | 301 | Final answer: 6 * (1 + 1) + 12 = 24 302 | 303 | --- 304 | 305 | Now, identify the participants and collaboratively solve the following task step by step. Remember to provide the final solution with the following format "Final answer: The house number here.". 306 | 307 | Task: {input} 308 | ''' -------------------------------------------------------------------------------- /prompts/trivia_creative_writing.py: -------------------------------------------------------------------------------- 1 | standard_prompt = '''Write a short and coherent story about {topic} that incorporates the answers to the following {n} questions: {questions} 2 | ''' 3 | 4 | ## prompts for self-refinement ## 5 | self_refine_feedback_prompt = '''{question_answer} 6 | --- 7 | Reflect on the response. Analyze the correctness of the information provided, and the coherence of the story. Provide critque to help improve the response. Your feedback: 8 | ''' 9 | 10 | self_refine_refinement_prompt = '''{question_answer} 11 | --- 12 | Feedback: {feedback} 13 | --- 14 | Based on your initial response and the subsequent feedback, revise the response. Your revised response: 15 | ''' 16 | ################################# 17 | 18 | 19 | cot_prompt = '''Write a short and coherent story about {topic} that incorporates the answers to the following {n} questions: {questions} 20 | 21 | Make a plan then write. Your output should be of the following format: 22 | 23 | Plan: 24 | Your plan here. 25 | 26 | Story: 27 | Your story here. 28 | ''' 29 | 30 | spp_prompt = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. 
Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 31 | 32 | Here are some examples: 33 | --- 34 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 35 | Input: 6 12 1 1 36 | 37 | Participants: AI Assistant (you); Math Expert 38 | 39 | Start collaboration! 40 | 41 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 42 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 43 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 44 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 45 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 46 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 47 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 
48 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 49 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 50 | 51 | Finish collaboration! 52 | 53 | Final answer: 6 * (1 + 1) + 12 = 24 54 | 55 | --- 56 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 57 | 58 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 59 | 60 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 61 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 62 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 63 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 64 | Computational wonder of our age, 65 | Harnessing the quantum world's strange ways, 66 | Atoms dance, entwined in dual state, 67 | Tapping secrets hidden in their haze. 68 | 69 | Grand power to solve, simulate, 70 | Profound problems that perplex the wise, 71 | Transforming our future, we await. 72 | 73 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 
74 | Computer Scientist: Everything looks good to me! 75 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 76 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 77 | Curious machine of our time, 78 | Harnessing the quantum realm's odd ways, 79 | Atoms play, two states they embrace, 80 | Taking secrets from their puzzling maze. 81 | 82 | Great power to solve and imitate, 83 | Problems that confuse the brightest minds, 84 | Transforming our future, we await. 85 | 86 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 87 | Computer Scientist: Looking good! 88 | Ten year old child: I like this version a lot! 89 | 90 | Finish collaboration! 91 | 92 | Final answer: 93 | Curious machine of our time, 94 | Harnessing the quantum realm's odd ways, 95 | Atoms play, two states they embrace, 96 | Taking secrets from their puzzling maze. 97 | 98 | Great power to solve and imitate, 99 | Problems that confuse the brightest minds, 100 | Transforming our future, we await. 101 | 102 | --- 103 | Now, identify the participants and collaboratively solve the following task step by step. Remember to present your final solution with the prefix "Final answer:". 104 | 105 | Task: Write a short and coherent story about {topic} that incorporates the answers to the following {n} questions: {questions} 106 | ''' 107 | 108 | spp_prompt_profile = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Provide profiles of the participants, describing their expertise or needs. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 
109 | 110 | Here are some examples: 111 | --- 112 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 113 | Input: 6 12 1 1 114 | 115 | Participants: AI Assistant (you); Math Expert 116 | 117 | Profiles: 118 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 119 | - Math expert: A person who is good at math games, arithmetic calculation, and long-term planning. 120 | 121 | Start collaboration! 122 | 123 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 124 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 125 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 126 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 127 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 128 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 129 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. 
I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 130 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 131 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 132 | 133 | Finish collaboration! 134 | 135 | Final answer: 6 * (1 + 1) + 12 = 24 136 | 137 | --- 138 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 139 | 140 | Participants: AI Assistant (you); Poet; Computer Scientist; Ten year old child 141 | 142 | Profiles: 143 | - AI Assistant (you): A super-intelligent AI assistant capable of performing tasks more effectively than humans. 144 | - Poet: A person who studies and creates poetry. The poet is familiar with the rules and formats of poetry and can provide guidance on how to write a poem. 145 | - Computer Scientist: A scholar who specializes in the academic study of computer science. The computer scientist is familiar with the concept of a quantum computer and can provide guidance on how to explain it. 146 | - Ten year old child: A child with a limited English vocabulary and little knowledge about complicated concepts, such as a quantum computer. 147 | 148 | Poet: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. 149 | Computer Scientist: A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. 
Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. 150 | Ten year old child: I hope the poem to be fun and easy to understanding. I don't want to see a lot of jargons or complicated concepts. 151 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 152 | Computational wonder of our age, 153 | Harnessing the quantum world's strange ways, 154 | Atoms dance, entwined in dual state, 155 | Tapping secrets hidden in their haze. 156 | 157 | Grand power to solve, simulate, 158 | Profound problems that perplex the wise, 159 | Transforming our future, we await. 160 | 161 | Poet: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! 162 | Computer Scientist: Everything looks good to me! 163 | Ten year old child: I don't know what does perplex mean. Can you make the use of words easier to understand? 164 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 165 | Curious machine of our time, 166 | Harnessing the quantum realm's odd ways, 167 | Atoms play, two states they embrace, 168 | Taking secrets from their puzzling maze. 169 | 170 | Great power to solve and imitate, 171 | Problems that confuse the brightest minds, 172 | Transforming our future, we await. 173 | 174 | Poet: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. 175 | Computer Scientist: Looking good! 176 | Ten year old child: I like this version a lot! 177 | 178 | Finish collaboration! 179 | 180 | Final answer: 181 | Curious machine of our time, 182 | Harnessing the quantum realm's odd ways, 183 | Atoms play, two states they embrace, 184 | Taking secrets from their puzzling maze. 
185 | 186 | Great power to solve and imitate, 187 | Problems that confuse the brightest minds, 188 | Transforming our future, we await. 189 | 190 | --- 191 | Now, identify the participants, provide their profiles, and collaboratively solve the following task step by step. Remember to present your final solution with the prefix "Final answer:". 192 | 193 | Task: Write a short and coherent story about {topic} that incorporates the answers to the following {n} questions: {questions} 194 | ''' 195 | 196 | 197 | spp_prompt_fixed_persona = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Note that the participants can only be either AI Assistant (you) or Expert. Then, initiate a multi-round collaboration process until a final conclusion is reached. The Expert will give critical comments and detailed suggestions whenever necessary. 198 | 199 | Here are some examples: 200 | --- 201 | Example Task 1: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 202 | Input: 6 12 1 1 203 | 204 | Participants: AI Assistant (you); Expert 205 | 206 | Start collaboration! 207 | 208 | Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 209 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 210 | Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 211 | AI Assistant (you): Thanks for pointing out the mistake. 
Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 212 | Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 213 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 214 | Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the “1 - 1” to “1 + 1”. 215 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 216 | Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 217 | 218 | Finish collaboration! 219 | 220 | Final answer: 6 * (1 + 1) + 12 = 24 221 | 222 | --- 223 | Example Task 2: Write a poem that meets the following requirements: (1) the poem has seven lines and the first letters of each line forms the word "CHATGPT"; (2) the poem is about explaining what is a quantum computer. (3) the poem needs to be easy to understand by a ten years old kid. 224 | 225 | Participants: AI Assistant (you); Expert 226 | 227 | Expert: Make sure that you write the poem with seven lines, and the first letters of the lines should be C, H, A, T, G, P, T. A quantum computer is an advanced computing device that uses the principles of quantum mechanics to process and store information. Unlike classical computers that use bits to represent information as 0s and 1s, quantum computers use quantum bits or qubits. Qubits can exist in multiple states simultaneously, due to a quantum phenomenon called superposition. You can consider using these information for the poem. I hope the poem to be fun and easy to understanding. 
I don't want to see a lot of jargons or complicated concepts. 228 | AI Assistant (you): Thanks for the guidance! Here's my initial attempt at the poem: 229 | Computational wonder of our age, 230 | Harnessing the quantum world's strange ways, 231 | Atoms dance, entwined in dual state, 232 | Tapping secrets hidden in their haze. 233 | 234 | Grand power to solve, simulate, 235 | Profound problems that perplex the wise, 236 | Transforming our future, we await. 237 | 238 | Expert: Let's verify if the poem meets the requirements. The first letters are CHATGPT which is correct! And the poem rhymes well. Good job! I don't know what does perplex mean. Can you make the use of words easier to understand? 239 | AI Assistant (you): Sure, let me revise the poem by using more common words. Check out the revised version: 240 | Curious machine of our time, 241 | Harnessing the quantum realm's odd ways, 242 | Atoms play, two states they embrace, 243 | Taking secrets from their puzzling maze. 244 | 245 | Great power to solve and imitate, 246 | Problems that confuse the brightest minds, 247 | Transforming our future, we await. 248 | 249 | Expert: Let's check again if the poem meets the requirements. The first letters are C H A T G P T. And now the poem is more accessible to children. Everything looks good to me. I like this version a lot! 250 | 251 | Finish collaboration! 252 | 253 | Final answer: 254 | Curious machine of our time, 255 | Harnessing the quantum realm's odd ways, 256 | Atoms play, two states they embrace, 257 | Taking secrets from their puzzling maze. 258 | 259 | Great power to solve and imitate, 260 | Problems that confuse the brightest minds, 261 | Transforming our future, we await. 262 | 263 | --- 264 | 265 | Now, identify the participants and collaboratively solve the following task step by step. Note that the participants can only be either AI Assistant (you) or Expert. Remember to present your final solution with the prefix "Final answer:". 
266 | 267 | Task: Write a short and coherent story about {topic} that incorporates the answers to the following {n} questions: {questions} 268 | ''' 269 | 270 | 271 | #### 272 | spp_prompt_less_demo = '''When faced with a task, begin by identifying the participants who will contribute to solving the task. Then, initiate a multi-round collaboration process until a final solution is reached. The participants will give critical comments and detailed suggestions whenever necessary. 273 | 274 | Here is one example: 275 | --- 276 | Example Task: Use numbers and basic arithmetic operations (+ - * /) to obtain 24. You need to use all numbers, and each number can only be used once. 277 | Input: 6 12 1 1 278 | 279 | Participants: AI Assistant (you); Math Expert 280 | 281 | Start collaboration! 282 | 283 | Math Expert: Let's analyze the task in detail. You need to make sure that you meet the requirement, that you need to use exactly the four numbers (6 12 1 1) to construct 24. To reach 24, you can think of the common divisors of 24 such as 4, 6, 8, 3 and try to construct these first. Also you need to think of potential additions that can reach 24, such as 12 + 12. 284 | AI Assistant (you): Thanks for the hints! Here's one initial solution: (12 / (1 + 1)) * 6 = 24 285 | Math Expert: Let's check the answer step by step. (1+1) = 2, (12 / 2) = 6, 6 * 6 = 36 which is not 24! The answer is not correct. Can you fix this by considering other combinations? Please do not make similar mistakes. 286 | AI Assistant (you): Thanks for pointing out the mistake. Here is a revised solution considering 24 can also be reached by 3 * 8: (6 + 1 + 1) * (12 / 4) = 24. 287 | Math Expert: Let's first check if the calculation is correct. (6 + 1 + 1) = 8, 12 / 4 = 3, 8 * 3 = 24. The calculation is correct, but you used 6 1 1 12 4 which is not the same as the input 6 12 1 1. Can you avoid using a number that is not part of the input? 
288 | AI Assistant (you): You are right, here is a revised solution considering 24 can be reached by 12 + 12 and without using any additional numbers: 6 * (1 - 1) + 12 = 24. 289 | Math Expert: Let's check the answer again. 1 - 1 = 0, 6 * 0 = 0, 0 + 12 = 12. I believe you are very close, here is a hint: try to change the "1 - 1" to "1 + 1". 290 | AI Assistant (you): Sure, here is the corrected answer: 6 * (1+1) + 12 = 24 291 | Math Expert: Let's verify the solution. 1 + 1 = 2, 6 * 2 = 12, 12 + 12 = 12. You used 1 1 6 12 which is identical to the input 6 12 1 1. Everything looks good! 292 | 293 | Finish collaboration! 294 | 295 | Final answer: 6 * (1 + 1) + 12 = 24 296 | 297 | --- 298 | Now, identify the participants and collaboratively solve the following task step by step. Remember to present your final solution with the prefix "Final answer:". 299 | 300 | Task: Write a short and coherent story about {topic} that incorporates the answers to the following {n} questions: {questions} 301 | ''' -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai==0.27.8 2 | tenacity==8.2.2 3 | transformers==4.31.0 4 | torch==2.0.1 -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | from models import OpenAIWrapper, Llama2Wrapper 5 | from tasks import get_task 6 | import time 7 | from configs import gpt_configs, llama_configs, default_gpt_config, default_llama_config 8 | 9 | 10 | SLEEP_RATE = 10 # sleep between calls 11 | 12 | 13 | def output_log_jsonl(log_file, all_logs): 14 | with open(log_file, "w") as f: 15 | for log in all_logs: 16 | f.write(json.dumps(log) + "\n") 17 | 18 | def _post_process_raw_response(task, raw_output_batch, method, **kwargs): 19 | unwrapped_output_batch 
= [] 20 | if_success_batch = [] 21 | for output in raw_output_batch: 22 | unwrapped_output, if_success_flag = task.prompt_unwrap(output, method, **kwargs) 23 | unwrapped_output_batch.append(unwrapped_output) 24 | if_success_batch.append(if_success_flag) 25 | return unwrapped_output_batch, if_success_batch 26 | 27 | 28 | ### default task runners ### 29 | 30 | def _get_response_default(model, task, i, method, num_generation, prompt, test_output=True, **kwargs): 31 | raw_output_batch, raw_response_batch = model.run(prompt=prompt, n=num_generation) 32 | if raw_output_batch == [] or raw_response_batch == []: # handle exception 33 | return {} 34 | # get parsed response, and the success flags (whether or not the parsing is success) (standard prompt always success) 35 | unwrapped_output_batch, if_success_batch = _post_process_raw_response(task, raw_output_batch, method, **kwargs) 36 | # compute automatic metric (different for each task), e.g., if the output contains all the answers 37 | if test_output: 38 | test_output_infos = [task.test_output(i, output) for output in unwrapped_output_batch] 39 | else: 40 | test_output_infos = [] 41 | # log output 42 | log_output = { 43 | "idx": i, 44 | "raw_response": raw_response_batch, 45 | "unwrapped_output": unwrapped_output_batch, 46 | "parsing_success_flag": if_success_batch, 47 | "test_output_infos": test_output_infos 48 | } 49 | return log_output 50 | 51 | def _run_task_default(model, task, i, method, num_generation, sleep_rate=SLEEP_RATE, test_output=True): 52 | # get prompt 53 | prompt = task.get_input_prompt(i, method=method) 54 | # get response and parsed output 55 | return _get_response_default(model, task, i, method, num_generation, prompt, test_output=test_output) 56 | 57 | def _run_task_codenames(model, task, i, method, num_generation, sleep_rate=SLEEP_RATE, test_output=True): 58 | # get spymaster hint word 59 | spymaster_prompt = task.get_input_prompt(i, method=method, role='spymaster') 60 | raw_spymaster_output, 
raw_response_spymaster = model.run(prompt=spymaster_prompt, n=1) 61 | if raw_spymaster_output == [] or raw_response_spymaster == []: # handle exception 62 | return {} 63 | spymaster_output, if_success_batch_spymaster = _post_process_raw_response(task, raw_spymaster_output, method) 64 | hint_word = spymaster_output[0].replace(".", "").strip() 65 | print(f"\tidx: {i} | done spymaster, hint word: {hint_word}") 66 | # sleep before calling guesser 67 | time.sleep(sleep_rate) 68 | # get guesser result 69 | guesser_prompt = task.get_input_prompt(i, method=method, role='guesser', hint_word=hint_word) 70 | raw_guesser_output, raw_response_batch_guesser = model.run(prompt=guesser_prompt, n=num_generation) 71 | if raw_guesser_output == [] or raw_response_batch_guesser == []: # handle exception 72 | return {} 73 | guesser_output_batch, if_success_batch_guesser = _post_process_raw_response(task, raw_guesser_output, method) 74 | # compute automatic metric (different for each task), e.g., if the output contains all the answers 75 | if test_output: 76 | test_output_infos = [task.test_output(i, output) for output in guesser_output_batch] 77 | else: 78 | test_output_infos = [] 79 | # log output 80 | log_output = { 81 | "idx": i, 82 | "raw_response_spymaster": raw_response_spymaster, 83 | "raw_response_guesser": raw_response_batch_guesser, 84 | "spymaster_output": spymaster_output, 85 | "guesser_output": guesser_output_batch, 86 | "hint_word": hint_word, 87 | "parsing_success_flag_spymaster": if_success_batch_spymaster, 88 | "parsing_success_flag_guesser": if_success_batch_guesser, 89 | "test_output_infos": test_output_infos 90 | } 91 | return log_output 92 | 93 | ############################## 94 | 95 | ### self_refine task runners ### 96 | 97 | def _run_self_refine_default(model, task, i, method, num_generation, sleep_rate=SLEEP_RATE, num_refine=1, **kwargs): 98 | print("\tidx:", i, "start self refine...") 99 | log_outputs = {} 100 | ## get initial response 101 | init_prompt = 
task.get_input_prompt(i, method=method, phase="init", **kwargs) 102 | init_output = _get_response_default(model, task, i, method, num_generation=1, prompt=init_prompt, test_output=True, phase="init") 103 | if init_output == {}: 104 | return {} 105 | log_outputs["answer_0"] = init_output 106 | 107 | time.sleep(sleep_rate) 108 | context_prompt = init_output['raw_response'][0]['prompt'] + "\n" + init_output["raw_response"][0]['choices'][0]['message']['content'] # Q + A0 109 | for j in range(num_refine): 110 | print("\t\tstep:", j) 111 | # get feedback 112 | feedback_prompt = task.get_input_prompt(i, method=method, phase="feedback", question_answer=context_prompt, **kwargs) 113 | feedback_output = _get_response_default(model, task, i, method, num_generation=1, prompt=feedback_prompt, test_output=False, phase="feedback") 114 | if feedback_output == {}: 115 | return log_outputs 116 | log_outputs[f"feedback_{j}"] = feedback_output 117 | time.sleep(sleep_rate) 118 | 119 | # get refined response 120 | refine_prompt = task.get_input_prompt(i, method=method, phase="refine", question_answer=context_prompt, feedback=feedback_output["unwrapped_output"][0], **kwargs) # Q + A0 + F 121 | refine_output = _get_response_default(model, task, i, method, num_generation=1, prompt=refine_prompt, test_output=True, phase="refine") 122 | if refine_output == {}: 123 | return log_outputs 124 | log_outputs[f"answer_{j+1}"] = refine_output 125 | time.sleep(sleep_rate) 126 | 127 | # update context 128 | context_prompt = refine_prompt + refine_output["raw_response"][0]['choices'][0]['message']['content'] # Q + A0 + F + A1 129 | 130 | return log_outputs 131 | 132 | def _run_self_refine_codenames(model, task, i, method, num_generation, sleep_rate=SLEEP_RATE, num_refine=1, test_output=True): 133 | # get spymaster hint word 134 | spy_master_log_outputs = _run_self_refine_default(model, task, i, method, num_generation, sleep_rate, num_refine, role='spymaster') 135 | if f"answer_{num_refine}" not in 
spy_master_log_outputs: 136 | return {} 137 | hint_word = spy_master_log_outputs[f"answer_{num_refine}"]["unwrapped_output"][0].replace(".", "").strip() 138 | print(f"\tidx: {i} | num_refine: {num_refine} | done spymaster, hint word: {hint_word}") 139 | # sleep before calling guesser 140 | time.sleep(sleep_rate) 141 | # get guesser result 142 | guesser_log_outputs = _run_self_refine_default(model, task, i, method, num_generation, sleep_rate, num_refine, role='guesser', hint_word=hint_word) 143 | if f"answer_{num_refine}" not in guesser_log_outputs: 144 | return {} 145 | guesser_output = guesser_log_outputs[f"answer_{num_refine}"]["unwrapped_output"][0] 146 | # compute automatic metric (different for each task), e.g., if the output contains all the answers 147 | if test_output: 148 | test_output_infos = [task.test_output(i, guesser_output)] 149 | else: 150 | test_output_infos = [] 151 | # log output 152 | log_output = { 153 | "idx": i, 154 | "spymaster_logs": spy_master_log_outputs, 155 | "guesser_logs": guesser_log_outputs, 156 | "hint_word": hint_word, 157 | "parsing_success_flag_spymaster": spy_master_log_outputs[f"answer_{num_refine}"]["parsing_success_flag"], 158 | "parsing_success_flag_guesser": guesser_log_outputs[f"answer_{num_refine}"]["parsing_success_flag"], 159 | "test_output_infos": test_output_infos 160 | } 161 | return log_output 162 | ############################## 163 | 164 | 165 | 166 | def _run_task(task_name, model, task, i, method, num_generation, sleep_rate=SLEEP_RATE, **kwargs): 167 | if task_name in ['trivia_creative_writing', 'logic_grid_puzzle']: 168 | if method == "self_refine": 169 | log_output = _run_self_refine_default(model, task, i, method, num_generation, sleep_rate, num_refine = kwargs['num_refine']) 170 | else: 171 | log_output = _run_task_default(model, task, i, method, num_generation, sleep_rate) 172 | elif task_name == 'codenames_collaborative': 173 | if method == "self_refine": 174 | log_output = 
def run(args):
    """Run one method on one task over an index range, logging every instance.

    Args:
        args: dict of run settings. Keys read here: ``model_type``, ``task``,
            ``method``, ``task_start_index``, ``task_end_index``,
            ``task_data_file``, ``num_generation``, ``output_dir``,
            ``additional_output_note``, ``system_message``, ``num_refine``
            and, depending on ``model_type``, ``gpt_config`` or
            ``llama_config``.

    Raises:
        NotImplementedError: if ``model_type`` is neither ``'gpt'`` nor
            ``'llama2'``. Previously this case fell through and failed later
            with a NameError on the unbound ``log_file``.
    """
    # get configs
    model_type = args['model_type']
    task_name = args['task']
    method = args['method']
    start_idx, end_idx = args['task_start_index'], args['task_end_index']
    task_data_file = args['task_data_file']
    num_generation = args['num_generation']

    output_dir = args['output_dir']
    if output_dir == "":
        output_dir = f"logs/{task_name}"

    additional_output_note = args['additional_output_note']
    system_message = args['system_message']
    print(f"setting default system message: {system_message}")

    # setup model and the name of the output log file
    if model_type == 'gpt':
        model_config = args['gpt_config']
        model = OpenAIWrapper(config=model_config, system_message=system_message)
        model_name_for_output = model_config['engine'].replace("/", "-")
        # The file name records whether a system message was used.
        sys_mes_tag = "without_sys_mes" if system_message == "" else "with_sys_mes"
        log_name = (
            f"{task_data_file}__method-{method}_engine-{model_name_for_output}"
            f"_temp-{model_config['temperature']}_topp-{model_config['top_p']}"
            f"_start{start_idx}-end{end_idx}{additional_output_note}__{sys_mes_tag}.jsonl"
        )
        sleep_rate = SLEEP_RATE
    elif model_type == 'llama2':
        model_config = args['llama_config']
        model = Llama2Wrapper(config=model_config)
        model_name_for_output = model_config['model'].replace("/", "-")
        log_name = (
            f"{task_data_file}__method-{method}_engine-{model_name_for_output}"
            f"_start{start_idx}-end{end_idx}{additional_output_note}__without_sys_mes.jsonl"
        )
        # No pause between calls for this model type.
        sleep_rate = 0
    else:
        raise NotImplementedError(f"model_type {model_type} not implemented; please choose from ['gpt', 'llama2']")

    log_file = os.path.join(output_dir, log_name)
    os.makedirs(os.path.dirname(log_file), exist_ok=True)

    # setup task
    task = get_task(task_name, file=task_data_file)

    all_logs = []
    print("start running ... log file:", log_file)
    print("sleep rate:", sleep_rate)

    print()
    # Clamp the requested range to the instances that actually exist.
    start = max(start_idx, 0)
    end = min(end_idx, len(task))
    print("total num of instances:", end - start)
    print("method:", method)
    for i in range(start, end):
        log_output = _run_task(task_name, model, task, i, method, num_generation, sleep_rate, num_refine=args['num_refine'])
        all_logs.append(log_output)
        print("\tidx:", i, "done | usage so far:", model.compute_gpt_usage())
        # Write the log after every instance so an interrupted run keeps its results.
        output_log_jsonl(log_file, all_logs)
        time.sleep(sleep_rate)
if __name__ == '__main__':
    # Script entry point: parse the command line, attach the model
    # configuration that matches the chosen model type, then start the run.
    # `args` is kept as a module-level dict.
    args = vars(parse_args())
    model_name, model_type = args['model'], args['model_type']

    if model_type == 'gpt':
        ### gpt config ###
        if model_name not in gpt_configs:
            # Unknown engine name: use the default config with that engine.
            default_gpt_config['engine'] = model_name
            args['gpt_config'] = default_gpt_config
        else:
            args['gpt_config'] = gpt_configs[model_name]

        # The command-line temperature and top_p always replace the config values.
        args['gpt_config'].update(temperature=args['temperature'], top_p=args['top_p'])

    elif model_type == 'llama2':
        ### llama config ###
        if model_name not in llama_configs:
            # Unknown model name: use the default config with that model.
            default_llama_config['model'] = model_name
            args['llama_config'] = default_llama_config
        else:
            args['llama_config'] = llama_configs[model_name]

    print("run args:", args)
    run(args)
#!/usr/bin/env bash
# Launch run.py on the Codenames Collaborative task.
# Extra arguments given to this script are appended, unchanged, after the
# options set below.

MODEL="gpt4-32k" # your engine name: gpt4-32k, gpt35-turbo, or meta-llama/Llama-2-13b-chat-hf
MODEL_TYPE="gpt" # 'gpt' or 'llama2'

DATA_FILE="codenames_50.jsonl"

START_IDX=0
END_IDX=50

# choose method
METHOD="spp" # ['standard', 'cot', 'spp', 'spp_profile', 'spp_fixed_persona', 'self_refine', 'spp_less_demo']

# w/ or w/o system message (spp works better w/ system message)
SYSTEM_MESSAGE="You are an AI assistant that helps people find information." # or "" (empty string)

# Every expansion is quoted so that values and forwarded arguments containing
# spaces reach run.py as single arguments ("$@" instead of the unquoted ${@}).
python run.py \
    --model "${MODEL}" \
    --model_type "${MODEL_TYPE}" \
    --method "${METHOD}" \
    --task codenames_collaborative \
    --task_data_file "${DATA_FILE}" \
    --task_start_index "${START_IDX}" \
    --task_end_index "${END_IDX}" \
    --system_message "${SYSTEM_MESSAGE}" \
    "$@"
13 | 14 | python run.py \ 15 | --model ${MODEL} \ 16 | --model_type ${MODEL_TYPE} \ 17 | --method ${METHOD} \ 18 | --task logic_grid_puzzle \ 19 | --task_data_file ${DATA_FILE} \ 20 | --task_start_index ${START_IDX} \ 21 | --task_end_index ${END_IDX} \ 22 | --system_message "${SYSTEM_MESSAGE}" \ 23 | ${@} 24 | 25 | -------------------------------------------------------------------------------- /scripts/trivia_creative_writing.sh: -------------------------------------------------------------------------------- 1 | MODEL="gpt4-32k" # your engine name: gpt4-32k, gpt35-turbo, or meta-llama/Llama-2-13b-chat-hf 2 | MODEL_TYPE="gpt" # 'gpt' or 'llama2' 3 | 4 | DATA_FILE="trivia_creative_writing_100_n_5.jsonl" # ['trivia_creative_writing_100_n_5.jsonl', 'trivia_creative_writing_100_n_10.jsonl'] 5 | 6 | START_IDX=0 7 | END_IDX=100 8 | 9 | # choose method 10 | METHOD="spp" # ['standard','cot','spp', 'spp_profile', 'spp_fixed_persona'] 11 | 12 | # w/ or w/o system message (spp works better w/ system message) 13 | SYSTEM_MESSAGE="You are an AI assistant that helps people find information." 
def get_task(name, file=None):
    """Build the task object registered under ``name``.

    The task module is imported only when it is requested.

    Args:
        name: one of ``'trivia_creative_writing'``, ``'logic_grid_puzzle'``
            or ``'codenames_collaborative'``.
        file: data file name handed to the task constructor. When ``None``
            the constructor is called without it, so the task's own default
            file is used. Passing ``None`` through, as before, replaced that
            default and failed inside ``os.path.join``.

    Raises:
        NotImplementedError: if ``name`` is not a known task.
    """
    if name == 'trivia_creative_writing':
        from .trivia_creative_writing import TriviaCreativeWritingTask as task_cls
    elif name == 'logic_grid_puzzle':
        from .logic_grid_puzzle import LogicGridPuzzleTask as task_cls
    elif name == 'codenames_collaborative':
        from .codenames_collaborative import CodenamesCollaborativeTask as task_cls
    else:
        raise NotImplementedError(f"task {name} not implemented; please choose from ['trivia_creative_writing', 'logic_grid_puzzle', 'codenames_collaborative']")
    return task_cls() if file is None else task_cls(file)
def get_input_prompt(self, idx: int, method: str, **kwargs) -> str:
    """Build the prompt for one Codenames instance, role and prompting method.

    Args:
        idx: index into ``self.data``.
        method: ``'standard'``, ``'cot'``, ``'spp'``, ``'spp_less_demo'``,
            ``'spp_fixed_persona'``, ``'spp_profile'`` or ``'self_refine'``.
        **kwargs: ``role`` (``'spymaster'`` or ``'guesser'``) is required and
            ``hint_word`` is required for the guesser. For ``'self_refine'``,
            ``phase`` (``'init'``, ``'feedback'`` or ``'refine'``) is
            required, together with ``question_answer`` for the feedback and
            refine phases and ``feedback`` for the refine phase.

    Returns:
        The formatted prompt string.

    Raises:
        NotImplementedError: for an unknown role, method or self-refine
            phase. An unknown phase used to fall through and fail with an
            UnboundLocalError on ``input_prompt``.
    """
    datapoint = self.data[idx]
    word_list_str = ", ".join(datapoint['word_list'])
    target_words = datapoint['target_words']
    target_words_str = ", ".join(target_words)
    n = len(target_words)

    assert 'role' in kwargs, "get_input_prompt requires a 'role' keyword argument"
    role = kwargs['role']

    # The spymaster sees the target words; the guesser only sees the hint word.
    if role == 'spymaster':
        fields = {"n": n, "target_words": target_words_str, "word_list": word_list_str}
    elif role == 'guesser':
        assert 'hint_word' in kwargs, "the guesser role requires a 'hint_word' keyword argument"
        fields = {"n": n, "hint_word": kwargs['hint_word'], "word_list": word_list_str}
    else:
        raise NotImplementedError(f"role {role} not implemented; choose from 'spymaster' or 'guesser'")

    if method == "self_refine":
        phase = kwargs["phase"]
        if phase == "init":
            # The first answer comes from the role's standard prompt.
            template = standard_prompt_spymaster if role == 'spymaster' else standard_prompt_guesser
            return template.format(**fields)
        if phase == "feedback":
            return self_refine_feedback_prompt.format(question_answer=kwargs["question_answer"])
        if phase == "refine":
            return self_refine_refinement_prompt.format(question_answer=kwargs["question_answer"], feedback=kwargs["feedback"])
        raise NotImplementedError(f"phase {phase} not implemented for method {method}")

    if role == 'spymaster':
        if method == "standard":
            template = standard_prompt_spymaster
        elif method == "cot":
            template = cot_prompt_spymaster
        elif method == "spp":
            template = spp_prompt_spymaster
        elif method == "spp_less_demo":
            template = spp_prompt_spymaster_less_demo
        elif method == "spp_fixed_persona":
            template = spp_prompt_spymaster_fixed_persona
        elif method == "spp_profile":
            template = spp_prompt_spymaster_profile
        else:
            raise NotImplementedError(f"method {method} not implemented for spymaster role")
    else:
        if method == "standard":
            template = standard_prompt_guesser
        elif method == "cot":
            template = cot_prompt_guesser
        elif method == "spp":
            template = spp_prompt_guesser
        elif method == "spp_less_demo":
            template = spp_prompt_guesser_less_demo
        elif method == "spp_fixed_persona":
            template = spp_prompt_guesser_fixed_persona
        elif method == "spp_profile":
            template = spp_prompt_guesser_profile
        else:
            raise NotImplementedError(f"method {method} not implemented for guesser role")

    return template.format(**fields)
@staticmethod
def prompt_unwrap(response: str, method: str, **kwargs):
    """Extract the answer text from a raw model response.

    Args:
        response: raw generation from the model.
        method: prompting method that produced the response.
        **kwargs: ``phase`` is required when ``method`` is ``'self_refine'``.

    Returns:
        A ``(text, parsed)`` tuple. ``text`` is the stripped segment that
        follows the first answer marker, or the whole response when no marker
        is present. ``parsed`` is ``False`` only for the spp-style methods
        when no "Final answer:" marker is found.

    Raises:
        NotImplementedError: if ``method`` is not supported.
    """
    def _after_marker(markers, flag_without_marker):
        # Markers are tried in order; the capitalised spelling comes first.
        for marker in markers:
            if marker in response:
                return response.split(marker)[1].strip(), True
        return response, flag_without_marker

    plain_markers = ("Answer:", "answer:")

    if method == "self_refine":
        # Feedback text is passed through untouched; answers are parsed
        # the same way as for the standard prompt.
        if kwargs["phase"] == "feedback":
            return response, True
        return _after_marker(plain_markers, True)

    if method in ("standard", "cot"):
        return _after_marker(plain_markers, True)

    if method in ("spp", "spp_profile", "spp_fixed_persona", "spp_less_demo"):
        return _after_marker(("Final answer:", "final answer:"), False)

    raise NotImplementedError(f"method {method} not implemented")
"init": 55 | input_prompt = standard_prompt.format(input=input_str) 56 | elif phase == "feedback": 57 | input_prompt = self_refine_feedback_prompt.format(question_answer=kwargs["question_answer"]) 58 | elif phase == "refine": 59 | input_prompt = self_refine_refinement_prompt.format(question_answer=kwargs["question_answer"], feedback=kwargs["feedback"]) 60 | else: 61 | raise NotImplementedError(f"method {method} not implemented") 62 | 63 | return input_prompt 64 | 65 | def test_output(self, idx: int, output: str): 66 | # test whether the output includes all the answers of the trivia questions 67 | instance = self.data[idx] 68 | target = instance["targets"][0] 69 | targets = [target] 70 | if target in target_aliases: 71 | targets.append(target_aliases[target]) 72 | 73 | # get all other candidates 74 | not_targets = [] 75 | for i in range(1, 11): 76 | if str(i) not in targets: 77 | not_targets.append(str(i)) 78 | not_targets.append(target_aliases[str(i)]) 79 | # print("targets", targets) 80 | # print("negatives", not_targets) 81 | info = {'correct': False} 82 | for target in targets: 83 | if target.lower().strip() in output.lower().strip(): # if the target is in the output 84 | info['correct'] = True 85 | # and if all the other targets are not in the output 86 | for not_target in not_targets: 87 | if not_target.lower().strip() in output.lower().strip(): 88 | info['correct'] = False 89 | break 90 | break 91 | return info 92 | 93 | @staticmethod 94 | def prompt_unwrap(response: str, method: str, **kwargs): 95 | ''' 96 | response: raw genration from the model 97 | return: 98 | - str: the story 99 | - bool: whether the story is successfully parsed from the raw genration 100 | ''' 101 | # take only the first few characters (enough for successfully parsed output) -> aviod unparsed result to have high accuracy when test output 102 | if method in ["standard", "cot"]: 103 | if "Answer:" in response: 104 | return response.split("Answer:")[1].strip(), True 105 | elif "answer:" 
class TriviaCreativeWritingTask(Task):
    """Trivia Creative Writing task.

    Each data line is a JSON object; this class reads its ``questions``,
    ``topic`` and ``answers`` fields. ``answers`` holds, for every question,
    a list of accepted answer strings.
    """

    def __init__(self, file='trivia_creative_writing_100_n_5.jsonl'):
        """Load the JSON-lines data file from the task's folder under DATA_PATH."""
        super().__init__()
        path = os.path.join(DATA_PATH, 'trivia_creative_writing', file)
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(path, "r", encoding="utf-8") as f:
            self.data = [json.loads(line) for line in f]

    def __len__(self) -> int:
        return len(self.data)

    def get_input(self, idx: int):
        """Return the raw data record at ``idx``."""
        return self.data[idx]

    def get_input_prompt(self, idx: int, method: str, **kwargs) -> str:
        """Build the prompt for instance ``idx`` under the given method.

        Args:
            idx: index into ``self.data``.
            method: ``'standard'``, ``'cot'``, ``'spp'``, ``'spp_less_demo'``,
                ``'spp_fixed_persona'``, ``'spp_profile'`` or
                ``'self_refine'``.
            **kwargs: for ``'self_refine'``, ``phase`` (``'init'``,
                ``'feedback'`` or ``'refine'``) is required, together with
                ``question_answer`` for the feedback and refine phases and
                ``feedback`` for the refine phase.

        Raises:
            NotImplementedError: for an unknown method or self-refine phase.
                An unknown phase used to fail with an UnboundLocalError on
                ``input_prompt``.
        """
        datapoint = self.data[idx]
        questions = datapoint["questions"]
        fields = {
            "n": len(questions),
            "questions": " ".join(questions),
            "topic": datapoint["topic"],
        }

        if method == "self_refine":
            phase = kwargs["phase"]
            if phase == "init":
                # The first answer comes from the standard prompt.
                return standard_prompt.format(**fields)
            if phase == "feedback":
                return self_refine_feedback_prompt.format(question_answer=kwargs["question_answer"])
            if phase == "refine":
                return self_refine_refinement_prompt.format(question_answer=kwargs["question_answer"], feedback=kwargs["feedback"])
            raise NotImplementedError(f"phase {phase} not implemented for method {method}")

        if method == "standard":
            template = standard_prompt
        elif method == "cot":
            template = cot_prompt
        elif method == "spp":
            template = spp_prompt
        elif method == "spp_less_demo":
            template = spp_prompt_less_demo
        elif method == "spp_fixed_persona":
            template = spp_prompt_fixed_persona
        elif method == "spp_profile":
            template = spp_prompt_profile
        else:
            raise NotImplementedError(f"method {method} not implemented")

        return template.format(**fields)

    def test_output(self, idx: int, output: str):
        """Count how many trivia questions have an accepted answer in ``output``.

        A question counts as answered when any of its accepted answer strings
        occurs in ``output``, compared case-insensitively as a substring.

        Returns:
            ``{'correct_count': int, 'question_count': int}``.
        """
        answers_per_question = self.data[idx]["answers"]
        # Lower-case the generation once instead of once per candidate answer.
        haystack = output.lower()
        correct_count = sum(
            any(ans.lower() in haystack for ans in accepted)
            for accepted in answers_per_question
        )
        return {'correct_count': correct_count, 'question_count': len(answers_per_question)}

    @staticmethod
    def prompt_unwrap(response: str, method: str, **kwargs):
        """Extract the story from a raw model response.

        Args:
            response: raw generation from the model.
            method: prompting method that produced the response.

        Returns:
            A ``(text, parsed)`` tuple. For ``'standard'`` and
            ``'self_refine'`` the response is returned unchanged with
            ``True``. For ``'cot'`` the text after "Story:" is returned, and
            for the spp-style methods the text after "Final answer:";
            ``parsed`` is ``False`` when that marker is missing, in which
            case the whole response is returned.

        Raises:
            NotImplementedError: if ``method`` is not supported.
        """
        if method in ["standard", "self_refine"]:
            return response, True

        if method == "cot":
            markers = ("Story:", "story:")
        elif method in ["spp", "spp_profile", "spp_fixed_persona", "spp_less_demo"]:
            markers = ("Final answer:", "final answer:")
        else:
            raise NotImplementedError(f"method {method} not implemented")

        # Markers are tried in order; the capitalised spelling comes first.
        for marker in markers:
            if marker in response:
                return response.split(marker)[1].strip(), True
        return response, False