├── scripts ├── pal │ ├── core │ │ ├── __init__.py │ │ ├── runtime.py │ │ ├── backend.py │ │ ├── vicuna.py │ │ └── interface.py │ ├── prompt │ │ ├── __init__.py │ │ ├── strategy_qa_prompt.py │ │ ├── boolean_expressions.py │ │ ├── penguins_in_a_table.py │ │ ├── ld_three.py │ │ ├── tracking_three.py │ │ ├── date_understanding_prompt.py │ │ ├── snarks.py │ │ ├── ruin_names.py │ │ ├── disambiguation_qa.py │ │ ├── math_prompts.py │ │ └── salient_translation.py │ └── __init__.py ├── run_self_consistency.sh ├── run_adaptive_consistency.sh ├── eval_outputs.py ├── fastchat_server.py └── run_eval.py ├── docs ├── CNAME ├── .gitignore ├── static │ ├── css │ │ ├── prism.css:Zone.Identifier │ │ ├── prism-diff-highlight.css │ │ ├── prism-line-highlight.css │ │ ├── prism_bak.css │ │ ├── index.css │ │ ├── bulma-carousel.min.css │ │ ├── prism.css │ │ └── bulma-slider.min.css │ ├── videos │ │ ├── ._Icon │ │ └── AC_teaser_video.mp4 │ ├── adaptive_paper.pdf │ ├── images │ │ ├── ac_results_math.png │ │ ├── ac_results_misc.png │ │ ├── ac_teaser_new.png │ │ ├── ac_results_logic.png │ │ ├── ac_results_logic_bak.png │ │ └── favicon.svg │ └── js │ │ ├── index.js │ │ ├── prism-diff-highlight.js │ │ ├── bulma-slider.min.js │ │ ├── prism-line-highlight.js │ │ └── bulma-slider.js ├── README.md └── index.html ├── requirements.txt ├── download_outputs.sh ├── download_datasets.bash ├── adaptive_consistency ├── __pycache__ │ ├── main.cpython-39.pyc │ ├── __init__.cpython-39.pyc │ └── stopping_criterias.cpython-39.pyc ├── __init__.py ├── main.py └── stopping_criterias.py ├── .gitignore ├── setup.py ├── outputs └── README.md ├── README.md └── LICENSE /scripts/pal/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/pal/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | sample-step-by-step.info -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | gdown 4 | tqdm -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_store 2 | .idea 3 | scratch 4 | -------------------------------------------------------------------------------- /docs/static/css/prism.css:Zone.Identifier: -------------------------------------------------------------------------------- 1 | [ZoneTransfer] 2 | ZoneId=3 3 | HostUrl=about:internet 4 | -------------------------------------------------------------------------------- /docs/static/videos/._Icon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/videos/._Icon -------------------------------------------------------------------------------- /download_outputs.sh: -------------------------------------------------------------------------------- 1 | gdown "11ripw7-E5T8a2TZUiD5vbC7QuR_qfZOA" -O outputs.zip 2 | unzip -n -q outputs.zip 3 | rm outputs.zip -------------------------------------------------------------------------------- /docs/static/adaptive_paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/adaptive_paper.pdf -------------------------------------------------------------------------------- /download_datasets.bash: -------------------------------------------------------------------------------- 1 | gdown 
"14a2TJY_lqz_h1W1h8iclROMEpBynPTI4" -O datasets.zip 2 | unzip datasets.zip 3 | rm datasets.zip 4 | -------------------------------------------------------------------------------- /docs/static/images/ac_results_math.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_math.png -------------------------------------------------------------------------------- /docs/static/images/ac_results_misc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_misc.png -------------------------------------------------------------------------------- /docs/static/images/ac_teaser_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_teaser_new.png -------------------------------------------------------------------------------- /docs/static/videos/AC_teaser_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/videos/AC_teaser_video.mp4 -------------------------------------------------------------------------------- /docs/static/images/ac_results_logic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_logic.png -------------------------------------------------------------------------------- /docs/static/images/ac_results_logic_bak.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_logic_bak.png 
-------------------------------------------------------------------------------- /adaptive_consistency/__pycache__/main.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/adaptive_consistency/__pycache__/main.cpython-39.pyc -------------------------------------------------------------------------------- /adaptive_consistency/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/adaptive_consistency/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | *.egg-info 4 | *.egg 5 | transfer_outputs.py 6 | outputs/code-davinci-002 7 | outputs/vicuna-13b 8 | datasets 9 | scratch 10 | __pycache__ 11 | *.pyc -------------------------------------------------------------------------------- /adaptive_consistency/__pycache__/stopping_criterias.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/adaptive_consistency/__pycache__/stopping_criterias.cpython-39.pyc -------------------------------------------------------------------------------- /adaptive_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import AC 2 | from .main import stop_criteria_dict 3 | from .stopping_criterias import BetaStoppingCriteria 4 | from .stopping_criterias import DirichletStoppingCriteria 5 | from .stopping_criterias import RandomStoppingCriteria 6 | from .stopping_criterias import MajorityStoppingCriteria 7 | from .stopping_criterias import EntropyStoppingCriteria 
-------------------------------------------------------------------------------- /docs/static/css/prism-diff-highlight.css: -------------------------------------------------------------------------------- 1 | pre.diff-highlight > code .token.deleted:not(.prefix), 2 | pre > code.diff-highlight .token.deleted:not(.prefix) { 3 | background-color: rgba(255, 0, 0, .1); 4 | color: inherit; 5 | display: block; 6 | } 7 | 8 | pre.diff-highlight > code .token.inserted:not(.prefix), 9 | pre > code.diff-highlight .token.inserted:not(.prefix) { 10 | background-color: rgba(0, 255, 128, .1); 11 | color: inherit; 12 | display: block; 13 | } 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | 5 | def read(fname): 6 | return open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read() 7 | 8 | setup( 9 | name = "AdaptiveConsistency", 10 | version = "1.0.0", 11 | description = ("Library for running AdaptiveConsistency based Inference on large language models."), 12 | license = "Apache License 2.0", 13 | packages=find_packages(), 14 | long_description=read('README.md'), 15 | long_description_content_type='text/markdown', 16 | ) 17 | -------------------------------------------------------------------------------- /outputs/README.md: -------------------------------------------------------------------------------- 1 | Model outputs from different models on 13 different datasets. 2 | 3 | Run `bash download_outputs.sh` to download all the outputs. \ 4 |
5 | 6 | Directory structure: 7 | ``` 8 | outputs 9 | ├── README.md 10 | ├── code-davinci-002 11 | │   ├── dataset-1 12 | │   │   ├── outputs_seed1.jsonl 13 | │   │   ├── outputs_seed2.jsonl 14 | │   │   └── outputs_seed3.jsonl 15 | │   ├── dataset-2 16 | │   │   ├── ... 17 | │   ├── ... 18 | ├── vicuna-13b 19 | │   ├── ... 20 | └── ... 21 | ``` 22 | 23 |
24 | 25 | We use the following hyperparameters for model generation: 26 | - `temperature`: 0.7 27 | - `top_p`: 1.0 28 | - `max_length`: 512 29 | - `max_gens`: 40 30 | 31 | -------------------------------------------------------------------------------- /scripts/pal/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .core import interface, runtime 17 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Nerfies 2 | 3 | This is the repository that contains source code for the [Nerfies website](https://nerfies.github.io). 4 | 5 | If you find Nerfies useful for your work please cite: 6 | ``` 7 | @article{park2021nerfies 8 | author = {Park, Keunhong and Sinha, Utkarsh and Barron, Jonathan T. and Bouaziz, Sofien and Goldman, Dan B and Seitz, Steven M. and Martin-Brualla, Ricardo}, 9 | title = {Nerfies: Deformable Neural Radiance Fields}, 10 | journal = {ICCV}, 11 | year = {2021}, 12 | } 13 | ``` 14 | 15 | # Website License 16 | Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. 17 | -------------------------------------------------------------------------------- /scripts/pal/prompt/strategy_qa_prompt.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT = """ 2 | # Q: Do hamsters provide food for any animals? 3 | Answer: Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals. The answer is yes 4 | 5 | 6 | # Q: Yes or no: Hydrogen's atomic number squared exceeds number of Spice Girls? 7 | Answer: Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen's atomic number squared is less than 5. The answer is no 8 | 9 | 10 | # Q: Yes or no: Would a pear sink in water? 11 | Answer: The density of a pear is about 0.6 g\/cm^3, which is less than water. Objects less dense than water float. Thus, a pear would float. The answer is no 12 | 13 | 14 | # Q: Yes or no: Is it common to see frost during some college commencements? 15 | Answer: College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements. The answer is yes 16 | 17 | 18 | # Q: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)? 19 | Answer: The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam. The answer is no 20 | 21 | 22 | # Q: Could Brooke Shields succeed at University of Pennsylvania? 23 | Answer: Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania. 
The answer is yes 24 | 25 | # Q: {question} 26 | """.strip() + '\n' 27 | -------------------------------------------------------------------------------- /docs/static/images/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 13 | 14 | awesome_icon 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/static/css/prism-line-highlight.css: -------------------------------------------------------------------------------- 1 | pre[data-line] { 2 | position: relative; 3 | padding: 1em 0 1em 3em; 4 | } 5 | 6 | .line-highlight { 7 | position: absolute; 8 | left: 0; 9 | right: 0; 10 | padding: inherit 0; 11 | margin-top: 1em; /* Same as .prism’s padding-top */ 12 | 13 | background: hsla(24, 20%, 50%,.08); 14 | background: linear-gradient(to right, hsla(24, 20%, 50%,.1) 70%, hsla(24, 20%, 50%,0)); 15 | 16 | pointer-events: none; 17 | 18 | line-height: inherit; 19 | white-space: pre; 20 | } 21 | 22 | @media print { 23 | .line-highlight { 24 | /* 25 | * This will prevent browsers from replacing the background color with white. 26 | * It's necessary because the element is layered on top of the displayed code. 
27 | */ 28 | -webkit-print-color-adjust: exact; 29 | color-adjust: exact; 30 | } 31 | } 32 | 33 | .line-highlight:before, 34 | .line-highlight[data-end]:after { 35 | content: attr(data-start); 36 | position: absolute; 37 | top: .4em; 38 | left: .6em; 39 | min-width: 1em; 40 | padding: 0 .5em; 41 | background-color: hsla(24, 20%, 50%,.4); 42 | color: hsl(24, 20%, 95%); 43 | font: bold 65%/1.5 sans-serif; 44 | text-align: center; 45 | vertical-align: .3em; 46 | border-radius: 999px; 47 | text-shadow: none; 48 | box-shadow: 0 1px white; 49 | } 50 | 51 | .line-highlight[data-end]:after { 52 | content: attr(data-end); 53 | top: auto; 54 | bottom: .4em; 55 | } 56 | 57 | .line-numbers .line-highlight:before, 58 | .line-numbers .line-highlight:after { 59 | content: none; 60 | } 61 | 62 | pre[id].linkable-line-numbers span.line-numbers-rows { 63 | pointer-events: all; 64 | } 65 | pre[id].linkable-line-numbers span.line-numbers-rows > span:before { 66 | cursor: pointer; 67 | } 68 | pre[id].linkable-line-numbers span.line-numbers-rows > span:hover:before { 69 | background-color: rgba(128, 128, 128, .2); 70 | } 71 | -------------------------------------------------------------------------------- /scripts/pal/prompt/boolean_expressions.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Evaluate the result of a random Boolean expression. 3 | 4 | # Q: not ( ( not not True ) ) is 5 | A: Let's think step by step. 6 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 7 | We first simplify this expression "Z" as follows: "Z = not ( ( not not True ) ) = not ( ( A ) )" where "A = not not True". 8 | Let's evaluate A: A = not not True = not (not True) = not False = True. 9 | Plugging in A, we get: Z = not ( ( A ) ) = not ( ( True ) ) = not True = False. So the answer is False. 
10 | 11 | # Q: True and False and not True and True is 12 | A: Let's think step by step. 13 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 14 | We first simplify this expression "Z" as follows: "Z = True and False and not True and True = A and B" where "A = True and False" and "B = not True and True". 15 | Let's evaluate A: A = True and False = False. 16 | Let's evaluate B: B = not True and True = not (True and True) = not (True) = False. 17 | Plugging in A and B, we get: Z = A and B = False and False = False. So the answer is False. 18 | 19 | # Q: not not ( not ( False ) ) is 20 | A: Let's think step by step. 21 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 22 | We first simplify this expression "Z" as follows: "Z = not not ( not ( False ) ) = not not ( A )" where "A = not ( False )". 23 | Let's evaluate A: A = not ( False ) = not False = True. 24 | Plugging in A, we get: Z = not not ( A ) = not not (True) = not not False = True. So the answer is True. 
25 | 26 | # Q: {question} 27 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/run_self_consistency.sh: -------------------------------------------------------------------------------- 1 | python scripts/run_eval.py --dataset gsm --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 2 | python scripts/run_eval.py --dataset asdiv --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 3 | python scripts/run_eval.py --dataset svamp --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 4 | python scripts/run_eval.py --dataset date --answer_type str --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file date_understanding_prompt 5 | python scripts/run_eval.py --dataset tracking_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file tracking_three --end "# Q:" 6 | python scripts/run_eval.py --dataset ld_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ld_three 7 | python scripts/run_eval.py --dataset strategy_qa --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file strategy_qa_prompt 8 | python scripts/run_eval.py --dataset boolean_expressions --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file boolean_expressions 9 | python scripts/run_eval.py --dataset snarks --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file snarks 10 | python scripts/run_eval.py --dataset ruin_names --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ruin_names 11 | python scripts/run_eval.py --dataset salient_translation --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file salient_translation 12 | python scripts/run_eval.py --dataset disambiguation_qa --answer_type str 
--max_gens 40 --temperature 0.7 --prompt_type text --prompt_file disambiguation_qa 13 | python scripts/run_eval.py --dataset penguins_in_a_table_text --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file penguins_in_a_table -------------------------------------------------------------------------------- /scripts/pal/core/runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | import copy 17 | import datetime 18 | from typing import Any, Dict 19 | import dateutil.relativedelta 20 | 21 | 22 | class GenericRuntime: 23 | GLOBAL_DICT = {} 24 | LOCAL_DICT = None 25 | HEADERS = [] 26 | def __init__(self): 27 | self._global_vars = copy.copy(self.GLOBAL_DICT) 28 | self._local_vars = copy.copy(self.LOCAL_DICT) if self.LOCAL_DICT else None 29 | 30 | for c in self.HEADERS: 31 | self.exec_code(c) 32 | 33 | def exec_code(self, code_piece: str) -> None: 34 | exec(code_piece, self._global_vars) 35 | 36 | def eval_code(self, expr: str) -> Any: 37 | return eval(expr, self._global_vars) 38 | 39 | def inject(self, var_dict: Dict[str, Any]) -> None: 40 | for k, v in var_dict.items(): 41 | self._global_vars[k] = v 42 | 43 | @property 44 | def answer(self): 45 | return self._global_vars['answer'] 46 | 47 | 48 | class DateRuntime(GenericRuntime): 49 | GLOBAL_DICT = { 50 | 'datetime': datetime.datetime, 51 | 'timedelta': dateutil.relativedelta.relativedelta, 52 | 'relativedelta': dateutil.relativedelta.relativedelta 53 | } 54 | 55 | 56 | class CustomDict(dict): 57 | def __iter__(self): 58 | return list(super().__iter__()).__iter__() 59 | 60 | 61 | class ColorObjectRuntime(GenericRuntime): 62 | GLOBAL_DICT = {'dict': CustomDict} 63 | 64 | -------------------------------------------------------------------------------- /scripts/run_adaptive_consistency.sh: -------------------------------------------------------------------------------- 1 | # Usage: bash scripts/run_adaptive_consistency.sh [optional] 2 | stop_criteria=$1 3 | # if stop_criteria_thresh is none set to -1 4 | stop_criteria_thresh=${2:-"-1"} 5 | 6 | # stop_criteria is one of: 7 | # beta 8 | # dirichlet 9 | # entropy 10 | # random 11 | # majority 12 | 13 | echo $stop_criteria 14 | 15 | python scripts/run_eval.py --dataset gsm --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts --stop_criteria $stop_criteria --stop_criteria_thresh 
$stop_criteria_thresh 16 | python scripts/run_eval.py --dataset asdiv --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 17 | python scripts/run_eval.py --dataset svamp --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 18 | python scripts/run_eval.py --dataset date --answer_type str --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file date_understanding_prompt 19 | python scripts/run_eval.py --dataset tracking_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file tracking_three --end "# Q:" 20 | python scripts/run_eval.py --dataset ld_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ld_three 21 | python scripts/run_eval.py --dataset strategy_qa --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file strategy_qa_prompt 22 | python scripts/run_eval.py --dataset boolean_expressions --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file boolean_expressions 23 | python scripts/run_eval.py --dataset snarks --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file snarks 24 | python scripts/run_eval.py --dataset ruin_names --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ruin_names 25 | python scripts/run_eval.py --dataset salient_translation --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file salient_translation 26 | python scripts/run_eval.py --dataset disambiguation_qa --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file disambiguation_qa 27 | python scripts/run_eval.py --dataset penguins_in_a_table_text --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file penguins_in_a_table -------------------------------------------------------------------------------- 
/scripts/pal/prompt/penguins_in_a_table.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Answer questions about a table of penguins and their attributes. 3 | 4 | # Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. We now add a penguin to the table: 5 | James, 12, 90, 12 6 | How many penguins are less than 8 years old? 7 | Options: 8 | (A) 1 9 | (B) 2 10 | (C) 3 11 | (D) 4 12 | (E) 5 13 | A: Let's think step by step. 14 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 15 | Now, we add James to this table: James is 12 years old. 16 | The penguins that are less than 8 years old are Louis and Bernard. 17 | There are 2 penguins less than 8 years old. So the answer is (B). 18 | 19 | # Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. Which is the youngest penguin? 20 | Options: 21 | (A) Louis 22 | (B) Bernard 23 | (C) Vincent 24 | (D) Gwen 25 | (E) James 26 | A: Let's think step by step. 27 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 28 | According to the table, Bernard (5) is the youngest amongst them. 29 | The youngest penguin is Bernard. So the answer is (B). 
30 | 31 | # Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. What is the name of the second penguin sorted by alphabetic order? 32 | Options: 33 | (A) Louis 34 | (B) Bernard 35 | (C) Vincent 36 | (D) Gwen 37 | (E) James 38 | A: Let's think step by step. 39 | This question focuses on the name. We know the following: The names of the penguin in the table are Louis, Bernard, Vincent, and Gwen. 40 | When we sort their names alphabetically, we get Bernard, Gwen, Louis, Vincent. 41 | The name of the second penguin sorted by alphabetical order is Gwen. 42 | The name of the second penguin sorted by alphabetic order is Gwen. So the answer is (D). 43 | 44 | # Q: {question} 45 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/pal/prompt/ld_three.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | A logical deduction task which requires deducing the order of a sequence of objects. 3 | 4 | # Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 5 | Options: 6 | (A) Amy finished last 7 | (B) Eli finished last 8 | (C) Eve finished last 9 | A: Let's think step by step. 10 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 11 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 12 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 13 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 
14 | Eli finished last. So the answer is (B). 15 | 16 | # Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 17 | Options: 18 | (A) The white book is the leftmost 19 | (B) The green book is the leftmost 20 | (C) The orange book is the leftmost 21 | A: Let's think step by step. 22 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 23 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 24 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 25 | According to this ordering, the leftmost book is the white book. 26 | The white book is the leftmost. So the answer is (A). 27 | 28 | # Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 29 | Options: 30 | (A) The red book is the leftmost 31 | (B) The gray book is the leftmost 32 | (C) The white book is the leftmost 33 | A: Let's think step by step. 34 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 35 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 36 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 37 | According to this ordering, the leftmost book is the white book. 38 | The white book is the leftmost. So the answer is (C). 
window.HELP_IMPROVE_VIDEOJS = false;

var INTERP_BASE = "./static/interpolation/stacked";
var NUM_INTERP_FRAMES = 240;

var interp_images = [];

// Preload every interpolation frame up front so slider scrubbing is instant.
function preloadInterpolationImages() {
  for (var i = 0; i < NUM_INTERP_FRAMES; i++) {
    var path = INTERP_BASE + '/' + String(i).padStart(6, '0') + '.jpg';
    interp_images[i] = new Image();
    interp_images[i].src = path;
  }
}

// Show frame i in the wrapper; disable drag/context-menu so the image
// behaves like a scrubber handle rather than a normal <img>.
function setInterpolationImage(i) {
  var image = interp_images[i];
  image.ondragstart = function() { return false; };
  image.oncontextmenu = function() { return false; };
  $('#interpolation-image-wrapper').empty().append(image);
}


$(document).ready(function() {
  // Check for click events on the navbar burger icon
  $(".navbar-burger").click(function() {
    // Toggle the "is-active" class on both the "navbar-burger" and the "navbar-menu"
    $(".navbar-burger").toggleClass("is-active");
    $(".navbar-menu").toggleClass("is-active");
  });

  var options = {
    slidesToScroll: 1,
    slidesToShow: 3,
    loop: true,
    infinite: true,
    autoplay: false,
    autoplaySpeed: 3000,
  }

  // Initialize all div with carousel class
  var carousels = bulmaCarousel.attach('.carousel', options);

  // Loop on each carousel initialized
  for (var i = 0; i < carousels.length; i++) {
    // Add listener to event
    carousels[i].on('before:show', state => {
      console.log(state);
    });
  }

  // Access to bulmaCarousel instance of an element
  var element = document.querySelector('#my-element');
  if (element && element.bulmaCarousel) {
    // bulmaCarousel instance is available as element.bulmaCarousel.
    // FIX: bulma-carousel emits colon-namespaced events ('before:show',
    // 'after:show') — matching the listeners attached above. The previous
    // 'before-show' event name never fires.
    element.bulmaCarousel.on('before:show', function(state) {
      console.log(state);
    });
  }

  /*var player = document.getElementById('interpolation-video');
  player.addEventListener('loadedmetadata', function() {
    $('#interpolation-slider').on('input', function(event) {
      console.log(this.value, player.duration);
      player.currentTime = player.duration / 100 * this.value;
    })
  }, false);*/
  preloadInterpolationImages();

  $('#interpolation-slider').on('input', function(event) {
    setInterpolationImage(this.value);
  });
  setInterpolationImage(0);
  $('#interpolation-slider').prop('max', NUM_INTERP_FRAMES - 1);

  bulmaSlider.attach();

})
16 | 17 | # Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 18 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. At the end of the game, Alice has the 19 | Options: 20 | (A) white ball 21 | (B) purple ball 22 | (C) pink ball 23 | A: Let's think step by step. 24 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 25 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 26 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 27 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 28 | At the end of the game, Alice has the pink ball. So the answer is (C). 29 | 30 | # Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 31 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 32 | Options: 33 | (A) Lola 34 | (B) Rodrigo 35 | (C) Patrick 36 | A: Let's think step by step. 37 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 38 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 39 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 40 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 41 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). 
42 | 43 | # Q: {question} 44 | '''.strip() + '\n' -------------------------------------------------------------------------------- /docs/static/css/prism_bak.css: -------------------------------------------------------------------------------- 1 | /* PrismJS 1.29.0 2 | https://prismjs.com/download.html#themes=prism-okaidia&languages=markup+css+clike+javascript+diff+python&plugins=line-highlight+diff-highlight */ 3 | code[class*=language-],pre[class*=language-]{color:#f8f8f2;background:0 0;text-shadow:0 1px rgba(0,0,0,.3);font-family:Consolas,Monaco,'Andale Mono','Ubuntu Mono',monospace;font-size:1em;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.5;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-hyphens:none;-moz-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]{padding:1em;margin:.5em 0;overflow:auto;border-radius:.3em}:not(pre)>code[class*=language-],pre[class*=language-]{background:#272822}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal}.token.cdata,.token.comment,.token.doctype,.token.prolog{color:#8292a2}.token.punctuation{color:#f8f8f2}.token.namespace{opacity:.7}.token.constant,.token.deleted,.token.property,.token.symbol,.token.tag{color:#f92672}.token.boolean,.token.number{color:#ae81ff}.token.attr-name,.token.builtin,.token.char,.token.inserted,.token.selector,.token.string{color:#a6e22e}.language-css .token.string,.style .token.string,.token.entity,.token.operator,.token.url,.token.variable{color:#f8f8f2}.token.atrule,.token.attr-value,.token.class-name,.token.function{color:#e6db74}.token.keyword{color:#66d9ef}.token.important,.token.regex{color:#fd971f}.token.bold,.token.important{font-weight:700}.token.italic{font-style:italic}.token.entity{cursor:help} 4 | pre[data-line]{position:relative;padding:1em 0 1em 3em}.line-highlight{position:absolute;left:0;right:0;padding:inherit 
0;margin-top:1em;background:hsla(24,20%,50%,.08);background:linear-gradient(to right,hsla(24,20%,50%,.1) 70%,hsla(24,20%,50%,0));pointer-events:none;line-height:inherit;white-space:pre}@media print{.line-highlight{-webkit-print-color-adjust:exact;color-adjust:exact}}.line-highlight:before,.line-highlight[data-end]:after{content:attr(data-start);position:absolute;top:.4em;left:.6em;min-width:1em;padding:0 .5em;background-color:hsla(24,20%,50%,.4);color:#f4f1ef;font:bold 65%/1.5 sans-serif;text-align:center;vertical-align:.3em;border-radius:999px;text-shadow:none;box-shadow:0 1px #fff}.line-highlight[data-end]:after{content:attr(data-end);top:auto;bottom:.4em}.line-numbers .line-highlight:after,.line-numbers .line-highlight:before{content:none}pre[id].linkable-line-numbers span.line-numbers-rows{pointer-events:all}pre[id].linkable-line-numbers span.line-numbers-rows>span:before{cursor:pointer}pre[id].linkable-line-numbers span.line-numbers-rows>span:hover:before{background-color:rgba(128,128,128,.2)} 5 | pre.diff-highlight>code .token.deleted:not(.prefix),pre>code.diff-highlight .token.deleted:not(.prefix){background-color:rgba(255,0,0,.1);color:inherit;display:block}pre.diff-highlight>code .token.inserted:not(.prefix),pre>code.diff-highlight .token.inserted:not(.prefix){background-color:rgba(0,255,128,.1);color:inherit;display:block} 6 | -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | 
.publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | border: 1px solid #bbb; 121 | border-radius: 10px; 122 | padding: 0; 123 | font-size: 0; 124 | } 125 | 126 | .results-carousel video { 127 | margin: 0; 128 | } 129 | 130 | 131 | .interpolation-panel { 132 | background: #f5f5f5; 133 | border-radius: 10px; 134 | } 135 | 136 | .interpolation-panel .interpolation-image { 137 | width: 100%; 138 | border-radius: 5px; 139 | } 140 | 141 | .interpolation-video-column { 142 
# Copyright 2022 PAL Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Few-shot PAL prompt for the "date understanding" task: each exemplar answers
# a date question with a short datetime/relativedelta program whose final
# expression is the formatted answer.
#
# Fixes relative to the upstream prompt:
#  - 4/19/1969 exemplar: the final expression now prints `later` (the value
#    actually computed for "24 hours later"); it previously printed `today`,
#    demonstrating a wrong answer (04/19/1969 instead of 04/20/1969).
#  - 3/11/2002 exemplar: the reasoning comment now says "3/12/2002",
#    consistent with the `datetime(2002, 3, 12)` on the next line (it
#    previously said "3/1/2002").
# The "Feburary" misspelling is kept on purpose: it matches the wording of the
# dataset questions.
MATH_PROMPT = """
# Q: 2015 is coming in 36 hours. What is the date one week from today in MM/DD/YYYY?
# If 2015 is coming in 36 hours, then today is 36 hours before.
today = datetime(2015, 1, 1) - relativedelta(hours=36)
# One week from today,
one_week_from_today = today + relativedelta(weeks=1)
# The answer formatted with %m/%d/%Y is
one_week_from_today.strftime('%m/%d/%Y')

# Q: The first day of 2019 is a Tuesday, and today is the first Monday of 2019. What is the date today in MM/DD/YYYY?
# If the first day of 2019 is a Tuesday, and today is the first Monday of 2019, then today is 6 days later.
today = datetime(2019, 1, 1) + relativedelta(days=6)
# The answer formatted with %m/%d/%Y is
today.strftime('%m/%d/%Y')

# Q: The concert was scheduled to be on 06/01/1943, but was delayed by one day to today. What is the date 10 days ago in MM/DD/YYYY?
# If the concert was scheduled to be on 06/01/1943, but was delayed by one day to today, then today is one day later.
today = datetime(1943, 6, 1) + relativedelta(days=1)
# 10 days ago,
ten_days_ago = today - relativedelta(days=10)
# The answer formatted with %m/%d/%Y is
ten_days_ago.strftime('%m/%d/%Y')

# Q: It is 4/19/1969 today. What is the date 24 hours later in MM/DD/YYYY?
# It is 4/19/1969 today.
today = datetime(1969, 4, 19)
# 24 hours later,
later = today + relativedelta(hours=24)
# The answer formatted with %m/%d/%Y is
later.strftime('%m/%d/%Y')

# Q: Jane thought today is 3/11/2002, but today is in fact Mar 12, which is 1 day later. What is the date 24 hours later in MM/DD/YYYY?
# If Jane thought today is 3/11/2002, but today is in fact Mar 12, then today is 3/12/2002.
today = datetime(2002, 3, 12)
# 24 hours later,
later = today + relativedelta(hours=24)
# The answer formatted with %m/%d/%Y is
later.strftime('%m/%d/%Y')

# Q: Jane was born on the last day of Feburary in 2001. Today is her 16-year-old birthday. What is the date yesterday in MM/DD/YYYY?
# If Jane was born on the last day of Feburary in 2001 and today is her 16-year-old birthday, then today is 16 years later.
today = datetime(2001, 2, 28) + relativedelta(years=16)
# Yesterday,
yesterday = today - relativedelta(days=1)
# The answer formatted with %m/%d/%Y is
yesterday.strftime('%m/%d/%Y')

# Q: {question}
""".strip() + '\n'
+ 25 | "Make sure the language definition is loaded or use Prism's Autoloader plugin."); 26 | } 27 | 28 | var lang = env.language; 29 | if (LANGUAGE_REGEX.test(lang) && !Prism.languages[lang]) { 30 | Prism.languages[lang] = Prism.languages.diff; 31 | } 32 | }); 33 | 34 | Prism.hooks.add('wrap', function (env) { 35 | var diffLanguage; var diffGrammar; 36 | 37 | if (env.language !== 'diff') { 38 | var langMatch = LANGUAGE_REGEX.exec(env.language); 39 | if (!langMatch) { 40 | return; // not a language specific diff 41 | } 42 | 43 | diffLanguage = langMatch[1]; 44 | diffGrammar = Prism.languages[diffLanguage]; 45 | } 46 | 47 | var PREFIXES = Prism.languages.diff && Prism.languages.diff.PREFIXES; 48 | 49 | // one of the diff tokens without any nested tokens 50 | if (PREFIXES && env.type in PREFIXES) { 51 | /** @type {string} */ 52 | var content = env.content.replace(HTML_TAG, ''); // remove all HTML tags 53 | 54 | /** @type {string} */ 55 | var decoded = content.replace(/</g, '<').replace(/&/g, '&'); 56 | 57 | // remove any one-character prefix 58 | var code = decoded.replace(/(^|[\r\n])./g, '$1'); 59 | 60 | // highlight, if possible 61 | var highlighted; 62 | if (diffGrammar) { 63 | highlighted = Prism.highlight(code, diffGrammar, diffLanguage); 64 | } else { 65 | highlighted = Prism.util.encode(code); 66 | } 67 | 68 | // get the HTML source of the prefix token 69 | var prefixToken = new Prism.Token('prefix', PREFIXES[env.type], [/\w+/.exec(env.type)[0]]); 70 | var prefix = Prism.Token.stringify(prefixToken, env.language); 71 | 72 | // add prefix 73 | var lines = []; var m; 74 | HTML_LINE.lastIndex = 0; 75 | while ((m = HTML_LINE.exec(highlighted))) { 76 | lines.push(prefix + m[0]); 77 | } 78 | if (/(?:^|[\r\n]).$/.test(decoded)) { 79 | // because both "+a\n+" and "+a\n" will map to "a\n" after the line prefixes are removed 80 | lines.push(prefix); 81 | } 82 | env.content = lines.join(''); 83 | 84 | if (diffGrammar) { 85 | env.classes.push('language-' + diffLanguage); 
86 | } 87 | } 88 | }); 89 | 90 | }()); 91 | -------------------------------------------------------------------------------- /scripts/pal/prompt/snarks.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Determine which of two sentences is sarcastic. 3 | 4 | According to Cambridge University Dictionary, sarcasm is "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way." Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks. 5 | 6 | # Q: Which statement is sarcastic? 7 | Options: 8 | (A) Yes, because having interests and actively researching them is a huge waste 9 | (B) Yes, because having interests and actively researching them is a huge deal 10 | A: Let's think step by step. 11 | If we look at (A), it says that having interests and actively researching them is a huge waste, implying that it is a useless effort. However, we know that having interests and actively researching them is typically not a waste but rather is beneficial to the individual. The presence of such a juxtaposition in (A) suggests that it contains a taste of irony and sarcasm. 12 | If we look at (B), it says that having interests and actively researching them is a huge deal, implying that it is an important and consequential effort. This is arguably a neutral and correct statement. 13 | Above the above, the sarcastic option is (A). So the answer is (A). 14 | 15 | # Q: Which statement is sarcastic? 16 | Options: 17 | (A) No one is going to disagree with you on this. Avoiding ad hominem attacks really help your case 18 | (B) No one is going to disagree with you on this. Ad hominem attacks really help your case 19 | A: Let's think step by step. 20 | If we look at (A), it says that avoiding ad hominem attacks really help your case, implying that ad hominem attacks are adverse and injurious. 
Because ad hominem attacks are adressed at a person rather than an idea, it is indeed true that avoiding them is often useful and helpful; so, (A) is a neutral (valid and agreeable) statement. 21 | If we look at (B), it says that ad hominem attacks really help your case, implying that ad hominem attacks are a positive thing. However, we stated previously that ad hominem attacks are often not useful or constructive. The speaker in this sentence therefore seems to mean the opposite of what they are saying; so, there appears to have a taste of irony and sarcasm in (B). 22 | Above the above, the sarcastic option is (B). So the answer is (B). 23 | 24 | # Q: Which statement is sarcastic? 25 | Options: 26 | (A) Consistency in the league's punishments? What do you think this is supposed to be, politics? 27 | (B) Consistency in the league's punishments? What do you think this is supposed to be, moral? 28 | A: Let's think step by step. 29 | If we look at (A), it likens the consistency in the league's punishments with that in politics. Because politics or political affairs are often not considered to be consistent or dependable, this sentence appears to be satirical. 30 | If we look at (B), it likens the consistency in the league's punishments with that in morality. Discussing the consistency of the league's punishments in the context of morality, ethics, or law makes sense and does not appear to make a satirical point about anything. 31 | Above the above, the sarcastic option is (A). So the answer is (A). 
"""Evaluate cached model generations under an Adaptive-Consistency stop criterion.

Replays stored per-question answer lists, asking the stopping criterion after
each additional generation whether sampling could have stopped, and reports
accuracy plus the average number of generations used.
"""

import argparse
import json

try:
    from tqdm import tqdm
except ImportError:  # progress bar is optional; evaluation works without it
    def tqdm(iterable, total=None):
        return iterable


def _parse_answers(answers, eval_as_str):
    """Coerce raw answers to comparable values, silently dropping unparseable ones.

    String mode keeps non-empty strings; numeric mode keeps values convertible
    to float. Mirrors the tolerant behaviour the evaluation relies on.
    """
    parsed = []
    for ans in answers:
        try:
            if eval_as_str:
                text = str(ans)
                if text.strip() == '':
                    continue
                parsed.append(text)
            else:
                parsed.append(float(ans))
        except (TypeError, ValueError):
            # Unparseable generation: skip it rather than abort the question.
            pass
    return parsed


def _is_correct(prediction, target, eval_as_str):
    """Return True when `prediction` matches `target` (exact string match, or
    numeric match within 1e-3). Logs and returns False on conversion errors."""
    try:
        if eval_as_str:
            return str(prediction).strip() == str(target).strip()
        return abs(float(str(prediction).strip()) - float(target)) < 1e-3
    except (TypeError, ValueError) as e:
        print('Error', prediction, e)
        return False


def main(dt, ac, min_gens=1, max_gens=40, eval_as_str=False):
    """Replay generations through the stopping criterion and score them.

    Args:
        dt: list of records with list-valued 'answers' and 'scores' plus a
            'target'; list fields are truncated to `max_gens` entries.
        ac: object exposing should_stop(answers, return_dict=True) ->
            {'most_common': ..., 'stop': bool} (e.g. adaptive_consistency.AC).
        min_gens: smallest number of generations to consider per question.
        max_gens: cap on generations read from each record.
        eval_as_str: compare answers as strings instead of floats.

    Returns:
        (correct_answers, total_answers, total_gens) tuple.
    """
    correct_answers = 0
    total_answers = len(dt)
    total_gens = 0
    for record in tqdm(dt, total=len(dt)):
        record = {k: (v[:max_gens] if isinstance(v, list) else v)
                  for k, v in record.items()}
        num_avail = len(record['scores'])
        for m in range(min_gens, num_avail + 1):
            parsed = _parse_answers(record['answers'][:m], eval_as_str)
            if not parsed:
                # Nothing parseable yet; if the budget is exhausted, charge
                # the full cost and move on to the next question.
                if m == num_avail:
                    total_gens += m
                continue
            outp = ac.should_stop(parsed, return_dict=True)
            if outp['stop'] or m == num_avail:
                total_gens += m
                if _is_correct(outp['most_common'], record['target'], eval_as_str):
                    correct_answers += 1
                break
    return correct_answers, total_answers, total_gens


if __name__ == '__main__':

    # Usage: python examples/eval_outputs.py --output_file examples/outputs/outputs.jsonl --stop_criteria beta --stop_criteria_thresh 0.95

    # Project import kept inside the CLI guard so `main` stays importable
    # (e.g. for testing) without the adaptive_consistency package installed.
    from adaptive_consistency import AC, stop_criteria_dict

    parser = argparse.ArgumentParser()
    parser.add_argument('--output_file', type=str, required=True)
    parser.add_argument('--stop_criteria', type=str, default=None)
    parser.add_argument('--stop_criteria_thresh', type=float, required=False, default=None)

    args = parser.parse_args()

    if args.stop_criteria is None:
        args.stop_criteria = 'always_false'
        print('No Stop Criteria Provided. Running Self-Consistency')

    criteria_cls = stop_criteria_dict[args.stop_criteria]
    if args.stop_criteria_thresh is None or args.stop_criteria_thresh == -1:
        ac = AC(max_gens=1000, stop_criteria=criteria_cls())
    else:
        ac = AC(max_gens=1000, stop_criteria=criteria_cls(conf_thresh=args.stop_criteria_thresh))

    # Close the JSONL file deterministically (the original leaked the handle).
    with open(args.output_file) as f:
        dt = [json.loads(line) for line in f]

    # GSM8K / ASDiv / SVAMP targets are numeric; everything else string-matches.
    eval_as_str = not any(tag in args.output_file for tag in ('gsm', 'asdiv', 'svamp'))

    correct_answers, total_answers, total_gens = main(dt, ac, eval_as_str=eval_as_str)
    print(f'Accuracy: {correct_answers}/{total_answers} ({correct_answers/total_answers*100:.2f}%)')
    print(f'Average Gens: {total_gens/total_answers:.2f}')
center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform 
# Few-shot chain-of-thought prompt for the BIG-Bench-Hard "ruin names" task.
# Fix relative to the previous version: the second exemplar's conclusion line
# ("Above the above, the only humorous edit is (C).") was duplicated; the
# duplicate is removed so all three exemplars end with a single, uniform
# "Above the above, ... So the answer is (X)." line.
MATH_PROMPT = '''
Select the humorous edit that 'ruins' the input movie or musical artist name.

# Q: Which of the following is a humorous edit of this artist or movie name: 'whitesnake'?
Options:
(A) whitesnape
(B) whitesnapke
(C) whitesnuake
(D) mwhitesnake
A: Let's think step by step.
The original name is "whitesnake". This is the name of an old English hard rock band. It is a compound word, formed by the words "white" and "snake".
(A) "whitesnape": It is formed by the combination of "white" and "snake"; therefore, "snake" has been changed to "snape". Snape makes a reference to the fictional character Severus Snape in the Harry Potter series, so (A) is indeed a meaningful and funny edit.
(B) "whitesnapke": It is formed by the combination of "white" and "snapke", but "snapke" is not an actual word; therefore, "whitesnapke" is not humorous.
(C) "whitesnuake": It is formed by the combination of "white" and "snuake", but "snuake" is not an actual word; therefore, "whitesnuake" is not humorous.
(D) "mwhitesnake": It is formed by the combination of "m", "white", and "snake", but the prefix "-m "seems arbitrary; therefore, "mwhitesnake" is not meaningful or humorous.
Above the above, the only humorous edit is (A). So the answer is (A).

# Q: Which of the following is a humorous edit of this artist or movie name: 'one of our dinosaurs is missing'?
Options:
(A) ofne of our dinosaurs is missing
(B) one af our dinosaurs is missing
(C) one of our dinosaurs is pissing
(D) one of our dinosaur is missing
A: Let's think step by step.
The original name is "one of our dinosaurs is missing". This is the name of an old British movie.
(A) "ofne of our dinosaurs is missing": Here "one of" is changed to "ofne", but the word "ofne" is not an actual word.
(B) "one af our dinosaurs is missing": Here the word "of" is changed to "af", but the word "af" is not an actual word.
(C) "one of our dinosaurs is pissing": Here the word "missing" is changed to "pissing", and "one of our dinosaurs is pissing" is indeed a very whimsical and mischievous edit. This change truly ruins the original title of the movie.
(D) "one of our dinosaur is missing": Here the word "dinosaurs" is changed to "dinosaur", but "dinosaur" is singular but should be plural in the title; this change therefore feels arbitrary and not humorous.
Above the above, the only humorous edit is (C). So the answer is (C).

# Q: Which of the following is a humorous edit of this artist or movie name: 'counting crows'?
Options:
(A) countingy crows
(B) counting cows
(C) courting crows
(D) coutnting crows
A: Let's think step by step.
The original name is "counting crows". This is the name of an American rock band. Historically, the band name comes from the British nursery rhyme "One for Sorrow", which is about counting of magpies.
(A) "countingy crows": Here the word "counting" is changed to "countingy", but the word "countingy" is not an actual word.
(B) "counting cows": Here the word "crows" is changed to "cows", and this is indeed a playful and meaningful edit that ruins the original name of the band.
(C) "courting crows": Here the word "counting" is changed to "courting", and "courting" is an actual word; however, "courting crows" does not sound as humorous as "counting cows".
(D) "coutnting crows": Here the word "counting" is changed to "coutnting", but the word "coutnting" is not an actual word.
Above the above, the only humorous edit is (B). So the answer is (B).

# Q: {question}
'''.strip() + '\n'
Let X be the chief and Y the counselor. The sentence is then of the form "X told Y that (X or Y) did something." 12 | Let's consider Y first: "X told Y that Y did something." This case does not make much sense, as Y would already have the information that Y did something, because it is information about themself. 13 | Now, consider X: "X told Y that X did something." This makes sense, because X would be sharing some information about themself that Y might not have known before. 14 | Because in this context, X is the chief and Y is the counselor, the answer should be the chief. So the answer is (A). 15 | 16 | # Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 17 | Sentence: The manager sent a message to the secretary, but he didn't reply yet. 18 | Options: 19 | (A) The secretary didn't reply yet 20 | (B) The manager didn't reply yet 21 | (C) Ambiguous 22 | A: Let's think step by step. 23 | Here we need to determine who the pronoun "he" might be referring to. There are two possible referents for "he", namely the manager and the secretary. The verbs "sent" and "reply" might be able to help us determine which one is more likely (if either). Let X be the manager and Y the secretary. The sentence is then of the form "X sent a message to Y, but (X or Y) didn't reply yet." 24 | Let's consider Y first: "X sent a message to Y, but Y didn't reply yet." This case makes sense, because of the implicit causality of the sentence. Y was the receiver of the message, but Y didn't get back to X yet. 25 | Now, consider X: "X sent a message to Y, but X didn't reply yet." This case doesn't make sense, because X was the initial sender of the message, so it is now Y's turn to write back to X. 26 | Because in this context, X is the manager and Y is the secretary, the answer should be the secretary. So the answer is (A). 
27 | 28 | # Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 29 | Sentence: Bailey will plan to meet the director at his office 30 | Options: 31 | (A) It will be Bailey's office 32 | (B) It will be the director's office 33 | (C) Ambiguous 34 | A: Let's think step by step. 35 | Here we need to determine who the pronoun "his" might be referring to. There are two possible referents for "his", namely Bailey's and the director's. The verb phrase "plan to meet" might be able to help us determine which one is more likely (if either). Let X be Bailey and Y the director. The sentence is then of the form "X will plan to meet Y at (X or Y)'s office." 36 | Let's consider Y first: "X will plan to meet Y at Y's office." This case makes sense, because X might want to meet up with Y at Y's office. 37 | Now, consider X: "X will plan to meet Y at X's office." This case also makes sense, because X might want to meet up with Y at X's own office. 38 | Because both X and Y are possible at the same time, we conclude that the antecedent of the pronoun is ambiguous. So the answer is (C). 39 | 40 | # Q: {question} 41 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/pal/core/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import openai 16 | import time 17 | import os 18 | 19 | openai.api_key = os.getenv('OPENAI_API_KEY') 20 | openai.organization = os.getenv('OPENAI_API_ORG') 21 | 22 | 23 | 24 | # GPT-3 API 25 | def call_gpt(prompt, model='code-davinci-002', stop=None, temperature=0., top_p=1.0, 26 | max_tokens=128, majority_at=None, logprobs = 0): 27 | num_completions = majority_at if majority_at is not None else 1 28 | num_completions_batch_size = 5 29 | 30 | completions = [] 31 | all_data = [] 32 | for i in range(20 * (num_completions // num_completions_batch_size + 1)): 33 | try: 34 | requested_completions = min(num_completions_batch_size, num_completions - len(completions)) 35 | if model == "gpt-3.5-turbo": 36 | # from pdb import set_trace; set_trace() 37 | ans = openai.ChatCompletion.create( 38 | model = model, 39 | max_tokens = max_tokens, 40 | # stop = stop, 41 | messages = [{"role": "user", "content": prompt},], 42 | temperature = temperature, 43 | top_p = top_p, 44 | n = requested_completions, 45 | # best_of = requested_completions 46 | ) 47 | # from pdb import set_trace; set_trace() 48 | 49 | completions.extend([choice['message']['content'] for choice in ans['choices']]) 50 | else: 51 | ans = openai.Completion.create( 52 | model=model, 53 | max_tokens=max_tokens, 54 | stop=stop, 55 | prompt=prompt, 56 | temperature=temperature, 57 | top_p=top_p, 58 | n=requested_completions, 59 | logprobs = logprobs, 60 | best_of=requested_completions) 61 | # from pdb import set_trace as bp 62 | # bp() 63 | all_data.extend([choice['logprobs'] for choice in ans['choices']]) 64 | completions.extend([choice['text'] for choice in ans['choices']]) 65 | if len(completions) >= num_completions: 66 | if logprobs !=0: 67 | return completions[:num_completions], all_data[:num_completions] 68 | else: 69 | return completions[:num_completions] 70 | except openai.error.RateLimitError 
as e: 71 | print(e, type(e)) 72 | print('Sleeping', min(i**2, 60)) 73 | time.sleep(min(i**2, 60)) 74 | except openai.error.InvalidRequestError as e: 75 | print(e, type(e)) 76 | max_tokens = int(max_tokens // 2) 77 | continue 78 | except Exception as e: 79 | print(e, type(e)) 80 | # 3/0 81 | print('Sleeping', min(i**2, 60)) 82 | time.sleep(min(i**2, 60)) 83 | continue 84 | raise RuntimeError('Failed to call GPT API') 85 | -------------------------------------------------------------------------------- /adaptive_consistency/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from typing import List, Any 4 | import warnings 5 | 6 | from .stopping_criterias import * 7 | 8 | class AC: 9 | ''' 10 | A class for using Adaptive Consistency for your LLM generations. 11 | 12 | Args: 13 | max_gens (int): Maximum number of generations to perform for each question. 14 | stop_criteria : StoppingCriterias: The stopping criteria function to use. 15 | verbose (bool): Whether to print verbose output. 16 | 17 | Attributes: 18 | max_gens (int): Maximum number of generations to perform. 19 | verbose (bool): Whether to print verbose output. 20 | stop_criteria: The stopping criteria function to use. 21 | ''' 22 | 23 | def __init__(self, max_gens : int = 40, stop_criteria = BetaStoppingCriteria, verbose : bool = False) -> None: 24 | ''' 25 | Initializes an instance of the AC class. 26 | 27 | Args: 28 | max_gens (int): Maximum number of generations to perform. 29 | stop_criteria (StoppingCriterias): The stopping criteria function to use. 30 | verbose (bool): Whether to print verbose output. 31 | ''' 32 | 33 | self.max_gens = max_gens 34 | self.verbose = verbose 35 | self.set_stop_criteria(stop_criteria) 36 | 37 | 38 | def set_max_gens(self, max_gens : int) -> None: 39 | ''' 40 | Sets the maximum number of generations per question. 41 | 42 | Args: 43 | max_gens (int): Maximum number of generations to perform. 
44 | ''' 45 | self.max_gens = max_gens 46 | 47 | def set_stop_criteria(self, stop_criteria : BetaStoppingCriteria) -> None: 48 | ''' 49 | Sets the stopping criteria function. 50 | 51 | Args: 52 | stop_criteria (StoppingCriterias): The stopping criteria function to use. 53 | ''' 54 | if isinstance(stop_criteria, str): 55 | if stop_criteria == 'beta': 56 | self.stop_criteria = BetaStoppingCriteria() 57 | elif stop_criteria == 'dirichlet': 58 | self.stop_criteria = DirichletStoppingCriteria() 59 | elif stop_criteria == 'random': 60 | self.stop_criteria = RandomStoppingCriteria() 61 | elif stop_criteria == 'majority': 62 | self.stop_criteria = MajorityStoppingCriteria() 63 | elif stop_criteria == 'entropy': 64 | self.stop_criteria = EntropyStoppingCriteria() 65 | else: 66 | raise ValueError(f"Unknown stopping criteria: {stop_criteria}") 67 | 68 | elif isinstance(stop_criteria, StoppingCriterias): 69 | # The function is already initialized, so we can use it directly 70 | self.stop_criteria = stop_criteria 71 | elif isinstance(stop_criteria, type): 72 | # The function is not initialized, so we need to initialize it 73 | self.stop_criteria = stop_criteria() 74 | 75 | def should_stop(self, answers : List[Any], return_dict : bool = False) -> bool: 76 | ''' 77 | Checks if the answers are consistent based on Adaptive Consistency Algorithm and corresponding Stopping Criteria. 78 | 79 | Args: 80 | answers (List): A list of answers to check consistency. 81 | return_dict (bool): Whether to return the full dictionary of output. 82 | 83 | Returns: 84 | Union[bool, Dict]: Whether the answers are consistent or not. If return_dict is True, returns the full dictionary of output. 
85 | ''' 86 | 87 | if len(answers) > self.max_gens: 88 | # Raise a warning 89 | if self.verbose: 90 | warnings.warn(f"Warning: max_gens ({self.max_gens}) reached.") 91 | 92 | 93 | should_stop = self.stop_criteria.should_stop(answers, verbose=self.verbose) 94 | if return_dict: 95 | return should_stop 96 | else: 97 | return should_stop['stop'] 98 | 99 | def eval_loop(self, eval_function, *args, **kwargs): 100 | ''' 101 | Runs AdaptiveConsistency Algorithm by repeatedly calling the evaluation function until the stopping criteria is met. 102 | 103 | Args: 104 | eval_function: The function to evaluate. 105 | *args: Additional positional arguments to pass to the eval_function. 106 | **kwargs: Additional keyword arguments to pass to the eval_function. 107 | 108 | Returns: 109 | List: A list of answers generated from evaluation function using AdaptiveConsistency. 110 | ''' 111 | answers = [] 112 | for _ in range(self.max_gens): 113 | answer = eval_function(*args, **kwargs) 114 | answers.append(answer) 115 | if self.is_consistent(answers): 116 | return answers 117 | 118 | 119 | stop_criteria_dict = { 120 | 'beta' : BetaStoppingCriteria, 121 | 'dirichlet' : DirichletStoppingCriteria, 122 | 'random' : RandomStoppingCriteria, 123 | 'majority' : MajorityStoppingCriteria, 124 | 'entropy' : EntropyStoppingCriteria, 125 | 'always_false' : AlwaysFalseStoppingCriteria, 126 | } -------------------------------------------------------------------------------- /scripts/pal/prompt/math_prompts.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | MATH_PROMPT = ''' 17 | # Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left? 18 | 19 | # solution in Python: 20 | 21 | 22 | def solution(): 23 | """Olivia has $23. She bought five bagels for $3 each. How much money does she have left?""" 24 | money_initial = 23 25 | bagels = 5 26 | bagel_cost = 3 27 | money_spent = bagels * bagel_cost 28 | money_left = money_initial - money_spent 29 | result = money_left 30 | return result 31 | 32 | 33 | 34 | 35 | 36 | # Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? 37 | 38 | # solution in Python: 39 | 40 | 41 | def solution(): 42 | """Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?""" 43 | golf_balls_initial = 58 44 | golf_balls_lost_tuesday = 23 45 | golf_balls_lost_wednesday = 2 46 | golf_balls_left = golf_balls_initial - golf_balls_lost_tuesday - golf_balls_lost_wednesday 47 | result = golf_balls_left 48 | return result 49 | 50 | 51 | 52 | 53 | 54 | # Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? 55 | 56 | # solution in Python: 57 | 58 | 59 | def solution(): 60 | """There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. 
How many computers are now in the server room?""" 61 | computers_initial = 9 62 | computers_per_day = 5 63 | num_days = 4 # 4 days between monday and thursday 64 | computers_added = computers_per_day * num_days 65 | computers_total = computers_initial + computers_added 66 | result = computers_total 67 | return result 68 | 69 | 70 | 71 | 72 | 73 | # Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now? 74 | 75 | # solution in Python: 76 | 77 | 78 | def solution(): 79 | """Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?""" 80 | toys_initial = 5 81 | mom_toys = 2 82 | dad_toys = 2 83 | total_received = mom_toys + dad_toys 84 | total_toys = toys_initial + total_received 85 | result = total_toys 86 | return result 87 | 88 | 89 | 90 | 91 | 92 | # Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? 93 | 94 | # solution in Python: 95 | 96 | 97 | def solution(): 98 | """Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?""" 99 | jason_lollipops_initial = 20 100 | jason_lollipops_after = 12 101 | denny_lollipops = jason_lollipops_initial - jason_lollipops_after 102 | result = denny_lollipops 103 | return result 104 | 105 | 106 | 107 | 108 | 109 | # Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? 110 | 111 | # solution in Python: 112 | 113 | 114 | def solution(): 115 | """Leah had 32 chocolates and her sister had 42. 
If they ate 35, how many pieces do they have left in total?""" 116 | leah_chocolates = 32 117 | sister_chocolates = 42 118 | total_chocolates = leah_chocolates + sister_chocolates 119 | chocolates_eaten = 35 120 | chocolates_left = total_chocolates - chocolates_eaten 121 | result = chocolates_left 122 | return result 123 | 124 | 125 | 126 | 127 | 128 | # Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? 129 | 130 | # solution in Python: 131 | 132 | 133 | def solution(): 134 | """If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?""" 135 | cars_initial = 3 136 | cars_arrived = 2 137 | total_cars = cars_initial + cars_arrived 138 | result = total_cars 139 | return result 140 | 141 | 142 | 143 | 144 | 145 | # Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? 146 | 147 | # solution in Python: 148 | 149 | 150 | def solution(): 151 | """There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?""" 152 | trees_initial = 15 153 | trees_after = 21 154 | trees_added = trees_after - trees_initial 155 | result = trees_added 156 | return result 157 | 158 | 159 | 160 | 161 | 162 | # Q: {question} 163 | 164 | # solution in Python: 165 | '''.strip() + '\n\n\n' -------------------------------------------------------------------------------- /scripts/pal/prompt/salient_translation.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Detect the type of error in an English translation of a German source sentence. 3 | 4 | # Q: The following translations from German to English contain a particular error. 
That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: In der Liste der Baudenkmale in Lenzen (Elbe) sind alle Baudenkmale der brandenburgischen Stadt Lenzen (Elbe) und ihrer Ortsteile aufgelistet. 5 | Translation: In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed. 6 | The translation contains an error pertaining to 7 | Options: 8 | (A) Modifiers or Adjectives 9 | (B) Numerical Values 10 | (C) Negation or Antonyms 11 | (D) Named Entities 12 | (E) Dropped Content 13 | (F) Facts 14 | A: Let's think step by step. 15 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "The list of monuments in Lenzen (Elbe) includes all the monuments in the Brandenburg town of Lenzen (Elbe) and its districts." On the other hand, the provided translation is "In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed." Note that Lenzen (Elbe) is changed to Lenzen in the original translation; so, there is a named entity error. 
Because an entity in the original source sentence is changed to a different entity in the translation, the translation contains an error pertaining to Named Entities. So the answer is (D). 16 | 17 | # Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Auf dieser Seite sind die Baudenkmäler der oberbayerischen Großen Kreisstadt Landsberg am Lech zusammengestellt. 18 | Translation: On this page are compiled the architectural monuments of the town of Landsberg am Lech. 19 | The translation contains an error pertaining to 20 | Options: 21 | (A) Modifiers or Adjectives 22 | (B) Numerical Values 23 | (C) Negation or Antonyms 24 | (D) Named Entities 25 | (E) Dropped Content 26 | (F) Facts 27 | A: Let's think step by step. 28 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "The monuments of the Upper Bavarian district town of Landsberg am Lech are compiled on this page." On the other hand, the provided translation is "On this page are compiled the architectural monuments of the town of Landsberg am Lech." 
Note that an important detail about the location of Landsberg am Lech is omitted in the original translation: The translation should have said "Upper Bavarian district town of Landsberg am Lech". Because a significant clause in the translation was removed, the translation contains an error pertaining to Dropped Content. So the answer is (E). 29 | 30 | # Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Łeba ist eine Kleinstadt und ein Badeort im Powiat Lęborski der polnischen Woiwodschaft Pommern. 31 | Translation: Eba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland. 32 | The translation contains an error pertaining to 33 | Options: 34 | (A) Modifiers or Adjectives 35 | (B) Numerical Values 36 | (C) Negation or Antonyms 37 | (D) Named Entities 38 | (E) Dropped Content 39 | (F) Facts 40 | A: Let's think step by step. 41 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "Łeba is a small town and seaside resort in the Powiat Lęborski of the Polish Pomeranian Voivodeship." 
On the other hand, the provided translation is "Łeba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland." Note that the provided sentence says, "Łeba is not a small town ..." However, the translation should have been "Łeba is a small town ..." Because a negation is introduced at the beginning of the sentence and has fundamentally changed the meaning of the original source, the translation contains an error pertaining to Negation or Antonyms. So the answer is (C). 42 | 43 | # Q: {question} 44 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/fastchat_server.py: -------------------------------------------------------------------------------- 1 | """Flask server for Vicuna-13b, returns results in OpenAI format.""" 2 | 3 | from flask import Flask, request, Response, stream_with_context, jsonify 4 | from fastchat.serve.cli import load_model, generate_stream 5 | import time 6 | import torch 7 | import logging 8 | from datetime import datetime 9 | from logging.handlers import TimedRotatingFileHandler 10 | from transformers import StoppingCriteriaList, MaxLengthCriteria, StoppingCriteria 11 | 12 | 13 | LOG_FILE = "api_requests.log" 14 | logging.basicConfig( 15 | level=logging.INFO, 16 | format="%(asctime)s - %(message)s", 17 | handlers=[ 18 | TimedRotatingFileHandler(LOG_FILE, when="D", interval=1, backupCount=30), 19 | logging.StreamHandler(), 20 | ], 21 | ) 22 | 23 | app = Flask(__name__) 24 | 25 | # Load the model and tokenizer 26 | model_name = "vicuna-13b" 27 | device = "cuda" 28 | num_gpus = "4" 29 | load_8bit = False 30 | debug = False 31 | model, tokenizer = load_model(model_name, device, num_gpus, load_8bit, debug) 32 | 33 | 34 | # Adapted from https://discuss.huggingface.co/t/implimentation-of-stopping-criteria-list/20040/7 35 | class CustomStopTokenCriteria(StoppingCriteria): 36 | def __init__(self, stops=[], len_input_ids=0, encounters=1): 37 | 
super().__init__() 38 | self.stops = stops 39 | self.len_input_ids = len_input_ids 40 | self.previous_len = len_input_ids 41 | self.min_stop_token_len = min([len(tokenizer.encode(stop)) for stop in stops]) 42 | 43 | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor): 44 | len_input_ids = len(input_ids[0]) 45 | 46 | # save some time by not checking for stop tokens until we've generated enough tokens. 47 | # this is not a perfect solution, but it's a good enough heuristic for now. 48 | if len_input_ids - self.previous_len < self.min_stop_token_len: 49 | return False 50 | self.previous_len = len_input_ids 51 | 52 | generated_tokens = input_ids[0][self.len_input_ids :] 53 | generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True) 54 | for stop in self.stops: 55 | if stop in generated_text: 56 | return True 57 | return False 58 | 59 | 60 | @torch.inference_mode() 61 | def generate_text( 62 | prompt, temperature=0.7, max_new_tokens=150, n=1, stop=None, top_p=0.9 63 | ): 64 | input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device) 65 | 66 | if stop: 67 | stopping_criteria = StoppingCriteriaList( 68 | [CustomStopTokenCriteria(stops=stop, len_input_ids=len(input_ids[0]))] 69 | ) 70 | 71 | output = model.generate( 72 | input_ids, 73 | max_new_tokens=max_new_tokens, 74 | temperature=temperature, 75 | num_return_sequences=n, 76 | top_p=top_p, 77 | do_sample=True if (n > 1 or temperature > 0) else False, 78 | no_repeat_ngram_size=0 if top_p is not None else None, 79 | eos_token_id=tokenizer.eos_token_id, 80 | stopping_criteria=stopping_criteria if stop else None, 81 | ) 82 | 83 | choices = [] 84 | for seq in output: 85 | completion = tokenizer.decode( 86 | seq[len(input_ids[0]) :], skip_special_tokens=True 87 | ) 88 | stop_token_present = None 89 | for stop_token in stop: 90 | if stop_token in completion: 91 | stop_token_present = stop_token 92 | break 93 | finish_reason = "stop_token" if stop and 
stop_token_present else "length" 94 | if stop_token_present: 95 | completion = completion.split(stop_token_present)[0] 96 | 97 | choices.append( 98 | { 99 | "text": completion, 100 | "index": len(choices), 101 | "logprobs": None, 102 | "finish_reason": finish_reason, 103 | } 104 | ) 105 | 106 | response = { 107 | "id": f"cmpl-{time.time()}", 108 | "object": "text_completion", 109 | "created": int(time.time()), 110 | "model": model_name, 111 | "choices": choices, 112 | "usage": { 113 | "prompt_tokens": len(input_ids[0]), 114 | "completion_tokens": sum( 115 | [len(tokenizer.encode(choice["text"])) for choice in choices] 116 | ), 117 | "total_tokens": len(input_ids[0]) 118 | + sum([len(tokenizer.encode(choice["text"])) for choice in choices]), 119 | }, 120 | } 121 | 122 | return response 123 | 124 | 125 | @app.route("/completion", methods=["POST"]) 126 | def completion(): 127 | data = request.get_json() 128 | prompt = data.get("prompt") 129 | temperature = float(data.get("temperature", 0.7)) 130 | max_new_tokens = int(data.get("max_tokens", 150)) 131 | n = int(data.get("n", 1)) 132 | stop = data.get("stop") 133 | top_p = data.get("top_p") 134 | 135 | if top_p is not None: 136 | top_p = float(top_p) 137 | 138 | response = generate_text(prompt, temperature, max_new_tokens, n, stop, top_p) 139 | output_str = "\n".join([choice["text"] for choice in response["choices"]]) 140 | log_entry = f"Input: {prompt}, Output: {output_str.strip()}, Params: temperature={temperature}, completion_tokens={response['usage']['completion_tokens']}, n={n}, stop={stop}, top_p={top_p}" 141 | logging.info(log_entry) 142 | return jsonify(response) 143 | 144 | 145 | @app.route("/chat", methods=["POST"]) 146 | def chat(): 147 | data = request.get_json() 148 | user_input = data.get("input") 149 | temperature = float(data.get("temperature", 0.7)) 150 | max_new_tokens = int(data.get("max_new_tokens", 512)) 151 | 152 | def generate_response(): 153 | params = { 154 | "prompt": user_input, 155 | 
"temperature": temperature, 156 | "max_new_tokens": max_new_tokens, 157 | "stop": tokenizer.eos_token, # Set the stop parameter to the tokenizer's EOS token 158 | } 159 | for response in generate_stream(model, tokenizer, params, device): 160 | yield response + "\n" 161 | 162 | return Response(stream_with_context(generate_response()), content_type="text/plain") 163 | 164 | 165 | if __name__ == "__main__": 166 | app.run(debug=True, host="0.0.0.0", port=5000) 167 | -------------------------------------------------------------------------------- /scripts/run_eval.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import argparse 4 | import tqdm 5 | import os 6 | 7 | import sys 8 | 9 | from pal import interface, runtime 10 | # from pal.prompt import math_prompts 11 | 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--append', action='store_true') 16 | parser.add_argument('--verbose', action='store_true') 17 | parser.add_argument('--dataset', default='gsm', type=str) 18 | parser.add_argument('--model', default='code-davinci-002', type=str) 19 | parser.add_argument('--temperature', default=0.0, type=float) 20 | parser.add_argument('--top_p', default=1.0, type=float) 21 | parser.add_argument('--max_tokens', default=256, type=int) 22 | parser.add_argument('--prompt_file', default="math_prompts", type=str) 23 | parser.add_argument('--end', default="\n\n\n", type=str) 24 | parser.add_argument('--prompt_type', default='code', type=str) 25 | parser.add_argument('--vicuna_url', default=None, type=str) 26 | parser.add_argument('--start_data', default=None, type=int) 27 | parser.add_argument('--end_data', default=None, type=int) 28 | parser.add_argument('--conf_thresh', default=0.99, type = float) 29 | parser.add_argument('--max_gens', default=40, type = int) 30 | parser.add_argument('--seed', default=1, type = int) 31 | parser.add_argument('--answer_type', default='float', type = str, 
help='Type of answer to expect. One of float or str') 32 | parser.add_argument('--stop_criteria', default=None, type = str, help='AdaptiveConsistency stop criteria to use. Defaults to Self-Consistency') 33 | parser.add_argument('--stop_criteria_thresh', default=0.95, type = float, help='AdaptiveConsistency stop criteria threshold to use. See AdaptiveConsistency for details') 34 | 35 | 36 | args = parser.parse_args() 37 | 38 | import importlib 39 | math_prompts = importlib.import_module(f'pal.prompt.{args.prompt_file}') 40 | 41 | 42 | DATA_PATH = f'datasets/{args.dataset}.jsonl' 43 | if not os.path.exists(DATA_PATH): 44 | DATA_PATH = f'datasets/{args.dataset}.json' 45 | if DATA_PATH.endswith('.jsonl'): 46 | examples = list(map(json.loads, open(DATA_PATH))) 47 | elif DATA_PATH.endswith('.json'): 48 | examples = json.load(open(DATA_PATH))['examples'] 49 | 50 | dataset_name = args.dataset 51 | if args.start_data is not None and args.end_data is None: 52 | examples = examples[args.start_data:] 53 | dataset_name += f'_{args.start_data}_end' 54 | elif args.start_data is None and args.end_data is not None: 55 | examples = examples[:args.end_data] 56 | dataset_name += f'_0_{args.end_data}' 57 | elif args.start_data is not None and args.end_data is not None: 58 | examples = examples[args.start_data:args.end_data] 59 | dataset_name += f'_{args.start_data}_{args.end_data}' 60 | 61 | OUTPUT_PATH = f'outputs/{args.model}/{dataset_name}/{dataset_name}_{args.max_gens}_{args.temperature}_stop{"self" if args.stop_criteria is None else args.stop_criteria}_seed{args.seed}.jsonl' 62 | os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True) 63 | 64 | 65 | 66 | if args.prompt_type != 'code' and args.prompt_type != 'text': 67 | print(f'Unknown prompt type: {args.prompt_type}') 68 | print('Defaulting to code prompt') 69 | args.prompt_type = 'code' 70 | 71 | answer_type = args.answer_type 72 | # answer_type = 'str' if args.dataset.find('date')!=-1 else 'float' 73 | if args.prompt_type == 
'code': 74 | 75 | # PAL style prompting 76 | if args.dataset.find('date')!=-1: 77 | itf = interface.AdaptiveProgramInterface( 78 | step_size = 1, 79 | max_gens=args.max_gens, 80 | runtime = runtime.DateRuntime(), 81 | stop=args.end, 82 | model=args.model, 83 | verbose=args.verbose, 84 | openai_url=args.vicuna_url, 85 | answer_type=answer_type, 86 | stop_criteria = args.stop_criteria, 87 | stop_criteria_thresh = args.stop_criteria_thresh, 88 | ) 89 | else: 90 | itf = interface.AdaptiveProgramInterface( 91 | step_size = 1, 92 | max_gens=args.max_gens, 93 | stop=args.end, 94 | get_answer_expr='solution()', 95 | model=args.model, 96 | verbose=args.verbose, 97 | openai_url=args.vicuna_url, 98 | answer_type=answer_type, 99 | stop_criteria = args.stop_criteria, 100 | stop_criteria_thresh = args.stop_criteria_thresh, 101 | ) 102 | 103 | 104 | elif args.prompt_type == 'text': 105 | # CoT style prompting 106 | itf = interface.AdaptiveTextInterface( 107 | step_size = 1, 108 | max_gens=args.max_gens, 109 | stop=args.end, 110 | model=args.model, 111 | openai_url=args.vicuna_url, 112 | stop_criteria = args.stop_criteria, 113 | stop_criteria_thresh = args.stop_criteria_thresh, 114 | ) 115 | 116 | 117 | 118 | if args.append: 119 | lines = open(OUTPUT_PATH).readlines() 120 | num_skip_exps = len(lines) 121 | scores = [x['score'] for x in map(json.loads, lines)] 122 | else: 123 | num_skip_exps = 0 124 | scores = [] 125 | 126 | with open(OUTPUT_PATH, 'a' if args.append else 'w') as f: 127 | pbar = tqdm.tqdm(examples[num_skip_exps:], initial=num_skip_exps, total=len(examples)) 128 | for x in pbar: 129 | question = x['input'] 130 | result = copy.copy(x) 131 | 132 | try: 133 | ans, answers = itf.run(math_prompts.MATH_PROMPT.format(question=question), 134 | temperature=args.temperature, top_p=args.top_p, 135 | max_tokens=args.max_tokens) 136 | if answer_type == 'float': 137 | ans = float(ans) 138 | score = 1 if abs(ans - x['target']) < 1e-3 else 0 139 | else: 140 | score = 1 if ans == 
x['target'] else 0 141 | except Exception as e: 142 | print('Error',e) 143 | ans = '' 144 | # Failed to load any answers 145 | answers = [] 146 | score = 0 147 | scores.append(score) 148 | 149 | result['answer'] = ans 150 | result['score'] = score 151 | result['generation'] = itf.history 152 | result['answers'] = answers 153 | f.write(json.dumps(result) + '\n') 154 | 155 | itf.clear_history() 156 | f.flush() 157 | 158 | print(f'Accuracy - {sum(scores) / len(scores)}') 159 | -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /scripts/pal/core/vicuna.py: -------------------------------------------------------------------------------- 1 | # Backend for a self-hosted API 2 | import os 3 | from pprint import pprint 4 | from 
from typing import Any, Dict

# from prompt_lib.backends.wrapper import BaseAPIWrapper


class OpenSourceAPIBackend:
    """Minimal HTTP client for a self-hosted completion server.

    The server URL comes from the ``SELF_HOSTED_URL`` environment variable
    when set; otherwise from the ``base_url`` constructor argument.
    """

    def __init__(self, base_url: str = None):
        self.base_url = base_url
        # The environment variable always overrides the constructor argument.
        if os.environ.get("SELF_HOSTED_URL"):
            self.base_url = os.environ.get("SELF_HOSTED_URL")

    @property
    def base_url(self):
        """Current server base URL (may be ``None`` if never configured)."""
        return self._base_url

    @base_url.setter
    def base_url(self, url):
        print(f"Setting base_url to {url}")
        self._base_url = url

    def completions(self, prompt, temperature=0.7, max_tokens=150, n=1,
                    stop=None, top_p=None, engine=None, logprobs=None):
        """POST a completion request to ``<base_url>/completion`` and return
        the decoded JSON response.

        NOTE(review): ``engine`` and ``logprobs`` are accepted for signature
        compatibility with OpenAI-style callers but are NOT forwarded to the
        server — confirm whether the self-hosted API expects them.
        """
        # Deferred import: ``requests`` is only needed when a request is
        # actually made, so importing this module stays dependency-free.
        import requests

        url = f"{self.base_url}/completion"
        data = {
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "n": n,
            "stop": stop,
            "top_p": top_p,
        }
        response = requests.post(url, json=data)
        return response.json()


# Module-level singleton shared by the wrapper and call_vicuna below.
api = OpenSourceAPIBackend()


class OpenSourceAPIWrapper:
    """Stateless helper that batches completion requests through ``api``."""

    @staticmethod
    def _call_api(
        prompt: str,
        max_tokens: int,
        engine: str,
        stop_token: str,
        temperature: float,
        num_completions: int = 1,
    ) -> dict:
        """Issue a single request for up to ``num_completions`` completions."""
        response = api.completions(
            engine=engine,
            prompt=prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stop=[stop_token],
            n=num_completions,
            logprobs=5,
        )
        return response

    @staticmethod
    def call(
        prompt: str,
        max_tokens: int,
        engine: str,
        stop_token: str,
        temperature: float,
        num_completions: int = 1,
    ) -> dict:
        """Request ``num_completions`` completions, transparently splitting
        large requests into chunks of at most 8 and concatenating the
        returned ``choices`` lists into one response dict.
        """
        # BUGFIX: callers (e.g. ``call_vicuna``) may pass ``None``; the old
        # code then raised ``TypeError`` on the ``>`` comparison below.
        if num_completions is None:
            num_completions = 1
        max_completions_in_one_call = 8
        if num_completions > max_completions_in_one_call:
            response_combined = dict()
            num_completions_remaining = num_completions
            for i in range(0, num_completions, max_completions_in_one_call):
                response = OpenSourceAPIWrapper._call_api(
                    prompt=prompt,
                    max_tokens=max_tokens,
                    engine=engine,
                    stop_token=stop_token,
                    temperature=temperature,
                    num_completions=min(num_completions_remaining, max_completions_in_one_call),
                )
                num_completions_remaining -= max_completions_in_one_call
                print(f"Remaining completions: {num_completions_remaining}")
                if i == 0:
                    response_combined = response
                else:
                    response_combined["choices"] += response["choices"]

            return response_combined
        response = OpenSourceAPIWrapper._call_api(
            prompt=prompt,
            max_tokens=max_tokens,
            engine=engine,
            stop_token=stop_token,
            temperature=temperature,
            num_completions=num_completions,
        )

        return response

    @staticmethod
    def get_first_response(response) -> Dict[str, Any]:
        # NOTE(review): ``get_api_wrapper`` is not defined anywhere in this
        # module, so calling this raises AttributeError. Presumably meant to
        # come from prompt_lib's BaseAPIWrapper — confirm before use.
        api_wrapper = OpenSourceAPIWrapper.get_api_wrapper(response["model"])
        return api_wrapper.get_first_response(response)

    @staticmethod
    def get_majority_answer(response) -> Dict[str, Any]:
        # NOTE(review): see get_first_response — relies on undefined helper.
        api_wrapper = OpenSourceAPIWrapper.get_api_wrapper(response["model"])
        return api_wrapper.get_majority_answer(response)

    @staticmethod
    def get_all_responses(response) -> Dict[str, Any]:
        # NOTE(review): see get_first_response — relies on undefined helper.
        api_wrapper = OpenSourceAPIWrapper.get_api_wrapper(response["model"])
        return api_wrapper.get_all_responses(response)


def call_vicuna(prompt, model='self-vulcan-13b', stop=None, temperature=0., top_p=1.0,
                max_tokens=128, majority_at=None, logprobs=0, url=None):
    """Sample completions for ``prompt`` from the self-hosted server.

    Args:
        prompt: full prompt text sent to the model.
        model: engine name forwarded to the server.
        stop: stop token for generation.
        temperature, top_p, max_tokens: standard sampling knobs
            (``top_p`` and ``logprobs`` are currently not forwarded).
        majority_at: number of completions to draw (None -> 1).
        url: server base URL; falls back to the hard-coded default host.

    Returns:
        List of completion text strings.
    """
    print('Lets go!', temperature)
    if url is None:
        api.base_url = "http://128.2.205.154:8081"
    else:
        api.base_url = url
    wrapper = OpenSourceAPIWrapper()
    print('Calling Wrapper')
    response = wrapper.call(
        prompt=prompt,
        max_tokens=max_tokens,
        # BUGFIX: the ``model`` argument was silently ignored (the engine was
        # hard-coded to 'self-vulcan-13b'); forward it instead.
        engine=model,
        stop_token=stop,
        temperature=temperature,
        num_completions=majority_at,
    )
    print('Wrapper Call Done')
    completions = [choice['text'] for choice in response['choices']]
    return completions


def test():
    """Manual smoke test against a live server (network access required)."""

    wrapper = OpenSourceAPIWrapper()
    api.base_url = "http://128.2.205.154:8081"

    response = wrapper.call(
        prompt="The quick brown fox",
        max_tokens=10,
        engine="self-vulcan-13b",
        stop_token="\n",
        temperature=0.7,
    )

    pprint(response)
    # BUGFIX: the original crashed here on purpose with ``3/0`` to skip the
    # remaining checks; an explicit early return makes that intent readable.
    # The unreachable checks below are kept for reference.
    return

    test_api = OpenSourceAPIBackend()
    test_api.base_url = "128.2.205.154:5000"
    # assert test_api.base_url == "http://pitt.lti.cs.cmu.edu:5000", f"api.base_url: {test_api.base_url}"
    pprint(
        test_api.completions(
            "The quick brown fox",
            max_tokens=10,
            n=1,
            stop="\n",
            temperature=0.7,
            engine="self-vulcan-13b",
        )
    )

    test_api.base_url = None

    # make sure it fails
    import unittest

    api.base_url = None
    unittest.TestCase().assertRaises(
        Exception,
        wrapper.call,
        prompt="The quick brown fox",
        max_tokens=10,
        engine="self-vulcan-13b",
        stop_token="\n",
        temperature=0.7,
    )

    # environment variable
    os.environ["SELF_HOSTED_URL"] = "128.2.205.154:5000"

    test_api = OpenSourceAPIBackend()
    pprint(
        test_api.completions(
            "The quick brown fox",
            max_tokens=10,
            n=1,
            stop="\n",
            temperature=0.7,
            engine="self-vulcan-13b",
        )
    )


if __name__ == "__main__":
    test()

4 | Website • 5 | Paper 6 | 7 |

8 | 9 | 10 |

11 | 12 | GitHub license 14 | 15 | 16 | Twitter 17 | 18 | 19 | Twitter 20 | 21 |

22 | 23 | 24 | 25 | 26 | [Pranjal Aggarwal](https://github.com/Pranjal2041), [Aman Madaan](https://madaan.github.io/), [Yiming Yang](https://www.cs.cmu.edu/~./yiming/), [Mausam](https://www.cse.iitd.ac.in/~mausam/) 27 | 28 | 29 | ## Abstract 30 | >A popular approach for improving the correctness of output from large language models (LLMs) is Self-Consistency - poll the LLM multiple times and output the most frequent solution. Existing Self-Consistency techniques always draw a constant number of samples per question, where a better approach will be to non-uniformly distribute the available budget based on the amount of agreement in the samples drawn so far. In response, we introduce Adaptive-Consistency, a cost-efficient, model-agnostic technique that dynamically adjusts the number of samples per question using a lightweight stopping criterion. Our experiments over 13 datasets and two LLMs demonstrate that Adaptive-Consistency reduces sample budget by up to 6.0 times with an average accuracy drop of less than 0.1%. 31 | > 32 | 33 | ![AdaptiveConsistency](docs/static/images/ac_teaser_new.png) 34 | 35 | 36 | 37 | 38 | # Adaptive Consistency: 39 | This repository contains code for: 40 | 1. Adaptive-Consistency Library for Running efficient LLM generation using [Adaptive-Consistency](http://sample-step-by-step.info) in your code. 41 | 2. Code to reproduce results of [Adaptive-Consistency](https://arxiv.org/abs/2305.11860). 42 | 43 | ## Installation 44 | 45 | ### From PyPi 46 | 47 | ```bash 48 | pip install AdaptiveConsistency 49 | ``` 50 | 51 | ### From Source 52 | 53 | First, clone the repo: 54 | ```bash 55 | git clone https://github.com/Pranjal2041/AdaptiveConsistency.git 56 | ``` 57 | 58 | Next install the package using: 59 | ```bash 60 | python setup.py install 61 | ``` 62 | 63 | ## Usage 64 | 65 | Using Adaptive Consistency in your code requires only 2-3 lines of changes in your existing framework. 66 | 67 | ### 1. 
Importing the library 68 | 69 | ```python 70 | from adaptive_consistency import AC, BetaStoppingCriteria 71 | ``` 72 | 73 | ### 2. Initializing the library 74 | 75 | ```python 76 | ac = AC(model, stopping_criteria=BetaStoppingCriteria(0.95), max_gens = 40) 77 | ``` 78 | 79 | Default stopping_criteria is BetaStoppingCriteria with confidence threshold = 0.95 (as used in the paper). 80 | 81 | ### 3. Using the library 82 | 83 | You can directly run a whole loop of evaluation using: 84 | 85 | ```python 86 | ac.eval_loop(sampling_function, *args, **kwargs) 87 | ``` 88 | 89 | For example, if using Openai api for sampling, you can use: 90 | 91 | ```python 92 | import openai 93 | 94 | answers = ac.eval_loop(openai.Completion.create, engine="text-davinci-003", prompt="Solve the following question:", max_tokens=5) 95 | ``` 96 | 97 | Or you can check for consistency of answers (and decide to break) at each step: 98 | 99 | ```python 100 | answers = [] 101 | for i in range(40): 102 | answers.append(generate_answer_from_model()) # Example openai.Completion.create 103 | if ac.should_stop(answers): 104 | break 105 | ``` 106 | 107 | Note: In the `generate_answer_from_model` function, you may want to extract the final answer after sampling from LLM. 108 | 109 | 110 | ### 4. Stoppping Criterias 111 | 112 | You can use one of the following Stopping Criterias: 113 | 114 | 1. `BetaStoppingCriteria (beta)`: Uses the Beta Distribution to guide the stopping criteria. This is the default stopping criteria. 115 | 2. `DirichletStoppingCriteria (dirichlet)`: Uses the Dirichlet Distribution to guide the stopping criteria. 116 | 3. `EntropyStoppingCriteria (entropy)`: Uses the Entropy of the distribution to guide the stopping criteria. 117 | 4. `MajorityStoppingCriteria (majority)`: Uses the Majority ratio of the top element in the distribution to guide the stopping criteria. 118 | 5. `RandomStoppingCriteria (random)`: Randomly stops the sampling process with a pre-defined probability. 119 | 6. 
`CRPStoppingCriteria (crp)`: Uses the Chinese Restaurant Process to guide the stopping criteria. 120 | 121 | Check out the [paper](https://arxiv.org/abs/2305.11860) for more details. 122 | 123 | 124 | ## Reproducing Numbers 125 | 126 | 127 | ### 1. Downloading the data 128 | 129 | Run, 130 | 131 | ```bash 132 | bash download_data.sh 133 | ``` 134 | 135 | ### 2. Downloading Model Outputs 136 | 137 | We provide the model outputs for all the models used in the paper. You can download them using: 138 | 139 | ```bash 140 | bash download_outputs.sh 141 | ``` 142 | 143 | These model outputs will work for all experiments in the paper. 144 | 145 | ### 3. Running Generations 146 | 147 | If you decide to skip the previous step, you can run your generations on your own. You can use the following command: 148 | 149 | ```bash 150 | bash scripts/run_self_consistency.sh 151 | bash scripts/run_adaptive_consistency.sh 152 | ``` 153 | 154 | By default, `beta` function will be used for stopping criteria. You can change it by passing the `stopping_criteria` and corresponding Confidence Threshold as arguments. For example, to use `entropy` stopping criteria, with a Confidence Threshold of 0.75, you can use: 155 | 156 | ```bash 157 | bash scripts/run_adaptive_consistency.sh entropy 0.75 158 | ``` 159 | 160 | This step will print the final accuracy on the terminal. 161 | 162 | ### 4. Running Eval on Model Outputs 163 | 164 | You can skip Step 3, and directly run eval on the model outputs. You can use the following command: 165 | 166 | ```bash 167 | python eval_outputs.py --output_file --stop_criteria --stop_criteria_thresh 168 | ``` 169 | 170 | This will print the average generations and accuracy on the terminal. 
171 | 172 | 173 | 174 | 175 | 176 | ## Citation 177 | 178 | ```bibtex 179 | @misc{aggarwal2023lets, 180 | title={Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning with LLMs}, 181 | author={Pranjal Aggarwal and Aman Madaan and Yiming Yang and Mausam}, 182 | year={2023}, 183 | eprint={2305.11860}, 184 | archivePrefix={arXiv}, 185 | primaryClass={cs.CL} 186 | } 187 | ``` 188 | 189 | ## LICENSE 190 | 191 | Adaptive-Consistency is Apache licensed, as found in the [LICENSE](LICENSE) file. 192 | -------------------------------------------------------------------------------- /docs/static/css/prism.css: -------------------------------------------------------------------------------- 1 | /* PrismJS 1.29.0 2 | https://prismjs.com/download.html#themes=prism-okaidia&languages=markup+css+clike+javascript+css-extras+diff+python&plugins=line-highlight+line-numbers+custom-class+show-language+autoloader+toolbar+copy-to-clipboard+diff-highlight */ 3 | /** 4 | * okaidia theme for JavaScript, CSS and HTML 5 | * Loosely based on Monokai textmate theme by http://www.monokai.nl/ 6 | * @author ocodia 7 | */ 8 | 9 | code[class*="language-"], 10 | pre[class*="language-"] { 11 | color: #f8f8f2; 12 | background: none; 13 | text-shadow: 0 1px rgba(0, 0, 0, 0.3); 14 | font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace; 15 | font-size: 1em; 16 | text-align: left; 17 | white-space: pre; 18 | word-spacing: normal; 19 | word-break: normal; 20 | word-wrap: normal; 21 | line-height: 1.5; 22 | 23 | -moz-tab-size: 4; 24 | -o-tab-size: 4; 25 | tab-size: 4; 26 | 27 | -webkit-hyphens: none; 28 | -moz-hyphens: none; 29 | -ms-hyphens: none; 30 | hyphens: none; 31 | } 32 | 33 | /* Code blocks */ 34 | pre[class*="language-"] { 35 | padding: 1em; 36 | margin: .5em 0; 37 | overflow: auto; 38 | border-radius: 0.3em; 39 | } 40 | 41 | :not(pre) > code[class*="language-"], 42 | pre[class*="language-"] { 43 | background: #272822; 44 | } 45 | 46 | /* Inline code */ 47 | 
:not(pre) > code[class*="language-"] { 48 | padding: .1em; 49 | border-radius: .3em; 50 | white-space: normal; 51 | } 52 | 53 | .token.comment, 54 | .token.prolog, 55 | .token.doctype, 56 | .token.cdata { 57 | color: #8292a2; 58 | } 59 | 60 | .token.punctuation { 61 | color: #f8f8f2; 62 | } 63 | 64 | .token.namespace { 65 | opacity: .7; 66 | } 67 | 68 | .token.property, 69 | .token.tag, 70 | .token.constant, 71 | .token.symbol, 72 | .token.deleted { 73 | color: #f92672; 74 | } 75 | 76 | .token.boolean, 77 | .token.number { 78 | color: #ae81ff; 79 | } 80 | 81 | .token.selector, 82 | .token.attr-name, 83 | .token.string, 84 | .token.char, 85 | .token.builtin, 86 | .token.inserted { 87 | color: #a6e22e; 88 | } 89 | 90 | .token.operator, 91 | .token.entity, 92 | .token.url, 93 | .language-css .token.string, 94 | .style .token.string, 95 | .token.variable { 96 | color: #f8f8f2; 97 | } 98 | 99 | .token.atrule, 100 | .token.attr-value, 101 | .token.function, 102 | .token.class-name { 103 | color: #e6db74; 104 | } 105 | 106 | .token.keyword { 107 | color: #66d9ef; 108 | } 109 | 110 | .token.regex, 111 | .token.important { 112 | color: #fd971f; 113 | } 114 | 115 | .token.important, 116 | .token.bold { 117 | font-weight: bold; 118 | } 119 | .token.italic { 120 | font-style: italic; 121 | } 122 | 123 | .token.entity { 124 | cursor: help; 125 | } 126 | 127 | pre[data-line] { 128 | position: relative; 129 | padding: 1em 0 1em 3em; 130 | } 131 | 132 | 133 | .line-highlight[data-start] { 134 | position: absolute; 135 | left: 0; 136 | right: 0; 137 | padding: inherit 0; 138 | margin-top: 1em; /* Same as .prism’s padding-top */ 139 | 140 | /* background: hsla(24, 20%, 50%,.08); */ 141 | /* background: linear-gradient(to left, hsla(24, 20%, 50%,.1) 70%, hsla(24, 20%, 50%,0)); */ 142 | /* #a6e22e */ 143 | background: linear-gradient(to right, rgba(166, 226, 46,.1) 70%, rgba(166, 226, 46,.0)); 144 | 145 | pointer-events: none; 146 | 147 | line-height: inherit; 148 | white-space: pre; 
149 | } 150 | 151 | 152 | 153 | 154 | 155 | 156 | @media print { 157 | .line-highlight { 158 | /* 159 | * This will prevent browsers from replacing the background color with white. 160 | * It's necessary because the element is layered on top of the displayed code. 161 | */ 162 | -webkit-print-color-adjust: exact; 163 | color-adjust: exact; 164 | } 165 | } 166 | 167 | .line-highlight:before, 168 | .line-highlight[data-end]:after { 169 | content: "+"; 170 | /* attr(data-start); */ 171 | position: absolute; 172 | top: .4em; 173 | left: .6em; 174 | min-width: 1em; 175 | padding: 0 .5em; 176 | background-color: hsla(109, 73%, 55%, 0.5); 177 | color: hsl(24, 20%, 95%); 178 | font: bold 68%/1.5 sans-serif; 179 | text-align: center; 180 | vertical-align: .3em; 181 | border-radius: 999px; 182 | text-shadow: none; 183 | box-shadow: 0 1px white; 184 | } 185 | 186 | .line-highlight[data-end]:after { 187 | content: "-"; 188 | position: absolute; 189 | 190 | background-color: hsla(0, 88%, 54%, 1); 191 | color: hsl(24, 20%, 95%); 192 | font: bold 68%/1.5 sans-serif; 193 | /* font-size: medium; */ 194 | text-align: center; 195 | vertical-align: .3em; 196 | border-radius: 9999px; 197 | text-shadow: none; 198 | box-shadow: 0 1px white; 199 | /* top: .4em; */ 200 | top: auto; 201 | bottom: .4em; 202 | position: absolute; 203 | left: .6em; 204 | min-width: 1.6em; 205 | min-height: 1.6em; 206 | 207 | padding: 0 .5em; 208 | 209 | /* Shift it a right */ 210 | 211 | /* Increase the size of this circle */ 212 | /* font-size: 1.5em; */ 213 | /* Make it opaque */ 214 | /* opacity: 1; 215 | /* Above doesn't work */ 216 | } 217 | 218 | .line-numbers .line-highlight:before, 219 | .line-numbers .line-highlight:after { 220 | content: none; 221 | } 222 | 223 | pre[id].linkable-line-numbers span.line-numbers-rows { 224 | pointer-events: all; 225 | } 226 | pre[id].linkable-line-numbers span.line-numbers-rows > span:before { 227 | cursor: pointer; 228 | } 229 | pre[id].linkable-line-numbers 
span.line-numbers-rows > span:hover:before { 230 | background-color: rgba(128, 128, 128, .2); 231 | } 232 | 233 | pre[class*="language-"].line-numbers { 234 | position: relative; 235 | padding-left: 3.8em; 236 | counter-reset: linenumber; 237 | } 238 | 239 | pre[class*="language-"].line-numbers > code { 240 | position: relative; 241 | white-space: inherit; 242 | } 243 | 244 | .line-numbers .line-numbers-rows { 245 | position: absolute; 246 | pointer-events: none; 247 | top: 0; 248 | font-size: 100%; 249 | left: -3.8em; 250 | width: 3em; /* works for line-numbers below 1000 lines */ 251 | letter-spacing: -1px; 252 | border-right: 1px solid #999; 253 | 254 | -webkit-user-select: none; 255 | -moz-user-select: none; 256 | -ms-user-select: none; 257 | user-select: none; 258 | 259 | } 260 | 261 | .line-numbers-rows > span { 262 | display: block; 263 | counter-increment: linenumber; 264 | } 265 | 266 | .line-numbers-rows > span:before { 267 | content: counter(linenumber); 268 | color: #999; 269 | display: block; 270 | padding-right: 0.8em; 271 | text-align: right; 272 | } 273 | 274 | div.code-toolbar { 275 | position: relative; 276 | } 277 | 278 | div.code-toolbar > .toolbar { 279 | position: absolute; 280 | z-index: 10; 281 | top: .3em; 282 | right: .2em; 283 | transition: opacity 0.3s ease-in-out; 284 | opacity: 0; 285 | } 286 | 287 | div.code-toolbar:hover > .toolbar { 288 | opacity: 1; 289 | } 290 | 291 | /* Separate line b/c rules are thrown out if selector is invalid. 292 | IE11 and old Edge versions don't support :focus-within. 
*/ 293 | div.code-toolbar:focus-within > .toolbar { 294 | opacity: 1; 295 | } 296 | 297 | div.code-toolbar > .toolbar > .toolbar-item { 298 | display: inline-block; 299 | } 300 | 301 | div.code-toolbar > .toolbar > .toolbar-item > a { 302 | cursor: pointer; 303 | } 304 | 305 | div.code-toolbar > .toolbar > .toolbar-item > button { 306 | background: none; 307 | border: 0; 308 | color: inherit; 309 | font: inherit; 310 | line-height: normal; 311 | overflow: visible; 312 | padding: 0; 313 | -webkit-user-select: none; /* for button */ 314 | -moz-user-select: none; 315 | -ms-user-select: none; 316 | } 317 | 318 | div.code-toolbar > .toolbar > .toolbar-item > a, 319 | div.code-toolbar > .toolbar > .toolbar-item > button, 320 | div.code-toolbar > .toolbar > .toolbar-item > span { 321 | color: #bbb; 322 | font-size: .8em; 323 | padding: 0 .5em; 324 | background: #f5f2f0; 325 | background: rgba(224, 224, 224, 0.2); 326 | box-shadow: 0 2px 0 0 rgba(0,0,0,0.2); 327 | border-radius: .5em; 328 | } 329 | 330 | div.code-toolbar > .toolbar > .toolbar-item > a:hover, 331 | div.code-toolbar > .toolbar > .toolbar-item > a:focus, 332 | div.code-toolbar > .toolbar > .toolbar-item > button:hover, 333 | div.code-toolbar > .toolbar > .toolbar-item > button:focus, 334 | div.code-toolbar > .toolbar > .toolbar-item > span:hover, 335 | div.code-toolbar > .toolbar > .toolbar-item > span:focus { 336 | color: inherit; 337 | text-decoration: none; 338 | } 339 | 340 | pre.diff-highlight > code .token.deleted:not(.prefix), 341 | pre > code.diff-highlight .token.deleted:not(.prefix) { 342 | background-color: rgba(255, 0, 0, .1); 343 | color: inherit; 344 | display: block; 345 | } 346 | 347 | pre.diff-highlight > code .token.inserted:not(.prefix), 348 | pre > code.diff-highlight .token.inserted:not(.prefix) { 349 | background-color: rgba(0, 255, 128, .1); 350 | color: inherit; 351 | display: block; 352 | } 353 | 354 | -------------------------------------------------------------------------------- 
"""Stopping criteria for Adaptive-Consistency sampling.

Each criterion inspects the answers sampled so far and decides whether
enough agreement has been reached to stop drawing further samples.
Every ``should_stop`` returns a dict with keys ``'most_common'`` (the
current majority answer), ``'prob'`` (criterion-specific confidence, -1
when unavailable) and ``'stop'`` (bool).
"""
import numpy as np
from typing import List, Dict
from collections import Counter
from scipy import integrate, stats


class StoppingCriterias:
    """Abstract base class; subclasses implement ``should_stop``."""

    def __init__(self, *args, **kwargs):

        ...

    def should_stop(self, *args, **kwargs) -> Dict:
        """Return a dict with keys 'most_common', 'prob' and 'stop'."""
        ...


class BetaStoppingCriteria(StoppingCriterias):
    """Stop when the posterior probability that the current majority answer
    is the true majority exceeds ``conf_thresh``.

    Models the top-two answer counts (a, b) with a Beta-style posterior and
    computes P(p > 0.5) by numerical integration of x^a (1-x)^b.
    """

    def __init__(self, conf_thresh : float = 0.95) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:
        """Decide whether sampling can stop given ``answers`` so far.

        ``conf_thresh`` optionally overrides the instance threshold
        (NOTE(review): annotated ``int`` but used as a float probability).
        ``verbose`` is unused here; kept for interface uniformity.
        """

        if conf_thresh is None: conf_thresh = self.conf_thresh


        # Counts of the two most frequent answers; b = 0 when only one
        # distinct answer has been seen so far.
        most_common = Counter(answers).most_common(2)
        if len(most_common) == 1:
            a, b = most_common[0][1], 0
        else:
            a, b= most_common[0][1], most_common[1][1]
        a = float(a)
        b = float(b)

        return_dict = {
            'most_common' : most_common[0][0],
            'prob' : -1,
            'stop' : False,
        }


        try:
            # P(p > 0.5) under the unnormalized Beta(a+1, b+1) posterior:
            # ratio of the [0.5, 1] integral to the full [0, 1] integral.
            prob = integrate.quad(lambda x : x**(a) * (1-x)**(b), 0.5, 1)[0] / integrate.quad(lambda x : x**(a) * (1-x)**(b), 0, 1)[0]
        except Exception as e:
            # print error message
            # On integration failure, fall back to "keep sampling".
            print(f"Error during numerical integration: {e}")
            return_dict['stop'] = False
            return_dict['prob'] = -1
            return return_dict
        return_dict['prob'] = prob
        return_dict['stop'] = prob >= conf_thresh
        return return_dict

class RandomStoppingCriteria(StoppingCriterias):
    """Baseline: stop with fixed probability ``conf_thresh`` at each step,
    independent of the answers seen."""

    def __init__(self, conf_thresh : float = 0.1) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        return_dict = {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : 0,
            'stop' : np.random.uniform(0,1) < conf_thresh,
        }
        return return_dict

class EntropyStoppingCriteria(StoppingCriterias):
    """Stop when the normalized entropy of the answer distribution drops to
    or below ``conf_thresh`` (low entropy = high agreement)."""

    def __init__(self, conf_thresh : float = 0.75) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        counter = dict(Counter(answers))
        lis = list(counter.values())
        # With a single distinct answer the entropy would be trivially 0 and
        # the log2(len(lis)) normalizer 0; pad with a phantom count of 1.
        if len(lis) < 2:
            lis.append(1)
        entropy = stats.entropy(lis, base = 2)
        return_dict = {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : -1,
            'stop' : False,
        }
        # Never stop after a single sample.
        if len(answers) != 1:
            return_dict['stop'] = entropy/np.log2(len(lis)) <= conf_thresh
            return_dict['prob'] = entropy/np.log2(len(lis))

        return return_dict

class MajorityStoppingCriteria(StoppingCriterias):
    """Stop when the majority answer's share of all samples reaches
    ``conf_thresh``."""

    def __init__(self, conf_thresh : float = 0.8) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        return_dict = {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : -1,
            'stop' : False,
        }
        # Never stop after a single sample.
        if len(answers) != 1:
            return_dict['stop'] = Counter(answers).most_common(1)[0][1]/len(answers) >= conf_thresh
            return_dict['prob'] = Counter(answers).most_common(1)[0][1]/len(answers)

        return return_dict

class DirichletStoppingCriteria(StoppingCriterias):
    """Generalization of the Beta criterion to more than two distinct
    answers, using a Dirichlet posterior over the top-k answer counts.

    The confidence P(majority answer is the true mode) is an integral over
    a region of the probability simplex, evaluated either by Monte-Carlo
    sampling (``use_markov=True``) or by nested scipy quadrature.
    """

    def __init__(self, conf_thresh : float = 0.95, top_k_elements : int = 5, use_markov : bool = True) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh
        # Only the k most frequent answers enter the Dirichlet model.
        self.top_k_elements = top_k_elements
        # True: vectorized Monte-Carlo integration; False: integrate.nquad.
        self.use_markov = use_markov

    def integrate_mcs(self, f, limits, N = 10000):
        """Monte-Carlo estimate of the integral of ``f`` over nested,
        data-dependent limits.

        Each entry of ``limits`` maps the previously drawn samples to a
        [lo, hi] range for the next variable; limits are supplied
        outermost-last, hence the reversed iteration. Returns a scalar.
        """
        ranges = []
        samples = []

        for _, funcs in enumerate(limits[::-1]):

            if len(samples) == 0:
                ranges.append(funcs())
            else:
                ranges.append(funcs(*samples[::-1]))
            # TODO: Note, we assume, that the first value is actually a scalar.
            try:
                ranges[-1][0] = ranges[-1][0][0]
            except: ...

            samples.append(np.random.uniform(*ranges[-1], size=N))
        # Weight function values by the volume of the sampled box.
        integrand_values = f(*samples) * np.prod([r[1] - r[0] for r in ranges], axis=0)

        integral_approximation = (1/N) * np.sum(integrand_values)
        return integral_approximation


    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        counts = dict(Counter(answers))
        # With fewer than 3 distinct answers the Beta criterion applies and
        # is much cheaper.
        if len(counts) < 3:
            return BetaStoppingCriteria(conf_thresh).should_stop(answers, conf_thresh, verbose)

        most_common = Counter(answers).most_common(2)[0][0]
        # Keep only the top_k_elements most frequent answers (ascending
        # sort, then tail slice).
        counts = {k: v for k, v in sorted(counts.items(), key=lambda item: item[1], reverse=False)[-self.top_k_elements:]}
        len_counts = len(counts)

        # Numerator limits: region of the simplex where the majority
        # answer's probability exceeds every other answer's.
        functions = []
        functions2 =[]
        for i, _ in enumerate(counts.items()):
            if i == len_counts - 2:
                break
            if self.use_markov:
                # Array-valued limits so the Monte-Carlo sampler can draw
                # all N points at once.
                functions.append(lambda *args: [np.array([0 for _ in range(args[0].shape[0])]), np.max([np.array([0 for _ in range(args[0].shape[0])]), np.min([np.array([0.5 for _ in range(args[0].shape[0])]), 1 - np.sum(args, axis = 0) - np.max(args, axis = 0), (1-np.sum(args, axis = 0))/2], axis = 0)], axis = 0)])
            else:
                functions.append(lambda *args: [0, max(0, min(0.5, 1 - sum(args) - max(args), (1-sum(args))/2))])
            functions2.append(lambda *args: [0, max(0, min(0.5, 1 - sum(args) - max(args), (1-sum(args))/2))])

        # Outermost limit
        functions.append(lambda *args: [0, 0.5])
        functions2.append(lambda *args: [0, 0.5])

        # Denominator limits: the full probability simplex (normalizer).
        denom_functions = []
        for i, _ in enumerate(counts.items()):
            if i == len_counts - 2:
                break
            denom_functions.append(lambda *args: [0, 1-np.sum(args, axis = 0)])
        # Outermost limit
        denom_functions.append(lambda *args: [0, 1])
        # Build the Dirichlet-likelihood integrand with one parameter per
        # free simplex coordinate. NOTE(review): relies on CPython-specific
        # behaviour where a name exec()'d into a function frame stays
        # reachable via locals(); also assumes the answer keys round-trip
        # through repr() inside the f-string — confirm for non-basic types.
        exec(
            f'''def integrand({",".join(["a" + str(i) for i in range(len(functions))])}):
    counts = {counts}
    ks = list(counts.keys())
    args = [{",".join(["a" + str(i) for i in range(len(functions))])}]

    outp = np.prod([args[i] ** counts[k] for i, k in enumerate(list(counts.keys())[:-1])], axis = 0) * (1 - np.sum(args, axis = 0)) ** counts[list(counts.keys())[-1]]
    return outp
'''
        )

        return_dict = {
            'most_common' : most_common,
            'prob' : -1,
            'stop' : False,
        }

        try:
            # print('Computing Integration')
            opts = {}
            opts = {'limit': 3, 'epsrel' : 1e-1,'epsabs': 1e-1}

            if self.use_markov:
                # Sample count grows with dimensionality; the second
                # assignment deliberately overrides the first.
                N = min(500000, 5000 * 2**len(functions))
                N = 50000 * 1 if len(functions) <= 4 else 50000 * (2** ((len(functions) - 3)//2))
                prob = self.integrate_mcs(locals()['integrand'], functions, N = N) / self.integrate_mcs(locals()['integrand'], denom_functions, N = N)
            else:
                prob = integrate.nquad(locals()['integrand'], functions, opts = opts)[0] / integrate.nquad(locals()['integrand'], denom_functions, opts = opts)[0]
            return_dict['prob'] = prob
            return_dict['stop'] = prob >= conf_thresh

        except Exception as e:
            # print error message
            # On failure, fall back to "keep sampling".
            print(f"Error during numerical integration: {e}")

        return return_dict

class AlwaysFalseStoppingCriteria(StoppingCriterias):
    """Never stops early; equivalent to plain Self-Consistency."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__()

    def should_stop(self, answers : List, *args, **kwargs) -> Dict:
        return {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : -1,
            'stop' : False,
        }
 '; 53 | document.body.appendChild(d); 54 | // Browsers that round the line-height should have offsetHeight === 38 55 | // The others should have 39. 56 | res = d.offsetHeight === 38; 57 | document.body.removeChild(d); 58 | } 59 | return res; 60 | }; 61 | }()); 62 | 63 | /** 64 | * Returns the top offset of the content box of the given parent and the content box of one of its children. 65 | * 66 | * @param {HTMLElement} parent 67 | * @param {HTMLElement} child 68 | */ 69 | function getContentBoxTopOffset(parent, child) { 70 | var parentStyle = getComputedStyle(parent); 71 | var childStyle = getComputedStyle(child); 72 | 73 | /** 74 | * Returns the numeric value of the given pixel value. 75 | * 76 | * @param {string} px 77 | */ 78 | function pxToNumber(px) { 79 | return +px.substr(0, px.length - 2); 80 | } 81 | 82 | return child.offsetTop 83 | + pxToNumber(childStyle.borderTopWidth) 84 | + pxToNumber(childStyle.paddingTop) 85 | - pxToNumber(parentStyle.paddingTop); 86 | } 87 | 88 | /** 89 | * Returns whether the Line Highlight plugin is active for the given element. 90 | * 91 | * If this function returns `false`, do not call `highlightLines` for the given element. 92 | * 93 | * @param {HTMLElement | null | undefined} pre 94 | * @returns {boolean} 95 | */ 96 | function isActiveFor(pre) { 97 | if (!pre || !/pre/i.test(pre.nodeName)) { 98 | return false; 99 | } 100 | 101 | if (pre.hasAttribute('data-line')) { 102 | return true; 103 | } 104 | 105 | if (pre.id && Prism.util.isActive(pre, LINKABLE_LINE_NUMBERS_CLASS)) { 106 | // Technically, the line numbers plugin is also necessary but this plugin doesn't control the classes of 107 | // the line numbers plugin, so we can't assume that they are present. 108 | return true; 109 | } 110 | 111 | return false; 112 | } 113 | 114 | var scrollIntoView = true; 115 | 116 | Prism.plugins.lineHighlight = { 117 | /** 118 | * Highlights the lines of the given pre. 
		 *
		 * This function is split into a DOM measuring and mutate phase to improve performance.
		 * The returned function mutates the DOM when called.
		 *
		 * @param {HTMLElement} pre
		 * @param {string | null} [lines] Range spec such as "1,3-5"; falls back to the `data-line` attribute.
		 * @param {string} [classes=''] Extra class names for each created highlight element.
		 * @returns {() => void} Call to apply the queued DOM mutations.
		 */
		highlightLines: function highlightLines(pre, lines, classes) {
			lines = typeof lines === 'string' ? lines : (pre.getAttribute('data-line') || '');

			// Measure phase: read everything we need from the DOM before mutating anything.
			var ranges = lines.replace(/\s+/g, '').split(',').filter(Boolean);
			var offset = +pre.getAttribute('data-line-offset') || 0;

			// Some browsers round line-height; pick the parse that matches (see isLineHeightRounded).
			var parseMethod = isLineHeightRounded() ? parseInt : parseFloat;
			var lineHeight = parseMethod(getComputedStyle(pre).lineHeight);
			var hasLineNumbers = Prism.util.isActive(pre, LINE_NUMBERS_CLASS);
			var codeElement = pre.querySelector('code');
			var parentElement = hasLineNumbers ? pre : codeElement || pre;
			var mutateActions = /** @type {(() => void)[]} */ ([]);
			var lineBreakMatch = codeElement.textContent.match(NEW_LINE_EXP);
			var numberOfLines = lineBreakMatch ? lineBreakMatch.length + 1 : 1;
			/**
			 * The top offset between the content box of the <code> element and the content box of the parent element of
			 * the line highlight element (either `<pre>` or `<code>`).
			 *
			 * This offset might not be zero for some themes where the <code> element has a top margin. Some plugins
			 * (or users) might also add element above the <code> element. Because the line highlight is aligned relative
			 * to the <pre> element, we have to take this into account.
			 *
			 * This offset will be 0 if the parent element of the line highlight element is the `<code>` element.
			 */
			var codePreOffset = !codeElement || parentElement == codeElement ? 0 : getContentBoxTopOffset(pre, codeElement);

			ranges.forEach(function (currentRange) {
				var range = currentRange.split('-');

				var start = +range[0];
				var end = +range[1] || start;
				// Clamp the range end to the number of lines actually present.
				end = Math.min(numberOfLines + offset, end);

				if (end < start) {
					return;
				}

				// Reuse an existing highlight element for this range if one is already in the DOM.
				/** @type {HTMLElement} */
				var line = pre.querySelector('.line-highlight[data-range="' + currentRange + '"]') || document.createElement('div');

				mutateActions.push(function () {
					line.setAttribute('aria-hidden', 'true');
					line.setAttribute('data-range', currentRange);
					line.className = (classes || '') + ' line-highlight';
				});

				// if the line-numbers plugin is enabled, then there is no reason for this plugin to display the line numbers
				if (hasLineNumbers && Prism.plugins.lineNumbers) {
					var startNode = Prism.plugins.lineNumbers.getLine(pre, start);
					var endNode = Prism.plugins.lineNumbers.getLine(pre, end);

					if (startNode) {
						// Measure now, mutate later: offsetTop is read before any style writes.
						var top = startNode.offsetTop + codePreOffset + 'px';
						mutateActions.push(function () {
							line.style.top = top;
						});
					}

					if (endNode) {
						var height = (endNode.offsetTop - startNode.offsetTop) + endNode.offsetHeight + 'px';
						mutateActions.push(function () {
							line.style.height = height;
						});
					}
				} else {
					mutateActions.push(function () {
						line.setAttribute('data-start', String(start));

						if (end > start) {
							line.setAttribute('data-end', String(end));
						}

						line.style.top = (start - offset - 1) * lineHeight + codePreOffset + 'px';

						// One ' \n' per highlighted line so the element's text gives it the right height.
						line.textContent = new Array(end - start + 2).join(' \n');
					});
				}

				mutateActions.push(function () {
					line.style.width = pre.scrollWidth + 'px';
				});

				mutateActions.push(function () {
					// allow this to play nicely with the line-numbers plugin
					// need to attach to pre as when line-numbers is enabled, the code tag is relatively which screws up the positioning
					parentElement.appendChild(line);
				});
			});

			var id = pre.id;
			if (hasLineNumbers && Prism.util.isActive(pre, LINKABLE_LINE_NUMBERS_CLASS) && id) {
				// This implements linkable line numbers. Linkable line numbers use Line Highlight to create a link to a
				// specific line. For this to work, the pre element has to:
				//  1) have line numbers,
				//  2) have the `linkable-line-numbers` class or an ascendant that has that class, and
				//  3) have an id.

				if (!hasClass(pre, LINKABLE_LINE_NUMBERS_CLASS)) {
					// add class to pre
					mutateActions.push(function () {
						pre.classList.add(LINKABLE_LINE_NUMBERS_CLASS);
					});
				}

				var start = parseInt(pre.getAttribute('data-start') || '1');

				// iterate all line number spans
				$$('.line-numbers-rows > span', pre).forEach(function (lineSpan, i) {
					var lineNumber = i + start;
					lineSpan.onclick = function () {
						var hash = id + '.' + lineNumber;

						// this will prevent scrolling since the span is obviously in view
						scrollIntoView = false;
						location.hash = hash;
						setTimeout(function () {
							scrollIntoView = true;
						}, 1);
					};
				});
			}

			// Mutate phase: the caller invokes this to flush all queued DOM writes at once.
			return function () {
				mutateActions.forEach(callFunction);
			};
		}
254 | 	};
255 | 
256 | 
257 | 	function applyHash() {
258 | 		var hash = location.hash.slice(1);
259 | 
260 | 		// Remove pre-existing temporary lines
261 | 		$$('.temporary.line-highlight').forEach(function (line) {
262 | 			line.parentNode.removeChild(line);
263 | 		});
264 | 
265 | 		var range = (hash.match(/\.([\d,-]+)$/) || [, ''])[1];
266 | 
267 | 		if (!range || document.getElementById(hash)) {
268 | 			return;
269 | 		}
270 | 
271 | 		var id = hash.slice(0, hash.lastIndexOf('.'));
272 | 		var pre = document.getElementById(id);
273 | 
274 | 		if (!pre) {
275 | 			return;
276 | 		}
277 | 
278 | 		if (!pre.hasAttribute('data-line')) {
279 | 			pre.setAttribute('data-line', '');
280 | 		}
281 | 
282 | 		var mutateDom = Prism.plugins.lineHighlight.highlightLines(pre, range, 'temporary ');
283 | 		mutateDom();
284 | 
285 | 		if (scrollIntoView) {
286 | 			document.querySelector('.temporary.line-highlight').scrollIntoView();
287 | 		}
288 | 	}
289 | 
290 | 	var fakeTimer = 0; // Hack to limit the number of times applyHash() runs
291 | 
292 | 	Prism.hooks.add('before-sanity-check', function (env) {
293 | 		var pre = env.element.parentElement;
294 | 		if (!isActiveFor(pre)) {
295 | 			return;
296 | 		}
297 | 
298 | 		/*
299 | 		 * Cleanup for other plugins (e.g. autoloader).
300 | 		 *
301 | 		 * Sometimes  blocks are highlighted multiple times. It is necessary
302 | 		 * to cleanup any left-over tags, because the whitespace inside of the 
303 | * tags change the content of the tag. 304 | */ 305 | var num = 0; 306 | $$('.line-highlight', pre).forEach(function (line) { 307 | num += line.textContent.length; 308 | line.parentNode.removeChild(line); 309 | }); 310 | // Remove extra whitespace 311 | if (num && /^(?: \n)+$/.test(env.code.slice(-num))) { 312 | env.code = env.code.slice(0, -num); 313 | } 314 | }); 315 | 316 | Prism.hooks.add('complete', function completeHook(env) { 317 | var pre = env.element.parentElement; 318 | if (!isActiveFor(pre)) { 319 | return; 320 | } 321 | 322 | clearTimeout(fakeTimer); 323 | 324 | var hasLineNumbers = Prism.plugins.lineNumbers; 325 | var isLineNumbersLoaded = env.plugins && env.plugins.lineNumbers; 326 | 327 | if (hasClass(pre, LINE_NUMBERS_CLASS) && hasLineNumbers && !isLineNumbersLoaded) { 328 | Prism.hooks.add('line-numbers', completeHook); 329 | } else { 330 | var mutateDom = Prism.plugins.lineHighlight.highlightLines(pre); 331 | mutateDom(); 332 | fakeTimer = setTimeout(applyHash, 1); 333 | } 334 | }); 335 | 336 | window.addEventListener('hashchange', applyHash); 337 | window.addEventListener('resize', function () { 338 | var actions = $$('pre') 339 | .filter(isActiveFor) 340 | .map(function (pre) { 341 | return Prism.plugins.lineHighlight.highlightLines(pre); 342 | }); 343 | actions.forEach(callFunction); 344 | }); 345 | 346 | }()); 347 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 
0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid 
#7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 
1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=r
ange].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 
1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black 
.has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark .has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary 
.has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success 
.has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /scripts/pal/core/interface.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def init_adaptive_consistency(max_gens, stop_criteria, stop_criteria_thresh):
    """Build an AC (Adaptive-Consistency) controller for sampling loops.

    Parameters
    ----------
    max_gens : int
        Upper bound on the number of generations the controller may request.
    stop_criteria : str or None
        Key into ``stop_criteria_dict``; ``None`` falls back to
        'always_false' (i.e. never stop early, always use ``max_gens``).
    stop_criteria_thresh : float, None, or -1
        Confidence threshold passed to the criterion as ``conf_thresh``;
        ``None`` and ``-1`` are both "use the criterion's default" sentinels.

    Returns
    -------
    AC
        A configured adaptive-consistency controller.
    """
    if stop_criteria is None:
        stop_criteria = 'always_false'
    criterion_cls = stop_criteria_dict[stop_criteria]
    # None and -1 are both treated as "no explicit threshold" by callers.
    if stop_criteria_thresh is None or stop_criteria_thresh == -1:
        criterion = criterion_cls()
    else:
        criterion = criterion_cls(conf_thresh=stop_criteria_thresh)
    return AC(max_gens=max_gens, stop_criteria=criterion)


class timeout:
    """Context manager that raises ``TimeoutError`` after ``seconds``.

    Fixes over the original:
    - ``signal.alarm`` accepts only whole seconds and raises ``TypeError``
      for floats, yet callers annotate their timeouts as ``float``;
      ``signal.setitimer`` supports fractional seconds.
    - The previous SIGALRM handler is restored on exit instead of leaking
      our handler into the rest of the process.

    Unix-only and main-thread-only (relies on SIGALRM delivery).
    NOTE(review): nesting ``timeout`` blocks cancels the outer timer on
    inner exit — same limitation as the original implementation.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def timeout_handler(self, signum, frame):
        # Invoked by the kernel-delivered SIGALRM when the timer fires.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Remember whatever handler was installed so we can put it back.
        self._old_handler = signal.signal(signal.SIGALRM, self.timeout_handler)
        # setitimer accepts float seconds, unlike signal.alarm.
        signal.setitimer(signal.ITIMER_REAL, self.seconds)

    def __exit__(self, type, value, traceback):
        # Cancel the timer and restore the pre-existing SIGALRM handler.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, self._old_handler)
extract_answer 71 | self.stop = stop 72 | self.model = model 73 | self.openai_url = openai_url 74 | # Hacky solution: 75 | if self.openai_url is not None: 76 | globals()['call_gpt'] = lambda *args, **kwargs : call_vicuna(*args, **kwargs, url=openai_url) 77 | 78 | def reinit(self): 79 | ... 80 | 81 | 82 | def clear_history(self): 83 | self.history = [] 84 | 85 | def extract_answer(self, gen: str): 86 | if self.extract_answer_fn: 87 | return self.extract_answer_fn (gen) 88 | last_line = gen.strip().split('\n')[-1] 89 | # TODO: Searching for last line is not at all necessary! 90 | last_idx = last_line.rfind(self.answer_prefix) 91 | if last_idx == -1: 92 | return "" 93 | answer = last_line[last_idx + len(self.answer_prefix):].strip() 94 | if answer.endswith('.'): 95 | answer = answer[:-1] 96 | return answer 97 | 98 | def execute(self, gen: str): 99 | if isinstance(gen, List): 100 | gen = '\n'.join(gen) 101 | return self.extract_answer(gen) 102 | 103 | def run(self, prompt, temperature=0.0, top_p=1.0, majority_at=None, max_tokens=512, logprobs=0): 104 | # gen = call_gpt(prompt, model=self.model, stop=self.stop, 105 | # temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at) 106 | if logprobs != 0: 107 | gens, dt = call_gpt(prompt, model=self.model, stop=self.stop, 108 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, logprobs=logprobs) 109 | else: 110 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 111 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, ) 112 | 113 | if logprobs != 0: 114 | self.history.append([gens, dt]) 115 | else: 116 | self.history.append(gens) 117 | results = [] 118 | for gen in gens: 119 | results.append(self.extract_answer(gen)) 120 | return Counter(results).most_common(1)[0][0] 121 | 122 | 123 | class ProgramInterface: 124 | 125 | def __init__( 126 | self, 127 | max_gens: int = 40, 128 | model: str = 'code-davinci-002', 129 | 
runtime: Optional[Any] = None, 130 | stop: str = '\n\n', 131 | get_answer_symbol: Optional[str] = None, 132 | get_answer_expr: Optional[str] = None, 133 | get_answer_from_stdout: bool = False, 134 | verbose: bool = False, 135 | openai_url: Optional[str] = None, 136 | stop_criteria: Optional[str] = None, 137 | stop_criteria_thresh: Optional[float] = None, 138 | ) -> None: 139 | 140 | self.max_gens = max_gens 141 | self.ac = init_adaptive_consistency(self.max_gens, stop_criteria, stop_criteria_thresh) 142 | 143 | self.model = model 144 | self.runtime = runtime if runtime else GenericRuntime() 145 | self.history = [] 146 | self.stop = stop 147 | self.answer_symbol = get_answer_symbol 148 | self.answer_expr = get_answer_expr 149 | self.get_answer_from_stdout = get_answer_from_stdout 150 | self.verbose = verbose 151 | 152 | if openai_url is not None: 153 | globals()['call_gpt'] = lambda *args, **kwargs : call_vicuna(*args, **kwargs, url=openai_url) 154 | 155 | def reinit(self): 156 | import copy 157 | self.runtime._global_vars = copy.copy(self.runtime.GLOBAL_DICT) 158 | 159 | 160 | def clear_history(self): 161 | self.history = [] 162 | 163 | def process_generation_to_code(self, gens: str): 164 | return [g.split('\n') for g in gens] 165 | 166 | def generate(self, prompt: str, temperature: float =0.0, top_p: float =1.0, 167 | max_tokens: int =512, majority_at: int = None, logprobs = 0): 168 | if logprobs != 0: 169 | gens, dt = call_gpt(prompt, model=self.model, stop=self.stop, 170 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, logprobs=logprobs) 171 | else: 172 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 173 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, ) 174 | if self.verbose: 175 | print(gens) 176 | code = self.process_generation_to_code([x.strip() for x in gens]) 177 | if logprobs != 0: 178 | self.history.append([gens, dt]) 179 | else: 180 | self.history.append(gens) 181 
    def execute(self, code: Optional[List[str]] = None, TIMEOUT = 2):
        """Execute generated code lines in ``self.runtime`` and return the answer.

        Falls back to ``self.code`` when ``code`` is None.
        NOTE(review): ``self.code`` is never assigned anywhere in this
        class — confirm callers set it before relying on the fallback.

        The answer is extracted by the first configured mechanism, in
        priority order:
        1. ``get_answer_from_stdout``: run all lines, capture stdout, and
           return its last line (including the trailing newline).
        2. ``answer_symbol``: run all lines, return that variable from the
           runtime's global dict.
        3. ``answer_expr``: run all lines, then evaluate the expression.
        4. otherwise: run every line but the last, and evaluate the last
           line as an expression.

        Raises ``TimeoutError`` (via the module's ``timeout`` context
        manager) if execution exceeds ``TIMEOUT`` seconds.
        """
        with timeout(TIMEOUT):
            code = code if code else self.code
            if self.get_answer_from_stdout:
                program_io = io.StringIO()
                with redirect_stdout(program_io):
                    self.runtime.exec_code('\n'.join(code))
                program_io.seek(0)
                return program_io.readlines()[-1]
            elif self.answer_symbol:
                self.runtime.exec_code('\n'.join(code))
                return self.runtime._global_vars[self.answer_symbol]
            elif self.answer_expr:
                self.runtime.exec_code('\n'.join(code))
                return self.runtime.eval_code(self.answer_expr)
            else:
                self.runtime.exec_code('\n'.join(code[:-1]))
                return self.runtime.eval_code(code[-1])
        # NOTE(review): unreachable — every branch above returns inside the
        # with-block; kept byte-identical to the original.
        return ""

    def run(self, prompt: str, time_out: float =10, temperature: float =0.0, top_p: float =1.0,
            max_tokens: int =512, majority_at: int = None, prepend_to_code = "", logprobs = 0):
        """Generate code snippets for ``prompt``, execute each, and return the
        majority-vote result.

        Each snippet is prefixed with ``prepend_to_code`` (split into lines)
        before execution; snippets that raise are printed and skipped.
        ``self.reinit()`` resets the runtime's globals between snippets.

        NOTE(review): ``timeout(time_out)`` here nests around ``execute``,
        which installs its own inner 2-second timeout — the inner exit
        cancels the outer timer. Also, if every snippet fails, ``results``
        is empty and ``most_common(1)[0]`` raises IndexError — confirm
        callers tolerate/expect that.
        """
        code_snippets = self.generate(prompt, majority_at=majority_at, temperature=temperature, top_p=top_p, max_tokens=max_tokens, logprobs = logprobs)
        results = []
        for code in code_snippets:
            self.reinit()
            with timeout(time_out):
                try:
                    exec_result = self.execute(prepend_to_code.splitlines() + code)
                except Exception as e:
                    print(e)
                    continue
            results.append(exec_result)
        counter = Counter(results)
        return counter.most_common(1)[0][0]
self.history.append([]) 235 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 236 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, ) 237 | if self.verbose: 238 | print(gens) 239 | gens = [x.strip() for x in gens] 240 | # print('Processing generations to code') 241 | code = self.process_generation_to_code(gens) 242 | # print('Appending to code') 243 | self.history[-1].extend(gens) 244 | return code 245 | 246 | def run(self, prompt: str, time_out: float =10, temperature: float =0.0, top_p: float =1.0, 247 | max_tokens: int =512, majority_at: int =None, prepend_to_code = ""): 248 | all_results = [] 249 | for i in range(0, self.max_gens, self.step_size): 250 | code_snippets = self.generate(prompt, majority_at=self.step_size, temperature=temperature, top_p=top_p, max_tokens=max_tokens) 251 | 252 | results = [] 253 | for code in code_snippets: 254 | self.reinit() 255 | with timeout(time_out): 256 | try: 257 | exec_result = self.execute(prepend_to_code.splitlines() + code) 258 | if self.answer_type == 'float': 259 | exec_result = float(exec_result) 260 | else: 261 | exec_result = str(exec_result) 262 | except Exception as e: 263 | print('Eror', e) 264 | # traceback.print_exc() 265 | 266 | continue 267 | results.append(exec_result) 268 | all_results += results 269 | # print(all_results) 270 | if len(all_results) == 0: 271 | continue 272 | # if has_conclusive_majority_binomial_prob(all_results, self.conf_thresh)[1]: 273 | if self.ac.should_stop(all_results): 274 | # print('Less goo!', results) 275 | break 276 | print('Used {} generations'.format(i+4)) 277 | counter = Counter(all_results) 278 | most_common = counter.most_common(1)[0] 279 | return most_common[0], all_results 280 | 281 | 282 | class AdaptiveTextInterface(TextInterface): 283 | def __init__(self, step_size, *args, **kwargs): 284 | super().__init__(*args, **kwargs) 285 | self.history = [] 286 | self.step_size = step_size 287 | 288 | 289 | def run(self, prompt: str, 
time_out: float =10, temperature: float =0.0, top_p: float =1.0, 290 | max_tokens: int =512, majority_at: int =None, prepend_to_code = ""): 291 | all_results = [] 292 | for i in range(0, self.max_gens, self.step_size): 293 | print(i) 294 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 295 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=self.step_size, ) 296 | print(i) 297 | results = [] 298 | for gen in gens: 299 | self.reinit() 300 | self.history.append(gen) 301 | ans = self.extract_answer(gen) 302 | results.append(ans) 303 | all_results += results 304 | if len(all_results) == 0: 305 | continue 306 | # if has_conclusive_majority_binomial_prob(all_results, self.conf_thresh)[1]: 307 | if self.ac.should_stop(all_results): 308 | break 309 | print('Used {} generations'.format(i+4)) 310 | counter = Counter(all_results) 311 | most_common = counter.most_common(1)[0] 312 | return most_common[0], all_results -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.js: -------------------------------------------------------------------------------- 1 | (function webpackUniversalModuleDefinition(root, factory) { 2 | if(typeof exports === 'object' && typeof module === 'object') 3 | module.exports = factory(); 4 | else if(typeof define === 'function' && define.amd) 5 | define([], factory); 6 | else if(typeof exports === 'object') 7 | exports["bulmaSlider"] = factory(); 8 | else 9 | root["bulmaSlider"] = factory(); 10 | })(typeof self !== 'undefined' ? 
self : this, function() { 11 | return /******/ (function(modules) { // webpackBootstrap 12 | /******/ // The module cache 13 | /******/ var installedModules = {}; 14 | /******/ 15 | /******/ // The require function 16 | /******/ function __webpack_require__(moduleId) { 17 | /******/ 18 | /******/ // Check if module is in cache 19 | /******/ if(installedModules[moduleId]) { 20 | /******/ return installedModules[moduleId].exports; 21 | /******/ } 22 | /******/ // Create a new module (and put it into the cache) 23 | /******/ var module = installedModules[moduleId] = { 24 | /******/ i: moduleId, 25 | /******/ l: false, 26 | /******/ exports: {} 27 | /******/ }; 28 | /******/ 29 | /******/ // Execute the module function 30 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 31 | /******/ 32 | /******/ // Flag the module as loaded 33 | /******/ module.l = true; 34 | /******/ 35 | /******/ // Return the exports of the module 36 | /******/ return module.exports; 37 | /******/ } 38 | /******/ 39 | /******/ 40 | /******/ // expose the modules object (__webpack_modules__) 41 | /******/ __webpack_require__.m = modules; 42 | /******/ 43 | /******/ // expose the module cache 44 | /******/ __webpack_require__.c = installedModules; 45 | /******/ 46 | /******/ // define getter function for harmony exports 47 | /******/ __webpack_require__.d = function(exports, name, getter) { 48 | /******/ if(!__webpack_require__.o(exports, name)) { 49 | /******/ Object.defineProperty(exports, name, { 50 | /******/ configurable: false, 51 | /******/ enumerable: true, 52 | /******/ get: getter 53 | /******/ }); 54 | /******/ } 55 | /******/ }; 56 | /******/ 57 | /******/ // getDefaultExport function for compatibility with non-harmony modules 58 | /******/ __webpack_require__.n = function(module) { 59 | /******/ var getter = module && module.__esModule ? 
60 | /******/ function getDefault() { return module['default']; } : 61 | /******/ function getModuleExports() { return module; }; 62 | /******/ __webpack_require__.d(getter, 'a', getter); 63 | /******/ return getter; 64 | /******/ }; 65 | /******/ 66 | /******/ // Object.prototype.hasOwnProperty.call 67 | /******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; 68 | /******/ 69 | /******/ // __webpack_public_path__ 70 | /******/ __webpack_require__.p = ""; 71 | /******/ 72 | /******/ // Load entry module and return exports 73 | /******/ return __webpack_require__(__webpack_require__.s = 0); 74 | /******/ }) 75 | /************************************************************************/ 76 | /******/ ([ 77 | /* 0 */ 78 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 79 | 80 | "use strict"; 81 | Object.defineProperty(__webpack_exports__, "__esModule", { value: true }); 82 | /* harmony export (binding) */ __webpack_require__.d(__webpack_exports__, "isString", function() { return isString; }); 83 | /* harmony import */ var __WEBPACK_IMPORTED_MODULE_0__events__ = __webpack_require__(1); 84 | var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; 85 | 86 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, 
staticProps); return Constructor; }; }(); 87 | 88 | var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; 89 | 90 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 91 | 92 | function _possibleConstructorReturn(self, call) { if (!self) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return call && (typeof call === "object" || typeof call === "function") ? call : self; } 93 | 94 | function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function, not " + typeof superClass); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, enumerable: false, writable: true, configurable: true } }); if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass; } 95 | 96 | 97 | 98 | var isString = function isString(unknown) { 99 | return typeof unknown === 'string' || !!unknown && (typeof unknown === 'undefined' ? 'undefined' : _typeof(unknown)) === 'object' && Object.prototype.toString.call(unknown) === '[object String]'; 100 | }; 101 | 102 | var bulmaSlider = function (_EventEmitter) { 103 | _inherits(bulmaSlider, _EventEmitter); 104 | 105 | function bulmaSlider(selector) { 106 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 107 | 108 | _classCallCheck(this, bulmaSlider); 109 | 110 | var _this = _possibleConstructorReturn(this, (bulmaSlider.__proto__ || Object.getPrototypeOf(bulmaSlider)).call(this)); 111 | 112 | _this.element = typeof selector === 'string' ? 
document.querySelector(selector) : selector; 113 | // An invalid selector or non-DOM node has been provided. 114 | if (!_this.element) { 115 | throw new Error('An invalid selector or non-DOM node has been provided.'); 116 | } 117 | 118 | _this._clickEvents = ['click']; 119 | /// Set default options and merge with instance defined 120 | _this.options = _extends({}, options); 121 | 122 | _this.onSliderInput = _this.onSliderInput.bind(_this); 123 | 124 | _this.init(); 125 | return _this; 126 | } 127 | 128 | /** 129 | * Initiate all DOM element containing selector 130 | * @method 131 | * @return {Array} Array of all slider instances 132 | */ 133 | 134 | 135 | _createClass(bulmaSlider, [{ 136 | key: 'init', 137 | 138 | 139 | /** 140 | * Initiate plugin 141 | * @method init 142 | * @return {void} 143 | */ 144 | value: function init() { 145 | this._id = 'bulmaSlider' + new Date().getTime() + Math.floor(Math.random() * Math.floor(9999)); 146 | this.output = this._findOutputForSlider(); 147 | 148 | this._bindEvents(); 149 | 150 | if (this.output) { 151 | if (this.element.classList.contains('has-output-tooltip')) { 152 | // Get new output position 153 | var newPosition = this._getSliderOutputPosition(); 154 | 155 | // Set output position 156 | this.output.style['left'] = newPosition.position; 157 | } 158 | } 159 | 160 | this.emit('bulmaslider:ready', this.element.value); 161 | } 162 | }, { 163 | key: '_findOutputForSlider', 164 | value: function _findOutputForSlider() { 165 | var _this2 = this; 166 | 167 | var result = null; 168 | var outputs = document.getElementsByTagName('output') || []; 169 | 170 | Array.from(outputs).forEach(function (output) { 171 | if (output.htmlFor == _this2.element.getAttribute('id')) { 172 | result = output; 173 | return true; 174 | } 175 | }); 176 | return result; 177 | } 178 | }, { 179 | key: '_getSliderOutputPosition', 180 | value: function _getSliderOutputPosition() { 181 | // Update output position 182 | var newPlace, minValue; 183 | 184 | 
var style = window.getComputedStyle(this.element, null); 185 | // Measure width of range input 186 | var sliderWidth = parseInt(style.getPropertyValue('width'), 10); 187 | 188 | // Figure out placement percentage between left and right of input 189 | if (!this.element.getAttribute('min')) { 190 | minValue = 0; 191 | } else { 192 | minValue = this.element.getAttribute('min'); 193 | } 194 | var newPoint = (this.element.value - minValue) / (this.element.getAttribute('max') - minValue); 195 | 196 | // Prevent bubble from going beyond left or right (unsupported browsers) 197 | if (newPoint < 0) { 198 | newPlace = 0; 199 | } else if (newPoint > 1) { 200 | newPlace = sliderWidth; 201 | } else { 202 | newPlace = sliderWidth * newPoint; 203 | } 204 | 205 | return { 206 | 'position': newPlace + 'px' 207 | }; 208 | } 209 | 210 | /** 211 | * Bind all events 212 | * @method _bindEvents 213 | * @return {void} 214 | */ 215 | 216 | }, { 217 | key: '_bindEvents', 218 | value: function _bindEvents() { 219 | if (this.output) { 220 | // Add event listener to update output when slider value change 221 | this.element.addEventListener('input', this.onSliderInput, false); 222 | } 223 | } 224 | }, { 225 | key: 'onSliderInput', 226 | value: function onSliderInput(e) { 227 | e.preventDefault(); 228 | 229 | if (this.element.classList.contains('has-output-tooltip')) { 230 | // Get new output position 231 | var newPosition = this._getSliderOutputPosition(); 232 | 233 | // Set output position 234 | this.output.style['left'] = newPosition.position; 235 | } 236 | 237 | // Check for prefix and postfix 238 | var prefix = this.output.hasAttribute('data-prefix') ? this.output.getAttribute('data-prefix') : ''; 239 | var postfix = this.output.hasAttribute('data-postfix') ? 
this.output.getAttribute('data-postfix') : ''; 240 | 241 | // Update output with slider value 242 | this.output.value = prefix + this.element.value + postfix; 243 | 244 | this.emit('bulmaslider:ready', this.element.value); 245 | } 246 | }], [{ 247 | key: 'attach', 248 | value: function attach() { 249 | var _this3 = this; 250 | 251 | var selector = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'input[type="range"].slider'; 252 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 253 | 254 | var instances = new Array(); 255 | 256 | var elements = isString(selector) ? document.querySelectorAll(selector) : Array.isArray(selector) ? selector : [selector]; 257 | elements.forEach(function (element) { 258 | if (typeof element[_this3.constructor.name] === 'undefined') { 259 | var instance = new bulmaSlider(element, options); 260 | element[_this3.constructor.name] = instance; 261 | instances.push(instance); 262 | } else { 263 | instances.push(element[_this3.constructor.name]); 264 | } 265 | }); 266 | 267 | return instances; 268 | } 269 | }]); 270 | 271 | return bulmaSlider; 272 | }(__WEBPACK_IMPORTED_MODULE_0__events__["a" /* default */]); 273 | 274 | /* harmony default export */ __webpack_exports__["default"] = (bulmaSlider); 275 | 276 | /***/ }), 277 | /* 1 */ 278 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 279 | 280 | "use strict"; 281 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; 
}(); 282 | 283 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 284 | 285 | var EventEmitter = function () { 286 | function EventEmitter() { 287 | var listeners = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : []; 288 | 289 | _classCallCheck(this, EventEmitter); 290 | 291 | this._listeners = new Map(listeners); 292 | this._middlewares = new Map(); 293 | } 294 | 295 | _createClass(EventEmitter, [{ 296 | key: "listenerCount", 297 | value: function listenerCount(eventName) { 298 | if (!this._listeners.has(eventName)) { 299 | return 0; 300 | } 301 | 302 | var eventListeners = this._listeners.get(eventName); 303 | return eventListeners.length; 304 | } 305 | }, { 306 | key: "removeListeners", 307 | value: function removeListeners() { 308 | var _this = this; 309 | 310 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 311 | var middleware = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : false; 312 | 313 | if (eventName !== null) { 314 | if (Array.isArray(eventName)) { 315 | name.forEach(function (e) { 316 | return _this.removeListeners(e, middleware); 317 | }); 318 | } else { 319 | this._listeners.delete(eventName); 320 | 321 | if (middleware) { 322 | this.removeMiddleware(eventName); 323 | } 324 | } 325 | } else { 326 | this._listeners = new Map(); 327 | } 328 | } 329 | }, { 330 | key: "middleware", 331 | value: function middleware(eventName, fn) { 332 | var _this2 = this; 333 | 334 | if (Array.isArray(eventName)) { 335 | name.forEach(function (e) { 336 | return _this2.middleware(e, fn); 337 | }); 338 | } else { 339 | if (!Array.isArray(this._middlewares.get(eventName))) { 340 | this._middlewares.set(eventName, []); 341 | } 342 | 343 | this._middlewares.get(eventName).push(fn); 344 | } 345 | } 346 | }, { 347 | key: "removeMiddleware", 348 | value: function removeMiddleware() { 349 | var _this3 = this; 350 | 351 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 352 | 353 | if (eventName !== null) { 354 | if (Array.isArray(eventName)) { 355 | name.forEach(function (e) { 356 | return _this3.removeMiddleware(e); 357 | }); 358 | } else { 359 | this._middlewares.delete(eventName); 360 | } 361 | } else { 362 | this._middlewares = new Map(); 363 | } 364 | } 365 | }, { 366 | key: "on", 367 | value: function on(name, callback) { 368 | var _this4 = this; 369 | 370 | var once = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : false; 371 | 372 | if (Array.isArray(name)) { 373 | name.forEach(function (e) { 374 | return _this4.on(e, callback); 375 | }); 376 | } else { 377 | name = name.toString(); 378 | var split = name.split(/,|, | /); 379 | 380 | if (split.length > 1) { 381 | split.forEach(function (e) { 382 | return _this4.on(e, callback); 383 | }); 384 | } else { 385 | if (!Array.isArray(this._listeners.get(name))) { 386 | this._listeners.set(name, []); 387 | } 388 | 389 | this._listeners.get(name).push({ once: once, callback: callback }); 390 | } 391 | } 392 | } 393 | }, { 394 | key: "once", 395 | value: function once(name, callback) { 396 | this.on(name, callback, true); 397 | } 398 | }, { 399 | key: "emit", 400 | value: function emit(name, data) { 401 | var _this5 = this; 402 | 403 | var silent = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; 404 | 405 | name = name.toString(); 406 | var listeners = this._listeners.get(name); 407 | var middlewares = null; 408 | var doneCount = 0; 409 | var execute = silent; 410 | 411 | if (Array.isArray(listeners)) { 412 | listeners.forEach(function (listener, index) { 413 | // Start Middleware checks unless we're doing a silent emit 414 | if (!silent) { 415 | middlewares = _this5._middlewares.get(name); 416 | // Check and execute Middleware 417 | if (Array.isArray(middlewares)) { 418 | middlewares.forEach(function (middleware) { 419 | middleware(data, function () { 420 | var newData = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : null; 421 | 422 | if (newData !== null) { 423 | data = newData; 424 | } 425 | doneCount++; 426 | }, name); 427 | }); 428 | 429 | if (doneCount >= middlewares.length) { 430 | execute = true; 431 | } 432 | } else { 433 | execute = true; 434 | } 435 | } 436 | 437 | // If Middleware checks have been passed, execute 438 | if (execute) { 439 | if (listener.once) { 440 | listeners[index] = null; 441 | } 442 | listener.callback(data); 443 | } 444 | }); 445 | 446 | // Dirty way of removing used Events 447 | while (listeners.indexOf(null) !== -1) { 448 | listeners.splice(listeners.indexOf(null), 1); 449 | } 450 | } 451 | } 452 | }]); 453 | 454 | return EventEmitter; 455 | }(); 456 | 457 | /* harmony default export */ __webpack_exports__["a"] = (EventEmitter); 458 | 459 | /***/ }) 460 | /******/ ])["default"]; 461 | }); -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning & Coding with LLMs 10 | 11 | 12 | 13 | 26 | 27 | 29 | 30 | 31 | 32 | 33 | 34 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 46 | 47 | 48 | 49 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 106 | 107 | 108 |
109 |
110 |
111 |
112 |
113 |

Let's Sample Step by Step:
Adaptive-Consistency for Efficient Reasoning with LLMs

114 |
115 | 116 | Pranjal Aggarwal1, 117 | 118 | Aman Madaan2, 119 | 120 | Yiming Yang2, 121 | 122 | 123 | Mausam1, 124 | 125 |
126 | 127 |
128 | 1Department of Computer Science and Engineering, Indian Institute of Technology Delhi 129 | 2Language Technologies Institute, School of CS, Carnegie Mellon University 130 |
131 | 132 |
133 | 184 | 185 |
186 |
187 |
188 |
189 |
190 |
191 | 192 | 193 | 194 | 207 | 208 |

209 | TLDR: a sampling method that matches Self-Consistency in performance with up to 7.9x fewer samples. 210 |

211 | 212 |
213 |
214 |
215 | 220 | 221 | 224 | 225 | 226 |
227 | 228 |
229 |
230 | 231 | 287 | 288 | 289 |
290 |
291 | 292 |
293 |
294 |

Abstract

295 |
296 |

297 | A popular approach for improving the correctness of output from large language models (LLMs) is Self-Consistency -- poll the LLM multiple times and output the most frequent solution. Existing Self-Consistency techniques always draw a constant number of samples per question, whereas a better approach would be to non-uniformly distribute the available budget, based on the amount of agreement in the samples drawn so far. 298 |

299 |

300 | In response, we introduce Adaptive-Consistency, a cost-efficient, model-agnostic technique that dynamically adjusts the number of samples per question using a lightweight stopping criterion. Our experiments over 13 datasets and two LLMs demonstrate that Adaptive-Consistency reduces sample budget by up to 7.9 times with an average accuracy drop of less than 0.1%. 301 |

302 | 303 | 304 | 305 |
306 |
307 |
308 | 309 | 310 | 311 | 320 | 321 |
322 |
323 | 324 |
325 |
326 |
327 |
328 | 329 |

Key-Highlights

330 | 331 | 332 | 334 |
335 |
    336 |
  • 🚀 Dynamic Sampling: Our method smartly adjusts the number of samples per question using a lightweight stopping criterion.
  • 337 |
  • 📊 Cost-Effective Performance: Adaptive-Consistency reduces the sample budget by up to 7.9 times with an average accuracy drop of less than 0.1%.
  • 338 |
  • 🔌Off-the-Shelf Solution: Improve accuracy or reduce cost right away with just 2-3 lines of code changes. No additional training required.
  • 339 |
  • 🌐Compatible with Pre-trained LLMs: Our method works seamlessly with popular large language models like GPT-3.
  • 340 |
341 |
342 | 345 | 346 |
347 |
348 |
349 |
350 | 351 |
352 |
353 |
354 |
355 |

Results Summary

356 | Adaptive-Consistency outperforms Self-Consistency on 13 benchmarks, across varying domains, difficulty levels, and answer types. 357 |
359 |
360 | 361 |
362 | 363 | Mathematical Reasoning 364 |
365 |
366 | 367 |
368 | 369 | Logical Reasoning 370 |
371 | 372 |
373 | 374 | 375 |
376 | 377 |
378 | Other Reasoning Datasets 379 |
380 |
381 |
382 |
383 |
384 | 385 | 393 | 394 | 395 |
396 |
397 |
398 |

Using Adaptive-Consistency in your code

399 | Using Adaptive-Consistency in your code takes only 2-3 line changes. 400 | 401 | 402 | 403 |
404 | 405 |
406 |

407 | 1. Modifying Self-Consistency 408 |

409 |
from adaptive_consistency import AC
410 | 
411 | outputs = []
412 | ac = AC(stop_criteria = 'beta')
413 | for input in dataset:
414 |     output = openai.Completion.create(input, *args, **kwargs)
415 |     outputs.append(output)
416 |     if ac.should_stop(outputs):
417 |         break
418 |                 
419 |
420 | 421 |
422 | 423 |
424 | 425 |
426 |

427 | 2. Modifying Vanilla Prompting 428 |

429 |
from adaptive_consistency import AC
430 | 
431 | output = sampling_function(*args, **kwargs)
432 | output = AC(stop_criteria = 'beta').eval_loop(sampling_function, *args, **kwargs)
433 |             
434 |
435 | 436 |
437 |
438 |
439 | 440 | 441 | 442 | 443 |
444 |
445 |

BibTeX

446 |
@misc{aggarwal2023lets,
447 |       title={Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning with LLMs}, 
448 |       author={Pranjal Aggarwal and Aman Madaan and Yiming Yang and Mausam},
449 |       year={2023},
450 |       eprint={2305.11860},
451 |       archivePrefix={arXiv},
452 |       primaryClass={cs.CL}
453 | }
454 |
455 |
456 | 457 | 458 | 482 | 483 | 484 | 485 | --------------------------------------------------------------------------------