├── scripts ├── pal │ ├── core │ │ ├── __init__.py │ │ ├── runtime.py │ │ ├── backend.py │ │ ├── vicuna.py │ │ └── interface.py │ ├── prompt │ │ ├── __init__.py │ │ ├── strategy_qa_prompt.py │ │ ├── boolean_expressions.py │ │ ├── penguins_in_a_table.py │ │ ├── ld_three.py │ │ ├── tracking_three.py │ │ ├── date_understanding_prompt.py │ │ ├── snarks.py │ │ ├── ruin_names.py │ │ ├── disambiguation_qa.py │ │ ├── math_prompts.py │ │ └── salient_translation.py │ └── __init__.py ├── run_self_consistency.sh ├── run_adaptive_consistency.sh ├── eval_outputs.py ├── fastchat_server.py └── run_eval.py ├── docs ├── CNAME ├── .gitignore ├── static │ ├── css │ │ ├── prism.css:Zone.Identifier │ │ ├── prism-diff-highlight.css │ │ ├── prism-line-highlight.css │ │ ├── prism_bak.css │ │ ├── index.css │ │ ├── bulma-carousel.min.css │ │ ├── prism.css │ │ └── bulma-slider.min.css │ ├── videos │ │ ├── ._Icon │ │ └── AC_teaser_video.mp4 │ ├── adaptive_paper.pdf │ ├── images │ │ ├── ac_results_math.png │ │ ├── ac_results_misc.png │ │ ├── ac_teaser_new.png │ │ ├── ac_results_logic.png │ │ ├── ac_results_logic_bak.png │ │ └── favicon.svg │ └── js │ │ ├── index.js │ │ ├── prism-diff-highlight.js │ │ ├── bulma-slider.min.js │ │ ├── prism-line-highlight.js │ │ └── bulma-slider.js ├── README.md └── index.html ├── requirements.txt ├── download_outputs.sh ├── download_datasets.bash ├── adaptive_consistency ├── __pycache__ │ ├── main.cpython-39.pyc │ ├── __init__.cpython-39.pyc │ └── stopping_criterias.cpython-39.pyc ├── __init__.py ├── main.py └── stopping_criterias.py ├── .gitignore ├── setup.py ├── outputs └── README.md ├── README.md └── LICENSE /scripts/pal/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/pal/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | sample-step-by-step.info -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | gdown 4 | tqdm -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_store 2 | .idea 3 | scratch 4 | -------------------------------------------------------------------------------- /docs/static/css/prism.css:Zone.Identifier: -------------------------------------------------------------------------------- 1 | [ZoneTransfer] 2 | ZoneId=3 3 | HostUrl=about:internet 4 | -------------------------------------------------------------------------------- /docs/static/videos/._Icon: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/videos/._Icon -------------------------------------------------------------------------------- /download_outputs.sh: -------------------------------------------------------------------------------- 1 | gdown "11ripw7-E5T8a2TZUiD5vbC7QuR_qfZOA" -O outputs.zip 2 | unzip -n -q outputs.zip 3 | rm outputs.zip -------------------------------------------------------------------------------- /docs/static/adaptive_paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/adaptive_paper.pdf -------------------------------------------------------------------------------- /download_datasets.bash: -------------------------------------------------------------------------------- 1 | gdown 
"14a2TJY_lqz_h1W1h8iclROMEpBynPTI4" -O datasets.zip 2 | unzip datasets.zip 3 | rm datasets.zip 4 | -------------------------------------------------------------------------------- /docs/static/images/ac_results_math.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_math.png -------------------------------------------------------------------------------- /docs/static/images/ac_results_misc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_misc.png -------------------------------------------------------------------------------- /docs/static/images/ac_teaser_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_teaser_new.png -------------------------------------------------------------------------------- /docs/static/videos/AC_teaser_video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/videos/AC_teaser_video.mp4 -------------------------------------------------------------------------------- /docs/static/images/ac_results_logic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_logic.png -------------------------------------------------------------------------------- /docs/static/images/ac_results_logic_bak.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/docs/static/images/ac_results_logic_bak.png 
-------------------------------------------------------------------------------- /adaptive_consistency/__pycache__/main.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/adaptive_consistency/__pycache__/main.cpython-39.pyc -------------------------------------------------------------------------------- /adaptive_consistency/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/adaptive_consistency/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | *.egg-info 4 | *.egg 5 | transfer_outputs.py 6 | outputs/code-davinci-002 7 | outputs/vicuna-13b 8 | datasets 9 | scratch 10 | __pycache__ 11 | *.pyc -------------------------------------------------------------------------------- /adaptive_consistency/__pycache__/stopping_criterias.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Pranjal2041/AdaptiveConsistency/HEAD/adaptive_consistency/__pycache__/stopping_criterias.cpython-39.pyc -------------------------------------------------------------------------------- /adaptive_consistency/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import AC 2 | from .main import stop_criteria_dict 3 | from .stopping_criterias import BetaStoppingCriteria 4 | from .stopping_criterias import DirichletStoppingCriteria 5 | from .stopping_criterias import RandomStoppingCriteria 6 | from .stopping_criterias import MajorityStoppingCriteria 7 | from .stopping_criterias import EntropyStoppingCriteria 
-------------------------------------------------------------------------------- /docs/static/css/prism-diff-highlight.css: -------------------------------------------------------------------------------- 1 | pre.diff-highlight > code .token.deleted:not(.prefix), 2 | pre > code.diff-highlight .token.deleted:not(.prefix) { 3 | background-color: rgba(255, 0, 0, .1); 4 | color: inherit; 5 | display: block; 6 | } 7 | 8 | pre.diff-highlight > code .token.inserted:not(.prefix), 9 | pre > code.diff-highlight .token.inserted:not(.prefix) { 10 | background-color: rgba(0, 255, 128, .1); 11 | color: inherit; 12 | display: block; 13 | } 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | 5 | def read(fname): 6 | return open(os.path.join(os.path.dirname(__file__), fname), encoding='utf-8').read() 7 | 8 | setup( 9 | name = "AdaptiveConsistency", 10 | version = "1.0.0", 11 | description = ("Library for running AdaptiveConsistency based Inference on large language models."), 12 | license = "Apache License 2.0", 13 | packages=find_packages(), 14 | long_description=read('README.md'), 15 | long_description_content_type='text/markdown', 16 | ) 17 | -------------------------------------------------------------------------------- /outputs/README.md: -------------------------------------------------------------------------------- 1 | Model outputs from different models on 13 different datasets. 2 | 3 | Run `bash download_outputs.sh` to download all the outputs. \ 4 |
5 | 6 | Directory structure: 7 | ``` 8 | outputs 9 | ├── README.md 10 | ├── code-davinci-002 11 | │   ├── dataset-1 12 | │   │   ├── outputs_seed1.jsonl 13 | │   │   ├── outputs_seed2.jsonl 14 | │   │   └── outputs_seed3.jsonl 15 | │   ├── dataset-2 16 | │   │   ├── ... 17 | │   ├── ... 18 | ├── vicuna-13b 19 | │   ├── ... 20 | └── ... 21 | ``` 22 | 23 |
24 | 25 | We use the following hyperparameters for model generation: 26 | - `temperature`: 0.7 27 | - `top_p`: 1.0 28 | - `max_length`: 512 29 | - `max_gens`: 40 30 | 31 | -------------------------------------------------------------------------------- /scripts/pal/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .core import interface, runtime 17 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Nerfies 2 | 3 | This is the repository that contains source code for the [Nerfies website](https://nerfies.github.io). 4 | 5 | If you find Nerfies useful for your work please cite: 6 | ``` 7 | @article{park2021nerfies 8 | author = {Park, Keunhong and Sinha, Utkarsh and Barron, Jonathan T. and Bouaziz, Sofien and Goldman, Dan B and Seitz, Steven M. and Martin-Brualla, Ricardo}, 9 | title = {Nerfies: Deformable Neural Radiance Fields}, 10 | journal = {ICCV}, 11 | year = {2021}, 12 | } 13 | ``` 14 | 15 | # Website License 16 | Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. 17 | -------------------------------------------------------------------------------- /scripts/pal/prompt/strategy_qa_prompt.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT = """ 2 | # Q: Do hamsters provide food for any animals? 3 | Answer: Hamsters are prey animals. Prey are food for predators. Thus, hamsters provide food for some animals. The answer is yes 4 | 5 | 6 | # Q: Yes or no: Hydrogen's atomic number squared exceeds number of Spice Girls? 7 | Answer: Hydrogen has an atomic number of 1. 1 squared is 1. There are 5 Spice Girls. Thus, Hydrogen's atomic number squared is less than 5. The answer is no 8 | 9 | 10 | # Q: Yes or no: Would a pear sink in water? 11 | Answer: The density of a pear is about 0.6 g\/cm^3, which is less than water. Objects less dense than water float. Thus, a pear would float. The answer is no 12 | 13 | 14 | # Q: Yes or no: Is it common to see frost during some college commencements? 15 | Answer: College commencement ceremonies can happen in December, May, and June. December is in the winter, so there can be frost. Thus, there could be frost at some commencements. The answer is yes 16 | 17 | 18 | # Q: Yes or no: Could a llama birth twice during War in Vietnam (1945-46)? 19 | Answer: The War in Vietnam was 6 months. The gestation period for a llama is 11 months, which is more than 6 months. Thus, a llama could not give birth twice during the War in Vietnam. The answer is no 20 | 21 | 22 | # Q: Could Brooke Shields succeed at University of Pennsylvania? 23 | Answer: Brooke Shields went to Princeton University. Princeton University is about as academically rigorous as the University of Pennsylvania. Thus, Brooke Shields could also succeed at the University of Pennsylvania. 
The answer is yes 24 | 25 | # Q: {question} 26 | """.strip() + '\n' 27 | -------------------------------------------------------------------------------- /docs/static/images/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 13 | 14 | awesome_icon 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/static/css/prism-line-highlight.css: -------------------------------------------------------------------------------- 1 | pre[data-line] { 2 | position: relative; 3 | padding: 1em 0 1em 3em; 4 | } 5 | 6 | .line-highlight { 7 | position: absolute; 8 | left: 0; 9 | right: 0; 10 | padding: inherit 0; 11 | margin-top: 1em; /* Same as .prism’s padding-top */ 12 | 13 | background: hsla(24, 20%, 50%,.08); 14 | background: linear-gradient(to right, hsla(24, 20%, 50%,.1) 70%, hsla(24, 20%, 50%,0)); 15 | 16 | pointer-events: none; 17 | 18 | line-height: inherit; 19 | white-space: pre; 20 | } 21 | 22 | @media print { 23 | .line-highlight { 24 | /* 25 | * This will prevent browsers from replacing the background color with white. 26 | * It's necessary because the element is layered on top of the displayed code. 
27 | */ 28 | -webkit-print-color-adjust: exact; 29 | color-adjust: exact; 30 | } 31 | } 32 | 33 | .line-highlight:before, 34 | .line-highlight[data-end]:after { 35 | content: attr(data-start); 36 | position: absolute; 37 | top: .4em; 38 | left: .6em; 39 | min-width: 1em; 40 | padding: 0 .5em; 41 | background-color: hsla(24, 20%, 50%,.4); 42 | color: hsl(24, 20%, 95%); 43 | font: bold 65%/1.5 sans-serif; 44 | text-align: center; 45 | vertical-align: .3em; 46 | border-radius: 999px; 47 | text-shadow: none; 48 | box-shadow: 0 1px white; 49 | } 50 | 51 | .line-highlight[data-end]:after { 52 | content: attr(data-end); 53 | top: auto; 54 | bottom: .4em; 55 | } 56 | 57 | .line-numbers .line-highlight:before, 58 | .line-numbers .line-highlight:after { 59 | content: none; 60 | } 61 | 62 | pre[id].linkable-line-numbers span.line-numbers-rows { 63 | pointer-events: all; 64 | } 65 | pre[id].linkable-line-numbers span.line-numbers-rows > span:before { 66 | cursor: pointer; 67 | } 68 | pre[id].linkable-line-numbers span.line-numbers-rows > span:hover:before { 69 | background-color: rgba(128, 128, 128, .2); 70 | } 71 | -------------------------------------------------------------------------------- /scripts/pal/prompt/boolean_expressions.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Evaluate the result of a random Boolean expression. 3 | 4 | # Q: not ( ( not not True ) ) is 5 | A: Let's think step by step. 6 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 7 | We first simplify this expression "Z" as follows: "Z = not ( ( not not True ) ) = not ( ( A ) )" where "A = not not True". 8 | Let's evaluate A: A = not not True = not (not True) = not False = True. 9 | Plugging in A, we get: Z = not ( ( A ) ) = not ( ( True ) ) = not True = False. So the answer is False. 
10 | 11 | # Q: True and False and not True and True is 12 | A: Let's think step by step. 13 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 14 | We first simplify this expression "Z" as follows: "Z = True and False and not True and True = A and B" where "A = True and False" and "B = not True and True". 15 | Let's evaluate A: A = True and False = False. 16 | Let's evaluate B: B = not True and True = not (True and True) = not (True) = False. 17 | Plugging in A and B, we get: Z = A and B = False and False = False. So the answer is False. 18 | 19 | # Q: not not ( not ( False ) ) is 20 | A: Let's think step by step. 21 | Remember that (i) expressions inside brackets are always evaluated first and that (ii) the order of operations from highest priority to lowest priority is "not", "and", "or", respectively. 22 | We first simplify this expression "Z" as follows: "Z = not not ( not ( False ) ) = not not ( A )" where "A = not ( False )". 23 | Let's evaluate A: A = not ( False ) = not False = True. 24 | Plugging in A, we get: Z = not not ( A ) = not not (True) = not not False = True. So the answer is True. 
25 | 26 | # Q: {question} 27 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/run_self_consistency.sh: -------------------------------------------------------------------------------- 1 | python scripts/run_eval.py --dataset gsm --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 2 | python scripts/run_eval.py --dataset asdiv --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 3 | python scripts/run_eval.py --dataset svamp --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 4 | python scripts/run_eval.py --dataset date --answer_type str --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file date_understanding_prompt 5 | python scripts/run_eval.py --dataset tracking_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file tracking_three --end "# Q:" 6 | python scripts/run_eval.py --dataset ld_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ld_three 7 | python scripts/run_eval.py --dataset strategy_qa --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file strategy_qa_prompt 8 | python scripts/run_eval.py --dataset boolean_expressions --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file boolean_expressions 9 | python scripts/run_eval.py --dataset snarks --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file snarks 10 | python scripts/run_eval.py --dataset ruin_names --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ruin_names 11 | python scripts/run_eval.py --dataset salient_translation --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file salient_translation 12 | python scripts/run_eval.py --dataset disambiguation_qa --answer_type str 
--max_gens 40 --temperature 0.7 --prompt_type text --prompt_file disambiguation_qa 13 | python scripts/run_eval.py --dataset penguins_in_a_table_text --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file penguins_in_a_table -------------------------------------------------------------------------------- /scripts/pal/core/runtime.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | import copy 17 | import datetime 18 | from typing import Any, Dict 19 | import dateutil.relativedelta 20 | 21 | 22 | class GenericRuntime: 23 | GLOBAL_DICT = {} 24 | LOCAL_DICT = None 25 | HEADERS = [] 26 | def __init__(self): 27 | self._global_vars = copy.copy(self.GLOBAL_DICT) 28 | self._local_vars = copy.copy(self.LOCAL_DICT) if self.LOCAL_DICT else None 29 | 30 | for c in self.HEADERS: 31 | self.exec_code(c) 32 | 33 | def exec_code(self, code_piece: str) -> None: 34 | exec(code_piece, self._global_vars) 35 | 36 | def eval_code(self, expr: str) -> Any: 37 | return eval(expr, self._global_vars) 38 | 39 | def inject(self, var_dict: Dict[str, Any]) -> None: 40 | for k, v in var_dict.items(): 41 | self._global_vars[k] = v 42 | 43 | @property 44 | def answer(self): 45 | return self._global_vars['answer'] 46 | 47 | 48 | class DateRuntime(GenericRuntime): 49 | GLOBAL_DICT = { 50 | 'datetime': datetime.datetime, 51 | 'timedelta': dateutil.relativedelta.relativedelta, 52 | 'relativedelta': dateutil.relativedelta.relativedelta 53 | } 54 | 55 | 56 | class CustomDict(dict): 57 | def __iter__(self): 58 | return list(super().__iter__()).__iter__() 59 | 60 | 61 | class ColorObjectRuntime(GenericRuntime): 62 | GLOBAL_DICT = {'dict': CustomDict} 63 | 64 | -------------------------------------------------------------------------------- /scripts/run_adaptive_consistency.sh: -------------------------------------------------------------------------------- 1 | # Usage: bash scripts/run_adaptive_consistency.sh [optional] 2 | stop_criteria=$1 3 | # if stop_criteria_thresh is none set to -1 4 | stop_criteria_thresh=${2:-"-1"} 5 | 6 | # stop_criteria is one of: 7 | # beta 8 | # dirichlet 9 | # entropy 10 | # random 11 | # majority 12 | 13 | echo $stop_criteria 14 | 15 | python scripts/run_eval.py --dataset gsm --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts --stop_criteria $stop_criteria --stop_criteria_thresh 
$stop_criteria_thresh 16 | python scripts/run_eval.py --dataset asdiv --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 17 | python scripts/run_eval.py --dataset svamp --answer_type float --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file math_prompts 18 | python scripts/run_eval.py --dataset date --answer_type str --max_gens 40 --temperature 0.7 --prompt_type code --prompt_file date_understanding_prompt 19 | python scripts/run_eval.py --dataset tracking_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file tracking_three --end "# Q:" 20 | python scripts/run_eval.py --dataset ld_three --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ld_three 21 | python scripts/run_eval.py --dataset strategy_qa --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file strategy_qa_prompt 22 | python scripts/run_eval.py --dataset boolean_expressions --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file boolean_expressions 23 | python scripts/run_eval.py --dataset snarks --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file snarks 24 | python scripts/run_eval.py --dataset ruin_names --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file ruin_names 25 | python scripts/run_eval.py --dataset salient_translation --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file salient_translation 26 | python scripts/run_eval.py --dataset disambiguation_qa --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file disambiguation_qa 27 | python scripts/run_eval.py --dataset penguins_in_a_table_text --answer_type str --max_gens 40 --temperature 0.7 --prompt_type text --prompt_file penguins_in_a_table -------------------------------------------------------------------------------- 
/scripts/pal/prompt/penguins_in_a_table.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Answer questions about a table of penguins and their attributes. 3 | 4 | # Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. We now add a penguin to the table: 5 | James, 12, 90, 12 6 | How many penguins are less than 8 years old? 7 | Options: 8 | (A) 1 9 | (B) 2 10 | (C) 3 11 | (D) 4 12 | (E) 5 13 | A: Let's think step by step. 14 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 15 | Now, we add James to this table: James is 12 years old. 16 | The penguins that are less than 8 years old are Louis and Bernard. 17 | There are 2 penguins less than 8 years old. So the answer is (B). 18 | 19 | # Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. Which is the youngest penguin? 20 | Options: 21 | (A) Louis 22 | (B) Bernard 23 | (C) Vincent 24 | (D) Gwen 25 | (E) James 26 | A: Let's think step by step. 27 | This question focuses on age. We know the following: Louis is 7 years old, Bernard is 5 years old, Vincent is 9 years old, and Gwen is 8 years old. 28 | According to the table, Bernard (5) is the youngest amongst them. 29 | The youngest penguin is Bernard. So the answer is (B). 
30 | 31 | # Q: Here is a table where the first line is a header and each subsequent line is a penguin: name, age, height (cm), weight (kg) Louis, 7, 50, 11 Bernard, 5, 80, 13 Vincent, 9, 60, 11 Gwen, 8, 70, 15 For example: the age of Louis is 7, the weight of Gwen is 15 kg, the height of Bernard is 80 cm. What is the name of the second penguin sorted by alphabetic order? 32 | Options: 33 | (A) Louis 34 | (B) Bernard 35 | (C) Vincent 36 | (D) Gwen 37 | (E) James 38 | A: Let's think step by step. 39 | This question focuses on the name. We know the following: The names of the penguin in the table are Louis, Bernard, Vincent, and Gwen. 40 | When we sort their names alphabetically, we get Bernard, Gwen, Louis, Vincent. 41 | The name of the second penguin sorted by alphabetical order is Gwen. 42 | The name of the second penguin sorted by alphabetic order is Gwen. So the answer is (D). 43 | 44 | # Q: {question} 45 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/pal/prompt/ld_three.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | A logical deduction task which requires deducing the order of a sequence of objects. 3 | 4 | # Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. In a golf tournament, there were three golfers: Amy, Eli, and Eve. Eve finished above Amy. Eli finished below Amy. 5 | Options: 6 | (A) Amy finished last 7 | (B) Eli finished last 8 | (C) Eve finished last 9 | A: Let's think step by step. 10 | (1) Eve finished above Amy: "(above) ? Eve ? Amy ? (below)". 11 | (2) Eli finished below Amy: "(above) ? Amy ? Eli ? (below)". 12 | (3) Combining (1) and (2) we get the following ordering: "(above) Eve Amy Eli (below)". 13 | According to this ordering, the person who finished last (the one at the bottom of this list) is Eli. 
14 | Eli finished last. So the answer is (B). 15 | 16 | # Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a white book, a green book, and an orange book. The green book is to the right of the white book. The orange book is the rightmost. 17 | Options: 18 | (A) The white book is the leftmost 19 | (B) The green book is the leftmost 20 | (C) The orange book is the leftmost 21 | A: Let's think step by step. 22 | (1) The green book is to the right of the white book: "(left) ? white ? green ? (right)". 23 | (2) The orange book is the rightmost: "(left) ? white ? green orange (right)". 24 | (3) Combining (1) and (2) we get the following ordering: "(left) white green orange (right)". 25 | According to this ordering, the leftmost book is the white book. 26 | The white book is the leftmost. So the answer is (A). 27 | 28 | # Q: The following paragraphs each describe a set of three objects arranged in a fixed order. The statements are logically consistent within each paragraph. On a shelf, there are three books: a red book, a gray book, and a white book. The white book is to the left of the gray book. The red book is the second from the left. 29 | Options: 30 | (A) The red book is the leftmost 31 | (B) The gray book is the leftmost 32 | (C) The white book is the leftmost 33 | A: Let's think step by step. 34 | (1) The white book is to the left of the gray book: "(left) ? white ? gray ? (right)". 35 | (2) The red book is the second from the left: "(left) ? white red gray ? (right)". 36 | (3) Combining (1) and (2) we get the following ordering: "(left) white red gray (right)". 37 | According to this ordering, the leftmost book is the white book. 38 | The white book is the leftmost. So the answer is (C). 
window.HELP_IMPROVE_VIDEOJS = false;

var INTERP_BASE = "./static/interpolation/stacked";
var NUM_INTERP_FRAMES = 240;

var interp_images = [];

// Preload every interpolation frame up front so slider scrubbing is instant.
function preloadInterpolationImages() {
  for (var i = 0; i < NUM_INTERP_FRAMES; i++) {
    var path = INTERP_BASE + '/' + String(i).padStart(6, '0') + '.jpg';
    interp_images[i] = new Image();
    interp_images[i].src = path;
  }
}

// Show frame i in the wrapper; disable drag/context-menu so the image
// behaves like a scrubber handle rather than a normal <img>.
function setInterpolationImage(i) {
  var image = interp_images[i];
  image.ondragstart = function() { return false; };
  image.oncontextmenu = function() { return false; };
  $('#interpolation-image-wrapper').empty().append(image);
}


$(document).ready(function() {
  // Check for click events on the navbar burger icon
  $(".navbar-burger").click(function() {
    // Toggle the "is-active" class on both the "navbar-burger" and the "navbar-menu"
    $(".navbar-burger").toggleClass("is-active");
    $(".navbar-menu").toggleClass("is-active");
  });

  var options = {
    slidesToScroll: 1,
    slidesToShow: 3,
    loop: true,
    infinite: true,
    autoplay: false,
    autoplaySpeed: 3000,
  }

  // Initialize all div with carousel class
  var carousels = bulmaCarousel.attach('.carousel', options);

  // Loop on each carousel initialized
  for (var i = 0; i < carousels.length; i++) {
    // Add listener to event
    carousels[i].on('before:show', state => {
      console.log(state);
    });
  }

  // Access to bulmaCarousel instance of an element
  var element = document.querySelector('#my-element');
  if (element && element.bulmaCarousel) {
    // bulmaCarousel instance is available as element.bulmaCarousel.
    // FIX: bulma-carousel emits colon-namespaced events ('before:show',
    // 'after:show') — matching the listeners attached above. The previous
    // 'before-show' event name never fires.
    element.bulmaCarousel.on('before:show', function(state) {
      console.log(state);
    });
  }

  /*var player = document.getElementById('interpolation-video');
  player.addEventListener('loadedmetadata', function() {
    $('#interpolation-slider').on('input', function(event) {
      console.log(this.value, player.duration);
      player.currentTime = player.duration / 100 * this.value;
    })
  }, false);*/
  preloadInterpolationImages();

  $('#interpolation-slider').on('input', function(event) {
    setInterpolationImage(this.value);
  });
  setInterpolationImage(0);
  $('#interpolation-slider').prop('max', NUM_INTERP_FRAMES - 1);

  bulmaSlider.attach();

})
16 | 17 | # Q: Alice, Bob, and Claire are playing a game. At the start of the game, they are each holding a ball: Alice has a white ball, Bob has a purple ball, and Claire has a pink ball. 18 | As the game progresses, pairs of players trade balls. First, Bob and Alice swap balls. Then, Bob and Claire swap balls. Finally, Bob and Alice swap balls. At the end of the game, Alice has the 19 | Options: 20 | (A) white ball 21 | (B) purple ball 22 | (C) pink ball 23 | A: Let's think step by step. 24 | (0) At the start: Alice: white, Bob: purple, Claire: pink. 25 | (1) Bob and Alice swap balls: Alice: purple, Bob: white, Claire: pink. 26 | (2) Bob and Claire swap balls: Alice: purple, Bob: pink, Claire: white. 27 | (3) Bob and Alice swap balls: Alice: pink, Bob: purple, Claire: white. 28 | At the end of the game, Alice has the pink ball. So the answer is (C). 29 | 30 | # Q: Alice, Bob, and Claire are dancers at a square dance. At the start of a song, they each have a partner: Alice is dancing with Lola, Bob is dancing with Rodrigo, and Claire is dancing with Patrick. 31 | Throughout the song, the dancers often trade partners. First, Alice and Bob switch partners. Then, Claire and Bob switch partners. Finally, Bob and Alice switch partners. At the end of the dance, Alice is dancing with 32 | Options: 33 | (A) Lola 34 | (B) Rodrigo 35 | (C) Patrick 36 | A: Let's think step by step. 37 | (0) At the start: Alice: Lola, Bob: Rodrigo, Claire: Patrick. 38 | (1) Alice and Bob switch partners: Alice: Rodrigo, Bob: Lola, Claire: Patrick. 39 | (2) Claire and Bob switch partners: Alice: Rodrigo, Bob: Patrick, Claire: Lola. 40 | (3) Bob and Alice switch partners: Alice: Patrick, Bob: Rodrigo, Claire: Lola. 41 | At the end of the dance, Alice is dancing with Patrick. So the answer is (C). 
42 | 43 | # Q: {question} 44 | '''.strip() + '\n' -------------------------------------------------------------------------------- /docs/static/css/prism_bak.css: -------------------------------------------------------------------------------- 1 | /* PrismJS 1.29.0 2 | https://prismjs.com/download.html#themes=prism-okaidia&languages=markup+css+clike+javascript+diff+python&plugins=line-highlight+diff-highlight */ 3 | code[class*=language-],pre[class*=language-]{color:#f8f8f2;background:0 0;text-shadow:0 1px rgba(0,0,0,.3);font-family:Consolas,Monaco,'Andale Mono','Ubuntu Mono',monospace;font-size:1em;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.5;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-hyphens:none;-moz-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]{padding:1em;margin:.5em 0;overflow:auto;border-radius:.3em}:not(pre)>code[class*=language-],pre[class*=language-]{background:#272822}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal}.token.cdata,.token.comment,.token.doctype,.token.prolog{color:#8292a2}.token.punctuation{color:#f8f8f2}.token.namespace{opacity:.7}.token.constant,.token.deleted,.token.property,.token.symbol,.token.tag{color:#f92672}.token.boolean,.token.number{color:#ae81ff}.token.attr-name,.token.builtin,.token.char,.token.inserted,.token.selector,.token.string{color:#a6e22e}.language-css .token.string,.style .token.string,.token.entity,.token.operator,.token.url,.token.variable{color:#f8f8f2}.token.atrule,.token.attr-value,.token.class-name,.token.function{color:#e6db74}.token.keyword{color:#66d9ef}.token.important,.token.regex{color:#fd971f}.token.bold,.token.important{font-weight:700}.token.italic{font-style:italic}.token.entity{cursor:help} 4 | pre[data-line]{position:relative;padding:1em 0 1em 3em}.line-highlight{position:absolute;left:0;right:0;padding:inherit 
0;margin-top:1em;background:hsla(24,20%,50%,.08);background:linear-gradient(to right,hsla(24,20%,50%,.1) 70%,hsla(24,20%,50%,0));pointer-events:none;line-height:inherit;white-space:pre}@media print{.line-highlight{-webkit-print-color-adjust:exact;color-adjust:exact}}.line-highlight:before,.line-highlight[data-end]:after{content:attr(data-start);position:absolute;top:.4em;left:.6em;min-width:1em;padding:0 .5em;background-color:hsla(24,20%,50%,.4);color:#f4f1ef;font:bold 65%/1.5 sans-serif;text-align:center;vertical-align:.3em;border-radius:999px;text-shadow:none;box-shadow:0 1px #fff}.line-highlight[data-end]:after{content:attr(data-end);top:auto;bottom:.4em}.line-numbers .line-highlight:after,.line-numbers .line-highlight:before{content:none}pre[id].linkable-line-numbers span.line-numbers-rows{pointer-events:all}pre[id].linkable-line-numbers span.line-numbers-rows>span:before{cursor:pointer}pre[id].linkable-line-numbers span.line-numbers-rows>span:hover:before{background-color:rgba(128,128,128,.2)} 5 | pre.diff-highlight>code .token.deleted:not(.prefix),pre>code.diff-highlight .token.deleted:not(.prefix){background-color:rgba(255,0,0,.1);color:inherit;display:block}pre.diff-highlight>code .token.inserted:not(.prefix),pre>code.diff-highlight .token.inserted:not(.prefix){background-color:rgba(0,255,128,.1);color:inherit;display:block} 6 | -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | 
.publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | border: 1px solid #bbb; 121 | border-radius: 10px; 122 | padding: 0; 123 | font-size: 0; 124 | } 125 | 126 | .results-carousel video { 127 | margin: 0; 128 | } 129 | 130 | 131 | .interpolation-panel { 132 | background: #f5f5f5; 133 | border-radius: 10px; 134 | } 135 | 136 | .interpolation-panel .interpolation-image { 137 | width: 100%; 138 | border-radius: 5px; 139 | } 140 | 141 | .interpolation-video-column { 142 
# Copyright 2022 PAL Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Few-shot PAL prompt for the "date understanding" task: each exemplar answers
# a date question with a short datetime/relativedelta program whose final
# expression is the formatted answer.
#
# Fixes relative to the upstream prompt:
#  - 4/19/1969 exemplar: the final expression now prints `later` (the value
#    actually computed for "24 hours later"); it previously printed `today`,
#    demonstrating a wrong answer (04/19/1969 instead of 04/20/1969).
#  - 3/11/2002 exemplar: the reasoning comment now says "3/12/2002",
#    consistent with the `datetime(2002, 3, 12)` on the next line (it
#    previously said "3/1/2002").
# The "Feburary" misspelling is kept on purpose: it matches the wording of the
# dataset questions.
MATH_PROMPT = """
# Q: 2015 is coming in 36 hours. What is the date one week from today in MM/DD/YYYY?
# If 2015 is coming in 36 hours, then today is 36 hours before.
today = datetime(2015, 1, 1) - relativedelta(hours=36)
# One week from today,
one_week_from_today = today + relativedelta(weeks=1)
# The answer formatted with %m/%d/%Y is
one_week_from_today.strftime('%m/%d/%Y')

# Q: The first day of 2019 is a Tuesday, and today is the first Monday of 2019. What is the date today in MM/DD/YYYY?
# If the first day of 2019 is a Tuesday, and today is the first Monday of 2019, then today is 6 days later.
today = datetime(2019, 1, 1) + relativedelta(days=6)
# The answer formatted with %m/%d/%Y is
today.strftime('%m/%d/%Y')

# Q: The concert was scheduled to be on 06/01/1943, but was delayed by one day to today. What is the date 10 days ago in MM/DD/YYYY?
# If the concert was scheduled to be on 06/01/1943, but was delayed by one day to today, then today is one day later.
today = datetime(1943, 6, 1) + relativedelta(days=1)
# 10 days ago,
ten_days_ago = today - relativedelta(days=10)
# The answer formatted with %m/%d/%Y is
ten_days_ago.strftime('%m/%d/%Y')

# Q: It is 4/19/1969 today. What is the date 24 hours later in MM/DD/YYYY?
# It is 4/19/1969 today.
today = datetime(1969, 4, 19)
# 24 hours later,
later = today + relativedelta(hours=24)
# The answer formatted with %m/%d/%Y is
later.strftime('%m/%d/%Y')

# Q: Jane thought today is 3/11/2002, but today is in fact Mar 12, which is 1 day later. What is the date 24 hours later in MM/DD/YYYY?
# If Jane thought today is 3/11/2002, but today is in fact Mar 12, then today is 3/12/2002.
today = datetime(2002, 3, 12)
# 24 hours later,
later = today + relativedelta(hours=24)
# The answer formatted with %m/%d/%Y is
later.strftime('%m/%d/%Y')

# Q: Jane was born on the last day of Feburary in 2001. Today is her 16-year-old birthday. What is the date yesterday in MM/DD/YYYY?
# If Jane was born on the last day of Feburary in 2001 and today is her 16-year-old birthday, then today is 16 years later.
today = datetime(2001, 2, 28) + relativedelta(years=16)
# Yesterday,
yesterday = today - relativedelta(days=1)
# The answer formatted with %m/%d/%Y is
yesterday.strftime('%m/%d/%Y')

# Q: {question}
""".strip() + '\n'
+ 25 | "Make sure the language definition is loaded or use Prism's Autoloader plugin."); 26 | } 27 | 28 | var lang = env.language; 29 | if (LANGUAGE_REGEX.test(lang) && !Prism.languages[lang]) { 30 | Prism.languages[lang] = Prism.languages.diff; 31 | } 32 | }); 33 | 34 | Prism.hooks.add('wrap', function (env) { 35 | var diffLanguage; var diffGrammar; 36 | 37 | if (env.language !== 'diff') { 38 | var langMatch = LANGUAGE_REGEX.exec(env.language); 39 | if (!langMatch) { 40 | return; // not a language specific diff 41 | } 42 | 43 | diffLanguage = langMatch[1]; 44 | diffGrammar = Prism.languages[diffLanguage]; 45 | } 46 | 47 | var PREFIXES = Prism.languages.diff && Prism.languages.diff.PREFIXES; 48 | 49 | // one of the diff tokens without any nested tokens 50 | if (PREFIXES && env.type in PREFIXES) { 51 | /** @type {string} */ 52 | var content = env.content.replace(HTML_TAG, ''); // remove all HTML tags 53 | 54 | /** @type {string} */ 55 | var decoded = content.replace(/</g, '<').replace(/&/g, '&'); 56 | 57 | // remove any one-character prefix 58 | var code = decoded.replace(/(^|[\r\n])./g, '$1'); 59 | 60 | // highlight, if possible 61 | var highlighted; 62 | if (diffGrammar) { 63 | highlighted = Prism.highlight(code, diffGrammar, diffLanguage); 64 | } else { 65 | highlighted = Prism.util.encode(code); 66 | } 67 | 68 | // get the HTML source of the prefix token 69 | var prefixToken = new Prism.Token('prefix', PREFIXES[env.type], [/\w+/.exec(env.type)[0]]); 70 | var prefix = Prism.Token.stringify(prefixToken, env.language); 71 | 72 | // add prefix 73 | var lines = []; var m; 74 | HTML_LINE.lastIndex = 0; 75 | while ((m = HTML_LINE.exec(highlighted))) { 76 | lines.push(prefix + m[0]); 77 | } 78 | if (/(?:^|[\r\n]).$/.test(decoded)) { 79 | // because both "+a\n+" and "+a\n" will map to "a\n" after the line prefixes are removed 80 | lines.push(prefix); 81 | } 82 | env.content = lines.join(''); 83 | 84 | if (diffGrammar) { 85 | env.classes.push('language-' + diffLanguage); 
86 | } 87 | } 88 | }); 89 | 90 | }()); 91 | -------------------------------------------------------------------------------- /scripts/pal/prompt/snarks.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Determine which of two sentences is sarcastic. 3 | 4 | According to Cambridge University Dictionary, sarcasm is "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way." Sarcastic sentences often contain satirical or ironic utterances, hyperboles, ambivalent or witty remarks. 5 | 6 | # Q: Which statement is sarcastic? 7 | Options: 8 | (A) Yes, because having interests and actively researching them is a huge waste 9 | (B) Yes, because having interests and actively researching them is a huge deal 10 | A: Let's think step by step. 11 | If we look at (A), it says that having interests and actively researching them is a huge waste, implying that it is a useless effort. However, we know that having interests and actively researching them is typically not a waste but rather is beneficial to the individual. The presence of such a juxtaposition in (A) suggests that it contains a taste of irony and sarcasm. 12 | If we look at (B), it says that having interests and actively researching them is a huge deal, implying that it is an important and consequential effort. This is arguably a neutral and correct statement. 13 | Above the above, the sarcastic option is (A). So the answer is (A). 14 | 15 | # Q: Which statement is sarcastic? 16 | Options: 17 | (A) No one is going to disagree with you on this. Avoiding ad hominem attacks really help your case 18 | (B) No one is going to disagree with you on this. Ad hominem attacks really help your case 19 | A: Let's think step by step. 20 | If we look at (A), it says that avoiding ad hominem attacks really help your case, implying that ad hominem attacks are adverse and injurious. 
Because ad hominem attacks are adressed at a person rather than an idea, it is indeed true that avoiding them is often useful and helpful; so, (A) is a neutral (valid and agreeable) statement. 21 | If we look at (B), it says that ad hominem attacks really help your case, implying that ad hominem attacks are a positive thing. However, we stated previously that ad hominem attacks are often not useful or constructive. The speaker in this sentence therefore seems to mean the opposite of what they are saying; so, there appears to have a taste of irony and sarcasm in (B). 22 | Above the above, the sarcastic option is (B). So the answer is (B). 23 | 24 | # Q: Which statement is sarcastic? 25 | Options: 26 | (A) Consistency in the league's punishments? What do you think this is supposed to be, politics? 27 | (B) Consistency in the league's punishments? What do you think this is supposed to be, moral? 28 | A: Let's think step by step. 29 | If we look at (A), it likens the consistency in the league's punishments with that in politics. Because politics or political affairs are often not considered to be consistent or dependable, this sentence appears to be satirical. 30 | If we look at (B), it likens the consistency in the league's punishments with that in morality. Discussing the consistency of the league's punishments in the context of morality, ethics, or law makes sense and does not appear to make a satirical point about anything. 31 | Above the above, the sarcastic option is (A). So the answer is (A). 
"""Evaluate cached model generations under an Adaptive-Consistency stop criterion.

Replays stored per-question answer lists, asking the stopping criterion after
each additional generation whether sampling could have stopped, and reports
accuracy plus the average number of generations used.
"""

import argparse
import json

try:
    from tqdm import tqdm
except ImportError:  # progress bar is optional; evaluation works without it
    def tqdm(iterable, total=None):
        return iterable


def _parse_answers(answers, eval_as_str):
    """Coerce raw answers to comparable values, silently dropping unparseable ones.

    String mode keeps non-empty strings; numeric mode keeps values convertible
    to float. Mirrors the tolerant behaviour the evaluation relies on.
    """
    parsed = []
    for ans in answers:
        try:
            if eval_as_str:
                text = str(ans)
                if text.strip() == '':
                    continue
                parsed.append(text)
            else:
                parsed.append(float(ans))
        except (TypeError, ValueError):
            # Unparseable generation: skip it rather than abort the question.
            pass
    return parsed


def _is_correct(prediction, target, eval_as_str):
    """Return True when `prediction` matches `target` (exact string match, or
    numeric match within 1e-3). Logs and returns False on conversion errors."""
    try:
        if eval_as_str:
            return str(prediction).strip() == str(target).strip()
        return abs(float(str(prediction).strip()) - float(target)) < 1e-3
    except (TypeError, ValueError) as e:
        print('Error', prediction, e)
        return False


def main(dt, ac, min_gens=1, max_gens=40, eval_as_str=False):
    """Replay generations through the stopping criterion and score them.

    Args:
        dt: list of records with list-valued 'answers' and 'scores' plus a
            'target'; list fields are truncated to `max_gens` entries.
        ac: object exposing should_stop(answers, return_dict=True) ->
            {'most_common': ..., 'stop': bool} (e.g. adaptive_consistency.AC).
        min_gens: smallest number of generations to consider per question.
        max_gens: cap on generations read from each record.
        eval_as_str: compare answers as strings instead of floats.

    Returns:
        (correct_answers, total_answers, total_gens) tuple.
    """
    correct_answers = 0
    total_answers = len(dt)
    total_gens = 0
    for record in tqdm(dt, total=len(dt)):
        record = {k: (v[:max_gens] if isinstance(v, list) else v)
                  for k, v in record.items()}
        num_avail = len(record['scores'])
        for m in range(min_gens, num_avail + 1):
            parsed = _parse_answers(record['answers'][:m], eval_as_str)
            if not parsed:
                # Nothing parseable yet; if the budget is exhausted, charge
                # the full cost and move on to the next question.
                if m == num_avail:
                    total_gens += m
                continue
            outp = ac.should_stop(parsed, return_dict=True)
            if outp['stop'] or m == num_avail:
                total_gens += m
                if _is_correct(outp['most_common'], record['target'], eval_as_str):
                    correct_answers += 1
                break
    return correct_answers, total_answers, total_gens


if __name__ == '__main__':

    # Usage: python examples/eval_outputs.py --output_file examples/outputs/outputs.jsonl --stop_criteria beta --stop_criteria_thresh 0.95

    # Project import kept inside the CLI guard so `main` stays importable
    # (e.g. for testing) without the adaptive_consistency package installed.
    from adaptive_consistency import AC, stop_criteria_dict

    parser = argparse.ArgumentParser()
    parser.add_argument('--output_file', type=str, required=True)
    parser.add_argument('--stop_criteria', type=str, default=None)
    parser.add_argument('--stop_criteria_thresh', type=float, required=False, default=None)

    args = parser.parse_args()

    if args.stop_criteria is None:
        args.stop_criteria = 'always_false'
        print('No Stop Criteria Provided. Running Self-Consistency')

    criteria_cls = stop_criteria_dict[args.stop_criteria]
    if args.stop_criteria_thresh is None or args.stop_criteria_thresh == -1:
        ac = AC(max_gens=1000, stop_criteria=criteria_cls())
    else:
        ac = AC(max_gens=1000, stop_criteria=criteria_cls(conf_thresh=args.stop_criteria_thresh))

    # Close the JSONL file deterministically (the original leaked the handle).
    with open(args.output_file) as f:
        dt = [json.loads(line) for line in f]

    # GSM8K / ASDiv / SVAMP targets are numeric; everything else string-matches.
    eval_as_str = not any(tag in args.output_file for tag in ('gsm', 'asdiv', 'svamp'))

    correct_answers, total_answers, total_gens = main(dt, ac, eval_as_str=eval_as_str)
    print(f'Accuracy: {correct_answers}/{total_answers} ({correct_answers/total_answers*100:.2f}%)')
    print(f'Average Gens: {total_gens/total_answers:.2f}')
center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform 
# Few-shot chain-of-thought prompt for the BIG-Bench-Hard "ruin names" task.
# Fix relative to the previous version: the second exemplar's conclusion line
# ("Above the above, the only humorous edit is (C).") was duplicated; the
# duplicate is removed so all three exemplars end with a single, uniform
# "Above the above, ... So the answer is (X)." line.
MATH_PROMPT = '''
Select the humorous edit that 'ruins' the input movie or musical artist name.

# Q: Which of the following is a humorous edit of this artist or movie name: 'whitesnake'?
Options:
(A) whitesnape
(B) whitesnapke
(C) whitesnuake
(D) mwhitesnake
A: Let's think step by step.
The original name is "whitesnake". This is the name of an old English hard rock band. It is a compound word, formed by the words "white" and "snake".
(A) "whitesnape": It is formed by the combination of "white" and "snake"; therefore, "snake" has been changed to "snape". Snape makes a reference to the fictional character Severus Snape in the Harry Potter series, so (A) is indeed a meaningful and funny edit.
(B) "whitesnapke": It is formed by the combination of "white" and "snapke", but "snapke" is not an actual word; therefore, "whitesnapke" is not humorous.
(C) "whitesnuake": It is formed by the combination of "white" and "snuake", but "snuake" is not an actual word; therefore, "whitesnuake" is not humorous.
(D) "mwhitesnake": It is formed by the combination of "m", "white", and "snake", but the prefix "-m "seems arbitrary; therefore, "mwhitesnake" is not meaningful or humorous.
Above the above, the only humorous edit is (A). So the answer is (A).

# Q: Which of the following is a humorous edit of this artist or movie name: 'one of our dinosaurs is missing'?
Options:
(A) ofne of our dinosaurs is missing
(B) one af our dinosaurs is missing
(C) one of our dinosaurs is pissing
(D) one of our dinosaur is missing
A: Let's think step by step.
The original name is "one of our dinosaurs is missing". This is the name of an old British movie.
(A) "ofne of our dinosaurs is missing": Here "one of" is changed to "ofne", but the word "ofne" is not an actual word.
(B) "one af our dinosaurs is missing": Here the word "of" is changed to "af", but the word "af" is not an actual word.
(C) "one of our dinosaurs is pissing": Here the word "missing" is changed to "pissing", and "one of our dinosaurs is pissing" is indeed a very whimsical and mischievous edit. This change truly ruins the original title of the movie.
(D) "one of our dinosaur is missing": Here the word "dinosaurs" is changed to "dinosaur", but "dinosaur" is singular but should be plural in the title; this change therefore feels arbitrary and not humorous.
Above the above, the only humorous edit is (C). So the answer is (C).

# Q: Which of the following is a humorous edit of this artist or movie name: 'counting crows'?
Options:
(A) countingy crows
(B) counting cows
(C) courting crows
(D) coutnting crows
A: Let's think step by step.
The original name is "counting crows". This is the name of an American rock band. Historically, the band name comes from the British nursery rhyme "One for Sorrow", which is about counting of magpies.
(A) "countingy crows": Here the word "counting" is changed to "countingy", but the word "countingy" is not an actual word.
(B) "counting cows": Here the word "crows" is changed to "cows", and this is indeed a playful and meaningful edit that ruins the original name of the band.
(C) "courting crows": Here the word "counting" is changed to "courting", and "courting" is an actual word; however, "courting crows" does not sound as humorous as "counting cows".
(D) "coutnting crows": Here the word "counting" is changed to "coutnting", but the word "coutnting" is not an actual word.
Above the above, the only humorous edit is (B). So the answer is (B).

# Q: {question}
'''.strip() + '\n'
Let X be the chief and Y the counselor. The sentence is then of the form "X told Y that (X or Y) did something." 12 | Let's consider Y first: "X told Y that Y did something." This case does not make much sense, as Y would already have the information that Y did something, because it is information about themself. 13 | Now, consider X: "X told Y that X did something." This makes sense, because X would be sharing some information about themself that Y might not have known before. 14 | Because in this context, X is the chief and Y is the counselor, the answer should be the chief. So the answer is (A). 15 | 16 | # Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 17 | Sentence: The manager sent a message to the secretary, but he didn't reply yet. 18 | Options: 19 | (A) The secretary didn't reply yet 20 | (B) The manager didn't reply yet 21 | (C) Ambiguous 22 | A: Let's think step by step. 23 | Here we need to determine who the pronoun "he" might be referring to. There are two possible referents for "he", namely the manager and the secretary. The verbs "sent" and "reply" might be able to help us determine which one is more likely (if either). Let X be the manager and Y the secretary. The sentence is then of the form "X sent a message to Y, but (X or Y) didn't reply yet." 24 | Let's consider Y first: "X sent a message to Y, but Y didn't reply yet." This case makes sense, because of the implicit causality of the sentence. Y was the receiver of the message, but Y didn't get back to X yet. 25 | Now, consider X: "X sent a message to Y, but X didn't reply yet." This case doesn't make sense, because X was the initial sender of the message, so it is now Y's turn to write back to X. 26 | Because in this context, X is the manager and Y is the secretary, the answer should be the secretary. So the answer is (A). 
27 | 28 | # Q: In the following sentences, explain the antecedent of the pronoun (which thing the pronoun refers to), or state that it is ambiguous. 29 | Sentence: Bailey will plan to meet the director at his office 30 | Options: 31 | (A) It will be Bailey's office 32 | (B) It will be the director's office 33 | (C) Ambiguous 34 | A: Let's think step by step. 35 | Here we need to determine who the pronoun "his" might be referring to. There are two possible referents for "his", namely Bailey's and the director's. The verb phrase "plan to meet" might be able to help us determine which one is more likely (if either). Let X be Bailey and Y the director. The sentence is then of the form "X will plan to meet Y at (X or Y)'s office." 36 | Let's consider Y first: "X will plan to meet Y at Y's office." This case makes sense, because X might want to meet up with Y at Y's office. 37 | Now, consider X: "X will plan to meet Y at X's office." This case also makes sense, because X might want to meet up with Y at X's own office. 38 | Because both X and Y are possible at the same time, we conclude that the antecedent of the pronoun is ambiguous. So the answer is (C). 39 | 40 | # Q: {question} 41 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/pal/core/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import openai 16 | import time 17 | import os 18 | 19 | openai.api_key = os.getenv('OPENAI_API_KEY') 20 | openai.organization = os.getenv('OPENAI_API_ORG') 21 | 22 | 23 | 24 | # GPT-3 API 25 | def call_gpt(prompt, model='code-davinci-002', stop=None, temperature=0., top_p=1.0, 26 | max_tokens=128, majority_at=None, logprobs = 0): 27 | num_completions = majority_at if majority_at is not None else 1 28 | num_completions_batch_size = 5 29 | 30 | completions = [] 31 | all_data = [] 32 | for i in range(20 * (num_completions // num_completions_batch_size + 1)): 33 | try: 34 | requested_completions = min(num_completions_batch_size, num_completions - len(completions)) 35 | if model == "gpt-3.5-turbo": 36 | # from pdb import set_trace; set_trace() 37 | ans = openai.ChatCompletion.create( 38 | model = model, 39 | max_tokens = max_tokens, 40 | # stop = stop, 41 | messages = [{"role": "user", "content": prompt},], 42 | temperature = temperature, 43 | top_p = top_p, 44 | n = requested_completions, 45 | # best_of = requested_completions 46 | ) 47 | # from pdb import set_trace; set_trace() 48 | 49 | completions.extend([choice['message']['content'] for choice in ans['choices']]) 50 | else: 51 | ans = openai.Completion.create( 52 | model=model, 53 | max_tokens=max_tokens, 54 | stop=stop, 55 | prompt=prompt, 56 | temperature=temperature, 57 | top_p=top_p, 58 | n=requested_completions, 59 | logprobs = logprobs, 60 | best_of=requested_completions) 61 | # from pdb import set_trace as bp 62 | # bp() 63 | all_data.extend([choice['logprobs'] for choice in ans['choices']]) 64 | completions.extend([choice['text'] for choice in ans['choices']]) 65 | if len(completions) >= num_completions: 66 | if logprobs !=0: 67 | return completions[:num_completions], all_data[:num_completions] 68 | else: 69 | return completions[:num_completions] 70 | except openai.error.RateLimitError 
as e: 71 | print(e, type(e)) 72 | print('Sleeping', min(i**2, 60)) 73 | time.sleep(min(i**2, 60)) 74 | except openai.error.InvalidRequestError as e: 75 | print(e, type(e)) 76 | max_tokens = int(max_tokens // 2) 77 | continue 78 | except Exception as e: 79 | print(e, type(e)) 80 | # 3/0 81 | print('Sleeping', min(i**2, 60)) 82 | time.sleep(min(i**2, 60)) 83 | continue 84 | raise RuntimeError('Failed to call GPT API') 85 | -------------------------------------------------------------------------------- /adaptive_consistency/main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from typing import List, Any 4 | import warnings 5 | 6 | from .stopping_criterias import * 7 | 8 | class AC: 9 | ''' 10 | A class for using Adaptive Consistency for your LLM generations. 11 | 12 | Args: 13 | max_gens (int): Maximum number of generations to perform for each question. 14 | stop_criteria : StoppingCriterias: The stopping criteria function to use. 15 | verbose (bool): Whether to print verbose output. 16 | 17 | Attributes: 18 | max_gens (int): Maximum number of generations to perform. 19 | verbose (bool): Whether to print verbose output. 20 | stop_criteria: The stopping criteria function to use. 21 | ''' 22 | 23 | def __init__(self, max_gens : int = 40, stop_criteria = BetaStoppingCriteria, verbose : bool = False) -> None: 24 | ''' 25 | Initializes an instance of the AC class. 26 | 27 | Args: 28 | max_gens (int): Maximum number of generations to perform. 29 | stop_criteria (StoppingCriterias): The stopping criteria function to use. 30 | verbose (bool): Whether to print verbose output. 31 | ''' 32 | 33 | self.max_gens = max_gens 34 | self.verbose = verbose 35 | self.set_stop_criteria(stop_criteria) 36 | 37 | 38 | def set_max_gens(self, max_gens : int) -> None: 39 | ''' 40 | Sets the maximum number of generations per question. 41 | 42 | Args: 43 | max_gens (int): Maximum number of generations to perform. 
44 | ''' 45 | self.max_gens = max_gens 46 | 47 | def set_stop_criteria(self, stop_criteria : BetaStoppingCriteria) -> None: 48 | ''' 49 | Sets the stopping criteria function. 50 | 51 | Args: 52 | stop_criteria (StoppingCriterias): The stopping criteria function to use. 53 | ''' 54 | if isinstance(stop_criteria, str): 55 | if stop_criteria == 'beta': 56 | self.stop_criteria = BetaStoppingCriteria() 57 | elif stop_criteria == 'dirichlet': 58 | self.stop_criteria = DirichletStoppingCriteria() 59 | elif stop_criteria == 'random': 60 | self.stop_criteria = RandomStoppingCriteria() 61 | elif stop_criteria == 'majority': 62 | self.stop_criteria = MajorityStoppingCriteria() 63 | elif stop_criteria == 'entropy': 64 | self.stop_criteria = EntropyStoppingCriteria() 65 | else: 66 | raise ValueError(f"Unknown stopping criteria: {stop_criteria}") 67 | 68 | elif isinstance(stop_criteria, StoppingCriterias): 69 | # The function is already initialized, so we can use it directly 70 | self.stop_criteria = stop_criteria 71 | elif isinstance(stop_criteria, type): 72 | # The function is not initialized, so we need to initialize it 73 | self.stop_criteria = stop_criteria() 74 | 75 | def should_stop(self, answers : List[Any], return_dict : bool = False) -> bool: 76 | ''' 77 | Checks if the answers are consistent based on Adaptive Consistency Algorithm and corresponding Stopping Criteria. 78 | 79 | Args: 80 | answers (List): A list of answers to check consistency. 81 | return_dict (bool): Whether to return the full dictionary of output. 82 | 83 | Returns: 84 | Union[bool, Dict]: Whether the answers are consistent or not. If return_dict is True, returns the full dictionary of output. 
85 | ''' 86 | 87 | if len(answers) > self.max_gens: 88 | # Raise a warning 89 | if self.verbose: 90 | warnings.warn(f"Warning: max_gens ({self.max_gens}) reached.") 91 | 92 | 93 | should_stop = self.stop_criteria.should_stop(answers, verbose=self.verbose) 94 | if return_dict: 95 | return should_stop 96 | else: 97 | return should_stop['stop'] 98 | 99 | def eval_loop(self, eval_function, *args, **kwargs): 100 | ''' 101 | Runs AdaptiveConsistency Algorithm by repeatedly calling the evaluation function until the stopping criteria is met. 102 | 103 | Args: 104 | eval_function: The function to evaluate. 105 | *args: Additional positional arguments to pass to the eval_function. 106 | **kwargs: Additional keyword arguments to pass to the eval_function. 107 | 108 | Returns: 109 | List: A list of answers generated from evaluation function using AdaptiveConsistency. 110 | ''' 111 | answers = [] 112 | for _ in range(self.max_gens): 113 | answer = eval_function(*args, **kwargs) 114 | answers.append(answer) 115 | if self.is_consistent(answers): 116 | return answers 117 | 118 | 119 | stop_criteria_dict = { 120 | 'beta' : BetaStoppingCriteria, 121 | 'dirichlet' : DirichletStoppingCriteria, 122 | 'random' : RandomStoppingCriteria, 123 | 'majority' : MajorityStoppingCriteria, 124 | 'entropy' : EntropyStoppingCriteria, 125 | 'always_false' : AlwaysFalseStoppingCriteria, 126 | } -------------------------------------------------------------------------------- /scripts/pal/prompt/math_prompts.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | MATH_PROMPT = ''' 17 | # Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left? 18 | 19 | # solution in Python: 20 | 21 | 22 | def solution(): 23 | """Olivia has $23. She bought five bagels for $3 each. How much money does she have left?""" 24 | money_initial = 23 25 | bagels = 5 26 | bagel_cost = 3 27 | money_spent = bagels * bagel_cost 28 | money_left = money_initial - money_spent 29 | result = money_left 30 | return result 31 | 32 | 33 | 34 | 35 | 36 | # Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday? 37 | 38 | # solution in Python: 39 | 40 | 41 | def solution(): 42 | """Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?""" 43 | golf_balls_initial = 58 44 | golf_balls_lost_tuesday = 23 45 | golf_balls_lost_wednesday = 2 46 | golf_balls_left = golf_balls_initial - golf_balls_lost_tuesday - golf_balls_lost_wednesday 47 | result = golf_balls_left 48 | return result 49 | 50 | 51 | 52 | 53 | 54 | # Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room? 55 | 56 | # solution in Python: 57 | 58 | 59 | def solution(): 60 | """There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. 
How many computers are now in the server room?""" 61 | computers_initial = 9 62 | computers_per_day = 5 63 | num_days = 4 # 4 days between monday and thursday 64 | computers_added = computers_per_day * num_days 65 | computers_total = computers_initial + computers_added 66 | result = computers_total 67 | return result 68 | 69 | 70 | 71 | 72 | 73 | # Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now? 74 | 75 | # solution in Python: 76 | 77 | 78 | def solution(): 79 | """Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?""" 80 | toys_initial = 5 81 | mom_toys = 2 82 | dad_toys = 2 83 | total_received = mom_toys + dad_toys 84 | total_toys = toys_initial + total_received 85 | result = total_toys 86 | return result 87 | 88 | 89 | 90 | 91 | 92 | # Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny? 93 | 94 | # solution in Python: 95 | 96 | 97 | def solution(): 98 | """Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?""" 99 | jason_lollipops_initial = 20 100 | jason_lollipops_after = 12 101 | denny_lollipops = jason_lollipops_initial - jason_lollipops_after 102 | result = denny_lollipops 103 | return result 104 | 105 | 106 | 107 | 108 | 109 | # Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total? 110 | 111 | # solution in Python: 112 | 113 | 114 | def solution(): 115 | """Leah had 32 chocolates and her sister had 42. 
If they ate 35, how many pieces do they have left in total?""" 116 | leah_chocolates = 32 117 | sister_chocolates = 42 118 | total_chocolates = leah_chocolates + sister_chocolates 119 | chocolates_eaten = 35 120 | chocolates_left = total_chocolates - chocolates_eaten 121 | result = chocolates_left 122 | return result 123 | 124 | 125 | 126 | 127 | 128 | # Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot? 129 | 130 | # solution in Python: 131 | 132 | 133 | def solution(): 134 | """If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?""" 135 | cars_initial = 3 136 | cars_arrived = 2 137 | total_cars = cars_initial + cars_arrived 138 | result = total_cars 139 | return result 140 | 141 | 142 | 143 | 144 | 145 | # Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today? 146 | 147 | # solution in Python: 148 | 149 | 150 | def solution(): 151 | """There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?""" 152 | trees_initial = 15 153 | trees_after = 21 154 | trees_added = trees_after - trees_initial 155 | result = trees_added 156 | return result 157 | 158 | 159 | 160 | 161 | 162 | # Q: {question} 163 | 164 | # solution in Python: 165 | '''.strip() + '\n\n\n' -------------------------------------------------------------------------------- /scripts/pal/prompt/salient_translation.py: -------------------------------------------------------------------------------- 1 | MATH_PROMPT=''' 2 | Detect the type of error in an English translation of a German source sentence. 3 | 4 | # Q: The following translations from German to English contain a particular error. 
That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: In der Liste der Baudenkmale in Lenzen (Elbe) sind alle Baudenkmale der brandenburgischen Stadt Lenzen (Elbe) und ihrer Ortsteile aufgelistet. 5 | Translation: In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed. 6 | The translation contains an error pertaining to 7 | Options: 8 | (A) Modifiers or Adjectives 9 | (B) Numerical Values 10 | (C) Negation or Antonyms 11 | (D) Named Entities 12 | (E) Dropped Content 13 | (F) Facts 14 | A: Let's think step by step. 15 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "The list of monuments in Lenzen (Elbe) includes all the monuments in the Brandenburg town of Lenzen (Elbe) and its districts." On the other hand, the provided translation is "In the list of architectural monuments in Lenzen all architectural monuments of the Brandenburg city of Lenzen and its districts are listed." Note that Lenzen (Elbe) is changed to Lenzen in the original translation; so, there is a named entity error. 
Because an entity in the original source sentence is changed to a different entity in the translation, the translation contains an error pertaining to Named Entities. So the answer is (D). 16 | 17 | # Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Auf dieser Seite sind die Baudenkmäler der oberbayerischen Großen Kreisstadt Landsberg am Lech zusammengestellt. 18 | Translation: On this page are compiled the architectural monuments of the town of Landsberg am Lech. 19 | The translation contains an error pertaining to 20 | Options: 21 | (A) Modifiers or Adjectives 22 | (B) Numerical Values 23 | (C) Negation or Antonyms 24 | (D) Named Entities 25 | (E) Dropped Content 26 | (F) Facts 27 | A: Let's think step by step. 28 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "The monuments of the Upper Bavarian district town of Landsberg am Lech are compiled on this page." On the other hand, the provided translation is "On this page are compiled the architectural monuments of the town of Landsberg am Lech." 
Note that an important detail about the location of Landsberg am Lech is omitted in the original translation: The translation should have said "Upper Bavarian district town of Landsberg am Lech". Because a significant clause in the translation was removed, the translation contains an error pertaining to Dropped Content. So the answer is (E). 29 | 30 | # Q: The following translations from German to English contain a particular error. That error will be one of the following types: Named Entities: An entity (names, places, locations, etc.) is changed to a different entity. Numerical Values: Numerical values (ordinals or cardinals), dates, and/or units are changed. Modifiers or Adjectives: The modifiers and adjectives pertaining to a noun are changed. Negation or Antonyms: Introduce or remove a negation or change comparatives to their antonyms. Facts: Trivial factual errors not pertaining to the above classes are introduced in the translations. Dropped Content: A significant clause in the translation is removed. Please identify that error. Source: Łeba ist eine Kleinstadt und ein Badeort im Powiat Lęborski der polnischen Woiwodschaft Pommern. 31 | Translation: Eba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland. 32 | The translation contains an error pertaining to 33 | Options: 34 | (A) Modifiers or Adjectives 35 | (B) Numerical Values 36 | (C) Negation or Antonyms 37 | (D) Named Entities 38 | (E) Dropped Content 39 | (F) Facts 40 | A: Let's think step by step. 41 | We solve this question by first translating the source sentence to English and then by comparing our translation with the provided translation. According to Google Translate, the correct translation of the source sentence from German to English is "Łeba is a small town and seaside resort in the Powiat Lęborski of the Polish Pomeranian Voivodeship." 
On the other hand, the provided translation is "Łeba is not a small town and seaside resort in the Powiat Léborski county of the Pomeranian Voivodeship of Poland." Note that the provided sentence says, "Łeba is not a small town ..." However, the translation should have been "Łeba is a small town ..." Because a negation is introduced at the beginning of the sentence and has fundamentally changed the meaning of the original source, the translation contains an error pertaining to Negation or Antonyms. So the answer is (C). 42 | 43 | # Q: {question} 44 | '''.strip() + '\n' -------------------------------------------------------------------------------- /scripts/fastchat_server.py: -------------------------------------------------------------------------------- 1 | """Flask server for Vicuna-13b, returns results in OpenAI format.""" 2 | 3 | from flask import Flask, request, Response, stream_with_context, jsonify 4 | from fastchat.serve.cli import load_model, generate_stream 5 | import time 6 | import torch 7 | import logging 8 | from datetime import datetime 9 | from logging.handlers import TimedRotatingFileHandler 10 | from transformers import StoppingCriteriaList, MaxLengthCriteria, StoppingCriteria 11 | 12 | 13 | LOG_FILE = "api_requests.log" 14 | logging.basicConfig( 15 | level=logging.INFO, 16 | format="%(asctime)s - %(message)s", 17 | handlers=[ 18 | TimedRotatingFileHandler(LOG_FILE, when="D", interval=1, backupCount=30), 19 | logging.StreamHandler(), 20 | ], 21 | ) 22 | 23 | app = Flask(__name__) 24 | 25 | # Load the model and tokenizer 26 | model_name = "vicuna-13b" 27 | device = "cuda" 28 | num_gpus = "4" 29 | load_8bit = False 30 | debug = False 31 | model, tokenizer = load_model(model_name, device, num_gpus, load_8bit, debug) 32 | 33 | 34 | # Adapted from https://discuss.huggingface.co/t/implimentation-of-stopping-criteria-list/20040/7 35 | class CustomStopTokenCriteria(StoppingCriteria): 36 | def __init__(self, stops=[], len_input_ids=0, encounters=1): 37 | 
super().__init__() 38 | self.stops = stops 39 | self.len_input_ids = len_input_ids 40 | self.previous_len = len_input_ids 41 | self.min_stop_token_len = min([len(tokenizer.encode(stop)) for stop in stops]) 42 | 43 | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor): 44 | len_input_ids = len(input_ids[0]) 45 | 46 | # save some time by not checking for stop tokens until we've generated enough tokens. 47 | # this is not a perfect solution, but it's a good enough heuristic for now. 48 | if len_input_ids - self.previous_len < self.min_stop_token_len: 49 | return False 50 | self.previous_len = len_input_ids 51 | 52 | generated_tokens = input_ids[0][self.len_input_ids :] 53 | generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True) 54 | for stop in self.stops: 55 | if stop in generated_text: 56 | return True 57 | return False 58 | 59 | 60 | @torch.inference_mode() 61 | def generate_text( 62 | prompt, temperature=0.7, max_new_tokens=150, n=1, stop=None, top_p=0.9 63 | ): 64 | input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device) 65 | 66 | if stop: 67 | stopping_criteria = StoppingCriteriaList( 68 | [CustomStopTokenCriteria(stops=stop, len_input_ids=len(input_ids[0]))] 69 | ) 70 | 71 | output = model.generate( 72 | input_ids, 73 | max_new_tokens=max_new_tokens, 74 | temperature=temperature, 75 | num_return_sequences=n, 76 | top_p=top_p, 77 | do_sample=True if (n > 1 or temperature > 0) else False, 78 | no_repeat_ngram_size=0 if top_p is not None else None, 79 | eos_token_id=tokenizer.eos_token_id, 80 | stopping_criteria=stopping_criteria if stop else None, 81 | ) 82 | 83 | choices = [] 84 | for seq in output: 85 | completion = tokenizer.decode( 86 | seq[len(input_ids[0]) :], skip_special_tokens=True 87 | ) 88 | stop_token_present = None 89 | for stop_token in stop: 90 | if stop_token in completion: 91 | stop_token_present = stop_token 92 | break 93 | finish_reason = "stop_token" if stop and 
stop_token_present else "length" 94 | if stop_token_present: 95 | completion = completion.split(stop_token_present)[0] 96 | 97 | choices.append( 98 | { 99 | "text": completion, 100 | "index": len(choices), 101 | "logprobs": None, 102 | "finish_reason": finish_reason, 103 | } 104 | ) 105 | 106 | response = { 107 | "id": f"cmpl-{time.time()}", 108 | "object": "text_completion", 109 | "created": int(time.time()), 110 | "model": model_name, 111 | "choices": choices, 112 | "usage": { 113 | "prompt_tokens": len(input_ids[0]), 114 | "completion_tokens": sum( 115 | [len(tokenizer.encode(choice["text"])) for choice in choices] 116 | ), 117 | "total_tokens": len(input_ids[0]) 118 | + sum([len(tokenizer.encode(choice["text"])) for choice in choices]), 119 | }, 120 | } 121 | 122 | return response 123 | 124 | 125 | @app.route("/completion", methods=["POST"]) 126 | def completion(): 127 | data = request.get_json() 128 | prompt = data.get("prompt") 129 | temperature = float(data.get("temperature", 0.7)) 130 | max_new_tokens = int(data.get("max_tokens", 150)) 131 | n = int(data.get("n", 1)) 132 | stop = data.get("stop") 133 | top_p = data.get("top_p") 134 | 135 | if top_p is not None: 136 | top_p = float(top_p) 137 | 138 | response = generate_text(prompt, temperature, max_new_tokens, n, stop, top_p) 139 | output_str = "\n".join([choice["text"] for choice in response["choices"]]) 140 | log_entry = f"Input: {prompt}, Output: {output_str.strip()}, Params: temperature={temperature}, completion_tokens={response['usage']['completion_tokens']}, n={n}, stop={stop}, top_p={top_p}" 141 | logging.info(log_entry) 142 | return jsonify(response) 143 | 144 | 145 | @app.route("/chat", methods=["POST"]) 146 | def chat(): 147 | data = request.get_json() 148 | user_input = data.get("input") 149 | temperature = float(data.get("temperature", 0.7)) 150 | max_new_tokens = int(data.get("max_new_tokens", 512)) 151 | 152 | def generate_response(): 153 | params = { 154 | "prompt": user_input, 155 | 
"temperature": temperature, 156 | "max_new_tokens": max_new_tokens, 157 | "stop": tokenizer.eos_token, # Set the stop parameter to the tokenizer's EOS token 158 | } 159 | for response in generate_stream(model, tokenizer, params, device): 160 | yield response + "\n" 161 | 162 | return Response(stream_with_context(generate_response()), content_type="text/plain") 163 | 164 | 165 | if __name__ == "__main__": 166 | app.run(debug=True, host="0.0.0.0", port=5000) 167 | -------------------------------------------------------------------------------- /scripts/run_eval.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import argparse 4 | import tqdm 5 | import os 6 | 7 | import sys 8 | 9 | from pal import interface, runtime 10 | # from pal.prompt import math_prompts 11 | 12 | 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--append', action='store_true') 16 | parser.add_argument('--verbose', action='store_true') 17 | parser.add_argument('--dataset', default='gsm', type=str) 18 | parser.add_argument('--model', default='code-davinci-002', type=str) 19 | parser.add_argument('--temperature', default=0.0, type=float) 20 | parser.add_argument('--top_p', default=1.0, type=float) 21 | parser.add_argument('--max_tokens', default=256, type=int) 22 | parser.add_argument('--prompt_file', default="math_prompts", type=str) 23 | parser.add_argument('--end', default="\n\n\n", type=str) 24 | parser.add_argument('--prompt_type', default='code', type=str) 25 | parser.add_argument('--vicuna_url', default=None, type=str) 26 | parser.add_argument('--start_data', default=None, type=int) 27 | parser.add_argument('--end_data', default=None, type=int) 28 | parser.add_argument('--conf_thresh', default=0.99, type = float) 29 | parser.add_argument('--max_gens', default=40, type = int) 30 | parser.add_argument('--seed', default=1, type = int) 31 | parser.add_argument('--answer_type', default='float', type = str, 
help='Type of answer to expect. One of float or str') 32 | parser.add_argument('--stop_criteria', default=None, type = str, help='AdaptiveConsistency stop criteria to use. Defaults to Self-Consistency') 33 | parser.add_argument('--stop_criteria_thresh', default=0.95, type = float, help='AdaptiveConsistency stop criteria threshold to use. See AdaptiveConsistency for details') 34 | 35 | 36 | args = parser.parse_args() 37 | 38 | import importlib 39 | math_prompts = importlib.import_module(f'pal.prompt.{args.prompt_file}') 40 | 41 | 42 | DATA_PATH = f'datasets/{args.dataset}.jsonl' 43 | if not os.path.exists(DATA_PATH): 44 | DATA_PATH = f'datasets/{args.dataset}.json' 45 | if DATA_PATH.endswith('.jsonl'): 46 | examples = list(map(json.loads, open(DATA_PATH))) 47 | elif DATA_PATH.endswith('.json'): 48 | examples = json.load(open(DATA_PATH))['examples'] 49 | 50 | dataset_name = args.dataset 51 | if args.start_data is not None and args.end_data is None: 52 | examples = examples[args.start_data:] 53 | dataset_name += f'_{args.start_data}_end' 54 | elif args.start_data is None and args.end_data is not None: 55 | examples = examples[:args.end_data] 56 | dataset_name += f'_0_{args.end_data}' 57 | elif args.start_data is not None and args.end_data is not None: 58 | examples = examples[args.start_data:args.end_data] 59 | dataset_name += f'_{args.start_data}_{args.end_data}' 60 | 61 | OUTPUT_PATH = f'outputs/{args.model}/{dataset_name}/{dataset_name}_{args.max_gens}_{args.temperature}_stop{"self" if args.stop_criteria is None else args.stop_criteria}_seed{args.seed}.jsonl' 62 | os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True) 63 | 64 | 65 | 66 | if args.prompt_type != 'code' and args.prompt_type != 'text': 67 | print(f'Unknown prompt type: {args.prompt_type}') 68 | print('Defaulting to code prompt') 69 | args.prompt_type = 'code' 70 | 71 | answer_type = args.answer_type 72 | # answer_type = 'str' if args.dataset.find('date')!=-1 else 'float' 73 | if args.prompt_type == 
'code': 74 | 75 | # PAL style prompting 76 | if args.dataset.find('date')!=-1: 77 | itf = interface.AdaptiveProgramInterface( 78 | step_size = 1, 79 | max_gens=args.max_gens, 80 | runtime = runtime.DateRuntime(), 81 | stop=args.end, 82 | model=args.model, 83 | verbose=args.verbose, 84 | openai_url=args.vicuna_url, 85 | answer_type=answer_type, 86 | stop_criteria = args.stop_criteria, 87 | stop_criteria_thresh = args.stop_criteria_thresh, 88 | ) 89 | else: 90 | itf = interface.AdaptiveProgramInterface( 91 | step_size = 1, 92 | max_gens=args.max_gens, 93 | stop=args.end, 94 | get_answer_expr='solution()', 95 | model=args.model, 96 | verbose=args.verbose, 97 | openai_url=args.vicuna_url, 98 | answer_type=answer_type, 99 | stop_criteria = args.stop_criteria, 100 | stop_criteria_thresh = args.stop_criteria_thresh, 101 | ) 102 | 103 | 104 | elif args.prompt_type == 'text': 105 | # CoT style prompting 106 | itf = interface.AdaptiveTextInterface( 107 | step_size = 1, 108 | max_gens=args.max_gens, 109 | stop=args.end, 110 | model=args.model, 111 | openai_url=args.vicuna_url, 112 | stop_criteria = args.stop_criteria, 113 | stop_criteria_thresh = args.stop_criteria_thresh, 114 | ) 115 | 116 | 117 | 118 | if args.append: 119 | lines = open(OUTPUT_PATH).readlines() 120 | num_skip_exps = len(lines) 121 | scores = [x['score'] for x in map(json.loads, lines)] 122 | else: 123 | num_skip_exps = 0 124 | scores = [] 125 | 126 | with open(OUTPUT_PATH, 'a' if args.append else 'w') as f: 127 | pbar = tqdm.tqdm(examples[num_skip_exps:], initial=num_skip_exps, total=len(examples)) 128 | for x in pbar: 129 | question = x['input'] 130 | result = copy.copy(x) 131 | 132 | try: 133 | ans, answers = itf.run(math_prompts.MATH_PROMPT.format(question=question), 134 | temperature=args.temperature, top_p=args.top_p, 135 | max_tokens=args.max_tokens) 136 | if answer_type == 'float': 137 | ans = float(ans) 138 | score = 1 if abs(ans - x['target']) < 1e-3 else 0 139 | else: 140 | score = 1 if ans == 
x['target'] else 0 141 | except Exception as e: 142 | print('Error',e) 143 | ans = '' 144 | # Failed to load any answers 145 | answers = [] 146 | score = 0 147 | scores.append(score) 148 | 149 | result['answer'] = ans 150 | result['score'] = score 151 | result['generation'] = itf.history 152 | result['answers'] = answers 153 | f.write(json.dumps(result) + '\n') 154 | 155 | itf.clear_history() 156 | f.flush() 157 | 158 | print(f'Accuracy - {sum(scores) / len(scores)}') 159 | -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /scripts/pal/core/vicuna.py: -------------------------------------------------------------------------------- 1 | # Backend for a self-hosted API 2 | import os 3 | from pprint import pprint 4 | from 
from typing import Any, Dict

# from prompt_lib.backends.wrapper import BaseAPIWrapper


class OpenSourceAPIBackend:
    """Minimal HTTP client for a self-hosted completion server.

    The server URL comes from the ``SELF_HOSTED_URL`` environment variable
    when set; otherwise from the ``base_url`` constructor argument.
    """

    def __init__(self, base_url: str = None):
        self.base_url = base_url
        # The environment variable always overrides the constructor argument.
        if os.environ.get("SELF_HOSTED_URL"):
            self.base_url = os.environ.get("SELF_HOSTED_URL")

    @property
    def base_url(self):
        """Current server base URL (may be ``None`` if never configured)."""
        return self._base_url

    @base_url.setter
    def base_url(self, url):
        print(f"Setting base_url to {url}")
        self._base_url = url

    def completions(self, prompt, temperature=0.7, max_tokens=150, n=1,
                    stop=None, top_p=None, engine=None, logprobs=None):
        """POST a completion request to ``<base_url>/completion`` and return
        the decoded JSON response.

        NOTE(review): ``engine`` and ``logprobs`` are accepted for signature
        compatibility with OpenAI-style callers but are NOT forwarded to the
        server — confirm whether the self-hosted API expects them.
        """
        # Deferred import: ``requests`` is only needed when a request is
        # actually made, so importing this module stays dependency-free.
        import requests

        url = f"{self.base_url}/completion"
        data = {
            "prompt": prompt,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "n": n,
            "stop": stop,
            "top_p": top_p,
        }
        response = requests.post(url, json=data)
        return response.json()


# Module-level singleton shared by the wrapper and call_vicuna below.
api = OpenSourceAPIBackend()


class OpenSourceAPIWrapper:
    """Stateless helper that batches completion requests through ``api``."""

    @staticmethod
    def _call_api(
        prompt: str,
        max_tokens: int,
        engine: str,
        stop_token: str,
        temperature: float,
        num_completions: int = 1,
    ) -> dict:
        """Issue a single request for up to ``num_completions`` completions."""
        response = api.completions(
            engine=engine,
            prompt=prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=1,
            stop=[stop_token],
            n=num_completions,
            logprobs=5,
        )
        return response

    @staticmethod
    def call(
        prompt: str,
        max_tokens: int,
        engine: str,
        stop_token: str,
        temperature: float,
        num_completions: int = 1,
    ) -> dict:
        """Request ``num_completions`` completions, transparently splitting
        large requests into chunks of at most 8 and concatenating the
        returned ``choices`` lists into one response dict.
        """
        # BUGFIX: callers (e.g. ``call_vicuna``) may pass ``None``; the old
        # code then raised ``TypeError`` on the ``>`` comparison below.
        if num_completions is None:
            num_completions = 1
        max_completions_in_one_call = 8
        if num_completions > max_completions_in_one_call:
            response_combined = dict()
            num_completions_remaining = num_completions
            for i in range(0, num_completions, max_completions_in_one_call):
                response = OpenSourceAPIWrapper._call_api(
                    prompt=prompt,
                    max_tokens=max_tokens,
                    engine=engine,
                    stop_token=stop_token,
                    temperature=temperature,
                    num_completions=min(num_completions_remaining, max_completions_in_one_call),
                )
                num_completions_remaining -= max_completions_in_one_call
                print(f"Remaining completions: {num_completions_remaining}")
                if i == 0:
                    response_combined = response
                else:
                    response_combined["choices"] += response["choices"]

            return response_combined
        response = OpenSourceAPIWrapper._call_api(
            prompt=prompt,
            max_tokens=max_tokens,
            engine=engine,
            stop_token=stop_token,
            temperature=temperature,
            num_completions=num_completions,
        )

        return response

    @staticmethod
    def get_first_response(response) -> Dict[str, Any]:
        # NOTE(review): ``get_api_wrapper`` is not defined anywhere in this
        # module, so calling this raises AttributeError. Presumably meant to
        # come from prompt_lib's BaseAPIWrapper — confirm before use.
        api_wrapper = OpenSourceAPIWrapper.get_api_wrapper(response["model"])
        return api_wrapper.get_first_response(response)

    @staticmethod
    def get_majority_answer(response) -> Dict[str, Any]:
        # NOTE(review): see get_first_response — relies on undefined helper.
        api_wrapper = OpenSourceAPIWrapper.get_api_wrapper(response["model"])
        return api_wrapper.get_majority_answer(response)

    @staticmethod
    def get_all_responses(response) -> Dict[str, Any]:
        # NOTE(review): see get_first_response — relies on undefined helper.
        api_wrapper = OpenSourceAPIWrapper.get_api_wrapper(response["model"])
        return api_wrapper.get_all_responses(response)


def call_vicuna(prompt, model='self-vulcan-13b', stop=None, temperature=0., top_p=1.0,
                max_tokens=128, majority_at=None, logprobs=0, url=None):
    """Sample completions for ``prompt`` from the self-hosted server.

    Args:
        prompt: full prompt text sent to the model.
        model: engine name forwarded to the server.
        stop: stop token for generation.
        temperature, top_p, max_tokens: standard sampling knobs
            (``top_p`` and ``logprobs`` are currently not forwarded).
        majority_at: number of completions to draw (None -> 1).
        url: server base URL; falls back to the hard-coded default host.

    Returns:
        List of completion text strings.
    """
    print('Lets go!', temperature)
    if url is None:
        api.base_url = "http://128.2.205.154:8081"
    else:
        api.base_url = url
    wrapper = OpenSourceAPIWrapper()
    print('Calling Wrapper')
    response = wrapper.call(
        prompt=prompt,
        max_tokens=max_tokens,
        # BUGFIX: the ``model`` argument was silently ignored (the engine was
        # hard-coded to 'self-vulcan-13b'); forward it instead.
        engine=model,
        stop_token=stop,
        temperature=temperature,
        num_completions=majority_at,
    )
    print('Wrapper Call Done')
    completions = [choice['text'] for choice in response['choices']]
    return completions


def test():
    """Manual smoke test against a live server (network access required)."""

    wrapper = OpenSourceAPIWrapper()
    api.base_url = "http://128.2.205.154:8081"

    response = wrapper.call(
        prompt="The quick brown fox",
        max_tokens=10,
        engine="self-vulcan-13b",
        stop_token="\n",
        temperature=0.7,
    )

    pprint(response)
    # BUGFIX: the original crashed here on purpose with ``3/0`` to skip the
    # remaining checks; an explicit early return makes that intent readable.
    # The unreachable checks below are kept for reference.
    return

    test_api = OpenSourceAPIBackend()
    test_api.base_url = "128.2.205.154:5000"
    # assert test_api.base_url == "http://pitt.lti.cs.cmu.edu:5000", f"api.base_url: {test_api.base_url}"
    pprint(
        test_api.completions(
            "The quick brown fox",
            max_tokens=10,
            n=1,
            stop="\n",
            temperature=0.7,
            engine="self-vulcan-13b",
        )
    )

    test_api.base_url = None

    # make sure it fails
    import unittest

    api.base_url = None
    unittest.TestCase().assertRaises(
        Exception,
        wrapper.call,
        prompt="The quick brown fox",
        max_tokens=10,
        engine="self-vulcan-13b",
        stop_token="\n",
        temperature=0.7,
    )

    # environment variable
    os.environ["SELF_HOSTED_URL"] = "128.2.205.154:5000"

    test_api = OpenSourceAPIBackend()
    pprint(
        test_api.completions(
            "The quick brown fox",
            max_tokens=10,
            n=1,
            stop="\n",
            temperature=0.7,
            engine="self-vulcan-13b",
        )
    )


if __name__ == "__main__":
    test()

4 | Website • 5 | Paper 6 | 7 |

8 | 9 | 10 |

11 | 12 | GitHub license 14 | 15 | 16 | Twitter 17 | 18 | 19 | Twitter 20 | 21 |

22 | 23 | 24 | 25 | 26 | [Pranjal Aggarwal](https://github.com/Pranjal2041), [Aman Madaan](https://madaan.github.io/), [Yiming Yang](https://www.cs.cmu.edu/~./yiming/), [Mausam](https://www.cse.iitd.ac.in/~mausam/) 27 | 28 | 29 | ## Abstract 30 | >A popular approach for improving the correctness of output from large language models (LLMs) is Self-Consistency - poll the LLM multiple times and output the most frequent solution. Existing Self-Consistency techniques always draw a constant number of samples per question, where a better approach will be to non-uniformly distribute the available budget based on the amount of agreement in the samples drawn so far. In response, we introduce Adaptive-Consistency, a cost-efficient, model-agnostic technique that dynamically adjusts the number of samples per question using a lightweight stopping criterion. Our experiments over 13 datasets and two LLMs demonstrate that Adaptive-Consistency reduces sample budget by up to 6.0 times with an average accuracy drop of less than 0.1%. 31 | > 32 | 33 | ![AdaptiveConsistency](docs/static/images/ac_teaser_new.png) 34 | 35 | 36 | 37 | 38 | # Adaptive Consistency: 39 | This repository contains code for: 40 | 1. Adaptive-Consistency Library for Running efficient LLM generation using [Adaptive-Consistency](http://sample-step-by-step.info) in your code. 41 | 2. Code to reproduce results of [Adaptive-Consistency](https://arxiv.org/abs/2305.11860). 42 | 43 | ## Installation 44 | 45 | ### From PyPi 46 | 47 | ```bash 48 | pip install AdaptiveConsistency 49 | ``` 50 | 51 | ### From Source 52 | 53 | First, clone the repo: 54 | ```bash 55 | git clone https://github.com/Pranjal2041/AdaptiveConsistency.git 56 | ``` 57 | 58 | Next install the package using: 59 | ```bash 60 | python setup.py install 61 | ``` 62 | 63 | ## Usage 64 | 65 | Using Adaptive Consistency in your code requires only 2-3 lines of changes in your existing framework. 66 | 67 | ### 1. 
Importing the library 68 | 69 | ```python 70 | from adaptive_consistency import AC, BetaStoppingCriteria 71 | ``` 72 | 73 | ### 2. Initializing the library 74 | 75 | ```python 76 | ac = AC(model, stopping_criteria=BetaStoppingCriteria(0.95), max_gens = 40) 77 | ``` 78 | 79 | Default stopping_criteria is BetaStoppingCriteria with confidence threshold = 0.95 (as used in the paper). 80 | 81 | ### 3. Using the library 82 | 83 | You can directly run a whole loop of evaluation using: 84 | 85 | ```python 86 | ac.eval_loop(sampling_function, *args, **kwargs) 87 | ``` 88 | 89 | For example, if using Openai api for sampling, you can use: 90 | 91 | ```python 92 | import openai 93 | 94 | answers = ac.eval_loop(openai.Completion.create, engine="text-davinci-003", prompt="Solve the following question:", max_tokens=5) 95 | ``` 96 | 97 | Or you can check for consistency of answers (and decide to break) at each step: 98 | 99 | ```python 100 | answers = [] 101 | for i in range(40): 102 | answers.append(generate_answer_from_model()) # Example openai.Completion.create 103 | if ac.should_stop(answers): 104 | break 105 | ``` 106 | 107 | Note: In the `generate_answer_from_model` function, you may want to extract the final answer after sampling from LLM. 108 | 109 | 110 | ### 4. Stoppping Criterias 111 | 112 | You can use one of the following Stopping Criterias: 113 | 114 | 1. `BetaStoppingCriteria (beta)`: Uses the Beta Distribution to guide the stopping criteria. This is the default stopping criteria. 115 | 2. `DirichletStoppingCriteria (dirichlet)`: Uses the Dirichlet Distribution to guide the stopping criteria. 116 | 3. `EntropyStoppingCriteria (entropy)`: Uses the Entropy of the distribution to guide the stopping criteria. 117 | 4. `MajorityStoppingCriteria (majority)`: Uses the Majority ratio of the top element in the distribution to guide the stopping criteria. 118 | 5. `RandomStoppingCriteria (random)`: Randomly stops the sampling process with a pre-defined probability. 119 | 6. 
`CRPStoppingCriteria (crp)`: Uses the Chinese Restaurant Process to guide the stopping criteria. 120 | 121 | Check out the [paper](https://arxiv.org/abs/2305.11860) for more details. 122 | 123 | 124 | ## Reproducing Numbers 125 | 126 | 127 | ### 1. Downloading the data 128 | 129 | Run, 130 | 131 | ```bash 132 | bash download_data.sh 133 | ``` 134 | 135 | ### 2. Downloading Model Outputs 136 | 137 | We provide the model outputs for all the models used in the paper. You can download them using: 138 | 139 | ```bash 140 | bash download_outputs.sh 141 | ``` 142 | 143 | These model outputs will work for all experiments in the paper. 144 | 145 | ### 3. Running Generations 146 | 147 | If you decide to skip the previous step, you can run your generations on your own. You can use the following command: 148 | 149 | ```bash 150 | bash scripts/run_self_consistency.sh 151 | bash scripts/run_adaptive_consistency.sh 152 | ``` 153 | 154 | By default, `beta` function will be used for stopping criteria. You can change it by passing the `stopping_criteria` and corresponding Confidence Threshold as arguments. For example, to use `entropy` stopping criteria, with a Confidence Threshold of 0.75, you can use: 155 | 156 | ```bash 157 | bash scripts/run_adaptive_consistency.sh entropy 0.75 158 | ``` 159 | 160 | This step will print the final accuracy on the terminal. 161 | 162 | ### 4. Running Eval on Model Outputs 163 | 164 | You can skip Step 3, and directly run eval on the model outputs. You can use the following command: 165 | 166 | ```bash 167 | python eval_outputs.py --output_file --stop_criteria --stop_criteria_thresh 168 | ``` 169 | 170 | This will print the average generations and accuracy on the terminal. 
171 | 172 | 173 | 174 | 175 | 176 | ## Citation 177 | 178 | ```bibtex 179 | @misc{aggarwal2023lets, 180 | title={Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning with LLMs}, 181 | author={Pranjal Aggarwal and Aman Madaan and Yiming Yang and Mausam}, 182 | year={2023}, 183 | eprint={2305.11860}, 184 | archivePrefix={arXiv}, 185 | primaryClass={cs.CL} 186 | } 187 | ``` 188 | 189 | ## LICENSE 190 | 191 | Adaptive-Consistency is Apache licensed, as found in the [LICENSE](LICENSE) file. 192 | -------------------------------------------------------------------------------- /docs/static/css/prism.css: -------------------------------------------------------------------------------- 1 | /* PrismJS 1.29.0 2 | https://prismjs.com/download.html#themes=prism-okaidia&languages=markup+css+clike+javascript+css-extras+diff+python&plugins=line-highlight+line-numbers+custom-class+show-language+autoloader+toolbar+copy-to-clipboard+diff-highlight */ 3 | /** 4 | * okaidia theme for JavaScript, CSS and HTML 5 | * Loosely based on Monokai textmate theme by http://www.monokai.nl/ 6 | * @author ocodia 7 | */ 8 | 9 | code[class*="language-"], 10 | pre[class*="language-"] { 11 | color: #f8f8f2; 12 | background: none; 13 | text-shadow: 0 1px rgba(0, 0, 0, 0.3); 14 | font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace; 15 | font-size: 1em; 16 | text-align: left; 17 | white-space: pre; 18 | word-spacing: normal; 19 | word-break: normal; 20 | word-wrap: normal; 21 | line-height: 1.5; 22 | 23 | -moz-tab-size: 4; 24 | -o-tab-size: 4; 25 | tab-size: 4; 26 | 27 | -webkit-hyphens: none; 28 | -moz-hyphens: none; 29 | -ms-hyphens: none; 30 | hyphens: none; 31 | } 32 | 33 | /* Code blocks */ 34 | pre[class*="language-"] { 35 | padding: 1em; 36 | margin: .5em 0; 37 | overflow: auto; 38 | border-radius: 0.3em; 39 | } 40 | 41 | :not(pre) > code[class*="language-"], 42 | pre[class*="language-"] { 43 | background: #272822; 44 | } 45 | 46 | /* Inline code */ 47 | 
:not(pre) > code[class*="language-"] { 48 | padding: .1em; 49 | border-radius: .3em; 50 | white-space: normal; 51 | } 52 | 53 | .token.comment, 54 | .token.prolog, 55 | .token.doctype, 56 | .token.cdata { 57 | color: #8292a2; 58 | } 59 | 60 | .token.punctuation { 61 | color: #f8f8f2; 62 | } 63 | 64 | .token.namespace { 65 | opacity: .7; 66 | } 67 | 68 | .token.property, 69 | .token.tag, 70 | .token.constant, 71 | .token.symbol, 72 | .token.deleted { 73 | color: #f92672; 74 | } 75 | 76 | .token.boolean, 77 | .token.number { 78 | color: #ae81ff; 79 | } 80 | 81 | .token.selector, 82 | .token.attr-name, 83 | .token.string, 84 | .token.char, 85 | .token.builtin, 86 | .token.inserted { 87 | color: #a6e22e; 88 | } 89 | 90 | .token.operator, 91 | .token.entity, 92 | .token.url, 93 | .language-css .token.string, 94 | .style .token.string, 95 | .token.variable { 96 | color: #f8f8f2; 97 | } 98 | 99 | .token.atrule, 100 | .token.attr-value, 101 | .token.function, 102 | .token.class-name { 103 | color: #e6db74; 104 | } 105 | 106 | .token.keyword { 107 | color: #66d9ef; 108 | } 109 | 110 | .token.regex, 111 | .token.important { 112 | color: #fd971f; 113 | } 114 | 115 | .token.important, 116 | .token.bold { 117 | font-weight: bold; 118 | } 119 | .token.italic { 120 | font-style: italic; 121 | } 122 | 123 | .token.entity { 124 | cursor: help; 125 | } 126 | 127 | pre[data-line] { 128 | position: relative; 129 | padding: 1em 0 1em 3em; 130 | } 131 | 132 | 133 | .line-highlight[data-start] { 134 | position: absolute; 135 | left: 0; 136 | right: 0; 137 | padding: inherit 0; 138 | margin-top: 1em; /* Same as .prism’s padding-top */ 139 | 140 | /* background: hsla(24, 20%, 50%,.08); */ 141 | /* background: linear-gradient(to left, hsla(24, 20%, 50%,.1) 70%, hsla(24, 20%, 50%,0)); */ 142 | /* #a6e22e */ 143 | background: linear-gradient(to right, rgba(166, 226, 46,.1) 70%, rgba(166, 226, 46,.0)); 144 | 145 | pointer-events: none; 146 | 147 | line-height: inherit; 148 | white-space: pre; 
149 | } 150 | 151 | 152 | 153 | 154 | 155 | 156 | @media print { 157 | .line-highlight { 158 | /* 159 | * This will prevent browsers from replacing the background color with white. 160 | * It's necessary because the element is layered on top of the displayed code. 161 | */ 162 | -webkit-print-color-adjust: exact; 163 | color-adjust: exact; 164 | } 165 | } 166 | 167 | .line-highlight:before, 168 | .line-highlight[data-end]:after { 169 | content: "+"; 170 | /* attr(data-start); */ 171 | position: absolute; 172 | top: .4em; 173 | left: .6em; 174 | min-width: 1em; 175 | padding: 0 .5em; 176 | background-color: hsla(109, 73%, 55%, 0.5); 177 | color: hsl(24, 20%, 95%); 178 | font: bold 68%/1.5 sans-serif; 179 | text-align: center; 180 | vertical-align: .3em; 181 | border-radius: 999px; 182 | text-shadow: none; 183 | box-shadow: 0 1px white; 184 | } 185 | 186 | .line-highlight[data-end]:after { 187 | content: "-"; 188 | position: absolute; 189 | 190 | background-color: hsla(0, 88%, 54%, 1); 191 | color: hsl(24, 20%, 95%); 192 | font: bold 68%/1.5 sans-serif; 193 | /* font-size: medium; */ 194 | text-align: center; 195 | vertical-align: .3em; 196 | border-radius: 9999px; 197 | text-shadow: none; 198 | box-shadow: 0 1px white; 199 | /* top: .4em; */ 200 | top: auto; 201 | bottom: .4em; 202 | position: absolute; 203 | left: .6em; 204 | min-width: 1.6em; 205 | min-height: 1.6em; 206 | 207 | padding: 0 .5em; 208 | 209 | /* Shift it a right */ 210 | 211 | /* Increase the size of this circle */ 212 | /* font-size: 1.5em; */ 213 | /* Make it opaque */ 214 | /* opacity: 1; 215 | /* Above doesn't work */ 216 | } 217 | 218 | .line-numbers .line-highlight:before, 219 | .line-numbers .line-highlight:after { 220 | content: none; 221 | } 222 | 223 | pre[id].linkable-line-numbers span.line-numbers-rows { 224 | pointer-events: all; 225 | } 226 | pre[id].linkable-line-numbers span.line-numbers-rows > span:before { 227 | cursor: pointer; 228 | } 229 | pre[id].linkable-line-numbers 
span.line-numbers-rows > span:hover:before { 230 | background-color: rgba(128, 128, 128, .2); 231 | } 232 | 233 | pre[class*="language-"].line-numbers { 234 | position: relative; 235 | padding-left: 3.8em; 236 | counter-reset: linenumber; 237 | } 238 | 239 | pre[class*="language-"].line-numbers > code { 240 | position: relative; 241 | white-space: inherit; 242 | } 243 | 244 | .line-numbers .line-numbers-rows { 245 | position: absolute; 246 | pointer-events: none; 247 | top: 0; 248 | font-size: 100%; 249 | left: -3.8em; 250 | width: 3em; /* works for line-numbers below 1000 lines */ 251 | letter-spacing: -1px; 252 | border-right: 1px solid #999; 253 | 254 | -webkit-user-select: none; 255 | -moz-user-select: none; 256 | -ms-user-select: none; 257 | user-select: none; 258 | 259 | } 260 | 261 | .line-numbers-rows > span { 262 | display: block; 263 | counter-increment: linenumber; 264 | } 265 | 266 | .line-numbers-rows > span:before { 267 | content: counter(linenumber); 268 | color: #999; 269 | display: block; 270 | padding-right: 0.8em; 271 | text-align: right; 272 | } 273 | 274 | div.code-toolbar { 275 | position: relative; 276 | } 277 | 278 | div.code-toolbar > .toolbar { 279 | position: absolute; 280 | z-index: 10; 281 | top: .3em; 282 | right: .2em; 283 | transition: opacity 0.3s ease-in-out; 284 | opacity: 0; 285 | } 286 | 287 | div.code-toolbar:hover > .toolbar { 288 | opacity: 1; 289 | } 290 | 291 | /* Separate line b/c rules are thrown out if selector is invalid. 292 | IE11 and old Edge versions don't support :focus-within. 
*/ 293 | div.code-toolbar:focus-within > .toolbar { 294 | opacity: 1; 295 | } 296 | 297 | div.code-toolbar > .toolbar > .toolbar-item { 298 | display: inline-block; 299 | } 300 | 301 | div.code-toolbar > .toolbar > .toolbar-item > a { 302 | cursor: pointer; 303 | } 304 | 305 | div.code-toolbar > .toolbar > .toolbar-item > button { 306 | background: none; 307 | border: 0; 308 | color: inherit; 309 | font: inherit; 310 | line-height: normal; 311 | overflow: visible; 312 | padding: 0; 313 | -webkit-user-select: none; /* for button */ 314 | -moz-user-select: none; 315 | -ms-user-select: none; 316 | } 317 | 318 | div.code-toolbar > .toolbar > .toolbar-item > a, 319 | div.code-toolbar > .toolbar > .toolbar-item > button, 320 | div.code-toolbar > .toolbar > .toolbar-item > span { 321 | color: #bbb; 322 | font-size: .8em; 323 | padding: 0 .5em; 324 | background: #f5f2f0; 325 | background: rgba(224, 224, 224, 0.2); 326 | box-shadow: 0 2px 0 0 rgba(0,0,0,0.2); 327 | border-radius: .5em; 328 | } 329 | 330 | div.code-toolbar > .toolbar > .toolbar-item > a:hover, 331 | div.code-toolbar > .toolbar > .toolbar-item > a:focus, 332 | div.code-toolbar > .toolbar > .toolbar-item > button:hover, 333 | div.code-toolbar > .toolbar > .toolbar-item > button:focus, 334 | div.code-toolbar > .toolbar > .toolbar-item > span:hover, 335 | div.code-toolbar > .toolbar > .toolbar-item > span:focus { 336 | color: inherit; 337 | text-decoration: none; 338 | } 339 | 340 | pre.diff-highlight > code .token.deleted:not(.prefix), 341 | pre > code.diff-highlight .token.deleted:not(.prefix) { 342 | background-color: rgba(255, 0, 0, .1); 343 | color: inherit; 344 | display: block; 345 | } 346 | 347 | pre.diff-highlight > code .token.inserted:not(.prefix), 348 | pre > code.diff-highlight .token.inserted:not(.prefix) { 349 | background-color: rgba(0, 255, 128, .1); 350 | color: inherit; 351 | display: block; 352 | } 353 | 354 | -------------------------------------------------------------------------------- 
"""Stopping criteria for Adaptive-Consistency sampling.

Each criterion inspects the answers sampled so far and decides whether
enough agreement has been reached to stop drawing further samples.
Every ``should_stop`` returns a dict with keys ``'most_common'`` (the
current majority answer), ``'prob'`` (criterion-specific confidence, -1
when unavailable) and ``'stop'`` (bool).
"""
import numpy as np
from typing import List, Dict
from collections import Counter
from scipy import integrate, stats


class StoppingCriterias:
    """Abstract base class; subclasses implement ``should_stop``."""

    def __init__(self, *args, **kwargs):

        ...

    def should_stop(self, *args, **kwargs) -> Dict:
        """Return a dict with keys 'most_common', 'prob' and 'stop'."""
        ...


class BetaStoppingCriteria(StoppingCriterias):
    """Stop when the posterior probability that the current majority answer
    is the true majority exceeds ``conf_thresh``.

    Models the top-two answer counts (a, b) with a Beta-style posterior and
    computes P(p > 0.5) by numerical integration of x^a (1-x)^b.
    """

    def __init__(self, conf_thresh : float = 0.95) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:
        """Decide whether sampling can stop given ``answers`` so far.

        ``conf_thresh`` optionally overrides the instance threshold
        (NOTE(review): annotated ``int`` but used as a float probability).
        ``verbose`` is unused here; kept for interface uniformity.
        """

        if conf_thresh is None: conf_thresh = self.conf_thresh


        # Counts of the two most frequent answers; b = 0 when only one
        # distinct answer has been seen so far.
        most_common = Counter(answers).most_common(2)
        if len(most_common) == 1:
            a, b = most_common[0][1], 0
        else:
            a, b= most_common[0][1], most_common[1][1]
        a = float(a)
        b = float(b)

        return_dict = {
            'most_common' : most_common[0][0],
            'prob' : -1,
            'stop' : False,
        }


        try:
            # P(p > 0.5) under the unnormalized Beta(a+1, b+1) posterior:
            # ratio of the [0.5, 1] integral to the full [0, 1] integral.
            prob = integrate.quad(lambda x : x**(a) * (1-x)**(b), 0.5, 1)[0] / integrate.quad(lambda x : x**(a) * (1-x)**(b), 0, 1)[0]
        except Exception as e:
            # print error message
            # On integration failure, fall back to "keep sampling".
            print(f"Error during numerical integration: {e}")
            return_dict['stop'] = False
            return_dict['prob'] = -1
            return return_dict
        return_dict['prob'] = prob
        return_dict['stop'] = prob >= conf_thresh
        return return_dict

class RandomStoppingCriteria(StoppingCriterias):
    """Baseline: stop with fixed probability ``conf_thresh`` at each step,
    independent of the answers seen."""

    def __init__(self, conf_thresh : float = 0.1) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        return_dict = {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : 0,
            'stop' : np.random.uniform(0,1) < conf_thresh,
        }
        return return_dict

class EntropyStoppingCriteria(StoppingCriterias):
    """Stop when the normalized entropy of the answer distribution drops to
    or below ``conf_thresh`` (low entropy = high agreement)."""

    def __init__(self, conf_thresh : float = 0.75) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        counter = dict(Counter(answers))
        lis = list(counter.values())
        # With a single distinct answer the entropy would be trivially 0 and
        # the log2(len(lis)) normalizer 0; pad with a phantom count of 1.
        if len(lis) < 2:
            lis.append(1)
        entropy = stats.entropy(lis, base = 2)
        return_dict = {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : -1,
            'stop' : False,
        }
        # Never stop after a single sample.
        if len(answers) != 1:
            return_dict['stop'] = entropy/np.log2(len(lis)) <= conf_thresh
            return_dict['prob'] = entropy/np.log2(len(lis))

        return return_dict

class MajorityStoppingCriteria(StoppingCriterias):
    """Stop when the majority answer's share of all samples reaches
    ``conf_thresh``."""

    def __init__(self, conf_thresh : float = 0.8) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh

    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        return_dict = {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : -1,
            'stop' : False,
        }
        # Never stop after a single sample.
        if len(answers) != 1:
            return_dict['stop'] = Counter(answers).most_common(1)[0][1]/len(answers) >= conf_thresh
            return_dict['prob'] = Counter(answers).most_common(1)[0][1]/len(answers)

        return return_dict

class DirichletStoppingCriteria(StoppingCriterias):
    """Generalization of the Beta criterion to more than two distinct
    answers, using a Dirichlet posterior over the top-k answer counts.

    The confidence P(majority answer is the true mode) is an integral over
    a region of the probability simplex, evaluated either by Monte-Carlo
    sampling (``use_markov=True``) or by nested scipy quadrature.
    """

    def __init__(self, conf_thresh : float = 0.95, top_k_elements : int = 5, use_markov : bool = True) -> None:
        super().__init__()
        self.conf_thresh = conf_thresh
        # Only the k most frequent answers enter the Dirichlet model.
        self.top_k_elements = top_k_elements
        # True: vectorized Monte-Carlo integration; False: integrate.nquad.
        self.use_markov = use_markov

    def integrate_mcs(self, f, limits, N = 10000):
        """Monte-Carlo estimate of the integral of ``f`` over nested,
        data-dependent limits.

        Each entry of ``limits`` maps the previously drawn samples to a
        [lo, hi] range for the next variable; limits are supplied
        outermost-last, hence the reversed iteration. Returns a scalar.
        """
        ranges = []
        samples = []

        for _, funcs in enumerate(limits[::-1]):

            if len(samples) == 0:
                ranges.append(funcs())
            else:
                ranges.append(funcs(*samples[::-1]))
            # TODO: Note, we assume, that the first value is actually a scalar.
            try:
                ranges[-1][0] = ranges[-1][0][0]
            except: ...

            samples.append(np.random.uniform(*ranges[-1], size=N))
        # Weight function values by the volume of the sampled box.
        integrand_values = f(*samples) * np.prod([r[1] - r[0] for r in ranges], axis=0)

        integral_approximation = (1/N) * np.sum(integrand_values)
        return integral_approximation


    def should_stop(self, answers : List, conf_thresh : int = None, verbose : bool = False) -> Dict:

        if conf_thresh is None: conf_thresh = self.conf_thresh

        counts = dict(Counter(answers))
        # With fewer than 3 distinct answers the Beta criterion applies and
        # is much cheaper.
        if len(counts) < 3:
            return BetaStoppingCriteria(conf_thresh).should_stop(answers, conf_thresh, verbose)

        most_common = Counter(answers).most_common(2)[0][0]
        # Keep only the top_k_elements most frequent answers (ascending
        # sort, then tail slice).
        counts = {k: v for k, v in sorted(counts.items(), key=lambda item: item[1], reverse=False)[-self.top_k_elements:]}
        len_counts = len(counts)

        # Numerator limits: region of the simplex where the majority
        # answer's probability exceeds every other answer's.
        functions = []
        functions2 =[]
        for i, _ in enumerate(counts.items()):
            if i == len_counts - 2:
                break
            if self.use_markov:
                # Array-valued limits so the Monte-Carlo sampler can draw
                # all N points at once.
                functions.append(lambda *args: [np.array([0 for _ in range(args[0].shape[0])]), np.max([np.array([0 for _ in range(args[0].shape[0])]), np.min([np.array([0.5 for _ in range(args[0].shape[0])]), 1 - np.sum(args, axis = 0) - np.max(args, axis = 0), (1-np.sum(args, axis = 0))/2], axis = 0)], axis = 0)])
            else:
                functions.append(lambda *args: [0, max(0, min(0.5, 1 - sum(args) - max(args), (1-sum(args))/2))])
            functions2.append(lambda *args: [0, max(0, min(0.5, 1 - sum(args) - max(args), (1-sum(args))/2))])

        # Outermost limit
        functions.append(lambda *args: [0, 0.5])
        functions2.append(lambda *args: [0, 0.5])

        # Denominator limits: the full probability simplex (normalizer).
        denom_functions = []
        for i, _ in enumerate(counts.items()):
            if i == len_counts - 2:
                break
            denom_functions.append(lambda *args: [0, 1-np.sum(args, axis = 0)])
        # Outermost limit
        denom_functions.append(lambda *args: [0, 1])
        # Build the Dirichlet-likelihood integrand with one parameter per
        # free simplex coordinate. NOTE(review): relies on CPython-specific
        # behaviour where a name exec()'d into a function frame stays
        # reachable via locals(); also assumes the answer keys round-trip
        # through repr() inside the f-string — confirm for non-basic types.
        exec(
            f'''def integrand({",".join(["a" + str(i) for i in range(len(functions))])}):
    counts = {counts}
    ks = list(counts.keys())
    args = [{",".join(["a" + str(i) for i in range(len(functions))])}]

    outp = np.prod([args[i] ** counts[k] for i, k in enumerate(list(counts.keys())[:-1])], axis = 0) * (1 - np.sum(args, axis = 0)) ** counts[list(counts.keys())[-1]]
    return outp
'''
        )

        return_dict = {
            'most_common' : most_common,
            'prob' : -1,
            'stop' : False,
        }

        try:
            # print('Computing Integration')
            opts = {}
            opts = {'limit': 3, 'epsrel' : 1e-1,'epsabs': 1e-1}

            if self.use_markov:
                # Sample count grows with dimensionality; the second
                # assignment deliberately overrides the first.
                N = min(500000, 5000 * 2**len(functions))
                N = 50000 * 1 if len(functions) <= 4 else 50000 * (2** ((len(functions) - 3)//2))
                prob = self.integrate_mcs(locals()['integrand'], functions, N = N) / self.integrate_mcs(locals()['integrand'], denom_functions, N = N)
            else:
                prob = integrate.nquad(locals()['integrand'], functions, opts = opts)[0] / integrate.nquad(locals()['integrand'], denom_functions, opts = opts)[0]
            return_dict['prob'] = prob
            return_dict['stop'] = prob >= conf_thresh

        except Exception as e:
            # print error message
            # On failure, fall back to "keep sampling".
            print(f"Error during numerical integration: {e}")

        return return_dict

class AlwaysFalseStoppingCriteria(StoppingCriterias):
    """Never stops early; equivalent to plain Self-Consistency."""

    def __init__(self, *args, **kwargs) -> None:
        super().__init__()

    def should_stop(self, answers : List, *args, **kwargs) -> Dict:
        return {
            'most_common' : Counter(answers).most_common(1)[0][0],
            'prob' : -1,
            'stop' : False,
        }
 '; 53 | document.body.appendChild(d); 54 | // Browsers that round the line-height should have offsetHeight === 38 55 | // The others should have 39. 56 | res = d.offsetHeight === 38; 57 | document.body.removeChild(d); 58 | } 59 | return res; 60 | }; 61 | }()); 62 | 63 | /** 64 | * Returns the top offset of the content box of the given parent and the content box of one of its children. 65 | * 66 | * @param {HTMLElement} parent 67 | * @param {HTMLElement} child 68 | */ 69 | function getContentBoxTopOffset(parent, child) { 70 | var parentStyle = getComputedStyle(parent); 71 | var childStyle = getComputedStyle(child); 72 | 73 | /** 74 | * Returns the numeric value of the given pixel value. 75 | * 76 | * @param {string} px 77 | */ 78 | function pxToNumber(px) { 79 | return +px.substr(0, px.length - 2); 80 | } 81 | 82 | return child.offsetTop 83 | + pxToNumber(childStyle.borderTopWidth) 84 | + pxToNumber(childStyle.paddingTop) 85 | - pxToNumber(parentStyle.paddingTop); 86 | } 87 | 88 | /** 89 | * Returns whether the Line Highlight plugin is active for the given element. 90 | * 91 | * If this function returns `false`, do not call `highlightLines` for the given element. 92 | * 93 | * @param {HTMLElement | null | undefined} pre 94 | * @returns {boolean} 95 | */ 96 | function isActiveFor(pre) { 97 | if (!pre || !/pre/i.test(pre.nodeName)) { 98 | return false; 99 | } 100 | 101 | if (pre.hasAttribute('data-line')) { 102 | return true; 103 | } 104 | 105 | if (pre.id && Prism.util.isActive(pre, LINKABLE_LINE_NUMBERS_CLASS)) { 106 | // Technically, the line numbers plugin is also necessary but this plugin doesn't control the classes of 107 | // the line numbers plugin, so we can't assume that they are present. 108 | return true; 109 | } 110 | 111 | return false; 112 | } 113 | 114 | var scrollIntoView = true; 115 | 116 | Prism.plugins.lineHighlight = { 117 | /** 118 | * Highlights the lines of the given pre. 
		 *
		 * This function is split into a DOM measuring and mutate phase to improve performance.
		 * The returned function mutates the DOM when called.
		 *
		 * @param {HTMLElement} pre
		 * @param {string | null} [lines] Range spec such as "1,3-5"; falls back to the `data-line` attribute.
		 * @param {string} [classes=''] Extra class names for each created highlight element.
		 * @returns {() => void} Call to apply the queued DOM mutations.
		 */
		highlightLines: function highlightLines(pre, lines, classes) {
			lines = typeof lines === 'string' ? lines : (pre.getAttribute('data-line') || '');

			// Measure phase: read everything we need from the DOM before mutating anything.
			var ranges = lines.replace(/\s+/g, '').split(',').filter(Boolean);
			var offset = +pre.getAttribute('data-line-offset') || 0;

			// Some browsers round line-height; pick the parse that matches (see isLineHeightRounded).
			var parseMethod = isLineHeightRounded() ? parseInt : parseFloat;
			var lineHeight = parseMethod(getComputedStyle(pre).lineHeight);
			var hasLineNumbers = Prism.util.isActive(pre, LINE_NUMBERS_CLASS);
			var codeElement = pre.querySelector('code');
			var parentElement = hasLineNumbers ? pre : codeElement || pre;
			var mutateActions = /** @type {(() => void)[]} */ ([]);
			var lineBreakMatch = codeElement.textContent.match(NEW_LINE_EXP);
			var numberOfLines = lineBreakMatch ? lineBreakMatch.length + 1 : 1;
			/**
			 * The top offset between the content box of the <code> element and the content box of the parent element of
			 * the line highlight element (either `<pre>` or `<code>`).
			 *
			 * This offset might not be zero for some themes where the <code> element has a top margin. Some plugins
			 * (or users) might also add element above the <code> element. Because the line highlight is aligned relative
			 * to the <pre> element, we have to take this into account.
			 *
			 * This offset will be 0 if the parent element of the line highlight element is the `<code>` element.
			 */
			var codePreOffset = !codeElement || parentElement == codeElement ? 0 : getContentBoxTopOffset(pre, codeElement);

			ranges.forEach(function (currentRange) {
				var range = currentRange.split('-');

				var start = +range[0];
				var end = +range[1] || start;
				// Clamp the range end to the number of lines actually present.
				end = Math.min(numberOfLines + offset, end);

				if (end < start) {
					return;
				}

				// Reuse an existing highlight element for this range if one is already in the DOM.
				/** @type {HTMLElement} */
				var line = pre.querySelector('.line-highlight[data-range="' + currentRange + '"]') || document.createElement('div');

				mutateActions.push(function () {
					line.setAttribute('aria-hidden', 'true');
					line.setAttribute('data-range', currentRange);
					line.className = (classes || '') + ' line-highlight';
				});

				// if the line-numbers plugin is enabled, then there is no reason for this plugin to display the line numbers
				if (hasLineNumbers && Prism.plugins.lineNumbers) {
					var startNode = Prism.plugins.lineNumbers.getLine(pre, start);
					var endNode = Prism.plugins.lineNumbers.getLine(pre, end);

					if (startNode) {
						// Measure now, mutate later: offsetTop is read before any style writes.
						var top = startNode.offsetTop + codePreOffset + 'px';
						mutateActions.push(function () {
							line.style.top = top;
						});
					}

					if (endNode) {
						var height = (endNode.offsetTop - startNode.offsetTop) + endNode.offsetHeight + 'px';
						mutateActions.push(function () {
							line.style.height = height;
						});
					}
				} else {
					mutateActions.push(function () {
						line.setAttribute('data-start', String(start));

						if (end > start) {
							line.setAttribute('data-end', String(end));
						}

						line.style.top = (start - offset - 1) * lineHeight + codePreOffset + 'px';

						// One ' \n' per highlighted line so the element's text gives it the right height.
						line.textContent = new Array(end - start + 2).join(' \n');
					});
				}

				mutateActions.push(function () {
					line.style.width = pre.scrollWidth + 'px';
				});

				mutateActions.push(function () {
					// allow this to play nicely with the line-numbers plugin
					// need to attach to pre as when line-numbers is enabled, the code tag is relatively which screws up the positioning
					parentElement.appendChild(line);
				});
			});

			var id = pre.id;
			if (hasLineNumbers && Prism.util.isActive(pre, LINKABLE_LINE_NUMBERS_CLASS) && id) {
				// This implements linkable line numbers. Linkable line numbers use Line Highlight to create a link to a
				// specific line. For this to work, the pre element has to:
				//  1) have line numbers,
				//  2) have the `linkable-line-numbers` class or an ascendant that has that class, and
				//  3) have an id.

				if (!hasClass(pre, LINKABLE_LINE_NUMBERS_CLASS)) {
					// add class to pre
					mutateActions.push(function () {
						pre.classList.add(LINKABLE_LINE_NUMBERS_CLASS);
					});
				}

				var start = parseInt(pre.getAttribute('data-start') || '1');

				// iterate all line number spans
				$$('.line-numbers-rows > span', pre).forEach(function (lineSpan, i) {
					var lineNumber = i + start;
					lineSpan.onclick = function () {
						var hash = id + '.' + lineNumber;

						// this will prevent scrolling since the span is obviously in view
						scrollIntoView = false;
						location.hash = hash;
						setTimeout(function () {
							scrollIntoView = true;
						}, 1);
					};
				});
			}

			// Mutate phase: the caller invokes this to flush all queued DOM writes at once.
			return function () {
				mutateActions.forEach(callFunction);
			};
		}
254 | 	};
255 | 
256 | 
257 | 	function applyHash() {
258 | 		var hash = location.hash.slice(1);
259 | 
260 | 		// Remove pre-existing temporary lines
261 | 		$$('.temporary.line-highlight').forEach(function (line) {
262 | 			line.parentNode.removeChild(line);
263 | 		});
264 | 
265 | 		var range = (hash.match(/\.([\d,-]+)$/) || [, ''])[1];
266 | 
267 | 		if (!range || document.getElementById(hash)) {
268 | 			return;
269 | 		}
270 | 
271 | 		var id = hash.slice(0, hash.lastIndexOf('.'));
272 | 		var pre = document.getElementById(id);
273 | 
274 | 		if (!pre) {
275 | 			return;
276 | 		}
277 | 
278 | 		if (!pre.hasAttribute('data-line')) {
279 | 			pre.setAttribute('data-line', '');
280 | 		}
281 | 
282 | 		var mutateDom = Prism.plugins.lineHighlight.highlightLines(pre, range, 'temporary ');
283 | 		mutateDom();
284 | 
285 | 		if (scrollIntoView) {
286 | 			document.querySelector('.temporary.line-highlight').scrollIntoView();
287 | 		}
288 | 	}
289 | 
290 | 	var fakeTimer = 0; // Hack to limit the number of times applyHash() runs
291 | 
292 | 	Prism.hooks.add('before-sanity-check', function (env) {
293 | 		var pre = env.element.parentElement;
294 | 		if (!isActiveFor(pre)) {
295 | 			return;
296 | 		}
297 | 
298 | 		/*
299 | 		 * Cleanup for other plugins (e.g. autoloader).
300 | 		 *
301 | 		 * Sometimes  blocks are highlighted multiple times. It is necessary
302 | 		 * to cleanup any left-over tags, because the whitespace inside of the 
303 | * tags change the content of the tag. 304 | */ 305 | var num = 0; 306 | $$('.line-highlight', pre).forEach(function (line) { 307 | num += line.textContent.length; 308 | line.parentNode.removeChild(line); 309 | }); 310 | // Remove extra whitespace 311 | if (num && /^(?: \n)+$/.test(env.code.slice(-num))) { 312 | env.code = env.code.slice(0, -num); 313 | } 314 | }); 315 | 316 | Prism.hooks.add('complete', function completeHook(env) { 317 | var pre = env.element.parentElement; 318 | if (!isActiveFor(pre)) { 319 | return; 320 | } 321 | 322 | clearTimeout(fakeTimer); 323 | 324 | var hasLineNumbers = Prism.plugins.lineNumbers; 325 | var isLineNumbersLoaded = env.plugins && env.plugins.lineNumbers; 326 | 327 | if (hasClass(pre, LINE_NUMBERS_CLASS) && hasLineNumbers && !isLineNumbersLoaded) { 328 | Prism.hooks.add('line-numbers', completeHook); 329 | } else { 330 | var mutateDom = Prism.plugins.lineHighlight.highlightLines(pre); 331 | mutateDom(); 332 | fakeTimer = setTimeout(applyHash, 1); 333 | } 334 | }); 335 | 336 | window.addEventListener('hashchange', applyHash); 337 | window.addEventListener('resize', function () { 338 | var actions = $$('pre') 339 | .filter(isActiveFor) 340 | .map(function (pre) { 341 | return Prism.plugins.lineHighlight.highlightLines(pre); 342 | }); 343 | actions.forEach(callFunction); 344 | }); 345 | 346 | }()); 347 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 
0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid 
#7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 
1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=r
ange].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 
1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black 
.has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark .has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary 
.has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success 
.has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /scripts/pal/core/interface.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 PAL Authors. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def init_adaptive_consistency(max_gens, stop_criteria, stop_criteria_thresh):
    """Build an AC (Adaptive-Consistency) controller for sampling loops.

    Parameters
    ----------
    max_gens : int
        Upper bound on the number of generations the controller may request.
    stop_criteria : str or None
        Key into ``stop_criteria_dict``; ``None`` falls back to
        'always_false' (i.e. never stop early, always use ``max_gens``).
    stop_criteria_thresh : float, None, or -1
        Confidence threshold passed to the criterion as ``conf_thresh``;
        ``None`` and ``-1`` are both "use the criterion's default" sentinels.

    Returns
    -------
    AC
        A configured adaptive-consistency controller.
    """
    if stop_criteria is None:
        stop_criteria = 'always_false'
    criterion_cls = stop_criteria_dict[stop_criteria]
    # None and -1 are both treated as "no explicit threshold" by callers.
    if stop_criteria_thresh is None or stop_criteria_thresh == -1:
        criterion = criterion_cls()
    else:
        criterion = criterion_cls(conf_thresh=stop_criteria_thresh)
    return AC(max_gens=max_gens, stop_criteria=criterion)


class timeout:
    """Context manager that raises ``TimeoutError`` after ``seconds``.

    Fixes over the original:
    - ``signal.alarm`` accepts only whole seconds and raises ``TypeError``
      for floats, yet callers annotate their timeouts as ``float``;
      ``signal.setitimer`` supports fractional seconds.
    - The previous SIGALRM handler is restored on exit instead of leaking
      our handler into the rest of the process.

    Unix-only and main-thread-only (relies on SIGALRM delivery).
    NOTE(review): nesting ``timeout`` blocks cancels the outer timer on
    inner exit — same limitation as the original implementation.
    """

    def __init__(self, seconds=1, error_message='Timeout'):
        self.seconds = seconds
        self.error_message = error_message

    def timeout_handler(self, signum, frame):
        # Invoked by the kernel-delivered SIGALRM when the timer fires.
        raise TimeoutError(self.error_message)

    def __enter__(self):
        # Remember whatever handler was installed so we can put it back.
        self._old_handler = signal.signal(signal.SIGALRM, self.timeout_handler)
        # setitimer accepts float seconds, unlike signal.alarm.
        signal.setitimer(signal.ITIMER_REAL, self.seconds)

    def __exit__(self, type, value, traceback):
        # Cancel the timer and restore the pre-existing SIGALRM handler.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, self._old_handler)
extract_answer 71 | self.stop = stop 72 | self.model = model 73 | self.openai_url = openai_url 74 | # Hacky solution: 75 | if self.openai_url is not None: 76 | globals()['call_gpt'] = lambda *args, **kwargs : call_vicuna(*args, **kwargs, url=openai_url) 77 | 78 | def reinit(self): 79 | ... 80 | 81 | 82 | def clear_history(self): 83 | self.history = [] 84 | 85 | def extract_answer(self, gen: str): 86 | if self.extract_answer_fn: 87 | return self.extract_answer_fn (gen) 88 | last_line = gen.strip().split('\n')[-1] 89 | # TODO: Searching for last line is not at all necessary! 90 | last_idx = last_line.rfind(self.answer_prefix) 91 | if last_idx == -1: 92 | return "" 93 | answer = last_line[last_idx + len(self.answer_prefix):].strip() 94 | if answer.endswith('.'): 95 | answer = answer[:-1] 96 | return answer 97 | 98 | def execute(self, gen: str): 99 | if isinstance(gen, List): 100 | gen = '\n'.join(gen) 101 | return self.extract_answer(gen) 102 | 103 | def run(self, prompt, temperature=0.0, top_p=1.0, majority_at=None, max_tokens=512, logprobs=0): 104 | # gen = call_gpt(prompt, model=self.model, stop=self.stop, 105 | # temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at) 106 | if logprobs != 0: 107 | gens, dt = call_gpt(prompt, model=self.model, stop=self.stop, 108 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, logprobs=logprobs) 109 | else: 110 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 111 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, ) 112 | 113 | if logprobs != 0: 114 | self.history.append([gens, dt]) 115 | else: 116 | self.history.append(gens) 117 | results = [] 118 | for gen in gens: 119 | results.append(self.extract_answer(gen)) 120 | return Counter(results).most_common(1)[0][0] 121 | 122 | 123 | class ProgramInterface: 124 | 125 | def __init__( 126 | self, 127 | max_gens: int = 40, 128 | model: str = 'code-davinci-002', 129 | 
runtime: Optional[Any] = None, 130 | stop: str = '\n\n', 131 | get_answer_symbol: Optional[str] = None, 132 | get_answer_expr: Optional[str] = None, 133 | get_answer_from_stdout: bool = False, 134 | verbose: bool = False, 135 | openai_url: Optional[str] = None, 136 | stop_criteria: Optional[str] = None, 137 | stop_criteria_thresh: Optional[float] = None, 138 | ) -> None: 139 | 140 | self.max_gens = max_gens 141 | self.ac = init_adaptive_consistency(self.max_gens, stop_criteria, stop_criteria_thresh) 142 | 143 | self.model = model 144 | self.runtime = runtime if runtime else GenericRuntime() 145 | self.history = [] 146 | self.stop = stop 147 | self.answer_symbol = get_answer_symbol 148 | self.answer_expr = get_answer_expr 149 | self.get_answer_from_stdout = get_answer_from_stdout 150 | self.verbose = verbose 151 | 152 | if openai_url is not None: 153 | globals()['call_gpt'] = lambda *args, **kwargs : call_vicuna(*args, **kwargs, url=openai_url) 154 | 155 | def reinit(self): 156 | import copy 157 | self.runtime._global_vars = copy.copy(self.runtime.GLOBAL_DICT) 158 | 159 | 160 | def clear_history(self): 161 | self.history = [] 162 | 163 | def process_generation_to_code(self, gens: str): 164 | return [g.split('\n') for g in gens] 165 | 166 | def generate(self, prompt: str, temperature: float =0.0, top_p: float =1.0, 167 | max_tokens: int =512, majority_at: int = None, logprobs = 0): 168 | if logprobs != 0: 169 | gens, dt = call_gpt(prompt, model=self.model, stop=self.stop, 170 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, logprobs=logprobs) 171 | else: 172 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 173 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, ) 174 | if self.verbose: 175 | print(gens) 176 | code = self.process_generation_to_code([x.strip() for x in gens]) 177 | if logprobs != 0: 178 | self.history.append([gens, dt]) 179 | else: 180 | self.history.append(gens) 181 
    def execute(self, code: Optional[List[str]] = None, TIMEOUT = 2):
        """Execute generated code lines in ``self.runtime`` and return the answer.

        Falls back to ``self.code`` when ``code`` is None.
        NOTE(review): ``self.code`` is never assigned anywhere in this
        class — confirm callers set it before relying on the fallback.

        The answer is extracted by the first configured mechanism, in
        priority order:
        1. ``get_answer_from_stdout``: run all lines, capture stdout, and
           return its last line (including the trailing newline).
        2. ``answer_symbol``: run all lines, return that variable from the
           runtime's global dict.
        3. ``answer_expr``: run all lines, then evaluate the expression.
        4. otherwise: run every line but the last, and evaluate the last
           line as an expression.

        Raises ``TimeoutError`` (via the module's ``timeout`` context
        manager) if execution exceeds ``TIMEOUT`` seconds.
        """
        with timeout(TIMEOUT):
            code = code if code else self.code
            if self.get_answer_from_stdout:
                program_io = io.StringIO()
                with redirect_stdout(program_io):
                    self.runtime.exec_code('\n'.join(code))
                program_io.seek(0)
                return program_io.readlines()[-1]
            elif self.answer_symbol:
                self.runtime.exec_code('\n'.join(code))
                return self.runtime._global_vars[self.answer_symbol]
            elif self.answer_expr:
                self.runtime.exec_code('\n'.join(code))
                return self.runtime.eval_code(self.answer_expr)
            else:
                self.runtime.exec_code('\n'.join(code[:-1]))
                return self.runtime.eval_code(code[-1])
        # NOTE(review): unreachable — every branch above returns inside the
        # with-block; kept byte-identical to the original.
        return ""

    def run(self, prompt: str, time_out: float =10, temperature: float =0.0, top_p: float =1.0,
            max_tokens: int =512, majority_at: int = None, prepend_to_code = "", logprobs = 0):
        """Generate code snippets for ``prompt``, execute each, and return the
        majority-vote result.

        Each snippet is prefixed with ``prepend_to_code`` (split into lines)
        before execution; snippets that raise are printed and skipped.
        ``self.reinit()`` resets the runtime's globals between snippets.

        NOTE(review): ``timeout(time_out)`` here nests around ``execute``,
        which installs its own inner 2-second timeout — the inner exit
        cancels the outer timer. Also, if every snippet fails, ``results``
        is empty and ``most_common(1)[0]`` raises IndexError — confirm
        callers tolerate/expect that.
        """
        code_snippets = self.generate(prompt, majority_at=majority_at, temperature=temperature, top_p=top_p, max_tokens=max_tokens, logprobs = logprobs)
        results = []
        for code in code_snippets:
            self.reinit()
            with timeout(time_out):
                try:
                    exec_result = self.execute(prepend_to_code.splitlines() + code)
                except Exception as e:
                    print(e)
                    continue
            results.append(exec_result)
        counter = Counter(results)
        return counter.most_common(1)[0][0]
self.history.append([]) 235 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 236 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=majority_at, ) 237 | if self.verbose: 238 | print(gens) 239 | gens = [x.strip() for x in gens] 240 | # print('Processing generations to code') 241 | code = self.process_generation_to_code(gens) 242 | # print('Appending to code') 243 | self.history[-1].extend(gens) 244 | return code 245 | 246 | def run(self, prompt: str, time_out: float =10, temperature: float =0.0, top_p: float =1.0, 247 | max_tokens: int =512, majority_at: int =None, prepend_to_code = ""): 248 | all_results = [] 249 | for i in range(0, self.max_gens, self.step_size): 250 | code_snippets = self.generate(prompt, majority_at=self.step_size, temperature=temperature, top_p=top_p, max_tokens=max_tokens) 251 | 252 | results = [] 253 | for code in code_snippets: 254 | self.reinit() 255 | with timeout(time_out): 256 | try: 257 | exec_result = self.execute(prepend_to_code.splitlines() + code) 258 | if self.answer_type == 'float': 259 | exec_result = float(exec_result) 260 | else: 261 | exec_result = str(exec_result) 262 | except Exception as e: 263 | print('Eror', e) 264 | # traceback.print_exc() 265 | 266 | continue 267 | results.append(exec_result) 268 | all_results += results 269 | # print(all_results) 270 | if len(all_results) == 0: 271 | continue 272 | # if has_conclusive_majority_binomial_prob(all_results, self.conf_thresh)[1]: 273 | if self.ac.should_stop(all_results): 274 | # print('Less goo!', results) 275 | break 276 | print('Used {} generations'.format(i+4)) 277 | counter = Counter(all_results) 278 | most_common = counter.most_common(1)[0] 279 | return most_common[0], all_results 280 | 281 | 282 | class AdaptiveTextInterface(TextInterface): 283 | def __init__(self, step_size, *args, **kwargs): 284 | super().__init__(*args, **kwargs) 285 | self.history = [] 286 | self.step_size = step_size 287 | 288 | 289 | def run(self, prompt: str, 
time_out: float =10, temperature: float =0.0, top_p: float =1.0, 290 | max_tokens: int =512, majority_at: int =None, prepend_to_code = ""): 291 | all_results = [] 292 | for i in range(0, self.max_gens, self.step_size): 293 | print(i) 294 | gens = call_gpt(prompt, model=self.model, stop=self.stop, 295 | temperature=temperature, top_p=top_p, max_tokens=max_tokens, majority_at=self.step_size, ) 296 | print(i) 297 | results = [] 298 | for gen in gens: 299 | self.reinit() 300 | self.history.append(gen) 301 | ans = self.extract_answer(gen) 302 | results.append(ans) 303 | all_results += results 304 | if len(all_results) == 0: 305 | continue 306 | # if has_conclusive_majority_binomial_prob(all_results, self.conf_thresh)[1]: 307 | if self.ac.should_stop(all_results): 308 | break 309 | print('Used {} generations'.format(i+4)) 310 | counter = Counter(all_results) 311 | most_common = counter.most_common(1)[0] 312 | return most_common[0], all_results -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.js: -------------------------------------------------------------------------------- 1 | (function webpackUniversalModuleDefinition(root, factory) { 2 | if(typeof exports === 'object' && typeof module === 'object') 3 | module.exports = factory(); 4 | else if(typeof define === 'function' && define.amd) 5 | define([], factory); 6 | else if(typeof exports === 'object') 7 | exports["bulmaSlider"] = factory(); 8 | else 9 | root["bulmaSlider"] = factory(); 10 | })(typeof self !== 'undefined' ? 
self : this, function() { 11 | return /******/ (function(modules) { // webpackBootstrap 12 | /******/ // The module cache 13 | /******/ var installedModules = {}; 14 | /******/ 15 | /******/ // The require function 16 | /******/ function __webpack_require__(moduleId) { 17 | /******/ 18 | /******/ // Check if module is in cache 19 | /******/ if(installedModules[moduleId]) { 20 | /******/ return installedModules[moduleId].exports; 21 | /******/ } 22 | /******/ // Create a new module (and put it into the cache) 23 | /******/ var module = installedModules[moduleId] = { 24 | /******/ i: moduleId, 25 | /******/ l: false, 26 | /******/ exports: {} 27 | /******/ }; 28 | /******/ 29 | /******/ // Execute the module function 30 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 31 | /******/ 32 | /******/ // Flag the module as loaded 33 | /******/ module.l = true; 34 | /******/ 35 | /******/ // Return the exports of the module 36 | /******/ return module.exports; 37 | /******/ } 38 | /******/ 39 | /******/ 40 | /******/ // expose the modules object (__webpack_modules__) 41 | /******/ __webpack_require__.m = modules; 42 | /******/ 43 | /******/ // expose the module cache 44 | /******/ __webpack_require__.c = installedModules; 45 | /******/ 46 | /******/ // define getter function for harmony exports 47 | /******/ __webpack_require__.d = function(exports, name, getter) { 48 | /******/ if(!__webpack_require__.o(exports, name)) { 49 | /******/ Object.defineProperty(exports, name, { 50 | /******/ configurable: false, 51 | /******/ enumerable: true, 52 | /******/ get: getter 53 | /******/ }); 54 | /******/ } 55 | /******/ }; 56 | /******/ 57 | /******/ // getDefaultExport function for compatibility with non-harmony modules 58 | /******/ __webpack_require__.n = function(module) { 59 | /******/ var getter = module && module.__esModule ? 
60 | /******/ function getDefault() { return module['default']; } : 61 | /******/ function getModuleExports() { return module; }; 62 | /******/ __webpack_require__.d(getter, 'a', getter); 63 | /******/ return getter; 64 | /******/ }; 65 | /******/ 66 | /******/ // Object.prototype.hasOwnProperty.call 67 | /******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; 68 | /******/ 69 | /******/ // __webpack_public_path__ 70 | /******/ __webpack_require__.p = ""; 71 | /******/ 72 | /******/ // Load entry module and return exports 73 | /******/ return __webpack_require__(__webpack_require__.s = 0); 74 | /******/ }) 75 | /************************************************************************/ 76 | /******/ ([ 77 | /* 0 */ 78 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 79 | 80 | "use strict"; 81 | Object.defineProperty(__webpack_exports__, "__esModule", { value: true }); 82 | /* harmony export (binding) */ __webpack_require__.d(__webpack_exports__, "isString", function() { return isString; }); 83 | /* harmony import */ var __WEBPACK_IMPORTED_MODULE_0__events__ = __webpack_require__(1); 84 | var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; 85 | 86 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, 
staticProps); return Constructor; }; }(); 87 | 88 | var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; 89 | 90 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 91 | 92 | function _possibleConstructorReturn(self, call) { if (!self) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return call && (typeof call === "object" || typeof call === "function") ? call : self; } 93 | 94 | function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function, not " + typeof superClass); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, enumerable: false, writable: true, configurable: true } }); if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass; } 95 | 96 | 97 | 98 | var isString = function isString(unknown) { 99 | return typeof unknown === 'string' || !!unknown && (typeof unknown === 'undefined' ? 'undefined' : _typeof(unknown)) === 'object' && Object.prototype.toString.call(unknown) === '[object String]'; 100 | }; 101 | 102 | var bulmaSlider = function (_EventEmitter) { 103 | _inherits(bulmaSlider, _EventEmitter); 104 | 105 | function bulmaSlider(selector) { 106 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 107 | 108 | _classCallCheck(this, bulmaSlider); 109 | 110 | var _this = _possibleConstructorReturn(this, (bulmaSlider.__proto__ || Object.getPrototypeOf(bulmaSlider)).call(this)); 111 | 112 | _this.element = typeof selector === 'string' ? 
document.querySelector(selector) : selector; 113 | // An invalid selector or non-DOM node has been provided. 114 | if (!_this.element) { 115 | throw new Error('An invalid selector or non-DOM node has been provided.'); 116 | } 117 | 118 | _this._clickEvents = ['click']; 119 | /// Set default options and merge with instance defined 120 | _this.options = _extends({}, options); 121 | 122 | _this.onSliderInput = _this.onSliderInput.bind(_this); 123 | 124 | _this.init(); 125 | return _this; 126 | } 127 | 128 | /** 129 | * Initiate all DOM element containing selector 130 | * @method 131 | * @return {Array} Array of all slider instances 132 | */ 133 | 134 | 135 | _createClass(bulmaSlider, [{ 136 | key: 'init', 137 | 138 | 139 | /** 140 | * Initiate plugin 141 | * @method init 142 | * @return {void} 143 | */ 144 | value: function init() { 145 | this._id = 'bulmaSlider' + new Date().getTime() + Math.floor(Math.random() * Math.floor(9999)); 146 | this.output = this._findOutputForSlider(); 147 | 148 | this._bindEvents(); 149 | 150 | if (this.output) { 151 | if (this.element.classList.contains('has-output-tooltip')) { 152 | // Get new output position 153 | var newPosition = this._getSliderOutputPosition(); 154 | 155 | // Set output position 156 | this.output.style['left'] = newPosition.position; 157 | } 158 | } 159 | 160 | this.emit('bulmaslider:ready', this.element.value); 161 | } 162 | }, { 163 | key: '_findOutputForSlider', 164 | value: function _findOutputForSlider() { 165 | var _this2 = this; 166 | 167 | var result = null; 168 | var outputs = document.getElementsByTagName('output') || []; 169 | 170 | Array.from(outputs).forEach(function (output) { 171 | if (output.htmlFor == _this2.element.getAttribute('id')) { 172 | result = output; 173 | return true; 174 | } 175 | }); 176 | return result; 177 | } 178 | }, { 179 | key: '_getSliderOutputPosition', 180 | value: function _getSliderOutputPosition() { 181 | // Update output position 182 | var newPlace, minValue; 183 | 184 | 
var style = window.getComputedStyle(this.element, null); 185 | // Measure width of range input 186 | var sliderWidth = parseInt(style.getPropertyValue('width'), 10); 187 | 188 | // Figure out placement percentage between left and right of input 189 | if (!this.element.getAttribute('min')) { 190 | minValue = 0; 191 | } else { 192 | minValue = this.element.getAttribute('min'); 193 | } 194 | var newPoint = (this.element.value - minValue) / (this.element.getAttribute('max') - minValue); 195 | 196 | // Prevent bubble from going beyond left or right (unsupported browsers) 197 | if (newPoint < 0) { 198 | newPlace = 0; 199 | } else if (newPoint > 1) { 200 | newPlace = sliderWidth; 201 | } else { 202 | newPlace = sliderWidth * newPoint; 203 | } 204 | 205 | return { 206 | 'position': newPlace + 'px' 207 | }; 208 | } 209 | 210 | /** 211 | * Bind all events 212 | * @method _bindEvents 213 | * @return {void} 214 | */ 215 | 216 | }, { 217 | key: '_bindEvents', 218 | value: function _bindEvents() { 219 | if (this.output) { 220 | // Add event listener to update output when slider value change 221 | this.element.addEventListener('input', this.onSliderInput, false); 222 | } 223 | } 224 | }, { 225 | key: 'onSliderInput', 226 | value: function onSliderInput(e) { 227 | e.preventDefault(); 228 | 229 | if (this.element.classList.contains('has-output-tooltip')) { 230 | // Get new output position 231 | var newPosition = this._getSliderOutputPosition(); 232 | 233 | // Set output position 234 | this.output.style['left'] = newPosition.position; 235 | } 236 | 237 | // Check for prefix and postfix 238 | var prefix = this.output.hasAttribute('data-prefix') ? this.output.getAttribute('data-prefix') : ''; 239 | var postfix = this.output.hasAttribute('data-postfix') ? 
this.output.getAttribute('data-postfix') : ''; 240 | 241 | // Update output with slider value 242 | this.output.value = prefix + this.element.value + postfix; 243 | 244 | this.emit('bulmaslider:ready', this.element.value); 245 | } 246 | }], [{ 247 | key: 'attach', 248 | value: function attach() { 249 | var _this3 = this; 250 | 251 | var selector = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'input[type="range"].slider'; 252 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 253 | 254 | var instances = new Array(); 255 | 256 | var elements = isString(selector) ? document.querySelectorAll(selector) : Array.isArray(selector) ? selector : [selector]; 257 | elements.forEach(function (element) { 258 | if (typeof element[_this3.constructor.name] === 'undefined') { 259 | var instance = new bulmaSlider(element, options); 260 | element[_this3.constructor.name] = instance; 261 | instances.push(instance); 262 | } else { 263 | instances.push(element[_this3.constructor.name]); 264 | } 265 | }); 266 | 267 | return instances; 268 | } 269 | }]); 270 | 271 | return bulmaSlider; 272 | }(__WEBPACK_IMPORTED_MODULE_0__events__["a" /* default */]); 273 | 274 | /* harmony default export */ __webpack_exports__["default"] = (bulmaSlider); 275 | 276 | /***/ }), 277 | /* 1 */ 278 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 279 | 280 | "use strict"; 281 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; 
}(); 282 | 283 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 284 | 285 | var EventEmitter = function () { 286 | function EventEmitter() { 287 | var listeners = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : []; 288 | 289 | _classCallCheck(this, EventEmitter); 290 | 291 | this._listeners = new Map(listeners); 292 | this._middlewares = new Map(); 293 | } 294 | 295 | _createClass(EventEmitter, [{ 296 | key: "listenerCount", 297 | value: function listenerCount(eventName) { 298 | if (!this._listeners.has(eventName)) { 299 | return 0; 300 | } 301 | 302 | var eventListeners = this._listeners.get(eventName); 303 | return eventListeners.length; 304 | } 305 | }, { 306 | key: "removeListeners", 307 | value: function removeListeners() { 308 | var _this = this; 309 | 310 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 311 | var middleware = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : false; 312 | 313 | if (eventName !== null) { 314 | if (Array.isArray(eventName)) { 315 | name.forEach(function (e) { 316 | return _this.removeListeners(e, middleware); 317 | }); 318 | } else { 319 | this._listeners.delete(eventName); 320 | 321 | if (middleware) { 322 | this.removeMiddleware(eventName); 323 | } 324 | } 325 | } else { 326 | this._listeners = new Map(); 327 | } 328 | } 329 | }, { 330 | key: "middleware", 331 | value: function middleware(eventName, fn) { 332 | var _this2 = this; 333 | 334 | if (Array.isArray(eventName)) { 335 | name.forEach(function (e) { 336 | return _this2.middleware(e, fn); 337 | }); 338 | } else { 339 | if (!Array.isArray(this._middlewares.get(eventName))) { 340 | this._middlewares.set(eventName, []); 341 | } 342 | 343 | this._middlewares.get(eventName).push(fn); 344 | } 345 | } 346 | }, { 347 | key: "removeMiddleware", 348 | value: function removeMiddleware() { 349 | var _this3 = this; 350 | 351 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 352 | 353 | if (eventName !== null) { 354 | if (Array.isArray(eventName)) { 355 | name.forEach(function (e) { 356 | return _this3.removeMiddleware(e); 357 | }); 358 | } else { 359 | this._middlewares.delete(eventName); 360 | } 361 | } else { 362 | this._middlewares = new Map(); 363 | } 364 | } 365 | }, { 366 | key: "on", 367 | value: function on(name, callback) { 368 | var _this4 = this; 369 | 370 | var once = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : false; 371 | 372 | if (Array.isArray(name)) { 373 | name.forEach(function (e) { 374 | return _this4.on(e, callback); 375 | }); 376 | } else { 377 | name = name.toString(); 378 | var split = name.split(/,|, | /); 379 | 380 | if (split.length > 1) { 381 | split.forEach(function (e) { 382 | return _this4.on(e, callback); 383 | }); 384 | } else { 385 | if (!Array.isArray(this._listeners.get(name))) { 386 | this._listeners.set(name, []); 387 | } 388 | 389 | this._listeners.get(name).push({ once: once, callback: callback }); 390 | } 391 | } 392 | } 393 | }, { 394 | key: "once", 395 | value: function once(name, callback) { 396 | this.on(name, callback, true); 397 | } 398 | }, { 399 | key: "emit", 400 | value: function emit(name, data) { 401 | var _this5 = this; 402 | 403 | var silent = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; 404 | 405 | name = name.toString(); 406 | var listeners = this._listeners.get(name); 407 | var middlewares = null; 408 | var doneCount = 0; 409 | var execute = silent; 410 | 411 | if (Array.isArray(listeners)) { 412 | listeners.forEach(function (listener, index) { 413 | // Start Middleware checks unless we're doing a silent emit 414 | if (!silent) { 415 | middlewares = _this5._middlewares.get(name); 416 | // Check and execute Middleware 417 | if (Array.isArray(middlewares)) { 418 | middlewares.forEach(function (middleware) { 419 | middleware(data, function () { 420 | var newData = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : null; 421 | 422 | if (newData !== null) { 423 | data = newData; 424 | } 425 | doneCount++; 426 | }, name); 427 | }); 428 | 429 | if (doneCount >= middlewares.length) { 430 | execute = true; 431 | } 432 | } else { 433 | execute = true; 434 | } 435 | } 436 | 437 | // If Middleware checks have been passed, execute 438 | if (execute) { 439 | if (listener.once) { 440 | listeners[index] = null; 441 | } 442 | listener.callback(data); 443 | } 444 | }); 445 | 446 | // Dirty way of removing used Events 447 | while (listeners.indexOf(null) !== -1) { 448 | listeners.splice(listeners.indexOf(null), 1); 449 | } 450 | } 451 | } 452 | }]); 453 | 454 | return EventEmitter; 455 | }(); 456 | 457 | /* harmony default export */ __webpack_exports__["a"] = (EventEmitter); 458 | 459 | /***/ }) 460 | /******/ ])["default"]; 461 | }); -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning & Coding with LLMs 10 | 11 | 12 | 13 | 26 | 27 | 29 | 30 | 31 | 32 | 33 | 34 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 46 | 47 | 48 | 49 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 106 | 107 | 108 |
109 |
110 |
111 |
112 |
113 |

Let's Sample Step by Step:
Adaptive-Consistency for Efficient Reasoning with LLMs

114 |
115 | 116 | Pranjal Aggarwal1, 117 | 118 | Aman Madaan2, 119 | 120 | Yiming Yang2, 121 | 122 | 123 | Mausam1, 124 | 125 |
126 | 127 |
128 | 1Department of Computer Science and Engineering, Indian Institute of Technology Delhi 129 | 2Language Technologies Institute, School of CS, Carnegie Mellon University 130 |
131 | 132 |
133 | 184 | 185 |
186 |
187 |
188 |
189 |
190 |
191 | 192 | 193 | 194 | 207 | 208 |

209 | TLDR: a sampling method that matches Self-Consistency in performance with up to 7.9x fewer samples. 210 |

211 | 212 |
213 |
214 |
215 | 220 | 221 | 224 | 225 | 226 |
227 | 228 |
229 |
230 | 231 | 287 | 288 | 289 |
290 |
291 | 292 |
293 |
294 |

Abstract

295 |
296 |

297 | A popular approach for improving the correctness of output from large language models (LLMs) is Self-Consistency -- poll the LLM multiple times and output the most frequent solution. Existing Self-Consistency techniques always draw a constant number of samples per question, whereas a better approach would be to non-uniformly distribute the available budget, based on the amount of agreement in the samples drawn so far. 298 |

299 |

300 | In response, we introduce Adaptive-Consistency, a cost-efficient, model-agnostic technique that dynamically adjusts the number of samples per question using a lightweight stopping criterion. Our experiments over 13 datasets and two LLMs demonstrate that Adaptive-Consistency reduces sample budget by up to 7.9 times with an average accuracy drop of less than 0.1%. 301 |

302 | 303 | 304 | 305 |
306 |
307 |
308 | 309 | 310 | 311 | 320 | 321 |
322 |
323 | 324 |
325 |
326 |
327 |
328 | 329 |

Key-Highlights

330 | 331 | 332 | 334 |
335 |
    336 |
  • 🚀 Dynamic Sampling: Our method smartly adjusts the number of samples per question using a lightweight stopping criterion.
  • 337 |
  • 📊 Cost-Effective Performance: Adaptive-Consistency reduces the sample budget by up to 7.9 times with an average accuracy drop of less than 0.1%.
  • 338 |
  • 🔌Off-the-Shelf Solution: Improve accuracy or reduce cost right away with just 2-3 lines of code changes. No additional training required.
  • 339 |
  • 🌐Compatible with Pre-trained LLMs: Our method works seamlessly with popular large language models like GPT-3.
  • 340 |
341 |
342 | 345 | 346 |
347 |
348 |
349 |
350 | 351 |
352 |
353 |
354 |
355 |

Results Summary

356 | Adaptive-Consistency outperforms Self-Consistency on 13 benchmarks, across varying domains, difficulty levels, and answer types. 357 |
359 |
360 | 361 |
362 | 363 | Mathematical Reasoning 364 |
365 |
366 | 367 |
368 | 369 | Logical Reasoning 370 |
371 | 372 |
373 | 374 | 375 |
376 | 377 |
378 | Other Reasoning Datasets 379 |
380 |
381 |
382 |
383 |
384 | 385 | 393 | 394 | 395 |
396 |
397 |
398 |

Using Adaptive-Consistency in your code

399 | Using Adaptive-Consistency in your code takes only 2-3 line changes. 400 | 401 | 402 | 403 |
404 | 405 |
406 |

407 | 1. Modifying Self-Consistency 408 |

409 |
from adaptive_consistency import AC
410 | 
411 | outputs = []
412 | ac = AC(stop_criteria = 'beta')
413 | for input in dataset:
414 |     output = openai.Completion.create(input, *args, **kwargs)
415 |     outputs.append(output)
416 |     if ac.should_stop(outputs):
417 |         break
418 |                 
419 |
420 | 421 |
422 | 423 |
424 | 425 |
426 |

427 | 2. Modifying Vanilla Prompting 428 |

429 |
from adaptive_consistency import AC
430 | 
431 | output = sampling_function(*args, **kwargs)
432 | output = AC(stop_criteria = 'beta').eval_loop(sampling_function, *args, **kwargs)
433 |             
434 |
435 | 436 |
437 |
438 |
439 | 440 | 441 | 442 | 443 |
444 |
445 |

BibTeX

446 |
@misc{aggarwal2023lets,
447 |       title={Let's Sample Step by Step: Adaptive-Consistency for Efficient Reasoning with LLMs}, 
448 |       author={Pranjal Aggarwal and Aman Madaan and Yiming Yang and Mausam},
449 |       year={2023},
450 |       eprint={2305.11860},
451 |       archivePrefix={arXiv},
452 |       primaryClass={cs.CL}
453 | }
454 |
455 |
456 | 457 | 458 | 482 | 483 | 484 | 485 | --------------------------------------------------------------------------------