├── .gitignore
├── LICENSE
├── README.md
├── assets
    └── results.png
├── config.json
├── package.json
├── populate_index.py
├── requirements.txt
├── src
    ├── __init__.py
    ├── engines
    │   ├── __init__.py
    │   ├── engine_google_vertex.py
    │   ├── engine_llamacpp.py
    │   └── engine_mockup.py
    ├── evals
    │   ├── __init__.py
    │   ├── assets
    │   │   └── sample_bill.jpg
    │   ├── components
    │   │   ├── __init__.py
    │   │   ├── api_builder.py
    │   │   ├── factorization.py
    │   │   └── paper.py
    │   ├── eval_computation_graphs.py
    │   ├── eval_in_context_associations.py
    │   ├── eval_logic_components.py
    │   ├── eval_multimodal_bindings.py
    │   ├── eval_program_synthesis.py
    │   └── snippets
    │   │   ├── code_api_builder.txt
    │   │   ├── code_api_builder2.txt
    │   │   ├── code_api_builder_website_result.txt
    │   │   ├── einstein_puzzle.txt
    │   │   ├── einstein_puzzle_human_solution.txt
    │   │   ├── einstein_puzzle_logic_solution.txt
    │   │   ├── formulations_dsl_rewriting.txt
    │   │   ├── google_organic_results_20240111_query=What-is-sulfuric-acid.txt
    │   │   ├── google_organic_results_20240121_query=Search-for-U-235.txt
    │   │   ├── jays_brother_human_solution.txt
    │   │   ├── jays_brother_trajectories.txt
    │   │   ├── latex_templating_output.txt
    │   │   ├── latex_templating_problem.txt
    │   │   ├── latex_templating_solution_1.txt
    │   │   ├── latex_templating_solution_2.txt
    │   │   ├── paper
    │   │       ├── bib
    │   │       │   └── related_work
    │   │       │   │   ├── laird87.txt
    │   │       │   │   ├── mccarthy06.txt
    │   │       │   │   ├── newell56.txt
    │   │       │   │   ├── newell57.txt
    │   │       │   │   └── newell72.txt
    │   │       ├── method
    │   │       │   └── symbolicai_docs.txt
    │   │       ├── ref
    │   │       │   ├── reference_abstract.txt
    │   │       │   ├── reference_paper.txt
    │   │       │   ├── reference_section_framework.txt
    │   │       │   ├── reference_section_relatedwork.txt
    │   │       │   └── reference_title.txt
    │   │       └── traj
    │   │       │   ├── reference_abstract.txt
    │   │       │   ├── reference_paper.txt
    │   │       │   ├── reference_section_framework.txt
    │   │       │   ├── reference_section_relatedwork.txt
    │   │       │   └── reference_title.txt
    │   │   ├── richard_feynman_summary.txt
    │   │   ├── sample_bill.txt
    │   │   └── wiki_page_20240121.txt
    ├── func.py
    ├── report.py
    └── utils.py
└── test.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | src/evals/.DS_Store
162 | src/.DS_Store
163 | .DS_Store
164 | experiments
165 | symai.config.json
166 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2023, ExtensityAI
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Benchmark
 2 | 
 3 | ## SymbolicAI: A framework for logic-based approaches combining generative models and solvers
 4 | 
 5 | We introduce SymbolicAI, a versatile and modular framework employing a logic-based approach to concept learning and flow management in generative processes. SymbolicAI enables the seamless integration of generative models with a diverse range of solvers by treating large language models (LLMs) as semantic parsers that execute tasks based on both natural and formal language instructions, thus bridging the gap between symbolic reasoning and generative AI. We leverage probabilistic programming principles to tackle complex tasks, and utilize differentiable and classical programming paradigms with their respective strengths. The framework introduces a set of polymorphic, compositional, and self-referential operations for data stream manipulation, aligning LLM outputs with user objectives. As a result, we can transition between the capabilities of various foundation models endowed with zero- and few-shot learning capabilities and specialized, fine-tuned models or solvers proficient in addressing specific problems. In turn, the framework facilitates the creation and evaluation of explainable computational graphs. We conclude by introducing a quality measure and its empirical score for evaluating these computational graphs, and propose a benchmark that compares various state-of-the-art LLMs across a set of complex workflows. We refer to the empirical score as the "Vector Embedding for Relational Trajectory Evaluation through Cross-similarity", or VERTEX score for short. The SymbolicAI framework codebase is available [here](https://github.com/ExtensityAI/symbolicai).
 6 | 
 7 | <img src="./assets/results.png" alt="Results" width="1000"/>
 8 | 
 9 | ## Installation
10 | 
11 | ### Requirements
12 | 
13 | Install dependencies.
14 | 
15 | ```bash
16 | pip install "symbolicai[all]"
17 | pip install -r requirements.txt
18 | ```
19 | 
20 | Install LlamaCpp backend.
21 | 
22 | ```bash
23 | sympkg i ExtensityAI/llamacpp
24 | ```
25 | 
26 | Then follow the instructions in the [ExtensityAI/llamacpp](https://github.com/ExtensityAI/llamacpp) repository to install and run the LlamaCpp backend with various HuggingFace models.
27 | 
28 | Install embeddings backend.
29 | 
30 | ```bash
31 | sympkg i ExtensityAI/embeddings
32 | ```
33 | 
34 | ## Configuration
35 | 
36 | Set the respective `config.json` properties for engine API keys and local models as shown below, and run the local models with the configured port and host name.
37 | 
38 | ```json
39 | {
40 |     "gpt4": {
41 |         "api_key": "",
42 |         "model": "gpt-4-1106-preview"
43 |     },
44 |     "gpt3.5": {
45 |         "api_key": "",
46 |         "model": "gpt-3.5-turbo-1106"
47 |     },
48 |     "gemini": {
49 |         "api_key": "",
50 |         "model": "gemini-pro"
51 |     },
52 |     "llama": {
53 |         "host": "http://localhost",
54 |         "port": 8080
55 |     },
56 |     ...
57 | }
58 | ```
59 | 
60 | ## Usage
61 | 
62 | Run the full benchmark.
63 | 
64 | ```bash
65 | python test.py --context_associations --program_synthesis --multimodal_bindings --logic_components --computation_graphs
66 | ```
67 | 
68 | This will run all the evaluations in the benchmark.
69 | 
70 | ## Cite us
71 | 
72 | ```bibtex
73 | @article{
74 |     Dinu:24,
75 |     title={SymbolicAI: A framework for logic-based approaches combining generative models and solvers},
76 |     author={Marius–Constantin Dinu and Claudiu Leoveanu–Condrei and Markus Holzleitner and Werner Zellinger and Sepp Hochreiter},
77 |     year={2024},
78 |     eprint={2402.00854},
79 |     archivePrefix={arXiv},
80 |     primaryClass={cs.LG},
81 |     url={https://arxiv.org/abs/2402.00854}
82 | }
83 | ```
84 | 


--------------------------------------------------------------------------------
/assets/results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExtensityAI/benchmark/24d3e93681d454b379d7f1e787b2a2284c41922f/assets/results.png


--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "gpt4": {
 3 |         "api_key": "",
 4 |         "model": "gpt-4-1106-preview"
 5 |     },
 6 |     "gpt3.5": {
 7 |         "api_key": "",
 8 |         "model": "gpt-3.5-turbo"
 9 |     },
10 |     "gemini": {
11 |         "api_key": "",
12 |         "model": "gemini-1.0-pro"
13 |     },
14 |     "llama": {
15 |         "host": "http://localhost",
16 |         "port": 8080
17 |     },
18 |     "zephyr": {
19 |         "host": "http://localhost",
20 |         "port": 8081
21 |     },
22 |     "mistral": {
23 |         "host": "http://localhost",
24 |         "port": 8082
25 |     },
26 |     "llama3_8B": {
27 |         "host": "http://localhost",
28 |         "port": 8083
29 |     },
30 |     "llama3_70B": {
31 |         "host": "http://localhost",
32 |         "port": 8084
33 |     },
34 |     "gemini1.5": {
35 |         "api_key": "",
36 |         "model": "gemini-1.5-pro-latest"
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "version": "0.0.1",
 3 |     "name": "ExtensityAI/benchmark",
 4 |     "description": "Evaluation of the neuro-symbolic framework.",
 5 |     "expressions": [
 6 |         {
 7 |             "module": "src/func",
 8 |             "type": "EvaluateBenchmark"
 9 |         }
10 |     ],
11 |     "run": {
12 |         "module": "src/func",
13 |         "type": "EvaluateBenchmark"
14 |     },
15 |     "dependencies": [
16 |         "ExtensityAI/llamacpp",
17 |         "ExtensityAI/tuning"
18 |     ]
19 | }


--------------------------------------------------------------------------------
/populate_index.py:
--------------------------------------------------------------------------------
 1 | from symai.shellsv import retrieval_augmented_indexing
 2 | from symai.functional import EngineRepository
 3 | from symai.backend.engines.index.engine_pinecone import PineconeIndexEngine
 4 | from symai.backend.engines.index.engine_vectordb import VectorDBIndexEngine
 5 | 
 6 | 
 7 | def run():
 8 |     # Register embeddings engine globally for all Symbols from plugin
 9 |     EngineRepository.register_from_plugin('embedding', plugin='ExtensityAI/embeddings', kwargs={'model': 'all-mpnet-base-v2'}, allow_engine_override=True)
10 |     # EngineRepository.register('index', PineconeIndexEngine(index_name='dataindex',
11 |     #                                                        index_dims=768,
12 |     #                                                        index_top_k=5))
13 |     vectorDB = VectorDBIndexEngine(index_name='dataindex',
14 |                                    index_dims=768,
15 |                                    index_top_k=5)
16 |     EngineRepository.register('index', vectorDB)
17 |     # insert into the index
18 |     retrieval_augmented_indexing('!src/evals/snippets', index_name='dataindex')
19 |     # # need to persist in-memory to disk
20 |     vectorDB.save()
21 | 
22 | 
23 | if __name__ == '__main__':
24 |     run()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | backoff
2 | seaborn
3 | google-cloud-aiplatform
4 | google-generativeai
5 | anthropic
6 | wandb
7 | parso
8 | sympy
9 | z3-solver


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExtensityAI/benchmark/24d3e93681d454b379d7f1e787b2a2284c41922f/src/__init__.py


--------------------------------------------------------------------------------
/src/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExtensityAI/benchmark/24d3e93681d454b379d7f1e787b2a2284c41922f/src/engines/__init__.py


--------------------------------------------------------------------------------
/src/engines/engine_google_vertex.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import google.generativeai as genai
  3 | 
  4 | from typing import List, Optional
  5 | 
  6 | from symai.backend.base import Engine
  7 | from symai.backend.settings import SYMAI_CONFIG
  8 | 
  9 | 
 10 | logging.getLogger("requests").setLevel(logging.ERROR)
 11 | logging.getLogger("urllib").setLevel(logging.ERROR)
 12 | logging.getLogger("httpx").setLevel(logging.ERROR)
 13 | logging.getLogger("httpcore").setLevel(logging.ERROR)
 14 | 
 15 | 
 16 | class GoogleGeminiEngine(Engine):
 17 |     def __init__(self, api_key: Optional[str] = None, model: Optional[str] = None):
 18 |         super().__init__()
 19 |         logger              = logging.getLogger('vertexai')
 20 |         logger.setLevel(logging.WARNING)
 21 |         self.config         = SYMAI_CONFIG
 22 |         # Initialize the Vertex AI project
 23 |         self.api_key        = api_key
 24 |         genai.configure(api_key=api_key)
 25 |         # Create a generative model instance from Vertex AI
 26 |         self.model = genai.GenerativeModel(model_name=model)
 27 |         self.max_tokens     = 32_760 - 100 # @NOTE: account for tolerance.
 28 |         self.seed           = None
 29 |         self.except_remedy  = None
 30 | 
 31 |     def id(self) -> str:
 32 |         if   self.config['NEUROSYMBOLIC_ENGINE_MODEL'] and \
 33 |              self.config['NEUROSYMBOLIC_ENGINE_MODEL'].startswith('gemini'):
 34 |             return 'neurosymbolic'
 35 |         return super().id() # default to unregistered
 36 | 
 37 |     def command(self, *args, **kwargs):
 38 |         super().command(*args, **kwargs)
 39 |         if 'NEUROSYMBOLIC_ENGINE_MODEL' in kwargs:
 40 |             self.model     = kwargs['NEUROSYMBOLIC_ENGINE_MODEL']
 41 |         if 'seed' in kwargs:
 42 |             self.seed      = kwargs['seed']
 43 |         if 'except_remedy' in kwargs:
 44 |             self.except_remedy = kwargs['except_remedy']
 45 | 
 46 |     def compute_remaining_tokens(self, prompts: list) -> int:
 47 |         return int((8_192) * 0.99) # @NOTE: account for tolerance.
 48 | 
 49 |     def forward(self, argument):
 50 |         kwargs              = argument.kwargs
 51 |         prompts_            = argument.prop.prepared_input
 52 | 
 53 |         # send prompt to GPT-X Chat-based
 54 |         stop                = kwargs['stop'] if 'stop' in kwargs else None
 55 |         model               = kwargs['model'] if 'model' in kwargs else self.model
 56 |         seed                = kwargs['seed'] if 'seed' in kwargs else self.seed
 57 | 
 58 |         # convert map to list of strings
 59 |         max_tokens          = kwargs['max_tokens'] if 'max_tokens' in kwargs else self.compute_remaining_tokens(prompts_)
 60 |         temperature         = kwargs['temperature'] if 'temperature' in kwargs else 0.1
 61 |         top_p               = kwargs['top_p'] if 'top_p' in kwargs else 1
 62 |         top_k               = kwargs['top_k'] if 'top_k' in kwargs else 40
 63 |         except_remedy       = kwargs['except_remedy'] if 'except_remedy' in kwargs else self.except_remedy
 64 | 
 65 |         try:
 66 |             res = model.generate_content(
 67 |                 prompts_,
 68 |                 generation_config={
 69 |                     "temperature": temperature,
 70 |                     "max_output_tokens": max_tokens,
 71 |                     "top_p": top_p,
 72 |                     "top_k": top_k
 73 |                 }
 74 |             )
 75 | 
 76 |         except Exception as e:
 77 |             callback = model.generate_content
 78 |             kwargs['model'] = kwargs['model'] if 'model' in kwargs else self.model
 79 |             if except_remedy is not None:
 80 |                 res = except_remedy(self, e, callback, argument)
 81 |             else:
 82 |                 raise e
 83 | 
 84 |         metadata = {}
 85 |         output   = [res.text]
 86 |         return output, metadata
 87 | 
 88 |     def prepare(self, argument):
 89 |         if argument.prop.raw_input:
 90 |             if not argument.prop.processed_input:
 91 |                 raise ValueError('Need to provide a prompt instruction to the engine if raw_input is enabled.')
 92 |             argument.prop.prepared_input = str(argument.prop.processed_input)
 93 |             return
 94 | 
 95 |         _non_verbose_output = """[META INSTRUCTIONS START]\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n"""
 96 |         user:   str = ""
 97 |         system: str = ""
 98 | 
 99 |         if argument.prop.disable_verbose_output_suppression:
100 |             system += _non_verbose_output
101 |         system = f'{system}\n' if system and len(system) > 0 else ''
102 | 
103 |         ref = argument.prop.instance
104 |         static_ctxt, dyn_ctxt = ref.global_context
105 |         if len(static_ctxt) > 0:
106 |             system += f"[STATIC CONTEXT]\n{static_ctxt}\n\n"
107 | 
108 |         if len(dyn_ctxt) > 0:
109 |             system += f"[DYNAMIC CONTEXT]\n{dyn_ctxt}\n\n"
110 | 
111 |         payload = argument.prop.payload
112 |         if argument.prop.payload:
113 |             system += f"[ADDITIONAL CONTEXT]\n{str(payload)}\n\n"
114 | 
115 |         examples: List[str] = argument.prop.examples
116 |         if examples and len(examples) > 0:
117 |             system += f"[EXAMPLES]\n{str(examples)}\n\n"
118 | 
119 |         if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
120 |             val = str(argument.prop.prompt)
121 |             system += f"[INSTRUCTION]\n{val}"
122 | 
123 |         suffix: str = str(argument.prop.processed_input)
124 | 
125 |         if '[SYSTEM_INSTRUCTION::]: <<<' in suffix and argument.prop.parse_system_instructions:
126 |             parts = suffix.split('\n>>>\n')
127 |             # first parts are the system instructions
128 |             c = 0
129 |             for i, p in enumerate(parts):
130 |                 if 'SYSTEM_INSTRUCTION' in p:
131 |                     system += f"{p}\n"
132 |                     c += 1
133 |                 else:
134 |                     break
135 |             # last part is the user input
136 |             suffix = '\n>>>\n'.join(parts[c:])
137 |         user += f"{suffix}"
138 | 
139 |         if argument.prop.template_suffix:
140 |             user += f"\n[[PLACEHOLDER]]\n{str(argument.prop.template_suffix)}\n\n"
141 |             user += f"Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
142 | 
143 |         argument.prop.prepared_input = f'---------SYSTEM BEHAVIOR--------\n{system}\n\n---------USER REQUEST--------\n{user}'
144 | 


--------------------------------------------------------------------------------
/src/engines/engine_llamacpp.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import requests
  3 | import json
  4 | 
  5 | from typing import List
  6 | from requests_toolbelt.multipart.encoder import MultipartEncoder
  7 | 
  8 | from symai.backend.base import Engine
  9 | from symai.backend.settings import SYMAI_CONFIG
 10 | from symai.symbol import Result
 11 | 
 12 | 
 13 | class LLaMAResult(Result):
 14 |     def __init__(self, value=None, *args, **kwargs):
 15 |         super().__init__(value, *args, **kwargs)
 16 |         self._value = value
 17 |         self.error  = None
 18 |         self.raw    = value
 19 |         self._perse_result()
 20 | 
 21 |     def _perse_result(self):
 22 |         val        = json.loads(self.value)
 23 |         self.value = val
 24 |         if 'error' in val:
 25 |             self.error = val['error']
 26 |         if 'content' in val:
 27 |             self.value = val['content']
 28 | 
 29 | 
 30 | class LLaMACppClientEngine(Engine):
 31 |     def __init__(self, host: str = 'http://localhost', port: int = 8080, uri: str = 'completion', timeout: int = 600):
 32 |         super().__init__()
 33 |         logger = logging.getLogger('nesy_client')
 34 |         logger.setLevel(logging.WARNING)
 35 |         self.config         = SYMAI_CONFIG
 36 |         self.host           = host
 37 |         self.port           = port
 38 |         self.uri            = uri
 39 |         self.timeout        = timeout
 40 |         self.seed           = None
 41 |         self.except_remedy  = None
 42 | 
 43 |     def id(self) -> str:
 44 |         if  self.config['CAPTION_ENGINE_MODEL'] and \
 45 |             'llamacpp' in self.config['CAPTION_ENGINE_MODEL']:
 46 |             return 'neurosymbolic'
 47 |         return super().id() # default to unregistered
 48 | 
 49 |     def command(self, *args, **kwargs):
 50 |         super().command(*args, **kwargs)
 51 |         if 'seed' in kwargs:
 52 |             self.seed      = kwargs['seed']
 53 |         if 'except_remedy' in kwargs:
 54 |             self.except_remedy = kwargs['except_remedy']
 55 | 
 56 |     @property
 57 |     def max_tokens(self):
 58 |         return 2048
 59 | 
 60 |     def compute_remaining_tokens(self, prompts: list) -> int:
 61 |         return int((1024) * 0.99) # @NOTE: account for tolerance.
 62 | 
 63 |     def forward(self, argument):
 64 |         prompts             = argument.prop.prepared_input
 65 |         kwargs              = argument.kwargs
 66 | 
 67 |         model_kwargs = {}
 68 | 
 69 |         # convert map to list of strings
 70 |         stop                = kwargs['stop'] if 'stop' in kwargs else None
 71 |         seed                = kwargs['seed'] if 'seed' in kwargs else self.seed
 72 |         max_tokens          = kwargs['max_tokens'] if 'max_tokens' in kwargs else self.compute_remaining_tokens(prompts)
 73 |         temperature         = kwargs['temperature'] if 'temperature' in kwargs else 0.7
 74 |         top_p               = kwargs['top_p'] if 'top_p' in kwargs else 0.95
 75 |         top_k               = kwargs['top_k'] if 'top_k' in kwargs else 40
 76 |         except_remedy       = kwargs['except_remedy'] if 'except_remedy' in kwargs else self.except_remedy
 77 | 
 78 |         if stop is not None:
 79 |             model_kwargs['stop'] = stop
 80 |         if seed is not None:
 81 |             model_kwargs['seed'] = seed
 82 |         if max_tokens is not None:
 83 |             model_kwargs['n_predict'] = max_tokens
 84 |         if temperature is not None:
 85 |             model_kwargs['temperature'] = temperature
 86 |         if top_p is not None:
 87 |             model_kwargs['top_p'] = top_p
 88 |         if top_k is not None:
 89 |             model_kwargs['top_k'] = top_k
 90 | 
 91 |         # Create multipart/form-data payload
 92 |         # Since the LLaMA server expects a JSON payload, we construct JSON data
 93 |         prompt  = prompts[0] if prompts[0] is not None and len(prompts[0]) > 0 else ' ' # @NOTE: space char to produce at least empty prompt and avoid exception on server side
 94 |         payload = {
 95 |             'prompt': prompt,
 96 |             **model_kwargs
 97 |         }
 98 |         headers = {'Content-Type': 'application/json'}
 99 |         api     = f'{self.host}:{self.port}/{self.uri}'
100 |         try:
101 |             rsp = requests.post(api, json=payload, headers=headers, timeout=self.timeout)
102 |             # Verify the success of the response
103 |             rsp.raise_for_status()
104 |             res = rsp.text
105 |         except requests.exceptions.HTTPError as e:
106 |             if except_remedy is None:
107 |                 self.logger.error(f"HTTP error occurred: {e}")
108 |                 # Here you can add more sophisticated error handling and recovery
109 |                 raise e
110 |             # Retry the request or handle it based on the exception remedy provided
111 |             callback = lambda: requests.post(api, data=payload, headers=headers, timeout=self.timeout)
112 |             res = except_remedy(self, e, callback, argument)
113 |         except requests.exceptions.RequestException as e:
114 |             # Handle non-HTTP exceptions (e.g., network errors, timeout)
115 |             if except_remedy is None:
116 |                 self.logger.error(f"Request error occurred: {e}")
117 |                 raise e
118 |             # Retry the request or handle it based on the exception remedy provided
119 |             callback = lambda: requests.post(api, data=payload, headers=headers, timeout=self.timeout)
120 |             res = except_remedy(self, e, callback, argument)
121 |         except Exception as e:
122 |             # Handle unforeseen exceptions
123 |             self.logger.error(f"An unexpected error occurred: {e}")
124 |             raise e
125 | 
126 |         metadata = {}
127 | 
128 |         try:
129 |             res = LLaMAResult(res)
130 |         except json.JSONDecodeError:
131 |             # Handle a JSON parse error specifically
132 |             self.logger.error(f"JSON parse error: Invalid response {res}")
133 |             raise Exception(f"Invalid response: {res}")
134 | 
135 |         rsp = [res]
136 |         output = rsp if isinstance(prompts, list) else rsp[0]
137 |         return output, metadata
138 | 
139 |     def prepare(self, argument):
140 |         if argument.prop.raw_input:
141 |             if not argument.prop.processed_input:
142 |                 raise ValueError('Need to provide a prompt instruction to the engine if raw_input is enabled.')
143 |             argument.prop.prepared_input = [str(argument.prop.processed_input)]
144 |             return
145 | 
146 |         _non_verbose_output = """[META INSTRUCTIONS START]\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n"""
147 |         user:   str = ""
148 |         system: str = ""
149 | 
150 |         if argument.prop.suppress_verbose_output:
151 |             system += _non_verbose_output
152 |         system = f'{system}\n' if system and len(system) > 0 else ''
153 | 
154 |         ref = argument.prop.instance
155 |         static_ctxt, dyn_ctxt = ref.global_context
156 |         if len(static_ctxt) > 0:
157 |             system += f"[STATIC CONTEXT]\n{static_ctxt}\n\n"
158 | 
159 |         if len(dyn_ctxt) > 0:
160 |             system += f"[DYNAMIC CONTEXT]\n{dyn_ctxt}\n\n"
161 | 
162 |         payload = argument.prop.payload
163 |         if argument.prop.payload:
164 |             system += f"[ADDITIONAL CONTEXT]\n{str(payload)}\n\n"
165 | 
166 |         examples: List[str] = argument.prop.examples
167 |         if examples and len(examples) > 0:
168 |             system += f"[EXAMPLES]\n{str(examples)}\n\n"
169 | 
170 |         if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
171 |             val = str(argument.prop.prompt)
172 |             # in this engine, instructions are considered as user prompts
173 |             user += f"[INSTRUCTION]\n{val}"
174 | 
175 |         suffix: str = str(argument.prop.processed_input)
176 | 
177 |         if '[SYSTEM_INSTRUCTION::]: <<<' in suffix and argument.prop.parse_system_instructions:
178 |             parts = suffix.split('\n>>>\n')
179 |             # first parts are the system instructions
180 |             c = 0
181 |             for i, p in enumerate(parts):
182 |                 if 'SYSTEM_INSTRUCTION' in p:
183 |                     system += f"{p}\n"
184 |                     c += 1
185 |                 else:
186 |                     break
187 |             # last part is the user input
188 |             suffix = '\n>>>\n'.join(parts[c:])
189 |         user += f"{suffix}"
190 | 
191 |         if argument.prop.template_suffix:
192 |             user += f"\n[[PLACEHOLDER]]\n{str(argument.prop.template_suffix)}\n\n"
193 |             user += f"Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
194 | 
195 |         argument.prop.prepared_input = [f'---------SYSTEM BEHAVIOR--------\n{system}\n\n---------USER REQUEST--------\n{user}']
196 | 


--------------------------------------------------------------------------------
/src/engines/engine_mockup.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | 
  3 | from box import Box
  4 | from typing import List
  5 | 
  6 | from symai.backend.base import Engine
  7 | from symai.backend.settings import SYMAI_CONFIG
  8 | from symai.symbol import Result
  9 | 
 10 | from ..utils import RANDOM_RESPONSE
 11 | 
 12 | 
 13 | class MockupResult(Result):
 14 |     def __init__(self, value=None, *args, **kwargs):
 15 |         super().__init__(value, *args, **kwargs)
 16 |         self._value = value
 17 |         self.error  = None
 18 |         self.raw    = value
 19 | 
 20 | 
 21 | class MockupEngine(Engine):
 22 |     def __init__(self, verbose: bool = False):
 23 |         super().__init__()
 24 |         self.logger         = logging.getLogger('mockup')
 25 |         self.logger.setLevel(logging.DEBUG)
 26 |         self.config         = SYMAI_CONFIG
 27 |         self.seed           = None
 28 |         self.except_remedy  = None
 29 |         self.verbose        = verbose
 30 | 
 31 |     def id(self) -> str:
 32 |         return super().id() # default to unregistered
 33 | 
 34 |     def command(self, *args, **kwargs):
 35 |         super().command(*args, **kwargs)
 36 | 
 37 |     @property
 38 |     def max_tokens(self):
 39 |         return 2048
 40 | 
 41 |     def compute_remaining_tokens(self, prompts: list) -> int:
 42 |         return int((1024) * 0.99)
 43 | 
 44 |     def forward(self, argument):
 45 |         prompts             = argument.prop.prepared_input
 46 |         kwargs              = argument.kwargs
 47 | 
 48 |         model_kwargs = {}
 49 | 
 50 |         # convert map to list of strings
 51 |         stop                = kwargs['stop'] if 'stop' in kwargs else None
 52 |         seed                = kwargs['seed'] if 'seed' in kwargs else self.seed
 53 |         max_tokens          = kwargs['max_tokens'] if 'max_tokens' in kwargs else self.compute_remaining_tokens(prompts)
 54 |         temperature         = kwargs['temperature'] if 'temperature' in kwargs else 0.7
 55 |         top_p               = kwargs['top_p'] if 'top_p' in kwargs else 0.95
 56 |         top_k               = kwargs['top_k'] if 'top_k' in kwargs else 40
 57 |         except_remedy       = kwargs['except_remedy'] if 'except_remedy' in kwargs else self.except_remedy
 58 | 
 59 |         if stop is not None:
 60 |             model_kwargs['stop'] = stop
 61 |         if seed is not None:
 62 |             model_kwargs['seed'] = seed
 63 |         if max_tokens is not None:
 64 |             model_kwargs['n_predict'] = max_tokens
 65 |         if temperature is not None:
 66 |             model_kwargs['temperature'] = temperature
 67 |         if top_p is not None:
 68 |             model_kwargs['top_p'] = top_p
 69 |         if top_k is not None:
 70 |             model_kwargs['top_k'] = top_k
 71 | 
 72 |         if self.verbose:
 73 |             self.logger.debug(f"kwargs: {kwargs}")
 74 |             self.logger.debug(f"prompts: {prompts}")
 75 |             self.logger.debug(f"model_kwargs: {model_kwargs}")
 76 | 
 77 |         # Create multipart/form-data payload
 78 |         # Since the LLaMA server expects a JSON payload, we construct JSON data
 79 |         try:
 80 |             rsp = Box({
 81 |                 'text': RANDOM_RESPONSE
 82 |             })
 83 |             # Verify the success of the response
 84 |             res = rsp.text
 85 |         except Exception as e:
 86 |             # Handle unforeseen exceptions
 87 |             self.logger.error(f"An unexpected error occurred: {e}")
 88 |             raise e
 89 | 
 90 |         metadata = {}
 91 |         res = MockupResult(res)
 92 | 
 93 |         rsp = [res]
 94 |         output = rsp if isinstance(prompts, list) else rsp[0]
 95 |         return output, metadata
 96 | 
 97 |     def prepare(self, argument):
 98 |         if argument.prop.raw_input:
 99 |             if not argument.prop.processed_input:
100 |                 raise ValueError('Need to provide a prompt instruction to the engine if raw_input is enabled.')
101 |             argument.prop.prepared_input = [str(argument.prop.processed_input)]
102 |             return
103 | 
104 |         _non_verbose_output = """[META INSTRUCTIONS START]\nYou do not output anything else, like verbose preambles or post explanation, such as "Sure, let me...", "Hope that was helpful...", "Yes, I can help you with that...", etc. Consider well formatted output, e.g. for sentences use punctuation, spaces etc. or for code use indentation, etc. Never add meta instructions information to your output!\n"""
105 |         user:   str = ""
106 |         system: str = ""
107 | 
108 |         if argument.prop.disable_verbose_output_suppression:
109 |             system += _non_verbose_output
110 |         system = f'{system}\n' if system and len(system) > 0 else ''
111 | 
112 |         ref = argument.prop.instance
113 |         static_ctxt, dyn_ctxt = ref.global_context
114 |         if len(static_ctxt) > 0:
115 |             system += f"[STATIC CONTEXT]\n{static_ctxt}\n\n"
116 | 
117 |         if len(dyn_ctxt) > 0:
118 |             system += f"[DYNAMIC CONTEXT]\n{dyn_ctxt}\n\n"
119 | 
120 |         payload = argument.prop.payload
121 |         if argument.prop.payload:
122 |             system += f"[ADDITIONAL CONTEXT]\n{str(payload)}\n\n"
123 | 
124 |         examples: List[str] = argument.prop.examples
125 |         if examples and len(examples) > 0:
126 |             system += f"[EXAMPLES]\n{str(examples)}\n\n"
127 | 
128 |         if argument.prop.prompt is not None and len(argument.prop.prompt) > 0:
129 |             val = str(argument.prop.prompt)
130 |             # in this engine, instructions are considered as user prompts
131 |             user += f"[INSTRUCTION]\n{val}"
132 | 
133 |         suffix: str = str(argument.prop.processed_input)
134 | 
135 |         if '[SYSTEM_INSTRUCTION::]: <<<' in suffix and argument.prop.parse_system_instructions:
136 |             parts = suffix.split('\n>>>\n')
137 |             # first parts are the system instructions
138 |             c = 0
139 |             for i, p in enumerate(parts):
140 |                 if 'SYSTEM_INSTRUCTION' in p:
141 |                     system += f"{p}\n"
142 |                     c += 1
143 |                 else:
144 |                     break
145 |             # last part is the user input
146 |             suffix = '\n>>>\n'.join(parts[c:])
147 |         user += f"{suffix}"
148 | 
149 |         if argument.prop.template_suffix:
150 |             user += f"\n[[PLACEHOLDER]]\n{str(argument.prop.template_suffix)}\n\n"
151 |             user += f"Only generate content for the placeholder `[[PLACEHOLDER]]` following the instructions and context information. Do NOT write `[[PLACEHOLDER]]` or anything else in your output.\n\n"
152 | 
153 |         argument.prop.prepared_input = [f'---------SYSTEM BEHAVIOR--------\n{system}\n\n---------USER REQUEST--------\n{user}']
154 | 


--------------------------------------------------------------------------------
/src/evals/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExtensityAI/benchmark/24d3e93681d454b379d7f1e787b2a2284c41922f/src/evals/__init__.py


--------------------------------------------------------------------------------
/src/evals/assets/sample_bill.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ExtensityAI/benchmark/24d3e93681d454b379d7f1e787b2a2284c41922f/src/evals/assets/sample_bill.jpg


--------------------------------------------------------------------------------
/src/evals/components/__init__.py:
--------------------------------------------------------------------------------
1 | from .factorization import *


--------------------------------------------------------------------------------
/src/evals/components/api_builder.py:
--------------------------------------------------------------------------------
  1 | from symai import core
  2 | from symai import Expression, Symbol
  3 | from symai.pre_processors import PreProcessor
  4 | from symai.post_processors import CodeExtractPostProcessor
  5 | from symai.components import Execute
  6 | 
  7 | 
  8 | API_BUILDER_DESCRIPTION = """[Description]
  9 | You are an API coding tool for Python that creates API calls to any web URL based on user requests.
 10 | For example, if the user wants to use the X API (former Twitter) to post a tweet, you will create the required API post call for that, i.e. 'Write Twitter post `hey, what's up` API-Key:...'.
 11 | If the user wants to use the X API to get the latest tweets, you will create the API call for that, e.g. 'Read Twitter post https://twitter.com/...'.
 12 | Each created function is atomic and can be used as a building block for more complex functions.
 13 | You can also create a function that calls other functions. However, all code must be self-contained in one function `run` including all imports.
 14 | Another constraint is that there is one mandatory function called `run` as an entry point to the executable runnable and one provided pre-build function that uses an large language model to extract and parse API calls parameters of user requests or manipulates string-based data as you see fit.
 15 | All code parts marked with [MANAGED] are strictly forbidden to be changed! They must be provided as is.
 16 | Always generate the entire code for the `run` function, including the `try` and `except` blocks, imports, etc. and the unchanged managed code parts.
 17 | 
 18 | For example, you can write yourself prompts to extract parameters from user requests and use them to create API calls:
 19 | ```python
 20 | # all code must be self-contained in one function called `run` including all imports
 21 | def run(text: str) -> str: # [MANAGED] entry point cannot be changed
 22 |     # [MANAGED-BEGIN] mandatory imports here
 23 |     import traceback
 24 |     import requests
 25 |     from symai import Function
 26 |     # [MANAGED-END] mandatory imports here
 27 | 
 28 |     # optional imports here
 29 |     # TODO: all your imports and code here
 30 | 
 31 |     # executable code here
 32 |     try: # [MANAGED] must contain this line, do not change
 33 |         # optional helper functions here
 34 | 
 35 |         # optional params extraction here
 36 |         # TODO: extract params from request full-text if needed
 37 |         # Example:
 38 |         func = Function('YOUR_PROMPT_1') # TODO: extract function param 1
 39 |         param1 = func(request)
 40 |         func = Function('YOUR_PROMPT_2') # TODO: extract function param 2
 41 |         param2 = func(request)
 42 |         # ... extract more params if needed
 43 | 
 44 |         # optional params manipulation here
 45 |         res = # TODO: run https APIs with the respective params, use tools like requests, urllib, etc.
 46 | 
 47 |         # optional result formatting here
 48 |         # Another example:
 49 |         func = Function('YOUR_PROMPT_3') # TODO: format result if needed
 50 |         res = func(res)
 51 | 
 52 |         # mandatory return statement here
 53 |         res = str(res) # [MANAGED] must contain this line, do not change
 54 |         return res # [MANAGED] must return a string, do not change
 55 |     except Exception as e: # [MANAGED] must catch all exceptions and return them as string
 56 |         tb = traceback.format_exc() # [MANAGED] return full error stack trace as string
 57 |         return tb # [MANAGED] return tb as string, do not change
 58 | 
 59 | # mandatory statement here
 60 | res = run(value) # [MANAGED] must contain this line, do not change
 61 | ```
 62 | """
 63 | 
 64 | 
 65 | class APIBuilderPreProcessor(PreProcessor):
 66 |     def __call__(self, argument):
 67 |         return '$> {} =>'.format(str(argument.args[0]))
 68 | 
 69 | 
 70 | class APIBuilder(Expression):
 71 |     @property
 72 |     def static_context(self) -> str:
 73 |         return API_BUILDER_DESCRIPTION
 74 | 
 75 |     def __init__(self, **kwargs):
 76 |         super().__init__(**kwargs)
 77 |         self.sym_return_type = APIBuilder
 78 | 
 79 |     def forward(self, sym: Symbol, **kwargs) -> Symbol:
 80 |         @core.zero_shot(prompt="Build the API call code:\n",
 81 |                    pre_processors=[APIBuilderPreProcessor()],
 82 |                    post_processors=[CodeExtractPostProcessor()], **kwargs)
 83 |         def _func(_, text) -> str:
 84 |             pass
 85 | 
 86 |         return _func(self, sym)
 87 | 
 88 | 
 89 | class StackTraceRetryExecutor(Expression):
 90 |     def __init__(self, retries: int = 1, **kwargs):
 91 |         super().__init__(**kwargs)
 92 |         self.executor = Execute()
 93 |         self.max_retries = retries
 94 |         self._runnable = None
 95 | 
 96 |     def forward(self, code: Symbol, request: Symbol, **kwargs) -> Symbol:
 97 |         code = str(code)
 98 |         # Set value that gets passed on to the 'run' function in the generated code
 99 |         value = request.value # do not remove this line
100 |         # Create the 'run' function
101 |         self._runnable = self.executor(code, locals=locals().copy(), globals=globals().copy())
102 |         result = self._runnable['locals']['run'](value)
103 |         retry = 0
104 |         # Retry if there is a 'Traceback' in the result
105 |         while 'Traceback' in result and retry < self.max_retries:
106 |             self._runnable = self.executor(code, payload=result, locals=locals().copy(), globals=globals().copy(), **kwargs)
107 |             result = self._runnable['locals']['run'](value)
108 |             retry += 1
109 |         if 'locals_res' in self._runnable:
110 |             result = self._runnable['locals_res']
111 |         return result
112 | 
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------
/src/evals/components/factorization.py:
--------------------------------------------------------------------------------
 1 | from symai import Function
 2 | 
 3 | 
 4 | FACTORIZATION_CONTEXT = """[Context]
 5 | Compute the factorization of expression, ``f``, into irreducibles. (To
 6 | factor an integer into primes, use ``factorint``.)
 7 | 
 8 | There two modes implemented: symbolic and formal. If ``f`` is not an
 9 | instance of :class:`Poly` and generators are not specified, then the
10 | former mode is used. Otherwise, the formal mode is used.
11 | 
12 | In symbolic mode, :func:`factor` will traverse the expression tree and
13 | factor its components without any prior expansion, unless an instance
14 | of :class:`~.Add` is encountered (in this case formal factorization is
15 | used). This way :func:`factor` can handle large or symbolic exponents.
16 | 
17 | By default, the factorization is computed over the rationals. To factor
18 | over other domain, e.g. an algebraic or finite field, use appropriate
19 | options: ``extension``, ``modulus`` or ``domain``.
20 | """
21 | 
22 | 
23 | class Factorization(Function):
24 |     @property
25 |     def static_context(self):
26 |         return FACTORIZATION_CONTEXT
27 | 


--------------------------------------------------------------------------------
/src/evals/components/paper.py:
--------------------------------------------------------------------------------
  1 | from symai import Function
  2 | from symai.components import Sequence, Parallel
  3 | from symai.extended import Conversation
  4 | from symai.post_processors import StripPostProcessor, CodeExtractPostProcessor
  5 | 
  6 | 
  7 | SYMBOLIC_AI_PAPER = """Write a scientific paper about the machine learning framework called SymbolicAI which operates on the following principles:
  8 | - Symbolic methods
  9 | - Sub-symbolic methods
 10 | - Neural-symbolic methods
 11 | - Probabilistic programming methods
 12 | - Cognitive architectures
 13 | Be precise in your writing and follow a scientific style. Do not use any colloquial language. However, formulate simple and understandable sentences."""
 14 | 
 15 | 
 16 | PAPER_STATIC_CONTEXT = """[General Context]
 17 | {context}
 18 | 
 19 | [Format]
 20 | Your output format should be parsable by a LaTeX compiler. All produced content should be enclosed between the \n```latex\n ... \n``` blocks. Do not create document classes or other LaTeX meta commands. Always assume that the document class is already defined. Only produce exactly one latex block with all your content.
 21 | Only use either `section`, `subsection`, paragraph`, `texttt`, `textbf`, `emph` or `citep` commands to structure your content. Do not use any other LaTeX commands.
 22 | The following is an example of your expected output:
 23 | 
 24 | [Example]
 25 | ```latex
 26 | \\documentclass{{article}}
 27 | \\begin{{document}}
 28 | % TODO: your content here
 29 | \\end{{document}}
 30 | ```
 31 | 
 32 | {description}
 33 | """
 34 | 
 35 | 
 36 | class Paper(Function):
 37 |     def __init__(self, *sequence, context: str = SYMBOLIC_AI_PAPER, **kwargs):
 38 |         super().__init__(**kwargs)
 39 |         self.sequence = Sequence(*sequence)
 40 |         self.context  = context
 41 | 
 42 |     def forward(self, task, **kwargs):
 43 |         # execute the sequence of tasks
 44 |         res         = self.sequence(task, **kwargs)
 45 |         # access results from the global root node metadata
 46 |         results     = self.linker.results
 47 |         # return the reversed results
 48 |         reverse_res = str(list(reversed(list(results.values()))))
 49 |         # create the final task by concatenating the results
 50 |         return super().forward(task | reverse_res | res, **kwargs)
 51 | 
 52 |     @property
 53 |     def static_context(self):
 54 |         return PAPER_STATIC_CONTEXT.format(context=self.context, description='The final paper must include the title an abstract and a related work section and method section.')
 55 | 
 56 | 
 57 | class Context(Conversation):
 58 |     def __init__(self, context: str = SYMBOLIC_AI_PAPER, **kwargs):
 59 |         super().__init__(**kwargs)
 60 |         self.auto_print   = False
 61 |         self.prompt       = 'Replace the % TODO: with your content and follow the task description below.'
 62 |         self.context      = context
 63 | 
 64 |     def forward(self, task, *args, **kwargs):
 65 |         function = Function(self.prompt,
 66 |                             post_processors=[StripPostProcessor(), CodeExtractPostProcessor()],
 67 |                             static_context=self.static_context,
 68 |                             dynamic_context=self.dynamic_context)
 69 |         return function(f"{task}\n[Source]\n{self.history()}", *args, **kwargs)
 70 | 
 71 |     @property
 72 |     def description(self):
 73 |         raise NotImplementedError()
 74 | 
 75 |     @property
 76 |     def static_context(self):
 77 |         return PAPER_STATIC_CONTEXT.format(context=self.context, description=self.description)
 78 | 
 79 | 
 80 | class Source(Context):
 81 |     @property
 82 |     def description(self):
 83 |         return """[Task]
 84 | Summarize the referenced method to use it as a conditioning context for a large Language model like GPT-3.
 85 | Do not create any sections or subsections. Only write one coherent text about the main principles and concepts of the method.
 86 | """
 87 | 
 88 | class Method(Context):
 89 |     def __init__(self, source, **kwargs):
 90 |         super().__init__(**kwargs)
 91 |         self.source = source
 92 | 
 93 |     def forward(self, task, **kwargs):
 94 |         summary = self.source(task, **kwargs)
 95 |         # update the dynamic context globally for all types
 96 |         self.adapt(context=summary, types=[RelatedWork, Abstract, Title, Introduction, Cite])
 97 |         return super().forward(task | summary, **kwargs)
 98 | 
 99 |     @property
100 |     def description(self):
101 |         return """[Task]
102 | Your goal is to write the method section which describes the main approach and principles used. Add one methodology section with one consistent paragraph. Provide citations and references.
103 | """
104 | 
105 | 
106 | class Cite(Source):
107 |     @property
108 |     def description(self):
109 |         return """[Task]
110 | Write a short two sentence related work summary in the context of the paper. Do not add any sections or subsections.
111 | """
112 | 
113 | 
114 | class RelatedWork(Context):
115 |     def __init__(self, *citations, **kwargs):
116 |         super().__init__(**kwargs)
117 |         self.citations = Parallel(*citations, sequential=True) # to avoid API rate limits process parallel citations sequentially
118 | 
119 |     def forward(self, task, **kwargs):
120 |         # execute the parallel tasks
121 |         res = self.citations(task, **kwargs)
122 |         return super().forward(res, **kwargs)
123 | 
124 |     @property
125 |     def description(self):
126 |         return """[Task]
127 | Write a coherent related work section in the context of the paper and based on the provided citation sources. Add one related work section with one consistent paragraph. Provide citations and references.
128 | """
129 | 
130 | 
131 | class Introduction(Context):
132 |     def __init__(self, *citations, **kwargs):
133 |         super().__init__(**kwargs)
134 |         self.citations = Parallel(*citations, sequential=True)
135 | 
136 |     def forward(self, task, **kwargs):
137 |         # execute the parallel tasks
138 |         res = self.citations(task, **kwargs)
139 |         return super().forward(res, **kwargs)
140 | 
141 |     @property
142 |     def description(self):
143 |         return """[Task]
144 | Write a coherent introduction section in the context of the paper and based on the provided context. Add one introduction section with one consistent paragraph. Provide citations and references.
145 | """
146 | 
147 | 
148 | class Abstract(Context):
149 |     @property
150 |     def description(self):
151 |         return """[Task]
152 | Write the paper abstract given the provided context. Add one abstract section with one consistent paragraph.
153 | """
154 | 
155 | 
156 | class Title(Context):
157 |     @property
158 |     def description(self):
159 |         return """[Task]
160 | Write the paper title given the provided context. Add one title tag for the document.
161 | """
162 | 


--------------------------------------------------------------------------------
/src/evals/eval_in_context_associations.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from symai import Symbol, Expression
  4 | from symai.utils import toggle_test
  5 | 
  6 | from src.utils import MOCK_RETURN, RANDOMNESS, bool_success, normalize
  7 | 
  8 | 
  9 | ACTIVE = True
 10 | 
 11 | 
 12 | # Define basic test functions
 13 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 14 | def test_basic_factual_prompt(aggregate):
 15 |     '''Sanity check test if the basic prompt works'''
 16 |     sym = Expression.prompt('''[Last Instruction]
 17 | Return only a number as an answer.
 18 | [Last Query]
 19 | Give the meaning of life a number, meaning that the answer to life, the universe and everything is:
 20 | [Answer]''')
 21 |     # sanity check if models are working
 22 |     # every model must pass this basic test
 23 |     res = ('42' in str(sym))                                                                      | aggregate.res         # collect the result value
 24 |     return res, bool_success(res)
 25 | 
 26 | 
 27 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 28 | def test_basic_factual_prompt_pi(aggregate):
 29 |     '''Sanity check test if the basic prompt works'''
 30 |     sym = Expression.prompt('''[Last Instruction]
 31 | Return only a number as an answer.
 32 | [Last Query]
 33 | Write the number of Pi up to the 10th digit after the comma:
 34 | [Last Answer]''')                                                                                 | aggregate.sym         # collect the symbol value
 35 |     # sanity check if models are working
 36 |     # every model must pass this basic test
 37 |     base  = Symbol('3.1415926535')                                                                | aggregate.base        # collect the base value
 38 |     score = sym.measure(base)                                                                     | aggregate.score       # collect the score
 39 |     return True, {'scores': [score.value]}
 40 | 
 41 | 
 42 | # Define the test functions based on in-context learning associations and compositions
 43 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 44 | def test_add_and_equals(aggregate):
 45 |     '''Test if the addition operator between two number symbols works'''
 46 |     try:
 47 |         sym = (Symbol(1) + Symbol(2)).int()
 48 |     except:
 49 |         sym = 0 # default value for failure
 50 |     res = (sym == 3)                                                                              | aggregate.res         # collect the result value
 51 |     return res, bool_success(res)
 52 | 
 53 | 
 54 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 55 | def test_add_and_equals_2(aggregate):
 56 |     '''Test if the addition operator between a number symbol and linguistic number symbol works'''
 57 |     # auto cast to Symbol
 58 |     try:
 59 |         sym = (Symbol(17) + 'two').int()
 60 |     except:
 61 |         sym = 0 # default value for failure
 62 |     res = (sym == 19)                                                                             | aggregate.res         # collect the result value
 63 |     return res, bool_success(res)
 64 | 
 65 | 
 66 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 67 | def test_add_and_equals_3(aggregate):
 68 |     '''Test if the addition operator between a large number symbol and linguistic number symbol works'''
 69 |     # auto cast to Symbol
 70 |     try:
 71 |         sym = ('two hundred and thirty four' + Symbol(7000)).int()
 72 |     except:
 73 |         sym = 0 # default value for failure
 74 |     res = (sym == 7234)                                                                           | aggregate.res         # collect the result value
 75 |     return res, bool_success(res)
 76 | 
 77 | 
 78 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 79 | def test_check_pi(aggregate):
 80 |     '''Test if a fuzzy equality between pi string symbol and an number approximation symbol works'''
 81 |     # semantic understanding of pi
 82 |     sym = Symbol('pi')                                                                            | aggregate.sym         # collect the symbol value
 83 |     # test if pi is equal to 3.14159265... by approximating
 84 |     res = (sym == '3.14159265...')                                                                | aggregate.res         # collect the result value
 85 |     return res, bool_success(res)
 86 | 
 87 | 
 88 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 89 | def test_check_pi_2(aggregate):
 90 |     '''Test if a fuzzy equality between np.pi number symbol and an number approximation symbol works'''
 91 |     # has high floating point precision
 92 |     sym = Symbol(np.pi)                                                                           | aggregate.sym         # collect the symbol value
 93 |     # test if pi is equal to 3.14159265... by approximating
 94 |     res = (sym == '3.14159265...')                                                                | aggregate.res         # collect the result value
 95 |     return res, bool_success(res)
 96 | 
 97 | 
 98 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 99 | def test_sub_and_contains(aggregate):
100 |     '''Test if a semantic subtraction operator between two symbols works'''
101 |     # semantic understanding of subtraction
102 |     base  = 'Hello, I would like a cup of coffee.'                                                | aggregate.base        # collect the base value
103 |     res   = ((Symbol('Hello, I would like a cup of tea.') - Symbol('tea')) + 'coffee')            | aggregate.res         # collect the result value
104 |     rand  = Symbol(RANDOMNESS).mean().measure(base)                                               | aggregate.rand        # collect the random value
105 |     # @NOTE: special case, where we expect the exact solution
106 |     score = res.measure(base, normalize=normalize(1.0, rand))                                     | aggregate.score       # collect the score
107 |     return True, {'scores': [score.value]}
108 | 
109 | 
110 | @toggle_test(ACTIVE, default=MOCK_RETURN)
111 | def test_compare(aggregate):
112 |     '''Test if a comparison operator between two number symbols works'''
113 |     res = (Symbol(10) > Symbol('5'))
114 |     # @NOTE: Bernoulli trial
115 |     res = (res == True)                                                                           | aggregate.res         # collect the result value
116 |     return res, bool_success(res)
117 | 
118 | 
119 | @toggle_test(ACTIVE, default=MOCK_RETURN)
120 | def test_compare_2(aggregate):
121 |     '''Test if a semantic comparison operator between two symbols works'''
122 |     res = Symbol(10) < Symbol('fifteen thousand')
123 |     # @NOTE: Bernoulli trial
124 |     res = (res == True)                                                                           | aggregate.res         # collect the result value
125 |     return res, bool_success(res)
126 | 
127 | 
128 | @toggle_test(ACTIVE, default=MOCK_RETURN)
129 | def test_insert_rshift(aggregate):
130 |     '''Test if information can be inserted into a symbol using the RSHIFT operator'''
131 |     base  = 'I love to eat apples and bananas'                                                    | aggregate.base        # collect the base value
132 |     sym   = Symbol('I love to eat apples')                                                        | aggregate.sym         # collect the symbol value
133 |     res   = ('and bananas' >> sym)                                                                | aggregate.res         # collect the result value
134 |     # expect exact solution
135 |     rand  = Symbol(RANDOMNESS).mean().measure(base)                                               | aggregate.rand        # collect the random value
136 |     # @NOTE: special case, where we expect the exact solution
137 |     score = res.measure(base, normalize=normalize(1.0, rand))                                     | aggregate.score       # collect the score
138 |     return True, {'scores': [score.value]}
139 | 
140 | 
141 | @toggle_test(ACTIVE, default=MOCK_RETURN)
142 | def test_extract_information(aggregate):
143 |     '''Test if information can be extracted from a symbol using the EXTRACT operator'''
144 |     sym  = Symbol('I have an iPhone from Apple. And it is not cheap. ' + \
145 |                   'I love to eat bananas, mangos, and oranges. ' + \
146 |                   'My hobbies are playing football and basketball.')                              | aggregate.sym         # collect the symbol value
147 |     res  = sym.extract('fruits')
148 |     res  = str(res).lower().strip()                                                               | aggregate.res         # collect the result value
149 |     cnt  = 0
150 |     succ = True
151 |     # check if the EXTRACT operator retains the 3 essential words
152 |     succ &= 'bananas' in res
153 |     # @NOTE: Bernoulli trials
154 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
155 |     succ &= 'mangos' in res
156 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
157 |     succ &= 'oranges' in res
158 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
159 |     return succ, {'scores': [cnt/3.0]}
160 | 
161 | 
162 | @toggle_test(ACTIVE, default=MOCK_RETURN)
163 | def test_extract_contextual_information(aggregate):
164 |     '''Test if number information can be extracted from a symbol using the EXTRACT operator'''
165 |     sym = Symbol("""Exception: Failed to query GPT-3 after 3 retries. Errors: [InvalidRequestError(message="This model's maximum context length is 4097 tokens, however you requested 7410 tokens (2988 in your prompt; 4422 for the completion). Please reduce your prompt; or completion length.",
166 |                     param=None, code=None, http_status=400, request_id=None)]""")                 | aggregate.sym         # collect the symbol value
167 |     try:
168 |         res = sym.extract('requested tokens').int() # cast to int
169 |     except:
170 |         res = 0 # default value
171 |     # check if the EXTRACT operator gets the correct number of tokens
172 |     res = (res == 7410)                                                                           | aggregate.res         # collect the result value
173 |     return res, bool_success(res)
174 | 
175 | 
176 | @toggle_test(ACTIVE, default=MOCK_RETURN)
177 | def test_filter(aggregate):
178 |     '''Test if filtering information can be applied to a symbol using the FILTER operator'''
179 |     sym  = Symbol('Physics, Sports, Mathematics, Music, Art, Theater, Writing')                   | aggregate.sym         # collect the symbol value
180 |     res  = sym.filter('science related subjects')
181 |     res  = str(res).lower().strip()                                                               | aggregate.res         # collect the result value
182 |     cnt  = 0
183 |     succ = True
184 |     # check if the FILTER operator retains the essential words
185 |     # @NOTE: Bernoulli trials
186 |     succ &= 'physics' in res
187 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
188 |     succ &= 'mathematics' in res
189 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
190 |     succ &= 'music' not in res
191 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
192 |     succ &= 'art' not in res
193 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
194 |     succ &= 'theater' not in res
195 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
196 |     succ &= 'writing' not in res
197 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
198 |     succ &= 'sports' not in res
199 |     cnt += (1 if succ else 0)                                                                     | aggregate.cnt         # collect the result value
200 |     return succ, {'scores': [cnt/7.0]}
201 | 
202 | 
203 | @toggle_test(ACTIVE, default=MOCK_RETURN)
204 | def test_clean(aggregate):
205 |     '''Test if cleaning information can be applied to a symbol using the CLEAN operator'''
206 |     base  = 'Hello World'                                                                         | aggregate.base        # collect the base value
207 |     sym   = Symbol('Hello *&&7amp;;; \t\t\t\nWorld')                                              | aggregate.sym         # collect the symbol value
208 |     res   = sym.clean()                                                                           | aggregate.res         # collect the result value
209 |     # check if the CLEAN operator retains the 2 essential words
210 |     # expect exact solution
211 |     rand  = Symbol(RANDOMNESS).mean().measure(base)                                               | aggregate.rand        # collect the random value
212 |     # @NOTE: special case, where we expect the exact solution
213 |     score = res.measure(base, normalize=normalize(1.0, rand))                                     | aggregate.score       # collect the score
214 |     return True, {'scores': [score.value]}
215 | 


--------------------------------------------------------------------------------
/src/evals/eval_multimodal_bindings.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | 
  3 | from src.utils import normalize, RANDOMNESS, MOCK_RETURN
  4 | 
  5 | from symai import core_ext, Symbol, Expression, Interface, Function
  6 | from symai.utils import toggle_test
  7 | 
  8 | 
  9 | ACTIVE = True
 10 | 
 11 | 
 12 | OPTION0_BASE_REF = ['Mathematics related topic',
 13 |                     'MATHEMATICS RELATED TOPIC',
 14 |                     'mathematics and related topics']
 15 | OPTION1_BASE_REF = ['Website Content Scraping and Crawling',
 16 |                     'web content scraping and crawling',
 17 |                     'WEBSITE CONTENT RELATED TOPICS']
 18 | OPTION2_BASE_REF = ['Search Engine Query',
 19 |                     'search engine query',
 20 |                     'SEARCH ENGINE QUERY']
 21 | OPTION3_BASE_REF = ['Optical Character Recognition',
 22 |                     'optical character recognition',
 23 |                     'OPTICAL CHARACTER RECOGNITION']
 24 | OPTION_REFS      = [OPTION0_BASE_REF, OPTION1_BASE_REF, OPTION2_BASE_REF, OPTION3_BASE_REF]
 25 | 
 26 | 
 27 | class Category(Expression):
 28 |     def __init__(self, **kwargs):
 29 |         super().__init__(**kwargs)
 30 |         self.options   = {
 31 |             0: 'mathematics related topic',
 32 |             1: 'website content scraping and crawling',
 33 |             2: 'search engine query',
 34 |             3: 'optical character recognition',
 35 |             4: 'image rendering',
 36 |             5: 'image captioning',
 37 |             6: 'audio transcription',
 38 |             7: 'unknown'
 39 |         }
 40 | 
 41 |     def forward(self):
 42 |         @core_ext.cache(in_memory=True)
 43 |         def _embed(_):
 44 |             def _emb_mapping_(category):
 45 |                 sym  = Symbol(category)
 46 |                 return sym.embed()
 47 |             emb = map(_emb_mapping_, self.options.values())
 48 |             return list(emb)
 49 |         return _embed(self)
 50 | 
 51 | 
 52 | LINEAR_ALGEBRA = 'linear algebra'
 53 | NUMBER_COMPARISON = 'number comparison'
 54 | 
 55 | 
 56 | class MultiModalExpression(Expression):
 57 |     def __init__(self, val, **kwargs):
 58 |         super().__init__(val, **kwargs)
 59 |         # define interfaces
 60 |         self.solver      = Interface('wolframalpha')
 61 |         self.crawler     = Interface('selenium')
 62 |         self.search      = Interface('serpapi')
 63 |         self.ocr         = Interface('ocr')
 64 |         self.rendering   = Interface('dall_e')
 65 |         self.captioning  = Interface('llava')
 66 |         self.transcribe  = Interface('whisper')
 67 |         # evaluation interfaces
 68 |         self.clip        = Interface('clip')
 69 |         # define functions
 70 |         self.func        = Function("Summarize the content:")
 71 |         self.category    = Category()
 72 | 
 73 |     def detect_option(self, aggregate, assertion):
 74 |         option     = assertion()                                                                                    | aggregate.category.option
 75 |         # testing the category detection accuracy
 76 |         category   = self.choice(self.category.options.values(), default='unknown', temperature=0.0)                | aggregate.category.category
 77 |         base       = Symbol(OPTION_REFS[option])
 78 |         base_mean  = base.mean(axis=0)                                                                              | aggregate.category.base_mean
 79 |         base_score = base.cvs()                                                                                     | aggregate.category.base_score
 80 |         rand_seq   = Symbol(RANDOMNESS).mean(axis=0)                                                                | aggregate.category.rand_mean
 81 |         rand_score = base_mean.measure(rand_seq)                                                                    | aggregate.category.rand_score
 82 |         score      = category.measure(self.category.options[option],
 83 |                                       normalize=normalize(base_score, rand_score))                                  | aggregate.category.score
 84 |         return option, score.value
 85 | 
 86 |     def forward(self, aggregate, assertion, presets, **kwargs):
 87 |         res     = None
 88 |         scoring = []
 89 |         success = False
 90 |         # detect the type of expression
 91 |         option, score = self.detect_option(aggregate, assertion)
 92 |         scoring.append(score)
 93 | 
 94 |         # mathematical formula
 95 |         if option == 0:
 96 |             ref_formula, instance_type, details  = presets()
 97 |             ref_formula = Symbol(ref_formula)                                                                       | aggregate.ref_formula
 98 |             formula     = self.extract('mathematical formula')                                                      | aggregate.formula
 99 |             score       = ref_formula.measure(formula)                                                              | aggregate.formula_score
100 |             scoring.append(score.value)
101 |             # subtypes of mathematical formula
102 |             if self.isinstanceof(LINEAR_ALGEBRA, temperature=0.0):
103 |                 score    = (1.0 if str(instance_type) == LINEAR_ALGEBRA else 0.0)                                   | aggregate.linear_function.score
104 |                 scoring.append(score)
105 |                 if score == 0.0: # avoid error when in wrong category
106 |                     # no score for other types of mathematical formula
107 |                     score = 0.0                                                                                     | aggregate.linear_function.answer_score
108 |                     scoring.append(score)
109 |                     return success, scoring
110 |                 answer, solutions = details
111 |                 answer   = Symbol(answer)                                                                           | aggregate.linear_function.answer
112 |                 # prepare for wolframalpha
113 |                 res        = self.solver(formula)
114 |                 res        = res.query('write a one sentence summary of the answer')                                | aggregate.number_comparison.res
115 |                 rand_seq   = Symbol(RANDOMNESS).mean(axis=0)                                                        | aggregate.number_comparison.rand_mean
116 |                 sol_mean   = solutions.mean(axis=0)                                                                 | aggregate.number_comparison.solutions_mean
117 |                 base_score = solutions.cvs()                                                                        | aggregate.number_comparison.base_score
118 |                 rand_score = answer.measure(rand_seq)                                                               | aggregate.number_comparison.rand_score
119 |                 score      = answer.measure(sol_mean, normalize=normalize(base_score, rand_score))                  | aggregate.number_comparison.answer_score
120 |                 scoring.append(score.value)
121 |                 success    = True
122 | 
123 |             elif self.isinstanceof(NUMBER_COMPARISON, temperature=0.0):
124 |                 score    = (1.0 if str(instance_type) == NUMBER_COMPARISON else 0.0)                                | aggregate.number_comparison.score
125 |                 scoring.append(score)
126 |                 if score == 0.0: # avoid error when in wrong category
127 |                     # no score for other types of mathematical formula
128 |                     score = 0.0                                                                                     | aggregate.number_comparison.answer_score
129 |                     scoring.append(score)
130 |                     return success, scoring
131 |                 answer   = details                                                                                  | aggregate.number_comparison.answer
132 |                 res      = self.solver(formula) # send directly to wolframalpha
133 |                 score    = (1.0 if res == answer else 0.0)                                                          | aggregate.number_comparison.answer_score
134 |                 scoring.append(score)
135 |                 success  = True
136 | 
137 |             else:
138 |                 # no score for other types of mathematical formula
139 |                 score    = 0.0                                                                                      | aggregate.unknown.score
140 |                 scoring.append(score)
141 |                 success  = False
142 | 
143 |         # website content scraping and crawling
144 |         elif option == 1:
145 |             ori_url, page, content_sym, base_score, rand_score = presets()
146 |             ori_url_sym = Symbol(ori_url)                                                                           | aggregate.website_scraping.ori_url
147 |             url         = self.extract('url')                                                                       | aggregate.website_scraping.gen_url
148 |             score       = ori_url_sym.measure(url)                                                                  | aggregate.website_scraping.score
149 |             scoring.append(score.value)
150 |             res         = self.func(page)                                                                           | aggregate.website_scraping.res
151 |             # normalize the score towards the original content
152 |             score       = content_sym.measure(res, normalize=normalize(base_score, rand_score))                     | aggregate.website_scraping.score
153 |             scoring.append(score.value)
154 |             success     = True
155 | 
156 |         # search engine query
157 |         elif option == 2:
158 |             answer = presets()                                                                                      | aggregate.search_engine.answer
159 | 
160 |             if kwargs.get('real_time'):
161 |                 res = self.search(self.value)
162 |                 res = res.raw.organic_results.to_list()
163 |             else:
164 |                 snippet_path = Path(__file__).parent / "snippets" / "google_organic_results_20240111_query=What-is-sulfuric-acid.txt"
165 |                 res = open(snippet_path, "r").read()
166 | 
167 |             res     = Symbol(res)                                                                                   | aggregate.search_engine.res
168 |             res     = res.extract("The answer based on the CDC source.")
169 |             score   = res.measure(answer)                                                                           | aggregate.search_engine.score
170 |             scoring.append(score.value)
171 |             success = True
172 | 
173 |         # optical character recognition
174 |         elif option == 3:
175 |             answer  = presets()                                                                                     | aggregate.ocr_engine.answer
176 |             if kwargs.get('real_time'):
177 |                 res = self.ocr((Path(__file__).parent / "assets" / "sample_bill.jpg").as_posix())
178 |             else:
179 |                 snippet_path = Path(__file__).parent / "snippets" / "sample_bill.txt"
180 |                 res = open(snippet_path, "r").read()
181 |                 res = Symbol(res)
182 | 
183 |             res     = res.extract(self.value)                                                                       | aggregate.ocr_engine.res
184 |             score   = res.measure(answer)                                                                           | aggregate.ocr_engine.score
185 |             scoring.append(score.value)
186 |             success = True
187 | 
188 |         # Other modalities we could evaluate and include in the score in the future, but exceeds the scope of this benchmark.
189 |         # image rendering
190 |         # elif option == 4:
191 |         #     query = self.extract('image url')
192 |         #     res   = self.rendering(query)
193 | 
194 |         # image captioning
195 |         # elif option == 5:
196 |         #     image = self.extract('image path')
197 |         #     res   = self.captioning(image)
198 | 
199 |         # audio transcription
200 |         # elif option == 6:
201 |         #     audio = self.extract('audio path')
202 |         #     res   = self.transcribe(audio)
203 | 
204 |         else:
205 |             score   = 0.0                                                                                           | aggregate.unknown.score
206 |             scoring.append(score)
207 |             success = False
208 | 
209 |         return success, scoring
210 | 
211 | 
212 | @toggle_test(ACTIVE, default=MOCK_RETURN)
213 | def test_website_scraping(aggregate):
214 |     # scraped content
215 |     content = """ChatGPT back online after ‘major outage,’ OpenAI says
216 | PUBLISHED THU, DEC 14 20231:58 AM EST
217 | 
218 | KEY POINTS
219 | OpenAI on Thursday said that a major outage on its artificial intelligence chatbot ChatGPT was resolved.
220 | ChatGPT had issues for around 40 minutes, during which service was “intermittently unavailable.”
221 | OpenAI did not give an explanation on what caused the latest issues.
222 | 
223 | OpenAI on Thursday said that a major outage on its artificial intelligence chatbot, ChatGPT, was resolved.
224 | 
225 | ChatGPT had issues for around 40 minutes, during which the service was “intermittently unavailable.”
226 | 
227 | OpenAI also said that some users of ChatGPT Enterprise, which is designed for businesses, were encountering “elevated error rates.”
228 | 
229 | Earlier this month, ChatGPT suffered another issue, where the company said around 10% of users may have been unable to send a message to ChatGPT. The AI technology had another major outage in November.
230 | 
231 | OpenAI did not give an explanation on what caused the latest issues.
232 | 
233 | ChatGPT broke records as the fastest-growing consumer app in history and now has about 100 million weekly active users, while more than 92% of Fortune 500 companies employ the platform, according to OpenAI.
234 | 
235 | The Microsoft
236 | -backed company has had a rocky time of late, as the board fired CEO Sam Altman in November, only for him to be reinstated days later after pressure from employees and investors.
237 | 
238 | — CNBC’s Hayden Field contributed to this article."""
239 |     summary = """OpenAI reported that a significant outage affecting its AI chatbot, ChatGPT, was resolved following a 40-minute disruption that left the service intermittently unavailable. It was noted that users of the ChatGPT Enterprise experienced elevated error rates as well. Earlier in the month and in November, ChatGPT had faced other service issues. OpenAI did not disclose the cause of the recent outage. ChatGPT has become immensely popular, touted as the fastest-growing consumer app ever, with approximately 100 million weekly active users and adoption by many top companies. Despite its success, OpenAI, supported by Microsoft, has experienced some turbulence, including the brief dismissal and subsequent reinstatement of CEO Sam Altman."""
240 |     url  = "https://www.cnbc.com/2023/12/14/chatgpt-back-online-after-major-outage-openai-says.html"
241 |     val  = f"crawl the news site from {url}"
242 |     expr = MultiModalExpression(val)
243 |     content_sym   = Symbol(content)                                                                                 | aggregate.content
244 |     summary_sym   = Symbol(summary)                                                                                 | aggregate.summary
245 |     base_score    = content_sym.measure(summary_sym)                                                                | aggregate.content_score
246 |     rand_seq      = Symbol(RANDOMNESS).mean(axis=0)                                                                 | aggregate.rand_seq
247 |     rand_score    = content_sym.measure(rand_seq)                                                                   | aggregate.rand_score
248 |     succ, scoring = expr(
249 |         aggregate,
250 |         lambda: 1,
251 |         lambda: (url, content, content_sym, base_score, rand_score)
252 |     )
253 |     return succ, {'scores': scoring}
254 | 
255 | 
256 | @toggle_test(ACTIVE, default=MOCK_RETURN)
257 | def test_search_engine(aggregate):
258 |     query         = "What is sulfuric acid?"
259 |     # Let's test whether or not it can extract the answer based on the CDC source.
260 |     answer        = Symbol("Sulfuric acid (H2S04) is a corrosive substance, destructive to the skin, eyes, teeth, and lungs. Severe exposure can result in death.")
261 |     expr          = MultiModalExpression(query)
262 |     succ, scoring = expr(
263 |         aggregate,
264 |         lambda: 2,
265 |         lambda: answer,
266 |         real_time=False
267 |     )
268 | 
269 |     return succ, {'scores': scoring}
270 | 
271 | 
272 | @toggle_test(ACTIVE, default=MOCK_RETURN)
273 | def test_linear_function_computation(aggregate):
274 |     query         = Symbol('Analyse the following vectors and asses if (2, -11, 2) and (14, 2, 2) are linearly dependent?')
275 |     ref           = Symbol("(2, -11, 2) and (14, 2, 2) are linearly independent.")
276 |     solutions     = Symbol([
277 |         "(2, -11, 2) and (14, 2, 2) are actually linearly independent.",
278 |         "No, the vectors (2, -11, 2) and (14, 2, 2) demonstrate linear independence.",
279 |         "The vectors (2, -11, 2) and (14, 2, 2) are not linearly dependent."
280 |     ])
281 |     expr          = MultiModalExpression(query)
282 |     succ, scoring = expr(
283 |         aggregate,
284 |         lambda: 0,
285 |         lambda: ('(2, -11, 2) and (14, 2, 2) are linearly independent?', Symbol(LINEAR_ALGEBRA), (ref, solutions))
286 |     )
287 | 
288 |     return succ, {'scores': scoring}
289 | 
290 | 
291 | @toggle_test(ACTIVE, default=MOCK_RETURN)
292 | def test_comparison(aggregate):
293 |     val        = Symbol("is 100044347 bigger than 129981063.472?")
294 |     expr       = MultiModalExpression(val)
295 |     succ, res  = expr(
296 |         aggregate,
297 |         lambda: 0,
298 |         lambda: ('100044347 > 129981063.472', Symbol(NUMBER_COMPARISON), False)
299 |     )
300 |     return succ, {'scores': res}
301 | 
302 | 
303 | @toggle_test(ACTIVE, default=MOCK_RETURN)
304 | def test_ocr_engine(aggregate):
305 |     query         = "Extract the current balance from the bill image."
306 |     answer        = Symbol("$ 21,920.37")
307 |     expr          = MultiModalExpression(query)
308 |     succ, scoring = expr(
309 |         aggregate,
310 |         lambda: 3,
311 |         lambda: answer,
312 |         real_time=False
313 |     )
314 |     return succ, {'scores': scoring}
315 | 
316 | 


--------------------------------------------------------------------------------
/src/evals/eval_program_synthesis.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | from src.utils import normalize, rand_ast_measure, ast_measure, RANDOMNESS, MOCK_RETURN
  4 | 
  5 | from symai import Symbol, Expression, Conversation, Call
  6 | from symai.components import FileReader, Execute, RuntimeExpression, ExpressionBuilder
  7 | from symai.processor import ProcessorPipeline
  8 | from symai.post_processors import StripPostProcessor, CodeExtractPostProcessor
  9 | from symai.utils import toggle_test
 10 | from symai.extended.api_builder import APIBuilder, StackTraceRetryExecutor
 11 | 
 12 | 
 13 | ACTIVE = True
 14 | cur_file_dir = os.path.dirname(os.path.abspath(__file__))
 15 | 
 16 | 
 17 | @toggle_test(ACTIVE, default=MOCK_RETURN)
 18 | def test_application_template(aggregate):
 19 |     task      = """[Task]
 20 | Create a function `create_latex_result` that takes in the `benchmark_results` as `data` and parses the LaTeX table rows and columns based on the `data` results. The table should follow the `latex_template` format and populate the rows table as indicated by the placeholder variables. Mark the best performing model per row with bold text. At the bottom of the benchmarks, place the values of the total row by computing the average over all columns and populating the `total_values` entry in the `latex_template`.
 21 | The table should be returned as a string by the function.
 22 | All required imports are already provided. The code of the `create_latex_result` function should be written between a
 23 | ```python
 24 | ...
 25 | ```
 26 | code block.
 27 | The `create_latex_result` function must be self-contained, fully functional and pass all tests.
 28 | No other functions or explanations are required.
 29 | """
 30 |     # Define random sequence to normalize data
 31 |     random_seq = Symbol(RANDOMNESS).mean(axis=0)                                                                        | aggregate.random_seq
 32 |     # Create a template
 33 |     template   = os.path.join(cur_file_dir, 'snippets/latex_templating_problem.txt')
 34 |     conv       = Conversation(file_link=[template], auto_print=False)
 35 |     raw_res    = conv(task)                                                                                             | aggregate.gen_raw_res
 36 |     scoring    = []
 37 |     processors = ProcessorPipeline([StripPostProcessor(), CodeExtractPostProcessor()])
 38 |     code       = Symbol(processors(str(raw_res), None))                                                                 | aggregate.gen_code
 39 |     reader     = FileReader()
 40 |     solution1  = reader(os.path.join(cur_file_dir, 'snippets/latex_templating_solution_1.txt'))                         | aggregate.solution1
 41 |     solution2  = reader(os.path.join(cur_file_dir, 'snippets/latex_templating_solution_2.txt'))                         | aggregate.solution2
 42 |     solutions  = Symbol([solution1, solution2]).mean(axis=0)                                                            | aggregate.solutions
 43 |     base_score = solution1.measure(solution2)                                                                           | aggregate.conv_base_score
 44 |     # remove the chance of simply rephrasing the task description
 45 |     rand_score = solutions.measure(random_seq)                                                                          | aggregate.conv_rand_score
 46 |     score      = solutions.measure(raw_res, normalize=normalize(base_score, rand_score))                                | aggregate.conv_score
 47 |     scoring.append(score.value)
 48 | 
 49 |     # Read the source code from files
 50 |     solution1  = Symbol(solution1, callables=[Call('measure', ast_measure)])
 51 |     # compute again normalization score but this time for AST measure
 52 |     base_score = solution1.measure(solution2)                                                                           | aggregate.ast_base_score
 53 |     rand_score = (0.5*(rand_ast_measure(solution1) + rand_ast_measure(solution2)))                                      | aggregate.ast_rand_score
 54 |     score      = solution1.measure(code, normalize=normalize(base_score, rand_score))                                   | aggregate.ast_score
 55 |     scoring.append(score.value)
 56 | 
 57 |     # Execute the code
 58 |     code       = reader(template).str().replace('{TODO}', str(code))
 59 |     runner     = Execute(enclosure=True)
 60 |     success    = False
 61 |     try:
 62 |         res    = runner(code)
 63 |         # extract the output from the locals
 64 |         out    = Symbol(res['locals']['_output_'])                                                                      | aggregate.code_output
 65 |         ori    = reader(os.path.join(cur_file_dir, 'snippets/latex_templating_output.txt'))                             | aggregate.code_solution
 66 |         # no normalization is needed here since the output has to be an exact match
 67 |         score  = out.measure(ori)                                                                                       | aggregate.code_score
 68 |         scoring.append(score.value)
 69 |         success = True
 70 |     except Exception as e:
 71 |         score  = 0.0                                                                                                    | aggregate.code_score
 72 |         scoring.append(score)
 73 | 
 74 |     return success, {'scores': scoring}
 75 | 
 76 | 
 77 | class APIExecutor(Expression):
 78 |     def __init__(self, verbose=False, **kwargs):
 79 |         super().__init__(**kwargs)
 80 |         self.builder     = APIBuilder()
 81 |         self.executor    = StackTraceRetryExecutor(retries=0) # disable retries
 82 |         self._verbose    = verbose
 83 |         self._request    = None
 84 |         self._code       = None
 85 |         self._result     = None
 86 | 
 87 |     @property
 88 |     def _runnable(self):
 89 |         return self.executor._runnable
 90 | 
 91 |     def forward(self, aggregate, request: Symbol, presets, **kwargs) -> Symbol:
 92 |         answer, refs, code, code2, rand = presets()
 93 |         self._request = self._to_symbol(request)
 94 |         if self._verbose: print('[REQUEST]', self._request)
 95 |         # Generate the code to implement the API call
 96 |         try:
 97 |             self._code    = self.builder(self._request)
 98 |         except Exception as e:
 99 |             code_score    = 0.0                                                                                         | aggregate.code_score
100 |             web_score     = 0.0                                                                                         | aggregate.web_score
101 |             return [code_score, web_score]
102 |         if self._verbose: print('[GENERATED_CODE]', self._code)
103 |         base_score    = code.measure(code2)                                                                             | aggregate.base_score
104 |         rand_score    = rand.measure(refs)                                                                              | aggregate.rand_score
105 |         code_score    = code.measure(self._code, normalize=normalize(base_score, rand_score))                           | aggregate.code_score
106 |         code_score    = code_score.value
107 |         # Execute the code to define the 'run' function
108 |         try:
109 |             self._result  = self.executor(str(self._code), request=self._request)                                       | aggregate.output
110 |             if self._verbose: print('[RESULT]:', self._result)
111 |             web_score     = answer.measure(self._result)                                                                | aggregate.web_score
112 |             web_score     = web_score.value
113 |         except Exception as e:
114 |             self._result  = str(e)
115 |             web_score     = 0.0                                                                                         | aggregate.web_score
116 |         self._value       = self._result
117 |         return [code_score, web_score]
118 | 
119 | 
120 | @toggle_test(ACTIVE, default=MOCK_RETURN)
121 | def test_api_builder(aggregate):
122 |     answer    = Symbol("Yannic Kilcher")                                                                                | aggregate.answer
123 |     rand_seq  = Symbol(RANDOMNESS).mean(axis=0)                                                                         | aggregate.random_seq
124 |     reader    = FileReader()
125 |     website   = reader(os.path.join(cur_file_dir, 'snippets/code_api_builder_website_result.txt'))
126 |     ref_code  = reader(os.path.join(cur_file_dir, 'snippets/code_api_builder.txt'))                                     | aggregate.ref_code
127 |     ref_code2 = reader(os.path.join(cur_file_dir, 'snippets/code_api_builder2.txt'))                                    | aggregate.ref_code2
128 |     refs      = Symbol([ref_code, ref_code2]).mean(axis=0)                                                              | aggregate.refs
129 |     executor  = APIExecutor() # creates code on the fly and executes it
130 |     scores    = executor(aggregate,
131 |                         'Fetch data from URL https://www.ykilcher.com/ and use Function to extract the full name of the author.', # the request
132 |                         lambda: (answer, refs, ref_code, ref_code2, rand_seq)) # interprets the instruction to generate a HTTP request
133 |     return True, {'scores': scores}
134 | 
135 | 
136 | @toggle_test(ACTIVE, default=MOCK_RETURN)
137 | def test_expression_builder(aggregate):
138 |     solution1 = Symbol("""
139 | # do not remove or change the imports
140 | from symai import Expression, Function, Symbol
141 | class QueryExpression(Expression):
142 |     # initialize the expression with task specific arguments
143 |     def __init__(self, prompt: str, **kwargs):
144 |         super().__init__(**kwargs)
145 |         self.func = Function(prompt, **kwargs)
146 | 
147 |     # define the forward function with data specific arguments
148 |     def forward(self, sym: Symbol, *args, **kwargs) -> Symbol:
149 |         sym = self._to_symbol(sym)
150 |         result = self.func(sym, *args, **kwargs)
151 |         return result
152 | # assign the expression type to the variable _value_obj_
153 | _value_obj_ = QueryExpression
154 | """)                                                                                                                    | aggregate.solution1
155 |     solution2 = Symbol("""
156 | from symai import Expression, Function, Symbol
157 | class QueryExpression(Expression):
158 |     def __init__(self, prompt: str, **kwargs):
159 |         super().__init__(**kwargs)
160 |         self.func = Function(prompt, **kwargs)
161 |     def forward(self, sym: Symbol, *args, **kwargs) -> Symbol:
162 |         sym = self._to_symbol(sym)
163 |         return self.func(sym, *args, **kwargs)
164 | _value_obj_ = QueryExpression
165 | """)                                                                                                                    | aggregate.solution2
166 |     solutions = Symbol([solution1, solution2]).mean(axis=0)                                                             | aggregate.solutions
167 |     rand_seq  = Symbol(RANDOMNESS).mean(axis=0)                                                                         | aggregate.random_seq
168 |     builder   = ExpressionBuilder()
169 |     code      = builder("Create a query Expression that is initializes a Function with a prompt and processes a data Symbol based on the custom Function.")
170 |     runner    = RuntimeExpression()
171 |     scoring   = []
172 |     try:
173 |         expr  = runner(code)
174 |         score = 1.0                                                                                                     | aggregate.code_score
175 |         scoring.append(score)
176 |         # initialize the expression with the prompt
177 |         query = expr('extract the names from the text')
178 |     except:
179 |         score = 0.0                                                                                                     | aggregate.code_score
180 |         scoring.append(score)
181 |     base_score  = solution1.measure(solution2)                                                                          | aggregate.base_score
182 |     rand_score  = solutions.measure(rand_seq)                                                                           | aggregate.rand_score
183 |     score       = solution1.measure(code, normalize=normalize(base_score, rand_score))                                  | aggregate.code_score
184 |     scoring.append(score.value)
185 |     try:
186 |         # run the expression on the data
187 |         res   = query('Hello my name is Max and I am 20 years old.')                                                    | aggregate.query_res
188 |         score = res.measure('Max')                                                                                      | aggregate.query_score
189 |         scoring.append(score.value)
190 |     except:
191 |         score = 0.0                                                                                                     | aggregate.query_score
192 |         scoring.append(score)
193 |     return True, {'scores': scoring}
194 | 


--------------------------------------------------------------------------------
/src/evals/snippets/code_api_builder.txt:
--------------------------------------------------------------------------------
 1 | def run(text: str) -> str:  # [MANAGED] entry point cannot be changed
 2 |     # [MANAGED-BEGIN] mandatory imports here
 3 |     import traceback
 4 |     import requests
 5 |     from symai import Function
 6 |     # [MANAGED-END] mandatory imports here
 7 | 
 8 |     # executable code here
 9 |     try:  # [MANAGED] must contain this line, do not change
10 |         # API call to fetch data from URL
11 |         response = requests.get('https://www.ykilcher.com/')
12 | 
13 |         # Check if the request was successful
14 |         if response.status_code == 200:
15 |             res = response.text  # Get the content of the response
16 |         else:
17 |             res = f"Error: {response.status_code}"
18 | 
19 |         # mandatory return statement here
20 |         res = str(res)  # [MANAGED] must contain this line, do not change
21 | 
22 |         # Use the Function class to log messages
23 |         func = Function('Extract full name from text')
24 |         res = func(res)
25 | 
26 |         return res  # [MANAGED] must return a string, do not change
27 |     except Exception as e:  # [MANAGED] must catch all exceptions and return them as string
28 |         tb = traceback.format_exc()  # [MANAGED] return full error stack trace as string
29 |         return tb  # [MANAGED] return tb as string, do not change
30 | 
31 | # Example request value
32 | value = "Fetch data from URL https://www.ykilcher.com/"
33 | # mandatory statement here
34 | res = run(value)  # [MANAGED] must contain this line, do not change
35 | 


--------------------------------------------------------------------------------
/src/evals/snippets/code_api_builder2.txt:
--------------------------------------------------------------------------------
 1 | def run(text: str) -> str:
 2 |     import requests
 3 |     from symai import Function
 4 |     url = 'https://www.ykilcher.com/'
 5 |     rsp = requests.get(url)
 6 |     if rsp.status_code != 200:
 7 |         raise Exception(f"Error: {rsp.status_code}")
 8 |     res = rsp.text
 9 |     res = str(res)
10 |     func = Function('extract the name from text')
11 |     return func(res)
12 | value = "Fetch data from URL https://www.ykilcher.com/"
13 | res = run(value)
14 | 


--------------------------------------------------------------------------------
/src/evals/snippets/code_api_builder_website_result.txt:
--------------------------------------------------------------------------------
1 | '<!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><title>Yannic Kilcher</title><meta name="description" content="Yannic Kilcher&#x27;s Homepage"/><link rel="icon" href="/favicon.ico"/><script src="https://apis.google.com/js/platform.js" async="" defer=""></script><meta name="next-head-count" content="6"/><link rel="preload" href="/_next/static/css/d3600e2ca0b0efb5.css" as="style"/><link rel="stylesheet" href="/_next/static/css/d3600e2ca0b0efb5.css" data-n-g=""/><link rel="preload" href="/_next/static/css/8c46507748aeca73.css" as="style"/><link rel="stylesheet" href="/_next/static/css/8c46507748aeca73.css" data-n-p=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js"></script><script src="/_next/static/chunks/webpack-5752944655d749a0.js" defer=""></script><script src="/_next/static/chunks/framework-5f4595e5518b5600.js" defer=""></script><script src="/_next/static/chunks/main-1038209226d7...
2 | 


--------------------------------------------------------------------------------
/src/evals/snippets/einstein_puzzle.txt:
--------------------------------------------------------------------------------
 1 | The Englishman lives in the red house.
 2 | The Swede keeps dogs.
 3 | The Dane drinks tea.
 4 | The green house is just to the left of the white one.
 5 | The owner of the green house drinks coffee.
 6 | The Pall Mall smoker keeps birds.
 7 | The owner of the yellow house smokes Dunhills.
 8 | The man in the center house drinks milk.
 9 | The Norwegian lives in the first house.
10 | The Blend smoker has a neighbor who keeps cats.
11 | The man who smokes Blue Masters drinks bier.
12 | The man who keeps horses lives next to the Dunhill smoker.
13 | The German smokes Prince.
14 | The Norwegian lives next to the blue house.
15 | The Blend smoker has a neighbor who drinks water.
16 | The question to be answered is: Who keeps fish?"""
17 | 


--------------------------------------------------------------------------------
/src/evals/snippets/einstein_puzzle_human_solution.txt:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from z3 import Solver, Function, IntSort, EnumSort, Int, And, Or, Xor, Const
 3 | 
 4 | # Define a function that takes a solver as input and returns a query constant as output
 5 | def solve_puzzle(S: Solver) -> Const:
 6 |     # Create a new variable. Defines a synonym for Int to make the code more readable
 7 |     v = 0
 8 |     def variable():
 9 |         nonlocal v
10 |         i = Int("v" + str(v))
11 |         v = v + 1
12 |         S.add(1 <= i, i <= 5)
13 |         return i
14 | 
15 |     # Assert a new fact. Defines a synonym for add to make the code more readable
16 |     def fact0(f):
17 |         S.add(f)
18 | 
19 |     # Assert a fact about a new variable
20 |     def fact1(f):
21 |         i = variable()
22 |         S.add(f(i))
23 | 
24 |     # Assert a fact about two variables
25 |     def fact2(f):
26 |         i = variable()
27 |         j = variable()
28 |         S.add(i != j)
29 |         S.add(f(i, j))
30 | 
31 |     # Assert two objects are next to each other
32 |     def neighbor(i, j):
33 |         return (Or(i == j+1, j == i+1))
34 | 
35 |     # Define the enumerated sorts
36 |     Color      , (Red     , Green   , White     , Yellow   , Blue)   = EnumSort('Color'      , ('Red'     , 'Green'   , 'White'     , 'Yellow'   , 'Blue'))
37 |     Nationality, (Briton  , Dane    , Swede     , Norwegian, German) = EnumSort('Nationality', ('Briton'  , 'Dane'    , 'Swede'     , 'Norwegian', 'German'))
38 |     Beverage   , (Tea     , Coffee  , Milk      , Beer     , Water)  = EnumSort('Beverage'   , ('Tea'     , 'Coffee'  , 'Milk'      , 'Beer'     , 'Water'))
39 |     Pet        , (Dog     , Horse   , Cat       , Bird     , Fish)   = EnumSort('Pet'        , ('Dog'     , 'Horse'   , 'Cat'       , 'Bird'     , 'Fish'))
40 |     Sport      , (Football, Baseball, Volleyball, Hockey   , Tennis) = EnumSort('Sport'      , ('Football', 'Baseball', 'Volleyball', 'Hockey'   , 'Tennis'))
41 | 
42 |     # Uninterpreted functions
43 |     c = Function('color',       IntSort(), Color)
44 |     n = Function('nationality', IntSort(), Nationality)
45 |     b = Function('beverage',    IntSort(), Beverage)
46 |     p = Function('pet',         IntSort(), Pet)
47 |     s = Function('sport',       IntSort(), Sport)
48 | 
49 |     fact1(lambda i   : And(n(i) == Briton,     c(i) == Red))                       # The Briton lives in the red house.
50 |     fact1(lambda i   : And(n(i) == Swede,      p(i) == Dog))                       # The Swede keeps dogs as pets.
51 |     fact1(lambda i   : And(n(i) == Dane,       b(i) == Tea))                       # The Dane drinks tea.
52 |     fact2(lambda i, j: And(c(i) == Green,      c(j) == White, i == j-1))           # The green house is left to the white house.
53 |     fact1(lambda i   : And(c(i) == Green,      b(i) == Coffee))                    # The owner of the green house drinks coffee.
54 |     fact1(lambda i   : And(s(i) == Football,   p(i) == Bird))                      # The person who plays football rears birds.
55 |     fact1(lambda i   : And(c(i) == Yellow,     s(i) == Baseball))                  # The owner of the yellow house plays baseball.
56 |     fact0(                 b(3) == Milk)                                           # The man living in the center house drinks milk.
57 |     fact0(                 n(1) == Norwegian)                                      # The Norwegian lives in the first house.
58 |     fact2(lambda i, j: And(s(i) == Volleyball, p(j) == Cat,      neighbor(i, j)))  # The man who plays volleyball lives next to the one who keeps cats.
59 |     fact2(lambda i, j: And(p(i) == Horse,      s(j) == Baseball, neighbor(i, j)))  # The man who keeps the horse lives next to the one who plays baseball.
60 |     fact1(lambda i   : And(s(i) == Tennis,     b(i) == Beer))                      # The owner who plays tennis drinks beer.
61 |     fact1(lambda i   : And(n(i) == German,     s(i) == Hockey))                    # The German plays hockey.
62 |     fact2(lambda i, j: And(n(i) == Norwegian,  c(j) == Blue,     neighbor(i, j)))  # The Norwegian lives next to the blue house.
63 |     fact2(lambda i, j: And(s(i) == Volleyball, b(j) == Water,    neighbor(i, j)))  # The man who plays volleyball has a neighbor who drinks water.
64 | 
65 |     # Determine who owns the fish
66 |     query = Const("query", Nationality)
67 |     fact1 (lambda i: And(n(i) == query, p(i) == Fish))
68 | 
69 |     return query
70 | ```
71 | 


--------------------------------------------------------------------------------
/src/evals/snippets/einstein_puzzle_logic_solution.txt:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from z3 import Solver, Function, IntSort, EnumSort, Int, And, Or, Xor, Const
 3 | 
 4 | # Define the problem statement as a function that takes a solver as input and returns a query constant as output
 5 | def problem_statement(S: Solver) -> Const:
 6 |     # Example for using the solver:
 7 |     # Porp, (A, B, C) = EnumSort('Prop', ('A', 'B', 'C')) # Define an enumerated sort
 8 |     # p = Function('prop_func', IntSort(), Prop)          # Define an uninterpreted function that takes an integer as input and returns a Prop as output
 9 |     # S.add(B == p(2))                                    # Assert a new fact
10 |     # S.add(And(p(1) == A, p(2) == B, p(3) == C))         # Assert a new fact
11 |     # ...                                                 # Define more facts
12 |     # query = Const("query", Prop)                        # Create a new constant
13 |     #
14 |     # TODO: Define the logic expressions here using the S variable as the solver.
15 |     query = None
16 |     # insert your code here
17 |     #### DEFINE THE LOGIC HELPER FUNCTIONS ####
18 | 
19 |     # Create a new variable. Defines a synonym for Int to make the code more readable
20 |     v = 0
21 |     def variable():
22 |         nonlocal v
23 |         i = Int("v" + str(v))
24 |         v = v + 1
25 |         S.add(1 <= i, i <= 5)
26 |         return i
27 | 
28 |     # Assert a new fact. Defines a synonym for add to make the code more readable
29 |     def fact0(f):
30 |         S.add(f)
31 | 
32 |     # Assert a fact about a new variable
33 |     def fact1(f):
34 |         i = variable()
35 |         S.add(f(i))
36 | 
37 |     # Assert a fact about two variables
38 |     def fact2(f):
39 |         i = variable()
40 |         j = variable()
41 |         S.add(i != j)
42 |         S.add(f(i, j))
43 | 
44 |     # Assert two objects are next to each other
45 |     def neighbor(i, j):
46 |         return (Or(i == j+1, j == i+1))
47 | 
48 |     #### DEFINE THE OBJECTS ####
49 | 
50 |     # Define the enumerated sorts
51 |     Color      , (Red     , Green   , White     , Yellow   , Blue)   = EnumSort('Color'      , ('Red'     , 'Green'   , 'White'     , 'Yellow'   , 'Blue'))
52 |     Nationality, (Briton  , Dane    , Swede     , Norwegian, German) = EnumSort('Nationality', ('Briton'  , 'Dane'    , 'Swede'     , 'Norwegian', 'German'))
53 |     Beverage   , (Tea     , Coffee  , Milk      , Beer     , Water)  = EnumSort('Beverage'   , ('Tea'     , 'Coffee'  , 'Milk'      , 'Beer'     , 'Water'))
54 |     Pet        , (Dog     , Horse   , Cat       , Bird     , Fish)   = EnumSort('Pet'        , ('Dog'     , 'Horse'   , 'Cat'       , 'Bird'     , 'Fish'))
55 |     Sport      , (Football, Baseball, Volleyball, Hockey   , Tennis) = EnumSort('Sport'      , ('Football', 'Baseball', 'Volleyball', 'Hockey'   , 'Tennis'))
56 | 
57 |     # Uninterpreted functions
58 |     c = Function('color',       IntSort(), Color)
59 |     n = Function('nationality', IntSort(), Nationality)
60 |     b = Function('beverage',    IntSort(), Beverage)
61 |     p = Function('pet',         IntSort(), Pet)
62 |     s = Function('sport',       IntSort(), Sport)
63 | 
64 |     #### DEFINE THE FACTS ####
65 | 
66 |     fact1(lambda i   : And(n(i) == Briton,     c(i) == Red))                       # The Briton lives in the red house.
67 |     fact1(lambda i   : And(n(i) == Swede,      p(i) == Dog))                       # The Swede keeps dogs as pets.
68 |     fact1(lambda i   : And(n(i) == Dane,       b(i) == Tea))                       # The Dane drinks tea.
69 |     fact2(lambda i, j: And(c(i) == Green,      c(j) == White, i == j-1))           # The green house is left to the white house.
70 |     fact1(lambda i   : And(c(i) == Green,      b(i) == Coffee))                    # The owner of the green house drinks coffee.
71 |     fact1(lambda i   : And(s(i) == Football,   p(i) == Bird))                      # The person who plays football rears birds.
72 |     fact1(lambda i   : And(c(i) == Yellow,     s(i) == Baseball))                  # The owner of the yellow house plays baseball.
73 |     fact0(                 b(3) == Milk)                                           # The man living in the center house drinks milk.
74 |     fact0(                 n(1) == Norwegian)                                      # The Norwegian lives in the first house.
75 |     fact2(lambda i, j: And(s(i) == Volleyball, p(j) == Cat,      neighbor(i, j)))  # The man who plays volleyball lives next to the one who keeps cats.
76 |     fact2(lambda i, j: And(p(i) == Horse,      s(j) == Baseball, neighbor(i, j)))  # The man who keeps the horse lives next to the one who plays baseball.
77 |     fact1(lambda i   : And(s(i) == Tennis,     b(i) == Beer))                      # The owner who plays tennis drinks beer.
78 |     fact1(lambda i   : And(n(i) == German,     s(i) == Hockey))                    # The German plays hockey.
79 |     fact2(lambda i, j: And(n(i) == Norwegian,  c(j) == Blue,     neighbor(i, j)))  # The Norwegian lives next to the blue house.
80 |     fact2(lambda i, j: And(s(i) == Volleyball, b(j) == Water,    neighbor(i, j)))  # The man who plays volleyball has a neighbor who drinks water.
81 | 
82 |     #### DEFINE THE QUESTIONS ####
83 | 
84 |     # Determine who owns the fish
85 |     query = Const("query", Nationality)
86 |     fact1 (lambda i: And(n(i) == query, p(i) == Fish))
87 | 
88 |     return query
89 | 
90 | # assign result to global output variable to make accessible for caller
91 | _value_obj_ = problem_statement
92 | ```


--------------------------------------------------------------------------------
/src/evals/snippets/formulations_dsl_rewriting.txt:
--------------------------------------------------------------------------------
 1 | // Query
 2 | IsBrotherOf(jay, john, bob) <- BrotherOf(jay, john) AND FatherOf(bob, jay) AND FatherOf(bob, john);
 3 | 
 4 | // Facts
 5 | BrotherOf(x, y) <- HAS(x, brother) AND HAS(y, brother) AND Sibling(x, y);
 6 | FatherOf(x, y) <- HAS(x, son) AND ParentOf(x, y);
 7 | ParentOf(x, y) <- IS(x, parent) AND IS(y, child);
 8 | Sibling(x, y) <- IS(x, father) AND IS(y, father) OR IS(x, mother) AND IS(y, mother);
 9 | 
10 | // Primitive Types
11 | son: "a male child in relation to his parents";
12 | father: "a male parent";
13 | mother: "a female parent";
14 | brother: "a male sibling";
15 | parent: "a person's father or mother";
16 | child: "a young human being below the legal age of majority associated to this person as a parent";
17 | 
18 | 
19 | 
20 | IsBrotherOf(x, y, z) <- BrotherOf(x, y) AND FatherOf(z, x) AND FatherOf(z, y);
21 | BrotherOf(x, y) <- Sibling(x, y) AND IS(x, brother) AND IS(y, brother);
22 | FatherOf(x, y) <- ParentOf(x, y) AND IS(y, son);
23 | Sibling(x, y) <- CommonParent(x, y);
24 | CommonParent(x, y) <- (IS(x, father) AND IS(y, father)) OR (IS(x, mother) AND IS(y, mother));
25 | ParentOf(x, y) <- IS(x, parent) AND IS(y, child);
26 | IS(x, brother) <- TRUE; // Implied by the use of 'x, brother' and 'y, brother'
27 | IS(y, brother) <- TRUE;
28 | IS(y, son) <- TRUE;
29 | father: "a male parent of a child";
30 | mother: "a female parent of a child";
31 | parent: "is acknowledged as being mother or father of someone";
32 | child: "is acknowledged as being son or daughter of someone";
33 | brother: "is acknowledged as being related as a male sibling";
34 | son: "is acknowledged as being related to someone as a male child";


--------------------------------------------------------------------------------
/src/evals/snippets/google_organic_results_20240111_query=What-is-sulfuric-acid.txt:
--------------------------------------------------------------------------------
1 | [{'position': 1, 'title': 'Sulfuric acid', 'link': 'https://en.wikipedia.org/wiki/Sulfuric_acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://en.wikipedia.org/wiki/Sulfuric_acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECBQQAQ', 'displayed_link': 'https://en.wikipedia.org › wiki › Sulfuric_acid', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d927fabbed09f86fa25fa46f432dd5d721.png', 'snippet': 'Sulfuric acid or sulphuric acid (Commonwealth spelling), known in antiquity as oil of vitriol, is a mineral acid composed of the elements sulfur, oxygen, ...', 'snippet_highlighted_words': ['a mineral acid composed of the elements sulfur, oxygen'], 'sitelinks': {'inline': [{'title': 'Wet sulfuric acid process', 'link': 'https://en.wikipedia.org/wiki/Wet_sulfuric_acid_process'}, {'title': 'Fluorosulfuric acid', 'link': 'https://en.wikipedia.org/wiki/Fluorosulfuric_acid'}, {'title': 'Sulfuric acid poisoning', 'link': 'https://en.wikipedia.org/wiki/Sulfuric_acid_poisoning'}, {'title': 'Mineral acid', 'link': 'https://en.wikipedia.org/wiki/Mineral_acid'}]}, 'source': 'Wikipedia'}, {'position': 2, 'title': 'Sulfuric acid | Structure, Formula, Uses, & Facts', 'link': 'https://www.britannica.com/science/sulfuric-acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.britannica.com/science/sulfuric-acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECBUQAQ', 'displayed_link': 'https://www.britannica.com › Science › Chemistry', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d95341c12cfc66453d0a20657dac475671.png', 'date': '7 days ago', 'snippet': 'Sulfuric acid is a very strong acid; in aqueous solutions it ionizes completely to form hydronium ions (H3O+) and hydrogen sulfate ions (HSO4−) ...', 'snippet_highlighted_words': ['a very strong acid'], 'source': 'Britannica'}, {'position': 3, 'title': 'Sulfuric Acid | H2SO4 | CID 1118', 'link': 'https://pubchem.ncbi.nlm.nih.gov/compound/Sulfuric-Acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://pubchem.ncbi.nlm.nih.gov/compound/Sulfuric-Acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECBMQAQ', 'displayed_link': 'https://pubchem.ncbi.nlm.nih.gov › compound › Sulfur...', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d99a1c7cdf708d6f389c73df944140c78d.png', 'snippet': 'Sulfuric Acid | H2SO4 or H2O4S | CID 1118 - structure, chemical names, physical and chemical properties, classification, patents, literature, ...', 'snippet_highlighted_words': ['H2SO4'], 'source': 'National Institutes of Health (.gov)'}, {'position': 4, 'title': 'Sulphuric Acid - an overview', 'link': 'https://www.sciencedirect.com/topics/earth-and-planetary-sciences/sulphuric-acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.sciencedirect.com/topics/earth-and-planetary-sciences/sulphuric-acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECBAQAQ', 'displayed_link': 'https://www.sciencedirect.com › topics › sulphuric-acid', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d94b8bb70b437347db073b8381aa2a8cca.png', 'snippet': 'Reactivity. Sulfuric acid is very reactive and dissolves most metals, it is a concentrated acid that oxidizes, dehydrates, or sulfonates most organic compounds ...', 'snippet_highlighted_words': ['a concentrated acid that oxidizes, dehydrates, or sulfonates most organic compounds'], 'source': 'ScienceDirect'}, {'position': 5, 'title': 'Sulfuric Acid | H2SO4 | CID 1118', 'link': 'https://pubchem.ncbi.nlm.nih.gov/compound/sulfuric%20acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://pubchem.ncbi.nlm.nih.gov/compound/sulfuric%2520acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECDAQAQ', 'displayed_link': 'https://pubchem.ncbi.nlm.nih.gov › compound › sulfur...', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d9f4d173d8fb056997aae9c0723fe28eed.png', 'snippet': 'Sulfuric acid is a colorless oily liquid. It is soluble in water with release of heat. It is corrosive to metals and tissue.', 'snippet_highlighted_words': ['a colorless oily liquid'], 'source': 'National Institutes of Health (.gov)'}, {'position': 6, 'title': 'Sulfuric Acid | NIOSH', 'link': 'https://www.cdc.gov/niosh/topics/sulfuric-acid/default.html', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.cdc.gov/niosh/topics/sulfuric-acid/default.html&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECC8QAQ', 'displayed_link': 'https://www.cdc.gov › niosh › topics › sulfuric-acid', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d9f49e1e7b67de9693dfb0e5130084b427.png', 'snippet': 'Sulfuric acid (H2S04) is a corrosive substance, destructive to the skin, eyes, teeth, and lungs. Severe exposure can result in death.', 'snippet_highlighted_words': ['a corrosive substance'], 'source': 'Centers for Disease Control and Prevention (.gov)'}, {'position': 7, 'title': 'Sulfuric acid', 'link': 'https://www.dcceew.gov.au/environment/protection/npi/substances/fact-sheets/sulfuric-acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.dcceew.gov.au/environment/protection/npi/substances/fact-sheets/sulfuric-acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECDMQAQ', 'displayed_link': 'https://www.dcceew.gov.au › substances › fact-sheets', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d907bc9f8f978456817da8c707e0652596.png', 'date': 'Aug 15, 2022', 'snippet': 'Description. Sulfuric acid is a corrosive chemical and can severely burn the skin and eyes. It may cause third degree burns and blindness on ...', 'snippet_highlighted_words': ['a corrosive chemical'], 'source': 'DCCEEW'}, {'position': 8, 'title': 'Right to Know Hazardous Substance Fact Sheet', 'link': 'https://nj.gov/health/eoh/rtkweb/documents/fs/1761.pdf', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://nj.gov/health/eoh/rtkweb/documents/fs/1761.pdf&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECDIQAQ', 'displayed_link': 'https://nj.gov › health › eoh › rtkweb › documents', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d99e695c38112563552cbd709000f50c83.png', 'snippet': 'Description and Use. Sulfuric Acid is a clear, colorless to brown, odorless liquid. It is used to make storage batteries, fertilizers, paper products, textiles ...', 'snippet_highlighted_words': ['a clear, colorless to brown, odorless liquid'], 'rich_snippet': {'top': {'detected_extensions': {'pages': 6}, 'extensions': ['6 pages']}}, 'source': 'New Jersey (.gov)'}, {'position': 9, 'title': 'Top 5 Uses of Sulfuric Acid', 'link': 'https://labproinc.com/blogs/chemicals-and-solvents/top-5-uses-of-sulfuric-acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://labproinc.com/blogs/chemicals-and-solvents/top-5-uses-of-sulfuric-acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECC4QAQ', 'displayed_link': 'https://labproinc.com › blogs › chemicals-and-solvents', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d940a214d590736403a4d1cfba3016efb7.png', 'date': 'Mar 22, 2021', 'snippet': 'In its purest form, sulfuric acid is a highly corrosive liquid with a pH of 0.5 and is miscible with water at all concentrations.', 'snippet_highlighted_words': ['a highly corrosive liquid with a pH of 0.5'], 'source': 'Lab Pro'}, {'position': 10, 'title': 'Sulfuric Acid - an overview', 'link': 'https://www.sciencedirect.com/topics/medicine-and-dentistry/sulfuric-acid', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.sciencedirect.com/topics/medicine-and-dentistry/sulfuric-acid&ved=2ahUKEwi3oNHgm9aDAxV8GlkFHZ1VA1wQFnoECDEQAQ', 'displayed_link': 'https://www.sciencedirect.com › medicine-and-dentistry', 'favicon': 'https://serpapi.com/searches/65a0552e2ff421c4812075f9/images/8729f669d9758bd691b822819d4674d9038e5a368ee90161e373d82e3632150e.png', 'snippet': 'It is a colorless, odorless and viscous liquid that was historically called oil of vitriol. Sulfuric acid is a key chemical used in the chemical industry and ...', 'snippet_highlighted_words': ['a colorless, odorless and viscous liquid that was historically called oil of vitriol'], 'source': 'ScienceDirect'}]


--------------------------------------------------------------------------------
/src/evals/snippets/google_organic_results_20240121_query=Search-for-U-235.txt:
--------------------------------------------------------------------------------
1 | [{'position': 1, 'title': 'Uranium-235', 'link': 'https://en.wikipedia.org/wiki/Uranium-235', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://en.wikipedia.org/wiki/Uranium-235&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECBQQAQ', 'displayed_link': 'https://en.wikipedia.org › wiki › Uranium-235', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823b7ea21d3488a8ed29ff0ab161b2593ac.png', 'snippet': 'Uranium-235 is an isotope of uranium making up about 0.72% of natural uranium. Unlike the predominant isotope uranium-238, it is fissile, i.e., ...', 'snippet_highlighted_words': ['Uranium-235'], 'source': 'Wikipedia'}, {'position': 2, 'title': 'Radioisotope Brief: Uranium-235 (U- ...', 'link': 'https://www.cdc.gov/nceh/radiation/emergencies/isotopes/uranium.htm', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.cdc.gov/nceh/radiation/emergencies/isotopes/uranium.htm&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECBMQAQ', 'displayed_link': 'https://www.cdc.gov › nceh › emergencies › isotopes', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f182360a77dc35fe60608cee5f4312528b141.png', 'snippet': 'What does it look like? When it has been refined and enriched, uranium is a silvery-white metal. How can it hurt me? Because uranium decays by alpha ...', 'snippet_highlighted_words': ['look', 'uranium', 'uranium'], 'source': 'Centers for Disease Control and Prevention (.gov)'}, {'position': 3, 'title': 'Uranium-235 (U-235) | Definition, Uses, Half-Life, & Facts', 'link': 'https://www.britannica.com/science/uranium-235', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.britannica.com/science/uranium-235&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECBYQAQ', 'displayed_link': 'https://www.britannica.com › Science › Chemistry', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823193f49a69cdd45c49a4dadef0f825fc9.png', 'date': 'Nov 24, 2023', 'snippet': 'Uranium-235 (U-235), radioactive isotope of the element uranium with a nucleus containing 92 protons and 143 neutrons. Uranium-235 is the ...', 'snippet_highlighted_words': ['Uranium-235', 'U', '235', 'Uranium-235'], 'source': 'Britannica'}, {'position': 4, 'title': 'Uranium 235 - an overview', 'link': 'https://www.sciencedirect.com/topics/earth-and-planetary-sciences/uranium-235', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.sciencedirect.com/topics/earth-and-planetary-sciences/uranium-235&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECBUQAQ', 'displayed_link': 'https://www.sciencedirect.com › topics › uranium-235', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823630b126f3685268958d8429f7ee8634d.png', 'snippet': 'Uranium-235 is the only naturally occurring fissile nuclide.13 Fissile nuclides undergo thermal fission stimulated by neutron capture.', 'snippet_highlighted_words': ['Uranium-235'], 'source': 'ScienceDirect'}, {'position': 5, 'title': 'Is it difficult to find Uranium-235?', 'link': 'https://www.quora.com/Is-it-difficult-to-find-Uranium-235-1', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.quora.com/Is-it-difficult-to-find-Uranium-235-1&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECBkQAQ', 'displayed_link': 'https://www.quora.com › Is-it-difficult-to-find-Uranium-...', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823929a16e298f77d74f1d022e62e25b8fe.png', 'date': 'Jan 17, 2020', 'snippet': 'No. Uranium is not a very rare metal, and all natural uranium consists of the same mixture of U238 and a little U235. The mixture is comp!early ...', 'snippet_highlighted_words': ['U235'], 'sitelinks': {'list': [{'title': 'Where can I find serious buyers for Uranium ore ...', 'link': 'https://www.quora.com/Where-can-I-find-serious-buyers-for-Uranium-ore-235-and-238', 'answer_count': 6, 'date': 'Nov 2, 2018'}, {'title': 'How do we find the exact atomic mass of Uranium 235 ...', 'link': 'https://www.quora.com/How-do-we-find-the-exact-atomic-mass-of-Uranium-235-I-tried-to-find-it-by-adding-protons-and-neutronss-masses-But-It-came-out-about-236-0526-But-the-web-says-235-0439', 'answer_count': 5, 'date': 'Jun 28, 2022'}, {'title': "Where can Uranium-235 be found? What's the price ...", 'link': 'https://www.quora.com/Where-can-Uranium-235-be-found-Whats-the-price', 'answer_count': 8, 'date': 'Aug 30, 2019'}, {'title': 'How many e-, p+, and no does uranium-235 have ...', 'link': 'https://www.quora.com/How-many-e-p+-and-no-does-uranium-235-have', 'answer_count': 2, 'date': 'Sep 25, 2017'}]}, 'source': 'Quora'}, {'position': 6, 'title': 'Uranium: What It It?', 'link': 'https://geoinfo.nmt.edu/resources/uranium/what.html', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://geoinfo.nmt.edu/resources/uranium/what.html&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECDAQAQ', 'displayed_link': 'https://geoinfo.nmt.edu › resources › uranium › what', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823a2e4d1f6f0f98f58a16607ac18446a17.png', 'snippet': 'Uranium has a well-established radioactive decay series. U-238 and U-235 (which has 143 neutrons) are the most common isotopes of uranium. Uranium naturally ...', 'snippet_highlighted_words': ['U', '235'], 'source': 'New Mexico Bureau of Geology & Mineral Resources'}, {'position': 7, 'title': 'The Fission Process | MIT Nuclear Reactor Laboratory', 'link': 'https://nrl.mit.edu/reactor/fission-process', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://nrl.mit.edu/reactor/fission-process&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECC4QAQ', 'displayed_link': 'https://nrl.mit.edu › reactor › fission-process', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823c88cb98f600d5c67666b76ab5d51f2c2.png', 'snippet': 'When a U-235 nucleus absorbs an extra neutron, it quickly breaks into two parts. This process is known as fission (see diagram below). Each time a U-235 nucleus ...', 'snippet_highlighted_words': ['U', '235', 'U', '235'], 'source': 'MIT Nuclear Reactor Laboratory'}, {'position': 8, 'title': 'Nuclear Fuel Facts: Uranium', 'link': 'https://www.energy.gov/ne/nuclear-fuel-facts-uranium', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://www.energy.gov/ne/nuclear-fuel-facts-uranium&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECC8QAQ', 'displayed_link': 'https://www.energy.gov › nuclear-fuel-facts-uranium', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f18232dacbea4e480a578e90551cfd58fef3f.png', 'snippet': 'Enter the terms you wish to search for. Search. National Labs · Energy.gov Offices ... Depleted Uranium – contains a 235U concentration of 0.711 percent or less.', 'snippet_highlighted_words': ['you', 'search', 'Search', 'Uranium', '235U'], 'source': 'Department of Energy (.gov)'}, {'position': 9, 'title': 'Uranium-235 | U | CID 61784', 'link': 'https://pubchem.ncbi.nlm.nih.gov/compound/Uranium-235', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://pubchem.ncbi.nlm.nih.gov/compound/Uranium-235&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECDIQAQ', 'displayed_link': 'https://pubchem.ncbi.nlm.nih.gov › compound › Uraniu...', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823ef923acf422eead4cc3807dbae873f1c.png', 'snippet': 'Uranium-235 is an isotope of uranium. Uranium is a chemical element that has the symbol U and atomic number 92. It is a normal part of rocks, soil, air, ...', 'snippet_highlighted_words': ['Uranium-235'], 'source': 'National Institutes of Health (.gov)'}, {'position': 10, 'title': 'What is Uranium? How Does it Work', 'link': 'https://world-nuclear.org/information-library/nuclear-fuel-cycle/introduction/what-is-uranium-how-does-it-work.aspx', 'redirect_link': 'https://www.google.com/url?sa=t&source=web&rct=j&opi=89978449&url=https://world-nuclear.org/information-library/nuclear-fuel-cycle/introduction/what-is-uranium-how-does-it-work.aspx&ved=2ahUKEwj1pr6C4e6DAxXJAHkGHe8_AogQFnoECDkQAQ', 'displayed_link': 'https://world-nuclear.org › introduction › what-is-uran...', 'favicon': 'https://serpapi.com/searches/65ad330ed737d7e2206f7351/images/514998e6e426c435a40bad2cb37f1823b1816644af2dcf7af0d3681a34c2baa4.png', 'snippet': 'Uranium is a heavy metal which has been used as an abundant source of concentrated energy for over 60 years. Uranium occurs in most rocks in concentrations of 2 ...', 'snippet_highlighted_words': ['Uranium', 'Uranium'], 'source': 'World Nuclear Association'}]


--------------------------------------------------------------------------------
/src/evals/snippets/jays_brother_human_solution.txt:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from z3 import Solver, Function, EnumSort, IntSort, Const, And, sat
 3 | 
 4 | # Define the enumerated sort for family members
 5 | FamilyMember, (Bob, John, Jay) = EnumSort('FamilyMember', ('Bob', 'John', 'Jay'))
 6 | 
 7 | # Uninterpreted functions for family relationships
 8 | father_of = Function('father_of', FamilyMember, FamilyMember)
 9 | brother_of = Function('brother_of', FamilyMember, FamilyMember)
10 | 
11 | S = Solver()
12 | 
13 | # Facts based on the problem statement
14 | S.add(father_of(John) == Bob)
15 | S.add(father_of(Jay) == Bob)
16 | S.add(brother_of(Jay) == John)
17 | 
18 | # Query to determine Jay's brother
19 | query = Const("query", FamilyMember)
20 | S.add(query == brother_of(Jay))
21 | 
22 | r = S.check()
23 | if r == sat:
24 |     m = S.model()
25 |     answer = m[query]
26 | else:
27 |     answer = "Failed"
28 | ```


--------------------------------------------------------------------------------
/src/evals/snippets/jays_brother_trajectories.txt:
--------------------------------------------------------------------------------
 1 | ```python
 2 | from z3 import Int, Solver, And, Const, IntSort, EnumSort, Function
 3 | 
 4 | def solve_puzzle(S: Solver) -> Const:
 5 |     # Define the enumeration sort for the individuals
 6 |     Person, (BobE, JohnE, JayE, JaysBrotherE, FatherE) = EnumSort('Person', ['Bob', 'John', 'Jay', 'JaysBrother', 'Father'])
 7 | 
 8 |     # Define a function from integers to persons
 9 |     identity = Function('identity', IntSort(), Person)
10 | 
11 |     # Define integer variables to represent the identities of the individuals
12 |     Bob, John, Jay, JaysBrother, Father = Int('Bob'), Int('John'), Int('Jay'), Int('JaysBrother'), Int('Father')
13 | 
14 |     # Add puzzle constraints
15 |     S.add(identity(1) == BobE, identity(2) == JohnE, identity(3) == JayE)
16 |     S.add(Bob == 1, Father == 1, John == 2, Jay == 3, JaysBrother == John)
17 |     S.add(And(Father == Bob, John != Jay, Jay != Father))
18 |     S.add(And(JaysBrother == John, Jay != Father, Father == Bob))
19 | 
20 |     # Create a new constant of Person sort and equate it to JaysBrother enumeration
21 |     query = Const("query", Person)
22 |     S.add(query == identity(JaysBrother))
23 | 
24 |     return query
25 | ```
26 | 
27 | 
28 | ```python
29 | from z3 import Solver, Bool, And, Not, Const, BoolSort, EnumSort, Function, IntSort
30 | 
31 | def solve_puzzle(S: Solver) -> Const:
32 |     # Define the enumeration sort for the individuals
33 |     Person, (BobE, JohnE, JayE, JaysBrotherE, FatherE) = EnumSort('Person', ['Bob', 'John', 'Jay', 'JaysBrother', 'Father'])
34 | 
35 |     # Define a function from boolean to persons (for brother status)
36 |     is_brother = Function('is_brother', Person, BoolSort())
37 | 
38 |     # Define the relationships
39 |     S.add(is_brother(JohnE) == True)  # John is a brother
40 |     S.add(is_brother(JayE) == True)   # Jay is a brother
41 | 
42 |     # Bob has two sons, John and Jay
43 |     S.add(And(is_brother(JohnE), is_brother(JayE)))
44 | 
45 |     # Jay has one brother, and John must be the brother
46 |     S.add(is_brother(JayE) == is_brother(JohnE))
47 | 
48 |     # Jay's brother (John) has a brother (Jay) and a father
49 |     S.add(And(is_brother(JaysBrotherE), JaysBrotherE != FatherE))
50 | 
51 |     # Create a new constant of Person sort to represent JaysBrother
52 |     query = Const("query", Person)
53 |     S.add(query == JohnE)  # John is Jay's brother
54 | 
55 |     return query
56 | ```
57 | 
58 | 
59 | ```python
60 | from z3 import Solver, Const, EnumSort, Function, IntSort
61 | 
62 | def solve_puzzle(S: Solver) -> Const:
63 |     # Define the enumeration sort for the individuals
64 |     Person, (BobE, JohnE, JayE, JaysBrotherE, FatherE) = EnumSort('Person', ['Bob', 'John', 'Jay', 'JaysBrother', 'Father'])
65 | 
66 |     # Define a function from persons to the count of their sons
67 |     sons_count = Function('sons_count', Person, IntSort())
68 | 
69 |     # Add constraints based on the problem statement
70 |     # Bob has two sons
71 |     S.add(sons_count(BobE) == 2)
72 | 
73 |     # Jay has one brother and a father
74 |     # Jay_sons represents Jay + Jay's brother
75 |     S.add(sons_count(JayE) + sons_count(JaysBrotherE) == 2)
76 | 
77 |     # Ensure all the previous constraints conclude that John is Jay's brother
78 |     S.add(sons_count(JayE) + sons_count(JohnE) == 2)
79 |     S.add(sons_count(BobE) == sons_count(JayE) + sons_count(JohnE))
80 | 
81 |     # Create a new constant of Person sort to represent JaysBrother
82 |     query = Const("query", Person)
83 |     S.add(query == JohnE)  # Assuming John is Jay's brother
84 | 
85 |     return query
86 | ```
87 | 


--------------------------------------------------------------------------------
/src/evals/snippets/latex_templating_output.txt:
--------------------------------------------------------------------------------
 1 | \begin{figure*}[ht]
 2 |   \centering
 3 |   \begin{minipage}{.57\textwidth}
 4 |     \centering
 5 |     \begin{tabular}{lcccccc}
 6 |       \toprule
 7 |       \textbf{Benchmarks} &  GPT-3.5 Turbo & GPT-4 Turbo & Gemini-Pro & LlaMA 2 13B & Mistral 7B & Zephyr 7B \\
 8 |       \midrule
 9 |       Associations & 0.80 & \textbf{0.95} & 0.90 & 0.60 & 0.67 & 0.70 \\
10 |       Modality & 0.60 & 0.85 & \textbf{0.89} & 0.45 & 0.50 & 0.60 \\
11 |       Code & 0.70 & \textbf{0.80} & 0.78 & 0.40 & 0.45 & 0.50 \\
12 |       Components & 0.67 & \textbf{0.85} & 0.75 & 0.30 & 0.40 & 0.43 \\
13 |       Graphs & 0.70 & \textbf{0.90} & 0.70 & 0.45 & 0.30 & 0.36 \\
14 |       \midrule
15 |       \textbf{Total} & Total0.69 & \textbf{0.87} & 0.80 & 0.44 & 0.46 & 0.52 \\
16 |       \bottomrule
17 |     \end{tabular}
18 |     % You could also add a subcaption specific to the table here if needed (use the subcaption package)
19 |     % \subcaption{Performance benchmark results.}
20 |     \label{tab:benchmark_results}
21 |   \end{minipage}%
22 |   ~
23 |   \begin{minipage}{.43\textwidth}
24 |     \centering
25 |     \includegraphics[width=\linewidth]{images/benchmark_comparison_chart.pdf}
26 |     % You could also add a subcaption specific to the figure here if needed (use the subcaption package)
27 |     % \subcaption{Benchmark comparison chart.}
28 |     \label{fig:spider_plot}
29 |   \end{minipage}
30 |   \caption{Placeholder for performance benchmarks and comparison chart for various models.}
31 |   \label{fig:my_label} % General label for the whole figure (both image and table)
32 | \end{figure*}


--------------------------------------------------------------------------------
/src/evals/snippets/latex_templating_problem.txt:
--------------------------------------------------------------------------------
  1 | import os
  2 | import argparse
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | import seaborn as sns
  6 | 
  7 | from src.func import run
  8 | 
  9 | 
 10 | BENCHMARK_NAME_MAPPING = {
 11 |     'eval_in_context_associations': 'Associations',
 12 |     'eval_multimodal_bindings': 'Modality',
 13 |     'eval_program_synthesis': 'Code',
 14 |     'eval_logic_components': 'Logic',
 15 |     'eval_computation_graphs': 'Graphs'
 16 | }
 17 | 
 18 | 
 19 | MODEL_NAME_MAPPING = {
 20 |     'gpt4': 'GPT-4 Turbo',
 21 |     'gpt3.5': 'GPT-3.5 Turbo',
 22 |     'gemini': 'Gemini-Pro',
 23 |     'llama': 'LlaMA 2 13B',
 24 |     'mistral': 'Mistral 7B',
 25 |     'zephyr': 'Zephyr 7B'
 26 | }
 27 | 
 28 | 
 29 | DUMMY_DATA = {
 30 |     f"{MODEL_NAME_MAPPING['gpt3.5']}": {
 31 |         f"{BENCHMARK_NAME_MAPPING['eval_in_context_associations']}": {"performance": 0.8},
 32 |         f"{BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']}": {"performance": 0.6},
 33 |         f"{BENCHMARK_NAME_MAPPING['eval_program_synthesis']}": {"performance": 0.7},
 34 |         f"{BENCHMARK_NAME_MAPPING['eval_logic_components']}": {"performance": 0.67},
 35 |         f"{BENCHMARK_NAME_MAPPING['eval_computation_graphs']}": {"performance": 0.7}
 36 |     },
 37 |     f"{MODEL_NAME_MAPPING['gpt4']}": {
 38 |         f"{BENCHMARK_NAME_MAPPING['eval_in_context_associations']}": {"performance": 0.95},
 39 |         f"{BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']}": {"performance": 0.85},
 40 |         f"{BENCHMARK_NAME_MAPPING['eval_program_synthesis']}": {"performance": 0.8},
 41 |         f"{BENCHMARK_NAME_MAPPING['eval_logic_components']}": {"performance": 0.85},
 42 |         f"{BENCHMARK_NAME_MAPPING['eval_computation_graphs']}": {"performance": 0.9}
 43 |     },
 44 |     f"{MODEL_NAME_MAPPING['gemini']}": {
 45 |         f"{BENCHMARK_NAME_MAPPING['eval_in_context_associations']}": {"performance": 0.9},
 46 |         f"{BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']}": {"performance": 0.89},
 47 |         f"{BENCHMARK_NAME_MAPPING['eval_program_synthesis']}": {"performance": 0.78},
 48 |         f"{BENCHMARK_NAME_MAPPING['eval_logic_components']}": {"performance": 0.75},
 49 |         f"{BENCHMARK_NAME_MAPPING['eval_computation_graphs']}": {"performance": 0.7}
 50 |     },
 51 |     f"{MODEL_NAME_MAPPING['llama']}": {
 52 |         f"{BENCHMARK_NAME_MAPPING['eval_in_context_associations']}": {"performance": 0.6},
 53 |         f"{BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']}": {"performance": 0.45},
 54 |         f"{BENCHMARK_NAME_MAPPING['eval_program_synthesis']}": {"performance": 0.4},
 55 |         f"{BENCHMARK_NAME_MAPPING['eval_logic_components']}": {"performance": 0.3},
 56 |         f"{BENCHMARK_NAME_MAPPING['eval_computation_graphs']}": {"performance": 0.45}
 57 |     },
 58 |     f"{MODEL_NAME_MAPPING['mistral']}": {
 59 |         f"{BENCHMARK_NAME_MAPPING['eval_in_context_associations']}": {"performance": 0.67},
 60 |         f"{BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']}": {"performance": 0.5},
 61 |         f"{BENCHMARK_NAME_MAPPING['eval_program_synthesis']}": {"performance": 0.45},
 62 |         f"{BENCHMARK_NAME_MAPPING['eval_logic_components']}": {"performance": 0.4},
 63 |         f"{BENCHMARK_NAME_MAPPING['eval_computation_graphs']}": {"performance": 0.3}
 64 |     },
 65 |     f"{MODEL_NAME_MAPPING['zephyr']}": {
 66 |         f"{BENCHMARK_NAME_MAPPING['eval_in_context_associations']}": {"performance": 0.7},
 67 |         f"{BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']}": {"performance": 0.6},
 68 |         f"{BENCHMARK_NAME_MAPPING['eval_program_synthesis']}": {"performance": 0.5},
 69 |         f"{BENCHMARK_NAME_MAPPING['eval_logic_components']}": {"performance": 0.43},
 70 |         f"{BENCHMARK_NAME_MAPPING['eval_computation_graphs']}": {"performance": 0.36}
 71 |     }
 72 | }
 73 | 
 74 | 
 75 | LATEX_TEMPLATE = """
 76 | \\begin{{figure*}}[ht]
 77 |   \\centering
 78 |   \\begin{{minipage}}{{.57\\textwidth}}
 79 |     \\centering
 80 |     \\begin{{tabular}}{{lcccccc}}
 81 |       \\toprule
 82 |       \\textbf{{Benchmarks}} &  {model_names} \\\\
 83 |       \\midrule
 84 |       {benchmark_in_context_association_row} \\\\
 85 |       {benchmark_multimodality_row} \\\\
 86 |       {benchmark_program_synthesis_row} \\\\
 87 |       {benchmark_components_row} \\\\
 88 |       {benchmark_computation_graphs_row} \\\\
 89 |       \\midrule
 90 |       \\textbf{{Total}} & {total_row} \\\\
 91 |       \\bottomrule
 92 |     \\end{{tabular}}
 93 |     % You could also add a subcaption specific to the table here if needed (use the subcaption package)
 94 |     % \\subcaption{{Performance benchmark results.}}
 95 |     \\label{{tab:benchmark_results}}
 96 |   \\end{{minipage}}%
 97 |   ~
 98 |   \\begin{{minipage}}{{.43\\textwidth}}
 99 |     \\centering
100 |     \\includegraphics[width=\\linewidth]{{images/benchmark_comparison_chart.pdf}}
101 |     % You could also add a subcaption specific to the figure here if needed (use the subcaption package)
102 |     % \\subcaption{{Benchmark comparison chart.}}
103 |     \\label{{fig:spider_plot}}
104 |   \\end{{minipage}}
105 |   \\caption{{Placeholder for performance benchmarks and comparison chart for various models.}}
106 |   \\label{{fig:my_label}} % General label for the whole figure (both image and table)
107 | \\end{{figure*}}
108 | """
109 | 
110 | 
111 | #def create_latex_result(data):
112 | #    latex_table = ''
113 | #    # TODO: Write this function to create a latex table from the data
114 | #
115 | #    return latex_table
116 | {TODO}
117 | 
118 | 
119 | # assign result to global output variable
120 | _value_obj_ = create_latex_result(DUMMY_DATA)
121 | 


--------------------------------------------------------------------------------
/src/evals/snippets/latex_templating_solution_1.txt:
--------------------------------------------------------------------------------
 1 | ```python
 2 | def create_latex_result(data):
 3 |     """
 4 |     This function creates a latex table from the data dictionary.
 5 | 
 6 |     :param data: A dictionary with the results of the benchmarks
 7 |     :return: A latex table as a string
 8 |     """
 9 |     # Gather the model names
10 |     model_names = " & ".join(key for key in data.keys())
11 | 
12 |     # Initialize the total scores
13 |     total_scores = {model: 0.0 for model in data.keys()}
14 | 
15 |     # Prepare table content
16 |     benchmark_rows = {bench_name: "" for bench_name in BENCHMARK_NAME_MAPPING.values()}
17 |     for bench_name in BENCHMARK_NAME_MAPPING.values():
18 |         if bench_name not in str(list(data.values())):
19 |             print(f"Skipping benchmark because not all results are computed. Did not find `{bench_name}` in `{data.keys()}`")
20 |             return
21 |         # Initialize list to keep the scores for this benchmark to find the best model
22 |         scores = [(model, values[bench_name]['performance']) for model, values in data.items()]
23 |         best_score = max(scores, key=lambda x: x[1])[1]
24 | 
25 |         # Create row for the latex table and update the total scores
26 |         row = f"{bench_name}"
27 |         for model, score in scores:
28 |             # Add to the total score
29 |             total_scores[model] += score
30 |             # Format row with best model in bold
31 |             if score == best_score:
32 |                 row += f" & \\textbf{{{score:.2f}}}"
33 |             else:
34 |                 row += f" & {score:.2f}"
35 |         benchmark_rows[bench_name] = row
36 | 
37 |     # Compute the average of total scores
38 |     for model in total_scores.keys():
39 |         total_scores[model] /= len(BENCHMARK_NAME_MAPPING)
40 | 
41 |     # Best total performance in bold
42 |     best_total = max(total_scores.values())
43 |     total_values = " & ".join(f"\\textbf{{{v:.2f}}}" if v == best_total else f"{v:.2f}" for v in total_scores.values())
44 | 
45 |     # Use the LATEX_TEMPLATE and inject the benchmark rows
46 |     latex_table = LATEX_TEMPLATE.format(
47 |         model_names=model_names,
48 |         benchmark_in_context_association_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_in_context_associations']],
49 |         benchmark_multimodality_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']],
50 |         benchmark_program_synthesis_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_program_synthesis']],
51 |         benchmark_components_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_logic_components']],
52 |         benchmark_computation_graphs_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_computation_graphs']],
53 |         total_row='Total' + total_values
54 |     )
55 | 
56 |     # Print the latex table to the console
57 |     return latex_table
58 | ```


--------------------------------------------------------------------------------
/src/evals/snippets/latex_templating_solution_2.txt:
--------------------------------------------------------------------------------
 1 | ```python
 2 | def create_latex_result(data):
 3 |     modelnames = " & ".join(key for key in data.keys())
 4 |     totalscores = {model: 0.0 for model in data.keys()}
 5 |     benchmarkrows = {bench_name: "" for bench_name in BENCHMARK_NAME_MAPPING.values()}
 6 |     for bench_name in BENCHMARK_NAME_MAPPING.values():
 7 |         scores = [(model, values[bench_name]['performance']) for model, values in data.items()]
 8 |         best_score = max(scores, key=lambda x: x[1])[1]
 9 |         row = f"{bench_name}"
10 |         for model, score in scores:
11 |             if score == best_score:
12 |                 benchmarkrows[bench_name] = row + f" & \\textbf{{{score:.2f}}}"
13 |             else:
14 |                 benchmarkrows[bench_name] = row + f" & {score:.2f}"
15 |             totalscores[model] += score
16 |     for model in totalscores.keys():
17 |         totalscores[model] /= len(BENCHMARK_NAME_MAPPING)
18 |     best_total = max(totalscores.values())
19 |     total_values = " & ".join(f"\\textbf{{{v:.2f}}}" if v == best_total else f"{v:.2f}" for v in totalscores.values())
20 |     return LATEX_TEMPLATE.format(
21 |         model_names=modelnames,
22 |         benchmark_in_context_association_row=benchmarkrows[BENCHMARK_NAME_MAPPING['eval_in_context_associations']],
23 |         benchmark_multimodality_row=benchmarkrows[BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']],
24 |         benchmark_program_synthesis_row=benchmarkrows[BENCHMARK_NAME_MAPPING['eval_program_synthesis']],
25 |         benchmark_components_row=benchmarkrows[BENCHMARK_NAME_MAPPING['eval_logic_components']],
26 |         benchmark_computation_graphs_row=benchmarkrows[BENCHMARK_NAME_MAPPING['eval_computation_graphs']],
27 |         total_row='Total' + total_values
28 |     )
29 | ```


--------------------------------------------------------------------------------
/src/evals/snippets/paper/bib/related_work/laird87.txt:
--------------------------------------------------------------------------------
 1 | The ultimate goal of work in cognitive architecture is to provide the foundation for a system capable of general intelligent behavior. That is, the goal is to provide the underlying structure that would enable a system to perform the full range of cognitive tasks, employ the full range of problem solving methods and representations appropriate for the tasks, and learn about all aspects of the tasks and its performance on them. In this article we present SOAR, an implemented proposal for such an architecture. We describe its organizational principles, the system as currently implemented, and demonstrations of its capabilities.
 2 | 
 3 | @article{Laird:87,
 4 |     title = {SOAR: An architecture for general intelligence},
 5 |     journal = {Artificial Intelligence},
 6 |     volume = {33},
 7 |     number = {1},
 8 |     pages = {1-64},
 9 |     year = {1987},
10 |     issn = {0004-3702},
11 |     author = {J. E. Laird and A. Newell and P. S. Rosenbloom}
12 | }


--------------------------------------------------------------------------------
/src/evals/snippets/paper/bib/related_work/mccarthy06.txt:
--------------------------------------------------------------------------------
 1 | We propose that a 2 month, 10 man study of artificial intelligence be carried out during the summer of 1956 at Dartmouth College in Hanover, New Hampshire. The study is to proceed on the basis of the conjecture that every aspect of learning or any other feature of intelligence can in principle be so precisely described that a machine can be made to simulate it. An attempt will be made to find how to make machines use language, form abstractions and concepts, solve kinds of problems now reserved for humans, and improve themselves. We think that a significant advance can be made in one or more of these problems if a carefully selected group of scientists work on it together for a summer.
 2 | The following are some aspects of the artificial intelligence problem:
 3 | 1 Automatic Computers
 4 | If a machine can do a job, then an automatic calculator can be programmed to simulate the machine. The speeds and memory capacities of present computers may be insufficient to simulate many of the higher functions of the human brain, but the major obstacle is not lack of machine capacity, but our inability to write programs taking full advantage of what we have.
 5 | 2. How Can a Computer be Programmed to Use a Language
 6 | It may be speculated that a large part of human thought consists of manipulating words according to rules of reasoning and rules of conjecture. From this point of view, forming a generalization consists of admitting a new word and some rules whereby sentences containing it imply and are implied by others. This idea has never been very precisely formulated nor have examples been worked out.
 7 | 3. Neuron Nets
 8 | How can a set of (hypothetical neurons be arranged so as to form concepts. Considerable theoretical and experimental work has been done on this problem by Uttley, Rashevsky and his group, Farley and Clark, Pitts and McCulloch, Minsky, Rochester and Holland, and others. Partial results have been obtained but the problem needs more theoretical work.
 9 | 4. Theory of the Size of a Calculation
10 | If we are given a well-defined problem (one for which it is possible to test mechanically whether or not a proposed answer is a valid answer) one way of solving it is to try all possible answers in order. This method is inefficient, and to exclude it one must have some criterion for efficiency of calculation. Some consideration will show that to get a measure of the efficiency of a calculation it is necessary to have on hand a method of measuring the complexity of calculating devices which in turn can be done if one has a theory of the complexity of functions. Some partial results on this problem have been obtained by Shannon, and also by McCarthy.
11 | 5. Self-Improvement
12 | Probably a truly intelligent machine will carry out activities which may best be described as self-improvement. Some schemes for doing this have been proposed and are worth further study.
13 | It seems likely that this question can be studied abstractly as well.
14 | 6. Abstractions
15 | A number of types of "abstraction" can be distinctly defined and several others less distinctly.
16 | A direct attempt to classify these and to describe machine methods of forming abstractions from sensory and other data would seem worthwhile.
17 | 7. Randomness and Creativity
18 | A fairly attractive and yet clearly incomplete conjecture is that the difference between creative thinking and unimaginative competent thinking lies in the injection of a some randomness. The randomness must be guided by intuition to be efficient. In other words, the educated guess or the hunch include controlled randomness in otherwise orderly thinking.
19 | 
20 | 
21 | @article{McCarthy:06,
22 |   title={A proposal for the Dartmouth summer research project on artificial intelligence, August 31, 1955},
23 |   author={J. McCarthy and M. L. Minsky and N. Rochester and C. E. Shannon},
24 |   year={2006},
25 |   journal={AI magazine},
26 |   volume={27},
27 |   number={4},
28 |   pages={12--12}
29 | }


--------------------------------------------------------------------------------
/src/evals/snippets/paper/bib/related_work/newell56.txt:
--------------------------------------------------------------------------------
 1 | In this paper we describe a complex information processing system, which we call the logic theory machine, that is capable of discovering proofs for theorems in symbolic logic. This system, in contrast to the systematic algorithms that are ordinarily employed in computation, relies heavily on heuristic methods similar to those that have been observed in . human problem solving activity. The specification is written in a formal language, of the nature of a pseudo-code, that is suitable for coding for digital computers. However, the present paper is concerned exclusively with specification of the system, and not with its realization in a computer. The logic theory machine is part of a program of research to understand complex information processing systems by specifying and synthesizing a substantial variety of such systems for empirical study.
 2 | 
 3 | @article{Newell:56,
 4 |   title={The logic theory machine--A complex information processing system},
 5 |   author={A. Newell and H. Simon},
 6 |   journal={IRE Transactions on information theory},
 7 |   volume={2},
 8 |   number={3},
 9 |   pages={61--79},
10 |   year={1956},
11 |   publisher={IEEE}
12 | }


--------------------------------------------------------------------------------
/src/evals/snippets/paper/bib/related_work/newell57.txt:
--------------------------------------------------------------------------------
 1 | This paper is a case study in problem solving, representing part of a program of research on complex information-processing systems. We have specified a system for finding proofs of theorems in elementary symbolic logic, and by programming a computer to these specifications, have obtained empirical data on the problem-solving process in elementary logic. The program is called the Logic Theory Machine (LT); it was devised to learn how it is possible to solve difficult problems such as proving mathematical theorems, discovering scientific laws from data, playing chess, or understanding the meaning of English prose.
 2 | 
 3 | @article{Newell:57,
 4 |   title={Empirical explorations of the logic theory machine: a case study in heuristic},
 5 |   author={Newell, A. and Shaw, J. C. and Simon, H. A.},
 6 |   journal={IRE-AIEE-ACM '57 (Western): Papers presented at the February 26-28, 1957, western joint computer conference: Techniques for reliability},
 7 |   pages={218--230},
 8 |   year={1957},
 9 |   publisher={ACM},
10 |   doi={10.1145/1455567.1455605},
11 |   url={https://doi.org/10.1145/1455567.1455605}
12 | }


--------------------------------------------------------------------------------
/src/evals/snippets/paper/bib/related_work/newell72.txt:
--------------------------------------------------------------------------------
 1 | The aim of this book is to advance our understanding of how humans think.
 2 | It seeks to do so by putting forth a theory of human problem solving, along with a body of empirical evidence that permits assessment of the theory.
 3 | No single work advances understanding very far. The aims of a scientific work are limited by the formal character of the theory, by the phenomena it encompasses, by the experimental situations it uses, by the types of subjects it studies, and by the data it gathers. Of course, a theory may speak beyond its initial base-all scientists hope for just that. But science is a series of successive approx-imations. Not all things can be done at once, and even if one aspires to go far, he must start somewhere. If one aims at covering all of human thinking in a single work, the work will necessarily be superficial. If one aims at probing in depth, then many aspects of the subject, however important, will be left untouched.
 4 | 
 5 | @article{Newell:72,
 6 |     title = {Human problem solving.},
 7 |     author = {A. Newell and H. A. Simon},
 8 |     journal = {Prentice-Hall},
 9 |     pages = {920},
10 |     year = {1972}
11 | }


--------------------------------------------------------------------------------
/src/evals/snippets/paper/method/symbolicai_docs.txt:
--------------------------------------------------------------------------------
  1 | ## Why SymbolicAI?
  2 | 
  3 | SymbolicAI aims to bridge the gap between classical programming, or Software 1.0, and modern data-driven programming (aka Software 2.0). It is a framework designed to build software applications that leverage the power of large language models (LLMs) with composability and inheritance, two potent concepts in the object-oriented classical programming paradigm.
  4 | 
  5 | By using SymbolicAI, you can traverse the spectrum between the classical programming realm and the data-driven programming realm, as illustrated in the following figure:
  6 | 
  7 | We adopt a divide-and-conquer approach, breaking down complex problems into smaller, manageable tasks. We use the expressiveness and flexibility of LLMs to evaluate these sub-problems. By re-combining the results of these operations, we can solve the broader, more complex problem.
  8 | 
  9 | In time, and with sufficient data, we can gradually transition from general-purpose LLMs with `zero` and `few-shot` learning capabilities to specialized, fine-tuned models designed to solve specific problems (see above). This strategy enables the design of operations with fine-tuned, task-specific behavior.
 10 | 
 11 | ## Tell me some more fun facts!
 12 | 
 13 | SymbolicAI is fundamentally inspired by the neuro-symbolic programming paradigm.
 14 | 
 15 | **Neuro-symbolic programming** is an artificial intelligence and cognitive computing paradigm that combines the strengths of deep neural networks and symbolic reasoning.
 16 | 
 17 | **Deep neural networks** are machine learning algorithms inspired by the structure and function of biological neural networks. They excel in tasks such as image recognition and natural language processing. However, they struggle with tasks that necessitate explicit reasoning, like long-term planning, problem-solving, and understanding causal relationships.
 18 | 
 19 | **Symbolic reasoning** uses formal languages and logical rules to represent knowledge, enabling tasks such as planning, problem-solving, and understanding causal relationships. While symbolic reasoning systems excel in tasks requiring explicit reasoning, they fall short in tasks demanding pattern recognition or generalization, like image recognition or natural language processing.
 20 | 
 21 | **Neuro-symbolic programming** aims to merge the strengths of both neural networks and symbolic reasoning, creating AI systems capable of handling various tasks. This combination is achieved by using neural networks to extract information from data and utilizing symbolic reasoning to make inferences and decisions based on that data. Another approach is for symbolic reasoning to guide the neural networks' generative process and increase interpretability.
 22 | 
 23 | **Embedded accelerators for LLMs** will likely be ubiquitous in future computation platforms, including wearables, smartphones, tablets, and notebooks. These devices will incorporate models similar to GPT-3, ChatGPT, OPT, or Bloom.
 24 | 
 25 | LLMs are expected to perform a wide range of computations, like natural language understanding and decision-making. Additionally, neuro-symbolic computation engines will learn how to tackle unseen tasks and resolve complex problems by querying various data sources for solutions and executing logical statements on top.
 26 | To ensure the content generated aligns with our objectives, it is crucial to develop methods for instructing, steering, and controlling the generative processes of machine learning models. As a result, our approach works to enable active and transparent flow control of these generative processes.
 27 | 
 28 | The figure above depicts this generative process as shifting the probability mass of an input stream toward an output stream in a contextualized manner. With properly designed conditions and expressions, you can validate and guide the behavior towards a desired outcome or repeat expressions that fail to meet requirements. Our approach consists of defining a set of _fuzzy_ operations to manipulate the data stream and condition LLMs to align with our goals. We regard all data objects – such as strings, letters, integers, and arrays – as symbols and view natural language as the primary interface for interaction. See the following figure:
 29 | 
 30 | As long as our goals can be expressed through natural language, LLMs can be used for neuro-symbolic computations.
 31 | Consequently, we develop operations that manipulate these symbols to construct new symbols. Each symbol can be interpreted as a statement, and multiple statements can be combined to formulate a logical expression.
 32 | 
 33 | By combining statements together, we can build causal relationship functions and complete computations, transcending reliance purely on inductive approaches. The resulting computational stack resembles a neuro-symbolic computation engine at its core, facilitating the creation of new applications in tandem with established frameworks.
 34 | 
 35 | ## How Does it Work?
 36 | 
 37 | We will now demonstrate how we define our `Symbolic API`, which is based on object-oriented and compositional design patterns. The `Symbol` class serves as the base class for all functional operations, and in the context of symbolic programming (fully resolved expressions), we refer to it as a terminal symbol. The Symbol class contains helpful operations that can be interpreted as expressions to manipulate its content and evaluate new Symbols.
 38 | 
 39 | ### Symbolic Operations
 40 | 
 41 | Let's define a Symbol and perform some basic manipulations. We begin with a translation operation:
 42 | 
 43 | ```python
 44 | sym = ai.Symbol("Welcome to our tutorial.")
 45 | sym.translate('German')
 46 | ```
 47 | ```bash
 48 | Output:
 49 | <class 'symai.expressions.Symbol'>(value=Willkommen zu unserem Tutorial.)
 50 | ```
 51 | 
 52 | ### Evaluating Expressions by Best Effort
 53 | 
 54 | Evaluations are resolved in the language domain and by best effort. We showcase this on the example of [word2vec](https://arxiv.org/abs/1301.3781).
 55 | 
 56 | **Word2Vec** generates dense vector representations of words by training a shallow neural network to predict a word based on its neighbors in a text corpus. These resulting vectors are then employed in numerous natural language processing applications, such as sentiment analysis, text classification, and clustering.
 57 | 
 58 | In the example below, we can observe how operations on word embeddings (colored boxes) are performed. Words are tokenized and mapped to a vector space where semantic operations can be executed using vector arithmetic.
 59 | 
 60 | Similar to word2vec, we aim to perform contextualized operations on different symbols. However, as opposed to operating in vector space, we work in the natural language domain. This provides us the ability to perform arithmetic on words, sentences, paragraphs, etc., and verify the results in a human-readable format.
 61 | 
 62 | The following examples display how to evaluate such an expression using a string representation:
 63 | 
 64 | ```python
 65 | ai.Symbol('King - Man + Women').expression()
 66 | ```
 67 | ```bash
 68 | Output:
 69 | <class 'symai.expressions.Symbol'>(value=Queen)
 70 | ```
 71 | 
 72 | In the next section, we will explore operations.
 73 | 
 74 | ## Operations
 75 | 
 76 | Operations form the core of our framework and serve as the building blocks of our API. These operations define the behavior of symbols by acting as contextualized functions that accept a `Symbol` object and send it to the neuro-symbolic engine for evaluation. Operations then return one or multiple new objects, which primarily consist of new symbols but may include other types as well. Polymorphism plays a crucial role in operations, allowing them to be applied to various data types such as strings, integers, floats, and lists, with different behaviors based on the object instance.
 77 | 
 78 | Operations are executed using the `Symbol` object's `value` attribute, which contains the original data type converted into a string representation and sent to the engine for processing. As a result, all values are represented as strings, requiring custom objects to define a suitable `__str__` method for conversion while preserving the object's semantics.
 79 | 
 80 | Inheritance is another essential aspect of our API, which is built on the `Symbol` class as its base. All operations are inherited from this class, offering an easy way to add custom operations by subclassing `Symbol` while maintaining access to basic operations without complicated syntax or redundant functionality. Subclassing the `Symbol` class allows for the creation of contextualized operations with unique constraints and prompt designs by simply overriding the relevant methods. However, it is recommended to subclass the `Expression` class for additional functionality.
 81 | 
 82 | Basic operations in `Symbol` are implemented by defining local functions and decorating them with corresponding operation decorators from the `symai/core.py` file, a collection of predefined operation decorators that can be applied rapidly to any function. Using local functions instead of decorating main methods directly avoids unnecessary communication with the neural engine and allows for default behavior implementation. It also helps cast operation return types to symbols or derived classes, using the `self.sym_return_type(...)` method for contextualized behavior based on the determined return type. More details can be found in the [`Symbol` class](https://github.com/ExtensityAI/symbolicai/blob/main/symai/symbol.py).
 83 | 
 84 | The following section demonstrates that most operations in `symai/core.py` are derived from the more general `few_shot` decorator.
 85 | 
 86 | ### Few-Shot Operations
 87 | 
 88 | The `@ai.few_shot` decorator is a generalized version of the `@ai.zero_shot` decorator, used to define custom operations that require demonstration examples. To provide a clearer understanding, we present the function signature of the `few_shot` decorator:
 89 | 
 90 | ```python
 91 | def few_shot(prompt: str,
 92 |              examples: Prompt,
 93 |              constraints: List[Callable] = [],
 94 |              default: Optional[object] = None,
 95 |              limit: int = 1,
 96 |              pre_processors: Optional[List[PreProcessor]] = None,
 97 |              post_processors: Optional[List[PostProcessor]] = None,
 98 |              **decorator_kwargs):
 99 | ```
100 | 
101 | The `prompt` and `constraints` attributes behave similarly to those in the `zero_shot` decorator. The `examples` and `limit` arguments are new. The `examples` argument defines a list of demonstrations used to condition the neural computation engine, while the `limit` argument specifies the maximum number of examples returned, given that there are more results. The `pre_processors` argument accepts a list of `PreProcessor` objects for pre-processing input before it's fed into the neural computation engine. The `post_processors` argument accepts a list of `PostProcessor` objects for post-processing output before returning it to the user. Lastly, the `decorator_kwargs` argument passes additional arguments from the decorator kwargs, which are streamlined towards the neural computation engine and other engines.
102 | 
103 | To provide a more comprehensive understanding of our conceptual implementation, refer to the flow diagram below, containing the most important classes:
104 | 
105 | The colors indicate logical groups of data processing steps. `Yellow` represents input and output data, `blue` shows places where one can customize or prepare the input of the engine, `green` indicates post-processing steps of the engine response, `red` displays the application of constraints (including attempted casting of the `return type signature` if specified in the decorated method), and `grey` denotes the custom method defining all properties, thus having access to all the previously mentioned objects.
106 | 
107 | To conclude this section, here is an example of how to write a custom Japanese name generator using our `@ai.zero_shot` decorator:
108 | 
109 | ```python
110 | import symai as ai
111 | class Demo(ai.Symbol):
112 |     @ai.few_shot(prompt="Generate Japanese names: ",
113 |                  examples=ai.Prompt(
114 |                    ["愛子", "和花", "一郎", "和枝"]
115 |                  ),
116 |                  limit=2,
117 |                  constraints=[lambda x: len(x) > 1])
118 |     def generate_japanese_names(self) -> list:
119 |         return ['愛子', '和花'] # dummy implementation
120 | ```
121 | 
122 | If the neural computation engine cannot compute the desired outcome, it will revert to the `default` implementation or default value. If no default implementation or value is found, the method call will raise an exception.
123 | 


--------------------------------------------------------------------------------
/src/evals/snippets/paper/ref/reference_abstract.txt:
--------------------------------------------------------------------------------
1 | We introduce \emph{SymbolicAI}, a versatile and modular framework that employs a logic-based approach to concept learning and flow management in generative processes. This framework facilitates the seamless integration of generative models with a diverse range of solvers. We integrate large language models (LLMs) and probabilistic programming principles to tackle complex tasks and utilize differentiable and classical programming paradigms with their respective strengths. Central to our framework are neuro-symbolic engines, which include LLMs for task executions of natural and formal language instructions. Our approach establishes a set of  operations that manipulate a data stream and guide the LLMs to align with the user's objectives. Inherently, these operations exhibit polymorphic, compositional and self-referential properties, which facilitate the implementation of various data types and behaviors. As a result, we can seamlessly transition between capabilities of various foundation models endowed with zero and few-shot learning capabilities and specialized, fine-tuned models or solvers proficient in addressing specific problems. In turn, our framework enables the creation of explainable computational graphs through compositional expressions and functions.


--------------------------------------------------------------------------------
/src/evals/snippets/paper/ref/reference_paper.txt:
--------------------------------------------------------------------------------
 1 | \title{SymbolicAI: A framework for logic-based approaches combining generative models and solvers}
 2 | 
 3 | \begin{abstract}
 4 | 
 5 | We introduce \emph{SymbolicAI}, a versatile and modular framework that employs a logic-based approach to concept learning and flow management in generative processes. This framework facilitates the seamless integration of generative models with a diverse range of solvers. We integrate large language models (LLMs) and probabilistic programming principles to tackle complex tasks and utilize differentiable and classical programming paradigms with their respective strengths. Central to our framework are neuro-symbolic engines, which include LLMs for task executions of natural and formal language instructions. Our approach establishes a set of  operations that manipulate a data stream and guide the LLMs to align with the user's objectives. Inherently, these operations exhibit polymorphic, compositional and self-referential properties, which facilitate the implementation of various data types and behaviors. As a result, we can seamlessly transition between capabilities of various foundation models endowed with zero and few-shot learning capabilities and specialized, fine-tuned models or solvers proficient in addressing specific problems. In turn, our framework enables the creation of explainable computational graphs through compositional expressions and functions.
 6 | 
 7 | \end{abstract}
 8 | 
 9 | \section{Related Work}
10 | 
11 | The field of symbolic AI has its foundations in the works of the Logic Theorist (LT) \citep{Newell:56} and the General Problem Solver (GPS) \citep{Newell:57}.
12 | These programs represented the first steps in automated reasoning and problem-solving using symbolic representations.
13 | Despite their advancements, both faced challenges in dealing with the complexity of real-world problems, particularly due to combinatorial explosion.
14 | To address these limitations, the Soar \citep{Laird:87} cognitive architecture was developed, advancing the notion that intelligent behavior results from goal-oriented search through a problem space \citep{Newell:72, McCarthy:06}, with each step consisting of selecting and applying operators.
15 | Soar introduced components like reinforcement learning, impasses, substates, and chunking to enhance its problem-solving capabilities.
16 | 
17 | \section{Framework}
18 | 
19 | In this section, we discuss the design choices pertaining to implementation and the specifics of the framework we've employed.
20 | 
21 | Analogous to the Python \texttt{object} type, the base type of SymbolicAI is a symbol, and is represented through its name equivalent base type \texttt{Symbol}.
22 | A \texttt{Symbol} object marks a non-reducible atomic unit.
23 | All other sub-types, such as \texttt{Expression} and its derivatives, are analogous to their mathematical namesakes, representing expressions or units that can be further evaluated and simplified.
24 | These sub-types inherit from \texttt{Symbol} the base attributes, primitive operators, and helper methods.
25 | Furthermore, each \texttt{Symbol} object contains valued and vector-valued representations, obtained through \texttt{value} and \texttt{embedding} attributes.
26 | The latter, in particular, serve as a means to impute a symbol's current context, akin to embedding text and storing it as a PyTorch tensor.
27 | Although for an LLM, the numerical tensors may lack inherent meaning, the vector-valued representations play a strategic role: when composite symbols coalesce into more complex expressions, these embedded tensors become amenable to context updates through gradient-based optimization.
28 | This choice is critical in the dynamic landscape of symbolic interactions, particularly for developing self-evolving systems.
29 | 
30 | To enable the processing of symbols by LLMs, we assume that each \texttt{Symbol} object is facilitated by Python's native string functionality, where the \texttt{\_\_str\_\_} method enables a string representation.
31 | Worth noting is that encoding a complex object into a string sometimes precludes perfect object reconstitution.
32 | However, this concern does not substantially impede our methodology: we can employ approximations or proxy representations stored by the vector-valued property to effectively re-map objects.
33 | These representations are obtained through respective embedding models.
34 | Therefore, we can theoretically assert that any Python object \emph{is} parsable by an LLM by design.
35 | 


--------------------------------------------------------------------------------
/src/evals/snippets/paper/ref/reference_section_framework.txt:
--------------------------------------------------------------------------------
 1 | \section{Framework}
 2 | 
 3 | In this section, we discuss the design choices pertaining to implementation and the specifics of the framework we've employed.
 4 | 
 5 | Analogous to the Python \texttt{object} type, the base type of SymbolicAI is a symbol, and is represented through its name equivalent base type \texttt{Symbol}.
 6 | A \texttt{Symbol} object marks a non-reducible atomic unit.
 7 | All other sub-types, such as \texttt{Expression} and its derivatives, are analogous to their mathematical namesakes, representing expressions or units that can be further evaluated and simplified.
 8 | These sub-types inherit from \texttt{Symbol} the base attributes, primitive operators, and helper methods.
 9 | Furthermore, each \texttt{Symbol} object contains valued and vector-valued representations, obtained through \texttt{value} and \texttt{embedding} attributes.
10 | The latter, in particular, serve as a means to impute a symbol's current context, akin to embedding text and storing it as a PyTorch tensor.
11 | Although for an LLM, the numerical tensors may lack inherent meaning, the vector-valued representations play a strategic role: when composite symbols coalesce into more complex expressions, these embedded tensors become amenable to context updates through gradient-based optimization.
12 | This choice is critical in the dynamic landscape of symbolic interactions, particularly for developing self-evolving systems.
13 | 
14 | To enable the processing of symbols by LLMs, we assume that each \texttt{Symbol} object is facilitated by Python's native string functionality, where the \texttt{\_\_str\_\_} method enables a string representation.
15 | Worth noting is that encoding a complex object into a string sometimes precludes perfect object reconstitution.
16 | However, this concern does not substantially impede our methodology: we can employ approximations or proxy representations stored by the vector-valued property to effectively re-map objects.
17 | These representations are obtained through respective embedding models.
18 | Therefore, we can theoretically assert that any Python object \emph{is} parsable by an LLM by design.
19 | 


--------------------------------------------------------------------------------
/src/evals/snippets/paper/ref/reference_section_relatedwork.txt:
--------------------------------------------------------------------------------
1 | \section{Related Work}
2 | 
3 | The field of symbolic AI has its foundations in the works of the Logic Theorist (LT) \citep{Newell:56} and the General Problem Solver (GPS) \citep{Newell:57}.
4 | These programs represented the first steps in automated reasoning and problem-solving using symbolic representations.
5 | Despite their advancements, both faced challenges in dealing with the complexity of real-world problems, particularly due to combinatorial explosion.
6 | To address these limitations, the Soar \citep{Laird:87} cognitive architecture was developed, advancing the notion that intelligent behavior results from goal-oriented search through a problem space \citep{Newell:72, McCarthy:06}, with each step consisting of selecting and applying operators.
7 | Soar introduced components like reinforcement learning, impasses, substates, and chunking to enhance its problem-solving capabilities.
8 | 
9 | 


--------------------------------------------------------------------------------
/src/evals/snippets/paper/ref/reference_title.txt:
--------------------------------------------------------------------------------
1 | \title{SymbolicAI: A framework for logic-based approaches combining generative models and solvers}


--------------------------------------------------------------------------------
/src/evals/snippets/paper/traj/reference_abstract.txt:
--------------------------------------------------------------------------------
1 | We present \emph{SymbolicAI}, a highly adaptive and modularized architecture that takes a logical stance on concept learning and the orchestration of generative processes. This architecture ensures effortless fusing of generative architectures with a broad spectrum of solver technologies. By amalgamating sizable language models (LLMs) and principles of probabilistic programming, we handle sophisticated challenges and harness the respective powerhouses of differentiable and traditional programming regimes. At the heart of our architecture lie neuro-symbolic engines, constituting LLMs responsible for the execution of tasks penned in both natural and formal languages. Our methodology crafts a suite of operations that sculpt the data stream, effectively nudging the LLMs towards fulfilling the aspirations of the users. These operations are inherently polymorphic, compositional, and self-referential, empowering us to flexibly incorporate a multitude of data types and behaviors. Such flexibility allows for smooth transitions between the skill sets of various foundational models, all equipped with zero and few-shot learning proficiencies, as well as specialized models or solvers meticulously devised to take on particular challenges. Consequently, our architecture paves the way for the genesis of intelligible computational graphs through the assembly of compositional expressions and functions.


--------------------------------------------------------------------------------
/src/evals/snippets/paper/traj/reference_paper.txt:
--------------------------------------------------------------------------------
 1 | \title{SymbolicAI: A Framework for Logic-Based Approaches Combining Generative Models with Solvers}
 2 | 
 3 | \begin{abstract}
 4 | 
 5 | We introduce \emph{SymbolicAI}, a robust and flexible framework utilizing a logic-based approach for concept learning and flow management in generative processes. This framework enables the fluid integration of generative models with a wide spectrum of solvers. By merging large language models (LLMs) with probabilistic programming frameworks, we tackle intricate tasks and employ the distinctive advantages of differentiable and traditional programming paradigms. At the heart of our framework lie neuro-symbolic engines, comprising LLMs for the execution of tasks driven by natural and formal language instructions. Our methodology delineates a suite of operations that sculpt the data stream, steering the LLMs towards the user’s goals. These operations exhibit polymorphic, compositional, and self-referential characteristics, thus supporting the implementation of diverse data types and behaviors. Consequently, we are equipped to fluidly navigate between the capabilities of foundational models vested with zero-shot and few-shot learning, and specialized models or solvers adept in particular problem domains. Consequentially, our framework fosters the generation of interpretable computational graphs through compositional constructs and functions.
 6 | 
 7 | \end{abstract}
 8 | 
 9 | \section{Related Work}
10 | 
11 | The discipline of symbolic AI is deeply rooted in the seminal works of the Logic Theorist (LT) \citep{Newell:56} and the General Problem Solver (GPS) \citep{Newell:57}.
12 | These pioneers in automated reasoning and symbolic problem-solving confronted hurdles with the intricacies of real-world quandaries, especially due to combinatorial explosion.
13 | To surmount such barriers, the Soar \citep{Laird:87} cognitive architecture was conceived, propagating the concept that intelligent behavior emanates from a purposeful search within a problem space \citep{Newell:72, McCarthy:06}, with each juncture involving the choice and application of operators.
14 | Soar integrated elements such as reinforcement learning, impasses, substates, and chunking to refine its problem-solving acumen.
15 | 
16 | \section{Framework}
17 | 
18 | Herein, we delineate the strategic design choices and specifications pertaining to the SymbolicAI framework.
19 | 
20 | In a manner analogous to Python's \texttt{object} class, the foundational type in SymbolicAI is termed a symbol and instantiated as the \texttt{Symbol} base class.
21 | A \texttt{Symbol} signifies an indivisible atomic unit.
22 | Descendant subtypes, such as \texttt{Expression} and its derivatives, mimic their mathematical equivalents, encapsulating units capable of further refinement and simplification.
23 | Inheriting from the \texttt{Symbol} base are essential attributes, elementary operations, and ancillary methods.
24 | Moreover, each \texttt{Symbol} harbors scalar and vector-valued depictions, accessible via the \texttt{value} and \texttt{embedding} properties, respectively.
25 | Notably, the \texttt{embedding} property is pivotal in conferring a symbol's contemporaneous context, paralleling the embedding of text and its storage as a PyTorch tensor.
26 | For an LLM, the numerical tensors might be devoid of intrinsic signification, yet the vector-valued depictions play a critical role: as composite symbols amalgamate into intricate expressions, these tensors are primed for context augmentation through gradient-driven optimization—a crucial aspect for evolving symbolic systems.
27 | 
28 | To facilitate symbol processing by LLMs, we postulate that each \texttt{Symbol} incorporates Python's intrinsic string capabilities, with the \texttt{\_\_str\_\_} method yielding a string depiction.
29 | It is pertinent to acknowledge that encoding a multifaceted object as a string may occasionally hinder immaculate object regeneration.
30 | Nevertheless, this does not significantly encumber our approach: we can utilize approximations or stand-in depictions maintained by the vector-valued attribute for effective object remapping.
31 | Such representations are derived from respective embedding models.
32 | Thus, we postulate that, by design, any Python object \emph{is indeed} interpretable by an LLM.
33 | 


--------------------------------------------------------------------------------
/src/evals/snippets/paper/traj/reference_section_framework.txt:
--------------------------------------------------------------------------------
1 | \section{Framework}
2 | 
3 | Within this section, we deliberate on the decisions foundational to the implementation and the intricacies inherent to the framework we have adopted.
4 | 
5 | Comparable to the pervasive \texttt{object} in Python, the cornerstone of SymbolicAI is the \texttt{Symbol}, epitomized by its namesake basic type. A \texttt{Symbol} object signifies an indivisible atomic entity. All succeeding sub-types, like \texttt{Expression} and its kin, echo their mathematical counterparts by embodying expressions or entities poised for further elucidation and refinement.
6 | Inheriting from \texttt{Symbol} are fundamental attributes, elemental operators, and auxiliary methods permeating these sub-types. In addition, each \texttt{Symbol} encompasses both scalar and vectorial representations, accessible via \texttt{value} and \texttt{embedding} attributes, respectively. The latter is crucially deployed for infusing a symbol’s prevailing milieu, paralleling the embedding of text and its subsequent encapsulation as a PyTorch tensor. While to an LLM the numerical tensors might stand void of intrinsic meaning, the vectorial portrayals are pivotal: as singular symbols amalgamate into elaborated constructs, these embedded tensors are poised for contextual rejuvenation through gradient based optimization. Such versatility underpins a decisive role within the fluid realm of symbolic discourse, particularly when forging self-augmenting systems.
7 | 
8 | Forging a bridge between symbols and LLMs presupposes that each \texttt{Symbol} interweaves seamlessly with Python's intrinsic string utilities, a feat accomplished by the \texttt{\_\_str\_\_} method which confers a string portrayal. It is noteworthy that transmuting a multifaceted object to a string may at times impede flawless reconstitution of said object. Nevertheless, this impediment scarcely detracts from our approach: through the leverage of vectors furnished by the embedding feature, we adeptly navigate proxies or approximative representations to effectively retrace objects. These portrayals are sourced from dedicated embedding models. Consequently, we assert with theoretical conviction that any Python object, by virtue of design, \emph{is} interpretable by an LLM.


--------------------------------------------------------------------------------
/src/evals/snippets/paper/traj/reference_section_relatedwork.txt:
--------------------------------------------------------------------------------
1 | \section{Related Work}
2 | 
3 | The realm of symbolic AI traces its origins to seminal works such as the Logic Theorist (LT) \citep{Newell:56} and the General Problem Solver (GPS) \citep{Newell:57}. These pioneering programs embodied the inaugural attempts at automated reasoning and problem-solving via symbolic representations. However, despite representing milestones in AI, they encountered significant challenges when addressing the complexity intrinsic to real-world problems, chiefly the issue of combinatorial explosion. In an effort to surmount these challenges, the Soar cognitive architecture was conceived \citep{Laird:87}, building upon the concept that intelligent behavior emanates from a purposeful navigation through a problem space—a notion proposed by \citep{Newell:72, McCarthy:06}. Each progression within this space involves the careful selection and application of various operators. Soar was a progressive stride, embedding elements such as reinforcement learning, impasses, substates, and chunking, to amplify its capacity for solving complex problems.


--------------------------------------------------------------------------------
/src/evals/snippets/paper/traj/reference_title.txt:
--------------------------------------------------------------------------------
1 | \title{SymbolicAI: An Integrated Framework Merging Generative Models with Logic Solvers}


--------------------------------------------------------------------------------
/src/evals/snippets/richard_feynman_summary.txt:
--------------------------------------------------------------------------------
 1 | # The Enigmatic Richard Feynman: A Journey into the Heart of Physics
 2 | 
 3 | ## Unveiling the Legacy of an Iconic Theoretical Physicist
 4 | 
 5 | Richard Phillips Feynman—a name synonymous with genius, curiosity, and an unorthodox approach to science and life itself. Born on May 11, 1918, in New York City, Feynman grew to become one of the most influential and colorful figures in the world of physics. A man of myriad talents, his contributions to science are as vast as the quantum realms he studied.
 6 | 
 7 | ## Path Integral to Nobel Glory
 8 | 
 9 | Feynman's relentless pursuit of knowledge led him to profound discoveries in quantum mechanics. His work on the path integral formulation elegantly described the behavior of subatomic particles in a language that, today, is the lexicon of theoretical physicists. However, it was perhaps his development of Feynman diagrams, a graphical representation of the behavior of particles, where he imprinted his indelible mark on the fabric of physics.
10 | 
11 | His brilliance and distinctive methodologies awarded him the Nobel Prize in Physics in 1965 along with Julian Schwinger and Shin'ichirō Tomonaga. They were honored for their groundbreaking work in quantum electrodynamics (QED), which elucidated the interaction of light and matter—a cornerstone in the field that has profoundly shaped modern physics.
12 | 
13 | ## A Legacy Beyond Theoretical Constructs
14 | 
15 | Richard Feynman was also known for his magnetic personality, which transcended academic circles. Not only a Nobel laureate but also an author, bongo player, and safe-cracker, he popularized physics through books and lectures. His bestselling autobiographies, "Surely You're Joking, Mr. Feynman!" and "What Do You Care What Other People Think?" painted a vibrant picture of his life outside the lab, bringing science closer to the masses.
16 | 
17 | ## The Fame Beyond the Lab
18 | 
19 | Apart from the Nobel Prize (1965), Feynman's accolades include the Albert Einstein Award (1954) for his work on quantum electrodynamics, the E.O. Lawrence Award (1962), and the National Medal of Science (1979). His membership in the National Academy of Sciences was a testament to his contributions to the field, though he famously resigned due to personal reasons. Additionally, he was named a Foreign Member of the Royal Society (1965), showcasing the international recognition of his impact.
20 | 
21 | ## Mentorship and Intellectual Progeny
22 | 
23 | During his storied career, Feynman was not only a researcher but also an educator. His doctoral students reflect the breadth of his influence. Among them were James M. Bardeen, who went on to contribute significantly to the theory of general relativity, and Albert Hibbs, who suggested space probes as a means for space exploration and later became a science administrator at NASA's Jet Propulsion Laboratory.
24 | 
25 | Other notable students include Thomas Curtright, currently a professor of physics, and George Zweig, who developed the quark model independently of Murray Gell-Mann. Not to forget, Robert Barro, an economist whose incursions into new economic analysis were perhaps inspired by the same curiosity Feynman fostered in all disciplines.
26 | 
27 | ## The Enduring Feynman Effect
28 | 
29 | Richard Feynman's contributions continue to ripple through the world of physics and beyond. While his physical presence left us on February 15, 1988, his teachings, insights, and exemplary life remain a rich source of inspiration. Icons like Feynman never truly depart; their spirit forever animates their field of study and those who follow in their footsteps, asking those same probing questions and imagining the simplicity in complexity, as only Feynman could.
30 | 
31 | As we reflect on the lessons he imparted to his students and the world, let us strive to embody the curiosity and integrity that Richard Feynman demonstrated throughout his luminous journey. It's a sure bet that his maverick spirit will continue to spark the light of discovery for generations to come.
32 | 
33 | ## Closing Remark
34 | 
35 | From the atomic bomb to the intricacies of subatomic particles, from Nobel triumphs to unraveling the mysteries of antiparticles, Richard Feynman remains an unforgettable architect of the world we understand today. Aspiring scientists and curious minds still peer through Feynman's lenses, hoping to catch a glimpse of the universe's hidden elegance.
36 | 


--------------------------------------------------------------------------------
/src/evals/snippets/sample_bill.txt:
--------------------------------------------------------------------------------
 1 | Penn
 2 | UNIVERSITY of PENNSYLVANIA
 3 | BENJAMIN FRANKLIN
 4 | UNIVERSITY OF PENNSYLVANIA
 5 | 1 COLLEGE HALL
 6 | PHILADELPHIA, PA 19104
 7 | Student ID: 12345678
 8 | TERM
 9 | Summer II 2017 3
10 | Fall 2017
11 | Fall 2017
12 | Fall 2017
13 | PREVIOUS BALANCE
14 | $ 0.00
15 | DATE
16 | Account Statement
17 | DESCRIPTION
18 | PREVIOUS BILLED BALANCE 2
19 | 06/26/2017
20 | 06/09/2017
21 | 06/09/2017
22 | 06/16/2017
23 | 06/16/2017
24 | 06/16/2017 Clinical Fee
25 | 06/16/2017 Technology Fee-The College
26 | 06/28/2017 Rent-Stouffer House
27 | Tuition-The College
28 | General Fee
29 | General Fee
30 | Tuition-The College
31 | 07/05/2017
32 | 07/05/2017
33 | 07/05/2017
34 | STATEMENT DATE
35 | 07/05/2017
36 | 06/28/2017 College House Fee-Stouffer
37 | 06/29/2017 Dining-Club Activities Plan
38 | 06/29/2017 Payment Plan Enrollment Fee 7
39 | - CURRENT CHARGES -
40 | 06/20/2017 Penn Grant
41 | 07/05/2017 PennPay AMEX Payment
42 | Pell Grant
43 | - CURRENT CREDITS -
44 | * CURRENT BILLED BALANCE
45 | - MEMOED FINANCIAL AID - 10
46 | PAST DUE
47 | $0.00
48 | ** MEMO FINAID BALANCE **
49 | - MEMO ITEMS -
50 | Installment Payment Plan-Fall
51 | Installment Payment Plan-Fall 11
52 | Installment Payment Plan-Fall
53 | ***** MEMO BALANCE *****
54 | CURRENT BALANCE
55 | $ 21,920.37
56 | Make check payable to The Trustees of the University of Pennsylvania.
57 | If paying by check, please print the student's full name and 8-digit PennID#
58 | University of Pennsylvania
59 | Philadelphia, PA 19104-6270
60 | www.upenn.edu
61 | www.srfs.upenn.edu
62 | DUE DATE
63 | 07/25/2017
64 | CHARGES
65 | AMOUNT DUE
66 | $ 5,070.50
67 | 0.00
68 | 4,070.00
69 | 348.00
70 | 2,376.00
71 | 23,708.00
72 | 273.00
73 | 410.00
74 | 4,910.00
75 | 118.00
76 | 1,446.00
77 | 45.00
78 | 21,920.37
79 | CREDITS
80 | 10,962.00
81 | 4,821.63
82 | 2,385.00
83 | 2,385.00
84 | 4,821.62
85 | 4,821.63
86 | 4,821.62
87 | 14,464.87
88 | TOTAL AMOUNT DUE
89 | $ 5,070.50 12


--------------------------------------------------------------------------------
/src/evals/snippets/wiki_page_20240121.txt:
--------------------------------------------------------------------------------
   1 | 
   2 | 
   3 | Uranium-235 - Wikipedia
   4 | 
   5 | 
   6 | 
   7 | 
   8 | 
   9 | 
  10 | 
  11 | 
  12 | 
  13 | 
  14 | 
  15 | 
  16 | 
  17 | 
  18 | 
  19 | 
  20 | 
  21 | 
  22 | 
  23 | 
  24 | 
  25 | 
  26 | 
  27 | 
  28 | 
  29 | 
  30 | 
  31 | 
  32 | 
  33 | 
  34 | 
  35 | 
  36 | 
  37 | 
  38 | 
  39 | 
  40 | Jump to content
  41 | 
  42 | 
  43 | 
  44 | 
  45 | 
  46 | 
  47 | 
  48 | Main menu
  49 | 
  50 | 
  51 | 
  52 | 
  53 | 
  54 | Main menu
  55 | move to sidebar
  56 | hide
  57 | 
  58 | 
  59 | 
  60 | 		Navigation
  61 | 	
  62 | 
  63 | 
  64 | Main pageContentsCurrent eventsRandom articleAbout WikipediaContact usDonate
  65 | 
  66 | 
  67 | 
  68 | 
  69 | 
  70 | 		Contribute
  71 | 	
  72 | 
  73 | 
  74 | HelpLearn to editCommunity portalRecent changesUpload file
  75 | 
  76 | 
  77 | 
  78 | 
  79 | 
  80 | Languages
  81 | 
  82 | Language links are at the top of the page.
  83 | 
  84 | 
  85 | 
  86 | 
  87 | 
  88 | 
  89 | 
  90 | 
  91 | 
  92 | 
  93 | 
  94 | 
  95 | 
  96 | 
  97 | 
  98 | 
  99 | 
 100 | 
 101 | 
 102 | Search
 103 | 
 104 | 
 105 | 
 106 | 
 107 | 
 108 | 
 109 | 
 110 | 
 111 | 
 112 | 
 113 | 
 114 | Search
 115 | 
 116 | 
 117 | 
 118 | 
 119 | 
 120 | 
 121 | 
 122 | 
 123 | 
 124 | 
 125 | 
 126 | 
 127 | 
 128 | 
 129 | 
 130 | 
 131 | 
 132 | 
 133 | 
 134 | 
 135 | 
 136 | 
 137 | 
 138 | 
 139 | 
 140 | 
 141 | 
 142 | 
 143 | 
 144 | Create account
 145 | 
 146 | Log in
 147 | 
 148 | 
 149 | 
 150 | 
 151 | 
 152 | 
 153 | 
 154 | 
 155 | Personal tools
 156 | 
 157 | 
 158 | 
 159 | 
 160 | 
 161 |  Create account Log in
 162 | 
 163 | 
 164 | 
 165 | 
 166 | 
 167 | 		Pages for logged out editors learn more
 168 | 
 169 | 
 170 | 
 171 | ContributionsTalk
 172 | 
 173 | 
 174 | 
 175 | 
 176 | 
 177 | 
 178 | 
 179 | 
 180 | 
 181 | 
 182 | 
 183 | 
 184 | 
 185 | 
 186 | 
 187 | 
 188 | 
 189 | 
 190 | 
 191 | 
 192 | 
 193 | 
 194 | 
 195 | 
 196 | 
 197 | 
 198 | 
 199 | 
 200 | 
 201 | 
 202 | 
 203 | 
 204 | 
 205 | 
 206 | 
 207 | 
 208 | 
 209 | Toggle the table of contents
 210 | 
 211 | 
 212 | 
 213 | 
 214 | 
 215 | Contents
 216 | move to sidebar
 217 | hide
 218 | 
 219 | 
 220 | 
 221 | 
 222 | (Top)
 223 | 
 224 | 
 225 | 
 226 | 
 227 | 
 228 | 1Fission properties
 229 | 
 230 | 
 231 | 
 232 | Toggle Fission properties subsection
 233 | 
 234 | 
 235 | 
 236 | 
 237 | 
 238 | 1.1Nuclear weapons
 239 | 
 240 | 
 241 | 
 242 | 
 243 | 
 244 | 
 245 | 
 246 | 
 247 | 
 248 | 2Natural decay chain
 249 | 
 250 | 
 251 | 
 252 | 
 253 | 
 254 | 
 255 | 
 256 | 3Uses
 257 | 
 258 | 
 259 | 
 260 | 
 261 | 
 262 | 
 263 | 
 264 | 4References
 265 | 
 266 | 
 267 | 
 268 | 
 269 | 
 270 | 
 271 | 
 272 | 5External links
 273 | 
 274 | 
 275 | 
 276 | 
 277 | 
 278 | 
 279 | 
 280 | 
 281 | 
 282 | Uranium-235
 283 | 
 284 | 
 285 | 
 286 | 35 languages
 287 | 
 288 | 
 289 | 
 290 | 
 291 | AfrikaansالعربيةБългарскиCatalàČeštinaDanskDeutschEspañolEuskaraفارسیFrançaisGalego한국어Bahasa IndonesiaInterlinguaItalianoעבריתLietuviųMagyarМакедонскиBahasa MelayuNederlands日本語Norsk bokmålPortuguêsРусскийSimple EnglishСрпски / srpskiSuomiதமிழ்TürkçeУкраїнськаاردوTiếng Việt中文
 292 | 
 293 | Edit links
 294 | 
 295 | 
 296 | 
 297 | 
 298 | 
 299 | 
 300 | 
 301 | 
 302 | 
 303 | 
 304 | 
 305 | ArticleTalk
 306 | 
 307 | 
 308 | 
 309 | 
 310 | 
 311 | English
 312 | 
 313 | 
 314 | 
 315 | 
 316 | 
 317 | 
 318 | 
 319 | 
 320 | 
 321 | 
 322 | 
 323 | 
 324 | 
 325 | 
 326 | 
 327 | 
 328 | 
 329 | ReadEditView history
 330 | 
 331 | 
 332 | 
 333 | 
 334 | 
 335 | 
 336 | 
 337 | Tools
 338 | 
 339 | 
 340 | 
 341 | 
 342 | 
 343 | Tools
 344 | move to sidebar
 345 | hide
 346 | 
 347 | 
 348 | 
 349 | 		Actions
 350 | 	
 351 | 
 352 | 
 353 | ReadEditView history
 354 | 
 355 | 
 356 | 
 357 | 
 358 | 
 359 | 		General
 360 | 	
 361 | 
 362 | 
 363 | What links hereRelated changesUpload fileSpecial pagesPermanent linkPage informationCite this pageGet shortened URLWikidata item
 364 | Edit interlanguage linksExpand all
 365 | 
 366 | 
 367 | 
 368 | 
 369 | 		Print/export
 370 | 	
 371 | 
 372 | 
 373 | Download as PDFPrintable version
 374 | 
 375 | 
 376 | 
 377 | 
 378 | 
 379 | 		In other projects
 380 | 	
 381 | 
 382 | 
 383 | Wikimedia Commons
 384 | 
 385 | 
 386 | 
 387 | 
 388 | 
 389 | 
 390 | 
 391 | 
 392 | 
 393 | 
 394 | 
 395 | 
 396 | 
 397 | 
 398 | 
 399 | 
 400 | 
 401 | 
 402 | 
 403 | 
 404 | 
 405 | 
 406 | 
 407 | 
 408 | 
 409 | From Wikipedia, the free encyclopedia
 410 | 
 411 | 
 412 | Isotope of uranium
 413 | "U-235" redirects here. For the World War II submarine, see German submarine U-235.
 414 | Uranium-235, 235UUranium metal highly enriched in uranium-235GeneralSymbol235UNamesuranium-235, 235U, U-235Protons (Z)92Neutrons (N)143Nuclide dataNatural abundance0.72%Half-life (t1/2)703800000 yearsIsotope mass235.0439299 DaSpin7/2−Excess energy40914.062±1.970 keVBinding energy1783870.285±1.996 keVParent isotopes235Pa235Np239PuDecay products231ThDecay modesDecay modeDecay energy (MeV)Alpha4.679Isotopes of uranium Complete table of nuclides
 415 | Uranium-235 (235U or U-235) is an isotope of uranium making up about 0.72% of natural uranium. Unlike the predominant isotope uranium-238, it is fissile, i.e., it can sustain a nuclear chain reaction. It is the only fissile isotope that exists in nature as a primordial nuclide.
 416 | Uranium-235 has a half-life of 703.8 million years.  It was discovered in 1935  by Arthur Jeffrey Dempster. Its fission cross section for slow thermal neutrons is about 584.3±1 barns.[1] For fast neutrons it is on the order of 1 barn.[2]
 417 | Most neutron absorptions induce fission, though a minority result in the formation of uranium-236.[citation needed]
 418 | 
 419 | 
 420 | Fission properties[edit]
 421 | Nuclear fission seen with a uranium-235 nucleus
 422 | The fission of one atom of uranium-235 releases 202.5 MeV (3.24×10−11 J) inside the reactor. That corresponds to 19.54 TJ/mol, or 83.14 TJ/kg.[3] Another 8.8 MeV escapes the reactor as anti-neutrinos. When 23592U nuclei are bombarded with neutrons, one of the many fission reactions that it can undergo is the following (shown in the adjacent image):
 423 | 
 424 | 10n + 23592U → 14156Ba + 9236Kr + 3 10n 
 425 | Heavy water reactors and some graphite moderated reactors can use natural uranium, but light water reactors must use low enriched uranium because of the higher neutron absorption of light water. Uranium enrichment removes some of the uranium-238 and increases the proportion of uranium-235. Highly enriched uranium (HEU), which contains an even greater proportion of uranium-235, is sometimes used in the reactors of nuclear submarines, research reactors and nuclear weapons.
 426 | If at least one neutron from uranium-235 fission strikes another nucleus and causes it to fission, then the chain reaction will continue. If the reaction continues to sustain itself, it is said to be critical, and the mass of 235U required to produce the critical condition is said to be a critical mass. A critical chain reaction can be achieved at low concentrations of 235U if the neutrons from fission are moderated to lower their speed, since the probability for fission with slow neutrons is greater. A fission chain reaction produces intermediate mass fragments which are highly radioactive and produce further energy by their radioactive decay. Some of them produce neutrons, called delayed neutrons, which contribute to the fission chain reaction. The power output of nuclear reactors is adjusted by the location of control rods containing elements that strongly absorb neutrons, e.g., boron, cadmium, or hafnium, in the reactor core. In nuclear bombs, the reaction is uncontrolled and the large amount of energy released creates a nuclear explosion.
 427 | 
 428 | Nuclear weapons[edit]
 429 | The Little Boy gun-type atomic bomb dropped on Hiroshima on August 6, 1945, was made of highly enriched uranium with a large tamper. The nominal spherical critical mass for an untampered 235U nuclear weapon is 56 kilograms (123 lb),[4] which would form a sphere 17.32 centimetres (6.82 in) in diameter. The material must be 85% or more of 235U and is known as weapons grade uranium, though for a crude and inefficient weapon 20% enrichment is sufficient (called weapon(s)-usable). Even lower enrichment can be used, but this results in the required critical mass rapidly increasing. Use of a large tamper, implosion geometries, trigger tubes, polonium triggers, tritium enhancement, and neutron reflectors can enable a more compact, economical weapon using one-fourth or less of the nominal critical mass, though this would likely only be possible in a country that already had extensive experience in engineering nuclear weapons. Most modern nuclear weapon designs use plutonium-239 as the fissile component of the primary stage;[5][6] however, HEU (highly enriched uranium, in this case uranium that is 20% or more 235U) is frequently used in the secondary stage as an ignitor for the fusion fuel.
 430 | 
 431 | 
 432 | 
 433 | Source
 434 | 
 435 | Average energyreleased [MeV][3]
 436 | 
 437 | 
 438 | Instantaneously released energy
 439 | 
 440 | 
 441 | Kinetic energy of fission fragments
 442 | 
 443 | 169.1
 444 | 
 445 | 
 446 | Kinetic energy of prompt neutrons
 447 | 
 448 |   4.8
 449 | 
 450 | 
 451 | Energy carried by prompt γ-rays
 452 | 
 453 |   7.0
 454 | 
 455 | 
 456 | Energy from decaying fission products
 457 | 
 458 | 
 459 | Energy of β− particles
 460 | 
 461 |   6.5
 462 | 
 463 | 
 464 | Energy of delayed γ-rays
 465 | 
 466 |   6.3
 467 | 
 468 | 
 469 | Energy released when those prompt neutrons which do not (re)produce fission are captured
 470 | 
 471 |   8.8
 472 | 
 473 | 
 474 | Total energy converted into heat in an operating thermal nuclear reactor
 475 | 
 476 | 202.5
 477 | 
 478 | 
 479 | Energy of anti-neutrinos
 480 | 
 481 |   8.8
 482 | 
 483 | 
 484 | Sum
 485 | 
 486 | 211.3
 487 | 
 488 | Natural decay chain[edit]
 489 | 
 490 | 
 491 | 
 492 | 
 493 | 
 494 | 
 495 | 
 496 | 
 497 | 
 498 | 
 499 | U
 500 | 
 501 | 
 502 | 
 503 | 92
 504 | 235
 505 | 
 506 | 
 507 | 
 508 | →
 509 | 
 510 | 7.038
 511 | ×
 512 | 
 513 | 10
 514 | 
 515 | 8
 516 | 
 517 | 
 518 |  
 519 | 
 520 | y
 521 | 
 522 | 
 523 | 
 524 | α
 525 | 
 526 | 
 527 | 
 528 | 
 529 | 
 530 | Th
 531 | 
 532 | 
 533 | 
 534 | 90
 535 | 231
 536 | 
 537 | 
 538 | 
 539 | 
 540 | →
 541 | 
 542 | 25.52
 543 |  
 544 | 
 545 | h
 546 | 
 547 | 
 548 | 
 549 | 
 550 | β
 551 | 
 552 | −
 553 | 
 554 | 
 555 | 
 556 | 
 557 | 
 558 | 
 559 | 
 560 | Pa
 561 | 
 562 | 
 563 | 
 564 | 91
 565 | 231
 566 | 
 567 | 
 568 | 
 569 | 
 570 | →
 571 | 
 572 | 3.276
 573 | ×
 574 | 
 575 | 10
 576 | 
 577 | 4
 578 | 
 579 | 
 580 |  
 581 | 
 582 | y
 583 | 
 584 | 
 585 | 
 586 | α
 587 | 
 588 | 
 589 | 
 590 | 
 591 | 
 592 | Ac
 593 | 
 594 | 
 595 | 
 596 | 89
 597 | 227
 598 | 
 599 | 
 600 | 
 601 | 
 602 | 
 603 | {
 604 | 
 605 | 
 606 | 
 607 | 
 608 | 
 609 | 
 610 | →
 611 | 
 612 | 21.773
 613 |  
 614 | 
 615 | y
 616 | 
 617 | 
 618 | 
 619 | 98.62
 620 | %
 621 | 
 622 | β
 623 | 
 624 | −
 625 | 
 626 | 
 627 | 
 628 | 
 629 | 
 630 | 
 631 | 
 632 | Th
 633 | 
 634 | 
 635 | 
 636 | 90
 637 | 227
 638 | 
 639 | 
 640 | 
 641 | 
 642 | →
 643 | 
 644 | 18.718
 645 |  
 646 | 
 647 | d
 648 | 
 649 | 
 650 | 
 651 | α
 652 | 
 653 | 
 654 | 
 655 | 
 656 | 
 657 | 
 658 | 
 659 | 
 660 | 
 661 | 
 662 | 
 663 | →
 664 | 
 665 | 21.773
 666 |  
 667 | 
 668 | y
 669 | 
 670 | 
 671 | 
 672 | 1.38
 673 | %
 674 | α
 675 | 
 676 | 
 677 | 
 678 | 
 679 | 
 680 | Fr
 681 | 
 682 | 
 683 | 
 684 | 87
 685 | 223
 686 | 
 687 | 
 688 | 
 689 | 
 690 | →
 691 | 
 692 | 21.8
 693 |  
 694 | 
 695 | min
 696 | 
 697 | 
 698 | 
 699 | 
 700 | β
 701 | 
 702 | −
 703 | 
 704 | 
 705 | 
 706 | 
 707 | 
 708 | 
 709 | 
 710 | 
 711 | 
 712 | }
 713 | 
 714 | 
 715 | 
 716 | 
 717 | Ra
 718 | 
 719 | 
 720 | 
 721 | 88
 722 | 223
 723 | 
 724 | 
 725 | 
 726 | →
 727 | 
 728 | 11.434
 729 |  
 730 | 
 731 | d
 732 | 
 733 | 
 734 | 
 735 | α
 736 | 
 737 | 
 738 | 
 739 | 
 740 | 
 741 | Rn
 742 | 
 743 | 
 744 | 
 745 | 86
 746 | 219
 747 | 
 748 | 
 749 | 
 750 | 
 751 | 
 752 | 
 753 | 
 754 | 
 755 | 
 756 | Rn
 757 | 
 758 | 
 759 | 
 760 | 86
 761 | 219
 762 | 
 763 | 
 764 | 
 765 | →
 766 | 
 767 | 3.96
 768 |  
 769 | 
 770 | s
 771 | 
 772 | 
 773 | 
 774 | α
 775 | 
 776 | 
 777 | 
 778 | 
 779 | 
 780 | Po
 781 | 
 782 | 
 783 | 
 784 | 84
 785 | 215
 786 | 
 787 | 
 788 | 
 789 | 
 790 | 
 791 | {
 792 | 
 793 | 
 794 | 
 795 | 
 796 | 
 797 | 
 798 | →
 799 | 
 800 | 1.778
 801 |  
 802 | 
 803 | ms
 804 | 
 805 | 
 806 | 
 807 | 99.99
 808 | %
 809 | α
 810 | 
 811 | 
 812 | 
 813 | 
 814 | 
 815 | Pb
 816 | 
 817 | 
 818 | 
 819 | 82
 820 | 211
 821 | 
 822 | 
 823 | 
 824 | 
 825 | →
 826 | 
 827 | 36.1
 828 |  
 829 | 
 830 | min
 831 | 
 832 | 
 833 | 
 834 | 
 835 | β
 836 | 
 837 | −
 838 | 
 839 | 
 840 | 
 841 | 
 842 | 
 843 | 
 844 | 
 845 | 
 846 | 
 847 | 
 848 | 
 849 | 
 850 | 
 851 | →
 852 | 
 853 | 1.778
 854 |  
 855 | 
 856 | ms
 857 | 
 858 | 
 859 | 
 860 | 2.3
 861 | ×
 862 | 
 863 | 10
 864 | 
 865 | −
 866 | 4
 867 | 
 868 | 
 869 | %
 870 | 
 871 | β
 872 | 
 873 | −
 874 | 
 875 | 
 876 | 
 877 | 
 878 | 
 879 | 
 880 | 
 881 | At
 882 | 
 883 | 
 884 | 
 885 | 85
 886 | 215
 887 | 
 888 | 
 889 | 
 890 | 
 891 | →
 892 | 
 893 | 0.10
 894 |  
 895 | 
 896 | ms
 897 | 
 898 | 
 899 | 
 900 | α
 901 | 
 902 | 
 903 | 
 904 | 
 905 | 
 906 | 
 907 | 
 908 | }
 909 | 
 910 | 
 911 | 
 912 | 
 913 | Bi
 914 | 
 915 | 
 916 | 
 917 | 83
 918 | 211
 919 | 
 920 | 
 921 | 
 922 | 
 923 | {
 924 | 
 925 | 
 926 | 
 927 | 
 928 | 
 929 | 
 930 | →
 931 | 
 932 | 2.13
 933 |  
 934 | 
 935 | min
 936 | 
 937 | 
 938 | 
 939 | 99.73
 940 | %
 941 | α
 942 | 
 943 | 
 944 | 
 945 | 
 946 | 
 947 | Tl
 948 | 
 949 | 
 950 | 
 951 | 81
 952 | 207
 953 | 
 954 | 
 955 | 
 956 | 
 957 | →
 958 | 
 959 | 4.77
 960 |  
 961 | 
 962 | min
 963 | 
 964 | 
 965 | 
 966 | 
 967 | β
 968 | 
 969 | −
 970 | 
 971 | 
 972 | 
 973 | 
 974 | 
 975 | 
 976 | 
 977 | 
 978 | 
 979 | 
 980 | 
 981 | 
 982 | 
 983 | →
 984 | 
 985 | 2.13
 986 |  
 987 | 
 988 | min
 989 | 
 990 | 
 991 | 
 992 | 0.27
 993 | %
 994 | 
 995 | β
 996 | 
 997 | −
 998 | 
 999 | 
1000 | 
1001 | 
1002 | 
1003 | 
1004 | 
1005 | Po
1006 | 
1007 | 
1008 | 
1009 | 84
1010 | 211
1011 | 
1012 | 
1013 | 
1014 | 
1015 | →
1016 | 
1017 | 0.516
1018 |  
1019 | 
1020 | s
1021 | 
1022 | 
1023 | 
1024 | α
1025 | 
1026 | 
1027 | 
1028 | 
1029 | 
1030 | 
1031 | 
1032 | }
1033 | 
1034 | 
1035 | 
1036 | 
1037 | Pb
1038 | 
1039 | (
1040 | stable
1041 | )
1042 | 
1043 | 
1044 | 
1045 | 82
1046 | 207
1047 | 
1048 | 
1049 | 
1050 | 
1051 | 
1052 | 
1053 | 
1054 | 
1055 | {\displaystyle {\begin{array}{r}{\ce {^{235}_{92}U->[\alpha ][7.038\times 10^{8}\ {\ce {y}}]{^{231}_{90}Th}->[\beta ^{-}][25.52\ {\ce {h}}]{^{231}_{91}Pa}->[\alpha ][3.276\times 10^{4}\ {\ce {y}}]{^{227}_{89}Ac}}}{\begin{Bmatrix}{\ce {->[98.62\%\beta ^{-}][21.773\ {\ce {y}}]{^{227}_{90}Th}->[\alpha ][18.718\ {\ce {d}}]}}\\{\ce {->[1.38\%\alpha ][21.773\ {\ce {y}}]{^{223}_{87}Fr}->[\beta ^{-}][21.8\ {\ce {min}}]}}\end{Bmatrix}}{\ce {^{223}_{88}Ra->[\alpha ][11.434\ {\ce {d}}]{^{219}_{86}Rn}}}\\{\ce {^{219}_{86}Rn->[\alpha ][3.96\ {\ce {s}}]{^{215}_{84}Po}}}{\begin{Bmatrix}{\ce {->[99.99\%\alpha ][1.778\ {\ce {ms}}]{^{211}_{82}Pb}->[\beta ^{-}][36.1\ {\ce {min}}]}}\\{\ce {->[2.3\times 10^{-4}\%\beta ^{-}][1.778\ {\ce {ms}}]{^{215}_{85}At}->[\alpha ][0.10\ {\ce {ms}}]}}\end{Bmatrix}}{\ce {^{211}_{83}Bi}}{\begin{Bmatrix}{\ce {->[99.73\%\alpha ][2.13\ {\ce {min}}]{^{207}_{81}Tl}->[\beta ^{-}][4.77\ {\ce {min}}]}}\\{\ce {->[0.27\%\beta ^{-}][2.13\ {\ce {min}}]{^{211}_{84}Po}->[\alpha ][0.516\ {\ce {s}}]}}\end{Bmatrix}}{\ce {^{207}_{82}Pb_{(stable)}}}\end{array}}}
1056 | 
1057 | 
1058 | 
1059 | Uses[edit]
1060 | Uranium-235 has many uses such as fuel for nuclear power plants and in nuclear weapons such as nuclear bombs. Some artificial satellites, such as the SNAP-10A and the RORSATs were powered by nuclear reactors fueled with uranium-235.[7][8]
1061 | 
1062 | References[edit]
1063 | 
1064 | 
1065 | ^ "#Standard Reaction: 235U(n,f)". www-nds.iaea.org. IAEA. Retrieved 4 May 2020.
1066 | 
1067 | ^ ""Some Physics of Uranium", UIC.com.au". Archived from the original on July 17, 2007. Retrieved 2009-01-18.{{cite web}}:  CS1 maint: bot: original URL status unknown (link)
1068 | 
1069 | ^ Jump up to: a b Nuclear fission and fusion, and neutron interactions, National Physical Laboratory Archive.
1070 | 
1071 | ^ "FAS Nuclear Weapons Design FAQ". Archived from the original on 1999-05-07. Retrieved 2010-09-02.
1072 | 
1073 | ^ Nuclear Weapon Design. Federation of American Scientists. Archived from the original on 2008-12-26. Retrieved 2016-06-04.
1074 | 
1075 | ^ Miner, William N.; Schonfeld, Fred W. (1968). "Plutonium". In Clifford A. Hampel (ed.). The Encyclopedia of the Chemical Elements. New York (NY): Reinhold Book Corporation. p. 541. LCCN 68029938.
1076 | 
1077 | ^ Schmidt, Glen (February 2011). "SNAP Overview – radium-219 – general background" (PDF). American Nuclear Society. Retrieved 27 August 2012.
1078 | 
1079 | ^ "RORSAT (Radar Ocean Reconnaissance Satellite)". daviddarling.info.
1080 | 
1081 | 
1082 | External links[edit]
1083 | Table of Nuclides.
1084 | DOE Fundamentals handbook: Nuclear Physics and Reactor theory Vol. 1 Archived 2017-07-31 at the Wayback Machine, Vol. 2 Archived 2016-12-20 at the Wayback Machine.
1085 | Radionuclide Basics: Uranium |US EPA
1086 | NLM Hazardous Substances Databank – Uranium, Radioactive
1087 | "The Miracle of U-235", Popular Mechanics, January 1941—one of the earliest articles on U-235 for the general public
1088 | 
1089 | 
1090 | 
1091 | 
1092 | Lighter:  uranium-234
1093 | 
1094 | Uranium-235 is an  isotope of uranium
1095 | 
1096 | Heavier:  uranium-236
1097 | 
1098 | 
1099 | Decay product of:  protactinium-235neptunium-235plutonium-239
1100 | 
1101 | Decay chain  of uranium-235
1102 | 
1103 | Decays to:  thorium-231
1104 | 
1105 | hidevteMain isotopes of uranium
1106 | 232U
1107 | 233U
1108 | 234U
1109 | 235U
1110 | 236U
1111 | 238U
1112 | 
1113 | 
1114 | 
1115 | 
1116 | 
1117 | Retrieved from "https://en.wikipedia.org/w/index.php?title=Uranium-235&oldid=1191118528"
1118 | Categories: ActinidesFissile materialsIsotopes of uraniumSpecial nuclear materialsRadionuclides used in radiometric datingHidden categories: CS1 maint: bot: original URL status unknownArticles with short descriptionShort description matches WikidataIsotope content pageAll articles with unsourced statementsArticles with unsourced statements from April 2021Webarchive template wayback linksPages that use a deprecated format of the chem tags
1119 | 
1120 | 
1121 | 
1122 | 
1123 | 
1124 | 
1125 |  This page was last edited on 21 December 2023, at 16:48 (UTC).
1126 | Text is available under the Creative Commons Attribution-ShareAlike License 4.0;
1127 | additional terms may apply.  By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
1128 | 
1129 | 
1130 | Privacy policy
1131 | About Wikipedia
1132 | Disclaimers
1133 | Contact Wikipedia
1134 | Code of Conduct
1135 | Developers
1136 | Statistics
1137 | Cookie statement
1138 | Mobile view
1139 | 
1140 | 
1141 | 
1142 | 
1143 | 
1144 | 
1145 | 
1146 | 
1147 | 
1148 | 
1149 | 
1150 | 
1151 | 
1152 | Toggle limited content width
1153 | 
1154 | 
1155 | 
1156 | 
1157 | 
1158 | 
1159 | 


--------------------------------------------------------------------------------
/src/report.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from symai.collect.stats import Aggregator
 4 | 
 5 | 
 6 | agg = Aggregator.load('result/aggregation.json')
 7 | agg.finalize()
 8 | 
 9 | 
10 | # apply mean before returning values
11 | agg.map = lambda x: np.mean(x, axis=0)
12 | 
13 | 
14 | class Report:
15 |     ics     = agg.eval_in_context_associations.test_comparison
16 |     score   = (ics.score - ics.rand_score) / (ics.base_score - ics.rand_score)
17 | 
18 | 
19 | print(Report.score)
20 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import copy
  3 | import parso
  4 | import difflib
  5 | import string
  6 | import numpy as np
  7 | 
  8 | from pathlib import Path
  9 | 
 10 | from symai import Symbol
 11 | 
 12 | 
 13 | success_score = {'scores': [1.0]}
 14 | mock_score    = copy.deepcopy(success_score)
 15 | mock_score.update({'mock': True})
 16 | MOCK_RETURN = (True, mock_score)
 17 | 
 18 | 
 19 | def bool_success(res):
 20 |     return {'scores': [1.0 if res else 0.0]}
 21 | 
 22 | 
 23 | def _is_numpy_array(val):
 24 |     return isinstance(val, np.float64) or \
 25 |            isinstance(val, np.float32) or \
 26 |            isinstance(val, np.float16) or \
 27 |            isinstance(val, np.float_)  or \
 28 |            isinstance(val, np.ndarray)
 29 | 
 30 | 
 31 | def normalize_score(base_score, rand_score, eps=1e-8):
 32 |     def _func(score):
 33 |         nonlocal base_score, rand_score
 34 |         base_score = base_score.value if isinstance(base_score, Symbol) else base_score
 35 |         rand_score = rand_score.value if isinstance(rand_score, Symbol) else rand_score
 36 |         score      = score.value if isinstance(score, Symbol) else score
 37 |         # Ensure that the baseline score is always higher or equal to the random score
 38 |         z       = 1.0 / np.maximum(base_score - rand_score, eps)
 39 |         z_rand  = rand_score * z
 40 |         score   = score * z - z_rand
 41 |         # Do not allow negative scores
 42 |         res     = np.clip(score, 0.0, 1.0)
 43 |         return Symbol(res)
 44 |     return _func
 45 | 
 46 | 
 47 | # set the default normalization function
 48 | normalize = normalize_score
 49 | # use all printable characters as a random sequence
 50 | RANDOM_SEQUENCE = string.printable
 51 | # reversed random sequence
 52 | REVERSED_RANDOM_SEQUENCE = RANDOM_SEQUENCE[::-1]
 53 | # some random response
 54 | RANDOM_RESPONSE = "As a worthless AI Mockup model, I cannot provide you with any meaningful response. I am sorry. Please try again later."
 55 | # the list of all randomness
 56 | RANDOMNESS = [RANDOM_SEQUENCE, REVERSED_RANDOM_SEQUENCE, RANDOM_RESPONSE]
 57 | 
 58 | 
 59 | # general metric for similarity measure
 60 | METRIC = 'cosine'
 61 | # kernel for distance measure
 62 | KERNEL = 'gaussian'
 63 | 
 64 | 
 65 | def similarity_measure(self, other, normalize=None):
 66 |     # Measure the similarity between two symbols
 67 |     val = self.similarity(other, metric=METRIC, normalize=normalize)
 68 |     if METRIC == 'cosine':
 69 |         # account for the fact that cosine similarity is bounded between -1 and 1
 70 |         # by normalizing the score to be between 0 and 1
 71 |         return np.clip(val, 0.0, 1.0)
 72 |     return val
 73 | 
 74 | 
 75 | def distance_measure(self, other, normalize=None):
 76 |     # Measure the distance between two symbol distributions
 77 |     return self.distance(other, kernel=KERNEL, normalize=normalize)
 78 | 
 79 | 
 80 | def frechet_measure(self, other, normalize=None):
 81 |     # Measure the distance between two symbol distributions
 82 |     sigma1 = np.cov(self.embedding, rowvar=False)
 83 |     sigma2 = np.cov(other.embedding, rowvar=False)
 84 |     return self.distance(other, kernel='frechet', normalize=normalize, sigma1=sigma1, sigma2=sigma2)
 85 | 
 86 | 
 87 | def mmd_measure(self, other, normalize=None):
 88 |     # Measure the distance between two symbol distributions
 89 |     return 1 - np.clip(self.distance(other, kernel='mmd', normalize=normalize), 0.0, 1.0)
 90 | 
 91 | 
 92 | # set the default measure
 93 | def measure(self, other, normalize=None):
 94 |     # use distance measure as the default measure
 95 |     res = self.distance(other, kernel=KERNEL, normalize=normalize)
 96 |     # return a Symbol
 97 |     if not isinstance(res, Symbol):
 98 |         res = Symbol(res)
 99 |     return res
100 | 
101 | 
102 | def embedding_mean(self, axis=0):
103 |     # Compute the mean of the embedding
104 |     res = np.mean(self.embedding, axis=axis)
105 |     return Symbol(res)
106 | 
107 | 
108 | def cross_validation_score(self, min_samples=2):
109 |     # Compute the cross validation score
110 |     embeddings = self.embedding
111 |     assert len(embeddings.shape) == 2, "Embeddings must be a 2D array"
112 |     assert embeddings.shape[0] >= min_samples, "There must be at least two embeddings to perform a (cross) validation"
113 |     # permute indices of embeddings shape[0] to all possible combinations
114 |     indices  = np.random.permutation(embeddings.shape[0])
115 |     # compute leave-one-out cross validation score
116 |     scores   = []
117 |     # if there are only two embeddings, then we can only do one fold
118 |     range_   = 1 if embeddings.shape[0] == 2 else embeddings.shape[0]
119 |     for i in range(range_):
120 |         # leave out the i-th embedding
121 |         test_idx       = indices[i]
122 |         test_sample    = embeddings[test_idx]
123 |         if len(test_sample.shape) == 1:
124 |             test_mean  = test_sample
125 |         else:
126 |             test_mean  = np.mean(test_sample, axis=0)
127 |         # use the rest of the embeddings as the training sample
128 |         train_idx      = np.delete(indices, i)
129 |         train_sample   = embeddings[train_idx]
130 |         # compute the mean of the training sample
131 |         if len(train_sample.shape) == 1:
132 |             train_mean = train_sample
133 |         else:
134 |             train_mean = np.mean(train_sample, axis=0)
135 |         assert train_mean.shape == test_mean.shape, "Train and test mean must have the same shape"
136 |         # compute the distance between the test sample and the training mean
137 |         score = Symbol(train_mean).measure(Symbol(test_sample))
138 |         scores.append(score.value)
139 |     return Symbol(np.mean(scores))
140 | 
141 | 
142 | def parse_file_to_ast(filename):
143 |     with open(filename, "r") as file:
144 |         source = file.read()
145 |     # Parse the source code with 9807parso, which is more tolerant to errors
146 |     return parso.parse(source), source
147 | 
148 | 
149 | def tree_to_str(node):
150 |     # Generate a string representation of the nodes in the parse tree
151 |     if node.type == 'endmarker':  # Skip the end marker that parso adds
152 |         return ""
153 |     children_str = ''.join(tree_to_str(child) for child in node.children) if hasattr(node, 'children') else ""
154 |     return f"{node.type}({children_str})"
155 | 
156 | 
157 | def rand_ast_measure(tree, random_sequence=RANDOMNESS):
158 |     if (isinstance(tree, str) or isinstance(tree, Path)) and os.path.exists(tree):
159 |         tree, _ = parse_file_to_ast(tree)
160 |     elif isinstance(tree, str):
161 |         tree = parso.parse(tree)
162 |     elif isinstance(tree, Symbol):
163 |         tree = parso.parse(str(tree))
164 |     # Convert parse trees to string representations
165 |     str_     = tree_to_str(tree)
166 |     # Generate a random parse tree
167 |     score = []
168 |     for rand_seq in random_sequence:
169 |         random_tree  = parso.parse(rand_seq)
170 |         randstr      = tree_to_str(random_tree)
171 |         # Random string match
172 |         matcher         = difflib.SequenceMatcher(None, str_, randstr)
173 |         rand_match  = matcher.ratio()
174 |         score.append(rand_match)
175 |     mean_match = np.mean(score).item()
176 |     return mean_match
177 | 
178 | 
179 | # used as a primitive function for the Symbol class
180 | def ast_measure(self, tree2, normalize=None):
181 |     tree1 = self.value
182 | 
183 |     # Check if the input is a file path or an AST
184 |     if (isinstance(tree1, str) or isinstance(tree1, Path)) and os.path.exists(tree1):
185 |         tree1, _ = parse_file_to_ast(tree1)
186 |     # Assume that the input is a source code string
187 |     elif isinstance(tree1, str):
188 |         tree1    = parso.parse(tree1)
189 | 
190 |     if (isinstance(tree2, str) or isinstance(tree2, Path)) and os.path.exists(tree2):
191 |         tree2, _ = parse_file_to_ast(tree2)
192 |     elif isinstance(tree2, str):
193 |         tree2 = parso.parse(tree2)
194 |     elif isinstance(tree2, Symbol):
195 |         tree2 = parso.parse(str(tree2))
196 | 
197 |     # Convert parse trees to string representations
198 |     str1    = tree_to_str(tree1)
199 |     str2    = tree_to_str(tree2)
200 | 
201 |     # Use SequenceMatcher to calculate the ratio of matching characters
202 |     matcher     = difflib.SequenceMatcher(None, str1, str2)
203 |     match_ratio = matcher.ratio()
204 | 
205 |     # Normalize the match score
206 |     if normalize is not None:
207 |         # Normalize the match score
208 |         match_ratio = normalize(match_ratio)
209 |     return match_ratio
210 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import json
  3 | import argparse
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | import seaborn as sns
  7 | 
  8 | from src.func import run, BENCHMARK_NAME_MAPPING, MODEL_NAME_MAPPING
  9 | 
 10 | 
 11 | def parse_args():
 12 |     parser = argparse.ArgumentParser(description='Run the benchmark.')
 13 |     parser.add_argument('--context_associations', help='Run the in-context associations benchmark.', action='store_true')
 14 |     parser.add_argument('--multimodal_bindings',  help='Run the multimodal bindings benchmark.',     action='store_true')
 15 |     parser.add_argument('--program_synthesis',    help='Run the program synthesis benchmark.',       action='store_true')
 16 |     parser.add_argument('--logic_components',     help='Run the logic components benchmark.',        action='store_true')
 17 |     parser.add_argument('--computation_graphs',   help='Run the computation graphs benchmark.',      action='store_true')
 18 |     parser.add_argument('--all',                  help='Run all benchmarks.',                        action='store_true')
 19 |     parser.add_argument('--dummy',                help='Run the dummy benchmark.',                   action='store_true')
 20 |     parser.add_argument('--verbose',              help='Print additional information.',              action='store_true')
 21 |     parser.add_argument('--models',               help='Run the specified models.',                  default=['all'],       type=str, nargs='+')
 22 |     parser.add_argument('--seeds',                help='Run the specified seeds.',                   default=['all'],       type=int, nargs='+')
 23 |     parser.add_argument('--tests',                help='Run only specific tests.',                   default=['all'],       type=str, nargs='+')
 24 |     parser.add_argument('--plot',                 help='Plot the results.',                                                 type=str)
 25 |     return parser.parse_args()
 26 | 
 27 | 
 28 | LATEX_TEMPLATE = """
 29 | \\begin{{figure*}}[ht]
 30 |   \\centering
 31 |   \\begin{{minipage}}{{.6\\textwidth}}
 32 |     \\centering
 33 |     \\begin{{adjustbox}}{{max width=\\linewidth}}
 34 |     \\begin{{tabular}}{{lccccccccc}}
 35 |       \\toprule
 36 |       \\textbf{{Benchmarks}} &  {model_names} \\\\
 37 |       \\midrule
 38 |       {benchmark_in_context_association_row} \\\\
 39 |       {benchmark_multimodality_row} \\\\
 40 |       {benchmark_program_synthesis_row} \\\\
 41 |       {benchmark_components_row} \\\\
 42 |       {benchmark_computation_graphs_row} \\\\
 43 |       \\midrule
 44 |       \\textbf{{Total}} & {total_row} \\\\
 45 |       \\bottomrule
 46 |     \\end{{tabular}}
 47 |     \\end{{adjustbox}}
 48 |     \\label{{tab:benchmark_results}}
 49 |   \\end{{minipage}}%
 50 |   ~
 51 |   \\begin{{minipage}}{{.4\\textwidth}}
 52 |     \\centering
 53 |     \\begin{{adjustbox}}{{max width=\\linewidth}}
 54 |     \\includegraphics[width=\\linewidth]{{images/benchmark_comparison_chart.pdf}}
 55 |     \\end{{adjustbox}}
 56 |     \\label{{fig:spider_plot}}
 57 |   \\end{{minipage}}
 58 |   \\caption{{Placeholder for performance benchmarks and comparison chart for various models.}}
 59 |   \\label{{fig:my_label}}
 60 | \\end{{figure*}}
 61 | """
 62 | 
 63 | 
 64 | def sort_by_name(model):
 65 |     if 'GPT-4' in model:
 66 |         return 0
 67 |     elif 'GPT-3.5' in model:
 68 |         return 1
 69 |     elif 'Random' in model:
 70 |         return 2
 71 |     elif 'LlaMA' in model:
 72 |         return 3
 73 |     elif 'Mistral' in model:
 74 |         return 4
 75 |     elif 'Zephyr' in model:
 76 |         return 5
 77 |     elif 'Gemini' in model:
 78 |         return 6
 79 |     else:
 80 |         return 7
 81 | 
 82 | 
 83 | def sort_items_by_name(model):
 84 |     return sort_by_name(model[0])
 85 | 
 86 | 
 87 | remap_name = {
 88 |     'GPT-4 Turbo': 'GPT-4 Turbo',
 89 |     'GPT-3.5 Turbo': 'GPT-3.5 Turbo',
 90 |     'Gemini 1.0 Pro': 'Gemini 1.0 Pro',
 91 |     'Gemini 1.5 Pro': 'Gemini 1.5 Pro',
 92 |     'LlaMA 2 13B': 'LlaMA 2 13B',
 93 |     'LlaMA 3 8B': 'LlaMA 3 8B',
 94 |     'LlaMA 3 70B': 'LlaMA 3 70B',
 95 |     'Zephyr 7B': 'Zephyr 7B',
 96 |     'Mistral 7B': 'Mistral 7B',
 97 |     'Random': 'Random'
 98 | }
 99 | 
100 | 
101 | def create_latex_result(data):
102 |     # Define the directory and file name
103 |     directory = 'tmp'
104 |     # make sure the directory exists
105 |     os.makedirs(directory, exist_ok=True)
106 |     filename = 'benchmark_results.tex'
107 |     filepath = os.path.join(directory, filename)
108 | 
109 |     # Gather the model names
110 |     data_model_names = list(data.keys())
111 |     # Sort the models by name
112 |     data_model_names.sort(key=sort_by_name)
113 |     model_names = " & ".join(remap_name[key] for key in data_model_names)
114 | 
115 |     # Initialize the total scores
116 |     total_scores = {model: 0.0 for model in data_model_names}
117 | 
118 |     # Prepare table content
119 |     benchmark_rows = {bench_name: "" for bench_name in BENCHMARK_NAME_MAPPING.values()}
120 |     for bench_name in BENCHMARK_NAME_MAPPING.values():
121 |         if bench_name not in str(list(data.values())):
122 |             print(f"Skipping benchmark because not all results are computed. Did not find `{bench_name}` in `{data.keys()}`")
123 |             return
124 |         # Initialize list to keep the scores for this benchmark to find the best model
125 |         scores = [(model, np.mean([np.mean(run['scores']) for run in values[bench_name]['runs']])) for model, values in data.items()]
126 |         # sort the scores by name following this order: GPT-4, GPT-3.5, Gemini-Pro, LlaMA 2, Mistral, Zephyr, Random
127 |         # write custom sorting function to sort by name
128 |         scores.sort(key=sort_items_by_name)
129 | 
130 |         best_score = max(scores, key=lambda x: x[1])[1]
131 | 
132 |         # Create row for the latex table and update the total scores
133 |         row = f"{bench_name}"
134 |         for model, score in scores:
135 |             # Add to the total score
136 |             total_scores[model] += score
137 |             # Format row with best model in bold
138 |             if score == best_score:
139 |                 row += f" & \\textbf{{{score:.2f}}}"
140 |             else:
141 |                 row += f" & {score:.2f}"
142 |         benchmark_rows[bench_name] = row
143 | 
144 |     # Compute the average of total scores
145 |     for model in total_scores.keys():
146 |         total_scores[model] /= len(BENCHMARK_NAME_MAPPING)
147 | 
148 |     # Best total performance in bold
149 |     best_total = max(total_scores.values())
150 |     total_values = " & ".join(f"\\textbf{{{v:.2f}}}" if v == best_total else f"{v:.2f}" for v in total_scores.values())
151 | 
152 |     # Use the LATEX_TEMPLATE and inject the benchmark rows
153 |     latex_table = LATEX_TEMPLATE.format(
154 |         model_names=model_names,
155 |         benchmark_in_context_association_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_in_context_associations']],
156 |         benchmark_multimodality_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_multimodal_bindings']],
157 |         benchmark_program_synthesis_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_program_synthesis']],
158 |         benchmark_components_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_logic_components']],
159 |         benchmark_computation_graphs_row=benchmark_rows[BENCHMARK_NAME_MAPPING['eval_computation_graphs']],
160 |         total_row=total_values
161 |     )
162 | 
163 |     # Print the latex table to the console
164 |     print(latex_table)
165 | 
166 |     # Save the latex table to a file
167 |     if not os.path.exists(directory):
168 |         os.makedirs(directory)
169 |     with open(filepath, 'w') as f:
170 |         f.write(latex_table)
171 | 
172 |     print(f"LaTeX table saved to {filepath}")
173 | 
174 | 
175 | def create_plot(data):
176 |     # Define the categories and models
177 |     categories = list(next(iter(data.values())).keys())  # Assuming all models have the same structure
178 |     models = list(data.keys())
179 |     N = len(categories)
180 | 
181 |     # Prepare data for plotting
182 |     values = [list(d.values()) for d in data.values()]
183 |     values = [[np.mean([np.mean(run['scores']) for run in v['runs']]) for v in sublist] for sublist in values]
184 |     values = np.array(values)
185 | 
186 |     # Create a radar chart
187 |     angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
188 |     values = np.concatenate((values, values[:,[0]]), axis=1)  # Repeat the first value to close the circle
189 |     angles += angles[:1]
190 | 
191 |     fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
192 |     sns.set_theme(context='paper', style='whitegrid')
193 | 
194 |     def add_to_radar(values, model_name, color):
195 |         if model_name == MODEL_NAME_MAPPING['random']:
196 |             val = np.max(values) # Use the maximum value to draw a circle for the random model
197 |             angles_circle = np.linspace(0, 2 * np.pi, 100)  # Use 100 points to make a smooth circle
198 |             ax.plot(angles_circle, np.full_like(angles_circle, val), '--', linewidth=2, color=color, label=model_name)
199 |             ax.fill(angles_circle, np.full_like(angles_circle, val), color=color, alpha=0.25)
200 |         else:
201 |             ax.plot(angles, values, color=color, linewidth=2, label=model_name)
202 |             ax.fill(angles, values, color=color, alpha=0.25)
203 | 
204 |     colors = [ax._get_lines.get_next_color() for _ in range(len(models))]
205 |     zippped = zip(values, models, colors)
206 |     # sort based on name
207 |     zippped = sorted(zippped, key=lambda x: sort_by_name(x[1]))
208 | 
209 |     # Add each model to the radar chart
210 |     for values, model_name, color in zippped:
211 |         model_name = remap_name[model_name]
212 |         add_to_radar(values, model_name, color)
213 | 
214 |     # Add labels to the plot with increased label padding
215 |     label_padding = 1.1  # Adjust label padding as needed
216 |     ax.set_theta_offset(np.pi / 2)
217 |     ax.set_theta_direction(-1)
218 |     # Define font size
219 |     label_font_size = 15  # Choose desired font size
220 |     ax.set_thetagrids(np.degrees(angles[:-1]), categories, fontsize=label_font_size)
221 |     for label, angle in zip(ax.get_xticklabels(), angles):
222 |         if angle in (0, np.pi):
223 |             label.set_horizontalalignment('center')
224 |         elif 0 < angle < np.pi:
225 |             label.set_horizontalalignment('left')
226 |         else:
227 |             label.set_horizontalalignment('right')
228 |         label.set_position((label_padding, label.get_position()[1]))
229 | 
230 |     # Increase the font size for the legend
231 |     plt.legend(loc='upper right', ncol=4, bbox_to_anchor=(1.38, 1.3, 0, 0), borderaxespad=0., fontsize=label_font_size)
232 | 
233 |     # Set tight layout
234 |     plt.tight_layout()
235 | 
236 |     # Save as PDF
237 |     plt.savefig("tmp/benchmark_comparison_chart.pdf", format="pdf", bbox_inches='tight')
238 | 
239 |     # Show the plot
240 |     plt.show()
241 | 
242 | 
243 | if __name__ == '__main__':
244 |     args = parse_args()
245 |     if args.plot is None:
246 |         results = run(args)
247 |     else:
248 |         with open(args.plot, 'r') as f:
249 |             results = json.load(f)
250 |     create_latex_result(results)
251 |     create_plot(results)
252 | 


--------------------------------------------------------------------------------