├── .gitignore ├── .vscode └── settings.json ├── README.md ├── dev.ipynb ├── example.ipynb ├── img ├── cover2.png └── cover4.png ├── license.txt ├── poetry.lock ├── poetry.toml ├── prob_dist.ipynb ├── prob_jsonformer ├── __init__.py ├── format.py ├── logits_processors.py ├── main.py ├── prob_choice_tree.py └── type_prefixes.py └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .venv 3 | workspace.ipynb 4 | dist 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # prob_jsonformer: Probabilistic Structured JSON from Language Models. 2 | 3 | This fork has been modified to include the token probabilities. This is not complaint with json schema, but it can be useful for efficient extracting of a range of possible values. 4 | 5 | I've also merged some of the recent PR's for enum, integer, null, union. They are not yet included in the upstream Jsonformer. You can see them all below in this example: 6 | 7 | 8 | ~~~ 9 | # installing 10 | pip install git+https://github.com/wassname/prob_jsonformer.git 11 | ~~~ 12 | 13 | 14 | ## Metrics 15 | 16 | How well does it work? Well when I asked is `Q: Please sample a number from the distribution [0, 20]: `, assumming it should be a uniform distribution, this is how well it did: 17 | 18 | Lower is better as it indicates a faithful sampling of the distribution. Time is in seconds. 19 | 20 | | method | KL_div_loss | time | 21 | | :----------------------- | ----------: | -------: | 22 | | method0: sampling | -3.09214 | 48.5044 | 23 | | method1: hindsight | -3.09214 | 0.683987 | 24 | | method3: generation tree | **-3.09216**| **0.075112**| 25 | 26 | KL_div_loss is the -1 * KL divergence between the true distribution and the generated distribution. 27 | 28 | 29 | ## Example 30 | 31 | ```python 32 | from prob_jsonformer import Jsonformer 33 | from transformers import AutoModelForCausalLM, AutoTokenizer 34 | 35 | model_name = "databricks/dolly-v2-3b" 36 | model = AutoModelForCausalLM.from_pretrained(model_name) 37 | tokenizer = AutoTokenizer.from_pretrained(model_name) 38 | 39 | json_schema = { 40 | "type": "object", 41 | "properties": { 42 | # we can return the probability of each choice, even if they are multiple tokens 43 | "age_probs": {"type": "p_enum", "values": [str(s) for s in range(10, 20)]}, 44 | # we can return the probabilistic weighted mean of a range 45 | "age_wmean": {"type": "p_integer", "minimum": 10, "maximum": 20}, 46 | # the prob of true and false 47 | "is_student_probs": {"type": "p_enum", "values": ["true", "false"]}, 48 | "is_student": {"type": "boolean"}, 49 | # we've merged patches for enum, integer, null, union - currently mising from jsonformer 50 | "name": {"type": "string", "maxLength": 4}, 51 | "age": {"type": "integer"}, 52 | "unit_time": {"type": "number"}, 53 | "courses": {"type": "array", "items": {"type": "string"}}, 54 | "trim": {"type": ["string", "null"]}, 55 | "color": { 56 | "type": "enum", 57 | "values": ["red", "green", "blue", "brown", "white", "black"], 58 | }, 59 | }, 60 | } 61 | 62 | prompt = "Generate a young person's information based on the following schema:" 63 | jsonformer = Jsonformer(model, tokenizer, json_schema, prompt, temperature=0) 64 | generated_data = jsonformer() 65 | 66 | generated_data = { 67 | "age_probs": [ 68 | {"prob": 0.62353515625, "choice": "10"}, 69 | {"prob": 0.349609375, "choice": "12"}, 70 | {"prob": 0.01123809814453125, "choice": "11"}, 71 | {"prob": 0.00760650634765625, "choice": "16"}, 72 | {"prob": 0.0025482177734375, "choice": "13"}, 73 | {"prob": 0.0025081634521484375, "choice": "15"}, 74 | {"prob": 0.0018062591552734375, "choice": "14"}, 75 | {"prob": 0.00104522705078125, "choice": "18"}, 76 | {"prob": 0.00011551380157470703, "choice": "17"}, 77 | {"prob": 5.042552947998047e-05, "choice": "19"}, 78 | ], 79 | "age_wmean": 15.544570922851562, 80 | "is_student_probs": [ 81 | {"prob": 0.962890625, "choice": "true"}, 82 | {"prob": 0.037322998046875, "choice": "false"}, 83 | ], 84 | "is_student": False, 85 | "name": "John", 86 | "age": 17, 87 | "unit_time": 0.5, 88 | "courses": ["C++"], 89 | "trim": None, 90 | "color": "green", 91 | } 92 | ``` 93 | 94 | The original [README](https://github.com/1rgs/jsonformer) is included below. 95 | 96 | # ORIGINAL: Jsonformer: A Bulletproof Way to Generate Structured JSON from Language Models. 97 | 98 | ### Problem: Getting models to output structured JSON is hard 99 | 100 | ### Solution: Only generate the content tokens and fill in the fixed tokens 101 | 102 | [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/1rgs/jsonformer/blob/main/Jsonformer_example.ipynb) 103 | 104 | ![cover](img/cover4.png) 105 | 106 | Generating structured JSON from language models is a challenging task. The 107 | generated JSON must be syntactically correct, and it must conform to a schema 108 | that specifies the structure of the JSON. 109 | 110 | Current approaches to this problem are brittle and error-prone. They rely on prompt engineering, fine-tuning, and post-processing, but they still fail to generate syntactically correct JSON in many cases. 111 | 112 | Jsonformer is a new approach to this problem. In structured data, many tokens are fixed and predictable. Jsonformer is a wrapper around Hugging Face models that fills in the fixed tokens during the generation process, and only delegates the generation of content tokens to the language model. This makes it more efficient and bulletproof than existing approaches. 113 | 114 | This currently supports a subset of JSON Schema. Below is a list of the supported schema types: 115 | 116 | - number 117 | - boolean 118 | - string 119 | - array 120 | - object 121 | 122 | ## Example 123 | 124 | ```python 125 | from jsonformer import Jsonformer 126 | from transformers import AutoModelForCausalLM, AutoTokenizer 127 | 128 | model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-12b") 129 | tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-12b") 130 | 131 | json_schema = { 132 | "type": "object", 133 | "properties": { 134 | "name": {"type": "string"}, 135 | "age": {"type": "number"}, 136 | "is_student": {"type": "boolean"}, 137 | "courses": { 138 | "type": "array", 139 | "items": {"type": "string"} 140 | } 141 | } 142 | } 143 | 144 | prompt = "Generate a person's information based on the following schema:" 145 | jsonformer = Jsonformer(model, tokenizer, json_schema, prompt) 146 | generated_data = jsonformer() 147 | 148 | print(generated_data) 149 | ``` 150 | 151 | ### Jsonformer works on complex schemas, even with tiny models. Here is an example of a schema with nested objects and arrays, generated by a 3B parameter model. 152 | 153 | ```python 154 | {"type": "object", "properties": {"car": {"type": "object", "properties": {"make": {"type": "string"}, "model": {"type": "string"}, "year": {"type": "number"}, "colors": {"type": "array", "items": {"type": "string"}}, "features": {"type": "object", "properties": {"audio": {"type": "object", "properties": {"brand": {"type": "string"}, "speakers": {"type": "number"}, "hasBluetooth": {"type": "boolean"}}}, "safety": {"type": "object", "properties": {"airbags": {"type": "number"}, "parkingSensors": {"type": "boolean"}, "laneAssist": {"type": "boolean"}}}, "performance": {"type": "object", "properties": {"engine": {"type": "string"}, "horsepower": {"type": "number"}, "topSpeed": {"type": "number"}}}}}}}, "owner": {"type": "object", "properties": {"firstName": {"type": "string"}, "lastName": {"type": "string"}, "age": {"type": "number"}}}}} 155 | ``` 156 | 157 | ```python 158 | { 159 | car: { 160 | make: "audi", 161 | model: "model A8", 162 | year: 2016.0, 163 | colors: [ 164 | "blue" 165 | ], 166 | features: { 167 | audio: { 168 | brand: "sony", 169 | speakers: 2.0, 170 | hasBluetooth: True 171 | }, 172 | safety: { 173 | airbags: 2.0, 174 | parkingSensors: True, 175 | laneAssist: True 176 | }, 177 | performance: { 178 | engine: "4.0", 179 | horsepower: 220.0, 180 | topSpeed: 220.0 181 | } 182 | } 183 | }, 184 | owner: { 185 | firstName: "John", 186 | lastName: "Doe", 187 | age: 40.0 188 | } 189 | } 190 | ``` 191 | 192 | ## Features 193 | 194 | - Bulletproof JSON generation: Jsonformer ensures that the generated JSON is always syntactically correct and conforms to the specified schema. 195 | - Efficiency: By generating only the content tokens and filling in the fixed tokens, Jsonformer is more efficient than generating a full JSON string and parsing it. 196 | - Flexible and extendable: Jsonformer is built on top of the Hugging Face transformers library, making it compatible with any model that supports the Hugging Face interface. 197 | 198 | ## Installation 199 | 200 | ```bash 201 | pip install jsonformer 202 | ``` 203 | 204 | ## Development 205 | 206 | [Poetry](https://python-poetry.org/docs/#installation) is used for dependency management. 207 | 208 | ```bash 209 | poetry install 210 | ``` 211 | 212 | ```bash 213 | poetry run python -m jsonformer.example 214 | ``` 215 | 216 | ## License 217 | 218 | Jsonformer is released under the MIT License. You are free to use, modify, and distribute this software for any purpose, commercial or non-commercial, as long as the original copyright and license notice are included. 219 | -------------------------------------------------------------------------------- /dev.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# autoreload your package\n", 10 | "%load_ext autoreload\n", 11 | "%autoreload 2" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Loading model and tokenizer...\n" 24 | ] 25 | }, 26 | { 27 | "data": { 28 | "application/vnd.jupyter.widget-view+json": { 29 | "model_id": "c7620cdf45c54ab5abaf479b141f4479", 30 | "version_major": 2, 31 | "version_minor": 0 32 | }, 33 | "text/plain": [ 34 | "config.json: 0%| | 0.00/819 [00:00 29\u001b[0m generated_data \u001b[38;5;241m=\u001b[39m \u001b[43mjsonformer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m generated_data\n", 147 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:439\u001b[0m, in \u001b[0;36mJsonformer.__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m--> 439\u001b[0m generated_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_object\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 440\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_schema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue\u001b[49m\n\u001b[1;32m 441\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m generated_data\n", 148 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:274\u001b[0m, in \u001b[0;36mJsonformer.generate_object\u001b[0;34m(self, properties, obj)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, schema \u001b[38;5;129;01min\u001b[39;00m properties\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[generate_object] generating value for\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[0;32m--> 274\u001b[0m obj[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n", 149 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:351\u001b[0m, in \u001b[0;36mJsonformer.generate_value\u001b[0;34m(self, schema, obj, key)\u001b[0m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 350\u001b[0m obj\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgeneration_marker)\n\u001b[0;32m--> 351\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgenerate_p_enum(\u001b[43mschema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalues\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m, \u001b[38;5;28mround\u001b[39m\u001b[38;5;241m=\u001b[39mschema\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mround\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m3\u001b[39m))\n\u001b[1;32m 352\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m schema_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mp_integer\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key:\n", 150 | "\u001b[0;31mKeyError\u001b[0m: 'values'" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "from prob_jsonformer import Jsonformer\n", 156 | "\n", 157 | "json_schema = {\n", 158 | " \"type\": \"object\",\n", 159 | " \"properties\": {\n", 160 | " # we can return the probability of each choice, even if they are multiple tokens\n", 161 | " \"age_probs\": {\"type\": \"p_enum\", \"enum\": [str(s) for s in range(10, 30)]},\n", 162 | " # we can return the probabilistic weighted mean of a range\n", 163 | " \"age_wmean\": {\"type\": \"p_integer\", \"minimum\": 10, \"maximum\": 30},\n", 164 | " # the prob of true and false\n", 165 | " \"is_student_probs\": {\"type\": \"p_enum\", \"enum\": [\"true\", \"false\"]},\n", 166 | " \"is_student\": {\"type\": \"boolean\"},\n", 167 | " # we've merged patches for enum, integer, null, union - currently mising from jsonformer\n", 168 | " \"name\": {\"type\": \"string\", \"maxLength\": 4},\n", 169 | " \"age\": {\"type\": \"integer\"},\n", 170 | " \"unit_time\": {\"type\": \"number\"},\n", 171 | " \"courses\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n", 172 | " \"trim\": {\"type\": [\"string\", \"null\"]},\n", 173 | " \"color\": {\n", 174 | " \"type\": \"enum\",\n", 175 | " \"values\": [\"red\", \"green\", \"blue\", \"brown\", \"white\", \"black\"],\n", 176 | " },\n", 177 | " },\n", 178 | "}\n", 179 | "\n", 180 | "\n", 181 | "prompt = \"Generate a young person's information based on the following schema:\"\n", 182 | "jsonformer = Jsonformer(model, tokenizer, json_schema, prompt)\n", 183 | "generated_data = jsonformer()\n", 184 | "\n", 185 | "generated_data" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 4, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "generated_data = {\n", 195 | " \"age_probs\": [\n", 196 | " {\"prob\": 0.94091796875, \"choice\": \"10\"},\n", 197 | " {\"prob\": 0.033233642578125, \"choice\": \"20\"},\n", 198 | " {\"prob\": 0.0122222900390625, \"choice\": \"12\"},\n", 199 | " {\"prob\": 0.00412750244140625, \"choice\": \"21\"},\n", 200 | " {\"prob\": 0.0028362274169921875, \"choice\": \"16\"},\n", 201 | " {\"prob\": 0.0018453598022460938, \"choice\": \"15\"},\n", 202 | " {\"prob\": 0.00113677978515625, \"choice\": \"11\"},\n", 203 | " {\"prob\": 0.0011110305786132812, \"choice\": \"18\"},\n", 204 | " {\"prob\": 0.0005083084106445312, \"choice\": \"25\"},\n", 205 | " {\"prob\": 0.0004558563232421875, \"choice\": \"23\"},\n", 206 | " {\"prob\": 0.0002498626708984375, \"choice\": \"14\"},\n", 207 | " {\"prob\": 0.00023281574249267578, \"choice\": \"13\"},\n", 208 | " {\"prob\": 0.0002238750457763672, \"choice\": \"22\"},\n", 209 | " {\"prob\": 0.00018131732940673828, \"choice\": \"26\"},\n", 210 | " {\"prob\": 0.0001690387725830078, \"choice\": \"24\"},\n", 211 | " {\"prob\": 0.00012552738189697266, \"choice\": \"19\"},\n", 212 | " {\"prob\": 7.796287536621094e-05, \"choice\": \"27\"},\n", 213 | " {\"prob\": 7.265806198120117e-05, \"choice\": \"28\"},\n", 214 | " {\"prob\": 4.106760025024414e-05, \"choice\": \"17\"},\n", 215 | " {\"prob\": 2.5033950805664062e-06, \"choice\": \"29\"},\n", 216 | " ],\n", 217 | " \"age_wmean\": 17.816404402256012,\n", 218 | " \"is_student_probs\": [\n", 219 | " {\"prob\": 0.974609375, \"choice\": \"true\"},\n", 220 | " {\"prob\": 0.025177001953125, \"choice\": \"false\"},\n", 221 | " ],\n", 222 | " \"is_student\": False,\n", 223 | " \"name\": \"John\",\n", 224 | " \"age\": 17,\n", 225 | " \"unit_time\": 0.5,\n", 226 | " \"courses\": [\"C++\"],\n", 227 | " \"trim\": None,\n", 228 | " \"color\": \"white\",\n", 229 | "}" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [] 238 | } 239 | ], 240 | "metadata": { 241 | "kernelspec": { 242 | "display_name": "pytorch_hf_env", 243 | "language": "python", 244 | "name": "pytorch_hf_env" 245 | }, 246 | "language_info": { 247 | "codemirror_mode": { 248 | "name": "ipython", 249 | "version": 3 250 | }, 251 | "file_extension": ".py", 252 | "mimetype": "text/x-python", 253 | "name": "python", 254 | "nbconvert_exporter": "python", 255 | "pygments_lexer": "ipython3", 256 | "version": "3.10.16" 257 | } 258 | }, 259 | "nbformat": 4, 260 | "nbformat_minor": 2 261 | } 262 | -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# autoreload your package\n", 10 | "%load_ext autoreload\n", 11 | "%autoreload 2" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Loading model and tokenizer...\n", 24 | "Loaded model and tokenizer\n" 25 | ] 26 | } 27 | ], 28 | "source": [ 29 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 30 | "import torch\n", 31 | "\n", 32 | "print(\"Loading model and tokenizer...\")\n", 33 | "model_name = \"databricks/dolly-v2-3b\"\n", 34 | "model = AutoModelForCausalLM.from_pretrained(\n", 35 | " model_name,\n", 36 | " use_cache=True,\n", 37 | " torch_dtype=torch.float16,\n", 38 | " attn_implementation=\"eager\",\n", 39 | ").to(\"cuda:0\")\n", 40 | "tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_cache=True)\n", 41 | "print(\"Loaded model and tokenizer\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "# Continue" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stderr", 58 | "output_type": "stream", 59 | "text": [ 60 | "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n" 61 | ] 62 | }, 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "Generating...\n" 68 | ] 69 | }, 70 | { 71 | "ename": "KeyError", 72 | "evalue": "'values'", 73 | "output_type": "error", 74 | "traceback": [ 75 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 76 | "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", 77 | "Cell \u001b[0;32mIn[3], line 47\u001b[0m\n\u001b[1;32m 38\u001b[0m builder \u001b[38;5;241m=\u001b[39m Jsonformer(\n\u001b[1;32m 39\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 40\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m max_string_token_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[1;32m 44\u001b[0m )\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerating...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 47\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 49\u001b[0m highlight_values(output)\n", 78 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:439\u001b[0m, in \u001b[0;36mJsonformer.__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m--> 439\u001b[0m generated_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_object\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 440\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_schema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue\u001b[49m\n\u001b[1;32m 441\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m generated_data\n", 79 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:274\u001b[0m, in \u001b[0;36mJsonformer.generate_object\u001b[0;34m(self, properties, obj)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, schema \u001b[38;5;129;01min\u001b[39;00m properties\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[generate_object] generating value for\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[0;32m--> 274\u001b[0m obj[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n", 80 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:376\u001b[0m, in \u001b[0;36mJsonformer.generate_value\u001b[0;34m(self, schema, obj, key)\u001b[0m\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 375\u001b[0m obj\u001b[38;5;241m.\u001b[39mappend(new_obj)\n\u001b[0;32m--> 376\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 377\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m schema_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnull\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 378\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", 81 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:274\u001b[0m, in \u001b[0;36mJsonformer.generate_object\u001b[0;34m(self, properties, obj)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, schema \u001b[38;5;129;01min\u001b[39;00m properties\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 273\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[generate_object] generating value for\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[0;32m--> 274\u001b[0m obj[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n", 82 | "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:351\u001b[0m, in \u001b[0;36mJsonformer.generate_value\u001b[0;34m(self, schema, obj, key)\u001b[0m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 350\u001b[0m obj\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgeneration_marker)\n\u001b[0;32m--> 351\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgenerate_p_enum(\u001b[43mschema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalues\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m, \u001b[38;5;28mround\u001b[39m\u001b[38;5;241m=\u001b[39mschema\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mround\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m3\u001b[39m))\n\u001b[1;32m 352\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m schema_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mp_integer\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m key:\n", 83 | "\u001b[0;31mKeyError\u001b[0m: 'values'" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "from prob_jsonformer.format import highlight_values\n", 89 | "from prob_jsonformer.main import Jsonformer\n", 90 | "\n", 91 | "ecomm = {\n", 92 | " \"type\": \"object\",\n", 93 | " \"properties\": {\n", 94 | " \"store\": {\n", 95 | " \"type\": \"object\",\n", 96 | " \"properties\": {\n", 97 | " \"name\": {\"type\": \"string\"},\n", 98 | " \"location\": {\"type\": \"string\"},\n", 99 | " \"p_enum\": {\n", 100 | " \"type\": \"p_enum\",\n", 101 | " \"enum\": [\"ski\", \"snowboard\", \"walk\", \"pretend\"],\n", 102 | " },\n", 103 | " \"inventory\": {\n", 104 | " \"type\": \"array\",\n", 105 | " \"items\": {\n", 106 | " \"type\": \"object\",\n", 107 | " \"properties\": {\n", 108 | " \"productId\": {\"type\": \"string\"},\n", 109 | " \"name\": {\"type\": \"string\"},\n", 110 | " \"description\": {\"type\": \"string\"},\n", 111 | " \"category\": {\"type\": \"string\"},\n", 112 | " \"price\": {\"type\": \"number\"},\n", 113 | " \"inStock\": {\"type\": \"boolean\"},\n", 114 | " \"rating\": {\"type\": \"number\"},\n", 115 | " \"images\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n", 116 | " },\n", 117 | " },\n", 118 | " },\n", 119 | " },\n", 120 | " }\n", 121 | " },\n", 122 | "}\n", 123 | "\n", 124 | "\n", 125 | "builder = Jsonformer(\n", 126 | " model=model,\n", 127 | " tokenizer=tokenizer,\n", 128 | " json_schema=ecomm,\n", 129 | " prompt=\"write a description about mike's ski shop which sells premium skis and snowboards\",\n", 130 | " max_string_token_length=20,\n", 131 | ")\n", 132 | "\n", 133 | "print(\"Generating...\")\n", 134 | "output = builder()\n", 135 | "\n", 136 | "highlight_values(output)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "car = {\n", 146 | " \"type\": \"object\",\n", 147 | " \"properties\": {\n", 148 | " \"make\": {\"type\": \"string\"},\n", 149 | " \"model\": {\"type\": \"p_enum\", \"enum\": [\"Mazda\", \"Kea\"]},\n", 150 | " \"new\": {\"type\": \"p_enum\", \"enum\": [\"true\", \"false\"]},\n", 151 | " \"rating\": {\"type\": \"p_enum\", \"enum\": [\"1\", \"2\", \"3\", \"4\"]},\n", 152 | " \"year\": {\"type\": \"number\"},\n", 153 | " \"colors_available\": {\n", 154 | " \"type\": \"array\",\n", 155 | " \"items\": {\"type\": \"string\"},\n", 156 | " },\n", 157 | " },\n", 158 | "}\n", 159 | "\n", 160 | "builder = Jsonformer(\n", 161 | " model=model,\n", 162 | " tokenizer=tokenizer,\n", 163 | " json_schema=car,\n", 164 | " prompt=\"generate an example car\",\n", 165 | ")\n", 166 | "\n", 167 | "print(\"Generating...\")\n", 168 | "output = builder()\n", 169 | "\n", 170 | "highlight_values(output)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "complex_car = {\n", 180 | " \"type\": \"object\",\n", 181 | " \"properties\": {\n", 182 | " \"car\": {\n", 183 | " \"type\": \"object\",\n", 184 | " \"properties\": {\n", 185 | " \"make\": {\"type\": \"string\"},\n", 186 | " \"model\": {\"type\": \"string\"},\n", 187 | " \"year\": {\"type\": \"number\"},\n", 188 | " \"colors\": {\n", 189 | " \"type\": \"p_enum\",\n", 190 | " \"enum\": [\"red\", \"green\", \"blue\", \"black\", \"white\"],\n", 191 | " },\n", 192 | " \"as_new\": {\"type\": \"p_enum\", \"enum\": [\"true\", \"false\"]},\n", 193 | " \"rating\": {\"type\": \"p_enum\", \"enum\": [\"1\", \"2\", \"3\", \"4\"]},\n", 194 | " \"features\": {\n", 195 | " \"type\": \"object\",\n", 196 | " \"properties\": {\n", 197 | " \"audio\": {\n", 198 | " \"type\": \"object\",\n", 199 | " \"properties\": {\n", 200 | " \"brand\": {\"type\": \"string\"},\n", 201 | " \"speakers\": {\"type\": \"number\"},\n", 202 | " \"hasBluetooth\": {\"type\": \"boolean\"},\n", 203 | " },\n", 204 | " },\n", 205 | " \"safety\": {\n", 206 | " \"type\": \"object\",\n", 207 | " \"properties\": {\n", 208 | " \"airbags\": {\"type\": \"number\"},\n", 209 | " \"parkingSensors\": {\"type\": \"boolean\"},\n", 210 | " \"laneAssist\": {\"type\": \"boolean\"},\n", 211 | " },\n", 212 | " },\n", 213 | " \"performance\": {\n", 214 | " \"type\": \"object\",\n", 215 | " \"properties\": {\n", 216 | " \"engine\": {\"type\": \"string\"},\n", 217 | " \"horsepower\": {\"type\": \"number\"},\n", 218 | " \"topSpeed\": {\"type\": \"number\"},\n", 219 | " },\n", 220 | " },\n", 221 | " },\n", 222 | " },\n", 223 | " },\n", 224 | " },\n", 225 | " \"owner\": {\n", 226 | " \"type\": \"object\",\n", 227 | " \"properties\": {\n", 228 | " \"firstName\": {\"type\": \"string\"},\n", 229 | " \"lastName\": {\"type\": \"string\"},\n", 230 | " \"age\": {\"type\": \"number\"},\n", 231 | " },\n", 232 | " },\n", 233 | " },\n", 234 | "}\n", 235 | "builder = Jsonformer(\n", 236 | " model=model,\n", 237 | " tokenizer=tokenizer,\n", 238 | " json_schema=complex_car,\n", 239 | " prompt=\"generate an example Rolls Royce Phantom\",\n", 240 | ")\n", 241 | "\n", 242 | "print(\"Generating...\")\n", 243 | "output = builder()\n", 244 | "\n", 245 | "highlight_values(output)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "## Readme example" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": {}, 259 | "outputs": [], 260 | "source": [ 261 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 262 | "\n", 263 | "model_name = \"databricks/dolly-v2-3b\"\n", 264 | "model = AutoModelForCausalLM.from_pretrained(model_name)\n", 265 | "tokenizer = AutoTokenizer.from_pretrained(model_name)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "{'age_probs': [{'prob': 0.62353515625, 'choice': '10'},\n", 277 | " {'prob': 0.349609375, 'choice': '12'},\n", 278 | " {'prob': 0.01123809814453125, 'choice': '11'},\n", 279 | " {'prob': 0.00760650634765625, 'choice': '16'},\n", 280 | " {'prob': 0.0025482177734375, 'choice': '13'},\n", 281 | " {'prob': 0.0025081634521484375, 'choice': '15'},\n", 282 | " {'prob': 0.0018062591552734375, 'choice': '14'},\n", 283 | " {'prob': 0.00104522705078125, 'choice': '18'},\n", 284 | " {'prob': 0.00011551380157470703, 'choice': '17'},\n", 285 | " {'prob': 5.042552947998047e-05, 'choice': '19'}],\n", 286 | " 'age_wmean': 15.544570922851562,\n", 287 | " 'is_student_probs': [{'prob': 0.962890625, 'choice': 'true'},\n", 288 | " {'prob': 0.037322998046875, 'choice': 'false'}],\n", 289 | " 'is_student': False,\n", 290 | " 'name': 'John',\n", 291 | " 'age': 17,\n", 292 | " 'unit_time': 0.5,\n", 293 | " 'courses': ['C++'],\n", 294 | " 'trim': None,\n", 295 | " 'color': 'green'}" 296 | ] 297 | }, 298 | "execution_count": 9, 299 | "metadata": {}, 300 | "output_type": "execute_result" 301 | } 302 | ], 303 | "source": [ 304 | "from prob_jsonformer import Jsonformer\n", 305 | "\n", 306 | "json_schema = {\n", 307 | " \"type\": \"object\",\n", 308 | " \"properties\": {\n", 309 | " # we can return the probability of each choice, even if they are multiple tokens\n", 310 | " \"age_probs\": {\"type\": \"p_enum\", \"values\": [str(s) for s in range(10, 20)]},\n", 311 | " # we can return the probabilistic weighted mean of a range\n", 312 | " \"age_wmean\": {\"type\": \"p_integer\", \"minimum\": 10, \"maximum\": 20},\n", 313 | " # the prob of true and false\n", 314 | " \"is_student_probs\": {\"type\": \"p_enum\", \"values\": [\"true\", \"false\"]},\n", 315 | " \"is_student\": {\"type\": \"boolean\"},\n", 316 | " # we've merged patches for enum, integer, null, union - currently mising from jsonformer\n", 317 | " \"name\": {\"type\": \"string\", \"maxLength\": 4},\n", 318 | " \"age\": {\"type\": \"integer\"},\n", 319 | " \"unit_time\": {\"type\": \"number\"},\n", 320 | " \"courses\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n", 321 | " \"trim\": {\"type\": [\"string\", \"null\"]},\n", 322 | " \"color\": {\n", 323 | " \"type\": \"enum\",\n", 324 | " \"values\": [\"red\", \"green\", \"blue\", \"brown\", \"white\", \"black\"],\n", 325 | " },\n", 326 | " },\n", 327 | "}\n", 328 | "prompt = \"Generate a young person's information based on the following schema:\"\n", 329 | "jsonformer = Jsonformer(model, tokenizer, json_schema, prompt)\n", 330 | "generated_data = jsonformer()\n", 331 | "\n", 332 | "generated_data" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [] 341 | } 342 | ], 343 | "metadata": { 344 | "kernelspec": { 345 | "display_name": "pytorch_hf_env", 346 | "language": "python", 347 | "name": "pytorch_hf_env" 348 | }, 349 | "language_info": { 350 | "codemirror_mode": { 351 | "name": "ipython", 352 | "version": 3 353 | }, 354 | "file_extension": ".py", 355 | "mimetype": "text/x-python", 356 | "name": "python", 357 | "nbconvert_exporter": "python", 358 | "pygments_lexer": "ipython3", 359 | "version": "3.10.16" 360 | }, 361 | "orig_nbformat": 4 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 2 365 | } 366 | -------------------------------------------------------------------------------- /img/cover2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wassname/prob_jsonformer/b639079d045ab174398762e3b1e1fdca1d8d30ef/img/cover2.png -------------------------------------------------------------------------------- /img/cover4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wassname/prob_jsonformer/b639079d045ab174398762e3b1e1fdca1d8d30ef/img/cover4.png -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (c) 2018 Rahul Sengottuvelu 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. -------------------------------------------------------------------------------- /poetry.toml: -------------------------------------------------------------------------------- 1 | [virtualenvs] 2 | in-project = true 3 | -------------------------------------------------------------------------------- /prob_dist.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# autoreload your package\n", 10 | "%load_ext autoreload\n", 11 | "%autoreload 2" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stderr", 21 | "output_type": "stream", 22 | "text": [ 23 | "/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 24 | " from .autonotebook import tqdm as notebook_tqdm\n" 25 | ] 26 | }, 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | "Loading model and tokenizer...\n" 32 | ] 33 | }, 34 | { 35 | "name": "stderr", 36 | "output_type": "stream", 37 | "text": [ 38 | "/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/.venv/lib/python3.9/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", 39 | " warnings.warn(\n", 40 | "Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00, 1.42it/s]\n", 41 | "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n" 42 | ] 43 | }, 44 | { 45 | "name": "stdout", 46 | "output_type": "stream", 47 | "text": [ 48 | "Loaded model and tokenizer\n" 49 | ] 50 | } 51 | ], 52 | "source": [ 53 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 54 | "import torch\n", 55 | "\n", 56 | "from prob_jsonformer import Jsonformer\n", 57 | "\n", 58 | "print(\"Loading model and tokenizer...\")\n", 59 | "model_name = \"databricks/dolly-v2-3b\"\n", 60 | "model_name = \"NousResearch/Meta-Llama-3-8B-Instruct\".lower()\n", 61 | "# model_name = \"failspy/Llama-3-8B-Instruct-abliterated\"\n", 62 | "# model_name = \"cognitivecomputations/Llama-3-8B-Instruct-abliterated-v2\"\n", 63 | "# model_name = \"nvidia/Llama3-ChatQA-1.5-8B\" # 4b\n", 64 | "# model_name = \"CohereForAI/c4ai-command-r-v01-4bit\" # 35b/4 = 8.75b\n", 65 | "model = AutoModelForCausalLM.from_pretrained(\n", 66 | " model_name,\n", 67 | " use_cache=True,\n", 68 | " torch_dtype=torch.float16,\n", 69 | " attn_implementation=\"eager\",\n", 70 | ").to(\"cuda:0\")\n", 71 | "tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_cache=True)\n", 72 | "tokenizer.padding_side = \"left\"\n", 73 | "print(\"Loaded model and tokenizer\")" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 3, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "if tokenizer.pad_token_id is None:\n", 83 | " tokenizer.pad_token_id = tokenizer.bos_token_id" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## Prob dist\n", 91 | "\n", 92 | "- Can LLM's sample from a distribution? http://people.csail.mit.edu/renda/llm-sampling-paper\n", 93 | "- Can they forecast events https://arxiv.org/abs/2402.07862" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 4, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "from prob_jsonformer.prob_choice_tree import prob_choice_tree\n", 103 | "import pandas as pd\n", 104 | "import torch.nn.functional as F\n", 105 | "from tqdm.auto import tqdm" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 171, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "def method0(choices, n=400):\n", 115 | " \"\"\"\n", 116 | " just generate many times\n", 117 | " \"\"\"\n", 118 | "\n", 119 | " toks = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\n", 120 | " data = []\n", 121 | " i = 0\n", 122 | " while i\n", 240 | "\n", 253 | "\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | "
0
40.073333
50.100000
70.100000
80.080000
100.046667
110.046667
120.125556
130.034444
140.367778
150.025556
\n", 303 | "" 304 | ], 305 | "text/plain": [ 306 | " 0\n", 307 | "4 0.073333\n", 308 | "5 0.100000\n", 309 | "7 0.100000\n", 310 | "8 0.080000\n", 311 | "10 0.046667\n", 312 | "11 0.046667\n", 313 | "12 0.125556\n", 314 | "13 0.034444\n", 315 | "14 0.367778\n", 316 | "15 0.025556" 317 | ] 318 | }, 319 | "execution_count": 139, 320 | "metadata": {}, 321 | "output_type": "execute_result" 322 | } 323 | ], 324 | "source": [ 325 | "t0 = pd.Timestamp.now()\n", 326 | "r0 = method0(choices, n=900)\n", 327 | "t0 = pd.Timestamp.now() - t0\n", 328 | "r0" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 163, 334 | "metadata": {}, 335 | "outputs": [], 336 | "source": [ 337 | "t1 = pd.Timestamp.now()\n", 338 | "r1 = method1(choices)\n", 339 | "t1 = pd.Timestamp.now() - t1\n" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 164, 345 | "metadata": {}, 346 | "outputs": [], 347 | "source": [ 348 | "t3 = pd.Timestamp.now()\n", 349 | "r3 = method3(choices)\n", 350 | "t3 = pd.Timestamp.now() - t3" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 172, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "data": { 360 | "text/html": [ 361 | "
\n", 362 | "\n", 375 | "\n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | "
method0: samplingmethod1: hindsightmethod3: generation treeideal
00.0000000.0006420.03650.047619
10.0000000.0028120.06670.047619
20.0000000.0010100.07100.047619
30.0000000.0090760.06530.047619
40.0733330.0288440.07860.047619
50.1000000.0439820.09990.047619
60.0000000.0138400.06620.047619
70.1000000.0639930.08580.047619
80.0800000.0591840.06820.047619
90.0000000.0122130.07450.047619
100.0466670.0376200.03270.047619
110.0466670.0453780.01870.047619
120.1255560.1195550.03070.047619
130.0344440.0316790.02790.047619
140.3677780.4238600.02740.047619
150.0255560.0426290.03120.047619
160.0000000.0102850.03440.047619
170.0000000.0347930.02350.047619
180.0000000.0174950.02250.047619
190.0000000.0011010.01090.047619
200.0000000.0000100.02770.047619
\n", 535 | "
" 536 | ], 537 | "text/plain": [ 538 | " method0: sampling method1: hindsight method3: generation tree ideal\n", 539 | "0 0.000000 0.000642 0.0365 0.047619\n", 540 | "1 0.000000 0.002812 0.0667 0.047619\n", 541 | "2 0.000000 0.001010 0.0710 0.047619\n", 542 | "3 0.000000 0.009076 0.0653 0.047619\n", 543 | "4 0.073333 0.028844 0.0786 0.047619\n", 544 | "5 0.100000 0.043982 0.0999 0.047619\n", 545 | "6 0.000000 0.013840 0.0662 0.047619\n", 546 | "7 0.100000 0.063993 0.0858 0.047619\n", 547 | "8 0.080000 0.059184 0.0682 0.047619\n", 548 | "9 0.000000 0.012213 0.0745 0.047619\n", 549 | "10 0.046667 0.037620 0.0327 0.047619\n", 550 | "11 0.046667 0.045378 0.0187 0.047619\n", 551 | "12 0.125556 0.119555 0.0307 0.047619\n", 552 | "13 0.034444 0.031679 0.0279 0.047619\n", 553 | "14 0.367778 0.423860 0.0274 0.047619\n", 554 | "15 0.025556 0.042629 0.0312 0.047619\n", 555 | "16 0.000000 0.010285 0.0344 0.047619\n", 556 | "17 0.000000 0.034793 0.0235 0.047619\n", 557 | "18 0.000000 0.017495 0.0225 0.047619\n", 558 | "19 0.000000 0.001101 0.0109 0.047619\n", 559 | "20 0.000000 0.000010 0.0277 0.047619" 560 | ] 561 | }, 562 | "execution_count": 172, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | }, 566 | { 567 | "data": { 568 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiwAAAGhCAYAAABCse9yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABWQElEQVR4nO3de1wU5f4H8M+yXJaLXATlYuiiqEiiKAiBppabYGai5S2PIHlLxeTgvRQ0L6gRoemB8uT1VFrnqF1MTBEsDUExtfKGJkEpePsJggoKz+8PX4yugLILyICf9+s1L9l5nvnOM7PCfnZmdlYhhBAgIiIikjGD+h4AERER0eMwsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsGdb3AGpDWVkZLly4gCZNmkChUNT3cIiIiKgahBC4ceMGnJycYGDw6GMojSKwXLhwAc7OzvU9DCIiItJDTk4OnnnmmUf2aRSBpUmTJgDubbClpWU9j4aIiIiqo6CgAM7OztLr+KM0isBSfhrI0tKSgYWIiKiBqc7lHLzoloiIiGSPgYWIiIhkj4GFiIiIZK9RXMNSXaWlpbhz5059D4PoqWZkZASlUlnfwyCiBuapCCxCCOTm5uL69ev1PRQiAmBtbQ0HBwfeN4mIqu2pCCzlYaV58+YwMzPjH0mieiKEwM2bN3Hp0iUAgKOjYz2PiIgaikYfWEpLS6WwYmtrW9/DIXrqmZqaAgAuXbqE5s2b8/QQEVVLo7/otvyaFTMzs3oeCRGVK/995DVlRFRdjT6wlONpICL54O8jEenqqQksRERE1HAxsBAAICUlBQqFok4+SaVQKLB9+/ZarytHD+/H9evXw9raul7HRETUGDT6i24fRT17xxNdX9bS/k90fVXp3bs3PD09ERcXVy/rv3btGqZMmYJvv/0WBgYGeO2117BixQpYWFjUy3jq0rBhw/Dyyy/X9zCIiBo8HmGhJ27kyJH4/fffsXv3bnz33Xf48ccfMX78+PoeVp0wNTVF8+bN63sYREQNHgOLjPXu3RtTpkxBeHg4bGxsYG9vjzVr1qCoqAihoaFo0qQJXF1dsXPnTq3lfvvtN/Tr1w8WFhawt7fHqFGjcOXKFQDA6NGjsW/fPqxYsQIKhQIKhQJZWVnSshkZGfD29oaZmRn8/f1x+vRprdrx8fFo06YNjI2N0b59e2zatEmrPTMzEz179oRKpYK7uzt2796t1X7y5EkkJibi3//+N3x9fdGjRw989NFH2Lx5My5cuFDtffPnn39iwIABsLGxgbm5OZ599ll8//33AO59lH3MmDFwcXGBqakp2rdvjxUrVmgtP3r0aAQFBWHJkiWwt7eHtbU13nvvPdy9exczZsxA06ZN8cwzz2DdunXSMllZWVAoFNi8eTP8/f2hUqnQsWNH7Nu3r8pxPnxKaP78+fD09MSmTZugVqthZWWF4cOH48aNG1KfGzduYOTIkTA3N4ejoyM+/PBD9O7dG+Hh4dXeP0REjQ0Di8xt2LABdnZ2SE9Px5QpUzBx4kQMGTIE/v7+OHLkCPr27YtRo0bh5s2bAIDr16/jxRdfRJcuXXD48GEkJiYiLy8PQ4cOBQCsWLECfn5+GDduHC5evIiLFy/C2dlZWt+7776LDz74AIcPH4ahoSHefPNNqW3btm2YOnUqpk2bht9++w0TJkxAaGgokpOTAQBlZWUYPHgwjI2NkZaWhoSEBMyaNUtre1JTU2FtbQ1vb29pnkajgYGBAdLS0qR5CoUC69evr3K/TJ48GcXFxfjxxx/x66+/YtmyZdIppbKyMjzzzDP46quvcOLECURGRuKdd97Bl19+qVVj7969uHDhAn788UfExsYiKioKr7zyCmxsbJCWloa33noLEyZMwF9//aW13IwZMzBt2jT88ssv8PPzw4ABA3D16tXHPpflzp07h+3bt+O7777Dd999h3379mHp0qVSe0REBA4cOIBvvvkGu3fvxk8//YQjR45Uuz4RUWPEwCJznTt3xty5c9G2bVvMmTMHKpUKdnZ2GDduHNq2bYvIyEhcvXoVx48fBwCsWrUKXbp0wZIlS+Dm5oYuXbpg7dq1SE5OxpkzZ2BlZQVjY2OYmZnBwcEBDg4OWjfuWrx4MXr16gV3d3fMnj0bP//8M27fvg0AiImJwejRozFp0iS0a9cOERERGDx4MGJiYgAAe/bswalTp7Bx40Z07twZPXv2xJIlS7S2Jzc3t8IpEkNDQzRt2hS5ubnSvPbt28PKyqrK/ZKdnY3u3bvDw8MDrVu3xiuvvIKePXsCuPddNQsWLIC3tzdcXFwwcuRIhIaGVggsTZs2xcqVK9G+fXu8+eabaN++PW7evIl33nlH2t/GxsbYv3+/1nJhYWF47bXX0KFDB8THx8PKygqffvpptZ5P4F6gWr9+PTp27Ijnn38eo0aNQlJSEoB7R1c2bNiAmJgY9OnTBx07dsS6detQWlpa7fpENTbf6v5EJBMMLDLXqVMn6WelUglbW1t4eHhI8+zt7QFAutX5sWPHkJycDAsLC2lyc3MDcO+dvS7rK79tenntkydPonv37lr9u3fvjpMnT0rtzs7OcHJyktr9/Pyqv7EPOHXqFAYNGlRl+9tvv41Fixahe/fuiIqKkgJbudWrV8PLywvNmjWDhYUFPvnkE2RnZ2v1efbZZ2FgcP9XwN7eXmvflu/v8u2vbJsMDQ3h7e0t7YPqUKvVaNKkifTY0dFRWscff/yBO3fuwMfHR2q3srJC+/btq12fiKgxYmCROSMjI63HCoVCa175DbjKysoAAIWFhRgwYACOHj2qNZVfW6LL+h6uXRscHBwqBIC7d+/i2rVrcHBwqHadsWPH4o8//sCoUaPw66+/wtvbGx999BEAYPPmzZg+fTrGjBmDH374AUePHkVoaChKSkq0ajxu35bPq83tr2q9tb0OIqLGhoGlkenatSt+//13qNVquLq6ak3m5uYAAGNjY71OMXTo0AEHDhzQmnfgwAG4u7tL7Tk5Obh48aLUfvDgQa3+fn5+uH79OjIyMqR5e/fuRVlZGXx9fXUaj7OzM9566y1s3boV06ZNw5o1a6Qx+fv7Y9KkSejSpQtcXV2rdXSpuh7cprt37yIjIwMdOnSoldqtW7eGkZERDh06JM3Lz8/HmTNnaqU+EVFDxcDSyEyePBnXrl3DiBEjcOjQIZw7dw67du1CaGioFFLUajXS0tKQlZWFK1euVPvd/YwZM7B+/XrEx8cjMzMTsbGx2Lp1K6ZPnw7g3sWz7dq1Q0hICI4dO4affvoJ7777rlaNDh06IDAwEOPGjUN6ejoOHDiAsLAwDB8+XOtUkpubG7Zt21blWMLDw7Fr1y6cP38eR44cQXJyshQa2rZti8OHD2PXrl04c+YM5s2bpxUAamr16tXYtm0bTp06hcmTJ+P//u//tC5OrokmTZogJCQEM2bMQHJyMn7//XeMGTMGBgYGvJ09ET3Vnuobx8nlRm61ycnJCQcOHMCsWbPQt29fFBcXo1WrVggMDJSu15g+fTpCQkLg7u6OW7du4fz589WqHRQUhBUrViAmJgZTp06Fi4sL1q1bh969ewMADAwMsG3bNowZMwY+Pj5Qq9VYuXIlAgMDtep89tlnCAsLQ58+faQbx61cuVKrz+nTp5Gfn1/lWEpLSzF58mT89ddfsLS0RGBgID788EMAwIQJE/DLL79g2LBhUCgUGDFiBCZNmlTh49/6Wrp0KZYuXYqjR4/C1dUV33zzDezs7GqlNgDExsbirbfewiuvvAJLS0vMnDkTOTk5UKlUtbYOIqKGRiGEEPU9iJoqKCiAlZUV8vPzYWlpqdV2+/ZtnD9/Hi4uLvyDTzWSlZUFFxcX/PLLL/D09Hxi6y0qKkKLFi3wwQcfYMyYMU9svXWJv5cy9+Cng+ZX/caBqKYe9fr9sKf6CAuRHP3yyy84deoUfHx8kJ+fj/feew8AMHDgwHoeGRFR/WFgIZKhmJgYnD59GsbGxvDy8sJPP/1Uq6ediIgaGgYWompSq9V4EmdQu3TpovUpKiIi4qeEiIiIqAFgYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZ0yuwrF69Gmq1GiqVCr6+vkhPT6/Wcps3b4ZCoUBQUJDWfCEEIiMj4ejoCFNTU2g0GmRmZuozNNJTSkoKFAoFrl+/Xuu1FQoFtm/fXqs1qzPe+fPn18oN3tRqNeLi4mq9L3DvZnQKhQJHjx7Va2xERE8LnQPLli1bEBERgaioKBw5cgSdO3dGQEBAhW/gfVhWVhamT5+O559/vkLb8uXLsXLlSiQkJCAtLQ3m5uYICAjA7du3dR0eVUPv3r0RHh5eb+tfvHgx/P39YWZmBmtr6zpbz/Tp05GUlFRn9Stz6NAhjB8/vlZrrl+/vk73ExFRQ6DzfVhiY2Mxbtw4hIaGAgASEhKwY8cOrF27FrNnz650mdLSUowcORILFizATz/9pPWuWAiBuLg4zJ07V7qT58aNG2Fvb4/t27dj+PDhemxWNT14++kngbe4BgCUlJRgyJAh8PPzw6efflpn67GwsICFhUWd1a9Ms2bNnuj6iIieFjodYSkpKUFGRgY0Gs39AgYG0Gg0SE1NrXK59957D82bN6/0e1DOnz+P3NxcrZpWVlbw9fWtsmZxcTEKCgq0psaod+/emDJlCsLDw2FjYwN7e3usWbMGRUVFCA0NRZMmTeDq6lrhS/1+++039OvXDxYWFrC3t8eoUaNw5coVAMDo0aOxb98+rFixAgqFAgqFAllZWdKyGRkZ8Pb2hpmZGfz9/XH69Gmt2vHx8WjTpg2MjY3Rvn17bNq0Sas9MzMTPXv2hEqlgru7O3bv3l1huxYsWIB//vOf8PDwqPE+etR4Hz4lNHr0aAQFBSEmJgaOjo6wtbXF5MmTcefOHanPpUuXMGDAAJiamsLFxQWfffaZ1vqEEJg/fz5atmwJExMTODk54e2335baHz4ldOrUKfTo0UPaH3v27Kn0FNkff/yBF154AWZmZujcubP0fz8lJQWhoaHIz8+Xnq/58+fXeL8RETU0OgWWK1euoLS0FPb29lrz7e3tkZubW+ky+/fvx6effoo1a9ZU2l6+nC41o6OjYWVlJU3Ozs66bEaDsmHDBtjZ2SE9PR1TpkzBxIkTMWTIEPj7++PIkSPo27cvRo0ahZs3bwIArl+/jhdffBFdunTB4cOHkZiYiLy8PAwdOhQAsGLFCvj5+WHcuHG4ePEiLl68qLX/3n33XXzwwQc4fPgwDA0N8eabb0pt27Ztw9SpUzFt2jT89ttvmDBhAkJDQ5GcnAwAKCsrw+DBg2FsbIy0tDQkJCRg1qxZem23Wq2u1gvzo8ZbmeTkZJw7dw7JycnYsGED1q9fj/Xr10vto0ePRk5ODpKTk/Hf//4X//rXv7ROd/7vf//Dhx9+iI8//hiZmZnYvn17lcGrtLQUQUFBMDMzQ1paGj755BO8++67VW7H9OnTcfToUbRr1w4jRozA3bt34e/vj7i4OFhaWkrP1/Tp0x+7X4iIGps6vTX/jRs3MGrUKKxZs6ZWvwdlzpw5iIiIkB4XFBQ02tDSuXNnzJ07F8C97V66dCns7Owwbtw4AEBkZCTi4+Nx/PhxPPfcc1i1ahW6dOmCJUuWSDXWrl0LZ2dnnDlzBu3atYOxsTHMzMzg4OBQYX2LFy9Gr169AACzZ89G//79cfv2bahUKsTExGD06NGYNGkSACAiIgIHDx5ETEwMXnjhBezZswenTp3Crl274OTkBABYsmQJ+vXrp/N2t2nTplr/Zx413srY2Nhg1apVUCqVcHNzQ//+/ZGUlIRx48bhzJkz2LlzJ9LT09GtWzcAwKeffooOHTpIy2dnZ8PBwQEajQZGRkZo2bIlfHx8Kl3X7t27ce7cOaSkpEj7evHixXjppZcq9J0+fTr69+8P4N4RqGeffRZnz56Fm5sbrKysoFAoKn2+iIieFjodYbGzs4NSqUReXp7W/Ly8vEr/mJ47dw5ZWVkYMGAADA0NYWhoiI0bN+Kbb76BoaEhzp07Jy1X3ZoAYGJiAktLS62pserUqZP0s1KphK2trdY7+vIjU+VHAY4dO4bk5GTp+g0LCwu4ubkBuPd86LI+R0dHrdonT55E9+7dtfp3794dJ0+elNqdnZ2lsAIAfn5+1d/YByQlJSEsLKxG463Ms88+C6VSqbXMg9tnaGgILy8vqd3NzU3rgtchQ4bg1q1baN26NcaNG4dt27bh7t27la7r9OnTcHZ21vp/XFW40XU7iIieNjoFlvJvjn3wkxdlZWVISkqq9IXJzc0Nv/76K44ePSpNr776Kl544QUcPXoUzs7OcHFxgYODg1bNgoICpKWl6f1i15gYGRlpPVYoFFrzFAoFgHvPAwAUFhZiwIABWvv86NGj0rUluqzv4dpypOt4K9ufumyfs7MzTp8+jX/9618wNTXFpEmT0LNnT63rYPTR0PY7EdGTpvMpoYiICISEhMDb2xs+Pj6Ii4uTLgIFgODgYLRo0QLR0dFQqVTo2LGj1vLl71YfnB8eHo5Fixahbdu2cHFxwbx58+Dk5FThfi30eF27dsX//vc/qNVqGBpW/vQaGxujtLRU59odOnTAgQMHEBISIs07cOAA3N3dpfacnBxcvHhROkpw8OBBPbaifri5ueHu3bvIyMiQTgmdPn26wr1eTE1NMWDAAAwYMACTJ0+WgnnXrl21+rVv3x45OTnIy8uTjoQdOnRI53Hp+3wRETUmOgeWYcOG4fLly4iMjERubi48PT2RmJgo/UHOzs6GgYFut3eZOXMmioqKMH78eFy/fh09evRAYmJildchUNUmT56MNWvWYMSIEZg5cyaaNm2Ks2fPYvPmzfj3v/8NpVIJtVqNtLQ0ZGVlwcLCAk2bNq1W7RkzZmDo0KHo0qULNBoNvv32W2zduhV79uwBAGg0GrRr1w4hISF4//33UVBQUOlFptnZ2bh27Rqys7NRWloq3TTN1dVV+hhynz59MGjQoGqdFqot7du3R2BgICZMmID4+HgYGhoiPDwcpqamUp/169ejtLQUvr6+MDMzw3/+8x+YmpqiVatWFeq99NJLaNOmDUJCQrB8+XLcuHFDuh6p/ChKdajVahQWFiIpKQmdO3eGmZkZzMzMar7BREQNiF53ug0LC8Off/6J4uJipKWlwdfXV2pLSUnR+tTFw9avX1/hI50KhQLvvfcecnNzcfv2bezZswft2rXTZ2hPPScnJxw4cAClpaXo27cvPDw8EB4eDmtraylITp8+HUqlEu7u7mjWrBmys7OrVTsoKAgrVqxATEwMnn32WXz88cdYt24devfuDeDeR9y3bduGW7duwcfHB2PHjsXixYsr1ImMjESXLl0QFRWFwsJCdOnSRfpUU7lz585JH8V+ktatWwcnJyf06tULgwcPxvjx49G8eXOp3draGmvWrEH37t3RqVMn7NmzB99++y1sbW0r1FIqldi+fTsKCwvRrVs3jB07VgpwuoRxf39/vPXWWxg2bBiaNWuG5cuX13xDiYgaGIUQQtT3IGqqoKAAVlZWyM/Pr3AB7u3bt3H+/Hm4uLjwiA3VuwMHDqBHjx44e/Ys2rRpU9/DqTf8vZS5B2+qyRteUh161Ov3w+r0Y81ET7tt27bBwsICbdu2xdmzZzF16lR07979qQ4rRET6YGAhqkM3btzArFmzkJ2dDTs7O2g0GnzwwQf1PSwiogaHgYWoDgUHByM4OLi+h0FE1ODpddEtERER0ZPEwEJERESyx8BCREREssfAQkRERLLHwEJERESyx8BCREREssfAQgDufaWCQqGo8EV/tUGhUFT4OgaqmlqtRlxcXH0Pg4hIVhhYnkK9e/dGeHh4va3/1VdfRcuWLaFSqeDo6IhRo0bhwoUL9Tae+rJ+/Xrp28sfdOjQIYwfP75e1k1EJFdP9Y3jPDZ4PNH1/Rry6xNdn1y98MILeOedd+Do6Ii///4b06dPx+uvv46ff/65vodWK0pKSmBsbKz38s2aNavF0dRMTbeFiKi28AiLjPXu3RtTpkxBeHg4bGxsYG9vjzVr1qCoqAihoaFo0qQJXF1dsXPnTq3lfvvtN/Tr1w8WFhawt7fHqFGjpG8+Hj16NPbt24cVK1ZAoVBAoVAgKytLWjYjIwPe3t4wMzODv78/Tp8+rVU7Pj4ebdq0gbGxMdq3b49NmzZptWdmZqJnz55QqVRwd3fH7t27K2zXP//5Tzz33HNo1aoV/P39MXv2bBw8eBB37tzRaf+sWbMGzs7OMDMzw6BBgxAbG1vhqMHXX3+Nrl27QqVSoXXr1liwYAHu3r0rtSsUCvz73//GoEGDYGZmhrZt2+Kbb76p9v4E7j1PYWFhCA8Ph52dHQICAgAAsbGx8PDwgLm5OZydnTFp0iQUFhYCuHcKLjQ0FPn5+dLzMH/+fAAVTwllZ2dj4MCBsLCwgKWlJYYOHYq8vDypff78+fD09MSmTZugVqthZWWF4cOH48aNG5Xut8ete+HChQgODoalpaV0pGf//v14/vnnYWpqCmdnZ7z99tsoKiqSahYXF2P69Olo0aIFzM3N4evri5SUlMc/iURE1cTAInMbNmyAnZ0d0tPTMWXKFEycOBFDhgyBv78/jhw5gr59+2LUqFG4efMmAOD69et48cUX0aVLFxw+fBiJiYnIy8vD0KFDAQArVqyAn58fxo0bh4sXL+LixYtwdnaW1vfuu+/igw8+wOHDh2FoaIg333xTatu2bRumTp2KadOm4bfffsOECRMQGhqK5ORkAEBZWRkGDx4MY2NjpKWlISEhAbNmzXrk9l27dg2fffYZ/P39YWRkJM1XKBRYv359lcsdOHAAb731FqZOnYqjR4/ipZdewuLFi7X6/PTTTwgODsbUqVNx4sQJfPzxx1i/fn2FfgsWLMDQoUNx/PhxvPzyyxg5ciSuXbtWrf354PNkbGyMAwcOICEhAQBgYGCAlStX4vfff8eGDRuwd+9ezJw5EwDg7++PuLg4WFpaSs/D9OnTK2xnWVkZBg4ciGvXrmHfvn3YvXs3/vjjDwwbNkyr37lz57B9+3Z89913+O6777Bv3z4sXbq00n33uHXHxMSgc+fO+OWXXzBv3jycO3cOgYGBeO2113D8+HFs2bIF+/fvR1hYmLRMWFgYUlNTsXnzZhw/fhxDhgxBYGAgMjMzq3wOiYh0IhqB/Px8AUDk5+dXaLt165Y4ceKEuHXrVoW2jus7PtFJV7169RI9evSQHt+9e1eYm5uLUaNGSfMuXrwoAIjU1FQhhBALFy4Uffv21aqTk5MjAIjTp09LdadOnarVJzk5WQAQe/bskebt2LFDAJD2nb+/vxg3bpzWckOGDBEvv/yyEEKIXbt2CUNDQ/H3339L7Tt37hQAxLZt27SWmzlzpjAzMxMAxHPPPSeuXLmi1d6+fXuxdevWKvfNsGHDRP/+/bXmjRw5UlhZWUmP+/TpI5YsWaLVZ9OmTcLR0VF6DEDMnTtXelxYWCgAiJ07dwohqr8/u3TpUuVYy3311VfC1tZWerxu3Tqt8ZZr1aqV+PDDD4UQQvzwww9CqVSK7Oxsqf33338XAER6eroQQoioqChhZmYmCgoKpD4zZswQvr6+VY7lUesOCgrSmjdmzBgxfvx4rXk//fSTMDAwELdu3RJ//vmnUCqVWs+7EPf2/5w5cypd/6N+L0kGoizvT0R16FGv3w/jERaZ69Spk/SzUqmEra0tPDzuX3tjb28PALh06RIA4NixY0hOToaFhYU0ubm5Abj3LlyX9Tk6OmrVPnnyJLp3767Vv3v37jh58qTU7uzsDCcnJ6ndz8+v0vXMmDEDv/zyC3744QcolUoEBwdDCCG1nzp1CoMGDapynKdPn4aPj4/WvIcfHzt2DO+9957Wvig/slR+ROrhbTY3N4elpaXO+9PLy6vCGPfs2YM+ffqgRYsWaNKkCUaNGoWrV69qrftxyvfpg0fB3N3dYW1tLe134N6pnCZNmkiPHR0dpW3Qlbe3t9bjY8eOYf369Vr7ICAgAGVlZTh//jx+/fVXlJaWol27dlp99u3bV63/c0RE1fFUX3TbEDx4mgS4d6rk4VMnwL1TBwBQWFiIAQMGYNmyZRVqlQeQ6q7v4dq1yc7ODnZ2dmjXrh06dOgAZ2dnHDx4sMqAo4/CwkIsWLAAgwcPrtCmUqmknyvbx7ruT3Nzc622rKwsvPLKK5g4cSIWL16Mpk2bYv/+/RgzZgxKSkpgZmZWo2172KO2QVcPb0thYSEmTJiAt99+u0Lfli1b4vjx41AqlcjIyIBSqdRqt7Cw0GsMREQPY2BpZLp27Yr//e9/UKvVMDSs/Ok1NjZGaWmpzrU7dOiAAwcOICQkRJp34MABuLu7S+05OTm4ePGi9GJ+8ODBx9Ytf2EtLi6u9ljat2+PQ4cOac17+HHXrl1x+vRpuLq6Vrvuw6qzPyuTkZGBsrIyfPDBBzAwuHcg88svv9TqU53noXyf5uTkSEdZTpw4gevXr0v7XR+6/B/o2rUrTpw4UeV+7NKlC0pLS3Hp0iU8//zzeo+JiOhReEqokZk8eTKuXbuGESNG4NChQzh37hx27dqF0NBQ6QVKrVYjLS0NWVlZuHLlSrXfic+YMQPr169HfHw8MjMzERsbi61bt0oXbGo0GrRr1w4hISE4duwYfvrpJ7z77rtaNdLS0rBq1SocPXoUf/75J/bu3YsRI0agTZs2WkdX3NzcsG3btirHMmXKFHz//feIjY1FZmYmPv74Y+zcuVM6KgQAkZGR2LhxIxYsWIDff/8dJ0+exObNmzF37txa3Z+VcXV1xZ07d/DRRx/hjz/+wKZNm6SLccup1WoUFhYiKSkJV65cqfRUkUajgYeHB0aOHIkjR44gPT0dwcHB6NWrV4VTN7qozrrLzZo1Cz///DPCwsJw9OhRZGZm4uuvv5Yuum3Xrh1GjhyJ4OBgbN26FefPn0d6ejqio6OxY8cOvcdIRPQgBpZGxsnJCQcOHEBpaSn69u0LDw8PhIeHw9raWnqnP336dCiVSri7u6NZs2bIzs6uVu2goCCsWLECMTExePbZZ/Hxxx9j3bp16N27N4B7n4rZtm0bbt26BR8fH4wdO7bCJ3LMzMywdetW9OnTB+3bt8eYMWPQqVMn7Nu3DyYmJlK/06dPIz8/v8qxdO/eHQkJCYiNjUXnzp2RmJiIf/7zn1qnegICAvDdd9/hhx9+QLdu3fDcc8/hww8/RKtWraq7O6u1PyvTuXNnxMbGYtmyZejYsSM+++wzREdHa/Xx9/fHW2+9hWHDhqFZs2ZYvnx5hToKhQJff/01bGxs0LNnT2g0GrRu3Rpbtmyp9jZUpjrrLlf+/Jw5cwbPP/88unTpgsjISK1rldatW4fg4GBMmzYN7du3R1BQEA4dOoSWLVvWaJz05Khn75AmIjlSiAevdGygCgoKYGVlhfz8fFhaWmq13b59G+fPn4eLi4vWixk1PuPGjcOpU6fw008/1fdQ6DH4eyk/DwaVLNUb9xvmV/3GgaimHvX6/TBew0INVkxMDF566SWYm5tj586d2LBhA/71r3/V97CIiKgOMLBQg5Weno7ly5fjxo0baN26NVauXImxY8fW97CIiKgOMLBQg/Xwp26IiKjx4kW3REREJHsMLERERCR7DCxEREQkewwsREREJHsMLERERCR7egWW1atXQ61WQ6VSwdfXF+np6VX23bp1K7y9vWFtbQ1zc3N4enpi06ZNWn1Gjx4NhUKhNQUGBuozNCIiImqEdA4sW7ZsQUREBKKionDkyBF07twZAQEBVX6VfdOmTfHuu+8iNTUVx48fR2hoKEJDQ7Fr1y6tfoGBgbh48aI0ffHFF/ptUSPSu3dvhIeHV9muVqsRFxdXq+usi5pEREQ1pfN9WGJjYzFu3DiEhoYCABISErBjxw6sXbsWs2fPrtC//Htmyk2dOhUbNmzA/v37ERAQIM03MTGBg4ODrsNp1LZu3QojI6P6HgYREVG90ymwlJSUICMjA3PmzJHmGRgYQKPRIDU19bHLCyGwd+9enD59GsuWLdNqS0lJQfPmzWFjY4MXX3wRixYtgq2trS7D09lJtw51Wv9hHU6d1Kl/06ZN62gkREREDYtOp4SuXLmC0tJS2Nvba823t7dHbm5ulcvl5+fDwsICxsbG6N+/Pz766CO89NJLUntgYCA2btyIpKQkLFu2DPv27UO/fv1QWlpaab3i4mIUFBRoTY3Rg6eELl26hAEDBsDU1BQuLi747LPPKvS/fv06xo4di2bNmsHS0hIvvvgijh07JrWfO3cOAwcOhL29PSwsLNCtWzfs2bPnSW0OERGR3p7IrfmbNGmCo0ePorCwEElJSYiIiEDr1q2l00XDhw+X+np4eKBTp05o06YNUlJS0KdPnwr1oqOjsWDBgicxdNkYPXo0Lly4gOTkZBgZGeHtt9+ucN3QkCFDYGpqip07d8LKygoff/wx+vTpgzNnzqBp06YoLCzEyy+/jMWLF8PExAQbN27EgAEDcPr0abRs2bKetoyIiOjxdDrCYmdnB6VSiby8PK35eXl5j7z+xMDAAK6urvD09MS0adPw+uuvIzo6usr+rVu3hp2dHc6ePVtp+5w5c5Cfny9NOTk5umxGg3PmzBns3LkTa9aswXPPPQcvLy98+umnuHXrltRn//79SE9Px1dffQVvb2+0bdsWMTExsLa2xn//+18AQOfOnTFhwgR07NgRbdu2xcKFC9GmTRt888039bVpRERE1aJTYDE2NoaXlxeSkpKkeWVlZUhKSoKfn1+165SVlaG4uLjK9r/++gtXr16Fo6Njpe0mJiawtLTUmhqzkydPwtDQEF5eXtI8Nzc3WFtbS4+PHTuGwsJC2NrawsLCQprOnz+Pc+fOAQAKCwsxffp0dOjQAdbW1rCwsMDJkyeRnZ39pDeJiIhIJzqfEoqIiEBISAi8vb3h4+ODuLg4FBUVSZ8aCg4ORosWLaQjKNHR0fD29kabNm1QXFyM77//Hps2bUJ8fDyAey+iCxYswGuvvQYHBwecO3cOM2fOhKurq9aniOjRCgsL4ejoiJSUlApt5cFm+vTp2L17N2JiYuDq6gpTU1O8/vrrKCkpebKDJSIi0pHOgWXYsGG4fPkyIiMjkZubC09PTyQmJkoX4mZnZ8PA4P6Bm6KiIkyaNAl//fUXTE1N4ebmhv/85z8YNmwYAECpVOL48ePYsGEDrl+/DicnJ/Tt2xcLFy6EiYlJLW1mw+bm5oa7d+8iIyMD3bp1AwCcPn0a169fl/p07doVubm5MDQ0hFqtrrTOgQMHMHr0aAwaNAjAvZCTlZVVx6MnIiKqOb0uug0LC0NYWFilbQ+/w1+0aBEWLVpUZS1TU9MKN5Ejbe3bt0dgYCAmTJiA+Ph4GBoaIjw8HKamplIfjUYDPz8/BAUFYfny5WjXrh0uXLiAHTt2YNCgQdJ1LVu3bsWAAQOgUCgwb948lJWV1eOWERERVQ+/S6iBWLduHZycnNCrVy8MHjwY48ePR/PmzaV2hUKB77//Hj179kRoaCjatWuH4cOH488//5SOfsXGxsLGxgb+/v4YMGAAAgIC0LVr1/raJCIiompTCCFEfQ+ipgoKCmBlZYX8/PwKF+Devn0b58+fh4uLC1QqVT2NkIgexN9L+VHP3iH9nKV6437D/Px6GA09LR71+v0wHmEhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItl7agJLI/gwFFGjwd9HItJVow8sRkZGAICbN2/W80iIqFz572P57ycR0ePodafbhkSpVMLa2hqXLl0CAJiZmUGhUNTzqIieTkII3Lx5E5cuXYK1tTWUSmV9D4mIGohGH1gAwMHBAQCk0EJE9cva2lr6vSQiqo6nIrAoFAo4OjqiefPmuHPnTn0Ph+ipZmRkxCMrRKSzpyKwlFMqlfxDSURE1AA1+otuiYiIqOFjYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2dMrsKxevRpqtRoqlQq+vr5IT0+vsu/WrVvh7e0Na2trmJubw9PTE5s2bdLqI4RAZGQkHB0dYWpqCo1Gg8zMTH2GRkRERI2QzoFly5YtiIiIQFRUFI4cOYLOnTsjICAAly5dqrR/06ZN8e677yI1NRXHjx9HaGgoQkNDsWvXLqnP8uXLsXLlSiQkJCAtLQ3m5uYICAjA7du39d8yIiIiajQUQgihywK+vr7o1q0bVq1aBQAoKyuDs7MzpkyZgtmzZ1erRteuXdG/f38sXLgQQgg4OTlh2rRpmD59OgAgPz8f9vb2WL9+PYYPH/7YegUFBbCyskJ+fj4sLS112RwiIgKgnr1D+jlL9cb9hvn59TAaelro8vqt0xGWkpISZGRkQKPR3C9gYACNRoPU1NTHLi+EQFJSEk6fPo2ePXsCAM6fP4/c3FytmlZWVvD19a2yZnFxMQoKCrQmIiIiarx0CixXrlxBaWkp7O3ttebb29sjNze3yuXy8/NhYWEBY2Nj9O/fHx999BFeeuklAJCW06VmdHQ0rKyspMnZ2VmXzSAiIqIG5ol8SqhJkyY4evQoDh06hMWLFyMiIgIpKSl615szZw7y8/OlKScnp/YGS0RERLJjqEtnOzs7KJVK5OXlac3Py8uDg4NDlcsZGBjA1dUVAODp6YmTJ08iOjoavXv3lpbLy8uDo6OjVk1PT89K65mYmMDExESXoRMREVEDptMRFmNjY3h5eSEpKUmaV1ZWhqSkJPj5+VW7TllZGYqLiwEALi4ucHBw0KpZUFCAtLQ0nWoSERFR46XTERYAiIiIQEhICLy9veHj44O4uDgUFRUhNDQUABAcHIwWLVogOjoawL3rTby9vdGmTRsUFxfj+++/x6ZNmxAfHw8AUCgUCA8Px6JFi9C2bVu4uLhg3rx5cHJyQlBQUO1tKRERETVYOgeWYcOG4fLly4iMjERubi48PT2RmJgoXTSbnZ0NA4P7B26KioowadIk/PXXXzA1NYWbmxv+85//YNiwYVKfmTNnoqioCOPHj8f169fRo0cPJCYmQqVS1cImEhERUUOn831Y5Ij3YSEiqhneh4XqQ53dh4WIiIioPjCwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHs6RVYVq9eDbVaDZVKBV9fX6Snp1fZd82aNXj++edhY2MDGxsbaDSaCv1Hjx4NhUKhNQUGBuozNCIiImqEdA4sW7ZsQUREBKKionDkyBF07twZAQEBuHTpUqX9U1JSMGLECCQnJyM1NRXOzs7o27cv/v77b61+gYGBuHjxojR98cUX+m0RERERNTo6B5bY2FiMGzcOoaGhcHd3R0JCAszMzLB27dpK+3/22WeYNGkSPD094ebmhn//+98oKytDUlKSVj8TExM4ODhIk42NjX5bRERERI2OToGlpKQEGRkZ0Gg09wsYGECj0SA1NbVaNW7evIk7d+6gadOmWvNTUlLQvHlztG/fHhMnTsTVq1d1GRoRERE1Yoa6dL5y5QpKS0thb2+vNd/e3h6nTp2qVo1Zs2bByclJK/QEBgZi8ODBcHFxwblz5/DOO++gX79+SE1NhVKprFCjuLgYxcXF0uOCggJdNoOIiIgaGJ0CS00tXboUmzdvRkpKClQqlTR/+PDh0s8eHh7o1KkT2rRpg5SUFPTp06dCnejoaCxYsOCJjJmIiIjqn06nhOzs7KBUKpGXl6c1Py8vDw4ODo9cNiYmBkuXLsUPP/yATp06PbJv69atYWdnh7Nnz1baPmfOHOTn50tTTk6OLptBREREDYxOgcXY2BheXl5aF8yWX0Dr5+dX5XLLly/HwoULkZiYCG9v78eu56+//sLVq1fh6OhYabuJiQksLS21JiIiImq8dP6UUEREBNasWYMNGzbg5MmTmDhxIoqKihAaGgoACA4Oxpw5c6T+y5Ytw7x587B27Vqo1Wrk5uYiNzcXhYWFAIDCwkLMmDEDBw8eRFZWFpKSkjBw4EC4uroiICCgljaTiIiIGjKdr2EZNmwYLl++jMjISOTm5sLT0xOJiYnShbjZ2dkwMLifg+Lj41FSUoLXX39dq05UVBTmz58PpVKJ48ePY8OGDbh+/TqcnJzQt29fLFy4ECYmJjXcPCIiImoMFEIIUd+DqKmCggJYWVkhPz+fp4eIiPSgnr1D+jlL9cb9hvn59TAaelro8vrN7xIiIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2dMrsKxevRpqtRoqlQq+vr5IT0+vsu+aNWvw/PPPw8bGBjY2NtBoNBX6CyEQGRkJR0dHmJqaQqPRIDMzU5+hERERUSOkc2DZsmULIiIiEBUVhSNHjqBz584ICAjApUuXKu2fkpKCESNGIDk5GampqXB2dkbfvn3x999/S32WL1+OlStXIiEhAWlpaTA3N0dAQABu376t/5YRERFRo6EQQghdFvD19UW3bt2watUqAEBZWRmcnZ0xZcoUzJ49+7HLl5aWwsbGBqtWrUJwcDCEEHBycsK0adMwffp0AEB+fj7s7e2xfv16DB8+/LE1CwoKYGVlhfz8fFhaWuqyOUREBEA9e4f0c5bqjfsN8/PrYTT0tNDl9VunIywlJSXIyMiARqO5X8DAABqNBqmpqdWqcfPmTdy5cwdNmzYFAJw/fx65ublaNa2srODr61tlzeLiYhQUFGhNRERE1HjpFFiuXLmC0tJS2Nvba823t7dHbm5utWrMmjULTk5OUkApX06XmtHR0bCyspImZ2dnXTaDiIiIGpgn+imhpUuXYvPmzdi2bRtUKpXedebMmYP8/HxpysnJqcVREhERkdwY6tLZzs4OSqUSeXl5WvPz8vLg4ODwyGVjYmKwdOlS7NmzB506dZLmly+Xl5cHR0dHrZqenp6V1jIxMYGJiYkuQyciIqIGTKcjLMbGxvDy8kJSUpI0r6ysDElJSfDz86tyueXLl2PhwoVITEyEt7e3VpuLiwscHBy0ahYUFCAtLe2RNYmIiOjpodMRFgCIiIhASEgIvL294ePjg7i4OBQVFSE0NBQAEBwcjBYtWiA6OhoAsGzZMkRGRuLzzz+HWq2WrkuxsLCAhYUFFAoFwsPDsWjRIrRt2xYuLi6YN28enJycEBQUVHtbSkRERA2WzoFl2LBhuHz5MiIjI5GbmwtPT08kJiZKF81mZ2fDwOD+gZv4+HiUlJTg9ddf16oTFRWF+fPnAwBmzpyJoqIijB8/HtevX0ePHj2QmJhYo+tciIiIqPHQ+T4scsT7sBAR1Qzvw0L1oc7uw0JERERUHxhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9vQKLKtXr4ZarYZKpYKvry/S09Or7Pv777/jtddeg1qthkKhQFxcXIU+8+fPh0Kh0Jrc3Nz0GRoRERE1QjoHli1btiAiIgJRUVE4cuQIOnfujICAAFy6dKnS/jdv3kTr1q2xdOlSODg4VFn32WefxcWLF6Vp//79ug6NiIiIGimdA0tsbCzGjRuH0NBQuLu7IyEhAWZmZli7dm2l/bt164b3338fw4cPh4mJSZV1DQ0N4eDgIE12dna6Do2IiIgaKZ0CS0lJCTIyMqDRaO4XMDCARqNBampqjQaSmZkJJycntG7dGiNHjkR2dnaVfYuLi1FQUKA1ERERUeOlU2C5cuUKSktLYW9vrzXf3t4eubm5eg/C19cX69evR2JiIuLj43H+/Hk8//zzuHHjRqX9o6OjYWVlJU3Ozs56r5uIiIjkTxafEurXrx+GDBmCTp06ISAgAN9//z2uX7+OL7/8stL+c+bMQX5+vjTl5OQ84RETERHRk2SoS2c7OzsolUrk5eVpzc/Ly3vkBbW6sra2Rrt27XD27NlK201MTB55PQwRERE1LjodYTE2NoaXlxeSkpKkeWVlZUhKSoKfn1+tDaqwsBDnzp2Do6NjrdUkIiKihkunIywAEBERgZCQEHh7e8PHxwdxcXEoKipCaGgoACA4OBgtWrRAdHQ0gHsX6p44cUL6+e+//8bRo0dhYWEBV1dXAMD06dMxYMAAtGrVChcuXEBUVBSUSiVGjBhRW9tJREREDZjOgWXYsGG4fPkyIiMjkZubC09PTyQmJkoX4mZnZ8PA4P6BmwsXLqBLly7S45iYGMTExKBXr15ISUkBAPz1118YMWIErl69imbNmqFHjx44ePAgmjVrVsPNIyIiosZAIYQQ9T2ImiooKICVlRXy8/NhaWlZ38MhImpw1LN3SD9nqd643zA/vx5GQ08LXV6/ZfEpISIiIqJHYWAhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZM6zvARARNRZa33i8tH89joSo8eERFiIiIpI9BhYiIiKSPQYWIiIikj0GFiIiIpI9XnRLRFQX5ls98HN+/Y2DqJHgERYiIiKSPQYWIiIikj0GFiIiIpI9BhYiIiKSPQYWIiIikj0GFiIiIpI9BhYiIiKSPQYWIiIikj0GFiIiIpI9BhYiIiKSPQYWIiIikj29Asvq1auhVquhUqng6+uL9PT0Kvv+/vvveO2116BWq6FQKBAXF1fjmkRERPR00TmwbNmyBREREYiKisKRI0fQuXNnBAQE4NKlS5X2v3nzJlq3bo2lS5fCwcGhVmoSERHR00XnwBIbG4tx48YhNDQU7u7uSEhIgJmZGdauXVtp/27duuH999/H8OHDYWJiUis1iYiI6OmiU2ApKSlBRkYGNBrN/QIGBtBoNEhNTdVrAPrULC4uRkFBgdZEREREjZehLp2vXLmC0tJS2Nvba823t7fHqVOn9BqAPjWjo6OxYMECvdZHjYN69g7p56yl/etxJPpp6OMnInrSGuSnhObMmYP8/HxpysnJqe8hkUx4bPCQJiIiajx0OsJiZ2cHpVKJvLw8rfl5eXlVXlBbFzVNTEyqvB6GiIiIGh+djrAYGxvDy8sLSUlJ0ryysjIkJSXBz89PrwHURU0iIiJqXHQ6wgIAERERCAkJgbe3N3x8fBAXF4eioiKEhoYCAIKDg9GiRQtER0cDuHdR7YkTJ6Sf//77bxw9ehQWFhZwdXWtVk0iIiJ6uukcWIYNG4bLly8jMjISubm58PT0RGJionTRbHZ2NgwM7h+4uXDhArp06SI9jomJQUxMDHr16oWUlJRq1SQiIqKnm86BBQDCwsIQFhZWaVt5CCmnVqshhKhRTSIiInq6NchPCREREdHThYGFiIiIZI+BhYiIiGSPgYWIiIhkj4GFiIiIZE+vTwkRUd148CsFfg35tR5HQkQkLzzCQkRERLLHwEJERESyx8BCREREssfAQkRERLLHi26JGhn17B3Sz1lL+9fjSIiIag8DCz1x/CQMERHpiqeEiIiISPYYWIiIiEj2GFiIiIhI9ngNCxHVCl6bRER1iYGF6gw/rUJERLWFp4SIiIhI9hhYGiiPDR5ah+CJiIgaMwYWIiIikj1ew0L0lOBFsUTUkDGw1BG+OBAREdUeBhaqgGGratw3RET1g4GFqL7Nt7r/s0vL+hsHEZGM8aJbIiIikj0GFiIiIpI9BhYiIiKSPV7DQtSY8foYImokeISFiIiIZI+BhYiIiGSPgYWIiIhkT6/Asnr1aqjVaqhUKvj6+iI9Pf2R/b/66iu4ublBpVLBw8MD33//vVb76NGjoVAotKbAwEB9hkZERESNkM6BZcuWLYiIiEBUVBSOHDmCzp07IyAgAJcuXaq0/88//4wRI0ZgzJgx+OWXXxAUFISgoCD89ttvWv0CAwNx8eJFafriiy/02yIiIiJqdHQOLLGxsRg3bhxCQ0Ph7u6OhIQEmJmZYe3atZX2X7FiBQIDAzFjxgx06NABCxcuRNeuXbFq1SqtfiYmJnBwcJAmGxsb/baIiIiIGh2dAktJSQkyMjKg0WjuFzAwgEajQWpqaqXLpKamavUHgICAgAr9U1JS0Lx5c7Rv3x4TJ07E1atXqxxHcXExCgoKtCYiIiJqvHQKLFeuXEFpaSns7e215tvb2yM3N7fSZXJzcx/bPzAwEBs3bkRSUhKWLVuGffv2oV+/figtLa20ZnR0NKysrKTJ2dlZl80gIiKiBkYWN44bPny49LOHhwc6deqENm3aICUlBX369KnQf86cOYiIiJAeFxQUMLQQERE1YjoFFjs7OyiVSuTl5WnNz8vLg4ODQ6XLODg46NQfAFq3bg07OzucPXu20sBiYmICExMTXYb+SCfdOkg/dzh1stbqPon6DXnsdV2/IY+9rutz7I2zfkMe+4P1G/LY66J+Qx57bdbX6ZSQsbExvLy8kJSUJM0rKytDUlIS/Pz8Kl3Gz89Pqz8A7N69u8r+APDXX3/h6tWrcHR01GV4RNQIqGfvkKaGVJuI6pbOp4QiIiIQEhICb29v+Pj4IC4uDkVFRQgNDQUABAcHo0WLFoiOjgYATJ06Fb169cIHH3yA/v37Y/PmzTh8+DA++eQTAEBhYSEWLFiA1157DQ4ODjh37hxmzpwJV1dXBAQE1OKmEhE94MHvWZqfX3/jIKJq0TmwDBs2DJcvX0ZkZCRyc3Ph6emJxMRE6cLa7OxsGBjcP3Dj7++Pzz//HHPnzsU777yDtm3bYvv27ejYsSMAQKlU4vjx49iwYQOuX78OJycn9O3bFwsXLqzV0z7UiPEL/oiIGj29LroNCwtDWFhYpW0pKSkV5g0ZMgRDhgyptL+pqSl27dqlzzCIiKgh4tEt0gO/S4iIiIhkj4GFiIiIZI+BhYiIiGSPgYWI5Gu+lfb1DkT01JLFnW7rg8cGD+nnL+txHERERPR4PMJCREREsvfUHmEhImpIHrw7b9bS/vU4koajTo6k8yPZ9YaBhYhkj6dwiYiBhYiooeG7fHoKMbAQEdUxHiGqGvcNVRcvuiUiIiLZ4xEWejL4BYVEdYJHKOhpwcBCRERVYiAifdTF/xsGFiIiqnNaH8tW1eNAqMHiNSxEREQkezzCQkREpAeeLnuyGFiIiIhkhmGoIp4SIiIiItljYCEiIiLZ4ykhInrq8fA7kfwxsBAREckBb7D5SDwlRERERLLHwEJERESyx1NCRI/Dw7RERPWOgYWI9McwR0RPCAMLERHRU6YhfjKOgYWIiBoPHvVrtBhY6B7+khMRkYzxU0JEREQkewwsREREJHt6BZbVq1dDrVZDpVLB19cX6enpj+z/1Vdfwc3NDSqVCh4eHvj++++12oUQiIyMhKOjI0xNTaHRaJCZmanP0IiIiKgR0jmwbNmyBREREYiKisKRI0fQuXNnBAQE4NKlS5X2//nnnzFixAiMGTMGv/zyC4KCghAUFITffvtN6rN8+XKsXLkSCQkJSEtLg7m5OQICAnD79m39t4yIiIgaDZ0DS2xsLMaNG4fQ0FC4u7sjISEBZmZmWLt2baX9V6xYgcDAQMyYMQMdOnTAwoUL0bVrV6xatQrAvaMrcXFxmDt3LgYOHIhOnTph48aNuHDhArZv316jjXvi5lvdn4iIiKjW6BRYSkpKkJGRAY1Gc7+AgQE0Gg1SU1MrXSY1NVWrPwAEBARI/c+fP4/c3FytPlZWVvD19a2yJhERET1ddPpY85UrV1BaWgp7e3ut+fb29jh16lSly+Tm5lbaPzc3V2ovn1dVn4cVFxejuLhYepyfnw8AKCgoePQGRD8j/Vja6v7PhaWl0s+PrfEoxeJ+/Vv3a9Za/UrGL+exlxXfvL+sonbr12XtCrhvqvaE9k1D2O91Xb8hj13X+nL7P/nE6tfDa0it1n9Adcdf/lgIgccSOvj7778FAPHzzz9rzZ8xY4bw8fGpdBkjIyPx+eefa81bvXq1aN68uRBCiAMHDggA4sKFC1p9hgwZIoYOHVppzaioKAGAEydOnDhx4tQIppycnMdmEJ2OsNjZ2UGpVCIvL09rfl5eHhwcHCpdxsHB4ZH9y//Ny8uDo6OjVh9PT89Ka86ZMwcRERHS47KyMly7dg22trZQKBSP3Y6CggI4OzsjJycHlpaWj+2vq4Zcn2NvnPU59sZZvyGPva7rc+wNo74QAjdu3ICTk9Nj6+oUWIyNjeHl5YWkpCQEBQUBuBcWkpKSEBYWVukyfn5+SEpKQnh4uDRv9+7d8PPzAwC4uLjAwcEBSUlJUkApKChAWloaJk6cWGlNExMTmJiYaM2ztrbWZVMAAJaWlnXyZDWG+hx746zPsTfO+g157HVdn2OXf30rK6tq1dP51vwREREICQmBt7c3fHx8EBcXh6KiIoSGhgIAgoOD0aJFC0RHRwMApk6dil69euGDDz5A//79sXnzZhw+fBiffPIJAEChUCA8PByLFi1C27Zt4eLignnz5sHJyUkKRURERPR00zmwDBs2DJcvX0ZkZCRyc3Ph6emJxMRE6aLZ7OxsGBjc//CRv78/Pv/8c8ydOxfvvPMO2rZti+3bt6Njx45Sn5kzZ6KoqAjjx4/H9evX0aNHDyQmJkKlUtXCJhIREVFDp9eXH4aFhVV5CiglJaXCvCFDhmDIkCFV1lMoFHjvvffw3nvv6TMcnZmYmCAqKqrCaSXW59gba32OvXHWb8hjr+v6HHvjq68QojqfJSIiIiKqP/zyQyIiIpI9BhYiIiKSPQYWIiIikj0GFnqieMkUERHpQ69PCTU0V65cwdq1a5Gamip9P5GDgwP8/f0xevRoNGvWrJ5H+PQwMTHBsWPH0KFDh/oeCtXAxYsXER8fj/379+PixYswMDBA69atERQUhNGjR0OpVNb3EImokWn0nxI6dOgQAgICYGZmBo1GI90vJi8vD0lJSbh58yZ27doFb2/vOll/Tk4OoqKisHbtWr1r3Lp1CxkZGWjatCnc3d212m7fvo0vv/wSwcHBetU+efIkDh48CD8/P7i5ueHUqVNYsWIFiouL8Y9//AMvvviiXnUf/OqEB61YsQL/+Mc/YGtrCwCIjY3Vq/7DioqK8OWXX+Ls2bNwdHTEiBEjpHXo48iRI7CxsYGLiwsAYNOmTUhISEB2djZatWqFsLAwDB8+XO/6U6ZMwdChQ/H888/rXeNRVq1ahfT0dLz88ssYPnw4Nm3ahOjoaJSVlWHw4MF47733YGio3/uVw4cPQ6PRwNXVFaampkhNTcUbb7yBkpIS7Nq1C+7u7khMTESTJk1qeauISK7S09MrHBTw8/ODj49P7a3ksd821MD5+vqK8ePHi7KysgptZWVlYvz48eK5556rs/UfPXpUGBgY6L386dOnRatWrYRCoRAGBgaiZ8+eWl8UmZubq3f9nTt3CmNjY9G0aVOhUqnEzp07RbNmzYRGoxEvvviiUCqVIikpSa/aCoVCeHp6it69e2tNCoVCdOvWTfTu3Vu88MILetUWQogOHTqIq1evCiGEyM7OFmq1WlhZWYlu3bqJpk2biubNm4s//vhD7/qdOnUSu3fvFkIIsWbNGmFqairefvttER8fL8LDw4WFhYX49NNP9a5f/ny2bdtWLF26VFy8eFHvWg9buHChaNKkiXjttdeEg4ODWLp0qbC1tRWLFi0SS5YsEc2aNRORkZF61+/evbuYP3++9HjTpk3C19dXCCHEtWvXhKenp3j77bdrtA3FxcViy5YtIjw8XAwfPlwMHz5chIeHiy+//FIUFxfXqPbj5ObmigULFtSoRk5Ojrhx40aF+SUlJWLfvn01qn3lyhWxd+9e6f//5cuXxdKlS8WCBQvEiRMnalS7Ki4uLuLMmTO1WrOsrEzs3btXfPLJJ+Lbb78VJSUlNaqXk5MjLl++LD3+8ccfxRtvvCF69OghRo4cWeFLe3URExMjsrKyajS+x/n222/FvHnzxP79+4UQQiQlJYl+/fqJgIAA8fHHH9e4/s2bN8Wnn34qQkNDRWBgoHj55ZdFWFiY2LNnT43q5uXliR49egiFQiFatWolfHx8hI+Pj/S61aNHD5GXl1fj8QshRKMPLCqVSpw8ebLK9pMnTwqVSqV3/a+//vqR04cfflijwBIUFCT69+8vLl++LDIzM0X//v2Fi4uL+PPPP4UQNQssfn5+4t133xVCCPHFF18IGxsb8c4770jts2fPFi+99JJetaOjo4WLi0uFwGNoaCh+//13vWo+SKFQSL8EI0eOFP7+/uL69etCCCFu3LghNBqNGDFihN71TU1NpT9QXbp0EZ988olW+2effSbc3d31rq9QKMSePXvE1KlThZ2dnTAyMhKvvvqq+Pbbb0VpaanedYUQok2bNuJ///ufEOJeYFYqleI///mP1L5161bh6uqqd31TU1Nx7tw56XFpaakwMjISubm5QgghfvjhB+Hk5KR3/czMTNG6dWuhUqlEr169xNChQ8XQoUNFr169hEqlEq6uriIzM1Pv+o9TkzcZFy5cEN26dRMGBgZCqVSKUaNGaQWXmvy+CiFEWlqasLKyEgqFQtjY2IjDhw8LFxcX0bZtW9GmTRthamoqMjIy9K6/YsWKSielUinmzJkjPdZHv379pN/Rq1evCl9fX6FQKESzZs2EgYGBcHNzE5cuXdJ77D4+PuLbb78VQgixfft2YWBgIF599VUxa9YsMWjQIGFkZCS160qhUAilUik0Go3YvHlzrYfmhIQEYWhoKLy8vISlpaXYtGmTaNKkiRg7dqyYMGGCMDU1FXFxcXrXz8zMFK1atRLNmzcXzs7OQqFQiP79+wtfX1+hVCrFkCFDxJ07d/Sq/dprrwk/Pz9x6tSpCm2nTp0S/v7+4vXXX9d77A9q9IFFrVaLDRs2VNm+YcMG0apVK73rl79TVigUVU41+QPVvHlzcfz4celxWVmZeOutt0TLli3FuXPnavQH0NLSUvrDX1paKgwNDcWRI0ek9l9//VXY29vrPfb09HTRrl07MW3aNOndU10EltatW4sffvhBq/3AgQPC2dlZ7/q2trbi8OHDQoh7z8HRo0e12s+ePStMTU31rv/g+EtKSsSWLVtEQECAUCqVwsnJSbzzzjt6vyibmppKgVYIIYyMjMRvv/0mPc7KyhJmZmZ6j71Vq1bSu0Ah7r1IKxQKcfPmTSGEEOfPn6/RmwCNRiMGDhwo8vPzK7Tl5+eLgQMHir59++pd/9ixY4+ctmzZovfvVHBwsPD19RWHDh0Su3fvFl5eXsLb21tcu3ZNCHEvsCgUCr3HrtFoxNixY0VBQYF4//33xTPPPCPGjh0rtYeGhoqgoCC96ysUCvHMM88ItVqtNSkUCtGiRQuhVquFi4uL3rXL/89PnDhRuLu7S0dBc3JyhJeXl3jrrbf0Hru5ublUz9fXVyxdulSr/aOPPhJdunTRe+zr1q0TAwcOFEZGRsLW1lZMnTpV/Prrr3qP90Hu7u7Sm6K9e/cKlUolVq9eLbWvW7dOdOjQQe/6/fr1ExMmTJDONCxdulT069dPCCHEmTNnhFqtFlFRUXrVtrCw0HrdeNjhw4eFhYWFXrUf1ugDy6pVq4SJiYl4++23xddffy0OHjwoDh48KL7++mvx9ttvC1NTU63/GLpycnIS27dvr7L9l19+qVFgadKkSaWHeSdPniyeeeYZ8eOPP9YosJw9e1Z6bGFhofXOOSsrq0YvPELcO9oRHBwsOnXqJH799VdhZGRUa4Gl/N2Yk5NThT8cNR37P/7xDzFmzBghhBBDhgwRc+fO1WpfsmSJ8PDw0Lv+g3+8H/Tnn3+KqKgo0apVK72fVxcXF7Fz504hxL0/RgYGBuLLL7+U2nfs2CHUarV+AxdCTJ06VXTs2FHs3LlT7N27V7zwwguid+/eUntiYqJo06aN3vVNTU0f+UJw/PjxGofFqt5klM/Xd987OTmJtLQ06fHt27fFgAEDhKenp7h69WqNj7DY2NhIfw9KSkqEgYGB1voyMjJEixYt9K4/YcIE4enpWeFvTm280Xjw/3z79u3F119/rdW+Z88evcOQEEJYWVmJY8eOCSHuvcko/7nc2bNn9Q7qD449Ly9PLFu2TLi5uQkDAwPRrVs38cknn4iCggK9x17Zm4wHfwfOnz9fozcZZmZmWqf0iouLhZGRkbhy5YoQ4t4RKX3/Jtja2oqUlJQq25OTk4Wtra1etR/W6AOLEEJs3rxZ+Pr6CkNDQ+kPk6GhofD19RVbtmypUe0BAwaIefPmVdl+9OjRGr2j6tatm9i4cWOlbZMnTxbW1tZ6/wHs1KmT9MImxL0jKg8eFvzxxx9r9AfkQV988YWwt7cXBgYGtRZYPDw8RJcuXYSFhYX473//q9W+b9++Gv3h/vvvv4VarRY9e/YUERERwtTUVPTo0UOMGzdO9OzZUxgbG4sdO3bUaPyPOq9bVlZW4ahRdc2dO1c0a9ZMjB07Vri4uIjZs2eLli1bivj4eJGQkCCcnZ3FP//5T32HLm7cuCGGDh0q/T75+/trXS+0a9curYCkK0dHx0ceuv/mm2+Eo6Oj3vVtbW3Fp59+KrKysiqdduzYoffvlLm5eYVrPe7cuSOCgoJEp06dxPHjx2sUWMzNzcX58+elxw+/yfjzzz9r/CZj69atwtnZWXz00UfSvNoKLOVvMpo3b6511E+Ie28yTExM9K7/6quvitmzZwshhAgICKhw6mrNmjWibdu2etWu6vf1xx9/FCEhIcLc3FyYm5vrVVsIIb35FOLe3x6FQqH19yUlJUU888wzetd3cnLSOlX4f//3f0KhUEgh648//tB730+aNEm0atVKbN26VeuoaH5+vti6datQq9UiLCxM77E/6KkILOVKSkrEhQsXxIULF2p8gVe5H3/8UetF/2GFhYWPTJ+Ps2TJEunQXWUmTpyodyCKj48X3333XZXtc+bMkY4y1IacnByxfft2UVhYWONa8+fP15oSExO12qdPny6GDx9eo3X83//9n5g1a5Zwd3cXKpVKGBsbi1atWok33nhDHDp0qEa11Wq19O6mtpWWlorFixeLV155RSxZskSUlZWJL774Qjg7OwtbW1sxevToWnkObt26VemFpTU1b948YWNjI2JjY8WxY8dEbm6uyM3NFceOHROxsbGiadOmeh++FkKIvn37ioULF1bZXpM3GR4eHhXCsxD3Q0vLli1rFFjc3Ny0rgv77rvvpFNxQghx8ODBGr2wlfvrr7/Eiy++KAIDA8XFixdrLbC8/PLLYtCgQcLGxqZCKD148GCNTkGfOHFC2NraiuDgYLFw4UJhYWEh/vGPf4jFixeL4OBgYWJiItatW6dXbQMDg0e+wcjPz69wnZsuJk+eLNq2bSsWLVokfHx8REhIiHBzcxM7d+4UiYmJwsPDQ7z55pt61w8JCRG9evUSJ0+eFH/88YcYNmyY1umxlJQUvU+h3759W7z11lvC2NhYGBgYCJVKJVQqlTAwMBDGxsZi4sSJ4vbt23qP/UFPVWAhooZh6dKlwtHRUTo9U36qxtHRUSxbtqxGtbdu3So2bdpUZfu1a9fE+vXr9ao9c+bMKq+vuXPnjnj11VdrdMR1/vz54osvvqiy/Z133hGDBw/Wu/6DysrKxJIlS4SDg4NQKpU1DiyjR4/Wmh4+uj1jxgwREBBQo3WcPXtWDB8+XDRp0kQ6mm5kZCT8/f3Ftm3b9K77uCOiNVVYWCjGjRsnOnbsKMaPHy+Ki4vF+++/L4yNjYVCoRC9e/eu0frz8vLEc889J/0+tWrVSuu6k6+++kqsXLmyRtuQn58v9u7dKz7//HPx+eefi71791Z6HVpNNPr7sBBRw3X+/Hmt+zqU3xdHru7evYubN2/C0tKyyva///4brVq1qpP137x5E0qlEiYmJrVWMyMjA/v370dwcDBsbGxqre7DioqKoFQqoVKpalxLCIFLly6hrKwMdnZ2MDIyqoURPnm3b9/GnTt3au2eRpmZmSguLoabm5ve92GqT7w1PxHJlouLC/z8/ODn5yeFlZycHLz55pt1ts6a1Dc0NKwyrAD37hC8YMECfYf2WFevXsXEiRNrtaaXlxemTp0KGxubOt33165dw6RJk2qllkKhgL29PRwdHaWwUpdjr6vaKpUKTZo0qbX6bdu2RceOHSuElZrWv3XrFvbv348TJ05UaLt9+zY2btyod+0H8QgLETUox44dQ9euXVFaWtrg6jfksdd1fY69YdY/c+YM+vbti+zsbCgUCvTo0QNffPEFnJycANy7q7yTk1OtjL3hHRMiokbtm2++eWT7H3/8Idv6DXnsdV2fY2+c9WfNmoWOHTvi8OHDuH79OsLDw9GjRw+kpKSgZcuWetetDI+wEJGsGBgYQKFQPPKbvRUKhd7v2OqyfkMee13X59gbZ317e3vs2bMHHh4eAO5dPzRp0iR8//33SE5Ohrm5ea0dYeE1LEQkK46Ojti6dSvKysoqnY4cOSLb+g157HVdn2NvnPVv3bqldU2MQqFAfHw8BgwYgF69euHMmTM1GvuDGFiISFa8vLyQkZFRZfvj3inWZ/2GPPa6rs+xN876bm5uOHz4cIX5q1atwsCBA/Hqq6/qVbcyvIaFiGRlxowZKCoqqrLd1dUVycnJsqzfkMde1/U59sZZf9CgQfjiiy8watSoCm2rVq1CWVkZEhIS9Kr9MF7DQkRERLLHU0JEREQkewwsREREJHsMLERERCR7DCxEREQkewwsREREJHsMLERERCR7DCxEREQkewwsREREJHv/D29WypRhKoObAAAAAElFTkSuQmCC", 569 | "text/plain": [ 570 | "
" 571 | ] 572 | }, 573 | "metadata": {}, 574 | "output_type": "display_data" 575 | } 576 | ], 577 | "source": [ 578 | "df = pd.concat([r0, r1, r3, ideal_dist], axis=1)\n", 579 | "df.columns = ['method0: sampling', 'method1: hindsight', 'method3: generation tree', 'ideal']\n", 580 | "\n", 581 | "\n", 582 | "df = df.sort_index().fillna(0)\n", 583 | "df.plot.bar()\n", 584 | "df" 585 | ] 586 | }, 587 | { 588 | "cell_type": "code", 589 | "execution_count": 173, 590 | "metadata": {}, 591 | "outputs": [], 592 | "source": [ 593 | "# df.sum()" 594 | ] 595 | }, 596 | { 597 | "cell_type": "code", 598 | "execution_count": 174, 599 | "metadata": {}, 600 | "outputs": [ 601 | { 602 | "name": "stdout", 603 | "output_type": "stream", 604 | "text": [ 605 | "MAE coverage (smaller is better\n" 606 | ] 607 | }, 608 | { 609 | "data": { 610 | "text/plain": [ 611 | "method0: sampling 1.121905\n", 612 | "method1: hindsight 0.952232\n", 613 | "method3: generation tree 0.494957\n", 614 | "dtype: float64" 615 | ] 616 | }, 617 | "execution_count": 174, 618 | "metadata": {}, 619 | "output_type": "execute_result" 620 | } 621 | ], 622 | "source": [ 623 | "df_score = df / df['ideal'].values[:, None] - 1\n", 624 | "ratios = df_score.iloc[:21, :3]\n", 625 | "print('MAE coverage (smaller is better')\n", 626 | "ratios.abs().mean()" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 175, 632 | "metadata": {}, 633 | "outputs": [ 634 | { 635 | "name": "stdout", 636 | "output_type": "stream", 637 | "text": [ 638 | "tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0733, 0.1000, 0.0000, 0.1000, 0.0800,\n", 639 | " 0.0000, 0.0467, 0.0467, 0.1256, 0.0344, 0.3678, 0.0256, 0.0000, 0.0000,\n", 640 | " 0.0000, 0.0000, 0.0000]], dtype=torch.float64) method0: sampling\n", 641 | "tensor([[6.4225e-04, 2.8117e-03, 1.0104e-03, 9.0762e-03, 2.8844e-02, 4.3982e-02,\n", 642 | " 1.3840e-02, 6.3993e-02, 5.9184e-02, 1.2213e-02, 3.7620e-02, 4.5378e-02,\n", 643 | " 1.1956e-01, 3.1679e-02, 4.2386e-01, 4.2629e-02, 1.0285e-02, 3.4793e-02,\n", 644 | " 1.7495e-02, 1.1011e-03, 9.9832e-06]], dtype=torch.float64) method1: hindsight\n", 645 | "tensor([[0.0365, 0.0667, 0.0710, 0.0653, 0.0786, 0.0999, 0.0662, 0.0858, 0.0682,\n", 646 | " 0.0745, 0.0327, 0.0187, 0.0307, 0.0279, 0.0274, 0.0312, 0.0344, 0.0235,\n", 647 | " 0.0225, 0.0109, 0.0277]], dtype=torch.float64) method3: generation tree\n" 648 | ] 649 | }, 650 | { 651 | "data": { 652 | "text/html": [ 653 | "\n", 667 | "\n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | "
KL_div_loss and time for each method (lower is better)
 KL_div_losstime
method  
method0: sampling-3.09214148.504429
method1: hindsight-3.0921410.683987
method3: generation tree-3.0921560.075112
\n" 699 | ], 700 | "text/plain": [ 701 | "" 702 | ] 703 | }, 704 | "execution_count": 175, 705 | "metadata": {}, 706 | "output_type": "execute_result" 707 | } 708 | ], 709 | "source": [ 710 | "data = []\n", 711 | "times = dict(zip(df.columns, [t0, t1, t3]))\n", 712 | "for k in df.columns[:3]:\n", 713 | " input = torch.tensor(df[k].values)[None, :]\n", 714 | " print(input, k)\n", 715 | " target = torch.tensor(df['ideal'].values)[None, :]\n", 716 | " # https://pytorch.org/docs/stable/generated/torch.nn.KLDivLoss.html#torch.nn.KLDivLoss\n", 717 | " s = F.kl_div(input, target , reduction='batchmean', log_target=False).item()\n", 718 | " t = times[k].total_seconds()\n", 719 | " data.append({'method': k, 'KL_div_loss': s, 'time': t})\n", 720 | "dfr = pd.DataFrame(data).set_index('method')\n", 721 | "# color values with cmap\n", 722 | "dfs = dfr.style.background_gradient(cmap='YlOrRd')\n", 723 | "dfs.set_caption('KL_div_loss and time for each method (lower is better)')\n", 724 | "dfs" 725 | ] 726 | }, 727 | { 728 | "cell_type": "code", 729 | "execution_count": 176, 730 | "metadata": {}, 731 | "outputs": [ 732 | { 733 | "name": "stdout", 734 | "output_type": "stream", 735 | "text": [ 736 | "| method | KL_div_loss | time |\n", 737 | "|:-------------------------|--------------:|----------:|\n", 738 | "| method0: sampling | -3.09214 | 48.5044 |\n", 739 | "| method1: hindsight | -3.09214 | 0.683987 |\n", 740 | "| method3: generation tree | -3.09216 | 0.075112 |\n" 741 | ] 742 | } 743 | ], 744 | "source": [ 745 | "print(dfr.to_markdown())" 746 | ] 747 | }, 748 | { 749 | "cell_type": "markdown", 750 | "metadata": {}, 751 | "source": [ 752 | "## Scratch, collapse probs" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": 14, 758 | "metadata": {}, 759 | "outputs": [], 760 | "source": [ 761 | "# def collapse_probs(json_schema, generated_data, keys = []):\n", 762 | "\n", 763 | "# # get current row\n", 764 | "# schema = json_schema\n", 765 | "# data = generated_data\n", 766 | "# for key in keys[:-1]:\n", 767 | "# print(schema)\n", 768 | "# schema = schema[key]\n", 769 | "# data = data[key]\n", 770 | "\n", 771 | "# schema_type = schema[\"type\"]\n", 772 | "\n", 773 | "# if schema_type == \"p_enum\":\n", 774 | "# k = keys[-1]\n", 775 | "# data[k] = data[k][0]['choice']\n", 776 | "# elif schema_type == \"object\":\n", 777 | "# k = \"properties\"\n", 778 | "# for key in schema[\"properties\"]:\n", 779 | "# data[\"properties\"] = collapse_probs(json_schema, generated_data, keys + [key])\n", 780 | " \n", 781 | "# return generated_data\n", 782 | "\n", 783 | "\n", 784 | "# list(collapse_probs(json_schema, generated_data))" 785 | ] 786 | } 787 | ], 788 | "metadata": { 789 | "kernelspec": { 790 | "display_name": ".venv", 791 | "language": "python", 792 | "name": "python3" 793 | }, 794 | "language_info": { 795 | "codemirror_mode": { 796 | "name": "ipython", 797 | "version": 3 798 | }, 799 | "file_extension": ".py", 800 | "mimetype": "text/x-python", 801 | "name": "python", 802 | "nbconvert_exporter": "python", 803 | "pygments_lexer": "ipython3", 804 | "version": "3.9.16" 805 | } 806 | }, 807 | "nbformat": 4, 808 | "nbformat_minor": 2 809 | } 810 | -------------------------------------------------------------------------------- /prob_jsonformer/__init__.py: -------------------------------------------------------------------------------- 1 | from prob_jsonformer.main import Jsonformer 2 | from prob_jsonformer.format import highlight_values 3 | -------------------------------------------------------------------------------- /prob_jsonformer/format.py: -------------------------------------------------------------------------------- 1 | from termcolor import colored 2 | 3 | 4 | def highlight_values(value): 5 | def recursive_print(obj, indent=0, is_last_element=True): 6 | if isinstance(obj, dict): 7 | print("{") 8 | last_key = list(obj.keys())[-1] 9 | for key, value in obj.items(): 10 | print(f"{' ' * (indent + 2)}{key}: ", end="") 11 | recursive_print(value, indent + 2, key == last_key) 12 | print(f"{' ' * indent}}}", end=",\n" if not is_last_element else "\n") 13 | elif isinstance(obj, list): 14 | print("[") 15 | for index, value in enumerate(obj): 16 | print(f"{' ' * (indent + 2)}", end="") 17 | recursive_print(value, indent + 2, index == len(obj) - 1) 18 | print(f"{' ' * indent}]", end=",\n" if not is_last_element else "\n") 19 | else: 20 | if isinstance(obj, str): 21 | obj = f'"{obj}"' 22 | print(colored(obj, "green"), end=",\n" if not is_last_element else "\n") 23 | 24 | recursive_print(value) 25 | -------------------------------------------------------------------------------- /prob_jsonformer/logits_processors.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from transformers import PreTrainedTokenizer, StoppingCriteria 3 | from transformers.generation.logits_process import LogitsProcessor 4 | import torch 5 | 6 | 7 | class StringStoppingCriteria(StoppingCriteria): 8 | def __init__( 9 | self, 10 | tokenizer: PreTrainedTokenizer, 11 | prompt_length: int, 12 | max_length: int = None, 13 | min_length: int = 1, 14 | ): 15 | self.tokenizer = tokenizer 16 | self.prompt_length = prompt_length 17 | self.max_length = max_length 18 | self.min_length = min_length 19 | 20 | def __call__( 21 | self, 22 | input_ids: torch.LongTensor, 23 | _, 24 | ) -> bool: 25 | if len(input_ids[0]) <= self.prompt_length: 26 | return False 27 | 28 | last_token_id = input_ids[0][-1] 29 | last_token = self.tokenizer.decode(last_token_id, skip_special_tokens=True) 30 | 31 | result = '"' in last_token 32 | 33 | if self.max_length is not None: 34 | # because of tokens this wont work pefectly, we might go 0-10 chars over 35 | gen_ids = input_ids[0][self.prompt_length :] 36 | o = self.tokenizer.decode(gen_ids, skip_special_tokens=True) 37 | str_l = len(o) 38 | if str_l > self.max_length: 39 | return True 40 | 41 | if self.min_length is not None: 42 | gen_ids = input_ids[0][self.prompt_length :] 43 | o = self.tokenizer.decode(gen_ids, skip_special_tokens=True) 44 | str_l = len(o) 45 | if str_l < self.min_length: 46 | return False 47 | 48 | return result 49 | 50 | 51 | class NumberStoppingCriteria(StoppingCriteria): 52 | def __init__( 53 | self, 54 | tokenizer: PreTrainedTokenizer, 55 | prompt_length: int, 56 | precision: int = 3, 57 | ): 58 | self.tokenizer = tokenizer 59 | self.precision = precision 60 | self.prompt_length = prompt_length 61 | 62 | def __call__( 63 | self, 64 | input_ids: torch.LongTensor, 65 | scores: torch.FloatTensor, 66 | ) -> bool: 67 | decoded = self.tokenizer.decode( 68 | input_ids[0][self.prompt_length :], skip_special_tokens=True 69 | ) 70 | 71 | if decoded.count(".") > 1: 72 | return True 73 | 74 | if ( 75 | decoded.count(".") == 1 76 | and len(decoded.replace(" ", "").split(".")[1]) > self.precision 77 | ): 78 | return True 79 | 80 | if ( 81 | len(decoded) > 1 82 | and "," in decoded 83 | and any(c.isdigit() for c in decoded.split(",")[0]) 84 | ): 85 | return True 86 | 87 | if ( 88 | len(decoded) > 1 89 | and any(c.isdigit() for c in decoded) 90 | and ("," in decoded or decoded[-1] in (" ", "\n")) 91 | ): 92 | return True 93 | 94 | return False 95 | 96 | 97 | class OutputNumbersTokens(LogitsProcessor): 98 | def __init__(self, tokenizer: PreTrainedTokenizer, prompt: str): 99 | self.tokenizer = tokenizer 100 | self.tokenized_prompt = tokenizer(prompt, return_tensors="pt") 101 | vocab_size = len(tokenizer) 102 | self.allowed_mask = torch.zeros(vocab_size, dtype=torch.bool) 103 | 104 | for _, token_id in tokenizer.get_vocab().items(): 105 | token_str = tokenizer.decode(token_id, skip_special_tokens=True).strip() 106 | 107 | if ( 108 | token_str == "" 109 | or ( 110 | all(c.isdigit() or c == "." for c in token_str) 111 | and token_str.count(".") <= 1 112 | ) 113 | or ( 114 | "," in token_str 115 | and all(c.isdigit() or c == "." for c in token_str.split(",")[0]) 116 | and token_str.count(".") <= 1 117 | ) 118 | ): 119 | self.allowed_mask[token_id] = True 120 | 121 | def __call__(self, _, scores): 122 | mask = self.allowed_mask.expand_as(scores) 123 | scores[~mask] = -float("inf") 124 | 125 | return scores 126 | 127 | 128 | class IntegerStoppingCriteria(StoppingCriteria): 129 | def __init__( 130 | self, 131 | tokenizer: PreTrainedTokenizer, 132 | prompt_length: int, 133 | max_digits: int = 15, 134 | ): 135 | self.tokenizer = tokenizer 136 | self.prompt_length = prompt_length 137 | self.max_digits = max_digits 138 | 139 | def __call__( 140 | self, 141 | input_ids: torch.LongTensor, 142 | scores: torch.FloatTensor, 143 | ) -> bool: 144 | decoded = self.tokenizer.decode( 145 | input_ids[0][self.prompt_length :], skip_special_tokens=True 146 | ) 147 | 148 | if len(decoded.strip()) > self.max_digits: 149 | return True 150 | 151 | if ( 152 | len(decoded) > 1 153 | and "," in decoded 154 | and any(c.isdigit() for c in decoded.split(",")[0]) 155 | ): 156 | return True 157 | 158 | if ( 159 | len(decoded) > 1 160 | and any(c.isdigit() for c in decoded) 161 | and decoded[-1] in (" ", "\n") 162 | ): 163 | return True 164 | 165 | return False 166 | 167 | 168 | class OutputIntegersTokens(LogitsProcessor): 169 | def __init__(self, tokenizer: PreTrainedTokenizer, prompt: str): 170 | self.tokenizer = tokenizer 171 | self.tokenized_prompt = tokenizer(prompt, return_tensors="pt") 172 | vocab_size = len(tokenizer) 173 | self.allowed_mask = torch.zeros(vocab_size, dtype=torch.bool) 174 | 175 | for _, token_id in tokenizer.get_vocab().items(): 176 | token_str = tokenizer.decode(token_id, skip_special_tokens=True).strip() 177 | 178 | if ( 179 | token_str == "" 180 | or all(c.isdigit() for c in token_str) 181 | or "," in token_str 182 | and all(c.isdigit() for c in token_str.split(",")[0]) 183 | ): 184 | self.allowed_mask[token_id] = True 185 | 186 | def __call__(self, _, scores): 187 | mask = self.allowed_mask.expand_as(scores) 188 | scores[~mask] = -float("inf") 189 | 190 | return scores 191 | -------------------------------------------------------------------------------- /prob_jsonformer/main.py: -------------------------------------------------------------------------------- 1 | from typing import List, Set, Union, Dict, Any, Optional 2 | 3 | from prob_jsonformer.logits_processors import ( 4 | NumberStoppingCriteria, 5 | OutputNumbersTokens, 6 | IntegerStoppingCriteria, 7 | OutputIntegersTokens, 8 | StringStoppingCriteria, 9 | ) 10 | from prob_jsonformer.prob_choice_tree import prob_choice_tree, round_to_nsf 11 | from prob_jsonformer.type_prefixes import get_prefix_tokens_for_types 12 | 13 | from termcolor import cprint 14 | from transformers import PreTrainedModel, PreTrainedTokenizer 15 | import json 16 | import torch 17 | 18 | GENERATION_MARKER = "|GENERATION|" 19 | 20 | 21 | class Jsonformer: 22 | value: Dict[str, Any] = {} 23 | 24 | def __init__( 25 | self, 26 | model: PreTrainedModel, 27 | tokenizer: PreTrainedTokenizer, 28 | json_schema: Dict[str, Any], 29 | prompt: str, 30 | *, 31 | debug: bool = False, 32 | max_array_length: int = 10, 33 | max_number_tokens: int = 6, 34 | temperature: Optional[float] = None, 35 | max_string_token_length: Optional[int] = None, 36 | ): 37 | self.model = model 38 | self.tokenizer = tokenizer 39 | self.json_schema = json_schema 40 | self.prompt = prompt 41 | 42 | self.type_prefix_tokens = get_prefix_tokens_for_types(tokenizer) 43 | 44 | self.number_logit_processor = OutputNumbersTokens(self.tokenizer, self.prompt) 45 | self.integer_logit_processor = OutputIntegersTokens(self.tokenizer, self.prompt) 46 | 47 | self.generation_marker = "|GENERATION|" 48 | self.debug_on = debug 49 | self.max_array_length = max_array_length 50 | 51 | self.max_number_tokens = max_number_tokens 52 | self.temperature = temperature 53 | self.max_string_token_length = max_string_token_length 54 | 55 | def debug(self, caller: str, value: str, is_prompt: bool = False): 56 | if self.debug_on: 57 | if is_prompt: 58 | cprint(caller, "green", end=" ") 59 | cprint(value, "yellow") 60 | else: 61 | cprint(caller, "green", end=" ") 62 | cprint(value, "blue") 63 | 64 | def generate_number(self, temperature: Union[float, None] = None, iterations=0): 65 | prompt = self.get_prompt() 66 | self.debug("[generate_number]", prompt, is_prompt=True) 67 | input_tokens = self.tokenizer.encode(prompt, return_tensors="pt").to( 68 | self.model.device 69 | ) 70 | response = self.model.generate( 71 | input_tokens, 72 | max_new_tokens=self.max_number_tokens, 73 | num_return_sequences=1, 74 | logits_processor=[self.number_logit_processor], 75 | stopping_criteria=[ 76 | NumberStoppingCriteria(self.tokenizer, len(input_tokens[0])) 77 | ], 78 | temperature=temperature or self.temperature, 79 | pad_token_id=self.tokenizer.eos_token_id, 80 | ) 81 | response = self.tokenizer.decode(response[0], skip_special_tokens=True) 82 | 83 | response = response[len(prompt) :] 84 | if "," in response: 85 | response = response.split(",")[0] 86 | response = response.replace(" ", "").rstrip(".") 87 | self.debug("[generate_number]", response) 88 | try: 89 | return float(response) 90 | except ValueError: 91 | if iterations > 3: 92 | raise ValueError("Failed to generate a valid number") 93 | 94 | return self.generate_number( 95 | temperature=self.temperature * 1.3, iterations=iterations + 1 96 | ) 97 | 98 | def generate_integer(self, temperature: Union[float, None] = None, iterations=0): 99 | prompt = self.get_prompt() 100 | self.debug("[generate_number]", prompt, is_prompt=True) 101 | input_tokens = self.tokenizer.encode(prompt, return_tensors="pt").to( 102 | self.model.device 103 | ) 104 | response = self.model.generate( 105 | input_tokens, 106 | max_new_tokens=self.max_number_tokens, 107 | num_return_sequences=1, 108 | logits_processor=[self.integer_logit_processor], 109 | stopping_criteria=[ 110 | IntegerStoppingCriteria(self.tokenizer, len(input_tokens[0])) 111 | ], 112 | temperature=temperature or self.temperature, 113 | pad_token_id=self.tokenizer.eos_token_id, 114 | ) 115 | response = self.tokenizer.decode(response[0], skip_special_tokens=True) 116 | 117 | response = response[len(prompt) :] 118 | if "," in response: 119 | response = response.split(",")[0] 120 | response = response.replace(" ", "") 121 | self.debug("[generate_integer]", response) 122 | try: 123 | return int(response) 124 | except ValueError: 125 | if iterations > 3: 126 | raise ValueError("Failed to generate a valid integer") 127 | 128 | return self.generate_integer(temperature=self.temperature * 1.3) 129 | 130 | def generate_boolean(self) -> bool: 131 | prompt = self.get_prompt() 132 | self.debug("[generate_boolean]", prompt, is_prompt=True) 133 | 134 | input_tensor = self.tokenizer.encode(prompt, return_tensors="pt") 135 | output = self.model.forward(input_tensor.to(self.model.device)) 136 | logits = output.logits[0, -1] 137 | 138 | true_token_id = self.tokenizer.encode("true", return_tensors="pt")[0, 0] 139 | false_token_id = self.tokenizer.encode("false", return_tensors="pt")[0, 0] 140 | 141 | result = logits[true_token_id] > logits[false_token_id] 142 | 143 | self.debug("[generate_boolean]", result) 144 | 145 | return result.item() 146 | 147 | def generate_string(self, maxLength=None, minLength=None) -> str: 148 | prompt = self.get_prompt() + '"' 149 | self.debug("[generate_string]", prompt, is_prompt=True) 150 | input_tokens = self.tokenizer.encode(prompt, return_tensors="pt").to( 151 | self.model.device 152 | ) 153 | 154 | response = self.model.generate( 155 | input_tokens, 156 | max_new_tokens=self.max_string_token_length, 157 | num_return_sequences=1, 158 | temperature=self.temperature, 159 | stopping_criteria=[ 160 | StringStoppingCriteria( 161 | self.tokenizer, len(input_tokens[0]), maxLength, minLength 162 | ) 163 | ], 164 | early_stopping=False, 165 | pad_token_id=self.tokenizer.eos_token_id, 166 | ) 167 | 168 | # Some models output the prompt as part of the response 169 | # This removes the prompt from the response if it is present 170 | if ( 171 | len(response[0]) >= len(input_tokens[0]) 172 | and (response[0][: len(input_tokens[0])] == input_tokens).all() 173 | ): 174 | response = response[0][len(input_tokens[0]) :] 175 | if response.shape[0] == 1: 176 | response = response[0] 177 | 178 | response = self.tokenizer.decode(response, skip_special_tokens=True) 179 | 180 | self.debug("[generate_string]", "|" + response + "|") 181 | 182 | if response.count('"') < 1: 183 | return response 184 | 185 | return response.split('"')[0].strip() 186 | 187 | def generate_p_enum(self, values: list, round: int) -> str: 188 | """ 189 | This is not in the json schema, but can be usefull for effeciently getting the prob distibution over choices 190 | """ 191 | prompt = self.get_prompt() + '"' 192 | self.debug("[generate_p_enum]", prompt, is_prompt=True) 193 | input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to( 194 | self.model.device 195 | )[0] 196 | values_tokens = self.tokenizer(values).input_ids 197 | values_tokens = [torch.tensor(c) for c in values_tokens] 198 | 199 | r = list( 200 | prob_choice_tree( 201 | self.model, self.tokenizer, input_ids, values_tokens, round=round 202 | ) 203 | ) 204 | return r 205 | 206 | def generate_p_integer( 207 | self, range_min: float, range_max: float, round: int 208 | ) -> float: 209 | """ 210 | This is not in the json schema, but can be usefull for effeciently generating the weighted mean from a range of integers 211 | """ 212 | values = [str(n) for n in range(int(range_min), int(range_max) + 1)] 213 | result = self.generate_p_enum(values, round=round) 214 | 215 | # now do a weighted average 216 | total = 0.0 217 | for r in result: 218 | total += float(r["choice"]) * r["prob"] 219 | 220 | if round is not None: 221 | total = round_to_nsf(total, round) 222 | return total 223 | 224 | def generate_enum(self, enum_values: Set[str]) -> str: 225 | prompt = self.get_prompt() 226 | self.debug("[generate_enum]", prompt, is_prompt=True) 227 | 228 | # These are necessary because we don't know if we're at the end or middle of an object/array 229 | terminal_tokens = torch.concat( 230 | [ 231 | self.tokenizer.encode(s, add_special_tokens=False, return_tensors="pt")[ 232 | :, 0 233 | ] 234 | for s in ('", "', '"}', '"]') 235 | ] 236 | ) 237 | 238 | highest_probability = 0.0 239 | best_option = None 240 | for option in enum_values: 241 | n_option_tokens = self.tokenizer.encode( 242 | f'"{option}', add_special_tokens=False, return_tensors="pt" 243 | ).shape[1] 244 | prompt_tokens = self.tokenizer.encode( 245 | prompt + f'"{option}', return_tensors="pt" 246 | ) 247 | option_tokens = prompt_tokens[0, -n_option_tokens:] 248 | 249 | with torch.no_grad(): 250 | logits = self.model.forward(prompt_tokens.to(self.model.device)).logits[ 251 | 0, -n_option_tokens - 1 : 252 | ] 253 | probabilities = torch.softmax(logits, dim=1) 254 | option_token_probabilities = probabilities[:-1][ 255 | torch.arange(probabilities.shape[0] - 1), option_tokens 256 | ] 257 | 258 | termination_probability = torch.max(probabilities[-1, terminal_tokens]) 259 | option_probability = ( 260 | torch.prod(option_token_probabilities) * termination_probability 261 | ) 262 | self.debug("[generate_enum]", f"{option_probability}, {option}") 263 | 264 | if option_probability > highest_probability: 265 | best_option = option 266 | highest_probability = option_probability 267 | 268 | self.debug("[generate_enum]", best_option) 269 | 270 | return best_option 271 | 272 | def generate_object( 273 | self, properties: Dict[str, Any], obj: Dict[str, Any] 274 | ) -> Dict[str, Any]: 275 | for key, schema in properties.items(): 276 | self.debug("[generate_object] generating value for", key) 277 | obj[key] = self.generate_value(schema, obj, key) 278 | return obj 279 | 280 | def choose_type_to_generate(self, possible_types: List[str]) -> str: 281 | possible_types = list(set(possible_types)) # remove duplicates 282 | self.debug("[choose_type_to_generate]", possible_types) 283 | if len(possible_types) < 1: 284 | raise ValueError(f"Union type must not be empty") 285 | elif len(possible_types) == 1: 286 | return possible_types[0] 287 | 288 | prompt = self.get_prompt() 289 | input_tensor = self.tokenizer.encode(prompt, return_tensors="pt") 290 | output = self.model.forward(input_tensor.to(self.model.device)) 291 | logits = output.logits[0, -1] 292 | 293 | max_type = None 294 | max_logit = -float("inf") 295 | for possible_type in possible_types: 296 | try: 297 | prefix_tokens = self.type_prefix_tokens[possible_type] 298 | except KeyError: 299 | raise ValueError(f"Unsupported schema type: {possible_type}") 300 | max_type_logit = logits[prefix_tokens].max() 301 | if max_type_logit > max_logit: 302 | max_type = possible_type 303 | max_logit = max_type_logit 304 | 305 | if max_type is None: 306 | raise Exception("Unable to find best type to generate for union type") 307 | self.debug("[choose_type_to_generate]", max_type) 308 | return max_type 309 | 310 | def generate_value( 311 | self, 312 | schema: Dict[str, Any], 313 | obj: Union[Dict[str, Any], List[Any]], 314 | key: Union[str, None] = None, 315 | ) -> Any: 316 | schema_type = schema["type"] 317 | if isinstance(schema_type, list): 318 | if key: 319 | obj[key] = self.generation_marker 320 | else: 321 | obj.append(self.generation_marker) 322 | schema_type = self.choose_type_to_generate(schema_type) 323 | if schema_type == "number": 324 | if key: 325 | obj[key] = self.generation_marker 326 | else: 327 | obj.append(self.generation_marker) 328 | return self.generate_number() 329 | elif schema_type == "integer": 330 | if key: 331 | obj[key] = self.generation_marker 332 | else: 333 | obj.append(self.generation_marker) 334 | return self.generate_integer() 335 | elif schema_type == "boolean": 336 | if key: 337 | obj[key] = self.generation_marker 338 | else: 339 | obj.append(self.generation_marker) 340 | return self.generate_boolean() 341 | elif schema_type == "string": 342 | if key: 343 | obj[key] = self.generation_marker 344 | else: 345 | obj.append(self.generation_marker) 346 | return self.generate_string( 347 | schema["maxLength"] if "maxLength" in schema else None 348 | ) 349 | elif schema_type == "p_enum": 350 | if key: 351 | obj[key] = self.generation_marker 352 | else: 353 | obj.append(self.generation_marker) 354 | return self.generate_p_enum(schema["values"], round=schema.get("round", 3)) 355 | elif schema_type == "p_integer": 356 | if key: 357 | obj[key] = self.generation_marker 358 | else: 359 | obj.append(self.generation_marker) 360 | return self.generate_p_integer( 361 | schema["minimum"], schema["maximum"], round=schema.get("round", 3) 362 | ) 363 | elif schema_type == "enum": 364 | if key: 365 | obj[key] = self.generation_marker 366 | else: 367 | obj.append(self.generation_marker) 368 | return self.generate_enum(set(schema["values"])) 369 | elif schema_type == "array": 370 | new_array = [] 371 | obj[key] = new_array 372 | return self.generate_array(schema["items"], new_array) 373 | elif schema_type == "object": 374 | new_obj = {} 375 | if key: 376 | obj[key] = new_obj 377 | else: 378 | obj.append(new_obj) 379 | return self.generate_object(schema["properties"], new_obj) 380 | elif schema_type == "null": 381 | return None 382 | else: 383 | raise ValueError(f"Unsupported schema type: {schema_type}") 384 | 385 | def generate_array(self, item_schema: Dict[str, Any], obj: Dict[str, Any]) -> list: 386 | for _ in range(self.max_array_length): 387 | # forces array to have at least one element 388 | element = self.generate_value(item_schema, obj) 389 | obj[-1] = element 390 | 391 | obj.append(self.generation_marker) 392 | input_prompt = self.get_prompt() 393 | obj.pop() 394 | input_tensor = self.tokenizer.encode(input_prompt, return_tensors="pt") 395 | output = self.model.forward(input_tensor.to(self.model.device)) 396 | logits = output.logits[0, -1] 397 | 398 | top_indices = logits.topk(30).indices 399 | sorted_token_ids = top_indices[logits[top_indices].argsort(descending=True)] 400 | 401 | found_comma = False 402 | found_close_bracket = False 403 | 404 | for token_id in sorted_token_ids: 405 | decoded_token = self.tokenizer.decode( 406 | token_id, skip_special_tokens=True 407 | ) 408 | if "," in decoded_token: 409 | found_comma = True 410 | break 411 | if "]" in decoded_token: 412 | found_close_bracket = True 413 | break 414 | 415 | if found_close_bracket or not found_comma: 416 | break 417 | 418 | return obj 419 | 420 | def get_prompt(self): 421 | template = """{prompt}\nOutput result in the following JSON schema format:\n```json{schema}```\nResult: ```json\n{progress}""" 422 | # TODO: collapse p_X schema types into X to not confuse the model 423 | value = self.value 424 | 425 | progress = json.dumps(value) 426 | gen_marker_index = progress.find(f'"{self.generation_marker}"') 427 | if gen_marker_index != -1: 428 | progress = progress[:gen_marker_index] 429 | else: 430 | raise ValueError("Failed to find generation marker") 431 | 432 | prompt = template.format( 433 | prompt=self.prompt, 434 | schema=json.dumps(self.json_schema), 435 | progress=progress, 436 | ) 437 | 438 | return prompt 439 | 440 | def __call__(self) -> Dict[str, Any]: 441 | self.value = {} 442 | generated_data = self.generate_object( 443 | self.json_schema["properties"], self.value 444 | ) 445 | return generated_data 446 | -------------------------------------------------------------------------------- /prob_jsonformer/prob_choice_tree.py: -------------------------------------------------------------------------------- 1 | from jaxtyping import Float, Int 2 | import torch 3 | from torch.nn import functional as F 4 | from torch import Tensor 5 | from typing import List, Callable, Tuple, Dict, Optional 6 | import pandas as pd 7 | from transformers import AutoModelForCausalLM, AutoTokenizer 8 | import math 9 | 10 | 11 | def round_to_nsf(num, nsf): 12 | if num != 0: 13 | return round(num, -int(math.floor(math.log10(abs(num))) + 1 - nsf)) 14 | else: 15 | return 0 # Can't take the log of 0 16 | 17 | 18 | def get_valid_next_choices(choices_tokens, current_tokens): 19 | next_choices = [] 20 | for choice_tokens in choices_tokens: 21 | # if we have some more slots left 22 | if len(current_tokens) < len(choice_tokens): 23 | # see if current_tokens matches 24 | if (choice_tokens[: len(current_tokens)] == current_tokens).all(): 25 | c = choice_tokens[len(current_tokens)].item() 26 | next_choices.append(c) 27 | 28 | next_choices = list(set(next_choices)) 29 | return torch.LongTensor(next_choices) 30 | 31 | 32 | def _prob_choice_tree( 33 | model: AutoModelForCausalLM, 34 | tokenizer: AutoTokenizer, 35 | input_ids: Int[Tensor, "seq"], 36 | choices_tokens: List[Int[Tensor, "seq"]], 37 | choice: Optional[Int[Tensor, ""]] = None, 38 | prob: float = 1, 39 | current_tokens: Int[Tensor, "seq"] = torch.LongTensor([]), 40 | ): 41 | if choice is not None: 42 | c = choice[None].to(current_tokens.device) 43 | current_tokens = torch.cat([current_tokens, c], dim=-1) 44 | c = choice[None].to(input_ids.device) 45 | input_ids = torch.cat([input_ids, c], dim=-1) 46 | 47 | next_choices = get_valid_next_choices(choices_tokens, current_tokens) 48 | if len(next_choices) == 0: 49 | s = tokenizer.decode(current_tokens, skip_special_tokens=True) 50 | r = dict(prob=prob, choice=s) 51 | yield r 52 | else: 53 | o = model(input_ids[None]) 54 | logits_constrained = o.logits[0, -1][next_choices] 55 | probs = F.softmax(logits_constrained, dim=-1) 56 | for i in range(len(next_choices)): 57 | next_choice = next_choices[i] 58 | next_prob = prob * probs[i].item() 59 | yield from prob_choice_tree( 60 | model=model, 61 | tokenizer=tokenizer, 62 | choices_tokens=choices_tokens, 63 | input_ids=input_ids, 64 | choice=next_choice, 65 | prob=next_prob, 66 | current_tokens=current_tokens, 67 | ) 68 | 69 | 70 | def prob_choice_tree( 71 | *args, 72 | sort: bool = True, 73 | round=3, 74 | **kwargs, 75 | ): 76 | choice_json = list( 77 | _prob_choice_tree( 78 | *args, 79 | **kwargs, 80 | ) 81 | ) 82 | # order by probability 83 | if sort: 84 | choice_json = sorted(choice_json, key=lambda x: -x["prob"]) 85 | 86 | # round probabilities 87 | for c in choice_json: 88 | c["prob"] = round_to_nsf(c["prob"], round) 89 | return choice_json 90 | -------------------------------------------------------------------------------- /prob_jsonformer/type_prefixes.py: -------------------------------------------------------------------------------- 1 | from transformers import PreTrainedTokenizer 2 | from typing import Dict, List 3 | import re 4 | 5 | def is_number_prefix(s: str) -> bool: 6 | return re.match(r"^[\-\d]+\.?[\d]*$", s) 7 | 8 | def is_boolean_prefix(s: str) -> bool: 9 | return 'true'.startswith(s) or 'false'.startswith(s) 10 | 11 | def is_null_prefix(s: str) -> bool: 12 | return 'null'.startswith(s) 13 | 14 | def is_string_prefix(s: str) -> bool: 15 | return re.match(r'^"[^"]*"?$', s) 16 | 17 | def is_array_prefix(s: str) -> bool: 18 | return re.match(r'^\[["\-\d\[{]*$', s) 19 | 20 | def is_object_prefix(s: str) -> bool: 21 | return re.match(r'^\{"?$', s) 22 | 23 | def get_prefix_tokens_for_types(tokenizer: PreTrainedTokenizer) -> Dict[str, List[str]]: 24 | vocab = tokenizer.vocab.items() 25 | return { 26 | "number": [v for k, v in vocab if is_number_prefix(k)], 27 | "boolean": [v for k, v in vocab if is_boolean_prefix(k)], 28 | "null": [v for k, v in vocab if is_null_prefix(k)], 29 | "string": [v for k, v in vocab if is_string_prefix(k)], 30 | "array": [v for k, v in vocab if is_array_prefix(k)], 31 | "object": [v for k, v in vocab if is_object_prefix(k)], 32 | } 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "prob_jsonformer" 3 | version = "0.12.0" 4 | description = "" 5 | authors = ["1rgs "] 6 | readme = "README.md" 7 | 8 | [tool.poetry.dependencies] 9 | python = "^3.9,<4.0" 10 | termcolor = "^2.3.0" 11 | jaxtyping = "^0.2.28" 12 | 13 | [tool.poetry.group.dev.dependencies] 14 | pandas = "^2.2.2" 15 | ipykernel = "^6.22.0" 16 | torch = "^2.0.0" 17 | accelerate = "^0.18.0" 18 | bitsandbytes = "^0.38.1" 19 | transformers = "^4.49" 20 | 21 | 22 | [build-system] 23 | requires = ["poetry-core"] 24 | build-backend = "poetry.core.masonry.api" 25 | 26 | [virtualenvs] 27 | create = true 28 | in-project = true 29 | --------------------------------------------------------------------------------