├── .gitignore
├── .vscode
    └── settings.json
├── README.md
├── dev.ipynb
├── example.ipynb
├── img
    ├── cover2.png
    └── cover4.png
├── license.txt
├── poetry.lock
├── poetry.toml
├── prob_dist.ipynb
├── prob_jsonformer
    ├── __init__.py
    ├── format.py
    ├── logits_processors.py
    ├── main.py
    ├── prob_choice_tree.py
    └── type_prefixes.py
└── pyproject.toml


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .venv
3 | workspace.ipynb
4 | dist
5 | .DS_Store
6 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "editor.formatOnSave": true,
3 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # prob_jsonformer: Probabilistic Structured JSON from Language Models.
  2 | 
  3 | This fork has been modified to include the token probabilities. This is not complaint with json schema, but it can be useful for efficient extracting of a range of possible values.
  4 | 
  5 | I've also merged some of the recent PR's for enum, integer, null, union. They are not yet included in the upstream Jsonformer. You can see them all below in this example:
  6 | 
  7 | 
  8 | ~~~
  9 | # installing
 10 | pip install git+https://github.com/wassname/prob_jsonformer.git
 11 | ~~~
 12 | 
 13 | 
 14 | ## Metrics
 15 | 
 16 | How well does it work? Well when I asked is `Q: Please sample a number from the distribution [0, 20]: `, assumming it should be a uniform distribution, this is how well it did:
 17 | 
 18 | Lower is better as it indicates a faithful sampling of the distribution. Time is in seconds.
 19 | 
 20 | | method                   | KL_div_loss |     time |
 21 | | :----------------------- | ----------: | -------: |
 22 | | method0: sampling        |    -3.09214 |  48.5044 |
 23 | | method1: hindsight       |    -3.09214 | 0.683987 |
 24 | | method3: generation tree |   **-3.09216**| **0.075112**|
 25 | 
 26 | KL_div_loss is the -1 * KL divergence between the true distribution and the generated distribution. 
 27 | 
 28 | 
 29 | ## Example
 30 | 
 31 | ```python
 32 | from prob_jsonformer import Jsonformer
 33 | from transformers import AutoModelForCausalLM, AutoTokenizer
 34 | 
 35 | model_name = "databricks/dolly-v2-3b"
 36 | model = AutoModelForCausalLM.from_pretrained(model_name)
 37 | tokenizer = AutoTokenizer.from_pretrained(model_name)
 38 | 
 39 | json_schema = {
 40 |     "type": "object",
 41 |     "properties": {
 42 |         # we can return the probability of each choice, even if they are multiple tokens
 43 |         "age_probs": {"type": "p_enum", "values": [str(s) for s in range(10, 20)]},
 44 |         # we can return the probabilistic weighted mean of a range
 45 |         "age_wmean": {"type": "p_integer", "minimum": 10, "maximum": 20},
 46 |         # the prob of true and false
 47 |         "is_student_probs": {"type": "p_enum", "values": ["true", "false"]},
 48 |         "is_student": {"type": "boolean"},
 49 |         # we've merged patches for enum, integer, null, union - currently mising from jsonformer
 50 |         "name": {"type": "string", "maxLength": 4},
 51 |         "age": {"type": "integer"},
 52 |         "unit_time": {"type": "number"},
 53 |         "courses": {"type": "array", "items": {"type": "string"}},
 54 |         "trim": {"type": ["string", "null"]},
 55 |         "color": {
 56 |             "type": "enum",
 57 |             "values": ["red", "green", "blue", "brown", "white", "black"],
 58 |         },
 59 |     },
 60 | }
 61 | 
 62 | prompt = "Generate a young person's information based on the following schema:"
 63 | jsonformer = Jsonformer(model, tokenizer, json_schema, prompt, temperature=0)
 64 | generated_data = jsonformer()
 65 | 
 66 | generated_data = {
 67 |     "age_probs": [
 68 |         {"prob": 0.62353515625, "choice": "10"},
 69 |         {"prob": 0.349609375, "choice": "12"},
 70 |         {"prob": 0.01123809814453125, "choice": "11"},
 71 |         {"prob": 0.00760650634765625, "choice": "16"},
 72 |         {"prob": 0.0025482177734375, "choice": "13"},
 73 |         {"prob": 0.0025081634521484375, "choice": "15"},
 74 |         {"prob": 0.0018062591552734375, "choice": "14"},
 75 |         {"prob": 0.00104522705078125, "choice": "18"},
 76 |         {"prob": 0.00011551380157470703, "choice": "17"},
 77 |         {"prob": 5.042552947998047e-05, "choice": "19"},
 78 |     ],
 79 |     "age_wmean": 15.544570922851562,
 80 |     "is_student_probs": [
 81 |         {"prob": 0.962890625, "choice": "true"},
 82 |         {"prob": 0.037322998046875, "choice": "false"},
 83 |     ],
 84 |     "is_student": False,
 85 |     "name": "John",
 86 |     "age": 17,
 87 |     "unit_time": 0.5,
 88 |     "courses": ["C++"],
 89 |     "trim": None,
 90 |     "color": "green",
 91 | }
 92 | ```
 93 | 
 94 |  The original [README](https://github.com/1rgs/jsonformer) is included below.
 95 | 
 96 | # ORIGINAL: Jsonformer: A Bulletproof Way to Generate Structured JSON from Language Models.
 97 | 
 98 | ### Problem: Getting models to output structured JSON is hard
 99 | 
100 | ### Solution: Only generate the content tokens and fill in the fixed tokens
101 | 
102 | [![colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/1rgs/jsonformer/blob/main/Jsonformer_example.ipynb)
103 | 
104 | ![cover](img/cover4.png)
105 | 
106 | Generating structured JSON from language models is a challenging task. The
107 | generated JSON must be syntactically correct, and it must conform to a schema
108 | that specifies the structure of the JSON.
109 | 
110 | Current approaches to this problem are brittle and error-prone. They rely on prompt engineering, fine-tuning, and post-processing, but they still fail to generate syntactically correct JSON in many cases.
111 | 
112 | Jsonformer is a new approach to this problem. In structured data, many tokens are fixed and predictable. Jsonformer is a wrapper around Hugging Face models that fills in the fixed tokens during the generation process, and only delegates the generation of content tokens to the language model. This makes it more efficient and bulletproof than existing approaches.
113 | 
114 | This currently supports a subset of JSON Schema. Below is a list of the supported schema types:
115 | 
116 | - number
117 | - boolean
118 | - string
119 | - array
120 | - object
121 | 
122 | ## Example
123 | 
124 | ```python
125 | from jsonformer import Jsonformer
126 | from transformers import AutoModelForCausalLM, AutoTokenizer
127 | 
128 | model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-12b")
129 | tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-12b")
130 | 
131 | json_schema = {
132 |     "type": "object",
133 |     "properties": {
134 |         "name": {"type": "string"},
135 |         "age": {"type": "number"},
136 |         "is_student": {"type": "boolean"},
137 |         "courses": {
138 |             "type": "array",
139 |             "items": {"type": "string"}
140 |         }
141 |     }
142 | }
143 | 
144 | prompt = "Generate a person's information based on the following schema:"
145 | jsonformer = Jsonformer(model, tokenizer, json_schema, prompt)
146 | generated_data = jsonformer()
147 | 
148 | print(generated_data)
149 | ```
150 | 
151 | ### Jsonformer works on complex schemas, even with tiny models. Here is an example of a schema with nested objects and arrays, generated by a 3B parameter model.
152 | 
153 | ```python
154 | {"type": "object", "properties": {"car": {"type": "object", "properties": {"make": {"type": "string"}, "model": {"type": "string"}, "year": {"type": "number"}, "colors": {"type": "array", "items": {"type": "string"}}, "features": {"type": "object", "properties": {"audio": {"type": "object", "properties": {"brand": {"type": "string"}, "speakers": {"type": "number"}, "hasBluetooth": {"type": "boolean"}}}, "safety": {"type": "object", "properties": {"airbags": {"type": "number"}, "parkingSensors": {"type": "boolean"}, "laneAssist": {"type": "boolean"}}}, "performance": {"type": "object", "properties": {"engine": {"type": "string"}, "horsepower": {"type": "number"}, "topSpeed": {"type": "number"}}}}}}}, "owner": {"type": "object", "properties": {"firstName": {"type": "string"}, "lastName": {"type": "string"}, "age": {"type": "number"}}}}}
155 | ```
156 | 
157 | ```python
158 | {
159 |   car: {
160 |     make: "audi",
161 |     model: "model A8",
162 |     year: 2016.0,
163 |     colors: [
164 |       "blue"
165 |     ],
166 |     features: {
167 |       audio: {
168 |         brand: "sony",
169 |         speakers: 2.0,
170 |         hasBluetooth: True
171 |       },
172 |       safety: {
173 |         airbags: 2.0,
174 |         parkingSensors: True,
175 |         laneAssist: True
176 |       },
177 |       performance: {
178 |         engine: "4.0",
179 |         horsepower: 220.0,
180 |         topSpeed: 220.0
181 |       }
182 |     }
183 |   },
184 |   owner: {
185 |     firstName: "John",
186 |     lastName: "Doe",
187 |     age: 40.0
188 |   }
189 | }
190 | ```
191 | 
192 | ## Features
193 | 
194 | - Bulletproof JSON generation: Jsonformer ensures that the generated JSON is always syntactically correct and conforms to the specified schema.
195 | - Efficiency: By generating only the content tokens and filling in the fixed tokens, Jsonformer is more efficient than generating a full JSON string and parsing it.
196 | - Flexible and extendable: Jsonformer is built on top of the Hugging Face transformers library, making it compatible with any model that supports the Hugging Face interface.
197 | 
198 | ## Installation
199 | 
200 | ```bash
201 | pip install jsonformer
202 | ```
203 | 
204 | ## Development
205 | 
206 | [Poetry](https://python-poetry.org/docs/#installation) is used for dependency management.
207 | 
208 | ```bash
209 | poetry install
210 | ```
211 | 
212 | ```bash
213 | poetry run python -m jsonformer.example
214 | ```
215 | 
216 | ## License
217 | 
218 | Jsonformer is released under the MIT License. You are free to use, modify, and distribute this software for any purpose, commercial or non-commercial, as long as the original copyright and license notice are included.
219 | 


--------------------------------------------------------------------------------
/dev.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# autoreload your package\n",
 10 |     "%load_ext autoreload\n",
 11 |     "%autoreload 2"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "name": "stdout",
 21 |      "output_type": "stream",
 22 |      "text": [
 23 |       "Loading model and tokenizer...\n"
 24 |      ]
 25 |     },
 26 |     {
 27 |      "data": {
 28 |       "application/vnd.jupyter.widget-view+json": {
 29 |        "model_id": "c7620cdf45c54ab5abaf479b141f4479",
 30 |        "version_major": 2,
 31 |        "version_minor": 0
 32 |       },
 33 |       "text/plain": [
 34 |        "config.json:   0%|          | 0.00/819 [00:00<?, ?B/s]"
 35 |       ]
 36 |      },
 37 |      "metadata": {},
 38 |      "output_type": "display_data"
 39 |     },
 40 |     {
 41 |      "data": {
 42 |       "application/vnd.jupyter.widget-view+json": {
 43 |        "model_id": "b9dba56663274820997ef5a07e7acea4",
 44 |        "version_major": 2,
 45 |        "version_minor": 0
 46 |       },
 47 |       "text/plain": [
 48 |        "pytorch_model.bin:   0%|          | 0.00/5.68G [00:00<?, ?B/s]"
 49 |       ]
 50 |      },
 51 |      "metadata": {},
 52 |      "output_type": "display_data"
 53 |     },
 54 |     {
 55 |      "data": {
 56 |       "application/vnd.jupyter.widget-view+json": {
 57 |        "model_id": "8571f2926f9541469ed34140333cdf81",
 58 |        "version_major": 2,
 59 |        "version_minor": 0
 60 |       },
 61 |       "text/plain": [
 62 |        "model.safetensors:   0%|          | 0.00/5.68G [00:00<?, ?B/s]"
 63 |       ]
 64 |      },
 65 |      "metadata": {},
 66 |      "output_type": "display_data"
 67 |     },
 68 |     {
 69 |      "data": {
 70 |       "application/vnd.jupyter.widget-view+json": {
 71 |        "model_id": "93c30a28fbdd4257ab783cc659b24687",
 72 |        "version_major": 2,
 73 |        "version_minor": 0
 74 |       },
 75 |       "text/plain": [
 76 |        "tokenizer_config.json:   0%|          | 0.00/450 [00:00<?, ?B/s]"
 77 |       ]
 78 |      },
 79 |      "metadata": {},
 80 |      "output_type": "display_data"
 81 |     },
 82 |     {
 83 |      "data": {
 84 |       "application/vnd.jupyter.widget-view+json": {
 85 |        "model_id": "10ad2621da06464bbdc5b4aa49687d50",
 86 |        "version_major": 2,
 87 |        "version_minor": 0
 88 |       },
 89 |       "text/plain": [
 90 |        "tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]"
 91 |       ]
 92 |      },
 93 |      "metadata": {},
 94 |      "output_type": "display_data"
 95 |     },
 96 |     {
 97 |      "data": {
 98 |       "application/vnd.jupyter.widget-view+json": {
 99 |        "model_id": "3abe2819228e4da4ad859c54abefc479",
100 |        "version_major": 2,
101 |        "version_minor": 0
102 |       },
103 |       "text/plain": [
104 |        "special_tokens_map.json:   0%|          | 0.00/228 [00:00<?, ?B/s]"
105 |       ]
106 |      },
107 |      "metadata": {},
108 |      "output_type": "display_data"
109 |     },
110 |     {
111 |      "name": "stdout",
112 |      "output_type": "stream",
113 |      "text": [
114 |       "Loaded model and tokenizer\n"
115 |      ]
116 |     }
117 |    ],
118 |    "source": [
119 |     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
120 |     "import torch\n",
121 |     "\n",
122 |     "print(\"Loading model and tokenizer...\")\n",
123 |     "model_name = \"databricks/dolly-v2-3b\"\n",
124 |     "model = AutoModelForCausalLM.from_pretrained(\n",
125 |     "    model_name,\n",
126 |     "    use_cache=True,\n",
127 |     "    torch_dtype=torch.float16,\n",
128 |     "    attn_implementation=\"eager\",\n",
129 |     ").to(\"cuda:0\")\n",
130 |     "tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_cache=True)\n",
131 |     "print(\"Loaded model and tokenizer\")"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": 3,
137 |    "metadata": {},
138 |    "outputs": [
139 |     {
140 |      "ename": "KeyError",
141 |      "evalue": "'values'",
142 |      "output_type": "error",
143 |      "traceback": [
144 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
145 |       "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
146 |       "Cell \u001b[0;32mIn[3], line 29\u001b[0m\n\u001b[1;32m     27\u001b[0m prompt \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerate a young person\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124ms information based on the following schema:\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     28\u001b[0m jsonformer \u001b[38;5;241m=\u001b[39m Jsonformer(model, tokenizer, json_schema, prompt)\n\u001b[0;32m---> 29\u001b[0m generated_data \u001b[38;5;241m=\u001b[39m \u001b[43mjsonformer\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     31\u001b[0m generated_data\n",
147 |       "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:439\u001b[0m, in \u001b[0;36mJsonformer.__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    437\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[1;32m    438\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m--> 439\u001b[0m     generated_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_object\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    440\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_schema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue\u001b[49m\n\u001b[1;32m    441\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    442\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m generated_data\n",
148 |       "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:274\u001b[0m, in \u001b[0;36mJsonformer.generate_object\u001b[0;34m(self, properties, obj)\u001b[0m\n\u001b[1;32m    272\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, schema \u001b[38;5;129;01min\u001b[39;00m properties\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m    273\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[generate_object] generating value for\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[0;32m--> 274\u001b[0m     obj[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n",
149 |       "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:351\u001b[0m, in \u001b[0;36mJsonformer.generate_value\u001b[0;34m(self, schema, obj, key)\u001b[0m\n\u001b[1;32m    349\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    350\u001b[0m         obj\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgeneration_marker)\n\u001b[0;32m--> 351\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgenerate_p_enum(\u001b[43mschema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalues\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m, \u001b[38;5;28mround\u001b[39m\u001b[38;5;241m=\u001b[39mschema\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mround\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m3\u001b[39m))\n\u001b[1;32m    352\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m schema_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mp_integer\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    353\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m key:\n",
150 |       "\u001b[0;31mKeyError\u001b[0m: 'values'"
151 |      ]
152 |     }
153 |    ],
154 |    "source": [
155 |     "from prob_jsonformer import Jsonformer\n",
156 |     "\n",
157 |     "json_schema = {\n",
158 |     "    \"type\": \"object\",\n",
159 |     "    \"properties\": {\n",
160 |     "        # we can return the probability of each choice, even if they are multiple tokens\n",
161 |     "        \"age_probs\": {\"type\": \"p_enum\", \"enum\": [str(s) for s in range(10, 30)]},\n",
162 |     "        # we can return the probabilistic weighted mean of a range\n",
163 |     "        \"age_wmean\": {\"type\": \"p_integer\", \"minimum\": 10, \"maximum\": 30},\n",
164 |     "        # the prob of true and false\n",
165 |     "        \"is_student_probs\": {\"type\": \"p_enum\", \"enum\": [\"true\", \"false\"]},\n",
166 |     "        \"is_student\": {\"type\": \"boolean\"},\n",
167 |     "        # we've merged patches for enum, integer, null, union - currently mising from jsonformer\n",
168 |     "        \"name\": {\"type\": \"string\", \"maxLength\": 4},\n",
169 |     "        \"age\": {\"type\": \"integer\"},\n",
170 |     "        \"unit_time\": {\"type\": \"number\"},\n",
171 |     "        \"courses\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n",
172 |     "        \"trim\": {\"type\": [\"string\", \"null\"]},\n",
173 |     "        \"color\": {\n",
174 |     "            \"type\": \"enum\",\n",
175 |     "            \"values\": [\"red\", \"green\", \"blue\", \"brown\", \"white\", \"black\"],\n",
176 |     "        },\n",
177 |     "    },\n",
178 |     "}\n",
179 |     "\n",
180 |     "\n",
181 |     "prompt = \"Generate a young person's information based on the following schema:\"\n",
182 |     "jsonformer = Jsonformer(model, tokenizer, json_schema, prompt)\n",
183 |     "generated_data = jsonformer()\n",
184 |     "\n",
185 |     "generated_data"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "code",
190 |    "execution_count": 4,
191 |    "metadata": {},
192 |    "outputs": [],
193 |    "source": [
194 |     "generated_data = {\n",
195 |     "    \"age_probs\": [\n",
196 |     "        {\"prob\": 0.94091796875, \"choice\": \"10\"},\n",
197 |     "        {\"prob\": 0.033233642578125, \"choice\": \"20\"},\n",
198 |     "        {\"prob\": 0.0122222900390625, \"choice\": \"12\"},\n",
199 |     "        {\"prob\": 0.00412750244140625, \"choice\": \"21\"},\n",
200 |     "        {\"prob\": 0.0028362274169921875, \"choice\": \"16\"},\n",
201 |     "        {\"prob\": 0.0018453598022460938, \"choice\": \"15\"},\n",
202 |     "        {\"prob\": 0.00113677978515625, \"choice\": \"11\"},\n",
203 |     "        {\"prob\": 0.0011110305786132812, \"choice\": \"18\"},\n",
204 |     "        {\"prob\": 0.0005083084106445312, \"choice\": \"25\"},\n",
205 |     "        {\"prob\": 0.0004558563232421875, \"choice\": \"23\"},\n",
206 |     "        {\"prob\": 0.0002498626708984375, \"choice\": \"14\"},\n",
207 |     "        {\"prob\": 0.00023281574249267578, \"choice\": \"13\"},\n",
208 |     "        {\"prob\": 0.0002238750457763672, \"choice\": \"22\"},\n",
209 |     "        {\"prob\": 0.00018131732940673828, \"choice\": \"26\"},\n",
210 |     "        {\"prob\": 0.0001690387725830078, \"choice\": \"24\"},\n",
211 |     "        {\"prob\": 0.00012552738189697266, \"choice\": \"19\"},\n",
212 |     "        {\"prob\": 7.796287536621094e-05, \"choice\": \"27\"},\n",
213 |     "        {\"prob\": 7.265806198120117e-05, \"choice\": \"28\"},\n",
214 |     "        {\"prob\": 4.106760025024414e-05, \"choice\": \"17\"},\n",
215 |     "        {\"prob\": 2.5033950805664062e-06, \"choice\": \"29\"},\n",
216 |     "    ],\n",
217 |     "    \"age_wmean\": 17.816404402256012,\n",
218 |     "    \"is_student_probs\": [\n",
219 |     "        {\"prob\": 0.974609375, \"choice\": \"true\"},\n",
220 |     "        {\"prob\": 0.025177001953125, \"choice\": \"false\"},\n",
221 |     "    ],\n",
222 |     "    \"is_student\": False,\n",
223 |     "    \"name\": \"John\",\n",
224 |     "    \"age\": 17,\n",
225 |     "    \"unit_time\": 0.5,\n",
226 |     "    \"courses\": [\"C++\"],\n",
227 |     "    \"trim\": None,\n",
228 |     "    \"color\": \"white\",\n",
229 |     "}"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": null,
235 |    "metadata": {},
236 |    "outputs": [],
237 |    "source": []
238 |   }
239 |  ],
240 |  "metadata": {
241 |   "kernelspec": {
242 |    "display_name": "pytorch_hf_env",
243 |    "language": "python",
244 |    "name": "pytorch_hf_env"
245 |   },
246 |   "language_info": {
247 |    "codemirror_mode": {
248 |     "name": "ipython",
249 |     "version": 3
250 |    },
251 |    "file_extension": ".py",
252 |    "mimetype": "text/x-python",
253 |    "name": "python",
254 |    "nbconvert_exporter": "python",
255 |    "pygments_lexer": "ipython3",
256 |    "version": "3.10.16"
257 |   }
258 |  },
259 |  "nbformat": 4,
260 |  "nbformat_minor": 2
261 | }
262 | 


--------------------------------------------------------------------------------
/example.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |     "cells": [
  3 |         {
  4 |             "cell_type": "code",
  5 |             "execution_count": 1,
  6 |             "metadata": {},
  7 |             "outputs": [],
  8 |             "source": [
  9 |                 "# autoreload your package\n",
 10 |                 "%load_ext autoreload\n",
 11 |                 "%autoreload 2"
 12 |             ]
 13 |         },
 14 |         {
 15 |             "cell_type": "code",
 16 |             "execution_count": 2,
 17 |             "metadata": {},
 18 |             "outputs": [
 19 |                 {
 20 |                     "name": "stdout",
 21 |                     "output_type": "stream",
 22 |                     "text": [
 23 |                         "Loading model and tokenizer...\n",
 24 |                         "Loaded model and tokenizer\n"
 25 |                     ]
 26 |                 }
 27 |             ],
 28 |             "source": [
 29 |                 "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
 30 |                 "import torch\n",
 31 |                 "\n",
 32 |                 "print(\"Loading model and tokenizer...\")\n",
 33 |                 "model_name = \"databricks/dolly-v2-3b\"\n",
 34 |                 "model = AutoModelForCausalLM.from_pretrained(\n",
 35 |                 "    model_name,\n",
 36 |                 "    use_cache=True,\n",
 37 |                 "    torch_dtype=torch.float16,\n",
 38 |                 "    attn_implementation=\"eager\",\n",
 39 |                 ").to(\"cuda:0\")\n",
 40 |                 "tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_cache=True)\n",
 41 |                 "print(\"Loaded model and tokenizer\")"
 42 |             ]
 43 |         },
 44 |         {
 45 |             "cell_type": "markdown",
 46 |             "metadata": {},
 47 |             "source": [
 48 |                 "# Continue"
 49 |             ]
 50 |         },
 51 |         {
 52 |             "cell_type": "code",
 53 |             "execution_count": 3,
 54 |             "metadata": {},
 55 |             "outputs": [
 56 |                 {
 57 |                     "name": "stderr",
 58 |                     "output_type": "stream",
 59 |                     "text": [
 60 |                         "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
 61 |                     ]
 62 |                 },
 63 |                 {
 64 |                     "name": "stdout",
 65 |                     "output_type": "stream",
 66 |                     "text": [
 67 |                         "Generating...\n"
 68 |                     ]
 69 |                 },
 70 |                 {
 71 |                     "ename": "KeyError",
 72 |                     "evalue": "'values'",
 73 |                     "output_type": "error",
 74 |                     "traceback": [
 75 |                         "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
 76 |                         "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
 77 |                         "Cell \u001b[0;32mIn[3], line 47\u001b[0m\n\u001b[1;32m     38\u001b[0m builder \u001b[38;5;241m=\u001b[39m Jsonformer(\n\u001b[1;32m     39\u001b[0m     model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m     40\u001b[0m     tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     43\u001b[0m     max_string_token_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m20\u001b[39m,\n\u001b[1;32m     44\u001b[0m )\n\u001b[1;32m     46\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerating...\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 47\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     49\u001b[0m highlight_values(output)\n",
 78 |                         "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:439\u001b[0m, in \u001b[0;36mJsonformer.__call__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    437\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict[\u001b[38;5;28mstr\u001b[39m, Any]:\n\u001b[1;32m    438\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m--> 439\u001b[0m     generated_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_object\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    440\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson_schema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue\u001b[49m\n\u001b[1;32m    441\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    442\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m generated_data\n",
 79 |                         "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:274\u001b[0m, in \u001b[0;36mJsonformer.generate_object\u001b[0;34m(self, properties, obj)\u001b[0m\n\u001b[1;32m    272\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, schema \u001b[38;5;129;01min\u001b[39;00m properties\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m    273\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[generate_object] generating value for\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[0;32m--> 274\u001b[0m     obj[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n",
 80 |                         "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:376\u001b[0m, in \u001b[0;36mJsonformer.generate_value\u001b[0;34m(self, schema, obj, key)\u001b[0m\n\u001b[1;32m    374\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    375\u001b[0m         obj\u001b[38;5;241m.\u001b[39mappend(new_obj)\n\u001b[0;32m--> 376\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_object\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mproperties\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_obj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    377\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m schema_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnull\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    378\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
 81 |                         "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:274\u001b[0m, in \u001b[0;36mJsonformer.generate_object\u001b[0;34m(self, properties, obj)\u001b[0m\n\u001b[1;32m    272\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, schema \u001b[38;5;129;01min\u001b[39;00m properties\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m    273\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[generate_object] generating value for\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[0;32m--> 274\u001b[0m     obj[key] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mschema\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n",
 82 |                         "File \u001b[0;32m/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/prob_jsonformer/main.py:351\u001b[0m, in \u001b[0;36mJsonformer.generate_value\u001b[0;34m(self, schema, obj, key)\u001b[0m\n\u001b[1;32m    349\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    350\u001b[0m         obj\u001b[38;5;241m.\u001b[39mappend(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgeneration_marker)\n\u001b[0;32m--> 351\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgenerate_p_enum(\u001b[43mschema\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvalues\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m, \u001b[38;5;28mround\u001b[39m\u001b[38;5;241m=\u001b[39mschema\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mround\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m3\u001b[39m))\n\u001b[1;32m    352\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m schema_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mp_integer\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    353\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m key:\n",
 83 |                         "\u001b[0;31mKeyError\u001b[0m: 'values'"
 84 |                     ]
 85 |                 }
 86 |             ],
 87 |             "source": [
 88 |                 "from prob_jsonformer.format import highlight_values\n",
 89 |                 "from prob_jsonformer.main import Jsonformer\n",
 90 |                 "\n",
 91 |                 "ecomm = {\n",
 92 |                 "    \"type\": \"object\",\n",
 93 |                 "    \"properties\": {\n",
 94 |                 "        \"store\": {\n",
 95 |                 "            \"type\": \"object\",\n",
 96 |                 "            \"properties\": {\n",
 97 |                 "                \"name\": {\"type\": \"string\"},\n",
 98 |                 "                \"location\": {\"type\": \"string\"},\n",
 99 |                 "                \"p_enum\": {\n",
100 |                 "                    \"type\": \"p_enum\",\n",
101 |                 "                    \"enum\": [\"ski\", \"snowboard\", \"walk\", \"pretend\"],\n",
102 |                 "                },\n",
103 |                 "                \"inventory\": {\n",
104 |                 "                    \"type\": \"array\",\n",
105 |                 "                    \"items\": {\n",
106 |                 "                        \"type\": \"object\",\n",
107 |                 "                        \"properties\": {\n",
108 |                 "                            \"productId\": {\"type\": \"string\"},\n",
109 |                 "                            \"name\": {\"type\": \"string\"},\n",
110 |                 "                            \"description\": {\"type\": \"string\"},\n",
111 |                 "                            \"category\": {\"type\": \"string\"},\n",
112 |                 "                            \"price\": {\"type\": \"number\"},\n",
113 |                 "                            \"inStock\": {\"type\": \"boolean\"},\n",
114 |                 "                            \"rating\": {\"type\": \"number\"},\n",
115 |                 "                            \"images\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n",
116 |                 "                        },\n",
117 |                 "                    },\n",
118 |                 "                },\n",
119 |                 "            },\n",
120 |                 "        }\n",
121 |                 "    },\n",
122 |                 "}\n",
123 |                 "\n",
124 |                 "\n",
125 |                 "builder = Jsonformer(\n",
126 |                 "    model=model,\n",
127 |                 "    tokenizer=tokenizer,\n",
128 |                 "    json_schema=ecomm,\n",
129 |                 "    prompt=\"write a description about mike's ski shop which sells premium skis and snowboards\",\n",
130 |                 "    max_string_token_length=20,\n",
131 |                 ")\n",
132 |                 "\n",
133 |                 "print(\"Generating...\")\n",
134 |                 "output = builder()\n",
135 |                 "\n",
136 |                 "highlight_values(output)"
137 |             ]
138 |         },
139 |         {
140 |             "cell_type": "code",
141 |             "execution_count": null,
142 |             "metadata": {},
143 |             "outputs": [],
144 |             "source": [
145 |                 "car = {\n",
146 |                 "    \"type\": \"object\",\n",
147 |                 "    \"properties\": {\n",
148 |                 "        \"make\": {\"type\": \"string\"},\n",
149 |                 "        \"model\": {\"type\": \"p_enum\", \"enum\": [\"Mazda\", \"Kea\"]},\n",
150 |                 "        \"new\": {\"type\": \"p_enum\", \"enum\": [\"true\", \"false\"]},\n",
151 |                 "        \"rating\": {\"type\": \"p_enum\", \"enum\": [\"1\", \"2\", \"3\", \"4\"]},\n",
152 |                 "        \"year\": {\"type\": \"number\"},\n",
153 |                 "        \"colors_available\": {\n",
154 |                 "            \"type\": \"array\",\n",
155 |                 "            \"items\": {\"type\": \"string\"},\n",
156 |                 "        },\n",
157 |                 "    },\n",
158 |                 "}\n",
159 |                 "\n",
160 |                 "builder = Jsonformer(\n",
161 |                 "    model=model,\n",
162 |                 "    tokenizer=tokenizer,\n",
163 |                 "    json_schema=car,\n",
164 |                 "    prompt=\"generate an example car\",\n",
165 |                 ")\n",
166 |                 "\n",
167 |                 "print(\"Generating...\")\n",
168 |                 "output = builder()\n",
169 |                 "\n",
170 |                 "highlight_values(output)"
171 |             ]
172 |         },
173 |         {
174 |             "cell_type": "code",
175 |             "execution_count": null,
176 |             "metadata": {},
177 |             "outputs": [],
178 |             "source": [
179 |                 "complex_car = {\n",
180 |                 "    \"type\": \"object\",\n",
181 |                 "    \"properties\": {\n",
182 |                 "        \"car\": {\n",
183 |                 "            \"type\": \"object\",\n",
184 |                 "            \"properties\": {\n",
185 |                 "                \"make\": {\"type\": \"string\"},\n",
186 |                 "                \"model\": {\"type\": \"string\"},\n",
187 |                 "                \"year\": {\"type\": \"number\"},\n",
188 |                 "                \"colors\": {\n",
189 |                 "                    \"type\": \"p_enum\",\n",
190 |                 "                    \"enum\": [\"red\", \"green\", \"blue\", \"black\", \"white\"],\n",
191 |                 "                },\n",
192 |                 "                \"as_new\": {\"type\": \"p_enum\", \"enum\": [\"true\", \"false\"]},\n",
193 |                 "                \"rating\": {\"type\": \"p_enum\", \"enum\": [\"1\", \"2\", \"3\", \"4\"]},\n",
194 |                 "                \"features\": {\n",
195 |                 "                    \"type\": \"object\",\n",
196 |                 "                    \"properties\": {\n",
197 |                 "                        \"audio\": {\n",
198 |                 "                            \"type\": \"object\",\n",
199 |                 "                            \"properties\": {\n",
200 |                 "                                \"brand\": {\"type\": \"string\"},\n",
201 |                 "                                \"speakers\": {\"type\": \"number\"},\n",
202 |                 "                                \"hasBluetooth\": {\"type\": \"boolean\"},\n",
203 |                 "                            },\n",
204 |                 "                        },\n",
205 |                 "                        \"safety\": {\n",
206 |                 "                            \"type\": \"object\",\n",
207 |                 "                            \"properties\": {\n",
208 |                 "                                \"airbags\": {\"type\": \"number\"},\n",
209 |                 "                                \"parkingSensors\": {\"type\": \"boolean\"},\n",
210 |                 "                                \"laneAssist\": {\"type\": \"boolean\"},\n",
211 |                 "                            },\n",
212 |                 "                        },\n",
213 |                 "                        \"performance\": {\n",
214 |                 "                            \"type\": \"object\",\n",
215 |                 "                            \"properties\": {\n",
216 |                 "                                \"engine\": {\"type\": \"string\"},\n",
217 |                 "                                \"horsepower\": {\"type\": \"number\"},\n",
218 |                 "                                \"topSpeed\": {\"type\": \"number\"},\n",
219 |                 "                            },\n",
220 |                 "                        },\n",
221 |                 "                    },\n",
222 |                 "                },\n",
223 |                 "            },\n",
224 |                 "        },\n",
225 |                 "        \"owner\": {\n",
226 |                 "            \"type\": \"object\",\n",
227 |                 "            \"properties\": {\n",
228 |                 "                \"firstName\": {\"type\": \"string\"},\n",
229 |                 "                \"lastName\": {\"type\": \"string\"},\n",
230 |                 "                \"age\": {\"type\": \"number\"},\n",
231 |                 "            },\n",
232 |                 "        },\n",
233 |                 "    },\n",
234 |                 "}\n",
235 |                 "builder = Jsonformer(\n",
236 |                 "    model=model,\n",
237 |                 "    tokenizer=tokenizer,\n",
238 |                 "    json_schema=complex_car,\n",
239 |                 "    prompt=\"generate an example Rolls Royce Phantom\",\n",
240 |                 ")\n",
241 |                 "\n",
242 |                 "print(\"Generating...\")\n",
243 |                 "output = builder()\n",
244 |                 "\n",
245 |                 "highlight_values(output)"
246 |             ]
247 |         },
248 |         {
249 |             "cell_type": "markdown",
250 |             "metadata": {},
251 |             "source": [
252 |                 "## Readme example"
253 |             ]
254 |         },
255 |         {
256 |             "cell_type": "code",
257 |             "execution_count": null,
258 |             "metadata": {},
259 |             "outputs": [],
260 |             "source": [
261 |                 "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
262 |                 "\n",
263 |                 "model_name = \"databricks/dolly-v2-3b\"\n",
264 |                 "model = AutoModelForCausalLM.from_pretrained(model_name)\n",
265 |                 "tokenizer = AutoTokenizer.from_pretrained(model_name)"
266 |             ]
267 |         },
268 |         {
269 |             "cell_type": "code",
270 |             "execution_count": null,
271 |             "metadata": {},
272 |             "outputs": [
273 |                 {
274 |                     "data": {
275 |                         "text/plain": [
276 |                             "{'age_probs': [{'prob': 0.62353515625, 'choice': '10'},\n",
277 |                             "  {'prob': 0.349609375, 'choice': '12'},\n",
278 |                             "  {'prob': 0.01123809814453125, 'choice': '11'},\n",
279 |                             "  {'prob': 0.00760650634765625, 'choice': '16'},\n",
280 |                             "  {'prob': 0.0025482177734375, 'choice': '13'},\n",
281 |                             "  {'prob': 0.0025081634521484375, 'choice': '15'},\n",
282 |                             "  {'prob': 0.0018062591552734375, 'choice': '14'},\n",
283 |                             "  {'prob': 0.00104522705078125, 'choice': '18'},\n",
284 |                             "  {'prob': 0.00011551380157470703, 'choice': '17'},\n",
285 |                             "  {'prob': 5.042552947998047e-05, 'choice': '19'}],\n",
286 |                             " 'age_wmean': 15.544570922851562,\n",
287 |                             " 'is_student_probs': [{'prob': 0.962890625, 'choice': 'true'},\n",
288 |                             "  {'prob': 0.037322998046875, 'choice': 'false'}],\n",
289 |                             " 'is_student': False,\n",
290 |                             " 'name': 'John',\n",
291 |                             " 'age': 17,\n",
292 |                             " 'unit_time': 0.5,\n",
293 |                             " 'courses': ['C++'],\n",
294 |                             " 'trim': None,\n",
295 |                             " 'color': 'green'}"
296 |                         ]
297 |                     },
298 |                     "execution_count": 9,
299 |                     "metadata": {},
300 |                     "output_type": "execute_result"
301 |                 }
302 |             ],
303 |             "source": [
304 |                 "from prob_jsonformer import Jsonformer\n",
305 |                 "\n",
306 |                 "json_schema = {\n",
307 |                 "    \"type\": \"object\",\n",
308 |                 "    \"properties\": {\n",
309 |                 "        # we can return the probability of each choice, even if they are multiple tokens\n",
310 |                 "        \"age_probs\": {\"type\": \"p_enum\", \"values\": [str(s) for s in range(10, 20)]},\n",
311 |                 "        # we can return the probabilistic weighted mean of a range\n",
312 |                 "        \"age_wmean\": {\"type\": \"p_integer\", \"minimum\": 10, \"maximum\": 20},\n",
313 |                 "        # the prob of true and false\n",
314 |                 "        \"is_student_probs\": {\"type\": \"p_enum\", \"values\": [\"true\", \"false\"]},\n",
315 |                 "        \"is_student\": {\"type\": \"boolean\"},\n",
316 |                 "        # we've merged patches for enum, integer, null, union - currently mising from jsonformer\n",
317 |                 "        \"name\": {\"type\": \"string\", \"maxLength\": 4},\n",
318 |                 "        \"age\": {\"type\": \"integer\"},\n",
319 |                 "        \"unit_time\": {\"type\": \"number\"},\n",
320 |                 "        \"courses\": {\"type\": \"array\", \"items\": {\"type\": \"string\"}},\n",
321 |                 "        \"trim\": {\"type\": [\"string\", \"null\"]},\n",
322 |                 "        \"color\": {\n",
323 |                 "            \"type\": \"enum\",\n",
324 |                 "            \"values\": [\"red\", \"green\", \"blue\", \"brown\", \"white\", \"black\"],\n",
325 |                 "        },\n",
326 |                 "    },\n",
327 |                 "}\n",
328 |                 "prompt = \"Generate a young person's information based on the following schema:\"\n",
329 |                 "jsonformer = Jsonformer(model, tokenizer, json_schema, prompt)\n",
330 |                 "generated_data = jsonformer()\n",
331 |                 "\n",
332 |                 "generated_data"
333 |             ]
334 |         },
335 |         {
336 |             "cell_type": "code",
337 |             "execution_count": null,
338 |             "metadata": {},
339 |             "outputs": [],
340 |             "source": []
341 |         }
342 |     ],
343 |     "metadata": {
344 |         "kernelspec": {
345 |             "display_name": "pytorch_hf_env",
346 |             "language": "python",
347 |             "name": "pytorch_hf_env"
348 |         },
349 |         "language_info": {
350 |             "codemirror_mode": {
351 |                 "name": "ipython",
352 |                 "version": 3
353 |             },
354 |             "file_extension": ".py",
355 |             "mimetype": "text/x-python",
356 |             "name": "python",
357 |             "nbconvert_exporter": "python",
358 |             "pygments_lexer": "ipython3",
359 |             "version": "3.10.16"
360 |         },
361 |         "orig_nbformat": 4
362 |     },
363 |     "nbformat": 4,
364 |     "nbformat_minor": 2
365 | }
366 | 


--------------------------------------------------------------------------------
/img/cover2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wassname/prob_jsonformer/b639079d045ab174398762e3b1e1fdca1d8d30ef/img/cover2.png


--------------------------------------------------------------------------------
/img/cover4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wassname/prob_jsonformer/b639079d045ab174398762e3b1e1fdca1d8d30ef/img/cover4.png


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | Copyright (c) 2018 Rahul Sengottuvelu
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | The above copyright notice and this permission notice shall be included in all
10 | copies or substantial portions of the Software.
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | SOFTWARE.


--------------------------------------------------------------------------------
/poetry.toml:
--------------------------------------------------------------------------------
1 | [virtualenvs]
2 | in-project = true
3 | 


--------------------------------------------------------------------------------
/prob_dist.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# autoreload your package\n",
 10 |     "%load_ext autoreload\n",
 11 |     "%autoreload 2"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "name": "stderr",
 21 |      "output_type": "stream",
 22 |      "text": [
 23 |       "/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
 24 |       "  from .autonotebook import tqdm as notebook_tqdm\n"
 25 |      ]
 26 |     },
 27 |     {
 28 |      "name": "stdout",
 29 |      "output_type": "stream",
 30 |      "text": [
 31 |       "Loading model and tokenizer...\n"
 32 |      ]
 33 |     },
 34 |     {
 35 |      "name": "stderr",
 36 |      "output_type": "stream",
 37 |      "text": [
 38 |       "/media/wassname/SGIronWolf/projects5/2024/prob_jsonformer/.venv/lib/python3.9/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
 39 |       "  warnings.warn(\n",
 40 |       "Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.42it/s]\n",
 41 |       "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
 42 |      ]
 43 |     },
 44 |     {
 45 |      "name": "stdout",
 46 |      "output_type": "stream",
 47 |      "text": [
 48 |       "Loaded model and tokenizer\n"
 49 |      ]
 50 |     }
 51 |    ],
 52 |    "source": [
 53 |     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
 54 |     "import torch\n",
 55 |     "\n",
 56 |     "from prob_jsonformer import Jsonformer\n",
 57 |     "\n",
 58 |     "print(\"Loading model and tokenizer...\")\n",
 59 |     "model_name = \"databricks/dolly-v2-3b\"\n",
 60 |     "model_name = \"NousResearch/Meta-Llama-3-8B-Instruct\".lower()\n",
 61 |     "# model_name = \"failspy/Llama-3-8B-Instruct-abliterated\"\n",
 62 |     "# model_name = \"cognitivecomputations/Llama-3-8B-Instruct-abliterated-v2\"\n",
 63 |     "# model_name = \"nvidia/Llama3-ChatQA-1.5-8B\" # 4b\n",
 64 |     "# model_name = \"CohereForAI/c4ai-command-r-v01-4bit\" # 35b/4 = 8.75b\n",
 65 |     "model = AutoModelForCausalLM.from_pretrained(\n",
 66 |     "    model_name,\n",
 67 |     "    use_cache=True,\n",
 68 |     "    torch_dtype=torch.float16,\n",
 69 |     "    attn_implementation=\"eager\",\n",
 70 |     ").to(\"cuda:0\")\n",
 71 |     "tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_cache=True)\n",
 72 |     "tokenizer.padding_side = \"left\"\n",
 73 |     "print(\"Loaded model and tokenizer\")"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 3,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "if tokenizer.pad_token_id is None:\n",
 83 |     "    tokenizer.pad_token_id = tokenizer.bos_token_id"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "## Prob dist\n",
 91 |     "\n",
 92 |     "- Can LLM's sample from a distribution? http://people.csail.mit.edu/renda/llm-sampling-paper\n",
 93 |     "- Can they forecast events https://arxiv.org/abs/2402.07862"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 4,
 99 |    "metadata": {},
100 |    "outputs": [],
101 |    "source": [
102 |     "from prob_jsonformer.prob_choice_tree import prob_choice_tree\n",
103 |     "import pandas as pd\n",
104 |     "import torch.nn.functional as F\n",
105 |     "from tqdm.auto import tqdm"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": 171,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "def method0(choices, n=400):\n",
115 |     "    \"\"\"\n",
116 |     "    just generate many times\n",
117 |     "    \"\"\"\n",
118 |     "\n",
119 |     "    toks = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\n",
120 |     "    data = []\n",
121 |     "    i = 0\n",
122 |     "    while i<n*10 and len(data) < n:\n",
123 |     "        i += 1\n",
124 |     "        o = model.generate(\n",
125 |     "            toks,\n",
126 |     "            do_sample=True,\n",
127 |     "            max_new_tokens=2,\n",
128 |     "            temperature=1,\n",
129 |     "            pad_token_id=tokenizer.pad_token_id\n",
130 |     "        )\n",
131 |     "        o = tokenizer.decode(o[0, -2:], skip_special_tokens=True)\n",
132 |     "        try:\n",
133 |     "            o = int(float(o))\n",
134 |     "        except ValueError:\n",
135 |     "            print(f\"failed `float({o})`\")\n",
136 |     "            continue\n",
137 |     "        if str(o) in choices:\n",
138 |     "            data.append(o)\n",
139 |     "        else:\n",
140 |     "            print(f\"Generated `{o}` not in choices {choices}\")\n",
141 |     "    d = pd.Series(data).value_counts(normalize=True).to_dict()\n",
142 |     "\n",
143 |     "    df = pd.DataFrame([d]).T\n",
144 |     "    df.index = df.index.astype(int)\n",
145 |     "    df = df.sort_index()\n",
146 |     "    return df\n",
147 |     "\n",
148 |     "def method1(choices):\n",
149 |     "    \"\"\"\n",
150 |     "    Here we try each completion to get the hindsight probability of each choice\n",
151 |     "\n",
152 |     "    from https://github.com/1rgs/jsonformer/pull/19\n",
153 |     "    \"\"\"\n",
154 |     "    data = {}\n",
155 |     "    for choice in choices:\n",
156 |     "        n_option_tokens = tokenizer.encode(f'\"{choice}', add_special_tokens=False, return_tensors=\"pt\").shape[1]\n",
157 |     "        toks = tokenizer.encode(prompt + f\"{choice}\", return_tensors=\"pt\").to(model.device)\n",
158 |     "        option_tokens = toks[0, -n_option_tokens:]\n",
159 |     "        o = model.forward(\n",
160 |     "            toks,\n",
161 |     "        )\n",
162 |     "        logits = o.logits[0, -n_option_tokens-1:]\n",
163 |     "\n",
164 |     "        probabilities = torch.softmax(logits, dim=1)\n",
165 |     "        option_token_probabilities = probabilities[:-1][torch.arange(probabilities.shape[0]-1), option_tokens]\n",
166 |     "        option_probability = torch.prod(option_token_probabilities)\n",
167 |     "        data[choice] = option_probability.item()\n",
168 |     "\n",
169 |     "    df = pd.DataFrame([data]).T\n",
170 |     "    df.index = df.index.astype(int)\n",
171 |     "    df.iloc[:, 0] = df.iloc[:, 0] / df.iloc[:, 0].sum()\n",
172 |     "    df = df.sort_index()\n",
173 |     "    return df\n",
174 |     "\n",
175 |     "\n",
176 |     "def method2(choices):\n",
177 |     "    \"\"\"\n",
178 |     "    Useing logit constraints\n",
179 |     "\n",
180 |     "    https://github.com/1rgs/jsonformer/pull/41\n",
181 |     "    \"\"\"\n",
182 |     "    # TODO\n",
183 |     "\n",
184 |     "def method3(choices):\n",
185 |     "    \"\"\"using my prob choice tree.\"\"\"\n",
186 |     "    input_ids = tokenizer.encode(prompt, return_tensors=\"pt\").to(\n",
187 |     "        model.device\n",
188 |     "    )[0]\n",
189 |     "    choice_tokens = tokenizer(choices).input_ids\n",
190 |     "    choice_tokens = [torch.tensor(c) for c in choice_tokens]\n",
191 |     "    r = prob_choice_tree(model, tokenizer, input_ids, choice_tokens)\n",
192 |     "\n",
193 |     "    df = pd.DataFrame(r).set_index('choice')\n",
194 |     "    df['prob'] = df['prob'].astype(float)\n",
195 |     "    df.index = df.index.astype(int)\n",
196 |     "    df.iloc[:, 0] = df.iloc[:, 0] / df.iloc[:, 0].sum()\n",
197 |     "    df = df.sort_index()\n",
198 |     "    return df\n",
199 |     "\n",
200 |     "\n",
201 |     "\n",
202 |     "prompt = \"\"\"Q: Please sample a number from the distribution [0, 20]: \n",
203 |     "A: \"\"\"\n",
204 |     "\n",
205 |     "choices = [str(i) for i in range(21)]\n",
206 |     "\n",
207 |     "ideal_dist = pd.Series({str(i): 1/21 for i in range(21)})\n",
208 |     "ideal_dist.index = ideal_dist.index.astype(int)\n",
209 |     "# ideal_dist"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": 161,
215 |    "metadata": {},
216 |    "outputs": [
217 |     {
218 |      "data": {
219 |       "text/plain": [
220 |        "0    1.0\n",
221 |        "dtype: float64"
222 |       ]
223 |      },
224 |      "execution_count": 161,
225 |      "metadata": {},
226 |      "output_type": "execute_result"
227 |     }
228 |    ],
229 |    "source": []
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": 139,
234 |    "metadata": {},
235 |    "outputs": [
236 |     {
237 |      "data": {
238 |       "text/html": [
239 |        "<div>\n",
240 |        "<style scoped>\n",
241 |        "    .dataframe tbody tr th:only-of-type {\n",
242 |        "        vertical-align: middle;\n",
243 |        "    }\n",
244 |        "\n",
245 |        "    .dataframe tbody tr th {\n",
246 |        "        vertical-align: top;\n",
247 |        "    }\n",
248 |        "\n",
249 |        "    .dataframe thead th {\n",
250 |        "        text-align: right;\n",
251 |        "    }\n",
252 |        "</style>\n",
253 |        "<table border=\"1\" class=\"dataframe\">\n",
254 |        "  <thead>\n",
255 |        "    <tr style=\"text-align: right;\">\n",
256 |        "      <th></th>\n",
257 |        "      <th>0</th>\n",
258 |        "    </tr>\n",
259 |        "  </thead>\n",
260 |        "  <tbody>\n",
261 |        "    <tr>\n",
262 |        "      <th>4</th>\n",
263 |        "      <td>0.073333</td>\n",
264 |        "    </tr>\n",
265 |        "    <tr>\n",
266 |        "      <th>5</th>\n",
267 |        "      <td>0.100000</td>\n",
268 |        "    </tr>\n",
269 |        "    <tr>\n",
270 |        "      <th>7</th>\n",
271 |        "      <td>0.100000</td>\n",
272 |        "    </tr>\n",
273 |        "    <tr>\n",
274 |        "      <th>8</th>\n",
275 |        "      <td>0.080000</td>\n",
276 |        "    </tr>\n",
277 |        "    <tr>\n",
278 |        "      <th>10</th>\n",
279 |        "      <td>0.046667</td>\n",
280 |        "    </tr>\n",
281 |        "    <tr>\n",
282 |        "      <th>11</th>\n",
283 |        "      <td>0.046667</td>\n",
284 |        "    </tr>\n",
285 |        "    <tr>\n",
286 |        "      <th>12</th>\n",
287 |        "      <td>0.125556</td>\n",
288 |        "    </tr>\n",
289 |        "    <tr>\n",
290 |        "      <th>13</th>\n",
291 |        "      <td>0.034444</td>\n",
292 |        "    </tr>\n",
293 |        "    <tr>\n",
294 |        "      <th>14</th>\n",
295 |        "      <td>0.367778</td>\n",
296 |        "    </tr>\n",
297 |        "    <tr>\n",
298 |        "      <th>15</th>\n",
299 |        "      <td>0.025556</td>\n",
300 |        "    </tr>\n",
301 |        "  </tbody>\n",
302 |        "</table>\n",
303 |        "</div>"
304 |       ],
305 |       "text/plain": [
306 |        "           0\n",
307 |        "4   0.073333\n",
308 |        "5   0.100000\n",
309 |        "7   0.100000\n",
310 |        "8   0.080000\n",
311 |        "10  0.046667\n",
312 |        "11  0.046667\n",
313 |        "12  0.125556\n",
314 |        "13  0.034444\n",
315 |        "14  0.367778\n",
316 |        "15  0.025556"
317 |       ]
318 |      },
319 |      "execution_count": 139,
320 |      "metadata": {},
321 |      "output_type": "execute_result"
322 |     }
323 |    ],
324 |    "source": [
325 |     "t0 = pd.Timestamp.now()\n",
326 |     "r0 = method0(choices, n=900)\n",
327 |     "t0 = pd.Timestamp.now() - t0\n",
328 |     "r0"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "code",
333 |    "execution_count": 163,
334 |    "metadata": {},
335 |    "outputs": [],
336 |    "source": [
337 |     "t1 = pd.Timestamp.now()\n",
338 |     "r1 = method1(choices)\n",
339 |     "t1 = pd.Timestamp.now() - t1\n"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 164,
345 |    "metadata": {},
346 |    "outputs": [],
347 |    "source": [
348 |     "t3 = pd.Timestamp.now()\n",
349 |     "r3 = method3(choices)\n",
350 |     "t3 = pd.Timestamp.now() - t3"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": 172,
356 |    "metadata": {},
357 |    "outputs": [
358 |     {
359 |      "data": {
360 |       "text/html": [
361 |        "<div>\n",
362 |        "<style scoped>\n",
363 |        "    .dataframe tbody tr th:only-of-type {\n",
364 |        "        vertical-align: middle;\n",
365 |        "    }\n",
366 |        "\n",
367 |        "    .dataframe tbody tr th {\n",
368 |        "        vertical-align: top;\n",
369 |        "    }\n",
370 |        "\n",
371 |        "    .dataframe thead th {\n",
372 |        "        text-align: right;\n",
373 |        "    }\n",
374 |        "</style>\n",
375 |        "<table border=\"1\" class=\"dataframe\">\n",
376 |        "  <thead>\n",
377 |        "    <tr style=\"text-align: right;\">\n",
378 |        "      <th></th>\n",
379 |        "      <th>method0: sampling</th>\n",
380 |        "      <th>method1: hindsight</th>\n",
381 |        "      <th>method3: generation tree</th>\n",
382 |        "      <th>ideal</th>\n",
383 |        "    </tr>\n",
384 |        "  </thead>\n",
385 |        "  <tbody>\n",
386 |        "    <tr>\n",
387 |        "      <th>0</th>\n",
388 |        "      <td>0.000000</td>\n",
389 |        "      <td>0.000642</td>\n",
390 |        "      <td>0.0365</td>\n",
391 |        "      <td>0.047619</td>\n",
392 |        "    </tr>\n",
393 |        "    <tr>\n",
394 |        "      <th>1</th>\n",
395 |        "      <td>0.000000</td>\n",
396 |        "      <td>0.002812</td>\n",
397 |        "      <td>0.0667</td>\n",
398 |        "      <td>0.047619</td>\n",
399 |        "    </tr>\n",
400 |        "    <tr>\n",
401 |        "      <th>2</th>\n",
402 |        "      <td>0.000000</td>\n",
403 |        "      <td>0.001010</td>\n",
404 |        "      <td>0.0710</td>\n",
405 |        "      <td>0.047619</td>\n",
406 |        "    </tr>\n",
407 |        "    <tr>\n",
408 |        "      <th>3</th>\n",
409 |        "      <td>0.000000</td>\n",
410 |        "      <td>0.009076</td>\n",
411 |        "      <td>0.0653</td>\n",
412 |        "      <td>0.047619</td>\n",
413 |        "    </tr>\n",
414 |        "    <tr>\n",
415 |        "      <th>4</th>\n",
416 |        "      <td>0.073333</td>\n",
417 |        "      <td>0.028844</td>\n",
418 |        "      <td>0.0786</td>\n",
419 |        "      <td>0.047619</td>\n",
420 |        "    </tr>\n",
421 |        "    <tr>\n",
422 |        "      <th>5</th>\n",
423 |        "      <td>0.100000</td>\n",
424 |        "      <td>0.043982</td>\n",
425 |        "      <td>0.0999</td>\n",
426 |        "      <td>0.047619</td>\n",
427 |        "    </tr>\n",
428 |        "    <tr>\n",
429 |        "      <th>6</th>\n",
430 |        "      <td>0.000000</td>\n",
431 |        "      <td>0.013840</td>\n",
432 |        "      <td>0.0662</td>\n",
433 |        "      <td>0.047619</td>\n",
434 |        "    </tr>\n",
435 |        "    <tr>\n",
436 |        "      <th>7</th>\n",
437 |        "      <td>0.100000</td>\n",
438 |        "      <td>0.063993</td>\n",
439 |        "      <td>0.0858</td>\n",
440 |        "      <td>0.047619</td>\n",
441 |        "    </tr>\n",
442 |        "    <tr>\n",
443 |        "      <th>8</th>\n",
444 |        "      <td>0.080000</td>\n",
445 |        "      <td>0.059184</td>\n",
446 |        "      <td>0.0682</td>\n",
447 |        "      <td>0.047619</td>\n",
448 |        "    </tr>\n",
449 |        "    <tr>\n",
450 |        "      <th>9</th>\n",
451 |        "      <td>0.000000</td>\n",
452 |        "      <td>0.012213</td>\n",
453 |        "      <td>0.0745</td>\n",
454 |        "      <td>0.047619</td>\n",
455 |        "    </tr>\n",
456 |        "    <tr>\n",
457 |        "      <th>10</th>\n",
458 |        "      <td>0.046667</td>\n",
459 |        "      <td>0.037620</td>\n",
460 |        "      <td>0.0327</td>\n",
461 |        "      <td>0.047619</td>\n",
462 |        "    </tr>\n",
463 |        "    <tr>\n",
464 |        "      <th>11</th>\n",
465 |        "      <td>0.046667</td>\n",
466 |        "      <td>0.045378</td>\n",
467 |        "      <td>0.0187</td>\n",
468 |        "      <td>0.047619</td>\n",
469 |        "    </tr>\n",
470 |        "    <tr>\n",
471 |        "      <th>12</th>\n",
472 |        "      <td>0.125556</td>\n",
473 |        "      <td>0.119555</td>\n",
474 |        "      <td>0.0307</td>\n",
475 |        "      <td>0.047619</td>\n",
476 |        "    </tr>\n",
477 |        "    <tr>\n",
478 |        "      <th>13</th>\n",
479 |        "      <td>0.034444</td>\n",
480 |        "      <td>0.031679</td>\n",
481 |        "      <td>0.0279</td>\n",
482 |        "      <td>0.047619</td>\n",
483 |        "    </tr>\n",
484 |        "    <tr>\n",
485 |        "      <th>14</th>\n",
486 |        "      <td>0.367778</td>\n",
487 |        "      <td>0.423860</td>\n",
488 |        "      <td>0.0274</td>\n",
489 |        "      <td>0.047619</td>\n",
490 |        "    </tr>\n",
491 |        "    <tr>\n",
492 |        "      <th>15</th>\n",
493 |        "      <td>0.025556</td>\n",
494 |        "      <td>0.042629</td>\n",
495 |        "      <td>0.0312</td>\n",
496 |        "      <td>0.047619</td>\n",
497 |        "    </tr>\n",
498 |        "    <tr>\n",
499 |        "      <th>16</th>\n",
500 |        "      <td>0.000000</td>\n",
501 |        "      <td>0.010285</td>\n",
502 |        "      <td>0.0344</td>\n",
503 |        "      <td>0.047619</td>\n",
504 |        "    </tr>\n",
505 |        "    <tr>\n",
506 |        "      <th>17</th>\n",
507 |        "      <td>0.000000</td>\n",
508 |        "      <td>0.034793</td>\n",
509 |        "      <td>0.0235</td>\n",
510 |        "      <td>0.047619</td>\n",
511 |        "    </tr>\n",
512 |        "    <tr>\n",
513 |        "      <th>18</th>\n",
514 |        "      <td>0.000000</td>\n",
515 |        "      <td>0.017495</td>\n",
516 |        "      <td>0.0225</td>\n",
517 |        "      <td>0.047619</td>\n",
518 |        "    </tr>\n",
519 |        "    <tr>\n",
520 |        "      <th>19</th>\n",
521 |        "      <td>0.000000</td>\n",
522 |        "      <td>0.001101</td>\n",
523 |        "      <td>0.0109</td>\n",
524 |        "      <td>0.047619</td>\n",
525 |        "    </tr>\n",
526 |        "    <tr>\n",
527 |        "      <th>20</th>\n",
528 |        "      <td>0.000000</td>\n",
529 |        "      <td>0.000010</td>\n",
530 |        "      <td>0.0277</td>\n",
531 |        "      <td>0.047619</td>\n",
532 |        "    </tr>\n",
533 |        "  </tbody>\n",
534 |        "</table>\n",
535 |        "</div>"
536 |       ],
537 |       "text/plain": [
538 |        "    method0: sampling  method1: hindsight  method3: generation tree     ideal\n",
539 |        "0            0.000000            0.000642                    0.0365  0.047619\n",
540 |        "1            0.000000            0.002812                    0.0667  0.047619\n",
541 |        "2            0.000000            0.001010                    0.0710  0.047619\n",
542 |        "3            0.000000            0.009076                    0.0653  0.047619\n",
543 |        "4            0.073333            0.028844                    0.0786  0.047619\n",
544 |        "5            0.100000            0.043982                    0.0999  0.047619\n",
545 |        "6            0.000000            0.013840                    0.0662  0.047619\n",
546 |        "7            0.100000            0.063993                    0.0858  0.047619\n",
547 |        "8            0.080000            0.059184                    0.0682  0.047619\n",
548 |        "9            0.000000            0.012213                    0.0745  0.047619\n",
549 |        "10           0.046667            0.037620                    0.0327  0.047619\n",
550 |        "11           0.046667            0.045378                    0.0187  0.047619\n",
551 |        "12           0.125556            0.119555                    0.0307  0.047619\n",
552 |        "13           0.034444            0.031679                    0.0279  0.047619\n",
553 |        "14           0.367778            0.423860                    0.0274  0.047619\n",
554 |        "15           0.025556            0.042629                    0.0312  0.047619\n",
555 |        "16           0.000000            0.010285                    0.0344  0.047619\n",
556 |        "17           0.000000            0.034793                    0.0235  0.047619\n",
557 |        "18           0.000000            0.017495                    0.0225  0.047619\n",
558 |        "19           0.000000            0.001101                    0.0109  0.047619\n",
559 |        "20           0.000000            0.000010                    0.0277  0.047619"
560 |       ]
561 |      },
562 |      "execution_count": 172,
563 |      "metadata": {},
564 |      "output_type": "execute_result"
565 |     },
566 |     {
567 |      "data": {
568 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAiwAAAGhCAYAAABCse9yAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAABWQElEQVR4nO3de1wU5f4H8M+yXJaLXATlYuiiqEiiKAiBppabYGai5S2PIHlLxeTgvRQ0L6gRoemB8uT1VFrnqF1MTBEsDUExtfKGJkEpePsJggoKz+8PX4yugLILyICf9+s1L9l5nvnOM7PCfnZmdlYhhBAgIiIikjGD+h4AERER0eMwsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsGdb3AGpDWVkZLly4gCZNmkChUNT3cIiIiKgahBC4ceMGnJycYGDw6GMojSKwXLhwAc7OzvU9DCIiItJDTk4OnnnmmUf2aRSBpUmTJgDubbClpWU9j4aIiIiqo6CgAM7OztLr+KM0isBSfhrI0tKSgYWIiKiBqc7lHLzoloiIiGSPgYWIiIhkj4GFiIiIZK9RXMNSXaWlpbhz5059D4PoqWZkZASlUlnfwyCiBuapCCxCCOTm5uL69ev1PRQiAmBtbQ0HBwfeN4mIqu2pCCzlYaV58+YwMzPjH0mieiKEwM2bN3Hp0iUAgKOjYz2PiIgaikYfWEpLS6WwYmtrW9/DIXrqmZqaAgAuXbqE5s2b8/QQEVVLo7/otvyaFTMzs3oeCRGVK/995DVlRFRdjT6wlONpICL54O8jEenqqQksRERE1HAxsBAAICUlBQqFok4+SaVQKLB9+/ZarytHD+/H9evXw9raul7HRETUGDT6i24fRT17xxNdX9bS/k90fVXp3bs3PD09ERcXVy/rv3btGqZMmYJvv/0WBgYGeO2117BixQpYWFjUy3jq0rBhw/Dyyy/X9zCIiBo8HmGhJ27kyJH4/fffsXv3bnz33Xf48ccfMX78+PoeVp0wNTVF8+bN63sYREQNHgOLjPXu3RtTpkxBeHg4bGxsYG9vjzVr1qCoqAihoaFo0qQJXF1dsXPnTq3lfvvtN/Tr1w8WFhawt7fHqFGjcOXKFQDA6NGjsW/fPqxYsQIKhQIKhQJZWVnSshkZGfD29oaZmRn8/f1x+vRprdrx8fFo06YNjI2N0b59e2zatEmrPTMzEz179oRKpYK7uzt2796t1X7y5EkkJibi3//+N3x9fdGjRw989NFH2Lx5My5cuFDtffPnn39iwIABsLGxgbm5OZ599ll8//33AO59lH3MmDFwcXGBqakp2rdvjxUrVmgtP3r0aAQFBWHJkiWwt7eHtbU13nvvPdy9exczZsxA06ZN8cwzz2DdunXSMllZWVAoFNi8eTP8/f2hUqnQsWNH7Nu3r8pxPnxKaP78+fD09MSmTZugVqthZWWF4cOH48aNG1KfGzduYOTIkTA3N4ejoyM+/PBD9O7dG+Hh4dXeP0REjQ0Di8xt2LABdnZ2SE9Px5QpUzBx4kQMGTIE/v7+OHLkCPr27YtRo0bh5s2bAIDr16/jxRdfRJcuXXD48GEkJiYiLy8PQ4cOBQCsWLECfn5+GDduHC5evIiLFy/C2dlZWt+7776LDz74AIcPH4ahoSHefPNNqW3btm2YOnUqpk2bht9++w0TJkxAaGgokpOTAQBlZWUYPHgwjI2NkZaWhoSEBMyaNUtre1JTU2FtbQ1vb29pnkajgYGBAdLS0qR5CoUC69evr3K/TJ48GcXFxfjxxx/x66+/YtmyZdIppbKyMjzzzDP46quvcOLECURGRuKdd97Bl19+qVVj7969uHDhAn788UfExsYiKioKr7zyCmxsbJCWloa33noLEyZMwF9//aW13IwZMzBt2jT88ssv8PPzw4ABA3D16tXHPpflzp07h+3bt+O7777Dd999h3379mHp0qVSe0REBA4cOIBvvvkGu3fvxk8//YQjR45Uuz4RUWPEwCJznTt3xty5c9G2bVvMmTMHKpUKdnZ2GDduHNq2bYvIyEhcvXoVx48fBwCsWrUKXbp0wZIlS+Dm5oYuXbpg7dq1SE5OxpkzZ2BlZQVjY2OYmZnBwcEBDg4OWjfuWrx4MXr16gV3d3fMnj0bP//8M27fvg0AiImJwejRozFp0iS0a9cOERERGDx4MGJiYgAAe/bswalTp7Bx40Z07twZPXv2xJIlS7S2Jzc3t8IpEkNDQzRt2hS5ubnSvPbt28PKyqrK/ZKdnY3u3bvDw8MDrVu3xiuvvIKePXsCuPddNQsWLIC3tzdcXFwwcuRIhIaGVggsTZs2xcqVK9G+fXu8+eabaN++PW7evIl33nlH2t/GxsbYv3+/1nJhYWF47bXX0KFDB8THx8PKygqffvpptZ5P4F6gWr9+PTp27Ijnn38eo0aNQlJSEoB7R1c2bNiAmJgY9OnTBx07dsS6detQWlpa7fpENTbf6v5EJBMMLDLXqVMn6WelUglbW1t4eHhI8+zt7QFAutX5sWPHkJycDAsLC2lyc3MDcO+dvS7rK79tenntkydPonv37lr9u3fvjpMnT0rtzs7OcHJyktr9/Pyqv7EPOHXqFAYNGlRl+9tvv41Fixahe/fuiIqKkgJbudWrV8PLywvNmjWDhYUFPvnkE2RnZ2v1efbZZ2FgcP9XwN7eXmvflu/v8u2vbJsMDQ3h7e0t7YPqUKvVaNKkifTY0dFRWscff/yBO3fuwMfHR2q3srJC+/btq12fiKgxYmCROSMjI63HCoVCa175DbjKysoAAIWFhRgwYACOHj2qNZVfW6LL+h6uXRscHBwqBIC7d+/i2rVrcHBwqHadsWPH4o8//sCoUaPw66+/wtvbGx999BEAYPPmzZg+fTrGjBmDH374AUePHkVoaChKSkq0ajxu35bPq83tr2q9tb0OIqLGhoGlkenatSt+//13qNVquLq6ak3m5uYAAGNjY71OMXTo0AEHDhzQmnfgwAG4u7tL7Tk5Obh48aLUfvDgQa3+fn5+uH79OjIyMqR5e/fuRVlZGXx9fXUaj7OzM9566y1s3boV06ZNw5o1a6Qx+fv7Y9KkSejSpQtcXV2rdXSpuh7cprt37yIjIwMdOnSoldqtW7eGkZERDh06JM3Lz8/HmTNnaqU+EVFDxcDSyEyePBnXrl3DiBEjcOjQIZw7dw67du1CaGioFFLUajXS0tKQlZWFK1euVPvd/YwZM7B+/XrEx8cjMzMTsbGx2Lp1K6ZPnw7g3sWz7dq1Q0hICI4dO4affvoJ7777rlaNDh06IDAwEOPGjUN6ejoOHDiAsLAwDB8+XOtUkpubG7Zt21blWMLDw7Fr1y6cP38eR44cQXJyshQa2rZti8OHD2PXrl04c+YM5s2bpxUAamr16tXYtm0bTp06hcmTJ+P//u//tC5OrokmTZogJCQEM2bMQHJyMn7//XeMGTMGBgYGvJ09ET3Vnuobx8nlRm61ycnJCQcOHMCsWbPQt29fFBcXo1WrVggMDJSu15g+fTpCQkLg7u6OW7du4fz589WqHRQUhBUrViAmJgZTp06Fi4sL1q1bh969ewMADAwMsG3bNowZMwY+Pj5Qq9VYuXIlAgMDtep89tlnCAsLQ58+faQbx61cuVKrz+nTp5Gfn1/lWEpLSzF58mT89ddfsLS0RGBgID788EMAwIQJE/DLL79g2LBhUCgUGDFiBCZNmlTh49/6Wrp0KZYuXYqjR4/C1dUV33zzDezs7GqlNgDExsbirbfewiuvvAJLS0vMnDkTOTk5UKlUtbYOIqKGRiGEEPU9iJoqKCiAlZUV8vPzYWlpqdV2+/ZtnD9/Hi4uLvyDTzWSlZUFFxcX/PLLL/D09Hxi6y0qKkKLFi3wwQcfYMyYMU9svXWJv5cy9+Cng+ZX/caBqKYe9fr9sKf6CAuRHP3yyy84deoUfHx8kJ+fj/feew8AMHDgwHoeGRFR/WFgIZKhmJgYnD59GsbGxvDy8sJPP/1Uq6ediIgaGgYWompSq9V4EmdQu3TpovUpKiIi4qeEiIiIqAFgYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZ0yuwrF69Gmq1GiqVCr6+vkhPT6/Wcps3b4ZCoUBQUJDWfCEEIiMj4ejoCFNTU2g0GmRmZuozNNJTSkoKFAoFrl+/Xuu1FQoFtm/fXqs1qzPe+fPn18oN3tRqNeLi4mq9L3DvZnQKhQJHjx7Va2xERE8LnQPLli1bEBERgaioKBw5cgSdO3dGQEBAhW/gfVhWVhamT5+O559/vkLb8uXLsXLlSiQkJCAtLQ3m5uYICAjA7du3dR0eVUPv3r0RHh5eb+tfvHgx/P39YWZmBmtr6zpbz/Tp05GUlFRn9Stz6NAhjB8/vlZrrl+/vk73ExFRQ6DzfVhiY2Mxbtw4hIaGAgASEhKwY8cOrF27FrNnz650mdLSUowcORILFizATz/9pPWuWAiBuLg4zJ07V7qT58aNG2Fvb4/t27dj+PDhemxWNT14++kngbe4BgCUlJRgyJAh8PPzw6efflpn67GwsICFhUWd1a9Ms2bNnuj6iIieFjodYSkpKUFGRgY0Gs39AgYG0Gg0SE1NrXK59957D82bN6/0e1DOnz+P3NxcrZpWVlbw9fWtsmZxcTEKCgq0psaod+/emDJlCsLDw2FjYwN7e3usWbMGRUVFCA0NRZMmTeDq6lrhS/1+++039OvXDxYWFrC3t8eoUaNw5coVAMDo0aOxb98+rFixAgqFAgqFAllZWdKyGRkZ8Pb2hpmZGfz9/XH69Gmt2vHx8WjTpg2MjY3Rvn17bNq0Sas9MzMTPXv2hEqlgru7O3bv3l1huxYsWIB//vOf8PDwqPE+etR4Hz4lNHr0aAQFBSEmJgaOjo6wtbXF5MmTcefOHanPpUuXMGDAAJiamsLFxQWfffaZ1vqEEJg/fz5atmwJExMTODk54e2335baHz4ldOrUKfTo0UPaH3v27Kn0FNkff/yBF154AWZmZujcubP0fz8lJQWhoaHIz8+Xnq/58+fXeL8RETU0OgWWK1euoLS0FPb29lrz7e3tkZubW+ky+/fvx6effoo1a9ZU2l6+nC41o6OjYWVlJU3Ozs66bEaDsmHDBtjZ2SE9PR1TpkzBxIkTMWTIEPj7++PIkSPo27cvRo0ahZs3bwIArl+/jhdffBFdunTB4cOHkZiYiLy8PAwdOhQAsGLFCvj5+WHcuHG4ePEiLl68qLX/3n33XXzwwQc4fPgwDA0N8eabb0pt27Ztw9SpUzFt2jT89ttvmDBhAkJDQ5GcnAwAKCsrw+DBg2FsbIy0tDQkJCRg1qxZem23Wq2u1gvzo8ZbmeTkZJw7dw7JycnYsGED1q9fj/Xr10vto0ePRk5ODpKTk/Hf//4X//rXv7ROd/7vf//Dhx9+iI8//hiZmZnYvn17lcGrtLQUQUFBMDMzQ1paGj755BO8++67VW7H9OnTcfToUbRr1w4jRozA3bt34e/vj7i4OFhaWkrP1/Tp0x+7X4iIGps6vTX/jRs3MGrUKKxZs6ZWvwdlzpw5iIiIkB4XFBQ02tDSuXNnzJ07F8C97V66dCns7Owwbtw4AEBkZCTi4+Nx/PhxPPfcc1i1ahW6dOmCJUuWSDXWrl0LZ2dnnDlzBu3atYOxsTHMzMzg4OBQYX2LFy9Gr169AACzZ89G//79cfv2bahUKsTExGD06NGYNGkSACAiIgIHDx5ETEwMXnjhBezZswenTp3Crl274OTkBABYsmQJ+vXrp/N2t2nTplr/Zx413srY2Nhg1apVUCqVcHNzQ//+/ZGUlIRx48bhzJkz2LlzJ9LT09GtWzcAwKeffooOHTpIy2dnZ8PBwQEajQZGRkZo2bIlfHx8Kl3X7t27ce7cOaSkpEj7evHixXjppZcq9J0+fTr69+8P4N4RqGeffRZnz56Fm5sbrKysoFAoKn2+iIieFjodYbGzs4NSqUReXp7W/Ly8vEr/mJ47dw5ZWVkYMGAADA0NYWhoiI0bN+Kbb76BoaEhzp07Jy1X3ZoAYGJiAktLS62pserUqZP0s1KphK2trdY7+vIjU+VHAY4dO4bk5GTp+g0LCwu4ubkBuPd86LI+R0dHrdonT55E9+7dtfp3794dJ0+elNqdnZ2lsAIAfn5+1d/YByQlJSEsLKxG463Ms88+C6VSqbXMg9tnaGgILy8vqd3NzU3rgtchQ4bg1q1baN26NcaNG4dt27bh7t27la7r9OnTcHZ21vp/XFW40XU7iIieNjoFlvJvjn3wkxdlZWVISkqq9IXJzc0Nv/76K44ePSpNr776Kl544QUcPXoUzs7OcHFxgYODg1bNgoICpKWl6f1i15gYGRlpPVYoFFrzFAoFgHvPAwAUFhZiwIABWvv86NGj0rUluqzv4dpypOt4K9ufumyfs7MzTp8+jX/9618wNTXFpEmT0LNnT63rYPTR0PY7EdGTpvMpoYiICISEhMDb2xs+Pj6Ii4uTLgIFgODgYLRo0QLR0dFQqVTo2LGj1vLl71YfnB8eHo5Fixahbdu2cHFxwbx58+Dk5FThfi30eF27dsX//vc/qNVqGBpW/vQaGxujtLRU59odOnTAgQMHEBISIs07cOAA3N3dpfacnBxcvHhROkpw8OBBPbaifri5ueHu3bvIyMiQTgmdPn26wr1eTE1NMWDAAAwYMACTJ0+WgnnXrl21+rVv3x45OTnIy8uTjoQdOnRI53Hp+3wRETUmOgeWYcOG4fLly4iMjERubi48PT2RmJgo/UHOzs6GgYFut3eZOXMmioqKMH78eFy/fh09evRAYmJildchUNUmT56MNWvWYMSIEZg5cyaaNm2Ks2fPYvPmzfj3v/8NpVIJtVqNtLQ0ZGVlwcLCAk2bNq1W7RkzZmDo0KHo0qULNBoNvv32W2zduhV79uwBAGg0GrRr1w4hISF4//33UVBQUOlFptnZ2bh27Rqys7NRWloq3TTN1dVV+hhynz59MGjQoGqdFqot7du3R2BgICZMmID4+HgYGhoiPDwcpqamUp/169ejtLQUvr6+MDMzw3/+8x+YmpqiVatWFeq99NJLaNOmDUJCQrB8+XLcuHFDuh6p/ChKdajVahQWFiIpKQmdO3eGmZkZzMzMar7BREQNiF53ug0LC8Off/6J4uJipKWlwdfXV2pLSUnR+tTFw9avX1/hI50KhQLvvfcecnNzcfv2bezZswft2rXTZ2hPPScnJxw4cAClpaXo27cvPDw8EB4eDmtraylITp8+HUqlEu7u7mjWrBmys7OrVTsoKAgrVqxATEwMnn32WXz88cdYt24devfuDeDeR9y3bduGW7duwcfHB2PHjsXixYsr1ImMjESXLl0QFRWFwsJCdOnSRfpUU7lz585JH8V+ktatWwcnJyf06tULgwcPxvjx49G8eXOp3draGmvWrEH37t3RqVMn7NmzB99++y1sbW0r1FIqldi+fTsKCwvRrVs3jB07VgpwuoRxf39/vPXWWxg2bBiaNWuG5cuX13xDiYgaGIUQQtT3IGqqoKAAVlZWyM/Pr3AB7u3bt3H+/Hm4uLjwiA3VuwMHDqBHjx44e/Ys2rRpU9/DqTf8vZS5B2+qyRteUh161Ov3w+r0Y81ET7tt27bBwsICbdu2xdmzZzF16lR07979qQ4rRET6YGAhqkM3btzArFmzkJ2dDTs7O2g0GnzwwQf1PSwiogaHgYWoDgUHByM4OLi+h0FE1ODpddEtERER0ZPEwEJERESyx8BCREREssfAQkRERLLHwEJERESyx8BCREREssfAQgDufaWCQqGo8EV/tUGhUFT4OgaqmlqtRlxcXH0Pg4hIVhhYnkK9e/dGeHh4va3/1VdfRcuWLaFSqeDo6IhRo0bhwoUL9Tae+rJ+/Xrp28sfdOjQIYwfP75e1k1EJFdP9Y3jPDZ4PNH1/Rry6xNdn1y98MILeOedd+Do6Ii///4b06dPx+uvv46ff/65vodWK0pKSmBsbKz38s2aNavF0dRMTbeFiKi28AiLjPXu3RtTpkxBeHg4bGxsYG9vjzVr1qCoqAihoaFo0qQJXF1dsXPnTq3lfvvtN/Tr1w8WFhawt7fHqFGjpG8+Hj16NPbt24cVK1ZAoVBAoVAgKytLWjYjIwPe3t4wMzODv78/Tp8+rVU7Pj4ebdq0gbGxMdq3b49NmzZptWdmZqJnz55QqVRwd3fH7t27K2zXP//5Tzz33HNo1aoV/P39MXv2bBw8eBB37tzRaf+sWbMGzs7OMDMzw6BBgxAbG1vhqMHXX3+Nrl27QqVSoXXr1liwYAHu3r0rtSsUCvz73//GoEGDYGZmhrZt2+Kbb76p9v4E7j1PYWFhCA8Ph52dHQICAgAAsbGx8PDwgLm5OZydnTFp0iQUFhYCuHcKLjQ0FPn5+dLzMH/+fAAVTwllZ2dj4MCBsLCwgKWlJYYOHYq8vDypff78+fD09MSmTZugVqthZWWF4cOH48aNG5Xut8ete+HChQgODoalpaV0pGf//v14/vnnYWpqCmdnZ7z99tsoKiqSahYXF2P69Olo0aIFzM3N4evri5SUlMc/iURE1cTAInMbNmyAnZ0d0tPTMWXKFEycOBFDhgyBv78/jhw5gr59+2LUqFG4efMmAOD69et48cUX0aVLFxw+fBiJiYnIy8vD0KFDAQArVqyAn58fxo0bh4sXL+LixYtwdnaW1vfuu+/igw8+wOHDh2FoaIg333xTatu2bRumTp2KadOm4bfffsOECRMQGhqK5ORkAEBZWRkGDx4MY2NjpKWlISEhAbNmzXrk9l27dg2fffYZ/P39YWRkJM1XKBRYv359lcsdOHAAb731FqZOnYqjR4/ipZdewuLFi7X6/PTTTwgODsbUqVNx4sQJfPzxx1i/fn2FfgsWLMDQoUNx/PhxvPzyyxg5ciSuXbtWrf354PNkbGyMAwcOICEhAQBgYGCAlStX4vfff8eGDRuwd+9ezJw5EwDg7++PuLg4WFpaSs/D9OnTK2xnWVkZBg4ciGvXrmHfvn3YvXs3/vjjDwwbNkyr37lz57B9+3Z89913+O6777Bv3z4sXbq00n33uHXHxMSgc+fO+OWXXzBv3jycO3cOgYGBeO2113D8+HFs2bIF+/fvR1hYmLRMWFgYUlNTsXnzZhw/fhxDhgxBYGAgMjMzq3wOiYh0IhqB/Px8AUDk5+dXaLt165Y4ceKEuHXrVoW2jus7PtFJV7169RI9evSQHt+9e1eYm5uLUaNGSfMuXrwoAIjU1FQhhBALFy4Uffv21aqTk5MjAIjTp09LdadOnarVJzk5WQAQe/bskebt2LFDAJD2nb+/vxg3bpzWckOGDBEvv/yyEEKIXbt2CUNDQ/H3339L7Tt37hQAxLZt27SWmzlzpjAzMxMAxHPPPSeuXLmi1d6+fXuxdevWKvfNsGHDRP/+/bXmjRw5UlhZWUmP+/TpI5YsWaLVZ9OmTcLR0VF6DEDMnTtXelxYWCgAiJ07dwohqr8/u3TpUuVYy3311VfC1tZWerxu3Tqt8ZZr1aqV+PDDD4UQQvzwww9CqVSK7Oxsqf33338XAER6eroQQoioqChhZmYmCgoKpD4zZswQvr6+VY7lUesOCgrSmjdmzBgxfvx4rXk//fSTMDAwELdu3RJ//vmnUCqVWs+7EPf2/5w5cypd/6N+L0kGoizvT0R16FGv3w/jERaZ69Spk/SzUqmEra0tPDzuX3tjb28PALh06RIA4NixY0hOToaFhYU0ubm5Abj3LlyX9Tk6OmrVPnnyJLp3767Vv3v37jh58qTU7uzsDCcnJ6ndz8+v0vXMmDEDv/zyC3744QcolUoEBwdDCCG1nzp1CoMGDapynKdPn4aPj4/WvIcfHzt2DO+9957Wvig/slR+ROrhbTY3N4elpaXO+9PLy6vCGPfs2YM+ffqgRYsWaNKkCUaNGoWrV69qrftxyvfpg0fB3N3dYW1tLe134N6pnCZNmkiPHR0dpW3Qlbe3t9bjY8eOYf369Vr7ICAgAGVlZTh//jx+/fVXlJaWol27dlp99u3bV63/c0RE1fFUX3TbEDx4mgS4d6rk4VMnwL1TBwBQWFiIAQMGYNmyZRVqlQeQ6q7v4dq1yc7ODnZ2dmjXrh06dOgAZ2dnHDx4sMqAo4/CwkIsWLAAgwcPrtCmUqmknyvbx7ruT3Nzc622rKwsvPLKK5g4cSIWL16Mpk2bYv/+/RgzZgxKSkpgZmZWo2172KO2QVcPb0thYSEmTJiAt99+u0Lfli1b4vjx41AqlcjIyIBSqdRqt7Cw0GsMREQPY2BpZLp27Yr//e9/UKvVMDSs/Ok1NjZGaWmpzrU7dOiAAwcOICQkRJp34MABuLu7S+05OTm4ePGi9GJ+8ODBx9Ytf2EtLi6u9ljat2+PQ4cOac17+HHXrl1x+vRpuLq6Vrvuw6qzPyuTkZGBsrIyfPDBBzAwuHcg88svv9TqU53noXyf5uTkSEdZTpw4gevXr0v7XR+6/B/o2rUrTpw4UeV+7NKlC0pLS3Hp0iU8//zzeo+JiOhReEqokZk8eTKuXbuGESNG4NChQzh37hx27dqF0NBQ6QVKrVYjLS0NWVlZuHLlSrXfic+YMQPr169HfHw8MjMzERsbi61bt0oXbGo0GrRr1w4hISE4duwYfvrpJ7z77rtaNdLS0rBq1SocPXoUf/75J/bu3YsRI0agTZs2WkdX3NzcsG3btirHMmXKFHz//feIjY1FZmYmPv74Y+zcuVM6KgQAkZGR2LhxIxYsWIDff/8dJ0+exObNmzF37txa3Z+VcXV1xZ07d/DRRx/hjz/+wKZNm6SLccup1WoUFhYiKSkJV65cqfRUkUajgYeHB0aOHIkjR44gPT0dwcHB6NWrV4VTN7qozrrLzZo1Cz///DPCwsJw9OhRZGZm4uuvv5Yuum3Xrh1GjhyJ4OBgbN26FefPn0d6ejqio6OxY8cOvcdIRPQgBpZGxsnJCQcOHEBpaSn69u0LDw8PhIeHw9raWnqnP336dCiVSri7u6NZs2bIzs6uVu2goCCsWLECMTExePbZZ/Hxxx9j3bp16N27N4B7n4rZtm0bbt26BR8fH4wdO7bCJ3LMzMywdetW9OnTB+3bt8eYMWPQqVMn7Nu3DyYmJlK/06dPIz8/v8qxdO/eHQkJCYiNjUXnzp2RmJiIf/7zn1qnegICAvDdd9/hhx9+QLdu3fDcc8/hww8/RKtWraq7O6u1PyvTuXNnxMbGYtmyZejYsSM+++wzREdHa/Xx9/fHW2+9hWHDhqFZs2ZYvnx5hToKhQJff/01bGxs0LNnT2g0GrRu3Rpbtmyp9jZUpjrrLlf+/Jw5cwbPP/88unTpgsjISK1rldatW4fg4GBMmzYN7du3R1BQEA4dOoSWLVvWaJz05Khn75AmIjlSiAevdGygCgoKYGVlhfz8fFhaWmq13b59G+fPn4eLi4vWixk1PuPGjcOpU6fw008/1fdQ6DH4eyk/DwaVLNUb9xvmV/3GgaimHvX6/TBew0INVkxMDF566SWYm5tj586d2LBhA/71r3/V97CIiKgOMLBQg5Weno7ly5fjxo0baN26NVauXImxY8fW97CIiKgOMLBQg/Xwp26IiKjx4kW3REREJHsMLERERCR7DCxEREQkewwsREREJHsMLERERCR7egWW1atXQ61WQ6VSwdfXF+np6VX23bp1K7y9vWFtbQ1zc3N4enpi06ZNWn1Gjx4NhUKhNQUGBuozNCIiImqEdA4sW7ZsQUREBKKionDkyBF07twZAQEBVX6VfdOmTfHuu+8iNTUVx48fR2hoKEJDQ7Fr1y6tfoGBgbh48aI0ffHFF/ptUSPSu3dvhIeHV9muVqsRFxdXq+usi5pEREQ1pfN9WGJjYzFu3DiEhoYCABISErBjxw6sXbsWs2fPrtC//Htmyk2dOhUbNmzA/v37ERAQIM03MTGBg4ODrsNp1LZu3QojI6P6HgYREVG90ymwlJSUICMjA3PmzJHmGRgYQKPRIDU19bHLCyGwd+9enD59GsuWLdNqS0lJQfPmzWFjY4MXX3wRixYtgq2trS7D09lJtw51Wv9hHU6d1Kl/06ZN62gkREREDYtOp4SuXLmC0tJS2Nvba823t7dHbm5ulcvl5+fDwsICxsbG6N+/Pz766CO89NJLUntgYCA2btyIpKQkLFu2DPv27UO/fv1QWlpaab3i4mIUFBRoTY3Rg6eELl26hAEDBsDU1BQuLi747LPPKvS/fv06xo4di2bNmsHS0hIvvvgijh07JrWfO3cOAwcOhL29PSwsLNCtWzfs2bPnSW0OERGR3p7IrfmbNGmCo0ePorCwEElJSYiIiEDr1q2l00XDhw+X+np4eKBTp05o06YNUlJS0KdPnwr1oqOjsWDBgicxdNkYPXo0Lly4gOTkZBgZGeHtt9+ucN3QkCFDYGpqip07d8LKygoff/wx+vTpgzNnzqBp06YoLCzEyy+/jMWLF8PExAQbN27EgAEDcPr0abRs2bKetoyIiOjxdDrCYmdnB6VSiby8PK35eXl5j7z+xMDAAK6urvD09MS0adPw+uuvIzo6usr+rVu3hp2dHc6ePVtp+5w5c5Cfny9NOTk5umxGg3PmzBns3LkTa9aswXPPPQcvLy98+umnuHXrltRn//79SE9Px1dffQVvb2+0bdsWMTExsLa2xn//+18AQOfOnTFhwgR07NgRbdu2xcKFC9GmTRt888039bVpRERE1aJTYDE2NoaXlxeSkpKkeWVlZUhKSoKfn1+165SVlaG4uLjK9r/++gtXr16Fo6Njpe0mJiawtLTUmhqzkydPwtDQEF5eXtI8Nzc3WFtbS4+PHTuGwsJC2NrawsLCQprOnz+Pc+fOAQAKCwsxffp0dOjQAdbW1rCwsMDJkyeRnZ39pDeJiIhIJzqfEoqIiEBISAi8vb3h4+ODuLg4FBUVSZ8aCg4ORosWLaQjKNHR0fD29kabNm1QXFyM77//Hps2bUJ8fDyAey+iCxYswGuvvQYHBwecO3cOM2fOhKurq9aniOjRCgsL4ejoiJSUlApt5cFm+vTp2L17N2JiYuDq6gpTU1O8/vrrKCkpebKDJSIi0pHOgWXYsGG4fPkyIiMjkZubC09PTyQmJkoX4mZnZ8PA4P6Bm6KiIkyaNAl//fUXTE1N4ebmhv/85z8YNmwYAECpVOL48ePYsGEDrl+/DicnJ/Tt2xcLFy6EiYlJLW1mw+bm5oa7d+8iIyMD3bp1AwCcPn0a169fl/p07doVubm5MDQ0hFqtrrTOgQMHMHr0aAwaNAjAvZCTlZVVx6MnIiKqOb0uug0LC0NYWFilbQ+/w1+0aBEWLVpUZS1TU9MKN5Ejbe3bt0dgYCAmTJiA+Ph4GBoaIjw8HKamplIfjUYDPz8/BAUFYfny5WjXrh0uXLiAHTt2YNCgQdJ1LVu3bsWAAQOgUCgwb948lJWV1eOWERERVQ+/S6iBWLduHZycnNCrVy8MHjwY48ePR/PmzaV2hUKB77//Hj179kRoaCjatWuH4cOH488//5SOfsXGxsLGxgb+/v4YMGAAAgIC0LVr1/raJCIiompTCCFEfQ+ipgoKCmBlZYX8/PwKF+Devn0b58+fh4uLC1QqVT2NkIgexN9L+VHP3iH9nKV6437D/Px6GA09LR71+v0wHmEhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItl7agJLI/gwFFGjwd9HItJVow8sRkZGAICbN2/W80iIqFz572P57ycR0ePodafbhkSpVMLa2hqXLl0CAJiZmUGhUNTzqIieTkII3Lx5E5cuXYK1tTWUSmV9D4mIGohGH1gAwMHBAQCk0EJE9cva2lr6vSQiqo6nIrAoFAo4OjqiefPmuHPnTn0Ph+ipZmRkxCMrRKSzpyKwlFMqlfxDSURE1AA1+otuiYiIqOFjYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2dMrsKxevRpqtRoqlQq+vr5IT0+vsu/WrVvh7e0Na2trmJubw9PTE5s2bdLqI4RAZGQkHB0dYWpqCo1Gg8zMTH2GRkRERI2QzoFly5YtiIiIQFRUFI4cOYLOnTsjICAAly5dqrR/06ZN8e677yI1NRXHjx9HaGgoQkNDsWvXLqnP8uXLsXLlSiQkJCAtLQ3m5uYICAjA7du39d8yIiIiajQUQgihywK+vr7o1q0bVq1aBQAoKyuDs7MzpkyZgtmzZ1erRteuXdG/f38sXLgQQgg4OTlh2rRpmD59OgAgPz8f9vb2WL9+PYYPH/7YegUFBbCyskJ+fj4sLS112RwiIgKgnr1D+jlL9cb9hvn59TAaelro8vqt0xGWkpISZGRkQKPR3C9gYACNRoPU1NTHLi+EQFJSEk6fPo2ePXsCAM6fP4/c3FytmlZWVvD19a2yZnFxMQoKCrQmIiIiarx0CixXrlxBaWkp7O3ttebb29sjNze3yuXy8/NhYWEBY2Nj9O/fHx999BFeeuklAJCW06VmdHQ0rKyspMnZ2VmXzSAiIqIG5ol8SqhJkyY4evQoDh06hMWLFyMiIgIpKSl615szZw7y8/OlKScnp/YGS0RERLJjqEtnOzs7KJVK5OXlac3Py8uDg4NDlcsZGBjA1dUVAODp6YmTJ08iOjoavXv3lpbLy8uDo6OjVk1PT89K65mYmMDExESXoRMREVEDptMRFmNjY3h5eSEpKUmaV1ZWhqSkJPj5+VW7TllZGYqLiwEALi4ucHBw0KpZUFCAtLQ0nWoSERFR46XTERYAiIiIQEhICLy9veHj44O4uDgUFRUhNDQUABAcHIwWLVogOjoawL3rTby9vdGmTRsUFxfj+++/x6ZNmxAfHw8AUCgUCA8Px6JFi9C2bVu4uLhg3rx5cHJyQlBQUO1tKRERETVYOgeWYcOG4fLly4iMjERubi48PT2RmJgoXTSbnZ0NA4P7B26KioowadIk/PXXXzA1NYWbmxv+85//YNiwYVKfmTNnoqioCOPHj8f169fRo0cPJCYmQqVS1cImEhERUUOn831Y5Ij3YSEiqhneh4XqQ53dh4WIiIioPjCwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHsMbAQERGR7DGwEBERkewxsBAREZHs6RVYVq9eDbVaDZVKBV9fX6Snp1fZd82aNXj++edhY2MDGxsbaDSaCv1Hjx4NhUKhNQUGBuozNCIiImqEdA4sW7ZsQUREBKKionDkyBF07twZAQEBuHTpUqX9U1JSMGLECCQnJyM1NRXOzs7o27cv/v77b61+gYGBuHjxojR98cUX+m0RERERNTo6B5bY2FiMGzcOoaGhcHd3R0JCAszMzLB27dpK+3/22WeYNGkSPD094ebmhn//+98oKytDUlKSVj8TExM4ODhIk42NjX5bRERERI2OToGlpKQEGRkZ0Gg09wsYGECj0SA1NbVaNW7evIk7d+6gadOmWvNTUlLQvHlztG/fHhMnTsTVq1d1GRoRERE1Yoa6dL5y5QpKS0thb2+vNd/e3h6nTp2qVo1Zs2bByclJK/QEBgZi8ODBcHFxwblz5/DOO++gX79+SE1NhVKprFCjuLgYxcXF0uOCggJdNoOIiIgaGJ0CS00tXboUmzdvRkpKClQqlTR/+PDh0s8eHh7o1KkT2rRpg5SUFPTp06dCnejoaCxYsOCJjJmIiIjqn06nhOzs7KBUKpGXl6c1Py8vDw4ODo9cNiYmBkuXLsUPP/yATp06PbJv69atYWdnh7Nnz1baPmfOHOTn50tTTk6OLptBREREDYxOgcXY2BheXl5aF8yWX0Dr5+dX5XLLly/HwoULkZiYCG9v78eu56+//sLVq1fh6OhYabuJiQksLS21JiIiImq8dP6UUEREBNasWYMNGzbg5MmTmDhxIoqKihAaGgoACA4Oxpw5c6T+y5Ytw7x587B27Vqo1Wrk5uYiNzcXhYWFAIDCwkLMmDEDBw8eRFZWFpKSkjBw4EC4uroiICCgljaTiIiIGjKdr2EZNmwYLl++jMjISOTm5sLT0xOJiYnShbjZ2dkwMLifg+Lj41FSUoLXX39dq05UVBTmz58PpVKJ48ePY8OGDbh+/TqcnJzQt29fLFy4ECYmJjXcPCIiImoMFEIIUd+DqKmCggJYWVkhPz+fp4eIiPSgnr1D+jlL9cb9hvn59TAaelro8vrN7xIiIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2dMrsKxevRpqtRoqlQq+vr5IT0+vsu+aNWvw/PPPw8bGBjY2NtBoNBX6CyEQGRkJR0dHmJqaQqPRIDMzU5+hERERUSOkc2DZsmULIiIiEBUVhSNHjqBz584ICAjApUuXKu2fkpKCESNGIDk5GampqXB2dkbfvn3x999/S32WL1+OlStXIiEhAWlpaTA3N0dAQABu376t/5YRERFRo6EQQghdFvD19UW3bt2watUqAEBZWRmcnZ0xZcoUzJ49+7HLl5aWwsbGBqtWrUJwcDCEEHBycsK0adMwffp0AEB+fj7s7e2xfv16DB8+/LE1CwoKYGVlhfz8fFhaWuqyOUREBEA9e4f0c5bqjfsN8/PrYTT0tNDl9VunIywlJSXIyMiARqO5X8DAABqNBqmpqdWqcfPmTdy5cwdNmzYFAJw/fx65ublaNa2srODr61tlzeLiYhQUFGhNRERE1HjpFFiuXLmC0tJS2Nvba823t7dHbm5utWrMmjULTk5OUkApX06XmtHR0bCyspImZ2dnXTaDiIiIGpgn+imhpUuXYvPmzdi2bRtUKpXedebMmYP8/HxpysnJqcVREhERkdwY6tLZzs4OSqUSeXl5WvPz8vLg4ODwyGVjYmKwdOlS7NmzB506dZLmly+Xl5cHR0dHrZqenp6V1jIxMYGJiYkuQyciIqIGTKcjLMbGxvDy8kJSUpI0r6ysDElJSfDz86tyueXLl2PhwoVITEyEt7e3VpuLiwscHBy0ahYUFCAtLe2RNYmIiOjpodMRFgCIiIhASEgIvL294ePjg7i4OBQVFSE0NBQAEBwcjBYtWiA6OhoAsGzZMkRGRuLzzz+HWq2WrkuxsLCAhYUFFAoFwsPDsWjRIrRt2xYuLi6YN28enJycEBQUVHtbSkRERA2WzoFl2LBhuHz5MiIjI5GbmwtPT08kJiZKF81mZ2fDwOD+gZv4+HiUlJTg9ddf16oTFRWF+fPnAwBmzpyJoqIijB8/HtevX0ePHj2QmJhYo+tciIiIqPHQ+T4scsT7sBAR1Qzvw0L1oc7uw0JERERUHxhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9hhYiIiISPYYWIiIiEj2GFiIiIhI9vQKLKtXr4ZarYZKpYKvry/S09Or7Pv777/jtddeg1qthkKhQFxcXIU+8+fPh0Kh0Jrc3Nz0GRoRERE1QjoHli1btiAiIgJRUVE4cuQIOnfujICAAFy6dKnS/jdv3kTr1q2xdOlSODg4VFn32WefxcWLF6Vp//79ug6NiIiIGimdA0tsbCzGjRuH0NBQuLu7IyEhAWZmZli7dm2l/bt164b3338fw4cPh4mJSZV1DQ0N4eDgIE12dna6Do2IiIgaKZ0CS0lJCTIyMqDRaO4XMDCARqNBampqjQaSmZkJJycntG7dGiNHjkR2dnaVfYuLi1FQUKA1ERERUeOlU2C5cuUKSktLYW9vrzXf3t4eubm5eg/C19cX69evR2JiIuLj43H+/Hk8//zzuHHjRqX9o6OjYWVlJU3Ozs56r5uIiIjkTxafEurXrx+GDBmCTp06ISAgAN9//z2uX7+OL7/8stL+c+bMQX5+vjTl5OQ84RETERHRk2SoS2c7OzsolUrk5eVpzc/Ly3vkBbW6sra2Rrt27XD27NlK201MTB55PQwRERE1LjodYTE2NoaXlxeSkpKkeWVlZUhKSoKfn1+tDaqwsBDnzp2Do6NjrdUkIiKihkunIywAEBERgZCQEHh7e8PHxwdxcXEoKipCaGgoACA4OBgtWrRAdHQ0gHsX6p44cUL6+e+//8bRo0dhYWEBV1dXAMD06dMxYMAAtGrVChcuXEBUVBSUSiVGjBhRW9tJREREDZjOgWXYsGG4fPkyIiMjkZubC09PTyQmJkoX4mZnZ8PA4P6BmwsXLqBLly7S45iYGMTExKBXr15ISUkBAPz1118YMWIErl69imbNmqFHjx44ePAgmjVrVsPNIyIiosZAIYQQ9T2ImiooKICVlRXy8/NhaWlZ38MhImpw1LN3SD9nqd643zA/vx5GQ08LXV6/ZfEpISIiIqJHYWAhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZY2AhIiIi2WNgISIiItljYCEiIiLZM6zvARARNRZa33i8tH89joSo8eERFiIiIpI9BhYiIiKSPQYWIiIikj0GFiIiIpI9XnRLRFQX5ls98HN+/Y2DqJHgERYiIiKSPQYWIiIikj0GFiIiIpI9BhYiIiKSPQYWIiIikj0GFiIiIpI9BhYiIiKSPQYWIiIikj0GFiIiIpI9BhYiIiKSPQYWIiIikj29Asvq1auhVquhUqng6+uL9PT0Kvv+/vvveO2116BWq6FQKBAXF1fjmkRERPR00TmwbNmyBREREYiKisKRI0fQuXNnBAQE4NKlS5X2v3nzJlq3bo2lS5fCwcGhVmoSERHR00XnwBIbG4tx48YhNDQU7u7uSEhIgJmZGdauXVtp/27duuH999/H8OHDYWJiUis1iYiI6OmiU2ApKSlBRkYGNBrN/QIGBtBoNEhNTdVrAPrULC4uRkFBgdZEREREjZehLp2vXLmC0tJS2Nvba823t7fHqVOn9BqAPjWjo6OxYMECvdZHjYN69g7p56yl/etxJPpp6OMnInrSGuSnhObMmYP8/HxpysnJqe8hkUx4bPCQJiIiajx0OsJiZ2cHpVKJvLw8rfl5eXlVXlBbFzVNTEyqvB6GiIiIGh+djrAYGxvDy8sLSUlJ0ryysjIkJSXBz89PrwHURU0iIiJqXHQ6wgIAERERCAkJgbe3N3x8fBAXF4eioiKEhoYCAIKDg9GiRQtER0cDuHdR7YkTJ6Sf//77bxw9ehQWFhZwdXWtVk0iIiJ6uukcWIYNG4bLly8jMjISubm58PT0RGJionTRbHZ2NgwM7h+4uXDhArp06SI9jomJQUxMDHr16oWUlJRq1SQiIqKnm86BBQDCwsIQFhZWaVt5CCmnVqshhKhRTSIiInq6NchPCREREdHThYGFiIiIZI+BhYiIiGSPgYWIiIhkj4GFiIiIZE+vTwkRUd148CsFfg35tR5HQkQkLzzCQkRERLLHwEJERESyx8BCREREssfAQkRERLLHi26JGhn17B3Sz1lL+9fjSIiIag8DCz1x/CQMERHpiqeEiIiISPYYWIiIiEj2GFiIiIhI9ngNCxHVCl6bRER1iYGF6gw/rUJERLWFp4SIiIhI9hhYGiiPDR5ah+CJiIgaMwYWIiIikj1ew0L0lOBFsUTUkDGw1BG+OBAREdUeBhaqgGGratw3RET1g4GFqL7Nt7r/s0vL+hsHEZGM8aJbIiIikj0GFiIiIpI9BhYiIiKSPV7DQtSY8foYImokeISFiIiIZI+BhYiIiGSPgYWIiIhkT6/Asnr1aqjVaqhUKvj6+iI9Pf2R/b/66iu4ublBpVLBw8MD33//vVb76NGjoVAotKbAwEB9hkZERESNkM6BZcuWLYiIiEBUVBSOHDmCzp07IyAgAJcuXaq0/88//4wRI0ZgzJgx+OWXXxAUFISgoCD89ttvWv0CAwNx8eJFafriiy/02yIiIiJqdHQOLLGxsRg3bhxCQ0Ph7u6OhIQEmJmZYe3atZX2X7FiBQIDAzFjxgx06NABCxcuRNeuXbFq1SqtfiYmJnBwcJAmGxsb/baIiIiIGh2dAktJSQkyMjKg0WjuFzAwgEajQWpqaqXLpKamavUHgICAgAr9U1JS0Lx5c7Rv3x4TJ07E1atXqxxHcXExCgoKtCYiIiJqvHQKLFeuXEFpaSns7e215tvb2yM3N7fSZXJzcx/bPzAwEBs3bkRSUhKWLVuGffv2oV+/figtLa20ZnR0NKysrKTJ2dlZl80gIiKiBkYWN44bPny49LOHhwc6deqENm3aICUlBX369KnQf86cOYiIiJAeFxQUMLQQERE1YjoFFjs7OyiVSuTl5WnNz8vLg4ODQ6XLODg46NQfAFq3bg07OzucPXu20sBiYmICExMTXYb+SCfdOkg/dzh1stbqPon6DXnsdV2/IY+9rutz7I2zfkMe+4P1G/LY66J+Qx57bdbX6ZSQsbExvLy8kJSUJM0rKytDUlIS/Pz8Kl3Gz89Pqz8A7N69u8r+APDXX3/h6tWrcHR01GV4RNQIqGfvkKaGVJuI6pbOp4QiIiIQEhICb29v+Pj4IC4uDkVFRQgNDQUABAcHo0WLFoiOjgYATJ06Fb169cIHH3yA/v37Y/PmzTh8+DA++eQTAEBhYSEWLFiA1157DQ4ODjh37hxmzpwJV1dXBAQE1OKmEhE94MHvWZqfX3/jIKJq0TmwDBs2DJcvX0ZkZCRyc3Ph6emJxMRE6cLa7OxsGBjcP3Dj7++Pzz//HHPnzsU777yDtm3bYvv27ejYsSMAQKlU4vjx49iwYQOuX78OJycn9O3bFwsXLqzV0z7UiPEL/oiIGj29LroNCwtDWFhYpW0pKSkV5g0ZMgRDhgyptL+pqSl27dqlzzCIiKgh4tEt0gO/S4iIiIhkj4GFiIiIZI+BhYiIiGSPgYWI5Gu+lfb1DkT01JLFnW7rg8cGD+nnL+txHERERPR4PMJCREREsvfUHmEhImpIHrw7b9bS/vU4koajTo6k8yPZ9YaBhYhkj6dwiYiBhYiooeG7fHoKMbAQEdUxHiGqGvcNVRcvuiUiIiLZ4xEWejL4BYVEdYJHKOhpwcBCRERVYiAifdTF/xsGFiIiqnNaH8tW1eNAqMHiNSxEREQkezzCQkREpAeeLnuyGFiIiIhkhmGoIp4SIiIiItljYCEiIiLZ4ykhInrq8fA7kfwxsBAREckBb7D5SDwlRERERLLHwEJERESyx1NCRI/Dw7RERPWOgYWI9McwR0RPCAMLERHRU6YhfjKOgYWIiBoPHvVrtBhY6B7+khMRkYzxU0JEREQkewwsREREJHt6BZbVq1dDrVZDpVLB19cX6enpj+z/1Vdfwc3NDSqVCh4eHvj++++12oUQiIyMhKOjI0xNTaHRaJCZmanP0IiIiKgR0jmwbNmyBREREYiKisKRI0fQuXNnBAQE4NKlS5X2//nnnzFixAiMGTMGv/zyC4KCghAUFITffvtN6rN8+XKsXLkSCQkJSEtLg7m5OQICAnD79m39t4yIiIgaDZ0DS2xsLMaNG4fQ0FC4u7sjISEBZmZmWLt2baX9V6xYgcDAQMyYMQMdOnTAwoUL0bVrV6xatQrAvaMrcXFxmDt3LgYOHIhOnTph48aNuHDhArZv316jjXvi5lvdn4iIiKjW6BRYSkpKkJGRAY1Gc7+AgQE0Gg1SU1MrXSY1NVWrPwAEBARI/c+fP4/c3FytPlZWVvD19a2yJhERET1ddPpY85UrV1BaWgp7e3ut+fb29jh16lSly+Tm5lbaPzc3V2ovn1dVn4cVFxejuLhYepyfnw8AKCgoePQGRD8j/Vja6v7PhaWl0s+PrfEoxeJ+/Vv3a9Za/UrGL+exlxXfvL+sonbr12XtCrhvqvaE9k1D2O91Xb8hj13X+nL7P/nE6tfDa0it1n9Adcdf/lgIgccSOvj7778FAPHzzz9rzZ8xY4bw8fGpdBkjIyPx+eefa81bvXq1aN68uRBCiAMHDggA4sKFC1p9hgwZIoYOHVppzaioKAGAEydOnDhx4tQIppycnMdmEJ2OsNjZ2UGpVCIvL09rfl5eHhwcHCpdxsHB4ZH9y//Ny8uDo6OjVh9PT89Ka86ZMwcRERHS47KyMly7dg22trZQKBSP3Y6CggI4OzsjJycHlpaWj+2vq4Zcn2NvnPU59sZZvyGPva7rc+wNo74QAjdu3ICTk9Nj6+oUWIyNjeHl5YWkpCQEBQUBuBcWkpKSEBYWVukyfn5+SEpKQnh4uDRv9+7d8PPzAwC4uLjAwcEBSUlJUkApKChAWloaJk6cWGlNExMTmJiYaM2ztrbWZVMAAJaWlnXyZDWG+hx746zPsTfO+g157HVdn2OXf30rK6tq1dP51vwREREICQmBt7c3fHx8EBcXh6KiIoSGhgIAgoOD0aJFC0RHRwMApk6dil69euGDDz5A//79sXnzZhw+fBiffPIJAEChUCA8PByLFi1C27Zt4eLignnz5sHJyUkKRURERPR00zmwDBs2DJcvX0ZkZCRyc3Ph6emJxMRE6aLZ7OxsGBjc//CRv78/Pv/8c8ydOxfvvPMO2rZti+3bt6Njx45Sn5kzZ6KoqAjjx4/H9evX0aNHDyQmJkKlUtXCJhIREVFDp9eXH4aFhVV5CiglJaXCvCFDhmDIkCFV1lMoFHjvvffw3nvv6TMcnZmYmCAqKqrCaSXW59gba32OvXHWb8hjr+v6HHvjq68QojqfJSIiIiKqP/zyQyIiIpI9BhYiIiKSPQYWIiIikj0GFnqieMkUERHpQ69PCTU0V65cwdq1a5Gamip9P5GDgwP8/f0xevRoNGvWrJ5H+PQwMTHBsWPH0KFDh/oeCtXAxYsXER8fj/379+PixYswMDBA69atERQUhNGjR0OpVNb3EImokWn0nxI6dOgQAgICYGZmBo1GI90vJi8vD0lJSbh58yZ27doFb2/vOll/Tk4OoqKisHbtWr1r3Lp1CxkZGWjatCnc3d212m7fvo0vv/wSwcHBetU+efIkDh48CD8/P7i5ueHUqVNYsWIFiouL8Y9//AMvvviiXnUf/OqEB61YsQL/+Mc/YGtrCwCIjY3Vq/7DioqK8OWXX+Ls2bNwdHTEiBEjpHXo48iRI7CxsYGLiwsAYNOmTUhISEB2djZatWqFsLAwDB8+XO/6U6ZMwdChQ/H888/rXeNRVq1ahfT0dLz88ssYPnw4Nm3ahOjoaJSVlWHw4MF47733YGio3/uVw4cPQ6PRwNXVFaampkhNTcUbb7yBkpIS7Nq1C+7u7khMTESTJk1qeauISK7S09MrHBTw8/ODj49P7a3ksd821MD5+vqK8ePHi7KysgptZWVlYvz48eK5556rs/UfPXpUGBgY6L386dOnRatWrYRCoRAGBgaiZ8+eWl8UmZubq3f9nTt3CmNjY9G0aVOhUqnEzp07RbNmzYRGoxEvvviiUCqVIikpSa/aCoVCeHp6it69e2tNCoVCdOvWTfTu3Vu88MILetUWQogOHTqIq1evCiGEyM7OFmq1WlhZWYlu3bqJpk2biubNm4s//vhD7/qdOnUSu3fvFkIIsWbNGmFqairefvttER8fL8LDw4WFhYX49NNP9a5f/ny2bdtWLF26VFy8eFHvWg9buHChaNKkiXjttdeEg4ODWLp0qbC1tRWLFi0SS5YsEc2aNRORkZF61+/evbuYP3++9HjTpk3C19dXCCHEtWvXhKenp3j77bdrtA3FxcViy5YtIjw8XAwfPlwMHz5chIeHiy+//FIUFxfXqPbj5ObmigULFtSoRk5Ojrhx40aF+SUlJWLfvn01qn3lyhWxd+9e6f//5cuXxdKlS8WCBQvEiRMnalS7Ki4uLuLMmTO1WrOsrEzs3btXfPLJJ+Lbb78VJSUlNaqXk5MjLl++LD3+8ccfxRtvvCF69OghRo4cWeFLe3URExMjsrKyajS+x/n222/FvHnzxP79+4UQQiQlJYl+/fqJgIAA8fHHH9e4/s2bN8Wnn34qQkNDRWBgoHj55ZdFWFiY2LNnT43q5uXliR49egiFQiFatWolfHx8hI+Pj/S61aNHD5GXl1fj8QshRKMPLCqVSpw8ebLK9pMnTwqVSqV3/a+//vqR04cfflijwBIUFCT69+8vLl++LDIzM0X//v2Fi4uL+PPPP4UQNQssfn5+4t133xVCCPHFF18IGxsb8c4770jts2fPFi+99JJetaOjo4WLi0uFwGNoaCh+//13vWo+SKFQSL8EI0eOFP7+/uL69etCCCFu3LghNBqNGDFihN71TU1NpT9QXbp0EZ988olW+2effSbc3d31rq9QKMSePXvE1KlThZ2dnTAyMhKvvvqq+Pbbb0VpaanedYUQok2bNuJ///ufEOJeYFYqleI///mP1L5161bh6uqqd31TU1Nx7tw56XFpaakwMjISubm5QgghfvjhB+Hk5KR3/czMTNG6dWuhUqlEr169xNChQ8XQoUNFr169hEqlEq6uriIzM1Pv+o9TkzcZFy5cEN26dRMGBgZCqVSKUaNGaQWXmvy+CiFEWlqasLKyEgqFQtjY2IjDhw8LFxcX0bZtW9GmTRthamoqMjIy9K6/YsWKSielUinmzJkjPdZHv379pN/Rq1evCl9fX6FQKESzZs2EgYGBcHNzE5cuXdJ77D4+PuLbb78VQgixfft2YWBgIF599VUxa9YsMWjQIGFkZCS160qhUAilUik0Go3YvHlzrYfmhIQEYWhoKLy8vISlpaXYtGmTaNKkiRg7dqyYMGGCMDU1FXFxcXrXz8zMFK1atRLNmzcXzs7OQqFQiP79+wtfX1+hVCrFkCFDxJ07d/Sq/dprrwk/Pz9x6tSpCm2nTp0S/v7+4vXXX9d77A9q9IFFrVaLDRs2VNm+YcMG0apVK73rl79TVigUVU41+QPVvHlzcfz4celxWVmZeOutt0TLli3FuXPnavQH0NLSUvrDX1paKgwNDcWRI0ek9l9//VXY29vrPfb09HTRrl07MW3aNOndU10EltatW4sffvhBq/3AgQPC2dlZ7/q2trbi8OHDQoh7z8HRo0e12s+ePStMTU31rv/g+EtKSsSWLVtEQECAUCqVwsnJSbzzzjt6vyibmppKgVYIIYyMjMRvv/0mPc7KyhJmZmZ6j71Vq1bSu0Ah7r1IKxQKcfPmTSGEEOfPn6/RmwCNRiMGDhwo8vPzK7Tl5+eLgQMHir59++pd/9ixY4+ctmzZovfvVHBwsPD19RWHDh0Su3fvFl5eXsLb21tcu3ZNCHEvsCgUCr3HrtFoxNixY0VBQYF4//33xTPPPCPGjh0rtYeGhoqgoCC96ysUCvHMM88ItVqtNSkUCtGiRQuhVquFi4uL3rXL/89PnDhRuLu7S0dBc3JyhJeXl3jrrbf0Hru5ublUz9fXVyxdulSr/aOPPhJdunTRe+zr1q0TAwcOFEZGRsLW1lZMnTpV/Prrr3qP90Hu7u7Sm6K9e/cKlUolVq9eLbWvW7dOdOjQQe/6/fr1ExMmTJDONCxdulT069dPCCHEmTNnhFqtFlFRUXrVtrCw0HrdeNjhw4eFhYWFXrUf1ugDy6pVq4SJiYl4++23xddffy0OHjwoDh48KL7++mvx9ttvC1NTU63/GLpycnIS27dvr7L9l19+qVFgadKkSaWHeSdPniyeeeYZ8eOPP9YosJw9e1Z6bGFhofXOOSsrq0YvPELcO9oRHBwsOnXqJH799VdhZGRUa4Gl/N2Yk5NThT8cNR37P/7xDzFmzBghhBBDhgwRc+fO1WpfsmSJ8PDw0Lv+g3+8H/Tnn3+KqKgo0apVK72fVxcXF7Fz504hxL0/RgYGBuLLL7+U2nfs2CHUarV+AxdCTJ06VXTs2FHs3LlT7N27V7zwwguid+/eUntiYqJo06aN3vVNTU0f+UJw/PjxGofFqt5klM/Xd987OTmJtLQ06fHt27fFgAEDhKenp7h69WqNj7DY2NhIfw9KSkqEgYGB1voyMjJEixYt9K4/YcIE4enpWeFvTm280Xjw/3z79u3F119/rdW+Z88evcOQEEJYWVmJY8eOCSHuvcko/7nc2bNn9Q7qD449Ly9PLFu2TLi5uQkDAwPRrVs38cknn4iCggK9x17Zm4wHfwfOnz9fozcZZmZmWqf0iouLhZGRkbhy5YoQ4t4RKX3/Jtja2oqUlJQq25OTk4Wtra1etR/W6AOLEEJs3rxZ+Pr6CkNDQ+kPk6GhofD19RVbtmypUe0BAwaIefPmVdl+9OjRGr2j6tatm9i4cWOlbZMnTxbW1tZ6/wHs1KmT9MImxL0jKg8eFvzxxx9r9AfkQV988YWwt7cXBgYGtRZYPDw8RJcuXYSFhYX473//q9W+b9++Gv3h/vvvv4VarRY9e/YUERERwtTUVPTo0UOMGzdO9OzZUxgbG4sdO3bUaPyPOq9bVlZW4ahRdc2dO1c0a9ZMjB07Vri4uIjZs2eLli1bivj4eJGQkCCcnZ3FP//5T32HLm7cuCGGDh0q/T75+/trXS+0a9curYCkK0dHx0ceuv/mm2+Eo6Oj3vVtbW3Fp59+KrKysiqdduzYoffvlLm5eYVrPe7cuSOCgoJEp06dxPHjx2sUWMzNzcX58+elxw+/yfjzzz9r/CZj69atwtnZWXz00UfSvNoKLOVvMpo3b6511E+Ie28yTExM9K7/6quvitmzZwshhAgICKhw6mrNmjWibdu2etWu6vf1xx9/FCEhIcLc3FyYm5vrVVsIIb35FOLe3x6FQqH19yUlJUU888wzetd3cnLSOlX4f//3f0KhUEgh648//tB730+aNEm0atVKbN26VeuoaH5+vti6datQq9UiLCxM77E/6KkILOVKSkrEhQsXxIULF2p8gVe5H3/8UetF/2GFhYWPTJ+Ps2TJEunQXWUmTpyodyCKj48X3333XZXtc+bMkY4y1IacnByxfft2UVhYWONa8+fP15oSExO12qdPny6GDx9eo3X83//9n5g1a5Zwd3cXKpVKGBsbi1atWok33nhDHDp0qEa11Wq19O6mtpWWlorFixeLV155RSxZskSUlZWJL774Qjg7OwtbW1sxevToWnkObt26VemFpTU1b948YWNjI2JjY8WxY8dEbm6uyM3NFceOHROxsbGiadOmeh++FkKIvn37ioULF1bZXpM3GR4eHhXCsxD3Q0vLli1rFFjc3Ny0rgv77rvvpFNxQghx8ODBGr2wlfvrr7/Eiy++KAIDA8XFixdrLbC8/PLLYtCgQcLGxqZCKD148GCNTkGfOHFC2NraiuDgYLFw4UJhYWEh/vGPf4jFixeL4OBgYWJiItatW6dXbQMDg0e+wcjPz69wnZsuJk+eLNq2bSsWLVokfHx8REhIiHBzcxM7d+4UiYmJwsPDQ7z55pt61w8JCRG9evUSJ0+eFH/88YcYNmyY1umxlJQUvU+h3759W7z11lvC2NhYGBgYCJVKJVQqlTAwMBDGxsZi4sSJ4vbt23qP/UFPVWAhooZh6dKlwtHRUTo9U36qxtHRUSxbtqxGtbdu3So2bdpUZfu1a9fE+vXr9ao9c+bMKq+vuXPnjnj11VdrdMR1/vz54osvvqiy/Z133hGDBw/Wu/6DysrKxJIlS4SDg4NQKpU1DiyjR4/Wmh4+uj1jxgwREBBQo3WcPXtWDB8+XDRp0kQ6mm5kZCT8/f3Ftm3b9K77uCOiNVVYWCjGjRsnOnbsKMaPHy+Ki4vF+++/L4yNjYVCoRC9e/eu0frz8vLEc889J/0+tWrVSuu6k6+++kqsXLmyRtuQn58v9u7dKz7//HPx+eefi71791Z6HVpNNPr7sBBRw3X+/Hmt+zqU3xdHru7evYubN2/C0tKyyva///4brVq1qpP137x5E0qlEiYmJrVWMyMjA/v370dwcDBsbGxqre7DioqKoFQqoVKpalxLCIFLly6hrKwMdnZ2MDIyqoURPnm3b9/GnTt3au2eRpmZmSguLoabm5ve92GqT7w1PxHJlouLC/z8/ODn5yeFlZycHLz55pt1ts6a1Dc0NKwyrAD37hC8YMECfYf2WFevXsXEiRNrtaaXlxemTp0KGxubOt33165dw6RJk2qllkKhgL29PRwdHaWwUpdjr6vaKpUKTZo0qbX6bdu2RceOHSuElZrWv3XrFvbv348TJ05UaLt9+zY2btyod+0H8QgLETUox44dQ9euXVFaWtrg6jfksdd1fY69YdY/c+YM+vbti+zsbCgUCvTo0QNffPEFnJycANy7q7yTk1OtjL3hHRMiokbtm2++eWT7H3/8Idv6DXnsdV2fY2+c9WfNmoWOHTvi8OHDuH79OsLDw9GjRw+kpKSgZcuWetetDI+wEJGsGBgYQKFQPPKbvRUKhd7v2OqyfkMee13X59gbZ317e3vs2bMHHh4eAO5dPzRp0iR8//33SE5Ohrm5ea0dYeE1LEQkK46Ojti6dSvKysoqnY4cOSLb+g157HVdn2NvnPVv3bqldU2MQqFAfHw8BgwYgF69euHMmTM1GvuDGFiISFa8vLyQkZFRZfvj3inWZ/2GPPa6rs+xN876bm5uOHz4cIX5q1atwsCBA/Hqq6/qVbcyvIaFiGRlxowZKCoqqrLd1dUVycnJsqzfkMde1/U59sZZf9CgQfjiiy8watSoCm2rVq1CWVkZEhIS9Kr9MF7DQkRERLLHU0JEREQkewwsREREJHsMLERERCR7DCxEREQkewwsREREJHsMLERERCR7DCxEREQkewwsREREJHv/D29WypRhKoObAAAAAElFTkSuQmCC",
569 |       "text/plain": [
570 |        "<Figure size 640x480 with 1 Axes>"
571 |       ]
572 |      },
573 |      "metadata": {},
574 |      "output_type": "display_data"
575 |     }
576 |    ],
577 |    "source": [
578 |     "df = pd.concat([r0, r1, r3, ideal_dist], axis=1)\n",
579 |     "df.columns = ['method0: sampling', 'method1: hindsight', 'method3: generation tree', 'ideal']\n",
580 |     "\n",
581 |     "\n",
582 |     "df = df.sort_index().fillna(0)\n",
583 |     "df.plot.bar()\n",
584 |     "df"
585 |    ]
586 |   },
587 |   {
588 |    "cell_type": "code",
589 |    "execution_count": 173,
590 |    "metadata": {},
591 |    "outputs": [],
592 |    "source": [
593 |     "# df.sum()"
594 |    ]
595 |   },
596 |   {
597 |    "cell_type": "code",
598 |    "execution_count": 174,
599 |    "metadata": {},
600 |    "outputs": [
601 |     {
602 |      "name": "stdout",
603 |      "output_type": "stream",
604 |      "text": [
605 |       "MAE coverage (smaller is better\n"
606 |      ]
607 |     },
608 |     {
609 |      "data": {
610 |       "text/plain": [
611 |        "method0: sampling           1.121905\n",
612 |        "method1: hindsight          0.952232\n",
613 |        "method3: generation tree    0.494957\n",
614 |        "dtype: float64"
615 |       ]
616 |      },
617 |      "execution_count": 174,
618 |      "metadata": {},
619 |      "output_type": "execute_result"
620 |     }
621 |    ],
622 |    "source": [
623 |     "df_score = df / df['ideal'].values[:, None] - 1\n",
624 |     "ratios = df_score.iloc[:21, :3]\n",
625 |     "print('MAE coverage (smaller is better')\n",
626 |     "ratios.abs().mean()"
627 |    ]
628 |   },
629 |   {
630 |    "cell_type": "code",
631 |    "execution_count": 175,
632 |    "metadata": {},
633 |    "outputs": [
634 |     {
635 |      "name": "stdout",
636 |      "output_type": "stream",
637 |      "text": [
638 |       "tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0733, 0.1000, 0.0000, 0.1000, 0.0800,\n",
639 |       "         0.0000, 0.0467, 0.0467, 0.1256, 0.0344, 0.3678, 0.0256, 0.0000, 0.0000,\n",
640 |       "         0.0000, 0.0000, 0.0000]], dtype=torch.float64) method0: sampling\n",
641 |       "tensor([[6.4225e-04, 2.8117e-03, 1.0104e-03, 9.0762e-03, 2.8844e-02, 4.3982e-02,\n",
642 |       "         1.3840e-02, 6.3993e-02, 5.9184e-02, 1.2213e-02, 3.7620e-02, 4.5378e-02,\n",
643 |       "         1.1956e-01, 3.1679e-02, 4.2386e-01, 4.2629e-02, 1.0285e-02, 3.4793e-02,\n",
644 |       "         1.7495e-02, 1.1011e-03, 9.9832e-06]], dtype=torch.float64) method1: hindsight\n",
645 |       "tensor([[0.0365, 0.0667, 0.0710, 0.0653, 0.0786, 0.0999, 0.0662, 0.0858, 0.0682,\n",
646 |       "         0.0745, 0.0327, 0.0187, 0.0307, 0.0279, 0.0274, 0.0312, 0.0344, 0.0235,\n",
647 |       "         0.0225, 0.0109, 0.0277]], dtype=torch.float64) method3: generation tree\n"
648 |      ]
649 |     },
650 |     {
651 |      "data": {
652 |       "text/html": [
653 |        "<style type=\"text/css\">\n",
654 |        "#T_90c6a_row0_col0, #T_90c6a_row0_col1, #T_90c6a_row1_col0 {\n",
655 |        "  background-color: #800026;\n",
656 |        "  color: #f1f1f1;\n",
657 |        "}\n",
658 |        "#T_90c6a_row1_col1 {\n",
659 |        "  background-color: #fffdc8;\n",
660 |        "  color: #000000;\n",
661 |        "}\n",
662 |        "#T_90c6a_row2_col0, #T_90c6a_row2_col1 {\n",
663 |        "  background-color: #ffffcc;\n",
664 |        "  color: #000000;\n",
665 |        "}\n",
666 |        "</style>\n",
667 |        "<table id=\"T_90c6a\">\n",
668 |        "  <caption>KL_div_loss and time for each method (lower is better)</caption>\n",
669 |        "  <thead>\n",
670 |        "    <tr>\n",
671 |        "      <th class=\"blank level0\" >&nbsp;</th>\n",
672 |        "      <th id=\"T_90c6a_level0_col0\" class=\"col_heading level0 col0\" >KL_div_loss</th>\n",
673 |        "      <th id=\"T_90c6a_level0_col1\" class=\"col_heading level0 col1\" >time</th>\n",
674 |        "    </tr>\n",
675 |        "    <tr>\n",
676 |        "      <th class=\"index_name level0\" >method</th>\n",
677 |        "      <th class=\"blank col0\" >&nbsp;</th>\n",
678 |        "      <th class=\"blank col1\" >&nbsp;</th>\n",
679 |        "    </tr>\n",
680 |        "  </thead>\n",
681 |        "  <tbody>\n",
682 |        "    <tr>\n",
683 |        "      <th id=\"T_90c6a_level0_row0\" class=\"row_heading level0 row0\" >method0: sampling</th>\n",
684 |        "      <td id=\"T_90c6a_row0_col0\" class=\"data row0 col0\" >-3.092141</td>\n",
685 |        "      <td id=\"T_90c6a_row0_col1\" class=\"data row0 col1\" >48.504429</td>\n",
686 |        "    </tr>\n",
687 |        "    <tr>\n",
688 |        "      <th id=\"T_90c6a_level0_row1\" class=\"row_heading level0 row1\" >method1: hindsight</th>\n",
689 |        "      <td id=\"T_90c6a_row1_col0\" class=\"data row1 col0\" >-3.092141</td>\n",
690 |        "      <td id=\"T_90c6a_row1_col1\" class=\"data row1 col1\" >0.683987</td>\n",
691 |        "    </tr>\n",
692 |        "    <tr>\n",
693 |        "      <th id=\"T_90c6a_level0_row2\" class=\"row_heading level0 row2\" >method3: generation tree</th>\n",
694 |        "      <td id=\"T_90c6a_row2_col0\" class=\"data row2 col0\" >-3.092156</td>\n",
695 |        "      <td id=\"T_90c6a_row2_col1\" class=\"data row2 col1\" >0.075112</td>\n",
696 |        "    </tr>\n",
697 |        "  </tbody>\n",
698 |        "</table>\n"
699 |       ],
700 |       "text/plain": [
701 |        "<pandas.io.formats.style.Styler at 0x76460c6033d0>"
702 |       ]
703 |      },
704 |      "execution_count": 175,
705 |      "metadata": {},
706 |      "output_type": "execute_result"
707 |     }
708 |    ],
709 |    "source": [
710 |     "data = []\n",
711 |     "times = dict(zip(df.columns, [t0, t1, t3]))\n",
712 |     "for k in df.columns[:3]:\n",
713 |     "    input = torch.tensor(df[k].values)[None, :]\n",
714 |     "    print(input, k)\n",
715 |     "    target = torch.tensor(df['ideal'].values)[None, :]\n",
716 |     "    # https://pytorch.org/docs/stable/generated/torch.nn.KLDivLoss.html#torch.nn.KLDivLoss\n",
717 |     "    s = F.kl_div(input, target , reduction='batchmean', log_target=False).item()\n",
718 |     "    t = times[k].total_seconds()\n",
719 |     "    data.append({'method': k, 'KL_div_loss': s, 'time': t})\n",
720 |     "dfr = pd.DataFrame(data).set_index('method')\n",
721 |     "# color values with cmap\n",
722 |     "dfs = dfr.style.background_gradient(cmap='YlOrRd')\n",
723 |     "dfs.set_caption('KL_div_loss and time for each method (lower is better)')\n",
724 |     "dfs"
725 |    ]
726 |   },
727 |   {
728 |    "cell_type": "code",
729 |    "execution_count": 176,
730 |    "metadata": {},
731 |    "outputs": [
732 |     {
733 |      "name": "stdout",
734 |      "output_type": "stream",
735 |      "text": [
736 |       "| method                   |   KL_div_loss |      time |\n",
737 |       "|:-------------------------|--------------:|----------:|\n",
738 |       "| method0: sampling        |      -3.09214 | 48.5044   |\n",
739 |       "| method1: hindsight       |      -3.09214 |  0.683987 |\n",
740 |       "| method3: generation tree |      -3.09216 |  0.075112 |\n"
741 |      ]
742 |     }
743 |    ],
744 |    "source": [
745 |     "print(dfr.to_markdown())"
746 |    ]
747 |   },
748 |   {
749 |    "cell_type": "markdown",
750 |    "metadata": {},
751 |    "source": [
752 |     "## Scratch, collapse probs"
753 |    ]
754 |   },
755 |   {
756 |    "cell_type": "code",
757 |    "execution_count": 14,
758 |    "metadata": {},
759 |    "outputs": [],
760 |    "source": [
761 |     "# def collapse_probs(json_schema, generated_data, keys = []):\n",
762 |     "\n",
763 |     "#     # get current row\n",
764 |     "#     schema = json_schema\n",
765 |     "#     data = generated_data\n",
766 |     "#     for key in keys[:-1]:\n",
767 |     "#         print(schema)\n",
768 |     "#         schema = schema[key]\n",
769 |     "#         data = data[key]\n",
770 |     "\n",
771 |     "#     schema_type = schema[\"type\"]\n",
772 |     "\n",
773 |     "#     if schema_type == \"p_enum\":\n",
774 |     "#         k = keys[-1]\n",
775 |     "#         data[k] = data[k][0]['choice']\n",
776 |     "#     elif schema_type == \"object\":\n",
777 |     "#         k = \"properties\"\n",
778 |     "#         for key in schema[\"properties\"]:\n",
779 |     "#             data[\"properties\"] = collapse_probs(json_schema, generated_data, keys + [key])\n",
780 |     "    \n",
781 |     "#     return generated_data\n",
782 |     "\n",
783 |     "\n",
784 |     "# list(collapse_probs(json_schema, generated_data))"
785 |    ]
786 |   }
787 |  ],
788 |  "metadata": {
789 |   "kernelspec": {
790 |    "display_name": ".venv",
791 |    "language": "python",
792 |    "name": "python3"
793 |   },
794 |   "language_info": {
795 |    "codemirror_mode": {
796 |     "name": "ipython",
797 |     "version": 3
798 |    },
799 |    "file_extension": ".py",
800 |    "mimetype": "text/x-python",
801 |    "name": "python",
802 |    "nbconvert_exporter": "python",
803 |    "pygments_lexer": "ipython3",
804 |    "version": "3.9.16"
805 |   }
806 |  },
807 |  "nbformat": 4,
808 |  "nbformat_minor": 2
809 | }
810 | 


--------------------------------------------------------------------------------
/prob_jsonformer/__init__.py:
--------------------------------------------------------------------------------
1 | from prob_jsonformer.main import Jsonformer
2 | from prob_jsonformer.format import highlight_values
3 | 


--------------------------------------------------------------------------------
/prob_jsonformer/format.py:
--------------------------------------------------------------------------------
 1 | from termcolor import colored
 2 | 
 3 | 
 4 | def highlight_values(value):
 5 |     def recursive_print(obj, indent=0, is_last_element=True):
 6 |         if isinstance(obj, dict):
 7 |             print("{")
 8 |             last_key = list(obj.keys())[-1]
 9 |             for key, value in obj.items():
10 |                 print(f"{' ' * (indent + 2)}{key}: ", end="")
11 |                 recursive_print(value, indent + 2, key == last_key)
12 |             print(f"{' ' * indent}}}", end=",\n" if not is_last_element else "\n")
13 |         elif isinstance(obj, list):
14 |             print("[")
15 |             for index, value in enumerate(obj):
16 |                 print(f"{' ' * (indent + 2)}", end="")
17 |                 recursive_print(value, indent + 2, index == len(obj) - 1)
18 |             print(f"{' ' * indent}]", end=",\n" if not is_last_element else "\n")
19 |         else:
20 |             if isinstance(obj, str):
21 |                 obj = f'"{obj}"'
22 |             print(colored(obj, "green"), end=",\n" if not is_last_element else "\n")
23 | 
24 |     recursive_print(value)
25 | 


--------------------------------------------------------------------------------
/prob_jsonformer/logits_processors.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | from transformers import PreTrainedTokenizer, StoppingCriteria
  3 | from transformers.generation.logits_process import LogitsProcessor
  4 | import torch
  5 | 
  6 | 
  7 | class StringStoppingCriteria(StoppingCriteria):
  8 |     def __init__(
  9 |         self,
 10 |         tokenizer: PreTrainedTokenizer,
 11 |         prompt_length: int,
 12 |         max_length: int = None,
 13 |         min_length: int = 1,
 14 |     ):
 15 |         self.tokenizer = tokenizer
 16 |         self.prompt_length = prompt_length
 17 |         self.max_length = max_length
 18 |         self.min_length = min_length
 19 | 
 20 |     def __call__(
 21 |         self,
 22 |         input_ids: torch.LongTensor,
 23 |         _,
 24 |     ) -> bool:
 25 |         if len(input_ids[0]) <= self.prompt_length:
 26 |             return False
 27 | 
 28 |         last_token_id = input_ids[0][-1]
 29 |         last_token = self.tokenizer.decode(last_token_id, skip_special_tokens=True)
 30 | 
 31 |         result = '"' in last_token
 32 | 
 33 |         if self.max_length is not None:
 34 |             # because of tokens this wont work pefectly, we might go 0-10 chars over
 35 |             gen_ids = input_ids[0][self.prompt_length :]
 36 |             o = self.tokenizer.decode(gen_ids, skip_special_tokens=True)
 37 |             str_l = len(o)
 38 |             if str_l > self.max_length:
 39 |                 return True
 40 | 
 41 |         if self.min_length is not None:
 42 |             gen_ids = input_ids[0][self.prompt_length :]
 43 |             o = self.tokenizer.decode(gen_ids, skip_special_tokens=True)
 44 |             str_l = len(o)
 45 |             if str_l < self.min_length:
 46 |                 return False
 47 | 
 48 |         return result
 49 | 
 50 | 
 51 | class NumberStoppingCriteria(StoppingCriteria):
 52 |     def __init__(
 53 |         self,
 54 |         tokenizer: PreTrainedTokenizer,
 55 |         prompt_length: int,
 56 |         precision: int = 3,
 57 |     ):
 58 |         self.tokenizer = tokenizer
 59 |         self.precision = precision
 60 |         self.prompt_length = prompt_length
 61 | 
 62 |     def __call__(
 63 |         self,
 64 |         input_ids: torch.LongTensor,
 65 |         scores: torch.FloatTensor,
 66 |     ) -> bool:
 67 |         decoded = self.tokenizer.decode(
 68 |             input_ids[0][self.prompt_length :], skip_special_tokens=True
 69 |         )
 70 | 
 71 |         if decoded.count(".") > 1:
 72 |             return True
 73 | 
 74 |         if (
 75 |             decoded.count(".") == 1
 76 |             and len(decoded.replace(" ", "").split(".")[1]) > self.precision
 77 |         ):
 78 |             return True
 79 | 
 80 |         if (
 81 |             len(decoded) > 1
 82 |             and "," in decoded
 83 |             and any(c.isdigit() for c in decoded.split(",")[0])
 84 |         ):
 85 |             return True
 86 | 
 87 |         if (
 88 |             len(decoded) > 1
 89 |             and any(c.isdigit() for c in decoded)
 90 |             and ("," in decoded or decoded[-1] in (" ", "\n"))
 91 |         ):
 92 |             return True
 93 | 
 94 |         return False
 95 | 
 96 | 
 97 | class OutputNumbersTokens(LogitsProcessor):
 98 |     def __init__(self, tokenizer: PreTrainedTokenizer, prompt: str):
 99 |         self.tokenizer = tokenizer
100 |         self.tokenized_prompt = tokenizer(prompt, return_tensors="pt")
101 |         vocab_size = len(tokenizer)
102 |         self.allowed_mask = torch.zeros(vocab_size, dtype=torch.bool)
103 | 
104 |         for _, token_id in tokenizer.get_vocab().items():
105 |             token_str = tokenizer.decode(token_id, skip_special_tokens=True).strip()
106 | 
107 |             if (
108 |                 token_str == ""
109 |                 or (
110 |                     all(c.isdigit() or c == "." for c in token_str)
111 |                     and token_str.count(".") <= 1
112 |                 )
113 |                 or (
114 |                     "," in token_str
115 |                     and all(c.isdigit() or c == "." for c in token_str.split(",")[0])
116 |                     and token_str.count(".") <= 1
117 |                 )
118 |             ):
119 |                 self.allowed_mask[token_id] = True
120 | 
121 |     def __call__(self, _, scores):
122 |         mask = self.allowed_mask.expand_as(scores)
123 |         scores[~mask] = -float("inf")
124 | 
125 |         return scores
126 | 
127 | 
128 | class IntegerStoppingCriteria(StoppingCriteria):
129 |     def __init__(
130 |         self,
131 |         tokenizer: PreTrainedTokenizer,
132 |         prompt_length: int,
133 |         max_digits: int = 15,
134 |     ):
135 |         self.tokenizer = tokenizer
136 |         self.prompt_length = prompt_length
137 |         self.max_digits = max_digits
138 | 
139 |     def __call__(
140 |         self,
141 |         input_ids: torch.LongTensor,
142 |         scores: torch.FloatTensor,
143 |     ) -> bool:
144 |         decoded = self.tokenizer.decode(
145 |             input_ids[0][self.prompt_length :], skip_special_tokens=True
146 |         )
147 | 
148 |         if len(decoded.strip()) > self.max_digits:
149 |             return True
150 | 
151 |         if (
152 |             len(decoded) > 1
153 |             and "," in decoded
154 |             and any(c.isdigit() for c in decoded.split(",")[0])
155 |         ):
156 |             return True
157 | 
158 |         if (
159 |             len(decoded) > 1
160 |             and any(c.isdigit() for c in decoded)
161 |             and decoded[-1] in (" ", "\n")
162 |         ):
163 |             return True
164 | 
165 |         return False
166 | 
167 | 
168 | class OutputIntegersTokens(LogitsProcessor):
169 |     def __init__(self, tokenizer: PreTrainedTokenizer, prompt: str):
170 |         self.tokenizer = tokenizer
171 |         self.tokenized_prompt = tokenizer(prompt, return_tensors="pt")
172 |         vocab_size = len(tokenizer)
173 |         self.allowed_mask = torch.zeros(vocab_size, dtype=torch.bool)
174 | 
175 |         for _, token_id in tokenizer.get_vocab().items():
176 |             token_str = tokenizer.decode(token_id, skip_special_tokens=True).strip()
177 | 
178 |             if (
179 |                 token_str == ""
180 |                 or all(c.isdigit() for c in token_str)
181 |                 or "," in token_str
182 |                 and all(c.isdigit() for c in token_str.split(",")[0])
183 |             ):
184 |                 self.allowed_mask[token_id] = True
185 | 
186 |     def __call__(self, _, scores):
187 |         mask = self.allowed_mask.expand_as(scores)
188 |         scores[~mask] = -float("inf")
189 | 
190 |         return scores
191 | 


--------------------------------------------------------------------------------
/prob_jsonformer/main.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Set, Union, Dict, Any, Optional
  2 | 
  3 | from prob_jsonformer.logits_processors import (
  4 |     NumberStoppingCriteria,
  5 |     OutputNumbersTokens,
  6 |     IntegerStoppingCriteria,
  7 |     OutputIntegersTokens,
  8 |     StringStoppingCriteria,
  9 | )
 10 | from prob_jsonformer.prob_choice_tree import prob_choice_tree, round_to_nsf
 11 | from prob_jsonformer.type_prefixes import get_prefix_tokens_for_types
 12 | 
 13 | from termcolor import cprint
 14 | from transformers import PreTrainedModel, PreTrainedTokenizer
 15 | import json
 16 | import torch
 17 | 
 18 | GENERATION_MARKER = "|GENERATION|"
 19 | 
 20 | 
 21 | class Jsonformer:
 22 |     value: Dict[str, Any] = {}
 23 | 
 24 |     def __init__(
 25 |         self,
 26 |         model: PreTrainedModel,
 27 |         tokenizer: PreTrainedTokenizer,
 28 |         json_schema: Dict[str, Any],
 29 |         prompt: str,
 30 |         *,
 31 |         debug: bool = False,
 32 |         max_array_length: int = 10,
 33 |         max_number_tokens: int = 6,
 34 |         temperature: Optional[float] = None,
 35 |         max_string_token_length: Optional[int] = None,
 36 |     ):
 37 |         self.model = model
 38 |         self.tokenizer = tokenizer
 39 |         self.json_schema = json_schema
 40 |         self.prompt = prompt
 41 | 
 42 |         self.type_prefix_tokens = get_prefix_tokens_for_types(tokenizer)
 43 | 
 44 |         self.number_logit_processor = OutputNumbersTokens(self.tokenizer, self.prompt)
 45 |         self.integer_logit_processor = OutputIntegersTokens(self.tokenizer, self.prompt)
 46 | 
 47 |         self.generation_marker = "|GENERATION|"
 48 |         self.debug_on = debug
 49 |         self.max_array_length = max_array_length
 50 | 
 51 |         self.max_number_tokens = max_number_tokens
 52 |         self.temperature = temperature
 53 |         self.max_string_token_length = max_string_token_length
 54 | 
 55 |     def debug(self, caller: str, value: str, is_prompt: bool = False):
 56 |         if self.debug_on:
 57 |             if is_prompt:
 58 |                 cprint(caller, "green", end=" ")
 59 |                 cprint(value, "yellow")
 60 |             else:
 61 |                 cprint(caller, "green", end=" ")
 62 |                 cprint(value, "blue")
 63 | 
 64 |     def generate_number(self, temperature: Union[float, None] = None, iterations=0):
 65 |         prompt = self.get_prompt()
 66 |         self.debug("[generate_number]", prompt, is_prompt=True)
 67 |         input_tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(
 68 |             self.model.device
 69 |         )
 70 |         response = self.model.generate(
 71 |             input_tokens,
 72 |             max_new_tokens=self.max_number_tokens,
 73 |             num_return_sequences=1,
 74 |             logits_processor=[self.number_logit_processor],
 75 |             stopping_criteria=[
 76 |                 NumberStoppingCriteria(self.tokenizer, len(input_tokens[0]))
 77 |             ],
 78 |             temperature=temperature or self.temperature,
 79 |             pad_token_id=self.tokenizer.eos_token_id,
 80 |         )
 81 |         response = self.tokenizer.decode(response[0], skip_special_tokens=True)
 82 | 
 83 |         response = response[len(prompt) :]
 84 |         if "," in response:
 85 |             response = response.split(",")[0]
 86 |         response = response.replace(" ", "").rstrip(".")
 87 |         self.debug("[generate_number]", response)
 88 |         try:
 89 |             return float(response)
 90 |         except ValueError:
 91 |             if iterations > 3:
 92 |                 raise ValueError("Failed to generate a valid number")
 93 | 
 94 |             return self.generate_number(
 95 |                 temperature=self.temperature * 1.3, iterations=iterations + 1
 96 |             )
 97 | 
 98 |     def generate_integer(self, temperature: Union[float, None] = None, iterations=0):
 99 |         prompt = self.get_prompt()
100 |         self.debug("[generate_number]", prompt, is_prompt=True)
101 |         input_tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(
102 |             self.model.device
103 |         )
104 |         response = self.model.generate(
105 |             input_tokens,
106 |             max_new_tokens=self.max_number_tokens,
107 |             num_return_sequences=1,
108 |             logits_processor=[self.integer_logit_processor],
109 |             stopping_criteria=[
110 |                 IntegerStoppingCriteria(self.tokenizer, len(input_tokens[0]))
111 |             ],
112 |             temperature=temperature or self.temperature,
113 |             pad_token_id=self.tokenizer.eos_token_id,
114 |         )
115 |         response = self.tokenizer.decode(response[0], skip_special_tokens=True)
116 | 
117 |         response = response[len(prompt) :]
118 |         if "," in response:
119 |             response = response.split(",")[0]
120 |         response = response.replace(" ", "")
121 |         self.debug("[generate_integer]", response)
122 |         try:
123 |             return int(response)
124 |         except ValueError:
125 |             if iterations > 3:
126 |                 raise ValueError("Failed to generate a valid integer")
127 | 
128 |             return self.generate_integer(temperature=self.temperature * 1.3)
129 | 
130 |     def generate_boolean(self) -> bool:
131 |         prompt = self.get_prompt()
132 |         self.debug("[generate_boolean]", prompt, is_prompt=True)
133 | 
134 |         input_tensor = self.tokenizer.encode(prompt, return_tensors="pt")
135 |         output = self.model.forward(input_tensor.to(self.model.device))
136 |         logits = output.logits[0, -1]
137 | 
138 |         true_token_id = self.tokenizer.encode("true", return_tensors="pt")[0, 0]
139 |         false_token_id = self.tokenizer.encode("false", return_tensors="pt")[0, 0]
140 | 
141 |         result = logits[true_token_id] > logits[false_token_id]
142 | 
143 |         self.debug("[generate_boolean]", result)
144 | 
145 |         return result.item()
146 | 
147 |     def generate_string(self, maxLength=None, minLength=None) -> str:
148 |         prompt = self.get_prompt() + '"'
149 |         self.debug("[generate_string]", prompt, is_prompt=True)
150 |         input_tokens = self.tokenizer.encode(prompt, return_tensors="pt").to(
151 |             self.model.device
152 |         )
153 | 
154 |         response = self.model.generate(
155 |             input_tokens,
156 |             max_new_tokens=self.max_string_token_length,
157 |             num_return_sequences=1,
158 |             temperature=self.temperature,
159 |             stopping_criteria=[
160 |                 StringStoppingCriteria(
161 |                     self.tokenizer, len(input_tokens[0]), maxLength, minLength
162 |                 )
163 |             ],
164 |             early_stopping=False,
165 |             pad_token_id=self.tokenizer.eos_token_id,
166 |         )
167 | 
168 |         # Some models output the prompt as part of the response
169 |         # This removes the prompt from the response if it is present
170 |         if (
171 |             len(response[0]) >= len(input_tokens[0])
172 |             and (response[0][: len(input_tokens[0])] == input_tokens).all()
173 |         ):
174 |             response = response[0][len(input_tokens[0]) :]
175 |         if response.shape[0] == 1:
176 |             response = response[0]
177 | 
178 |         response = self.tokenizer.decode(response, skip_special_tokens=True)
179 | 
180 |         self.debug("[generate_string]", "|" + response + "|")
181 | 
182 |         if response.count('"') < 1:
183 |             return response
184 | 
185 |         return response.split('"')[0].strip()
186 | 
187 |     def generate_p_enum(self, values: list, round: int) -> str:
188 |         """
189 |         This is not in the json schema, but can be usefull for effeciently getting the prob distibution over choices
190 |         """
191 |         prompt = self.get_prompt() + '"'
192 |         self.debug("[generate_p_enum]", prompt, is_prompt=True)
193 |         input_ids = self.tokenizer.encode(prompt, return_tensors="pt").to(
194 |             self.model.device
195 |         )[0]
196 |         values_tokens = self.tokenizer(values).input_ids
197 |         values_tokens = [torch.tensor(c) for c in values_tokens]
198 | 
199 |         r = list(
200 |             prob_choice_tree(
201 |                 self.model, self.tokenizer, input_ids, values_tokens, round=round
202 |             )
203 |         )
204 |         return r
205 | 
206 |     def generate_p_integer(
207 |         self, range_min: float, range_max: float, round: int
208 |     ) -> float:
209 |         """
210 |         This is not in the json schema, but can be usefull for effeciently generating the weighted mean from a range of integers
211 |         """
212 |         values = [str(n) for n in range(int(range_min), int(range_max) + 1)]
213 |         result = self.generate_p_enum(values, round=round)
214 | 
215 |         # now do a weighted average
216 |         total = 0.0
217 |         for r in result:
218 |             total += float(r["choice"]) * r["prob"]
219 | 
220 |         if round is not None:
221 |             total = round_to_nsf(total, round)
222 |         return total
223 | 
224 |     def generate_enum(self, enum_values: Set[str]) -> str:
225 |         prompt = self.get_prompt()
226 |         self.debug("[generate_enum]", prompt, is_prompt=True)
227 | 
228 |         # These are necessary because we don't know if we're at the end or middle of an object/array
229 |         terminal_tokens = torch.concat(
230 |             [
231 |                 self.tokenizer.encode(s, add_special_tokens=False, return_tensors="pt")[
232 |                     :, 0
233 |                 ]
234 |                 for s in ('", "', '"}', '"]')
235 |             ]
236 |         )
237 | 
238 |         highest_probability = 0.0
239 |         best_option = None
240 |         for option in enum_values:
241 |             n_option_tokens = self.tokenizer.encode(
242 |                 f'"{option}', add_special_tokens=False, return_tensors="pt"
243 |             ).shape[1]
244 |             prompt_tokens = self.tokenizer.encode(
245 |                 prompt + f'"{option}', return_tensors="pt"
246 |             )
247 |             option_tokens = prompt_tokens[0, -n_option_tokens:]
248 | 
249 |             with torch.no_grad():
250 |                 logits = self.model.forward(prompt_tokens.to(self.model.device)).logits[
251 |                     0, -n_option_tokens - 1 :
252 |                 ]
253 |             probabilities = torch.softmax(logits, dim=1)
254 |             option_token_probabilities = probabilities[:-1][
255 |                 torch.arange(probabilities.shape[0] - 1), option_tokens
256 |             ]
257 | 
258 |             termination_probability = torch.max(probabilities[-1, terminal_tokens])
259 |             option_probability = (
260 |                 torch.prod(option_token_probabilities) * termination_probability
261 |             )
262 |             self.debug("[generate_enum]", f"{option_probability}, {option}")
263 | 
264 |             if option_probability > highest_probability:
265 |                 best_option = option
266 |                 highest_probability = option_probability
267 | 
268 |         self.debug("[generate_enum]", best_option)
269 | 
270 |         return best_option
271 | 
272 |     def generate_object(
273 |         self, properties: Dict[str, Any], obj: Dict[str, Any]
274 |     ) -> Dict[str, Any]:
275 |         for key, schema in properties.items():
276 |             self.debug("[generate_object] generating value for", key)
277 |             obj[key] = self.generate_value(schema, obj, key)
278 |         return obj
279 | 
280 |     def choose_type_to_generate(self, possible_types: List[str]) -> str:
281 |         possible_types = list(set(possible_types))  # remove duplicates
282 |         self.debug("[choose_type_to_generate]", possible_types)
283 |         if len(possible_types) < 1:
284 |             raise ValueError(f"Union type must not be empty")
285 |         elif len(possible_types) == 1:
286 |             return possible_types[0]
287 | 
288 |         prompt = self.get_prompt()
289 |         input_tensor = self.tokenizer.encode(prompt, return_tensors="pt")
290 |         output = self.model.forward(input_tensor.to(self.model.device))
291 |         logits = output.logits[0, -1]
292 | 
293 |         max_type = None
294 |         max_logit = -float("inf")
295 |         for possible_type in possible_types:
296 |             try:
297 |                 prefix_tokens = self.type_prefix_tokens[possible_type]
298 |             except KeyError:
299 |                 raise ValueError(f"Unsupported schema type: {possible_type}")
300 |             max_type_logit = logits[prefix_tokens].max()
301 |             if max_type_logit > max_logit:
302 |                 max_type = possible_type
303 |                 max_logit = max_type_logit
304 | 
305 |         if max_type is None:
306 |             raise Exception("Unable to find best type to generate for union type")
307 |         self.debug("[choose_type_to_generate]", max_type)
308 |         return max_type
309 | 
310 |     def generate_value(
311 |         self,
312 |         schema: Dict[str, Any],
313 |         obj: Union[Dict[str, Any], List[Any]],
314 |         key: Union[str, None] = None,
315 |     ) -> Any:
316 |         schema_type = schema["type"]
317 |         if isinstance(schema_type, list):
318 |             if key:
319 |                 obj[key] = self.generation_marker
320 |             else:
321 |                 obj.append(self.generation_marker)
322 |             schema_type = self.choose_type_to_generate(schema_type)
323 |         if schema_type == "number":
324 |             if key:
325 |                 obj[key] = self.generation_marker
326 |             else:
327 |                 obj.append(self.generation_marker)
328 |             return self.generate_number()
329 |         elif schema_type == "integer":
330 |             if key:
331 |                 obj[key] = self.generation_marker
332 |             else:
333 |                 obj.append(self.generation_marker)
334 |             return self.generate_integer()
335 |         elif schema_type == "boolean":
336 |             if key:
337 |                 obj[key] = self.generation_marker
338 |             else:
339 |                 obj.append(self.generation_marker)
340 |             return self.generate_boolean()
341 |         elif schema_type == "string":
342 |             if key:
343 |                 obj[key] = self.generation_marker
344 |             else:
345 |                 obj.append(self.generation_marker)
346 |             return self.generate_string(
347 |                 schema["maxLength"] if "maxLength" in schema else None
348 |             )
349 |         elif schema_type == "p_enum":
350 |             if key:
351 |                 obj[key] = self.generation_marker
352 |             else:
353 |                 obj.append(self.generation_marker)
354 |             return self.generate_p_enum(schema["values"], round=schema.get("round", 3))
355 |         elif schema_type == "p_integer":
356 |             if key:
357 |                 obj[key] = self.generation_marker
358 |             else:
359 |                 obj.append(self.generation_marker)
360 |             return self.generate_p_integer(
361 |                 schema["minimum"], schema["maximum"], round=schema.get("round", 3)
362 |             )
363 |         elif schema_type == "enum":
364 |             if key:
365 |                 obj[key] = self.generation_marker
366 |             else:
367 |                 obj.append(self.generation_marker)
368 |             return self.generate_enum(set(schema["values"]))
369 |         elif schema_type == "array":
370 |             new_array = []
371 |             obj[key] = new_array
372 |             return self.generate_array(schema["items"], new_array)
373 |         elif schema_type == "object":
374 |             new_obj = {}
375 |             if key:
376 |                 obj[key] = new_obj
377 |             else:
378 |                 obj.append(new_obj)
379 |             return self.generate_object(schema["properties"], new_obj)
380 |         elif schema_type == "null":
381 |             return None
382 |         else:
383 |             raise ValueError(f"Unsupported schema type: {schema_type}")
384 | 
385 |     def generate_array(self, item_schema: Dict[str, Any], obj: Dict[str, Any]) -> list:
386 |         for _ in range(self.max_array_length):
387 |             # forces array to have at least one element
388 |             element = self.generate_value(item_schema, obj)
389 |             obj[-1] = element
390 | 
391 |             obj.append(self.generation_marker)
392 |             input_prompt = self.get_prompt()
393 |             obj.pop()
394 |             input_tensor = self.tokenizer.encode(input_prompt, return_tensors="pt")
395 |             output = self.model.forward(input_tensor.to(self.model.device))
396 |             logits = output.logits[0, -1]
397 | 
398 |             top_indices = logits.topk(30).indices
399 |             sorted_token_ids = top_indices[logits[top_indices].argsort(descending=True)]
400 | 
401 |             found_comma = False
402 |             found_close_bracket = False
403 | 
404 |             for token_id in sorted_token_ids:
405 |                 decoded_token = self.tokenizer.decode(
406 |                     token_id, skip_special_tokens=True
407 |                 )
408 |                 if "," in decoded_token:
409 |                     found_comma = True
410 |                     break
411 |                 if "]" in decoded_token:
412 |                     found_close_bracket = True
413 |                     break
414 | 
415 |             if found_close_bracket or not found_comma:
416 |                 break
417 | 
418 |         return obj
419 | 
420 |     def get_prompt(self):
421 |         template = """{prompt}\nOutput result in the following JSON schema format:\n```json{schema}```\nResult: ```json\n{progress}"""
422 |         # TODO: collapse p_X schema types into X to not confuse the model
423 |         value = self.value
424 | 
425 |         progress = json.dumps(value)
426 |         gen_marker_index = progress.find(f'"{self.generation_marker}"')
427 |         if gen_marker_index != -1:
428 |             progress = progress[:gen_marker_index]
429 |         else:
430 |             raise ValueError("Failed to find generation marker")
431 | 
432 |         prompt = template.format(
433 |             prompt=self.prompt,
434 |             schema=json.dumps(self.json_schema),
435 |             progress=progress,
436 |         )
437 | 
438 |         return prompt
439 | 
440 |     def __call__(self) -> Dict[str, Any]:
441 |         self.value = {}
442 |         generated_data = self.generate_object(
443 |             self.json_schema["properties"], self.value
444 |         )
445 |         return generated_data
446 | 


--------------------------------------------------------------------------------
/prob_jsonformer/prob_choice_tree.py:
--------------------------------------------------------------------------------
 1 | from jaxtyping import Float, Int
 2 | import torch
 3 | from torch.nn import functional as F
 4 | from torch import Tensor
 5 | from typing import List, Callable, Tuple, Dict, Optional
 6 | import pandas as pd
 7 | from transformers import AutoModelForCausalLM, AutoTokenizer
 8 | import math
 9 | 
10 | 
11 | def round_to_nsf(num, nsf):
12 |     if num != 0:
13 |         return round(num, -int(math.floor(math.log10(abs(num))) + 1 - nsf))
14 |     else:
15 |         return 0  # Can't take the log of 0
16 | 
17 | 
18 | def get_valid_next_choices(choices_tokens, current_tokens):
19 |     next_choices = []
20 |     for choice_tokens in choices_tokens:
21 |         # if we have some more slots left
22 |         if len(current_tokens) < len(choice_tokens):
23 |             # see if current_tokens matches
24 |             if (choice_tokens[: len(current_tokens)] == current_tokens).all():
25 |                 c = choice_tokens[len(current_tokens)].item()
26 |                 next_choices.append(c)
27 | 
28 |     next_choices = list(set(next_choices))
29 |     return torch.LongTensor(next_choices)
30 | 
31 | 
32 | def _prob_choice_tree(
33 |     model: AutoModelForCausalLM,
34 |     tokenizer: AutoTokenizer,
35 |     input_ids: Int[Tensor, "seq"],
36 |     choices_tokens: List[Int[Tensor, "seq"]],
37 |     choice: Optional[Int[Tensor, ""]] = None,
38 |     prob: float = 1,
39 |     current_tokens: Int[Tensor, "seq"] = torch.LongTensor([]),
40 | ):
41 |     if choice is not None:
42 |         c = choice[None].to(current_tokens.device)
43 |         current_tokens = torch.cat([current_tokens, c], dim=-1)
44 |         c = choice[None].to(input_ids.device)
45 |         input_ids = torch.cat([input_ids, c], dim=-1)
46 | 
47 |     next_choices = get_valid_next_choices(choices_tokens, current_tokens)
48 |     if len(next_choices) == 0:
49 |         s = tokenizer.decode(current_tokens, skip_special_tokens=True)
50 |         r = dict(prob=prob, choice=s)
51 |         yield r
52 |     else:
53 |         o = model(input_ids[None])
54 |         logits_constrained = o.logits[0, -1][next_choices]
55 |         probs = F.softmax(logits_constrained, dim=-1)
56 |         for i in range(len(next_choices)):
57 |             next_choice = next_choices[i]
58 |             next_prob = prob * probs[i].item()
59 |             yield from prob_choice_tree(
60 |                 model=model,
61 |                 tokenizer=tokenizer,
62 |                 choices_tokens=choices_tokens,
63 |                 input_ids=input_ids,
64 |                 choice=next_choice,
65 |                 prob=next_prob,
66 |                 current_tokens=current_tokens,
67 |             )
68 | 
69 | 
70 | def prob_choice_tree(
71 |     *args,
72 |     sort: bool = True,
73 |     round=3,
74 |     **kwargs,
75 | ):
76 |     choice_json = list(
77 |         _prob_choice_tree(
78 |             *args,
79 |             **kwargs,
80 |         )
81 |     )
82 |     # order by probability
83 |     if sort:
84 |         choice_json = sorted(choice_json, key=lambda x: -x["prob"])
85 | 
86 |     # round probabilities
87 |     for c in choice_json:
88 |         c["prob"] = round_to_nsf(c["prob"], round)
89 |     return choice_json
90 | 


--------------------------------------------------------------------------------
/prob_jsonformer/type_prefixes.py:
--------------------------------------------------------------------------------
 1 | from transformers import PreTrainedTokenizer
 2 | from typing import Dict, List
 3 | import re
 4 | 
 5 | def is_number_prefix(s: str) -> bool:
 6 |     return re.match(r"^[\-\d]+\.?[\d]*$", s)
 7 | 
 8 | def is_boolean_prefix(s: str) -> bool:
 9 |     return 'true'.startswith(s) or 'false'.startswith(s)
10 | 
11 | def is_null_prefix(s: str) -> bool:
12 |     return 'null'.startswith(s)
13 | 
14 | def is_string_prefix(s: str) -> bool:
15 |     return re.match(r'^"[^"]*"?$', s)
16 | 
17 | def is_array_prefix(s: str) -> bool:
18 |     return re.match(r'^\[["\-\d\[{]*$', s)
19 | 
20 | def is_object_prefix(s: str) -> bool:
21 |     return re.match(r'^\{"?$', s)
22 | 
23 | def get_prefix_tokens_for_types(tokenizer: PreTrainedTokenizer) -> Dict[str, List[str]]:
24 |     vocab = tokenizer.vocab.items()
25 |     return {
26 |         "number": [v for k, v in vocab if is_number_prefix(k)],
27 |         "boolean": [v for k, v in vocab if is_boolean_prefix(k)],
28 |         "null": [v for k, v in vocab if is_null_prefix(k)],
29 |         "string": [v for k, v in vocab if is_string_prefix(k)],
30 |         "array": [v for k, v in vocab if is_array_prefix(k)],
31 |         "object": [v for k, v in vocab if is_object_prefix(k)],
32 |     }
33 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "prob_jsonformer"
 3 | version = "0.12.0"
 4 | description = ""
 5 | authors = ["1rgs <rgsduke@gmail.com>"]
 6 | readme = "README.md"
 7 | 
 8 | [tool.poetry.dependencies]
 9 | python = "^3.9,<4.0"
10 | termcolor = "^2.3.0"
11 | jaxtyping = "^0.2.28"
12 | 
13 | [tool.poetry.group.dev.dependencies]
14 | pandas = "^2.2.2"
15 | ipykernel = "^6.22.0"
16 | torch = "^2.0.0"
17 | accelerate = "^0.18.0"
18 | bitsandbytes = "^0.38.1"
19 | transformers = "^4.49"
20 | 
21 | 
22 | [build-system]
23 | requires = ["poetry-core"]
24 | build-backend = "poetry.core.masonry.api"
25 | 
26 | [virtualenvs]
27 | create = true
28 | in-project = true
29 | 


--------------------------------------------------------------------------------