├── .github └── workflows │ ├── deploy.yaml │ └── test.yaml.off ├── .gitignore ├── 00_xml.ipynb ├── 01_funccall.ipynb ├── 02_shell.ipynb ├── 03_download.ipynb ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── _quarto.yml ├── index.ipynb ├── nbdev.yml ├── pyproject.toml ├── samples ├── sample_core.py └── sample_styles.css ├── settings.ini ├── setup.py ├── styles.css └── toolslm ├── __init__.py ├── _modidx.py ├── download.py ├── funccall.py ├── md_hier.py ├── shell.py └── xml.py /.github/workflows/deploy.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy to GitHub Pages 2 | 3 | permissions: 4 | contents: write 5 | pages: write 6 | 7 | on: 8 | push: 9 | branches: [ "main", "master" ] 10 | workflow_dispatch: 11 | jobs: 12 | deploy: 13 | runs-on: ubuntu-latest 14 | steps: [uses: fastai/workflows/quarto-ghp@master] 15 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml.off: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [workflow_dispatch, pull_request, push] 3 | 4 | jobs: 5 | test: 6 | runs-on: ubuntu-latest 7 | steps: [uses: fastai/workflows/nbdev-ci@master] 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .gitattributes 2 | _proc/ 3 | index_files/ 4 | sidebar.yml 5 | Gemfile.lock 6 | token 7 | _docs/ 8 | conda/ 9 | .last_checked 10 | .gitconfig 11 | *.bak 12 | *.log 13 | *~ 14 | ~* 15 | _tmp* 16 | tmp* 17 | tags 18 | 19 | # Byte-compiled / optimized / DLL files 20 | __pycache__/ 21 | *.py[cod] 22 | *$py.class 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | env/ 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | 
*.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # dotenv 101 | .env 102 | 103 | # virtualenv 104 | .venv 105 | venv/ 106 | ENV/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | 121 | .vscode 122 | *.swp 123 | 124 | # osx generated files 125 | .DS_Store 126 | .DS_Store? 
127 | .Trashes 128 | ehthumbs.db 129 | Thumbs.db 130 | .idea 131 | 132 | # pytest 133 | .pytest_cache 134 | 135 | # tools/trust-doc-nbs 136 | docs_src/.last_checked 137 | 138 | # symlinks to fastai 139 | docs_src/fastai 140 | tools/fastai 141 | 142 | # link checker 143 | checklink/cookies.txt 144 | 145 | # .gitconfig is now autogenerated 146 | .gitconfig 147 | 148 | _docs 149 | 150 | /.quarto/ 151 | -------------------------------------------------------------------------------- /00_xml.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "efe78920", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#|default_exp xml" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "3d773712-12fe-440e-891f-36f59666dfde", 16 | "metadata": {}, 17 | "source": [ 18 | "# xml source" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "ff6f6471-8061-4fdd-85a1-25fdc27c5cf3", 24 | "metadata": {}, 25 | "source": [ 26 | "## Setup" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "033c76fd", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "#| export\n", 37 | "import hashlib,xml.etree.ElementTree as ET\n", 38 | "from collections import namedtuple\n", 39 | "\n", 40 | "from fastcore.utils import *\n", 41 | "from fastcore.meta import delegates\n", 42 | "from fastcore.xtras import hl_md\n", 43 | "from fastcore.xml import to_xml, Document, Documents, Document_content, Src\n", 44 | "from fastcore.script import call_parse\n", 45 | "try: from IPython import display\n", 46 | "except: display=None" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "id": "2795f9fc", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "#| exports\n", 57 | "def json_to_xml(d:dict, # JSON dictionary to convert\n", 58 | " rnm:str # Root name\n", 59 | " )->str:\n", 60 
| " \"Convert `d` to XML.\"\n", 61 | " root = ET.Element(rnm)\n", 62 | " def build_xml(data, parent):\n", 63 | " if isinstance(data, dict):\n", 64 | " for key, value in data.items(): build_xml(value, ET.SubElement(parent, key))\n", 65 | " elif isinstance(data, list):\n", 66 | " for item in data: build_xml(item, ET.SubElement(parent, 'item'))\n", 67 | " else: parent.text = str(data)\n", 68 | " build_xml(d, root)\n", 69 | " ET.indent(root)\n", 70 | " return ET.tostring(root, encoding='unicode')" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "id": "140a35a2", 76 | "metadata": {}, 77 | "source": [ 78 | "JSON doesn't map as nicely to XML as the data structure used in `fastcore.xml`, but for simple XML trees it can be convenient -- for example:" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "005a5be4", 85 | "metadata": {}, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "text/markdown": [ 90 | "```xml\n", 91 | "\n", 92 | " Howard\n", 93 | " \n", 94 | " Jeremy\n", 95 | " Peter\n", 96 | " \n", 97 | "
\n", 98 | " Queensland\n", 99 | " Australia\n", 100 | "
\n", 101 | "
\n", 102 | "```" 103 | ], 104 | "text/plain": [ 105 | "" 106 | ] 107 | }, 108 | "execution_count": null, 109 | "metadata": {}, 110 | "output_type": "execute_result" 111 | } 112 | ], 113 | "source": [ 114 | "a = dict(surname='Howard', firstnames=['Jeremy','Peter'],\n", 115 | " address=dict(state='Queensland',country='Australia'))\n", 116 | "hl_md(json_to_xml(a, 'person'))" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "id": "7788c48c", 122 | "metadata": {}, 123 | "source": [ 124 | "## Including documents" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "id": "479be4c9", 130 | "metadata": {}, 131 | "source": [ 132 | "According [to Anthropic](https://docs.anthropic.com/claude/docs/long-context-window-tips), \"*it's essential to structure your prompts in a way that clearly separates the input data from the instructions*\". They recommend using something like the following:\n", 133 | "\n", 134 | "```xml\n", 135 | "Here are some documents for you to reference for your task:\n", 136 | " \n", 137 | "\n", 138 | "\n", 139 | "\n", 140 | "(URL, file name, hash, etc)\n", 141 | "\n", 142 | "\n", 143 | "(the text content)\n", 144 | "\n", 145 | "\n", 146 | "\n", 147 | "```\n", 148 | "\n", 149 | "We will create some small helper functions to make it easier to generate context in this format, although we're use `` instead of `` to avoid conflict with that HTML tag. Although it's based on Anthropic's recommendation, it's likely to work well with other models too." 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "a01dc320", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "#| exports\n", 160 | "doctype = namedtuple('doctype', ['src', 'content'])" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "id": "6620a123", 166 | "metadata": {}, 167 | "source": [ 168 | "We'll use `doctype` to store our pairs." 
169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "ce853491", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "#| exports\n", 179 | "def _add_nls(s):\n", 180 | " \"Add newlines to start and end of `s` if missing\"\n", 181 | " if not s: return s\n", 182 | " if s[ 0]!='\\n': s = '\\n'+s\n", 183 | " if s[-1]!='\\n': s = s+'\\n'\n", 184 | " return s" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "026d3b06", 190 | "metadata": {}, 191 | "source": [ 192 | "Since Anthropic's example shows newlines before and after each tag, we'll do the same." 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "id": "26fddbc3", 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "'a'" 205 | ] 206 | }, 207 | "execution_count": null, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "to_xml(Src('a'))" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "id": "1bac81ce", 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "'a'" 226 | ] 227 | }, 228 | "execution_count": null, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "to_xml(Document('a'))" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "id": "40a7e0ba", 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/plain": [ 246 | "'a'" 247 | ] 248 | }, 249 | "execution_count": null, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "to_xml(Documents('a'))" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "id": "932e8858", 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "#| exports\n", 266 | "def mk_doctype(content:str, # The 
document content\n", 267 | " src:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided\n", 268 | " ) -> namedtuple:\n", 269 | " \"Create a `doctype` named tuple\"\n", 270 | " if src is None: src = hashlib.md5(content.encode()).hexdigest()[:8]\n", 271 | " return doctype(_add_nls(str(src).strip()), _add_nls(content.strip()))" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "id": "8800921b", 277 | "metadata": {}, 278 | "source": [ 279 | "This is a convenience wrapper to ensure that a `doctype` has the needed information in the right format." 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "id": "14f9e185", 286 | "metadata": {}, 287 | "outputs": [ 288 | { 289 | "data": { 290 | "text/plain": [ 291 | "doctype(src='\\n47e19350\\n', content='\\nThis is a \"sample\"\\n')" 292 | ] 293 | }, 294 | "execution_count": null, 295 | "metadata": {}, 296 | "output_type": "execute_result" 297 | } 298 | ], 299 | "source": [ 300 | "doc = 'This is a \"sample\"'\n", 301 | "mk_doctype(doc)" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "id": "15e454db", 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "#| exports\n", 312 | "def mk_doc(index:int, # The document index\n", 313 | " content:str, # The document content\n", 314 | " src:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided\n", 315 | " **kwargs\n", 316 | " ) -> tuple:\n", 317 | " \"Create an `ft` format tuple for a single doc in Anthropic's recommended format\"\n", 318 | " dt = mk_doctype(content, src)\n", 319 | " content = Document_content(NotStr(dt.content))\n", 320 | " src = Src(NotStr(dt.src))\n", 321 | " return Document(src, content, index=index, **kwargs)" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "id": "a8b6ac26", 327 | "metadata": {}, 328 | "source": [ 329 | "We can now generate XML for one document in the suggested format:" 330 | 
] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "id": "e7ed5a9a", 336 | "metadata": {}, 337 | "outputs": [ 338 | { 339 | "data": { 340 | "text/markdown": [ 341 | "```html\n", 342 | "\n", 343 | "47e19350\n", 344 | "\n", 345 | "This is a \"sample\"\n", 346 | "\n", 347 | "```" 348 | ], 349 | "text/plain": [ 350 | "document((src(('\\n47e19350\\n',),{}), document-content(('\\nThis is a \"sample\"\\n',),{})),{'index': 1, 'title': 'test'})" 351 | ] 352 | }, 353 | "execution_count": null, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "mk_doc(1, doc, title=\"test\")" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "id": "ba5ebfab", 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "#| exports\n", 370 | "def docs_xml(docs:list[str], # The content of each document\n", 371 | " srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided\n", 372 | " prefix:bool=True, # Include Anthropic's suggested prose intro?\n", 373 | " details:Optional[list]=None # Optional list of dicts with additional attrs for each doc\n", 374 | " )->str:\n", 375 | " \"Create an XML string containing `docs` in Anthropic's recommended format\"\n", 376 | " pre = 'Here are some documents for you to reference for your task:\\n\\n' if prefix else ''\n", 377 | " if srcs is None: srcs = [None]*len(docs)\n", 378 | " if details is None: details = [{}]*len(docs)\n", 379 | " docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))\n", 380 | " return pre + to_xml(Documents(docs))" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "id": "85004124", 386 | "metadata": {}, 387 | "source": [ 388 | "Putting it all together, we have our final XML format:" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "id": "1dac60f6", 395 | "metadata": {}, 396 | "outputs": 
[ 397 | { 398 | "name": "stdout", 399 | "output_type": "stream", 400 | "text": [ 401 | "Here are some documents for you to reference for your task:\n", 402 | "\n", 403 | "\n", 404 | "47e19350\n", 405 | "\n", 406 | "This is a \"sample\"\n", 407 | "\n", 408 | "doc.txt\n", 409 | "\n", 410 | "And another one\n", 411 | "\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "docs = [doc, 'And another one']\n", 417 | "srcs = [None, 'doc.txt']\n", 418 | "print(docs_xml(docs, srcs))" 419 | ] 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "id": "2a8a7a9a", 424 | "metadata": {}, 425 | "source": [ 426 | "## Context creation" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "id": "cd06b2dc", 432 | "metadata": {}, 433 | "source": [ 434 | "Now that we can generate Anthropic's XML format, let's make it easy for a few common cases." 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "id": "65317fc6", 440 | "metadata": {}, 441 | "source": [ 442 | "### File list to context" 443 | ] 444 | }, 445 | { 446 | "cell_type": "markdown", 447 | "id": "3778e8ed", 448 | "metadata": {}, 449 | "source": [ 450 | "For generating XML context from files, we'll just read them as text and use the file names as `src`." 
451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "id": "0a168636", 457 | "metadata": {}, 458 | "outputs": [], 459 | "source": [ 460 | "#| exports\n", 461 | "def files2ctx(\n", 462 | " fnames:list[Union[str,Path]], # List of file names to add to context\n", 463 | " prefix:bool=True # Include Anthropic's suggested prose intro?\n", 464 | ")->str: # XML for LM context\n", 465 | " fnames = [Path(o) for o in fnames]\n", 466 | " contents = [o.read_text() for o in fnames]\n", 467 | " return docs_xml(contents, fnames, prefix=prefix)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "id": "1bf73d36", 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/markdown": [ 479 | "```xml\n", 480 | "Here are some documents for you to reference for your task:\n", 481 | "\n", 482 | "\n", 483 | "samples/sample_core.py\n", 484 | "\n", 485 | "import inspect\n", 486 | "empty = inspect.Parameter.empty\n", 487 | "models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'\n", 488 | "\n", 489 | "samples/sample_styles.css\n", 490 | "\n", 491 | ".cell { margin-bottom: 1rem; }\n", 492 | ".cell > .sourceCode { margin-bottom: 0; }\n", 493 | ".cell-output > pre { margin-bottom: 0; }\n", 494 | "\n", 495 | "```" 496 | ], 497 | "text/plain": [ 498 | "" 499 | ] 500 | }, 501 | "execution_count": null, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "fnames = ['samples/sample_core.py', 'samples/sample_styles.css']\n", 508 | "hl_md(files2ctx(fnames))" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "id": "191ddb2b", 514 | "metadata": {}, 515 | "source": [ 516 | "### Folder to context" 517 | ] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "execution_count": null, 522 | "id": "a0452a21", 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "#| exports\n", 527 | "@delegates(globtastic)\n", 528 | 
"def folder2ctx(\n", 529 | " folder:Union[str,Path], # Folder name containing files to add to context\n", 530 | " prefix:bool=True, # Include Anthropic's suggested prose intro?\n", 531 | " **kwargs # Passed to `globtastic`\n", 532 | ")->str: # XML for Claude context\n", 533 | " fnames = globtastic(folder, **kwargs)\n", 534 | " return files2ctx(fnames, prefix=prefix)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "id": "efd52392", 541 | "metadata": {}, 542 | "outputs": [ 543 | { 544 | "name": "stdout", 545 | "output_type": "stream", 546 | "text": [ 547 | "\n", 548 | "samples/sample_core.py\n", 549 | "\n", 550 | "import inspect\n", 551 | "empty = inspect.Parameter.empty\n", 552 | "models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'\n", 553 | "\n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "print(folder2ctx('samples', prefix=False, file_glob='*.py'))" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "id": "0cd4bbeb-b07f-447d-abe8-2b4190d4aa63", 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "#| exports\n", 569 | "#| hide\n", 570 | "@call_parse\n", 571 | "@delegates(folder2ctx)\n", 572 | "def folder2ctx_cli(\n", 573 | " folder:str, # Folder name containing files to add to context\n", 574 | " **kwargs # Passed to `folder2ctx`\n", 575 | ")->str: # XML for Claude context\n", 576 | " print(folder2ctx(folder, **kwargs))" 577 | ] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "id": "95bc490c-bf9d-4146-a729-97f7221559af", 582 | "metadata": {}, 583 | "source": [ 584 | ":::{.callout-tip}\n", 585 | "\n", 586 | "After you install `toolslm`, `folder2ctx` becomes available from the command line. 
You can see how to use it with the following command:\n", 587 | "\n", 588 | "```bash\n", 589 | "folder2ctx -h\n", 590 | "```\n", 591 | ":::" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "id": "94ec4289", 597 | "metadata": {}, 598 | "source": [ 599 | "## Export -" 600 | ] 601 | }, 602 | { 603 | "cell_type": "code", 604 | "execution_count": null, 605 | "id": "1e9ee5c1", 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "#|hide\n", 610 | "#|eval: false\n", 611 | "from nbdev.doclinks import nbdev_export\n", 612 | "nbdev_export()" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "id": "5d06a6ce", 619 | "metadata": {}, 620 | "outputs": [], 621 | "source": [] 622 | } 623 | ], 624 | "metadata": { 625 | "kernelspec": { 626 | "display_name": "python3", 627 | "language": "python", 628 | "name": "python3" 629 | } 630 | }, 631 | "nbformat": 4, 632 | "nbformat_minor": 5 633 | } 634 | -------------------------------------------------------------------------------- /01_funccall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "efe78920", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#|default_exp funccall" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "3d773712-12fe-440e-891f-36f59666dfde", 16 | "metadata": {}, 17 | "source": [ 18 | "# funccall source" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "e5ad6b86", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "#| exports\n", 29 | "import inspect\n", 30 | "from collections import abc\n", 31 | "from fastcore.utils import *\n", 32 | "from fastcore.docments import docments\n", 33 | "from typing import get_origin, get_args, Dict, List, Optional, Tuple, Union\n", 34 | "from types import UnionType" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | 
"execution_count": null, 40 | "id": "aec123ab", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "#|hide\n", 45 | "from fastcore.test import *" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "a9f43047", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "#| export\n", 56 | "empty = inspect.Parameter.empty" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "1a7cdbc6", 62 | "metadata": {}, 63 | "source": [ 64 | "## Function calling" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "7ec35c95", 70 | "metadata": {}, 71 | "source": [ 72 | "Many LLMs do function calling (aka tool use) by taking advantage of JSON schema.\n", 73 | "\n", 74 | "We'll use [docments](https://fastcore.fast.ai/docments.html) to make getting JSON schema from Python functions as ergonomic as possible. Each parameter (and the return value) should have a type, and a docments comment with the description of what it is. Here's an example:" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "id": "4a017af1", 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "def silly_sum(\n", 85 | " a:int, # First thing to sum\n", 86 | " b:int=1, # Second thing to sum\n", 87 | " c:list[int]=None, # A pointless argument\n", 88 | ") -> int: # The sum of the inputs\n", 89 | " \"Adds a + b.\"\n", 90 | " return a + b" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "1a3ff443", 96 | "metadata": {}, 97 | "source": [ 98 | "This is what `docments` makes of that:" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "b3f2ebcf", 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/markdown": [ 110 | "```json\n", 111 | "{ 'a': { 'anno': ,\n", 112 | " 'default': ,\n", 113 | " 'docment': 'First thing to sum'},\n", 114 | " 'b': {'anno': , 'default': 1, 'docment': 'Second thing to sum'},\n", 115 | " 'c': {'anno': 
list[int], 'default': None, 'docment': 'A pointless argument'},\n", 116 | " 'return': { 'anno': ,\n", 117 | " 'default': ,\n", 118 | " 'docment': 'The sum of the inputs'}}\n", 119 | "```" 120 | ], 121 | "text/plain": [ 122 | "{'a': {'docment': 'First thing to sum',\n", 123 | " 'anno': int,\n", 124 | " 'default': inspect._empty},\n", 125 | " 'b': {'docment': 'Second thing to sum', 'anno': int, 'default': 1},\n", 126 | " 'c': {'docment': 'A pointless argument', 'anno': list[int], 'default': None},\n", 127 | " 'return': {'docment': 'The sum of the inputs',\n", 128 | " 'anno': int,\n", 129 | " 'default': inspect._empty}}" 130 | ] 131 | }, 132 | "execution_count": null, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "d = docments(silly_sum, full=True)\n", 139 | "d" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "745e44ea", 145 | "metadata": {}, 146 | "source": [ 147 | "Note that this is an [AttrDict](https://fastcore.fast.ai/basics.html#attrdict) so we can treat it like an object, *or* a dict:" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "35cb279d", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "('First thing to sum', int)" 160 | ] 161 | }, 162 | "execution_count": null, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "d.a.docment, d['a']['anno']" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "id": "e7bf4025", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "#| exports\n", 179 | "def _types(t:type)->tuple[str,Optional[str]]:\n", 180 | " \"Tuple of json schema type name and (if appropriate) array item name.\"\n", 181 | " if t is empty: raise TypeError('Missing type')\n", 182 | " tmap = {int:\"integer\", float:\"number\", str:\"string\", bool:\"boolean\", list:\"array\", 
dict:\"object\"}\n", 183 | " tmap.update({k.__name__: v for k, v in tmap.items()})\n", 184 | " if getattr(t, '__origin__', None) in (list,tuple):\n", 185 | " args = getattr(t, '__args__', None)\n", 186 | " item_type = \"object\" if not args else tmap.get(t.__args__[0].__name__, \"object\")\n", 187 | " return \"array\", item_type\n", 188 | " # if t is a string like 'int', directly use the string as the key\n", 189 | " elif isinstance(t, str): return tmap.get(t, \"object\"), None\n", 190 | " # if t is the type itself and a container\n", 191 | " elif get_origin(t): return tmap.get(get_origin(t).__name__, \"object\"), None\n", 192 | " # if t is the type itself like int, use the __name__ representation as the key\n", 193 | " else: return tmap.get(t.__name__, \"object\"), None" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "id": "edf73046", 199 | "metadata": {}, 200 | "source": [ 201 | "This internal function is needed to convert Python types into JSON schema types." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "id": "ecb7bc52", 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "(('array', 'integer'), ('integer', None), ('integer', None))" 214 | ] 215 | }, 216 | "execution_count": null, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "_types(list[int]), _types(int), _types('int')" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "id": "38b4650a", 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "data": { 233 | "text/plain": [ 234 | "(('array', 'integer'), ('object', None), ('object', None), ('array', 'string'))" 235 | ] 236 | }, 237 | "execution_count": null, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "_types(List[int]), _types(Optional[str]), _types(str | None), _types(Tuple[str, int])" 244 | ] 245 | }, 246 | { 247 
| "cell_type": "markdown", 248 | "id": "f4d0ac1e", 249 | "metadata": {}, 250 | "source": [ 251 | "Note the current behavior:\n", 252 | "\n", 253 | "- ignores all but the first argument for tuples\n", 254 | "- union types map to object which is a stand-in for arbitrary types\n", 255 | "\n", 256 | "These and other approximations may require further refinement in the future." 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "id": "c0e3c940", 262 | "metadata": {}, 263 | "source": [ 264 | "Will also convert custom types to the `object` type." 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "id": "9969fd00", 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "(('array', 'object'), ('object', None))" 277 | ] 278 | }, 279 | "execution_count": null, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | } 283 | ], 284 | "source": [ 285 | "class Custom: a: int\n", 286 | "_types(list[Custom]), _types(Custom)" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "id": "4d5dc245", 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "#| exports\n", 297 | "def _param(name, info):\n", 298 | " \"json schema parameter given `name` and `info` from docments full dict.\"\n", 299 | " paramt,itemt = _types(info.anno)\n", 300 | " pschema = dict(type=paramt, description=info.docment or \"\")\n", 301 | " if itemt: pschema[\"items\"] = {\"type\": itemt}\n", 302 | " if info.default is not empty: pschema[\"default\"] = info.default\n", 303 | " return pschema" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "id": "5337d6bd", 309 | "metadata": {}, 310 | "source": [ 311 | "This private function converts a key/value pair from the `docments` structure into the `dict` that will be needed for the schema." 
312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "id": "2450ace6", 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "name": "stdout", 322 | "output_type": "stream", 323 | "text": [ 324 | "a // {'docment': 'First thing to sum', 'anno': , 'default': }\n" 325 | ] 326 | }, 327 | { 328 | "data": { 329 | "text/plain": [ 330 | "{'type': 'integer', 'description': 'First thing to sum'}" 331 | ] 332 | }, 333 | "execution_count": null, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "n,o = first(d.items())\n", 340 | "print(n,'//', o)\n", 341 | "_param(n, o)" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "id": "ba6bcac4", 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "#| export\n", 352 | "custom_types = {Path}\n", 353 | "\n", 354 | "def _handle_type(t, defs):\n", 355 | " \"Handle a single type, creating nested schemas if necessary\"\n", 356 | " if t is NoneType: return {'type': 'null'}\n", 357 | " if t in custom_types: return {'type':'string', 'format':t.__name__}\n", 358 | " if isinstance(t, type) and not issubclass(t, (int, float, str, bool)) or inspect.isfunction(t):\n", 359 | " defs[t.__name__] = _get_nested_schema(t)\n", 360 | " return {'$ref': f'#/$defs/{t.__name__}'}\n", 361 | " return {'type': _types(t)[0]}" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": null, 367 | "id": "16dbf080", 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/plain": [ 373 | "({'type': 'integer'}, {'type': 'string', 'format': 'Path'})" 374 | ] 375 | }, 376 | "execution_count": null, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "_handle_type(int, None), _handle_type(Path, None)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "id": "7fd6cd29", 389 | "metadata": {}, 390 | "outputs": [], 
391 | "source": [ 392 | "#| export\n", 393 | "def _is_container(t):\n", 394 | " \"Check if type is a container (list, dict, tuple, set, Union)\"\n", 395 | " origin = get_origin(t)\n", 396 | " return origin in (list, dict, tuple, set, Union) if origin else False\n", 397 | "\n", 398 | "def _is_parameterized(t):\n", 399 | " \"Check if type has arguments (e.g. list[int] vs list, dict[str, int] vs dict)\"\n", 400 | " return _is_container(t) and (get_args(t) != ())" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "id": "783747af", 407 | "metadata": {}, 408 | "outputs": [], 409 | "source": [ 410 | "assert _is_parameterized(list[int]) == True\n", 411 | "assert _is_parameterized(int) == False\n", 412 | "assert _is_container(list[int]) == True\n", 413 | "assert _is_container(dict[str, int]) == True\n", 414 | "assert _is_container(int) == False" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "id": "d42c88dd", 420 | "metadata": {}, 421 | "source": [ 422 | "For union and optional types, `Union` covers older `Union[str]` syntax while `UnionType` covers 3.10+ `str | None` syntax." 
423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "id": "7815799b", 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "(str | None, types.UnionType, (str, NoneType))" 435 | ] 436 | }, 437 | "execution_count": null, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "def _example_new_unioin(opt_tup: str | None):\n", 444 | " pass\n", 445 | "\n", 446 | "d = docments(_example_new_unioin, full=True)\n", 447 | "anno1 = first(d.items())[1].anno\n", 448 | "(anno1, get_origin(anno1), get_args(anno1))" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "id": "d745c902", 455 | "metadata": {}, 456 | "outputs": [ 457 | { 458 | "data": { 459 | "text/plain": [ 460 | "(typing.Optional[str], typing.Union, (str, NoneType))" 461 | ] 462 | }, 463 | "execution_count": null, 464 | "metadata": {}, 465 | "output_type": "execute_result" 466 | } 467 | ], 468 | "source": [ 469 | "def _example_old_union(opt_tup: Union[str, type(None)] =None):\n", 470 | " pass\n", 471 | "\n", 472 | "d = docments(_example_old_union, full=True)\n", 473 | "anno2 = first(d.items())[1].anno\n", 474 | "(anno2, get_origin(anno2), get_args(anno2))" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "id": "3c5701c7", 480 | "metadata": {}, 481 | "source": [ 482 | "Support for both union types is part of the broader container handling:" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": null, 488 | "id": "c1153f02", 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "#| export\n", 493 | "def _handle_container(origin, args, defs):\n", 494 | " \"Handle container types like dict, list, tuple, set, and Union\"\n", 495 | " if origin is Union or origin is UnionType:\n", 496 | " return {\"anyOf\": [_handle_type(arg, defs) for arg in args]}\n", 497 | " if origin is dict:\n", 498 | " value_type = 
args[1].__args__[0] if hasattr(args[1], '__args__') else args[1]\n", 499 | " return {\n", 500 | " 'type': 'object',\n", 501 | " 'additionalProperties': (\n", 502 | " {'type': 'array', 'items': _handle_type(value_type, defs)}\n", 503 | " if hasattr(args[1], '__origin__') else _handle_type(args[1], defs)\n", 504 | " )\n", 505 | " }\n", 506 | " elif origin in (list, tuple, set):\n", 507 | " schema = {'type': 'array', 'items': _handle_type(args[0], defs)}\n", 508 | " if origin is set:\n", 509 | " schema['uniqueItems'] = True\n", 510 | " return schema\n", 511 | " return None" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": null, 517 | "id": "5ee1c529", 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "#| export\n", 522 | "def _process_property(name, obj, props, req, defs):\n", 523 | " \"Process a single property of the schema\"\n", 524 | " p = _param(name, obj)\n", 525 | " props[name] = p\n", 526 | " if obj.default is empty: req[name] = True\n", 527 | "\n", 528 | " if _is_container(obj.anno) and _is_parameterized(obj.anno):\n", 529 | " p.update(_handle_container(get_origin(obj.anno), get_args(obj.anno), defs)) \n", 530 | " else:\n", 531 | " # Non-container type or container without arguments\n", 532 | " p.update(_handle_type(obj.anno, defs))" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "id": "38b0f97e", 539 | "metadata": {}, 540 | "outputs": [], 541 | "source": [ 542 | "#| export\n", 543 | "def _get_nested_schema(obj):\n", 544 | " \"Generate nested JSON schema for a class or function\"\n", 545 | " d = docments(obj, full=True)\n", 546 | " props, req, defs = {}, {}, {}\n", 547 | "\n", 548 | " for n, o in d.items():\n", 549 | " if n != 'return' and n != 'self':\n", 550 | " _process_property(n, o, props, req, defs)\n", 551 | "\n", 552 | " schema = dict(type='object', properties=props, title=obj.__name__ if isinstance(obj, type) else None)\n", 553 | " if req: schema['required'] = 
list(req)\n", 554 | " if defs: schema['$defs'] = defs\n", 555 | " return schema" 556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "execution_count": null, 561 | "id": "1bb9df6c", 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "# Test primitive types\n", 566 | "defs = {}\n", 567 | "assert _handle_type(int, defs) == {'type': 'integer'}\n", 568 | "assert _handle_type(str, defs) == {'type': 'string'}\n", 569 | "assert _handle_type(bool, defs) == {'type': 'boolean'}\n", 570 | "assert _handle_type(float, defs) == {'type': 'number'}\n", 571 | "\n", 572 | "# Test custom class\n", 573 | "class TestClass:\n", 574 | " def __init__(self, x: int, y: int): store_attr()\n", 575 | "\n", 576 | "result = _handle_type(TestClass, defs)\n", 577 | "assert result == {'$ref': '#/$defs/TestClass'}\n", 578 | "assert 'TestClass' in defs\n", 579 | "assert defs['TestClass']['type'] == 'object'\n", 580 | "assert 'properties' in defs['TestClass']" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": null, 586 | "id": "b1d09435", 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "# Test primitive types in containers\n", 591 | "assert _handle_container(list, (int,), defs) == {'type': 'array', 'items': {'type': 'integer'}}\n", 592 | "assert _handle_container(tuple, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}}\n", 593 | "assert _handle_container(set, (str,), defs) == {'type': 'array', 'items': {'type': 'string'}, 'uniqueItems': True}\n", 594 | "assert _handle_container(dict, (str,bool), defs) == {'type': 'object', 'additionalProperties': {'type': 'boolean'}}\n", 595 | "\n", 596 | "result = _handle_container(list, (TestClass,), defs)\n", 597 | "assert result == {'type': 'array', 'items': {'$ref': '#/$defs/TestClass'}}\n", 598 | "assert 'TestClass' in defs\n", 599 | "\n", 600 | "# Test complex nested structure\n", 601 | "ComplexType = dict[str, list[TestClass]]\n", 602 | "result = _handle_container(dict, (str, 
list[TestClass]), defs)\n", 603 | "assert result == {\n", 604 | " 'type': 'object',\n", 605 | " 'additionalProperties': {\n", 606 | " 'type': 'array',\n", 607 | " 'items': {'$ref': '#/$defs/TestClass'}\n", 608 | " }\n", 609 | "}" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": null, 615 | "id": "a5fd37d5", 616 | "metadata": {}, 617 | "outputs": [], 618 | "source": [ 619 | "# Test processing of a required integer property\n", 620 | "props, req = {}, {}\n", 621 | "class TestClass:\n", 622 | " \"Test class\"\n", 623 | " def __init__(\n", 624 | " self,\n", 625 | " x: int, # First thing\n", 626 | " y: list[float], # Second thing\n", 627 | " z: str = \"default\", # Third thing\n", 628 | " ): store_attr()\n", 629 | "\n", 630 | "d = docments(TestClass, full=True)\n", 631 | "_process_property('x', d.x, props, req, defs)\n", 632 | "assert 'x' in props\n", 633 | "assert props['x']['type'] == 'integer'\n", 634 | "assert 'x' in req\n", 635 | "\n", 636 | "# Test processing of a required list property\n", 637 | "_process_property('y', d.y, props, req, defs)\n", 638 | "assert 'y' in props\n", 639 | "assert props['y']['type'] == 'array'\n", 640 | "assert props['y']['items']['type'] == 'number'\n", 641 | "assert 'y' in req\n", 642 | "\n", 643 | "# Test processing of an optional string property with default\n", 644 | "_process_property('z', d.z, props, req, defs)\n", 645 | "assert 'z' in props\n", 646 | "assert props['z']['type'] == 'string'\n", 647 | "assert props['z']['default'] == \"default\"\n", 648 | "assert 'z' not in req" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "id": "23f54386", 655 | "metadata": {}, 656 | "outputs": [], 657 | "source": [ 658 | "#| exports\n", 659 | "def get_schema(f:Union[callable,dict], pname='input_schema')->dict:\n", 660 | " \"Generate JSON schema for a class, function, or method\"\n", 661 | " if isinstance(f, dict): return f\n", 662 | " schema = _get_nested_schema(f)\n", 663 | " 
desc = f.__doc__\n", 664 | " assert desc, \"Docstring missing!\"\n", 665 | " d = docments(f, full=True)\n", 666 | " ret = d.pop('return')\n", 667 | " if ret.anno is not empty: desc += f'\\n\\nReturns:\\n- type: {_types(ret.anno)[0]}'\n", 668 | " return {\"name\": f.__name__, \"description\": desc, pname: schema}" 669 | ] 670 | }, 671 | { 672 | "cell_type": "markdown", 673 | "id": "a59df671", 674 | "metadata": {}, 675 | "source": [ 676 | "Putting this all together, we can now test getting a schema from `silly_sum`. The tool use spec doesn't support return annotations directly, so we put that in the description instead." 677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": null, 682 | "id": "e7311af9", 683 | "metadata": {}, 684 | "outputs": [ 685 | { 686 | "name": "stdout", 687 | "output_type": "stream", 688 | "text": [ 689 | "Adds a + b.\n", 690 | "\n", 691 | "Returns:\n", 692 | "- type: integer\n" 693 | ] 694 | }, 695 | { 696 | "data": { 697 | "text/plain": [ 698 | "{'name': 'silly_sum',\n", 699 | " 'input_schema': {'type': 'object',\n", 700 | " 'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},\n", 701 | " 'b': {'type': 'integer',\n", 702 | " 'description': 'Second thing to sum',\n", 703 | " 'default': 1},\n", 704 | " 'c': {'type': 'array',\n", 705 | " 'description': 'A pointless argument',\n", 706 | " 'items': {'type': 'integer'},\n", 707 | " 'default': None}},\n", 708 | " 'title': None,\n", 709 | " 'required': ['a']}}" 710 | ] 711 | }, 712 | "execution_count": null, 713 | "metadata": {}, 714 | "output_type": "execute_result" 715 | } 716 | ], 717 | "source": [ 718 | "s = get_schema(silly_sum)\n", 719 | "desc = s.pop('description')\n", 720 | "print(desc)\n", 721 | "s" 722 | ] 723 | }, 724 | { 725 | "cell_type": "markdown", 726 | "id": "d478ba6b", 727 | "metadata": {}, 728 | "source": [ 729 | "This also works with string annotations, e.g:" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 
null, 735 | "id": "80203962", 736 | "metadata": {}, 737 | "outputs": [ 738 | { 739 | "data": { 740 | "text/plain": [ 741 | "{'name': 'silly_test',\n", 742 | " 'description': 'Mandatory docstring',\n", 743 | " 'input_schema': {'type': 'object',\n", 744 | " 'properties': {'a': {'type': 'integer', 'description': 'quoted type hint'}},\n", 745 | " 'title': None,\n", 746 | " 'required': ['a']}}" 747 | ] 748 | }, 749 | "execution_count": null, 750 | "metadata": {}, 751 | "output_type": "execute_result" 752 | } 753 | ], 754 | "source": [ 755 | "def silly_test(\n", 756 | " a: 'int', # quoted type hint\n", 757 | "):\n", 758 | " \"Mandatory docstring\"\n", 759 | " return a\n", 760 | "\n", 761 | "get_schema(silly_test)" 762 | ] 763 | }, 764 | { 765 | "cell_type": "markdown", 766 | "id": "e3f36f8a", 767 | "metadata": {}, 768 | "source": [ 769 | "This also works with instance methods:" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": null, 775 | "id": "05d33447", 776 | "metadata": {}, 777 | "outputs": [ 778 | { 779 | "data": { 780 | "text/plain": [ 781 | "{'name': 'sums',\n", 782 | " 'description': 'Adds a + b.\\n\\nReturns:\\n- type: integer',\n", 783 | " 'input_schema': {'type': 'object',\n", 784 | " 'properties': {'a': {'type': 'integer', 'description': 'First thing to sum'},\n", 785 | " 'b': {'type': 'integer',\n", 786 | " 'description': 'Second thing to sum',\n", 787 | " 'default': 1}},\n", 788 | " 'title': None,\n", 789 | " 'required': ['a']}}" 790 | ] 791 | }, 792 | "execution_count": null, 793 | "metadata": {}, 794 | "output_type": "execute_result" 795 | } 796 | ], 797 | "source": [ 798 | "class Dummy:\n", 799 | " def sums(\n", 800 | " self,\n", 801 | " a:int, # First thing to sum\n", 802 | " b:int=1 # Second thing to sum\n", 803 | " ) -> int: # The sum of the inputs\n", 804 | " \"Adds a + b.\"\n", 805 | " print(f\"Finding the sum of {a} and {b}\")\n", 806 | " return a + b\n", 807 | "\n", 808 | "get_schema(Dummy.sums)" 809 | ] 810 | }, 811 | { 
812 | "cell_type": "markdown", 813 | "id": "ae3fdfa4", 814 | "metadata": {}, 815 | "source": [ 816 | "`get_schema` also handles more complicated structures such as nested classes. This is useful for things like structured outputs." 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "id": "ce3be915", 823 | "metadata": {}, 824 | "outputs": [ 825 | { 826 | "data": { 827 | "text/plain": [ 828 | "{'name': 'Conversation',\n", 829 | " 'description': 'A conversation between two speakers',\n", 830 | " 'input_schema': {'type': 'object',\n", 831 | " 'properties': {'turns': {'type': 'array',\n", 832 | " 'description': 'Turns of the conversation',\n", 833 | " 'items': {'$ref': '#/$defs/Turn'}}},\n", 834 | " 'title': 'Conversation',\n", 835 | " 'required': ['turns'],\n", 836 | " '$defs': {'Turn': {'type': 'object',\n", 837 | " 'properties': {'speaker_a': {'type': 'string',\n", 838 | " 'description': \"First speaker's message\"},\n", 839 | " 'speaker_b': {'type': 'string',\n", 840 | " 'description': \"Second speaker's message\"}},\n", 841 | " 'title': 'Turn',\n", 842 | " 'required': ['speaker_a', 'speaker_b']}}}}" 843 | ] 844 | }, 845 | "execution_count": null, 846 | "metadata": {}, 847 | "output_type": "execute_result" 848 | } 849 | ], 850 | "source": [ 851 | "class Turn:\n", 852 | " \"Turn between two speakers\"\n", 853 | " def __init__(\n", 854 | " self,\n", 855 | " speaker_a:str, # First speaker's message\n", 856 | " speaker_b:str, # Second speaker's message\n", 857 | " ): store_attr()\n", 858 | "\n", 859 | "class Conversation:\n", 860 | " \"A conversation between two speakers\"\n", 861 | " def __init__(\n", 862 | " self,\n", 863 | " turns:list[Turn], # Turns of the conversation\n", 864 | " ): store_attr()\n", 865 | "\n", 866 | "get_schema(Conversation)" 867 | ] 868 | }, 869 | { 870 | "cell_type": "code", 871 | "execution_count": null, 872 | "id": "386e514d", 873 | "metadata": {}, 874 | "outputs": [ 875 | { 876 | "data": { 877 | 
"text/plain": [ 878 | "{'name': 'DictConversation',\n", 879 | " 'description': 'A conversation between two speakers',\n", 880 | " 'input_schema': {'type': 'object',\n", 881 | " 'properties': {'turns': {'type': 'object',\n", 882 | " 'description': 'dictionary of topics and the Turns of the conversation',\n", 883 | " 'additionalProperties': {'type': 'array',\n", 884 | " 'items': {'$ref': '#/$defs/Turn'}}}},\n", 885 | " 'title': 'DictConversation',\n", 886 | " 'required': ['turns'],\n", 887 | " '$defs': {'Turn': {'type': 'object',\n", 888 | " 'properties': {'speaker_a': {'type': 'string',\n", 889 | " 'description': \"First speaker's message\"},\n", 890 | " 'speaker_b': {'type': 'string',\n", 891 | " 'description': \"Second speaker's message\"}},\n", 892 | " 'title': 'Turn',\n", 893 | " 'required': ['speaker_a', 'speaker_b']}}}}" 894 | ] 895 | }, 896 | "execution_count": null, 897 | "metadata": {}, 898 | "output_type": "execute_result" 899 | } 900 | ], 901 | "source": [ 902 | "class DictConversation:\n", 903 | " \"A conversation between two speakers\"\n", 904 | " def __init__(\n", 905 | " self,\n", 906 | " turns:dict[str,list[Turn]], # dictionary of topics and the Turns of the conversation\n", 907 | " ): store_attr()\n", 908 | "\n", 909 | "get_schema(DictConversation)" 910 | ] 911 | }, 912 | { 913 | "cell_type": "code", 914 | "execution_count": null, 915 | "id": "2c08ac6b", 916 | "metadata": {}, 917 | "outputs": [ 918 | { 919 | "data": { 920 | "text/plain": [ 921 | "{'name': 'SetConversation',\n", 922 | " 'description': 'A conversation between two speakers',\n", 923 | " 'input_schema': {'type': 'object',\n", 924 | " 'properties': {'turns': {'type': 'array',\n", 925 | " 'description': 'the unique Turns of the conversation',\n", 926 | " 'items': {'$ref': '#/$defs/Turn'},\n", 927 | " 'uniqueItems': True}},\n", 928 | " 'title': 'SetConversation',\n", 929 | " 'required': ['turns'],\n", 930 | " '$defs': {'Turn': {'type': 'object',\n", 931 | " 'properties': {'speaker_a': 
{'type': 'string',\n", 932 | " 'description': \"First speaker's message\"},\n", 933 | " 'speaker_b': {'type': 'string',\n", 934 | " 'description': \"Second speaker's message\"}},\n", 935 | " 'title': 'Turn',\n", 936 | " 'required': ['speaker_a', 'speaker_b']}}}}" 937 | ] 938 | }, 939 | "execution_count": null, 940 | "metadata": {}, 941 | "output_type": "execute_result" 942 | } 943 | ], 944 | "source": [ 945 | "class SetConversation:\n", 946 | " \"A conversation between two speakers\"\n", 947 | " def __init__(\n", 948 | " self,\n", 949 | " turns:set[Turn], # the unique Turns of the conversation\n", 950 | " ): store_attr()\n", 951 | "\n", 952 | "get_schema(SetConversation)" 953 | ] 954 | }, 955 | { 956 | "cell_type": "code", 957 | "execution_count": null, 958 | "id": "8cf3f35c", 959 | "metadata": {}, 960 | "outputs": [], 961 | "source": [ 962 | "#| exports\n", 963 | "def PathArg(\n", 964 | " path: str # A filesystem path\n", 965 | "): return Path(path)" 966 | ] 967 | }, 968 | { 969 | "cell_type": "markdown", 970 | "id": "169212a6", 971 | "metadata": {}, 972 | "source": [ 973 | "Paths are a special case, since they only take `*args` and `**kwargs` as params, but normally we'd use them in a schema by just passing a str. So we create a custom param type for that." 
974 | ] 975 | }, 976 | { 977 | "cell_type": "code", 978 | "execution_count": null, 979 | "id": "e9135dfa", 980 | "metadata": {}, 981 | "outputs": [ 982 | { 983 | "data": { 984 | "text/plain": [ 985 | "{'name': 'path_test',\n", 986 | " 'description': 'Mandatory docstring',\n", 987 | " 'input_schema': {'type': 'object',\n", 988 | " 'properties': {'a': {'type': 'object',\n", 989 | " 'description': 'a type hint',\n", 990 | " '$ref': '#/$defs/PathArg'},\n", 991 | " 'b': {'type': 'object',\n", 992 | " 'description': 'b type hint',\n", 993 | " '$ref': '#/$defs/PathArg'}},\n", 994 | " 'title': None,\n", 995 | " 'required': ['a', 'b'],\n", 996 | " '$defs': {'PathArg': {'type': 'object',\n", 997 | " 'properties': {'path': {'type': 'string',\n", 998 | " 'description': 'A filesystem path'}},\n", 999 | " 'title': None,\n", 1000 | " 'required': ['path']}}}}" 1001 | ] 1002 | }, 1003 | "execution_count": null, 1004 | "metadata": {}, 1005 | "output_type": "execute_result" 1006 | } 1007 | ], 1008 | "source": [ 1009 | "def path_test(\n", 1010 | " a: PathArg, # a type hint\n", 1011 | " b: PathArg # b type hint\n", 1012 | "):\n", 1013 | " \"Mandatory docstring\"\n", 1014 | " return a/b\n", 1015 | "\n", 1016 | "get_schema(path_test)" 1017 | ] 1018 | }, 1019 | { 1020 | "cell_type": "markdown", 1021 | "id": "c6d1d0c8", 1022 | "metadata": {}, 1023 | "source": [ 1024 | "Alternatively, use `Path` as usual, and handle the `format` key in the json to use that as a callable:" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "code", 1029 | "execution_count": null, 1030 | "id": "bdb69462", 1031 | "metadata": {}, 1032 | "outputs": [ 1033 | { 1034 | "data": { 1035 | "text/plain": [ 1036 | "{'name': 'path_test2',\n", 1037 | " 'description': 'Mandatory docstring',\n", 1038 | " 'input_schema': {'type': 'object',\n", 1039 | " 'properties': {'a': {'type': 'string',\n", 1040 | " 'description': 'a type hint',\n", 1041 | " 'format': 'Path'},\n", 1042 | " 'b': {'type': 'string', 'description': 'b type hint', 
'format': 'Path'}},\n", 1043 | " 'title': None,\n", 1044 | " 'required': ['a', 'b']}}" 1045 | ] 1046 | }, 1047 | "execution_count": null, 1048 | "metadata": {}, 1049 | "output_type": "execute_result" 1050 | } 1051 | ], 1052 | "source": [ 1053 | "def path_test2(\n", 1054 | " a: Path, # a type hint\n", 1055 | " b: Path # b type hint\n", 1056 | "):\n", 1057 | " \"Mandatory docstring\"\n", 1058 | " return a/b\n", 1059 | "\n", 1060 | "get_schema(path_test2)" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "markdown", 1065 | "id": "369320d4", 1066 | "metadata": {}, 1067 | "source": [ 1068 | "### Additional `get_schema()` Test Cases" 1069 | ] 1070 | }, 1071 | { 1072 | "cell_type": "markdown", 1073 | "id": "a8052380", 1074 | "metadata": {}, 1075 | "source": [ 1076 | "Union types are approximately mapped to JSON schema 'anyOf' with two or more value types." 1077 | ] 1078 | }, 1079 | { 1080 | "cell_type": "code", 1081 | "execution_count": null, 1082 | "id": "6fc1d6f9", 1083 | "metadata": {}, 1084 | "outputs": [ 1085 | { 1086 | "data": { 1087 | "text/plain": [ 1088 | "{'name': '_union_test',\n", 1089 | " 'description': 'Mandatory docstring',\n", 1090 | " 'input_schema': {'type': 'object',\n", 1091 | " 'properties': {'opt_tup': {'type': 'object',\n", 1092 | " 'description': '',\n", 1093 | " 'default': None,\n", 1094 | " 'anyOf': [{'type': 'array'}, {'type': 'string'}, {'type': 'integer'}]}},\n", 1095 | " 'title': None}}" 1096 | ] 1097 | }, 1098 | "execution_count": null, 1099 | "metadata": {}, 1100 | "output_type": "execute_result" 1101 | } 1102 | ], 1103 | "source": [ 1104 | "def _union_test(opt_tup: Union[Tuple[int, int], str, int]=None):\n", 1105 | " \"Mandatory docstring\"\n", 1106 | " return \"\"\n", 1107 | "get_schema(_union_test)" 1108 | ] 1109 | }, 1110 | { 1111 | "cell_type": "markdown", 1112 | "id": "7641aca8", 1113 | "metadata": {}, 1114 | "source": [ 1115 | "The new (Python 3.10+) union syntax can also be used, producing an equivalent schema." 
1116 | ] 1117 | }, 1118 | { 1119 | "cell_type": "code", 1120 | "execution_count": null, 1121 | "id": "a1a11b3b", 1122 | "metadata": {}, 1123 | "outputs": [ 1124 | { 1125 | "data": { 1126 | "text/plain": [ 1127 | "{'name': '_new_union_test',\n", 1128 | " 'description': 'Mandatory docstring',\n", 1129 | " 'input_schema': {'type': 'object',\n", 1130 | " 'properties': {'opt_tup': {'type': 'object',\n", 1131 | " 'description': '',\n", 1132 | " 'default': None,\n", 1133 | " 'anyOf': [{'type': 'array'}, {'type': 'string'}, {'type': 'integer'}]}},\n", 1134 | " 'title': None}}" 1135 | ] 1136 | }, 1137 | "execution_count": null, 1138 | "metadata": {}, 1139 | "output_type": "execute_result" 1140 | } 1141 | ], 1142 | "source": [ 1143 | "def _new_union_test(opt_tup: Tuple[int, int] | str | int =None):\n", 1144 | " \"Mandatory docstring\"\n", 1145 | " pass\n", 1146 | "get_schema(_new_union_test)" 1147 | ] 1148 | }, 1149 | { 1150 | "cell_type": "markdown", 1151 | "id": "8d24cc0a", 1152 | "metadata": {}, 1153 | "source": [ 1154 | "Optional is a special case of union types, limited to two types, one of which is None (mapped to null in JSON schema):" 1155 | ] 1156 | }, 1157 | { 1158 | "cell_type": "code", 1159 | "execution_count": null, 1160 | "id": "ac8f3d19", 1161 | "metadata": {}, 1162 | "outputs": [ 1163 | { 1164 | "data": { 1165 | "text/plain": [ 1166 | "{'name': '_optional_test',\n", 1167 | " 'description': 'Mandatory docstring',\n", 1168 | " 'input_schema': {'type': 'object',\n", 1169 | " 'properties': {'opt_tup': {'type': 'object',\n", 1170 | " 'description': '',\n", 1171 | " 'default': None,\n", 1172 | " 'anyOf': [{'type': 'array'}, {'type': 'null'}]}},\n", 1173 | " 'title': None}}" 1174 | ] 1175 | }, 1176 | "execution_count": null, 1177 | "metadata": {}, 1178 | "output_type": "execute_result" 1179 | } 1180 | ], 1181 | "source": [ 1182 | "def _optional_test(opt_tup: Optional[Tuple[int, int]]=None):\n", 1183 | " \"Mandatory docstring\"\n", 1184 | " pass\n", 1185 | 
"get_schema(_optional_test)" 1186 | ] 1187 | }, 1188 | { 1189 | "cell_type": "markdown", 1190 | "id": "c969721b", 1191 | "metadata": {}, 1192 | "source": [ 1193 | "Containers can also be used, both in their parameterized form (`List[int]`) or as their unparameterized raw type (`List`). In the latter case, the item type is mapped to `object` in JSON schema." 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": null, 1199 | "id": "b2959197", 1200 | "metadata": {}, 1201 | "outputs": [ 1202 | { 1203 | "data": { 1204 | "text/plain": [ 1205 | "{'name': '_list_test',\n", 1206 | " 'description': 'Mandatory docstring',\n", 1207 | " 'input_schema': {'type': 'object',\n", 1208 | " 'properties': {'l': {'type': 'array',\n", 1209 | " 'description': '',\n", 1210 | " 'items': {'type': 'integer'}}},\n", 1211 | " 'title': None,\n", 1212 | " 'required': ['l']}}" 1213 | ] 1214 | }, 1215 | "execution_count": null, 1216 | "metadata": {}, 1217 | "output_type": "execute_result" 1218 | } 1219 | ], 1220 | "source": [ 1221 | "def _list_test(l: List[int]):\n", 1222 | " \"Mandatory docstring\"\n", 1223 | " pass\n", 1224 | "get_schema(_list_test)" 1225 | ] 1226 | }, 1227 | { 1228 | "cell_type": "code", 1229 | "execution_count": null, 1230 | "id": "c8fbfea7", 1231 | "metadata": {}, 1232 | "outputs": [ 1233 | { 1234 | "data": { 1235 | "text/plain": [ 1236 | "{'name': '_raw_list_test',\n", 1237 | " 'description': 'Mandatory docstring',\n", 1238 | " 'input_schema': {'type': 'object',\n", 1239 | " 'properties': {'l': {'type': 'array',\n", 1240 | " 'description': '',\n", 1241 | " 'items': {'type': 'object'}}},\n", 1242 | " 'title': None,\n", 1243 | " 'required': ['l']}}" 1244 | ] 1245 | }, 1246 | "execution_count": null, 1247 | "metadata": {}, 1248 | "output_type": "execute_result" 1249 | } 1250 | ], 1251 | "source": [ 1252 | "def _raw_list_test(l: List):\n", 1253 | " \"Mandatory docstring\"\n", 1254 | " pass\n", 1255 | "get_schema(_raw_list_test)" 1256 | ] 1257 | }, 1258 
| { 1259 | "cell_type": "markdown", 1260 | "id": "5704c197", 1261 | "metadata": {}, 1262 | "source": [ 1263 | "The same applies to dictionary, which can similarly be parameterized with key/value types or specified as a raw type." 1264 | ] 1265 | }, 1266 | { 1267 | "cell_type": "code", 1268 | "execution_count": null, 1269 | "id": "b2e8c567", 1270 | "metadata": {}, 1271 | "outputs": [ 1272 | { 1273 | "data": { 1274 | "text/plain": [ 1275 | "{'name': '_dict_test',\n", 1276 | " 'description': 'Mandatory docstring',\n", 1277 | " 'input_schema': {'type': 'object',\n", 1278 | " 'properties': {'d': {'type': 'object',\n", 1279 | " 'description': '',\n", 1280 | " 'additionalProperties': {'type': 'integer'}}},\n", 1281 | " 'title': None,\n", 1282 | " 'required': ['d']}}" 1283 | ] 1284 | }, 1285 | "execution_count": null, 1286 | "metadata": {}, 1287 | "output_type": "execute_result" 1288 | } 1289 | ], 1290 | "source": [ 1291 | "def _dict_test(d: Dict[str, int]):\n", 1292 | " \"Mandatory docstring\"\n", 1293 | " pass\n", 1294 | "get_schema(_dict_test)" 1295 | ] 1296 | }, 1297 | { 1298 | "cell_type": "code", 1299 | "execution_count": null, 1300 | "id": "b3138ac4", 1301 | "metadata": {}, 1302 | "outputs": [ 1303 | { 1304 | "data": { 1305 | "text/plain": [ 1306 | "{'name': '_raw_dict_test',\n", 1307 | " 'description': 'Mandatory docstring',\n", 1308 | " 'input_schema': {'type': 'object',\n", 1309 | " 'properties': {'d': {'type': 'object', 'description': ''}},\n", 1310 | " 'title': None,\n", 1311 | " 'required': ['d']}}" 1312 | ] 1313 | }, 1314 | "execution_count": null, 1315 | "metadata": {}, 1316 | "output_type": "execute_result" 1317 | } 1318 | ], 1319 | "source": [ 1320 | "def _raw_dict_test(d: Dict):\n", 1321 | " \"Mandatory docstring\"\n", 1322 | "get_schema(_raw_dict_test)" 1323 | ] 1324 | }, 1325 | { 1326 | "cell_type": "markdown", 1327 | "id": "9529d39a", 1328 | "metadata": {}, 1329 | "source": [ 1330 | "### Python tool" 1331 | ] 1332 | }, 1333 | { 1334 | "cell_type": 
"markdown", 1335 | "id": "7a69cad9", 1336 | "metadata": {}, 1337 | "source": [ 1338 | "In language model clients it's often useful to have a 'code interpreter' -- this is something that runs code, and generally outputs the result of the last expression (i.e like IPython or Jupyter). \n", 1339 | "\n", 1340 | "In this section we'll create the `python` function, which executes a string as Python code, with an optional timeout. If the last line is an expression, we'll return that -- just like in IPython or Jupyter, but without needing them installed." 1341 | ] 1342 | }, 1343 | { 1344 | "cell_type": "code", 1345 | "execution_count": null, 1346 | "id": "873000d7", 1347 | "metadata": {}, 1348 | "outputs": [], 1349 | "source": [ 1350 | "#| exports\n", 1351 | "import ast, time, signal, traceback\n", 1352 | "from fastcore.utils import *" 1353 | ] 1354 | }, 1355 | { 1356 | "cell_type": "code", 1357 | "execution_count": null, 1358 | "id": "4703296a", 1359 | "metadata": {}, 1360 | "outputs": [], 1361 | "source": [ 1362 | "#| exports\n", 1363 | "def _copy_loc(new, orig):\n", 1364 | " \"Copy location information from original node to new node and all children.\"\n", 1365 | " new = ast.copy_location(new, orig)\n", 1366 | " for field, o in ast.iter_fields(new):\n", 1367 | " if isinstance(o, ast.AST): setattr(new, field, _copy_loc(o, orig))\n", 1368 | " elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o])\n", 1369 | " return new" 1370 | ] 1371 | }, 1372 | { 1373 | "cell_type": "markdown", 1374 | "id": "6c0d4922", 1375 | "metadata": {}, 1376 | "source": [ 1377 | "This is an internal function that's needed for `_run` to ensure that location information is available in the abstract syntax tree (AST), since otherwise python complains." 
1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "code", 1382 | "execution_count": null, 1383 | "id": "1574585f", 1384 | "metadata": {}, 1385 | "outputs": [], 1386 | "source": [ 1387 | "#| exports\n", 1388 | "def _run(code:str, glb:dict=None, loc:dict=None):\n", 1389 | " \"Run `code`, returning final expression (similar to IPython)\"\n", 1390 | " tree = ast.parse(code)\n", 1391 | " last_node = tree.body[-1] if tree.body else None\n", 1392 | " \n", 1393 | " # If the last node is an expression, modify the AST to capture the result\n", 1394 | " if isinstance(last_node, ast.Expr):\n", 1395 | " tgt = [ast.Name(id='_result', ctx=ast.Store())]\n", 1396 | " assign_node = ast.Assign(targets=tgt, value=last_node.value)\n", 1397 | " tree.body[-1] = _copy_loc(assign_node, last_node)\n", 1398 | "\n", 1399 | " compiled_code = compile(tree, filename='', mode='exec')\n", 1400 | " glb = glb or {}\n", 1401 | " stdout_buffer = io.StringIO()\n", 1402 | " saved_stdout = sys.stdout\n", 1403 | " sys.stdout = stdout_buffer\n", 1404 | " try: exec(compiled_code, glb, loc)\n", 1405 | " finally: sys.stdout = saved_stdout\n", 1406 | " _result = glb.get('_result', None)\n", 1407 | " if _result is not None: return _result\n", 1408 | " return stdout_buffer.getvalue().strip()" 1409 | ] 1410 | }, 1411 | { 1412 | "cell_type": "markdown", 1413 | "id": "92ca7f47", 1414 | "metadata": {}, 1415 | "source": [ 1416 | "This is the internal function used to actually run the code -- we pull off the last AST to see if it's an expression (i.e something that returns a value), and if so, we store it to a special `_result` variable so we can return it." 
1417 | ] 1418 | }, 1419 | { 1420 | "cell_type": "code", 1421 | "execution_count": null, 1422 | "id": "15b72cb2", 1423 | "metadata": {}, 1424 | "outputs": [ 1425 | { 1426 | "data": { 1427 | "text/plain": [ 1428 | "479001600" 1429 | ] 1430 | }, 1431 | "execution_count": null, 1432 | "metadata": {}, 1433 | "output_type": "execute_result" 1434 | } 1435 | ], 1436 | "source": [ 1437 | "_run('import math;math.factorial(12)')" 1438 | ] 1439 | }, 1440 | { 1441 | "cell_type": "code", 1442 | "execution_count": null, 1443 | "id": "632a7ac1", 1444 | "metadata": {}, 1445 | "outputs": [ 1446 | { 1447 | "data": { 1448 | "text/plain": [ 1449 | "'2'" 1450 | ] 1451 | }, 1452 | "execution_count": null, 1453 | "metadata": {}, 1454 | "output_type": "execute_result" 1455 | } 1456 | ], 1457 | "source": [ 1458 | "_run('print(1+1)')" 1459 | ] 1460 | }, 1461 | { 1462 | "cell_type": "markdown", 1463 | "id": "34f2e5c2", 1464 | "metadata": {}, 1465 | "source": [ 1466 | "We now have the machinery needed to create our `python` function." 
1467 | ] 1468 | }, 1469 | { 1470 | "cell_type": "code", 1471 | "execution_count": null, 1472 | "id": "81857615", 1473 | "metadata": {}, 1474 | "outputs": [], 1475 | "source": [ 1476 | "#| exports\n", 1477 | "def python(code:str, # Code to execute\n", 1478 | " glb:Optional[dict]=None, # Globals namespace\n", 1479 | " loc:Optional[dict]=None, # Locals namespace\n", 1480 | " timeout:int=3600 # Maximum run time in seconds before a `TimeoutError` is raised\n", 1481 | " ): # Result of last node, if it's an expression, or `None` otherwise\n", 1482 | " \"\"\"Executes python `code` with `timeout` and returning final expression (similar to IPython).\n", 1483 | " Raised exceptions are returned as a string, with a stack trace.\"\"\"\n", 1484 | " def handler(*args): raise TimeoutError()\n", 1485 | " if glb is None: glb = inspect.currentframe().f_back.f_globals\n", 1486 | " if loc is None: loc=glb\n", 1487 | " signal.signal(signal.SIGALRM, handler)\n", 1488 | " signal.alarm(timeout)\n", 1489 | " try: return _run(code, glb, loc)\n", 1490 | " except Exception as e: return traceback.format_exc()\n", 1491 | " finally: signal.alarm(0)" 1492 | ] 1493 | }, 1494 | { 1495 | "cell_type": "markdown", 1496 | "id": "b6b9324f", 1497 | "metadata": {}, 1498 | "source": [ 1499 | "There's no builtin security here -- you should generally use this in a sandbox, or alternatively prompt before running code. It can handle multiline function definitions, and pretty much any other normal Python syntax." 
1500 | ] 1501 | }, 1502 | { 1503 | "cell_type": "code", 1504 | "execution_count": null, 1505 | "id": "69d74f4d", 1506 | "metadata": {}, 1507 | "outputs": [ 1508 | { 1509 | "data": { 1510 | "text/plain": [ 1511 | "120" 1512 | ] 1513 | }, 1514 | "execution_count": null, 1515 | "metadata": {}, 1516 | "output_type": "execute_result" 1517 | } 1518 | ], 1519 | "source": [ 1520 | "python(\"\"\"def factorial(n):\n", 1521 | " if n == 0 or n == 1: return 1\n", 1522 | " else: return n * factorial(n-1)\n", 1523 | "factorial(5)\"\"\")" 1524 | ] 1525 | }, 1526 | { 1527 | "cell_type": "markdown", 1528 | "id": "6c629442", 1529 | "metadata": {}, 1530 | "source": [ 1531 | "If the code takes longer than `timeout` then it returns an error string." 1532 | ] 1533 | }, 1534 | { 1535 | "cell_type": "code", 1536 | "execution_count": null, 1537 | "id": "fcb472b3", 1538 | "metadata": {}, 1539 | "outputs": [ 1540 | { 1541 | "name": "stdout", 1542 | "output_type": "stream", 1543 | "text": [ 1544 | "Traceback (most recent call last):\n", 1545 | " File \"/var/folders/5c/jls7k26j1tq6l03cl_kvpnwh0000gn/T/ipykernel_97636/2963369439.py\", line 14, in python\n", 1546 | " try: return _run(code, glb, loc)\n", 1547 | " ^^^^^^^^^^^^^^^^^^^^\n", 1548 | " File \"/var/folders/5c/jls7k26j1tq6l03cl_kvpnwh0000gn/T/ipykernel_97636/1858893181.py\", line 18, in _run\n", 1549 | " try: exec(compiled_code, glb, loc)\n", 1550 | " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", 1551 | " File \"\", line 1, in \n", 1552 | " File \"/var/folders/5c/jls7k26j1tq6l03cl_kvpnwh0000gn/T/ipykernel_97636/2963369439.py\", line 9, in handler\n", 1553 | " def handler(*args): raise TimeoutError()\n", 1554 | " ^^^^^^^^^^^^^^^^^^^^\n", 1555 | "TimeoutError\n", 1556 | "\n" 1557 | ] 1558 | } 1559 | ], 1560 | "source": [ 1561 | "print(python('import time; time.sleep(10)', timeout=1))" 1562 | ] 1563 | }, 1564 | { 1565 | "cell_type": "markdown", 1566 | "id": "d45684c1", 1567 | "metadata": {}, 1568 | "source": [ 1569 | "By default the caller's global 
namespace is used." 1570 | ] 1571 | }, 1572 | { 1573 | "cell_type": "code", 1574 | "execution_count": null, 1575 | "id": "72dfe290", 1576 | "metadata": {}, 1577 | "outputs": [ 1578 | { 1579 | "data": { 1580 | "text/plain": [ 1581 | "1" 1582 | ] 1583 | }, 1584 | "execution_count": null, 1585 | "metadata": {}, 1586 | "output_type": "execute_result" 1587 | } 1588 | ], 1589 | "source": [ 1590 | "python(\"a=1\")\n", 1591 | "a" 1592 | ] 1593 | }, 1594 | { 1595 | "cell_type": "markdown", 1596 | "id": "bf48557c", 1597 | "metadata": {}, 1598 | "source": [ 1599 | "Pass a different `glb` if needed." 1600 | ] 1601 | }, 1602 | { 1603 | "cell_type": "code", 1604 | "execution_count": null, 1605 | "id": "55fb5613", 1606 | "metadata": {}, 1607 | "outputs": [ 1608 | { 1609 | "data": { 1610 | "text/plain": [ 1611 | "(1, 3)" 1612 | ] 1613 | }, 1614 | "execution_count": null, 1615 | "metadata": {}, 1616 | "output_type": "execute_result" 1617 | } 1618 | ], 1619 | "source": [ 1620 | "glb = {}\n", 1621 | "python(\"a=3\", glb)\n", 1622 | "a, glb['a']" 1623 | ] 1624 | }, 1625 | { 1626 | "cell_type": "markdown", 1627 | "id": "244c502e", 1628 | "metadata": {}, 1629 | "source": [ 1630 | "### Tool Calling" 1631 | ] 1632 | }, 1633 | { 1634 | "cell_type": "markdown", 1635 | "id": "186408f8", 1636 | "metadata": {}, 1637 | "source": [ 1638 | "Many LLM API providers offer tool calling where an LLM can choose to call a given tool. This is also helpful for structured outputs since the response from the LLM is contrained to the required arguments of the tool.\n", 1639 | "\n", 1640 | "This section will be dedicated to helper functions for calling tools. We don't want to allow LLMs to call just any possible function (that would be a security disaster!) so we create a namespace -- that is, a dictionary of allowable function names to call." 
#| export
def mk_ns(*funcs_or_objs):
    "Build a namespace dict of allowable callables from functions, classes, and instances."
    ns = {}
    for item in funcs_or_objs:
        if isinstance(item, type):
            # Classes contribute their static and class methods (resolved via getattr)
            for name, member in item.__dict__.items():
                if isinstance(member, (staticmethod, classmethod)): ns[name] = getattr(item, name)
        if isinstance(item, object):
            # Instances contribute bound methods, then raw staticmethod objects from their class
            for name, _ in inspect.getmembers(item, inspect.ismethod): ns[name] = getattr(item, name)
            for name, member in item.__class__.__dict__.items():
                if isinstance(member, staticmethod): ns[name] = member
        if callable(item) and hasattr(item, '__name__'): ns[item.__name__] = item
    return ns
| " @classmethod\n", 1717 | " def mults(cls, a, b): return a * b" 1718 | ] 1719 | }, 1720 | { 1721 | "cell_type": "code", 1722 | "execution_count": null, 1723 | "id": "ca50b957", 1724 | "metadata": {}, 1725 | "outputs": [ 1726 | { 1727 | "data": { 1728 | "text/plain": [ 1729 | "{'subs': ,\n", 1730 | " 'mults': >,\n", 1731 | " 'Dummy': __main__.Dummy}" 1732 | ] 1733 | }, 1734 | "execution_count": null, 1735 | "metadata": {}, 1736 | "output_type": "execute_result" 1737 | } 1738 | ], 1739 | "source": [ 1740 | "ns = mk_ns(Dummy); ns" 1741 | ] 1742 | }, 1743 | { 1744 | "cell_type": "code", 1745 | "execution_count": null, 1746 | "id": "59ef734f", 1747 | "metadata": {}, 1748 | "outputs": [ 1749 | { 1750 | "data": { 1751 | "text/plain": [ 1752 | "(-1, 6)" 1753 | ] 1754 | }, 1755 | "execution_count": null, 1756 | "metadata": {}, 1757 | "output_type": "execute_result" 1758 | } 1759 | ], 1760 | "source": [ 1761 | "ns['subs'](1, 2), ns['mults'](3, 2)" 1762 | ] 1763 | }, 1764 | { 1765 | "cell_type": "code", 1766 | "execution_count": null, 1767 | "id": "15871e6d", 1768 | "metadata": {}, 1769 | "outputs": [ 1770 | { 1771 | "data": { 1772 | "text/plain": [ 1773 | "{'__call__': >,\n", 1774 | " '__init__': >,\n", 1775 | " 'mults': >,\n", 1776 | " 'sums': >,\n", 1777 | " 'subs': )>}" 1778 | ] 1779 | }, 1780 | "execution_count": null, 1781 | "metadata": {}, 1782 | "output_type": "execute_result" 1783 | } 1784 | ], 1785 | "source": [ 1786 | "d = Dummy(10)\n", 1787 | "ns = mk_ns(d); ns" 1788 | ] 1789 | }, 1790 | { 1791 | "cell_type": "code", 1792 | "execution_count": null, 1793 | "id": "13cb7685", 1794 | "metadata": {}, 1795 | "outputs": [ 1796 | { 1797 | "data": { 1798 | "text/plain": [ 1799 | "(-1, 6, 5, 10)" 1800 | ] 1801 | }, 1802 | "execution_count": null, 1803 | "metadata": {}, 1804 | "output_type": "execute_result" 1805 | } 1806 | ], 1807 | "source": [ 1808 | "ns['subs'](1, 2), ns['mults'](3, 2), ns['sums'](3, 2), ns['__call__']()" 1809 | ] 1810 | }, 1811 | { 1812 | "cell_type": 
"code", 1813 | "execution_count": null, 1814 | "id": "2dfe13ae", 1815 | "metadata": {}, 1816 | "outputs": [ 1817 | { 1818 | "data": { 1819 | "text/plain": [ 1820 | "(None, -99)" 1821 | ] 1822 | }, 1823 | "execution_count": null, 1824 | "metadata": {}, 1825 | "output_type": "execute_result" 1826 | } 1827 | ], 1828 | "source": [ 1829 | "ns['__init__'](-99), ns['__call__']()" 1830 | ] 1831 | }, 1832 | { 1833 | "cell_type": "code", 1834 | "execution_count": null, 1835 | "id": "85b4734f", 1836 | "metadata": {}, 1837 | "outputs": [], 1838 | "source": [ 1839 | "#| exports\n", 1840 | "def call_func(fc_name, fc_inputs, ns, raise_on_err=True):\n", 1841 | " \"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`.\"\n", 1842 | " if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)\n", 1843 | " func = ns[fc_name]\n", 1844 | " try: return func(**fc_inputs)\n", 1845 | " except Exception as e:\n", 1846 | " if raise_on_err: raise e\n", 1847 | " else: return traceback.format_exc()" 1848 | ] 1849 | }, 1850 | { 1851 | "cell_type": "markdown", 1852 | "id": "ce9cce60", 1853 | "metadata": {}, 1854 | "source": [ 1855 | "Now when we an LLM responses with the tool to use and its inputs, we can simply use the same namespace it was given to look up the tool and call it." 
1856 | ] 1857 | }, 1858 | { 1859 | "cell_type": "code", 1860 | "execution_count": null, 1861 | "id": "f2ade8a8", 1862 | "metadata": {}, 1863 | "outputs": [ 1864 | { 1865 | "data": { 1866 | "text/plain": [ 1867 | "3" 1868 | ] 1869 | }, 1870 | "execution_count": null, 1871 | "metadata": {}, 1872 | "output_type": "execute_result" 1873 | } 1874 | ], 1875 | "source": [ 1876 | "call_func('sums', {'a': 1, 'b': 2}, ns=[sums])" 1877 | ] 1878 | }, 1879 | { 1880 | "cell_type": "code", 1881 | "execution_count": null, 1882 | "id": "9aace64a", 1883 | "metadata": {}, 1884 | "outputs": [ 1885 | { 1886 | "data": { 1887 | "text/plain": [ 1888 | "-1" 1889 | ] 1890 | }, 1891 | "execution_count": null, 1892 | "metadata": {}, 1893 | "output_type": "execute_result" 1894 | } 1895 | ], 1896 | "source": [ 1897 | "call_func('subs', {'a': 1, 'b': 2}, ns=mk_ns(d))" 1898 | ] 1899 | }, 1900 | { 1901 | "cell_type": "code", 1902 | "execution_count": null, 1903 | "id": "6c93c0ef", 1904 | "metadata": {}, 1905 | "outputs": [], 1906 | "source": [ 1907 | "assert \"unsupported operand type(s) for -: 'int' and 'str'\" in call_func('subs', {'a': 1, 'b': '3'}, ns=mk_ns(d), raise_on_err=False)" 1908 | ] 1909 | }, 1910 | { 1911 | "cell_type": "code", 1912 | "execution_count": null, 1913 | "id": "85489c3d", 1914 | "metadata": {}, 1915 | "outputs": [], 1916 | "source": [ 1917 | "test_fail(call_func, args=['subs', {'a': 1, 'b': '3'}], kwargs={'ns': mk_ns(d)})" 1918 | ] 1919 | }, 1920 | { 1921 | "cell_type": "code", 1922 | "execution_count": null, 1923 | "id": "b19298ac", 1924 | "metadata": {}, 1925 | "outputs": [], 1926 | "source": [ 1927 | "%%ai\n", 1928 | "How do I get the whole traceback of an error instead of just str(e) like above?" 
1929 | ] 1930 | }, 1931 | { 1932 | "cell_type": "markdown", 1933 | "id": "6ec89b42", 1934 | "metadata": {}, 1935 | "source": [ 1936 | "To get the whole traceback of an error instead of just `str(e)`, you can use the `traceback` module, which you've already imported in your code. Modify the `call_func` function to capture and return the full traceback when an error occurs:\n", 1937 | "\n", 1938 | "```python\n", 1939 | "#| exports\n", 1940 | "def call_func(fc_name, fc_inputs, ns, raise_on_err=True):\n", 1941 | " \"Call the function `fc_name` with the given `fc_inputs` using namespace `ns`.\"\n", 1942 | " if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns)\n", 1943 | " func = ns[fc_name]\n", 1944 | " try: return func(**fc_inputs)\n", 1945 | " except Exception as e:\n", 1946 | " if raise_on_err: raise e\n", 1947 | " else: return traceback.format_exc()\n", 1948 | "```\n", 1949 | "\n", 1950 | "This replaces `str(e)` with `traceback.format_exc()`, which returns the full traceback as a string, including the error type, message, and the call stack that led to the error. This gives you much more context about where and why the error occurred." 1951 | ] 1952 | }, 1953 | { 1954 | "cell_type": "markdown", 1955 | "id": "591574b8-6b53-4908-8159-b87be42133f7", 1956 | "metadata": {}, 1957 | "source": [ 1958 | "### Async function calling" 1959 | ] 1960 | }, 1961 | { 1962 | "cell_type": "markdown", 1963 | "id": "96a3a7d3-31ef-4cc6-b47c-35eaa8bbff8b", 1964 | "metadata": {}, 1965 | "source": [ 1966 | "Since tools defined by MCP servers are async function, it is probably a good idea to have an async version of `call_func`." 
#| exports
async def call_func_async(fc_name, fc_inputs, ns, raise_on_err=True):
    """Awaits the function `fc_name` with the given `fc_inputs` using namespace `ns`.

    Delegates to `call_func`; if the result is a coroutine (async tool), awaits it.
    On error, re-raises if `raise_on_err`, else returns the formatted traceback string."""
    res = call_func(fc_name, fc_inputs, ns, raise_on_err=raise_on_err)
    if inspect.iscoroutine(res):
        try: res = await res
        except Exception:
            # Bare `raise` preserves the original traceback (unlike `raise e`)
            if raise_on_err: raise
            return traceback.format_exc()
    return res
2040 | }, 2041 | { 2042 | "cell_type": "code", 2043 | "execution_count": null, 2044 | "id": "a06776cf", 2045 | "metadata": {}, 2046 | "outputs": [], 2047 | "source": [ 2048 | "ex = False\n", 2049 | "try: await call_func_async('asums', {'a': 1, 'b': '2'}, ns=[asums], raise_on_err=True)\n", 2050 | "except: ex = True\n", 2051 | "assert ex" 2052 | ] 2053 | }, 2054 | { 2055 | "cell_type": "markdown", 2056 | "id": "94ec4289", 2057 | "metadata": {}, 2058 | "source": [ 2059 | "## Export -" 2060 | ] 2061 | }, 2062 | { 2063 | "cell_type": "code", 2064 | "execution_count": null, 2065 | "id": "1e9ee5c1", 2066 | "metadata": {}, 2067 | "outputs": [], 2068 | "source": [ 2069 | "#|hide\n", 2070 | "#|eval: false\n", 2071 | "from nbdev.doclinks import nbdev_export\n", 2072 | "nbdev_export()" 2073 | ] 2074 | }, 2075 | { 2076 | "cell_type": "code", 2077 | "execution_count": null, 2078 | "id": "9cf037e0", 2079 | "metadata": {}, 2080 | "outputs": [], 2081 | "source": [] 2082 | } 2083 | ], 2084 | "metadata": { 2085 | "kernelspec": { 2086 | "display_name": "python3", 2087 | "language": "python", 2088 | "name": "python3" 2089 | } 2090 | }, 2091 | "nbformat": 4, 2092 | "nbformat_minor": 5 2093 | } 2094 | -------------------------------------------------------------------------------- /02_shell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "efe78920", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#|default_exp shell" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "3d773712-12fe-440e-891f-36f59666dfde", 16 | "metadata": {}, 17 | "source": [ 18 | "# shell source" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "id": "1328ef69", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "#| exports\n", 29 | "import ast, time, signal, traceback\n", 30 | "from fastcore.utils import *" 31 | ] 32 | 
def exception2str(ex: Exception) -> str:
    "Render exception `ex`, including its traceback, as a single string."
    parts = traceback.format_exception(type(ex), ex, ex.__traceback__)
    return ''.join(parts)
"@patch\n", 109 | "def run_cell(self:TerminalInteractiveShell, cell, timeout=None):\n", 110 | " \"Wrapper for original `run_cell` which adds timeout and output capture\"\n", 111 | " if timeout:\n", 112 | " def handler(*args): raise TimeoutError()\n", 113 | " signal.signal(signal.SIGALRM, handler)\n", 114 | " signal.alarm(timeout)\n", 115 | " try:\n", 116 | " with capture_output() as io: result = self.orig_run(cell)\n", 117 | " result.stdout = io.stdout\n", 118 | " return result\n", 119 | " except TimeoutException as e:\n", 120 | " result = self.ExecutionResult(error_before_exec=None, error_in_exec=e)\n", 121 | " finally:\n", 122 | " if timeout: signal.alarm(0)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "id": "cdadbb12", 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "#| exports\n", 133 | "def get_shell()->TerminalInteractiveShell:\n", 134 | " \"Get a `TerminalInteractiveShell` with minimal functionality\"\n", 135 | " sh = TerminalInteractiveShell()\n", 136 | " sh.logger.log_output = sh.history_manager.enabled = False\n", 137 | " dh = sh.displayhook\n", 138 | " dh.finish_displayhook = dh.write_output_prompt = dh.start_displayhook = lambda: None\n", 139 | " dh.write_format_data = lambda format_dict, md_dict=None: None\n", 140 | " sh.logstart = sh.automagic = sh.autoindent = False\n", 141 | " sh.autocall = 0\n", 142 | " sh.system = lambda cmd: None\n", 143 | " return sh" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "5ffbe57e", 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "shell = get_shell()" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "id": "b03b78b3", 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "(2, '3\\n')" 166 | ] 167 | }, 168 | "execution_count": null, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | 
"source": [ 174 | "r = shell.run_cell('print(3); 1+1')\n", 175 | "r.result,r.stdout" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "id": "48849fc3", 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "Traceback (most recent call last):\n", 189 | " File \"/Users/jhoward/miniconda3/lib/python3.12/site-packages/IPython/core/interactiveshell.py\", line 3577, in run_code\n", 190 | " exec(code_obj, self.user_global_ns, self.user_ns)\n", 191 | " File \"\", line 1, in \n", 192 | " raise Exception(\"blah\")\n", 193 | "Exception: blah\n", 194 | "\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "r = shell.run_cell('raise Exception(\"blah\")')\n", 200 | "print(exception2str(r.error_in_exec))" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "ddabea6d", 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "TimeoutError()" 213 | ] 214 | }, 215 | "execution_count": null, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "r = shell.run_cell('import time; time.sleep(10)', timeout=1)\n", 222 | "r.error_in_exec" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "id": "94ec4289", 228 | "metadata": {}, 229 | "source": [ 230 | "## Export -" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "1e9ee5c1", 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "#|hide\n", 241 | "#|eval: false\n", 242 | "from nbdev.doclinks import nbdev_export\n", 243 | "nbdev_export()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "id": "207f9715", 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "python3", 258 | "language": "python", 259 | "name": 
"python3" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 5 264 | } 265 | -------------------------------------------------------------------------------- /03_download.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "92c3dff2", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#| default_exp download" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "1d533800", 16 | "metadata": {}, 17 | "source": [ 18 | "# Download helpers\n", 19 | "\n", 20 | "- Download and process LLM-ready documents" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "id": "e58d8c43", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "#| export\n", 31 | "from fastcore.utils import *\n", 32 | "from httpx import get\n", 33 | "from fastcore.meta import delegates\n", 34 | "from urllib.parse import urlparse, urljoin" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "id": "30199708", 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from IPython.display import Markdown,HTML\n", 45 | "from fastcore.test import *" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "95c4cab1", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "#| export\n", 56 | "def clean_md(text, rm_comments=True, rm_details=True):\n", 57 | " \"Remove comments and `
` sections from `text`\"\n", 58 | " if rm_comments: text = re.sub(r'\\n?\\n?', '', text, flags=re.DOTALL)\n", 59 | " if rm_details: text = re.sub(r'\\n?
.*?
#| export
def html2md(s:str, ignore_links=True):
    "Convert HTML string `s` to markdown via `html2text`."
    import html2text
    conv = html2text.HTML2Text(bodywidth=5000)
    conv.mark_code = True
    conv.ignore_images = True
    conv.ignore_links = ignore_links
    return conv.handle(s)
` tags\n", 119 | " multi=False, # Get all matches to `sel` or first one \n", 120 | " wrap_tag=None, #If multi, each selection wrapped with content\n", 121 | " ignore_links=True,\n", 122 | " ): # Cleaned markdown\n", 123 | " \"Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown\"\n", 124 | " page = get(url).text\n", 125 | " if sel:\n", 126 | " from bs4 import BeautifulSoup\n", 127 | " soup = BeautifulSoup(page, 'html.parser')\n", 128 | " if multi:\n", 129 | " page = [str(el) for el in soup.select(sel)]\n", 130 | " if not wrap_tag: page = \"\\n\".join(page)\n", 131 | " else: page = str(soup.select_one(sel))\n", 132 | " mds = map(lambda x: clean_md(html2md(x, ignore_links=ignore_links), rm_comments, rm_details=rm_details), tuplify(page))\n", 133 | " if wrap_tag: return '\\n'.join([f\"\\n<{wrap_tag}>\\n{o}\\n\" for o in mds])\n", 134 | " else: return'\\n'.join(mds)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "1d07c687", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# test single class selector\n", 145 | "listings = read_html('https://www.answer.ai/', sel='.listing-description')\n", 146 | "assert len(listings) < 500\n", 147 | "\n", 148 | "# Test multi class selector\n", 149 | "listings = read_html('https://www.answer.ai/', sel='.listing-description', multi=True)\n", 150 | "assert len(listings) > 1000 # returns more than single so selecting multi\n", 151 | "\n", 152 | "# Test multi_wrap_tag\n", 153 | "listings = read_html('https://www.answer.ai/', sel='.listing-description', multi=True, wrap_tag='document')\n", 154 | "assert '' in listings and '' in listings " 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "id": "20188898", 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "'[My experience learning GPU programming, and implementing a new GPU education app in the 
#| export
def get_llmstxt(url, optional=False, n_workers=None):
    "Fetch an llms.txt file from `url` and expand it with `llms_txt.create_ctx()`; `None` if unavailable."
    if not url.endswith('llms.txt'): return None
    import llms_txt
    response = get(url)
    if response.status_code == 200:
        return llms_txt.create_ctx(response.text, optional=optional, n_workers=n_workers)
    return None
"outputs": [], 236 | "source": [ 237 | "#| export\n", 238 | "def split_url(url):\n", 239 | " \"Split `url` into base, path, and file name, normalising name to '/' if empty\"\n", 240 | " parsed = urlparse(url.strip('/'))\n", 241 | " base = f\"{parsed.scheme}://{parsed.netloc}\"\n", 242 | " path,spl,fname = parsed.path.rpartition('/')\n", 243 | " fname = spl+fname\n", 244 | " if not path and not fname: path='/'\n", 245 | " return base,path,fname" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": null, 251 | "id": "1a92b74e", 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "[('https://claudette.answer.ai', '', '/path'),\n", 258 | " ('https://claudette.answer.ai', '/', ''),\n", 259 | " ('https://llmstxt.org', '/', ''),\n", 260 | " ('https://llmstxt.org', '/', '')]" 261 | ] 262 | }, 263 | "execution_count": null, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "urls = ('https://claudette.answer.ai/path/', 'https://claudette.answer.ai/', 'https://llmstxt.org', 'https://llmstxt.org/')\n", 270 | "\n", 271 | "[split_url(o) for o in urls]" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "id": "5337c0a2", 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "#| export\n", 282 | "def _tryget(url):\n", 283 | " \"Return response from `url` if `status_code!=404`, otherwise `None`\"\n", 284 | " res = get(url)\n", 285 | " return None if res.status_code==404 else url" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "id": "189f5b24", 292 | "metadata": {}, 293 | "outputs": [], 294 | "source": [ 295 | "#| export\n", 296 | "def find_docs(url):\n", 297 | " \"If available, return LLM-friendly llms.txt context or markdown file location from `url`\"\n", 298 | " base,path,fname = split_url(url)\n", 299 | " url = (base+path+fname).strip('/')\n", 300 | " if fname=='/llms.txt': 
return url\n", 301 | " if Path(fname).suffix in('.md', '.txt', '.rst'): return _tryget(url)\n", 302 | " if '.' in fname: return _tryget(url+'.md') or find_docs(url[:url.rfind('/')])\n", 303 | " res = _tryget(url+'/llms.txt')\n", 304 | " if res: return res\n", 305 | " res = _tryget(url+'/index.md')\n", 306 | " if res: return res\n", 307 | " res = _tryget(url+'/index.html.md')\n", 308 | " if res: return res\n", 309 | " res = _tryget(url+'/index-commonmark.md')\n", 310 | " if res: return res\n", 311 | " parsed_url = urlparse(url)\n", 312 | " if parsed_url.path == '/' or not parsed_url.path: return None\n", 313 | " return find_docs(urljoin(url, '..'))" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "5d1722d9", 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "fl_url = 'https://answerdotai.github.io/fastlite'" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "id": "0b226407", 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/plain": [ 335 | "'https://answerdotai.github.io/fastlite/llms.txt'" 336 | ] 337 | }, 338 | "execution_count": null, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "find_docs(fl_url)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": null, 350 | "id": "14344890", 351 | "metadata": {}, 352 | "outputs": [ 353 | { 354 | "name": "stdout", 355 | "output_type": "stream", 356 | "text": [ 357 | "https://claudette.answer.ai/llms.txt\n", 358 | "https://claudette.answer.ai/llms.txt\n", 359 | "https://llmstxt.org/llms.txt\n", 360 | "https://llmstxt.org/llms.txt\n" 361 | ] 362 | } 363 | ], 364 | "source": [ 365 | "for o in urls: print(find_docs(o))" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": null, 371 | "id": "439546d4", 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "#| eval: false\n", 376 | 
"suffixes = [\"/\", \"/tmp\", \"/tmp/tmp/\"]\n", 377 | "for suff in suffixes:\n", 378 | " for o in urls: test_eq(find_docs(o), find_docs(o+suff))\n", 379 | "\n", 380 | "test_eq(find_docs(\"https://github.com\"), \"https://github.com/llms.txt\")\n", 381 | "test_eq(find_docs(\"https://github.com/AnswerDotAI\"), \"https://github.com/llms.txt\")\n", 382 | "test_eq(find_docs(\"https://github.com/AnswerDotAI/\"), \"https://github.com/llms.txt\")" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "id": "771d1208", 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [ 392 | "#| export\n", 393 | "def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):\n", 394 | " \"If available, return LLM-friendly llms.txt context or markdown file response for `url`\"\n", 395 | " url = find_docs(url)\n", 396 | " if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)\n", 397 | " else: res = get(url).text\n", 398 | " return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "id": "94ec4289", 404 | "metadata": {}, 405 | "source": [ 406 | "## Export -" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "id": "1e9ee5c1", 413 | "metadata": {}, 414 | "outputs": [], 415 | "source": [ 416 | "#|hide\n", 417 | "#|eval: false\n", 418 | "from nbdev.doclinks import nbdev_export\n", 419 | "nbdev_export()" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "id": "0c01784b", 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [] 429 | } 430 | ], 431 | "metadata": { 432 | "kernelspec": { 433 | "display_name": "python3", 434 | "language": "python", 435 | "name": "python3" 436 | } 437 | }, 438 | "nbformat": 4, 439 | "nbformat_minor": 5 440 | } 441 | -------------------------------------------------------------------------------- 
/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Release notes 2 | 3 | 4 | 5 | ## 0.2.1 6 | 7 | ### New Features 8 | 9 | - Optionally dont raise error on `call_func` ([#31](https://github.com/AnswerDotAI/toolslm/pull/31)), thanks to [@erikgaas](https://github.com/erikgaas) 10 | - dict support in `get_schema` ([#30](https://github.com/AnswerDotAI/toolslm/issues/30)) 11 | 12 | 13 | ## 0.2.0 14 | 15 | ### Breaking changes 16 | 17 | - Optional libs (http2text, beautifulsoup, llms_txt) are no longer automatically installed 18 | 19 | ### New Features 20 | 21 | - Lazily load optional modules ([#29](https://github.com/AnswerDotAI/toolslm/issues/29)) 22 | 23 | 24 | ## 0.1.3 25 | 26 | ### New Features 27 | 28 | - Pass glb,loc to python ([#28](https://github.com/AnswerDotAI/toolslm/issues/28)) 29 | 30 | ## 0.1.2 31 | 32 | ### New Features 33 | 34 | - Adds `call_func_async` ([#27](https://github.com/AnswerDotAI/toolslm/pull/27)), thanks to [@mikonapoli](https://github.com/mikonapoli) 35 | - Add arg ignore links ([#26](https://github.com/AnswerDotAI/toolslm/pull/26)), thanks to [@Isaac-Flath](https://github.com/Isaac-Flath) 36 | 37 | 38 | ## 0.1.1 39 | 40 | ### New Features 41 | 42 | - Add arg ignore links ([#26](https://github.com/AnswerDotAI/toolslm/pull/26)), thanks to [@Isaac-Flath](https://github.com/Isaac-Flath) 43 | 44 | ### Bugs Squashed 45 | 46 | - fix: prevent markdown heading detection inside code blocks ([#25](https://github.com/AnswerDotAI/toolslm/pull/25)), thanks to [@franckalbinet](https://github.com/franckalbinet) 47 | - Fix markdown hierarchy parsing for arbitrary header levels ([#22](https://github.com/AnswerDotAI/toolslm/pull/22)), thanks to [@erikgaas](https://github.com/erikgaas) 48 | 49 | 50 | ## 0.1.0 51 | 52 | ### Breaking changes 53 | 54 | - Replace `source` with `src` in context generation ([#17](https://github.com/AnswerDotAI/toolslm/issues/17)) 55 | 56 | 57 | ## 0.0.8 58 | 59 | ### New Features 60 
| 61 | - Escape and print context in `folder2ctx` et al ([#16](https://github.com/AnswerDotAI/toolslm/issues/16)) 62 | 63 | 64 | ## 0.0.7 65 | 66 | ### New Features 67 | 68 | - Add `dict2obj` to `md_hier` funcs ([#15](https://github.com/AnswerDotAI/toolslm/issues/15)) 69 | - Migrate call_func from claudette to toolslm ([#14](https://github.com/AnswerDotAI/toolslm/pull/14)), thanks to [@ncoop57](https://github.com/ncoop57) 70 | - Allow for getting schemas from nested structures ([#11](https://github.com/AnswerDotAI/toolslm/pull/11)), thanks to [@ncoop57](https://github.com/ncoop57) 71 | - Allow for `sel` to select and wrap multiple element results ([#10](https://github.com/AnswerDotAI/toolslm/pull/10)), thanks to [@Isaac-Flath](https://github.com/Isaac-Flath) 72 | 73 | ### Bugs Squashed 74 | 75 | - Using `get_schema` on class method results in type missing error ([#12](https://github.com/AnswerDotAI/toolslm/issues/12)) 76 | 77 | 78 | ## 0.0.6 79 | 80 | ### New Features 81 | 82 | - Add `read_docs` and `find_docs` ([#8](https://github.com/AnswerDotAI/toolslm/issues/8)) 83 | 84 | 85 | ## 0.0.5 86 | 87 | ### Bugs Squashed 88 | 89 | - XML tools assume all files have content ([#3](https://github.com/AnswerDotAI/toolslm/issues/3)) 90 | 91 | 92 | ## 0.0.4 93 | 94 | - Minor updates 95 | 96 | ## 0.0.2 97 | 98 | - Rename project 99 | 100 | 101 | ## 0.0.1 102 | 103 | - Initial alpha release 104 | 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # toolslm 2 | 3 | 4 | 5 | 6 | This is a work in progress… 7 | 8 | ## Install 9 | 10 | ``` sh 11 | pip install toolslm 12 | ``` 13 | 14 | ## How to use 15 | 16 | ### Context creation 17 | 18 | toolslm has some helpers to make it easier to generate XML context from 19 | files, for instance 20 | [`folder2ctx`](https://AnswerDotAI.github.io/toolslm/xml.html#folder2ctx): 21 | 22 | ``` python 23 | print(folder2ctx('samples', prefix=False, file_glob='*.py')) 24 | ``` 25 | 26 | 27 | samples/sample_core.py 28 | 29 | import inspect 30 | empty = inspect.Parameter.empty 31 | models = 
'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307' 32 | 33 | 34 | JSON doesn’t map as nicely to XML as the `ft` data structure from 35 | `fastcore.xml`, but for simple XML trees it can be convenient. The 36 | [`json_to_xml`](https://AnswerDotAI.github.io/toolslm/xml.html#json_to_xml) 37 | function handles that conversion: 38 | 39 | ``` python 40 | a = dict(surname='Howard', firstnames=['Jeremy','Peter'], 41 | address=dict(state='Queensland',country='Australia')) 42 | print(json_to_xml(a, 'person')) 43 | ``` 44 | 45 | 46 | Howard 47 | 48 | Jeremy 49 | Peter 50 | 51 |
52 | Queensland 53 | Australia 54 |
55 |
56 | -------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | format: 5 | html: 6 | theme: cosmo 7 | css: styles.css 8 | toc: true 9 | keep-md: true 10 | commonmark: default 11 | 12 | website: 13 | twitter-card: true 14 | open-graph: true 15 | repo-actions: [issue] 16 | navbar: 17 | background: primary 18 | search: true 19 | sidebar: 20 | style: floating 21 | 22 | metadata-files: [nbdev.yml, sidebar.yml] 23 | -------------------------------------------------------------------------------- /index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "56e2fbc1", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#| hide\n", 11 | "from toolslm.xml import *" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "9c85d17d", 17 | "metadata": {}, 18 | "source": [ 19 | "# toolslm\n", 20 | "\n", 21 | "> Tools to make language models a bit easier to use" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "947109d0", 27 | "metadata": {}, 28 | "source": [ 29 | "This is a work in progress..." 
30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "431900fc", 35 | "metadata": {}, 36 | "source": [ 37 | "## Install" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "6cf13202", 43 | "metadata": {}, 44 | "source": [ 45 | "```sh\n", 46 | "pip install toolslm\n", 47 | "```" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "id": "36346546", 53 | "metadata": {}, 54 | "source": [ 55 | "## How to use" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "id": "2a8a7a9a", 61 | "metadata": {}, 62 | "source": [ 63 | "### Context creation" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "id": "3778e8ed", 69 | "metadata": {}, 70 | "source": [ 71 | "toolslm has some helpers to make it easier to generate XML context from files, for instance `folder2ctx`:" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "efd52392", 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "name": "stdout", 82 | "output_type": "stream", 83 | "text": [ 84 | "\n", 85 | "samples/sample_core.py\n", 86 | "\n", 87 | "import inspect\n", 88 | "empty = inspect.Parameter.empty\n", 89 | "models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'\n", 90 | "\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "print(folder2ctx('samples', prefix=False, file_glob='*.py'))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "id": "58206da8", 101 | "metadata": {}, 102 | "source": [ 103 | "JSON doesn't map as nicely to XML as the `ft` data structure from `fastcore.xml`, but for simple XML trees it can be convenient. The `json_to_xml` function handles that conversion:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "9bcb985e", 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "name": "stdout", 114 | "output_type": "stream", 115 | "text": [ 116 | "\n", 117 | " Howard\n", 118 | " \n", 119 | " Jeremy\n", 120 | " Peter\n", 121 | " \n", 122 | "
\n", 123 | " Queensland\n", 124 | " Australia\n", 125 | "
\n", 126 | "
\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "a = dict(surname='Howard', firstnames=['Jeremy','Peter'],\n", 132 | " address=dict(state='Queensland',country='Australia'))\n", 133 | "print(json_to_xml(a, 'person'))" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "id": "7a3b2c28", 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [] 143 | } 144 | ], 145 | "metadata": { 146 | "kernelspec": { 147 | "display_name": "python3", 148 | "language": "python", 149 | "name": "python3" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 5 154 | } 155 | -------------------------------------------------------------------------------- /nbdev.yml: -------------------------------------------------------------------------------- 1 | project: 2 | output-dir: _docs 3 | 4 | website: 5 | title: "toolslm" 6 | site-url: "https://AnswerDotAI.github.io/toolslm" 7 | description: "Tools to make language models a bit easier to use" 8 | repo-branch: main 9 | repo-url: "https://github.com/AnswerDotAI/toolslm" 10 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=64.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name="toolslm" 7 | requires-python=">=3.9" 8 | dynamic = [ "keywords", "description", "version", "dependencies", "optional-dependencies", "readme", "license", "authors", "classifiers", "entry-points", "scripts", "urls"] 9 | 10 | [tool.uv] 11 | cache-keys = [{ file = "pyproject.toml" }, { file = "settings.ini" }, { file = "setup.py" }] 12 | -------------------------------------------------------------------------------- /samples/sample_core.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | empty = inspect.Parameter.empty 3 | models = 
'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307' 4 | -------------------------------------------------------------------------------- /samples/sample_styles.css: -------------------------------------------------------------------------------- 1 | .cell { margin-bottom: 1rem; } 2 | .cell > .sourceCode { margin-bottom: 0; } 3 | .cell-output > pre { margin-bottom: 0; } 4 | 5 | -------------------------------------------------------------------------------- /settings.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | repo = toolslm 3 | lib_name = toolslm 4 | version = 0.2.2 5 | min_python = 3.9 6 | license = apache2 7 | black_formatting = False 8 | requirements = fastcore>=1.5.47 httpx 9 | doc_path = _docs 10 | lib_path = toolslm 11 | nbs_path = . 12 | recursive = True 13 | tst_flags = notest 14 | put_version_in_init = True 15 | branch = main 16 | custom_sidebar = False 17 | doc_host = https://AnswerDotAI.github.io 18 | doc_baseurl = /toolslm 19 | git_url = https://github.com/AnswerDotAI/toolslm 20 | title = toolslm 21 | audience = Developers 22 | author = Jeremy Howard 23 | author_email = j@fast.ai 24 | copyright = 2024 onwards, Jeremy Howard 25 | description = Tools to make language models a bit easier to use 26 | keywords = nbdev jupyter notebook python 27 | language = English 28 | status = 3 29 | user = AnswerDotAI 30 | readme_nb = index.ipynb 31 | allowed_metadata_keys = 32 | allowed_cell_metadata_keys = 33 | jupyter_hooks = True 34 | clean_ids = True 35 | clear_all = False 36 | conda_user = fastai 37 | console_scripts = folder2ctx=toolslm.xml:folder2ctx_cli 38 | cell_number = True 39 | skip_procs = 40 | update_pyproject = True 41 | 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import parse_version 2 | from configparser import 
ConfigParser 3 | import setuptools, shlex 4 | assert parse_version(setuptools.__version__)>=parse_version('36.2') 5 | 6 | # note: all settings are in settings.ini; edit there, not here 7 | config = ConfigParser(delimiters=['=']) 8 | config.read('settings.ini', encoding='utf-8') 9 | cfg = config['DEFAULT'] 10 | 11 | cfg_keys = 'version description keywords author author_email'.split() 12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split() 13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o) 14 | setup_cfg = {o:cfg[o] for o in cfg_keys} 15 | 16 | licenses = { 17 | 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'), 18 | 'mit': ('MIT License', 'OSI Approved :: MIT License'), 19 | 'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'), 20 | 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'), 21 | 'bsd3': ('BSD License', 'OSI Approved :: BSD License'), 22 | } 23 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha', 24 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ] 25 | py_versions = '3.6 3.7 3.8 3.9 3.10'.split() 26 | 27 | requirements = shlex.split(cfg.get('requirements', '')) 28 | if cfg.get('pip_requirements'): requirements += shlex.split(cfg.get('pip_requirements', '')) 29 | min_python = cfg['min_python'] 30 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None)) 31 | dev_requirements = (cfg.get('dev_requirements') or '').split() 32 | 33 | setuptools.setup( 34 | name = cfg['lib_name'], 35 | license = lic[0], 36 | classifiers = [ 37 | 'Development Status :: ' + statuses[int(cfg['status'])], 38 | 'Intended Audience :: ' + cfg['audience'].title(), 39 | 'Natural Language :: ' + cfg['language'].title(), 40 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] 
] if lic[1] else []), 41 | url = cfg['git_url'], 42 | packages = setuptools.find_packages(), 43 | include_package_data = True, 44 | install_requires = requirements, 45 | extras_require={ 'dev': dev_requirements }, 46 | dependency_links = cfg.get('dep_links','').split(), 47 | python_requires = '>=' + cfg['min_python'], 48 | long_description = open('README.md', encoding='utf-8').read(), 49 | long_description_content_type = 'text/markdown', 50 | zip_safe = False, 51 | entry_points = { 52 | 'console_scripts': cfg.get('console_scripts','').split(), 53 | 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d'] 54 | }, 55 | **setup_cfg) 56 | 57 | 58 | -------------------------------------------------------------------------------- /styles.css: -------------------------------------------------------------------------------- 1 | .cell { 2 | margin-bottom: 1rem; 3 | } 4 | 5 | .cell > .sourceCode { 6 | margin-bottom: 0; 7 | } 8 | 9 | .cell-output > pre { 10 | margin-bottom: 0; 11 | } 12 | 13 | .cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre { 14 | margin-left: 0.8rem; 15 | margin-top: 0; 16 | background: none; 17 | border-left: 2px solid lightsalmon; 18 | border-top-left-radius: 0; 19 | border-top-right-radius: 0; 20 | } 21 | 22 | .cell-output > .sourceCode { 23 | border: none; 24 | } 25 | 26 | .cell-output > .sourceCode { 27 | background: none; 28 | margin-top: 0; 29 | } 30 | 31 | div.description { 32 | padding-left: 2px; 33 | padding-top: 5px; 34 | font-style: italic; 35 | font-size: 135%; 36 | opacity: 70%; 37 | } 38 | -------------------------------------------------------------------------------- /toolslm/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.2" 2 | -------------------------------------------------------------------------------- /toolslm/_modidx.py: -------------------------------------------------------------------------------- 1 | # Autogenerated by 
nbdev 2 | 3 | d = { 'settings': { 'branch': 'main', 4 | 'doc_baseurl': '/toolslm', 5 | 'doc_host': 'https://AnswerDotAI.github.io', 6 | 'git_url': 'https://github.com/AnswerDotAI/toolslm', 7 | 'lib_path': 'toolslm'}, 8 | 'syms': { 'toolslm.download': { 'toolslm.download._tryget': ('download.html#_tryget', 'toolslm/download.py'), 9 | 'toolslm.download.clean_md': ('download.html#clean_md', 'toolslm/download.py'), 10 | 'toolslm.download.find_docs': ('download.html#find_docs', 'toolslm/download.py'), 11 | 'toolslm.download.get_llmstxt': ('download.html#get_llmstxt', 'toolslm/download.py'), 12 | 'toolslm.download.html2md': ('download.html#html2md', 'toolslm/download.py'), 13 | 'toolslm.download.read_docs': ('download.html#read_docs', 'toolslm/download.py'), 14 | 'toolslm.download.read_html': ('download.html#read_html', 'toolslm/download.py'), 15 | 'toolslm.download.read_md': ('download.html#read_md', 'toolslm/download.py'), 16 | 'toolslm.download.split_url': ('download.html#split_url', 'toolslm/download.py')}, 17 | 'toolslm.funccall': { 'toolslm.funccall.PathArg': ('funccall.html#patharg', 'toolslm/funccall.py'), 18 | 'toolslm.funccall._copy_loc': ('funccall.html#_copy_loc', 'toolslm/funccall.py'), 19 | 'toolslm.funccall._get_nested_schema': ('funccall.html#_get_nested_schema', 'toolslm/funccall.py'), 20 | 'toolslm.funccall._handle_container': ('funccall.html#_handle_container', 'toolslm/funccall.py'), 21 | 'toolslm.funccall._handle_type': ('funccall.html#_handle_type', 'toolslm/funccall.py'), 22 | 'toolslm.funccall._is_container': ('funccall.html#_is_container', 'toolslm/funccall.py'), 23 | 'toolslm.funccall._is_parameterized': ('funccall.html#_is_parameterized', 'toolslm/funccall.py'), 24 | 'toolslm.funccall._param': ('funccall.html#_param', 'toolslm/funccall.py'), 25 | 'toolslm.funccall._process_property': ('funccall.html#_process_property', 'toolslm/funccall.py'), 26 | 'toolslm.funccall._run': ('funccall.html#_run', 'toolslm/funccall.py'), 27 | 
'toolslm.funccall._types': ('funccall.html#_types', 'toolslm/funccall.py'), 28 | 'toolslm.funccall.call_func': ('funccall.html#call_func', 'toolslm/funccall.py'), 29 | 'toolslm.funccall.call_func_async': ('funccall.html#call_func_async', 'toolslm/funccall.py'), 30 | 'toolslm.funccall.get_schema': ('funccall.html#get_schema', 'toolslm/funccall.py'), 31 | 'toolslm.funccall.mk_ns': ('funccall.html#mk_ns', 'toolslm/funccall.py'), 32 | 'toolslm.funccall.python': ('funccall.html#python', 'toolslm/funccall.py')}, 33 | 'toolslm.md_hier': {}, 34 | 'toolslm.shell': { 'toolslm.shell.TerminalInteractiveShell.run_cell': ( 'shell.html#terminalinteractiveshell.run_cell', 35 | 'toolslm/shell.py'), 36 | 'toolslm.shell.get_shell': ('shell.html#get_shell', 'toolslm/shell.py')}, 37 | 'toolslm.xml': { 'toolslm.xml._add_nls': ('xml.html#_add_nls', 'toolslm/xml.py'), 38 | 'toolslm.xml.docs_xml': ('xml.html#docs_xml', 'toolslm/xml.py'), 39 | 'toolslm.xml.files2ctx': ('xml.html#files2ctx', 'toolslm/xml.py'), 40 | 'toolslm.xml.folder2ctx': ('xml.html#folder2ctx', 'toolslm/xml.py'), 41 | 'toolslm.xml.folder2ctx_cli': ('xml.html#folder2ctx_cli', 'toolslm/xml.py'), 42 | 'toolslm.xml.json_to_xml': ('xml.html#json_to_xml', 'toolslm/xml.py'), 43 | 'toolslm.xml.mk_doc': ('xml.html#mk_doc', 'toolslm/xml.py'), 44 | 'toolslm.xml.mk_doctype': ('xml.html#mk_doctype', 'toolslm/xml.py')}}} 45 | -------------------------------------------------------------------------------- /toolslm/download.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../03_download.ipynb. 

# %% auto 0
__all__ = ['clean_md', 'read_md', 'html2md', 'read_html', 'get_llmstxt', 'split_url', 'find_docs', 'read_docs']

# %% ../03_download.ipynb 2
from fastcore.utils import *
from httpx import get
from fastcore.meta import delegates
from urllib.parse import urlparse, urljoin

# %% ../03_download.ipynb 4
def clean_md(text, rm_comments=True, rm_details=True):
    "Remove HTML comments and `<details>` sections from `text`"
    # NOTE(review): the regex literals were garbled in transit (HTML tags eaten);
    # reconstructed from the documented intent -- confirm against the source notebook.
    if rm_comments: text = re.sub(r'\n?<!--.*?-->\n?', '', text, flags=re.DOTALL)
    if rm_details: text = re.sub(r'\n?<details>.*?</details>\n?', '', text, flags=re.DOTALL)
    return text

# %% ../03_download.ipynb 5
@delegates(get)
def read_md(url, rm_comments=True, rm_details=True, **kwargs):
    "Read text from `url` and clean with `clean_md`"
    return clean_md(get(url, **kwargs).text, rm_comments=rm_comments, rm_details=rm_details)

# %% ../03_download.ipynb 7
def html2md(s:str, ignore_links=True):
    "Convert `s` from HTML to markdown"
    import html2text
    o = html2text.HTML2Text(bodywidth=5000)  # very wide body to avoid mid-sentence wrapping
    o.ignore_links = ignore_links
    o.mark_code = True
    o.ignore_images = True
    return o.handle(s)

# %% ../03_download.ipynb 8
def read_html(url, # URL to read
              sel=None, # Read only outerHTML of CSS selector `sel`
              rm_comments=True, # Removes HTML comments
              rm_details=True, # Removes `<details>` tags
              multi=False, # Get all matches to `sel` or first one
              wrap_tag=None, # If multi, each selection is wrapped with this tag
              ignore_links=True, # Drop hyperlinks from the markdown output?
             ): # Cleaned markdown
    "Get `url`, optionally selecting CSS selector `sel`, and convert to clean markdown"
    page = get(url).text
    if sel:
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(page, 'html.parser')
        if multi:
            page = [str(el) for el in soup.select(sel)]
            # without a wrapper tag, matches are concatenated into one document
            if not wrap_tag: page = "\n".join(page)
        else: page = str(soup.select_one(sel))
    mds = map(lambda x: clean_md(html2md(x, ignore_links=ignore_links), rm_comments, rm_details=rm_details), tuplify(page))
    # NOTE(review): closing tag was eaten in transit; reconstructed -- confirm against notebook
    if wrap_tag: return '\n'.join([f"<{wrap_tag}>\n{o}\n</{wrap_tag}>" for o in mds])
    else: return '\n'.join(mds)

# %% ../03_download.ipynb 13
def get_llmstxt(url, optional=False, n_workers=None):
    "Get llms.txt file from `url` and expand it with `llms_txt.create_ctx()`"
    if not url.endswith('llms.txt'): return None
    import llms_txt
    resp = get(url)
    if resp.status_code!=200: return None
    return llms_txt.create_ctx(resp.text, optional=optional, n_workers=n_workers)

# %% ../03_download.ipynb 15
def split_url(url):
    "Split `url` into base, path, and file name, normalising path to '/' if empty"
    parsed = urlparse(url.strip('/'))
    base = f"{parsed.scheme}://{parsed.netloc}"
    path,spl,fname = parsed.path.rpartition('/')
    fname = spl+fname  # keep the leading '/' on the file-name part
    if not path and not fname: path='/'
    return base,path,fname

# %% ../03_download.ipynb 17
def _tryget(url):
    "Return `url` if GETting it does not 404, otherwise `None`"
    res = get(url)
    return None if res.status_code==404 else url

# %% ../03_download.ipynb 18
def find_docs(url):
    "If available, return LLM-friendly llms.txt context or markdown file location from `url`"
    base,path,fname = split_url(url)
    url = (base+path+fname).strip('/')
    if fname=='/llms.txt': return url
    if Path(fname).suffix in ('.md', '.txt', '.rst'): return _tryget(url)
    if '.' in fname: return _tryget(url+'.md') or find_docs(url[:url.rfind('/')])
    # try the conventional doc locations in order of preference
    for suf in ('/llms.txt', '/index.md', '/index.html.md', '/index-commonmark.md'):
        res = _tryget(url+suf)
        if res: return res
    parsed_url = urlparse(url)
    # nothing found and we've reached the site root: give up
    if parsed_url.path == '/' or not parsed_url.path: return None
    return find_docs(urljoin(url, '..'))

# %% ../03_download.ipynb 23
def read_docs(url, optional=False, n_workers=None, rm_comments=True, rm_details=True):
    "If available, return LLM-friendly llms.txt context or markdown file response for `url`"
    url = find_docs(url)
    # was: `None.endswith` AttributeError when no docs were found
    if url is None: return None
    if url.endswith('/llms.txt'): res = get_llmstxt(url, optional=optional, n_workers=n_workers)
    else: res = get(url).text
    return clean_md(res, rm_comments=rm_comments, rm_details=rm_details)

# AUTOGENERATED! DO NOT EDIT! File to edit: ../01_funccall.ipynb.

# %% auto 0
__all__ = ['empty', 'custom_types', 'get_schema', 'PathArg', 'python', 'mk_ns', 'call_func', 'call_func_async']

# %% ../01_funccall.ipynb 2
import inspect
from collections import abc
from fastcore.utils import *
from fastcore.docments import docments
from typing import get_origin, get_args, Dict, List, Optional, Tuple, Union
from types import UnionType

# %% ../01_funccall.ipynb 4
empty = inspect.Parameter.empty

# %% ../01_funccall.ipynb 12
def _types(t:type)->tuple[str,Optional[str]]:
    "Tuple of json schema type name and (if appropriate) array item name."
20 | if t is empty: raise TypeError('Missing type') 21 | tmap = {int:"integer", float:"number", str:"string", bool:"boolean", list:"array", dict:"object"} 22 | tmap.update({k.__name__: v for k, v in tmap.items()}) 23 | if getattr(t, '__origin__', None) in (list,tuple): 24 | args = getattr(t, '__args__', None) 25 | item_type = "object" if not args else tmap.get(t.__args__[0].__name__, "object") 26 | return "array", item_type 27 | # if t is a string like 'int', directly use the string as the key 28 | elif isinstance(t, str): return tmap.get(t, "object"), None 29 | # if t is the type itself and a container 30 | elif get_origin(t): return tmap.get(get_origin(t).__name__, "object"), None 31 | # if t is the type itself like int, use the __name__ representation as the key 32 | else: return tmap.get(t.__name__, "object"), None 33 | 34 | # %% ../01_funccall.ipynb 19 35 | def _param(name, info): 36 | "json schema parameter given `name` and `info` from docments full dict." 37 | paramt,itemt = _types(info.anno) 38 | pschema = dict(type=paramt, description=info.docment or "") 39 | if itemt: pschema["items"] = {"type": itemt} 40 | if info.default is not empty: pschema["default"] = info.default 41 | return pschema 42 | 43 | # %% ../01_funccall.ipynb 22 44 | custom_types = {Path} 45 | 46 | def _handle_type(t, defs): 47 | "Handle a single type, creating nested schemas if necessary" 48 | if t is NoneType: return {'type': 'null'} 49 | if t in custom_types: return {'type':'string', 'format':t.__name__} 50 | if isinstance(t, type) and not issubclass(t, (int, float, str, bool)) or inspect.isfunction(t): 51 | defs[t.__name__] = _get_nested_schema(t) 52 | return {'$ref': f'#/$defs/{t.__name__}'} 53 | return {'type': _types(t)[0]} 54 | 55 | # %% ../01_funccall.ipynb 24 56 | def _is_container(t): 57 | "Check if type is a container (list, dict, tuple, set, Union)" 58 | origin = get_origin(t) 59 | return origin in (list, dict, tuple, set, Union) if origin else False 60 | 61 | def 
_is_parameterized(t): 62 | "Check if type has arguments (e.g. list[int] vs list, dict[str, int] vs dict)" 63 | return _is_container(t) and (get_args(t) != ()) 64 | 65 | # %% ../01_funccall.ipynb 30 66 | def _handle_container(origin, args, defs): 67 | "Handle container types like dict, list, tuple, set, and Union" 68 | if origin is Union or origin is UnionType: 69 | return {"anyOf": [_handle_type(arg, defs) for arg in args]} 70 | if origin is dict: 71 | value_type = args[1].__args__[0] if hasattr(args[1], '__args__') else args[1] 72 | return { 73 | 'type': 'object', 74 | 'additionalProperties': ( 75 | {'type': 'array', 'items': _handle_type(value_type, defs)} 76 | if hasattr(args[1], '__origin__') else _handle_type(args[1], defs) 77 | ) 78 | } 79 | elif origin in (list, tuple, set): 80 | schema = {'type': 'array', 'items': _handle_type(args[0], defs)} 81 | if origin is set: 82 | schema['uniqueItems'] = True 83 | return schema 84 | return None 85 | 86 | # %% ../01_funccall.ipynb 31 87 | def _process_property(name, obj, props, req, defs): 88 | "Process a single property of the schema" 89 | p = _param(name, obj) 90 | props[name] = p 91 | if obj.default is empty: req[name] = True 92 | 93 | if _is_container(obj.anno) and _is_parameterized(obj.anno): 94 | p.update(_handle_container(get_origin(obj.anno), get_args(obj.anno), defs)) 95 | else: 96 | # Non-container type or container without arguments 97 | p.update(_handle_type(obj.anno, defs)) 98 | 99 | # %% ../01_funccall.ipynb 32 100 | def _get_nested_schema(obj): 101 | "Generate nested JSON schema for a class or function" 102 | d = docments(obj, full=True) 103 | props, req, defs = {}, {}, {} 104 | 105 | for n, o in d.items(): 106 | if n != 'return' and n != 'self': 107 | _process_property(n, o, props, req, defs) 108 | 109 | schema = dict(type='object', properties=props, title=obj.__name__ if isinstance(obj, type) else None) 110 | if req: schema['required'] = list(req) 111 | if defs: schema['$defs'] = defs 112 | return 
schema 113 | 114 | # %% ../01_funccall.ipynb 36 115 | def get_schema(f:Union[callable,dict], pname='input_schema')->dict: 116 | "Generate JSON schema for a class, function, or method" 117 | if isinstance(f, dict): return f 118 | schema = _get_nested_schema(f) 119 | desc = f.__doc__ 120 | assert desc, "Docstring missing!" 121 | d = docments(f, full=True) 122 | ret = d.pop('return') 123 | if ret.anno is not empty: desc += f'\n\nReturns:\n- type: {_types(ret.anno)[0]}' 124 | return {"name": f.__name__, "description": desc, pname: schema} 125 | 126 | # %% ../01_funccall.ipynb 47 127 | def PathArg( 128 | path: str # A filesystem path 129 | ): return Path(path) 130 | 131 | # %% ../01_funccall.ipynb 67 132 | import ast, time, signal, traceback 133 | from fastcore.utils import * 134 | 135 | # %% ../01_funccall.ipynb 68 136 | def _copy_loc(new, orig): 137 | "Copy location information from original node to new node and all children." 138 | new = ast.copy_location(new, orig) 139 | for field, o in ast.iter_fields(new): 140 | if isinstance(o, ast.AST): setattr(new, field, _copy_loc(o, orig)) 141 | elif isinstance(o, list): setattr(new, field, [_copy_loc(value, orig) for value in o]) 142 | return new 143 | 144 | # %% ../01_funccall.ipynb 70 145 | def _run(code:str, glb:dict=None, loc:dict=None): 146 | "Run `code`, returning final expression (similar to IPython)" 147 | tree = ast.parse(code) 148 | last_node = tree.body[-1] if tree.body else None 149 | 150 | # If the last node is an expression, modify the AST to capture the result 151 | if isinstance(last_node, ast.Expr): 152 | tgt = [ast.Name(id='_result', ctx=ast.Store())] 153 | assign_node = ast.Assign(targets=tgt, value=last_node.value) 154 | tree.body[-1] = _copy_loc(assign_node, last_node) 155 | 156 | compiled_code = compile(tree, filename='', mode='exec') 157 | glb = glb or {} 158 | stdout_buffer = io.StringIO() 159 | saved_stdout = sys.stdout 160 | sys.stdout = stdout_buffer 161 | try: exec(compiled_code, glb, loc) 162 | 
finally: sys.stdout = saved_stdout 163 | _result = glb.get('_result', None) 164 | if _result is not None: return _result 165 | return stdout_buffer.getvalue().strip() 166 | 167 | # %% ../01_funccall.ipynb 75 168 | def python(code:str, # Code to execute 169 | glb:Optional[dict]=None, # Globals namespace 170 | loc:Optional[dict]=None, # Locals namespace 171 | timeout:int=3600 # Maximum run time in seconds before a `TimeoutError` is raised 172 | ): # Result of last node, if it's an expression, or `None` otherwise 173 | """Executes python `code` with `timeout` and returning final expression (similar to IPython). 174 | Raised exceptions are returned as a string, with a stack trace.""" 175 | def handler(*args): raise TimeoutError() 176 | if glb is None: glb = inspect.currentframe().f_back.f_globals 177 | if loc is None: loc=glb 178 | signal.signal(signal.SIGALRM, handler) 179 | signal.alarm(timeout) 180 | try: return _run(code, glb, loc) 181 | except Exception as e: return traceback.format_exc() 182 | finally: signal.alarm(0) 183 | 184 | # %% ../01_funccall.ipynb 86 185 | def mk_ns(*funcs_or_objs): 186 | merged = {} 187 | for o in funcs_or_objs: 188 | if isinstance(o, type): merged |= {n:getattr(o,n) for n,m in o.__dict__.items() if isinstance(m, (staticmethod, classmethod))} 189 | if isinstance(o, object): merged |= {n:getattr(o,n) for n, m in inspect.getmembers(o, inspect.ismethod)} | {n:m for n,m in o.__class__.__dict__.items() if isinstance(m, staticmethod)} 190 | if callable(o) and hasattr(o, '__name__'): merged |= {o.__name__: o} 191 | return merged 192 | 193 | # %% ../01_funccall.ipynb 95 194 | def call_func(fc_name, fc_inputs, ns, raise_on_err=True): 195 | "Call the function `fc_name` with the given `fc_inputs` using namespace `ns`." 
196 | if not isinstance(ns, abc.Mapping): ns = mk_ns(*ns) 197 | func = ns[fc_name] 198 | try: return func(**fc_inputs) 199 | except Exception as e: 200 | if raise_on_err: raise e 201 | else: return traceback.format_exc() 202 | 203 | # %% ../01_funccall.ipynb 106 204 | async def call_func_async(fc_name, fc_inputs, ns, raise_on_err=True): 205 | "Awaits the function `fc_name` with the given `fc_inputs` using namespace `ns`." 206 | res = call_func(fc_name, fc_inputs, ns, raise_on_err=raise_on_err) 207 | if inspect.iscoroutine(res): 208 | try: res = await res 209 | except Exception as e: 210 | if raise_on_err: raise e 211 | else: return traceback.format_exc() 212 | return res 213 | -------------------------------------------------------------------------------- /toolslm/md_hier.py: -------------------------------------------------------------------------------- 1 | import re 2 | from fastcore.utils import * 3 | __all__ = ['markdown_to_dict', 'create_heading_dict'] 4 | 5 | def markdown_to_dict(markdown_content): 6 | def clean_heading(text): return re.sub(r'[^A-Za-z0-9 ]+', '', text).strip() 7 | 8 | lines = markdown_content.splitlines() 9 | headings = [] 10 | in_code_block = False 11 | 12 | # Parse headings with their levels and line numbers 13 | for idx, line in enumerate(lines): 14 | # Toggle code block state when encountering fence 15 | if line.strip().startswith('```'): in_code_block = not in_code_block 16 | 17 | # Only detect headings when not in a code block 18 | if in_code_block: continue 19 | match = re.match(r'^(#{1,6})\s*(.*)', line) 20 | if match: 21 | level = len(match.group(1)) 22 | text = match.group(2).strip() 23 | headings.append({'level': level, 'text': text, 'line': idx}) 24 | 25 | # Assign content to each heading, including subheadings 26 | for i, h in enumerate(headings): 27 | start = h['line'] # Include the heading line itself 28 | # Find the end index: next heading of same or higher level 29 | for j in range(i + 1, len(headings)): 30 | if 
headings[j]['level'] <= h['level']: 31 | end = headings[j]['line'] 32 | break 33 | else: end = len(lines) 34 | h['content'] = '\n'.join(lines[start:end]).strip() 35 | 36 | # Build the dictionary with hierarchical keys 37 | result,stack = {},[] 38 | first_level = headings[0]['level'] 39 | for h in headings: 40 | stack = stack[:h['level'] - first_level] + [clean_heading(h['text'])] 41 | key = '.'.join(stack) 42 | result[key] = h['content'] 43 | return dict2obj(result) 44 | 45 | def create_heading_dict(text): 46 | text = re.sub(r'```[\s\S]*?```', '', text) 47 | headings = re.findall(r'^#+.*', text, flags=re.MULTILINE) 48 | result = {} 49 | stack = [result] 50 | prev_level = 0 51 | 52 | for heading in headings: 53 | level = heading.count('#') 54 | title = heading.strip('#').strip() 55 | while level <= prev_level: 56 | stack.pop() 57 | prev_level -= 1 58 | new_dict = {} 59 | stack[-1][title] = new_dict 60 | stack.append(new_dict) 61 | prev_level = level 62 | return dict2obj(result) 63 | 64 | 65 | if __name__=='__main__': 66 | md_content = """ 67 | # User 68 | 69 | This is the User section. 70 | 71 | ## Tokens 72 | 73 | Details about tokens. 74 | 75 | ### Value 76 | 77 | The value of tokens. 78 | 79 | Some more details. 80 | 81 | ## Settings 82 | 83 | User settings information. 84 | 85 | # Admin 86 | 87 | Admin section. 88 | 89 | ## Users 90 | 91 | Admin users management. 92 | """ 93 | 94 | result = markdown_to_dict(md_content) 95 | #for key, value in result.items(): print(f'Key: {key}\nValue:\n{value}\n{"-"*40}') 96 | 97 | def test_empty_content(): 98 | md_content = "# Empty Heading" 99 | result = markdown_to_dict(md_content) 100 | assert result['Empty Heading'] == '# Empty Heading' 101 | 102 | def test_special_characters(): 103 | md_content = "# Heading *With* Special _Characters_!\nContent under heading." 
104 | result = markdown_to_dict(md_content) 105 | assert 'Heading With Special Characters' in result 106 | assert result['Heading With Special Characters'] == '# Heading *With* Special _Characters_!\nContent under heading.' 107 | 108 | def test_duplicate_headings(): 109 | md_content = "# Duplicate\n## Duplicate\n### Duplicate\nContent under duplicate headings." 110 | result = markdown_to_dict(md_content) 111 | assert 'Duplicate' in result 112 | assert 'Duplicate.Duplicate' in result 113 | assert 'Duplicate.Duplicate.Duplicate' in result 114 | assert result['Duplicate.Duplicate.Duplicate'] == '### Duplicate\nContent under duplicate headings.' 115 | 116 | def test_no_content(): 117 | md_content = "# No Content Heading\n## Subheading" 118 | result = markdown_to_dict(md_content) 119 | assert result['No Content Heading'] == '# No Content Heading\n## Subheading' 120 | assert result['No Content Heading.Subheading'] == '## Subheading' 121 | 122 | def test_different_levels(): 123 | md_content = "### Level 3 Heading\nContent at level 3.\n# Level 1 Heading\nContent at level 1." 124 | result = markdown_to_dict(md_content) 125 | assert 'Level 3 Heading' in result 126 | assert 'Level 1 Heading' in result 127 | assert result['Level 3 Heading'] == '### Level 3 Heading\nContent at level 3.' 128 | assert result['Level 1 Heading'] == '# Level 1 Heading\nContent at level 1.' 129 | 130 | def test_parent_includes_subheadings(): 131 | md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content." 132 | result = markdown_to_dict(md_content) 133 | assert result['Parent'] == '# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content.' 134 | assert result['Parent.Child'] == '## Child\nChild content.\n### Grandchild\nGrandchild content.' 135 | assert result['Parent.Child.Grandchild'] == '### Grandchild\nGrandchild content.' 
136 | 137 | def test_multiple_level2_siblings(): 138 | md_content = "##Sib 1\n##Sib 2\n##Sib 3\n##Sib 4\n##Sib 5'" 139 | result = markdown_to_dict(md_content) 140 | assert 'Sib 1' in result 141 | assert 'Sib 2' in result 142 | assert 'Sib 3' in result 143 | assert 'Sib 4' in result 144 | assert 'Sib 5' in result 145 | 146 | def test_code_chunks_escaped(): 147 | md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```" 148 | result = markdown_to_dict(md_content) 149 | assert 'Code comment' not in result 150 | assert "# Code comment" in result['Parent.Child'] 151 | 152 | test_empty_content() 153 | test_special_characters() 154 | test_duplicate_headings() 155 | test_no_content() 156 | test_different_levels() 157 | test_parent_includes_subheadings() 158 | test_multiple_level2_siblings() 159 | test_code_chunks_escaped() 160 | print('tests passed') 161 | 162 | def test_nested_headings(): 163 | md_content = "# Parent\nParent content.\n## Child\nChild content.\n### Grandchild\nGrandchild content." 164 | result = create_heading_dict(md_content) 165 | assert 'Child' in result['Parent'] 166 | assert 'Grandchild' in result['Parent']['Child'] 167 | 168 | def test_code_chunks_escaped(): 169 | md_content = "# Parent\nParent content.\n## Child\nChild content.\n```python\n# Code comment\nprint('Hello, world!')\n```" 170 | result = create_heading_dict(md_content) 171 | assert 'Code comment' not in result 172 | 173 | test_nested_headings() 174 | test_code_chunks_escaped() 175 | print('tests passed') -------------------------------------------------------------------------------- /toolslm/shell.py: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../02_shell.ipynb. 

# %% auto 0
__all__ = ['get_shell']

# %% ../02_shell.ipynb 2
import ast, time, signal, traceback
from fastcore.utils import *

# %% ../02_shell.ipynb 4
from IPython.terminal.interactiveshell import TerminalInteractiveShell
from IPython.utils.capture import capture_output

# %% ../02_shell.ipynb 7
# keep a handle on the unwrapped run_cell so the patch below can delegate to it
TerminalInteractiveShell.orig_run = TerminalInteractiveShell.run_cell

# %% ../02_shell.ipynb 8
@patch
def run_cell(self:TerminalInteractiveShell, cell, timeout=None):
    "Wrapper for original `run_cell` which adds timeout and output capture"
    if timeout:
        def handler(*args): raise TimeoutError()
        signal.signal(signal.SIGALRM, handler)  # SIGALRM: Unix-only, main thread only
        signal.alarm(timeout)
    try:
        with capture_output() as io: result = self.orig_run(cell)
        result.stdout = io.stdout
        return result
    except TimeoutError as e:  # was `TimeoutException`, an undefined name (NameError on timeout)
        # NOTE(review): `ExecutionResult` lives on the interactiveshell *module*, not the
        # shell instance; `info=None` avoids depending on `ExecutionInfo`'s signature --
        # confirm against the installed IPython version.
        from IPython.core.interactiveshell import ExecutionResult
        result = ExecutionResult(None)
        result.error_in_exec = e
        return result  # previously fell through and returned None
    finally:
        if timeout: signal.alarm(0)  # always cancel the pending alarm

# %% ../02_shell.ipynb 9
def get_shell()->TerminalInteractiveShell:
    "Get a `TerminalInteractiveShell` with minimal functionality"
    sh = TerminalInteractiveShell()
    sh.logger.log_output = sh.history_manager.enabled = False
    dh = sh.displayhook
    # silence all display output: the shell is driven programmatically
    dh.finish_displayhook = dh.write_output_prompt = dh.start_displayhook = lambda: None
    dh.write_format_data = lambda format_dict, md_dict=None: None
    sh.logstart = sh.automagic = sh.autoindent = False
    sh.autocall = 0
    sh.system = lambda cmd: None  # disable `!cmd` shell escapes
    return sh
# AUTOGENERATED! DO NOT EDIT! File to edit: ../00_xml.ipynb.

# %% auto 0
__all__ = ['doctype', 'json_to_xml', 'mk_doctype', 'mk_doc', 'docs_xml', 'files2ctx', 'folder2ctx', 'folder2ctx_cli']

# %% ../00_xml.ipynb 3
import hashlib,xml.etree.ElementTree as ET
from collections import namedtuple

from fastcore.utils import *
from fastcore.meta import delegates
from fastcore.xtras import hl_md
from fastcore.xml import to_xml, Document, Documents, Document_content, Src
from fastcore.script import call_parse
# was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
try: from IPython import display
except ImportError: display=None

# %% ../00_xml.ipynb 4
def json_to_xml(d:dict, # JSON dictionary to convert
                rnm:str # Root name
               )->str:
    "Convert `d` to XML."
    root = ET.Element(rnm)
    def build_xml(data, parent):
        # dicts become child elements, lists become repeated <item> elements,
        # scalars become text content
        if isinstance(data, dict):
            for key, value in data.items(): build_xml(value, ET.SubElement(parent, key))
        elif isinstance(data, list):
            for item in data: build_xml(item, ET.SubElement(parent, 'item'))
        else: parent.text = str(data)
    build_xml(d, root)
    ET.indent(root)
    return ET.tostring(root, encoding='unicode')

# %% ../00_xml.ipynb 9
doctype = namedtuple('doctype', ['src', 'content'])

# %% ../00_xml.ipynb 11
def _add_nls(s):
    "Add newlines to start and end of `s` if missing"
    if not s: return s
    if s[ 0]!='\n': s = '\n'+s
    if s[-1]!='\n': s = s+'\n'
    return s

# %% ../00_xml.ipynb 16
def mk_doctype(content:str, # The document content
               src:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided
              ) -> namedtuple:
    "Create a `doctype` named tuple"
    if src is None: src = hashlib.md5(content.encode()).hexdigest()[:8]
    return doctype(_add_nls(str(src).strip()), _add_nls(content.strip()))

# %% ../00_xml.ipynb 19
def mk_doc(index:int, # The document index
           content:str, # The document content
           src:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided
           **kwargs
          ) -> tuple:
    "Create an `ft` format tuple for a single doc in Anthropic's recommended format"
    dt = mk_doctype(content, src)
    content = Document_content(NotStr(dt.content))
    src = Src(NotStr(dt.src))
    return Document(src, content, index=index, **kwargs)

# %% ../00_xml.ipynb 22
def docs_xml(docs:list[str], # The content of each document
             srcs:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided
             prefix:bool=True, # Include Anthropic's suggested prose intro?
             details:Optional[list]=None # Optional list of dicts with additional attrs for each doc
            )->str:
    "Create an XML string containing `docs` in Anthropic's recommended format"
    pre = 'Here are some documents for you to reference for your task:\n\n' if prefix else ''
    if srcs is None: srcs = [None]*len(docs)
    if details is None: details = [{}]*len(docs)
    # documents are 1-indexed in the generated XML
    docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,srcs,details)))
    return pre + to_xml(Documents(docs))

# %% ../00_xml.ipynb 29
def files2ctx(
    fnames:list[Union[str,Path]], # List of file names to add to context
    prefix:bool=True # Include Anthropic's suggested prose intro?
)->str: # XML for LM context
    "Read `fnames` and wrap their contents into a documents-XML context string"
    fnames = [Path(o) for o in fnames]
    # NOTE(review): `read_text()` uses the locale's default encoding -- consider an
    # explicit encoding='utf-8'; left unchanged here to preserve current behavior.
    contents = [o.read_text() for o in fnames]
    return docs_xml(contents, fnames, prefix=prefix)

# %% ../00_xml.ipynb 32
@delegates(globtastic)
def folder2ctx(
    folder:Union[str,Path], # Folder name containing files to add to context
    prefix:bool=True, # Include Anthropic's suggested prose intro?
    **kwargs # Passed to `globtastic`
)->str: # XML for Claude context
    "Glob `folder` (per `globtastic` args) and build an XML context from the matched files"
    fnames = globtastic(folder, **kwargs)
    return files2ctx(fnames, prefix=prefix)

# %% ../00_xml.ipynb 34
@call_parse
@delegates(folder2ctx)
def folder2ctx_cli(
    folder:str, # Folder name containing files to add to context
    **kwargs # Passed to `folder2ctx`
)->str: # XML for Claude context
    "CLI entry point: print `folder2ctx` output to stdout"
    print(folder2ctx(folder, **kwargs))