├── docs ├── posts │ ├── .gitignore │ ├── llms-and-data-pt0 │ │ ├── .gitignore │ │ └── index.qmd │ ├── llms-and-data-pt1 │ │ ├── .gitignore │ │ └── index.qmd │ ├── llms-and-data-pt2 │ │ ├── .gitignore │ │ └── index.qmd │ ├── llms-and-data-pt3 │ │ ├── .gitignore │ │ ├── index.qmd │ │ └── index.ipynb │ ├── llms-and-data-pt4 │ │ ├── .gitignore │ │ ├── images │ │ │ └── figure1.png │ │ └── index.qmd │ ├── llms-and-data-pt5 │ │ ├── .gitignore │ │ └── index.qmd │ ├── llms-and-data-pt6 │ │ ├── .gitignore │ │ └── index.qmd │ └── _metadata.yml ├── styles.css ├── how-to │ ├── _metadata.yml │ └── input-output │ │ └── basics.qmd ├── release_notes.md ├── .gitignore ├── images │ ├── bad.png │ ├── ibas.png │ ├── logo.png │ ├── bbrans.png │ ├── brians.png │ ├── ibribans.png │ └── thumbnail.png ├── concepts │ ├── archive │ │ ├── platforms.qmd │ │ ├── ops.qmd │ │ ├── user-interfaces.qmd │ │ └── llms.qmd │ ├── bots.qmd │ ├── tasks.qmd │ ├── flows.qmd │ ├── messages.qmd │ └── attachments.qmd ├── contribute │ └── contributing.qmd ├── demo.qmd ├── posts.qmd ├── _freeze │ ├── concepts │ │ ├── bots │ │ │ └── execute-results │ │ │ │ └── html.json │ │ ├── tasks │ │ │ └── execute-results │ │ │ │ └── html.json │ │ ├── flows │ │ │ └── execute-results │ │ │ │ └── html.json │ │ ├── messages │ │ │ └── execute-results │ │ │ │ └── html.json │ │ └── attachments │ │ │ └── execute-results │ │ │ └── html.json │ ├── install │ │ └── execute-results │ │ │ └── html.json │ ├── posts │ │ ├── llms-and-data-pt4 │ │ │ └── index │ │ │ │ └── execute-results │ │ │ │ └── html.json │ │ └── llms-and-data-pt2 │ │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── site_libs │ │ ├── quarto-listing │ │ │ └── quarto-listing.js │ │ └── clipboard │ │ │ └── clipboard.min.js │ └── tutorials │ │ └── python │ │ └── execute-results │ │ └── html.json ├── install.qmd ├── index.qmd ├── why.qmd ├── tutorials │ ├── python.qmd │ └── cli.qmd └── _quarto.yml ├── presentation ├── _quarto.yml ├── images │ └── duckdb-nsql.png └── index.qmd ├── src └── ibis_birdbrain │ ├── __main__.py │ ├── __init__.py │ ├── logging │ └── __init__.py │ ├── commands │ ├── __init__.py │ ├── testing.py │ └── ipy.py │ ├── messages │ ├── email.py │ └── __init__.py │ ├── utils │ ├── messages.py │ ├── strings.py │ ├── web.py │ └── attachments.py │ ├── attachments │ ├── viz.py │ ├── text.py │ ├── data.py │ └── __init__.py │ ├── cli.py │ ├── tasks │ ├── __init__.py │ └── sql.py │ ├── flows │ ├── __init__.py │ └── data.py │ ├── strings.py │ ├── app.py │ └── bot.py ├── dev-requirements.txt ├── README.md ├── .github └── workflows │ └── docs.yaml ├── pyproject.toml ├── justfile ├── .gitignore └── LICENSE /docs/posts/.gitignore: -------------------------------------------------------------------------------- 1 | *.ipynb 2 | -------------------------------------------------------------------------------- /docs/styles.css: -------------------------------------------------------------------------------- 1 | /* css styles */ 2 | -------------------------------------------------------------------------------- /docs/how-to/_metadata.yml: -------------------------------------------------------------------------------- 1 | code-annotations: hover 2 | -------------------------------------------------------------------------------- /docs/release_notes.md: -------------------------------------------------------------------------------- 1 | # Release notes 2 | 3 | TBD 4 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt0/.gitignore: 
-------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt1/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt2/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt3/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt4/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt5/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt6/.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | *.ddb* 3 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _site 2 | objects.json 3 | reference/ 4 | /.quarto/ 5 | -------------------------------------------------------------------------------- /presentation/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | output-dir: _output -------------------------------------------------------------------------------- /docs/images/bad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/bad.png -------------------------------------------------------------------------------- /docs/images/ibas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/ibas.png -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/logo.png -------------------------------------------------------------------------------- /docs/how-to/input-output/basics.qmd: -------------------------------------------------------------------------------- 1 | # Basic input/output 2 | 3 | You can... 
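For example, a minimal input/output round trip with the bot might look like the sketch below. This is illustrative only: it assumes a local DuckDB database containing a `penguins` table (as in the Python tutorial), and the question passed to the bot is a placeholder.

```python
import ibis

from ibis_birdbrain import Bot

# connect to a data platform via Ibis (assumed to contain a `penguins` table)
con = ibis.connect("duckdb://penguins.ddb")

# input: a natural-language message to the bot
bot = Bot(con=con, data_description="the Palmer Penguins dataset")
res = bot("count penguins by species and island")

# output: a Message whose attachments carry the results, e.g. an Ibis table
t = res.attachments[-1].open()
```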
4 | 5 | ## Overview 6 | 7 | -------------------------------------------------------------------------------- /docs/images/bbrans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/bbrans.png -------------------------------------------------------------------------------- /docs/images/brians.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/brians.png -------------------------------------------------------------------------------- /src/ibis_birdbrain/__main__.py: -------------------------------------------------------------------------------- 1 | from ibis_birdbrain.cli import app 2 | 3 | app(prog_name="birdbrain") 4 | -------------------------------------------------------------------------------- /docs/images/ibribans.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/ibribans.png -------------------------------------------------------------------------------- /docs/images/thumbnail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/images/thumbnail.png -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | # python 2 | ruff 3 | build 4 | twine 5 | nbclient 6 | ipykernel 7 | 8 | # docs 9 | quartodoc 10 | -------------------------------------------------------------------------------- /presentation/images/duckdb-nsql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/presentation/images/duckdb-nsql.png -------------------------------------------------------------------------------- /src/ibis_birdbrain/__init__.py: -------------------------------------------------------------------------------- 1 | # imports 2 | 3 | # exports 4 | from ibis_birdbrain.bot import Bot 5 | 6 | __all__ = ["Bot"] 7 | -------------------------------------------------------------------------------- /docs/concepts/archive/platforms.qmd: -------------------------------------------------------------------------------- 1 | # Data and AI platforms 2 | 3 | Data and AI platforms are... 4 | 5 | ## Data platforms 6 | 7 | ## AI platforms 8 | 9 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt4/images/figure1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ibis-project/ibis-birdbrain/HEAD/docs/posts/llms-and-data-pt4/images/figure1.png -------------------------------------------------------------------------------- /docs/contribute/contributing.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | hide: 3 | - toc 4 | --- 5 | 6 | # Contributing 7 | 8 | ## Required dependencies 9 | 10 | To contribute... 
11 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/logging/__init__.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import logging as log 3 | 4 | # config 5 | log.basicConfig(level=log.INFO) 6 | 7 | # exports 8 | __all__ = ["log"] 9 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # imports 2 | 3 | # exports 4 | from ibis_birdbrain.commands.ipy import ipy_run 5 | from ibis_birdbrain.commands.testing import testing_run 6 | 7 | __all__ = ["ipy_run", "testing_run"] 8 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/commands/testing.py: -------------------------------------------------------------------------------- 1 | def testing_run(): 2 | from rich.console import Console 3 | 4 | console = Console() 5 | console.print("testing: ", style="bold violet", end="") 6 | console.print("done...") 7 | -------------------------------------------------------------------------------- /docs/concepts/bots.qmd: -------------------------------------------------------------------------------- 1 | # Bots 2 | 3 | Ibis Birdbrain implements a `Bot` class that can be used to instantiate one or more bots that automate various tasks. 4 | 5 | ## Usage 6 | 7 | ```{python} 8 | from ibis_birdbrain import Bot 9 | Bot 10 | ``` 11 | -------------------------------------------------------------------------------- /docs/demo.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "demo" 3 | format: 4 | html: 5 | page-layout: full 6 | --- 7 | 8 | ```{=html} 9 | 10 | ``` 11 | 12 | -------------------------------------------------------------------------------- /docs/concepts/archive/ops.qmd: -------------------------------------------------------------------------------- 1 | # LLMOps, MLOps, DevOps 2 | 3 | The ML landscape is plagued by "the toy problem" -- building something cool is easy, putting something in production is hard. 4 | 5 | ## The original MLOps paper 6 | 7 | 8 | ## Learnings from industry 9 | 10 | 11 | -------------------------------------------------------------------------------- /docs/posts.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Posts" 3 | listing: 4 | contents: posts 5 | sort: "date desc" 6 | image-placeholder: images/logo.png 7 | #type: default 8 | type: grid 9 | categories: true 10 | sort-ui: true 11 | filter-ui: true 12 | feed: true 13 | page-layout: full 14 | --- 15 | -------------------------------------------------------------------------------- /docs/concepts/tasks.qmd: -------------------------------------------------------------------------------- 1 | # Tasks 2 | 3 | Ibis Birdbrain's [`Flow`](./flows.qmd) executes one or more `Tasks` to accomplish its goal. A `Task` is a single unit of work that takes a [`Message`](./messages.qmd) as input and returns a `Message` as output. 
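To make this concrete, a hypothetical `Task` subclass might look like the sketch below; the `EchoTask` name and behavior are illustrative and not part of the library (it assumes, as `Email` does, that a `Message` exposes a `body` attribute). The built-in usage follows.

```python
from ibis_birdbrain.messages import Email, Message
from ibis_birdbrain.tasks import Task


class EchoTask(Task):
    """Hypothetical task that replies with the body of the incoming message."""

    def __init__(self):
        super().__init__(name="echo", description="echo the input message")

    def __call__(self, m: Message) -> Message:
        # assumes the incoming Message exposes a `body` attribute, as Email does
        return Email(body=f"echo: {m.body}")
```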
4 | 5 | ## Usage 6 | 7 | ```{python} 8 | from ibis_birdbrain.tasks import Task, Tasks 9 | 10 | task = Task() 11 | task 12 | ``` 13 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/commands/ipy.py: -------------------------------------------------------------------------------- 1 | def ipy_run(interactive=False): 2 | # imports 3 | import ibis 4 | import IPython 5 | 6 | from ibis_birdbrain import Bot 7 | 8 | # config 9 | ibis.options.interactive = True 10 | ibis.options.repr.interactive.max_rows = 20 11 | ibis.options.repr.interactive.max_columns = None 12 | 13 | # start IPython 14 | IPython.embed(colors="neutral") 15 | -------------------------------------------------------------------------------- /docs/concepts/flows.qmd: -------------------------------------------------------------------------------- 1 | # Flows 2 | 3 | Ibis Birdbrain's [`Bot`](./bot.qmd) chooses a `Flow` to execute based on [`Messages`](./messages.qmd). 4 | 5 | A Flow takes Messages as input and returns Messages as output. The details of a given Flow are specific to itself, running a series of [`Tasks`](./tasks.qmd) to accomplish its goal. 6 | 7 | ## Usage 8 | 9 | ```{python} 10 | from ibis_birdbrain.flows import Flow, Flows 11 | 12 | flow = Flow() 13 | flow 14 | ``` 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ibis Birdbrain 2 | 3 | The portable Python ML-powered data bot for your data project. Powered by 4 | [Ibis](https://ibis-project.org), your data platform, 5 | [Marvin](https://github.com/prefectHQ/marvin), and your ML platform. 6 | 7 | > [!WARNING] 8 | > Highly experimental. Effectively abandoned. Made for cool demos. If anyone's interested in forking this or understanding the learnings, reach out! 9 | > 10 | > May eventually be under re-construction by slow humans. 
11 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/messages/email.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from ibis_birdbrain.messages import Message 3 | 4 | 5 | # classes 6 | class Email(Message): 7 | """An email.""" 8 | 9 | def __str__(self): 10 | return f"""To: {self.to_address} 11 | From: {self.from_address} 12 | Subject: {self.subject} 13 | Sent at: {self.created_at} 14 | Message: {self.id} 15 | 16 | {self.body} 17 | 18 | Attachments: 19 | 20 | {self.attachments}\n""" 21 | 22 | def __repr__(self): 23 | return str(self) 24 | -------------------------------------------------------------------------------- /docs/posts/_metadata.yml: -------------------------------------------------------------------------------- 1 | # options specified here will apply to all posts in this folder 2 | 3 | # freeze computational output 4 | # (see https://quarto.org/docs/projects/code-execution.html#freeze) 5 | freeze: auto 6 | 7 | # Enable banner style title blocks 8 | title-block-banner: true 9 | 10 | execute: 11 | warning: false 12 | 13 | # code annoations 14 | code-annotations: hover 15 | 16 | # comments # TODO: enable 17 | # comments: 18 | # giscus: 19 | # repo: ibis-project/ibis 20 | # category: Q&A 21 | # reactions-enabled: false 22 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/utils/messages.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from typing import Any 3 | from ibis_birdbrain.messages import Message, Email 4 | 5 | from ibis_birdbrain.utils.attachments import to_attachment 6 | 7 | 8 | # functions 9 | def to_message(text: str, stuff: list[Any] = []) -> Message: 10 | """Convert text and stuff into a message with attachments.""" 11 | attachments = [] 12 | for thing in stuff: 13 | attachment = to_attachment(thing) 14 | if attachment is not None: 15 | attachments.append(attachment) 16 | 17 | return Email(body=text, attachments=attachments) 18 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/attachments/viz.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from plotly.graph_objs import Figure 3 | 4 | from ibis_birdbrain.attachments import Attachment 5 | 6 | 7 | # classes 8 | class ChartAttachment(Attachment): 9 | """A chart attachment.""" 10 | 11 | content: Figure 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | 16 | def encode(self): 17 | ... 18 | 19 | def decode(self): 20 | ... 
21 | 22 | def __str__(self): 23 | return ( 24 | super().__str__() 25 | + f""" 26 | **chart**:\n{self.content}""" 27 | ) 28 | -------------------------------------------------------------------------------- /docs/_freeze/concepts/bots/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "bd2906497289e9de4361517b57031090", 3 | "result": { 4 | "engine": "jupyter", 5 | "markdown": "---\ntitle: Bots\n---\n\n\n\nIbis Birdbrain implements a `Bot` calss that can be used to instantiate one or more bots that automate various tasks.\n\n## Usage\n\n\n::: {#a5d8d0d4 .cell execution_count=1}\n``` {.python .cell-code}\nfrom ibis_birdbrain import Bot\nBot\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\nibis_birdbrain.bot.Bot\n```\n:::\n:::\n\n\n", 6 | "supporting": [ 7 | "bots_files" 8 | ], 9 | "filters": [], 10 | "includes": {} 11 | } 12 | } -------------------------------------------------------------------------------- /docs/concepts/messages.qmd: -------------------------------------------------------------------------------- 1 | # Messages 2 | 3 | Ibis Birdbrain communicates with the user, itself, and (eventually) other bots through `Messages`. A `Message` is a simple wrapper around text with metadata and optional [`Attachments`](./attachments.qmd). 4 | 5 | 6 | ## Usage 7 | 8 | ```{python} 9 | from ibis_birdbrain.messages import Message, Messages, Email 10 | 11 | m1 = Message("Hello, world!") 12 | m1 13 | ``` 14 | 15 | ## Emails 16 | 17 | Currently, the only implementation of `Message` that is viewable as a proper string is `Email`. 18 | 19 | ```{python} 20 | e1 = Email("Hello") 21 | e2 = Email(", world!") 22 | 23 | messages = Messages([e1, e2]) 24 | messages 25 | ``` 26 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/cli.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ibis Birdbrain CLI. 3 | """ 4 | 5 | # imports 6 | import typer 7 | 8 | from typing_extensions import Annotated, Optional 9 | 10 | from ibis_birdbrain.commands import ipy_run, testing_run 11 | 12 | # typer config 13 | app = typer.Typer(no_args_is_help=True) 14 | 15 | 16 | # subcommands 17 | @app.command() 18 | def ipy(): 19 | """ 20 | ipy 21 | """ 22 | ipy_run() 23 | 24 | 25 | @app.command() 26 | def test(): 27 | """ 28 | test 29 | """ 30 | testing_run() 31 | 32 | 33 | # main 34 | @app.callback() 35 | def cli(): 36 | return 37 | 38 | 39 | ## main 40 | if __name__ == "__main__": 41 | typer.run(cli) 42 | -------------------------------------------------------------------------------- /docs/_freeze/concepts/tasks/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "ea522272a8d0a666dacb53873801a1aa", 3 | "result": { 4 | "engine": "jupyter", 5 | "markdown": "---\ntitle: Tasks\n---\n\n\n\nIbis Birdbrain's [`Flow`](./flow.qmd) executes one or more `Tasks` to accomplish its goal. 
A `Task` is a single unit of work that takes a [`Message`](./message.qmd) as input and returns a `Message` as output.\n\n## Usage\n\n\n::: {#ef7b5518 .cell execution_count=1}\n``` {.python .cell-code}\nfrom ibis_birdbrain.tasks import Task, Tasks\n\ntask = Task()\ntask\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\n\n```\n:::\n:::\n\n\n", 6 | "supporting": [ 7 | "tasks_files" 8 | ], 9 | "filters": [], 10 | "includes": {} 11 | } 12 | } -------------------------------------------------------------------------------- /docs/_freeze/concepts/flows/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "5ca217bfa5ee488df2c050375105b2a1", 3 | "result": { 4 | "engine": "jupyter", 5 | "markdown": "---\ntitle: Flows\n---\n\n\n\nIbis Birdbrain's [`Bot`](./bot.qmd) chooses a `Flow` to execute based on [`Messages`](./messages.qmd).\n\nA Flow takes Messages as input and returns Messages as output. The details of a given Flow are specific to itself, running a series of [`Tasks`](./tasks.qmd) to accomplish its goal.\n\n## Usage\n\n\n::: {#254b2753 .cell execution_count=1}\n``` {.python .cell-code}\nfrom ibis_birdbrain.flows import Flow, Flows\n\nflow = Flow()\nflow\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\n\n```\n:::\n:::\n\n\n", 6 | "supporting": [ 7 | "flows_files" 8 | ], 9 | "filters": [], 10 | "includes": {} 11 | } 12 | } -------------------------------------------------------------------------------- /docs/install.qmd: -------------------------------------------------------------------------------- 1 | # Installation and setup 2 | 3 | This page describes how to install and setup Ibis Birdbrain. 4 | 5 | ## Install from PyPI 6 | 7 | ```bash 8 | pip install ibis-birdbrain 9 | ``` 10 | 11 | ## Data platform setup 12 | 13 | Create an Ibis connection: 14 | 15 | :::{.callout-warning} 16 | Only DuckDB is installed by default. For now, you need to install other backends 17 | manually with Ibis. 18 | ::: 19 | 20 | ```{python} 21 | import ibis 22 | 23 | backend = "duckdb" 24 | backend_uri = "" 25 | 26 | con = ibis.connect(f"{backend}://{backend_uri}") 27 | con 28 | ``` 29 | 30 | ## AI platform setup 31 | 32 | See [Marvin documentation for 33 | setup](https://www.askmarvin.ai/docs/configuration/settings/). 34 | 35 | :::{.callout-warning} 36 | Only OpenAI and Azure OpenAI are supported for now. 37 | ::: 38 | 39 | ## Next steps 40 | 41 | [Learn how get started with Ibis Birdbrain's CLI](tutorials/cli.qmd). 42 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/utils/strings.py: -------------------------------------------------------------------------------- 1 | # imports 2 | 3 | 4 | # functions 5 | def estimate_tokens(s: str) -> int: 6 | """Estimates the number of tokens in a string.""" 7 | 8 | return len(s) // 4 9 | 10 | 11 | def shorten_str(s: str, max_len: int = 27) -> str: 12 | """Converts a string to a display string.""" 13 | 14 | if len(s) > max_len: 15 | return f"{s[:max_len]}..." 
16 | else: 17 | return s 18 | 19 | 20 | def str_to_list_of_str(s: str, max_chunk_len: int = 1000, sep: str = "\n") -> list[str]: 21 | """Splits a string into a list of strings.""" 22 | 23 | result = [] 24 | 25 | # TODO: better string chunking algorithm 26 | # split the string into chunks 27 | chunks = [s[i : i + max_chunk_len] for i in range(0, len(s), max_chunk_len)] 28 | 29 | # split the chunks into lines 30 | for chunk in chunks: 31 | result.extend(chunk.split(sep)) 32 | 33 | return result 34 | -------------------------------------------------------------------------------- /docs/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Ibis Birdbrain" 3 | description: "the portable Python LM-powered data bot" 4 | repo-actions: false 5 | code-annotations: hover 6 | format: 7 | html: 8 | toc: false 9 | about: 10 | id: about 11 | template: jolla 12 | image: images/logo.png 13 | links: 14 | - icon: info-circle 15 | text: Why Ibis Birdbrain? 16 | href: /why/ 17 | - icon: download 18 | text: Installation 19 | href: /install/ 20 | - icon: book 21 | text: "Tutorial: getting started" 22 | href: /tutorials/python/ 23 | - icon: github 24 | text: GitHub 25 | href: https://github.com/ibis-project/ibis-birdbrain 26 | - icon: slack 27 | text: Chat 28 | href: https://ibis-project.zulipchat.com 29 | # - icon: rss 30 | # text: RSS 31 | # href: https://ibis-project.github.io/ibis-birdbrain/posts.xml 32 | --- 33 | 34 | ::: {#about} 35 | ::: 36 | 37 | ::: {.callout-warning} 38 | Ibis Birdbrain is highly experimental and currently under construction by slow humans. Please check back soon! 39 | ::: 40 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/utils/web.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import requests 3 | import webbrowser 4 | 5 | 6 | from itertools import islice 7 | from html2text import html2text 8 | from duckduckgo_search import DDGS 9 | 10 | 11 | # functions 12 | def open_browser(url: str) -> str: 13 | """Opens the URL in a web browser.""" 14 | try: 15 | webbrowser.open(url.strip("/")) 16 | except Exception as e: 17 | return str(e) 18 | 19 | return f"Opened {url} in web browser." 20 | 21 | 22 | def search_internet(query: str, n_results: int = 10) -> list[dict]: 23 | """Searches the internet for n results.""" 24 | ddgs = DDGS() 25 | return [r for r in islice(ddgs.text(query, backend="lite"), n_results)] 26 | 27 | 28 | def webpage_to_str(url: str = "https://ibis-project.org") -> str: 29 | """Reads a webpage link into a string.""" 30 | response = requests.get(url) 31 | return ( 32 | html2text(response.text) 33 | # .replace("\n", " ") 34 | # .replace("\r", " ") 35 | # .replace("\t", " ") 36 | # .replace(" ", " ") 37 | ) 38 | -------------------------------------------------------------------------------- /docs/concepts/attachments.qmd: -------------------------------------------------------------------------------- 1 | # Attachments 2 | 3 | Ibis Birdbrain passes Python objects as `Attachments` to [`Messages`](./messages.qmd). This allows the user, itself, and (eventually) other bots to interact with data, code, and more. 
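Under the hood, arbitrary Python objects are converted to an appropriate attachment type by `ibis_birdbrain.utils.attachments.to_attachment` -- plain strings become text attachments, URLs become webpage attachments, Ibis backends and tables become data and table attachments, and Plotly figures become chart attachments. A minimal sketch of that conversion, shown before the usage examples below:

```python
import ibis

from ibis_birdbrain.utils.attachments import to_attachment

t = ibis.examples.penguins.fetch()

to_attachment("Hello, world!")             # TextAttachment
to_attachment("https://ibis-project.org")  # WebpageAttachment (fetches the page)
to_attachment(t)                           # TableAttachment
```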
4 | 5 | 6 | ## Usage 7 | 8 | ```{python} 9 | from ibis_birdbrain.attachments import Attachment, Attachments 10 | 11 | a1 = Attachment(content="Hello, world!") 12 | a1 13 | ``` 14 | 15 | ## TableAttachment 16 | 17 | A `TableAttachment` contains an Ibis table: 18 | 19 | ```{python} 20 | import ibis 21 | 22 | from ibis_birdbrain.attachments import TableAttachment 23 | 24 | ibis.options.interactive = True 25 | 26 | t = ibis.examples.penguins.fetch() 27 | 28 | a2 = TableAttachment(content=t) 29 | a2 30 | ``` 31 | 32 | Notice the name, description (schema), and preview are automatically populated. 33 | 34 | ## CodeAttachment 35 | 36 | A `CodeAttachment` contains code -- typically Python or SQL: 37 | 38 | ```{python} 39 | from ibis_birdbrain.attachments import CodeAttachment 40 | 41 | a3 = CodeAttachment(content="select 1 as id", language="sql") 42 | a3 43 | ``` 44 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | paths: 7 | - "docs/**" 8 | - ".github/workflows/docs.yaml" 9 | 10 | permissions: 11 | contents: write 12 | 13 | jobs: 14 | build-docs: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - name: Install Python 20 | uses: actions/setup-python@v4 21 | with: 22 | python-version: "3.11" 23 | 24 | - name: Install Quarto 25 | uses: quarto-dev/quarto-actions/setup@v2 26 | with: 27 | version: "1.4.398" 28 | 29 | - name: Install ibis-birdbrain and dependencies 30 | run: | 31 | pip install -r dev-requirements.txt 32 | pip install -e "." 33 | 34 | - name: Build Docs 35 | run: | 36 | pushd docs 37 | quarto render 38 | popd 39 | 40 | - name: Deploy 41 | uses: peaceiris/actions-gh-pages@v3 42 | if: github.ref == 'refs/heads/main' 43 | with: 44 | github_token: ${{ secrets.GITHUB_TOKEN }} 45 | publish_dir: ./docs/_site 46 | -------------------------------------------------------------------------------- /docs/why.qmd: -------------------------------------------------------------------------------- 1 | # Why Ibis Birdbrain? 2 | 3 | [Ibis](https://ibis-project.org) is the portable Python dataframe library. 4 | 5 | Ibis Birdbrain is the portable Python LM-powered data bot, built on Ibis with 6 | support for all data platforms Ibis's 20+ backends support. Ibis Birdbrain is 7 | also built on [Marvin](https://github.com/PrefectHQ/marvin) with support for all 8 | AI platforms Marvin supports. 9 | 10 | ## Language models (LMs) 11 | 12 | Language models have... 13 | 14 | ## Standards 15 | 16 | > > When things don't work as they should, it often means that standards are absent. 17 | > > 18 | > > \- [The International Organization for Standardization (ISO)](https://www.iso.org/standards.html) 19 | > 20 | > \- [The Composable Codex](https://voltrondata.com/codex/standards-over-silos) 21 | 22 | ## Composable data systems are MICE 23 | 24 | [Composable data systems are MICE: modular, interoperable, customizable, and extensible](https://voltrondata.com/codex/standards-over-silos). 25 | 26 | Ibis Birdbrain aims to bring these values to a data + AI framework that makes working with LLMs delightful and easy, for individual hobbyists and advanced technology organizations alike. 
27 | 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.hatch.metadata] 2 | allow-direct-references = true 3 | 4 | [build-system] 5 | requires = ["hatchling"] 6 | build-backend = "hatchling.build" 7 | 8 | [project] 9 | name = "ibis-birdbrain" 10 | version = "0.2.0" 11 | authors = [ 12 | { name="Cody", email="cody@dkdc.dev" }, 13 | ] 14 | description = "the portable Python LM-powered data bot" 15 | readme = "README.md" 16 | requires-python = ">=3.11" 17 | classifiers = [ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", 21 | ] 22 | dependencies = [ 23 | 'ipython', 24 | 'python-dotenv', 25 | 'toml', 26 | 'httpx', 27 | 'Pillow', 28 | 'typer[all]', 29 | 'requests', 30 | 'html2text', 31 | 'duckduckgo-search', 32 | 'marvin>2,<3', 33 | 'ibis-framework[duckdb,examples]', # @ git+https://github.com/ibis-project/ibis', 34 | 'plotly', 35 | 'streamlit', 36 | 'sqlglot', 37 | 'levenshtein', 38 | ] 39 | 40 | [project.urls] 41 | "Homepage" = "https://github.com/ibis-project/ibis-birdbrain" 42 | "Bug Tracker" = "https://github.com/ibis-project/ibis-birdbrain/issues" 43 | 44 | [project.scripts] 45 | birdbrain = "ibis_birdbrain.cli:app" 46 | -------------------------------------------------------------------------------- /docs/_freeze/install/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "5d2f1ac330c63e71709868d483412e5f", 3 | "result": { 4 | "engine": "jupyter", 5 | "markdown": "---\ntitle: Installation and setup\n---\n\n\n\nThis page describes how to install and setup Ibis Birdbrain.\n\n## Install from PyPI\n\n```bash\npip install ibis-birdbrain\n```\n\n## Data platform setup\n\nCreate an Ibis connection:\n\n:::{.callout-warning}\nOnly DuckDB is installed by default. For now, you need to install other backends\nmanually with Ibis.\n:::\n\n\n::: {#9af055c5 .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\n\nbackend = \"duckdb\"\nbackend_uri = \"\"\n\ncon = ibis.connect(f\"{backend}://{backend_uri}\")\ncon\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\n\n```\n:::\n:::\n\n\n## AI platform setup\n\nSee [Marvin documentation for\nsetup](https://www.askmarvin.ai/docs/configuration/settings/).\n\n:::{.callout-warning}\nOnly OpenAI and Azure OpenAI are supported for now.\n:::\n\n## Next steps\n\n[Learn how get started with Ibis Birdbrain's CLI](tutorials/cli.qmd).\n\n", 6 | "supporting": [ 7 | "install_files" 8 | ], 9 | "filters": [], 10 | "includes": {} 11 | } 12 | } -------------------------------------------------------------------------------- /docs/concepts/archive/user-interfaces.qmd: -------------------------------------------------------------------------------- 1 | # User interfaces 2 | 3 | A user interface (UI) in our context is the mechanism by which a human interacts with software. 4 | 5 | ## Command-line interface 6 | 7 | A command-line interface (CLI) is a UI for interacting with a computer program where the user (or client) issues commands to the program in the form of successive lines of text (command lines). CLIs are great for automating tasks and often user-friendly, with a short learning curve (if designed well). 8 | 9 | CLIs are great for containing common data analysis tasks expressed in a single, configurable command. 
However, remembering the exact syntax of a command can be difficult, especially if the command is not used frequently. 10 | 11 | ## Python 12 | 13 | Python can be an interactive UI in a terminal, notebook, IDE, or other setup to serve as a user-interface for data analytics. 14 | 15 | ## Language user interface 16 | 17 | Ibis Birdbrain aims to implement a language user interface (LUI) for data analytics, providing additional AI assistance in both a CLI and Python interface. This enables users to interact with a bot through natural language that can perform tasks on their behalf. 18 | 19 | :::{.callout-warning} 20 | This LUI paradigm is highly experimental and should be put in production (automated) with caution. 21 | ::: 22 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/utils/attachments.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from typing import Any 3 | 4 | from plotly.graph_objs import Figure 5 | 6 | from ibis.expr.types import Table 7 | from ibis.backends.base import BaseBackend 8 | 9 | from ibis_birdbrain.attachments import ( 10 | Attachment, 11 | TextAttachment, 12 | DataAttachment, 13 | TableAttachment, 14 | ChartAttachment, 15 | WebpageAttachment, 16 | ) 17 | 18 | 19 | # functions 20 | def to_attachment(thing: Any) -> Attachment | None: 21 | """Converts a thing to an attachment.""" 22 | if isinstance(thing, Attachment): 23 | return thing 24 | elif isinstance(thing, str): 25 | if thing.startswith("http"): 26 | return WebpageAttachment(thing) 27 | else: 28 | return TextAttachment(thing) 29 | elif isinstance(thing, BaseBackend): 30 | return DataAttachment(thing) 31 | elif isinstance(thing, Table): 32 | return TableAttachment(thing) 33 | elif isinstance(thing, Figure): 34 | return ChartAttachment(thing) 35 | 36 | return None 37 | 38 | 39 | def to_attachments(things: list[Any]) -> list[Attachment]: 40 | """Converts a list of things to a list of attachments.""" 41 | return [ 42 | to_attachment(thing) for thing in things if to_attachment(thing) is not None 43 | ] 44 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from ibis_birdbrain.messages import Message 3 | 4 | 5 | # classes 6 | class Task: 7 | """Ibis Birdbrain task.""" 8 | 9 | name: str | None 10 | description: str | None 11 | 12 | def __init__( 13 | self, 14 | name=None, 15 | description=None, 16 | ): 17 | self.name = name 18 | self.description = description 19 | 20 | def __call__(self, m: Message) -> Message: 21 | raise NotImplementedError 22 | 23 | 24 | class Tasks: 25 | """Ibis Birdbrain tasks.""" 26 | 27 | tasks: dict[str, Task] 28 | 29 | def __init__(self, tasks: list[Task] = []) -> None: 30 | """Initialize the flows.""" 31 | self.tasks = {t.name: t for t in tasks} 32 | 33 | def __getitem__(self, id: str | int) -> Task: 34 | """Get a task by its name, index, or a text description.""" 35 | if id in self.tasks.keys(): 36 | return self.tasks[id] 37 | elif id in range(len(self.tasks)): 38 | return self.tasks[list(self.tasks.keys())[id]] 39 | else: 40 | # TODO: implement LM magic 41 | raise KeyError 42 | 43 | def select_task(self, ms: Message, instructions: str) -> Task: 44 | """Select a single task.""" 45 | raise NotImplementedError 46 | 47 | 48 | # exports 49 | __all__ = [ 50 | "Task", 51 | "Tasks", 52 | ] 53 | 
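# a hypothetical usage sketch (not executed; shown for clarity):
#
#   tasks = Tasks([Task(name="sql", description="write and run SQL")])
#   tasks["sql"]  # look up a task by name
#   tasks[0]      # or by positional index
#
# note: Task.__call__ and Tasks.select_task raise NotImplementedError in this
# base module; concrete tasks (e.g. in tasks/sql.py) are expected to override
# __call__.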
-------------------------------------------------------------------------------- /docs/_freeze/concepts/messages/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "a1999ce28adcecc054b19196cb9fa5fb", 3 | "result": { 4 | "engine": "jupyter", 5 | "markdown": "---\ntitle: Messages\n---\n\n\n\nIbis Birdbrain communicates with the user, itself, and (eventually) other bots through `Messages`. A `Message` is a simple wrapper around text with metadata and optional [`Attachments`](./attachments.qmd).\n\n\n## Usage\n\n\n::: {#9c0146d7 .cell execution_count=1}\n``` {.python .cell-code}\nfrom ibis_birdbrain.messages import Message, Messages, Email\n\nm1 = Message(\"Hello, world!\")\nm1\n```\n\n::: {.cell-output .cell-output-display execution_count=1}\n```\nMessage(5956d627-9038-4bb4-b827-1d9ed646c75e)\n```\n:::\n:::\n\n\n## Emails\n\nCurrently, the only implementation of `Message` that is viewable as a proper string is `Email`.\n\n::: {#2e43c663 .cell execution_count=2}\n``` {.python .cell-code}\ne1 = Email(\"Hello\")\ne2 = Email(\", world!\")\n\nmessages = Messages([e1, e2])\nmessages\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```\nTo: \nFrom: \nSubject: \nSent at: 2024-03-05 11:22:52.258182\nMessage: e855820a-eedd-4229-96d1-7ae525800995\n\nHello\n\nAttachments:\n\n\n---\nTo: \nFrom: \nSubject: \nSent at: 2024-03-05 11:22:52.258207\nMessage: 50a43477-12d3-4138-b5ff-38fd5b2b7704\n\n, world!\n\nAttachments:\n\n```\n:::\n:::\n\n\n", 6 | "supporting": [ 7 | "messages_files" 8 | ], 9 | "filters": [], 10 | "includes": {} 11 | } 12 | } -------------------------------------------------------------------------------- /src/ibis_birdbrain/flows/__init__.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from ibis_birdbrain.tasks import Tasks 3 | from ibis_birdbrain.logging import log 4 | from ibis_birdbrain.messages import Messages 5 | 6 | 7 | # classes 8 | class Flow: 9 | """Ibis Birdbrain flow.""" 10 | 11 | name: str | None 12 | tasks: Tasks 13 | description: str | None 14 | 15 | def __init__( 16 | self, 17 | name=None, 18 | tasks=None, 19 | description=None, 20 | ): 21 | self.name = name 22 | self.tasks = tasks 23 | self.description = description 24 | 25 | def __call__(self, ms: Messages) -> Messages: 26 | raise NotImplementedError 27 | 28 | 29 | class Flows: 30 | """Ibis Birdbrain flows.""" 31 | 32 | flows: dict[str, Flow] 33 | 34 | def __init__(self, flows: list[Flow] = []) -> None: 35 | """Initialize the flows.""" 36 | self.flows = {f.name: f for f in flows} 37 | 38 | def __getitem__(self, id: str | int) -> Flow: 39 | """Get a flow by its name, index, or a text description.""" 40 | if id in self.flows.keys(): 41 | return self.flows[id] 42 | elif id in range(len(self.flows)): 43 | return self.flows[list(self.flows.keys())[id]] 44 | else: 45 | # TODO: implement LM magic 46 | raise KeyError 47 | 48 | def __len__(self) -> int: 49 | return len(self.flows) 50 | 51 | def select_flow(self, messages: Messages = None, instructions: str = None) -> Flow: 52 | """Select a single flow.""" 53 | if len(self) == 1: 54 | flow = self[0] 55 | log.info(f"Selected flow: {flow.name}") 56 | return flow 57 | raise NotImplementedError 58 | 59 | 60 | # exports 61 | __all__ = [ 62 | "Flow", 63 | "Flows", 64 | ] 65 | -------------------------------------------------------------------------------- /justfile: 
-------------------------------------------------------------------------------- 1 | # Justfile 2 | 3 | # load environment variables 4 | set dotenv-load 5 | 6 | # aliases 7 | alias fmt := format 8 | alias pres := presentation 9 | alias plan := planning 10 | alias project := planning 11 | alias marvin-docs := docs-marvin 12 | alias docs-preview := preview 13 | 14 | # list justfile recipes 15 | default: 16 | just --list 17 | 18 | # ipy 19 | ipy *args: 20 | birdbrain ipy {{ args }} 21 | 22 | # install 23 | install: 24 | @uv pip install -e '.' 25 | 26 | # setup 27 | setup: 28 | @pip install uv 29 | @uv pip install -r dev-requirements.txt 30 | just install 31 | 32 | # build 33 | build: 34 | just clean 35 | @python -m build 36 | 37 | # uninstall 38 | uninstall: 39 | @pip uninstall ibis-birdbrain -y 40 | 41 | # publish-test 42 | release-test: 43 | just build 44 | @twine upload --repository testpypi dist/* -u __token__ -p ${PYPI_TEST_KEY} 45 | 46 | # publish 47 | release: 48 | just build 49 | @twine upload dist/* -u __token__ -p ${PYPI_KEY} 50 | 51 | # quarto stuff 52 | preview: 53 | @quarto preview docs 54 | 55 | # streamlit stuff 56 | app: 57 | @streamlit run src/ibis_birdbrain/app.py 58 | 59 | # format 60 | format: 61 | ruff format . 62 | 63 | # smoke-test 64 | smoke-test: 65 | black --check . 66 | 67 | # clean 68 | clean: 69 | @rm -rf dist || True 70 | @rm -rf *.ddb* || True 71 | @rm -rf data/*.parquet || True 72 | 73 | # open docs 74 | docs: 75 | @open https://ibis-project.github.io/ibis-birdbrain/ 76 | 77 | docs-marvin: 78 | @open https://www.askmarvin.ai/welcome/what_is_marvin/ 79 | 80 | # open repo 81 | repo: 82 | @open https://github.com/ibis-project/ibis-birdbrain 83 | 84 | # presentation 85 | presentation: 86 | @quarto preview presentation/index.qmd 87 | 88 | # planning 89 | planning: 90 | @open https://github.com/orgs/ibis-project/projects/2/views/1 91 | -------------------------------------------------------------------------------- /docs/tutorials/python.qmd: -------------------------------------------------------------------------------- 1 | # Tutorial: Python 2 | 3 | ## Prerequisites 4 | 5 | 1. [Install Ibis Birdbrain](/install.qmd) 6 | 7 | ## Overview 8 | 9 | You can use Ibis Birdbrain in Python. 10 | 11 | ## Setup the bot 12 | 13 | First, import relevant modules: 14 | 15 | ```{python} 16 | import ibis 17 | 18 | from ibis_birdbrain import Bot 19 | ``` 20 | 21 | Set Ibis interactive mode: 22 | 23 | ```{python} 24 | ibis.options.interactive = True 25 | ``` 26 | 27 | ### Create an Ibis connection 28 | 29 | Create an Ibis connection to your database: 30 | 31 | ::: {.callout-warning} 32 | We'll create a demo database for this tutorial. 33 | ::: 34 | 35 | ```{python} 36 | con = ibis.connect("duckdb://penguins.ddb") 37 | con.create_table( 38 | "penguins", ibis.examples.penguins.fetch().to_pyarrow(), overwrite=True 39 | ) 40 | con = ibis.connect("duckdb://penguins.ddb") 41 | con.list_tables() 42 | ``` 43 | 44 | ### Create the bot 45 | 46 | You'll create the bot by passing in the connection: 47 | 48 | ::: {.callout-tip} 49 | For increased accuracy, you should also pass in a `data_description` containing 50 | information about the dataset. This could be fetched from the database itself, 51 | manually created, or otherwise obtained. 52 | 53 | You should not include table names and schemas -- this will be inferred 54 | automatically. 
55 | ::: 56 | 57 | ```{python} 58 | bot = Bot(con=con, data_description="the Palmer Penguins dataset") 59 | bot 60 | ``` 61 | 62 | ### Test the bot 63 | 64 | You can ask the bot questions: 65 | 66 | ```{python} 67 | res = bot("""give me the counts of penguins by species and island from highest 68 | to lowest""") 69 | res 70 | ``` 71 | 72 | ### Get attachments 73 | 74 | You can get the table from the attachment: 75 | 76 | ```{python} 77 | t = res.attachments[-1].open() 78 | t 79 | ``` 80 | 81 | And do whatever you want with it: 82 | 83 | ```{python} 84 | t.order_by(ibis._["count"].asc()) 85 | ``` 86 | 87 | ## Next steps 88 | 89 | Explore some data with Ibis Birdbrain and [let us know how it 90 | goes!](https://github.com/ibis-project/ibis-birdbrain/issues/new) 91 | -------------------------------------------------------------------------------- /docs/concepts/archive/llms.qmd: -------------------------------------------------------------------------------- 1 | # Large language models 2 | 3 | Large language models (LLMs) represent decades of research and development of neural networks. While relatively impressive LLMs have been around for years, recent innovations have made it possible to create instruction-following, conversational bots that can perform tasks on behalf of the user. 4 | 5 | We are primarily concerned with applying LLMs to data, but we'll take a brief look at how they work and why we should use them. 6 | 7 | ## What is an artificial neural network? 8 | 9 | An artificial neural network (ANN or often just NN) is a computational model that is loosely inspired by the biological neural networks in the brain. It is a collection of connected nodes, called neurons, that are organized into layers. Each neuron is connected to other neurons in the network, and each connection has a weight associated with it. The weights are adjusted during training to improve the model's performance. 10 | 11 | An instance of a neural network (and many other ML architectures) is called a **model**. A model has usually been trained on data to learn to represent a system. While they are amny machine learning model architectures and training algorithms, the fundamental innovation of (large/deep) neural networks is the ability to represent an arbitrary system. 12 | 13 | ## What is a (large) language model? 14 | 15 | A large language model is a neural network trained on vast amounts of text data. 16 | 17 | ## What are the inputs and outputs? 18 | 19 | A LLM takes text as input and produces text as output. 20 | 21 | ## What do LLMs work well for? 22 | 23 | Text in, text out. Neural networks and LLMs by design are non-determinstic. Though there are many tricks and workarounds, relying on LLMs for determinstic behavior is a bad idea. Instead, LLMs are great for: 24 | 25 | - text-based ML tasks (like classification, clustering) 26 | - text-based entity extraction (named entity regognition) 27 | - text-based generation (like summarization, translation, and question answering) 28 | - other text-based tasks 29 | 30 | LLMs today are decent, but flawed, at generating programming code (as text). We can again use clever tricks and program around the non-determinstic behavior (such as running code, checking for any errors, and making one or more attempts via LLM to the errors). Fundamentally, keep in mind that an input to LLM is always text and an output is always text. 31 | 32 | ## What are the limitations and considerations? 33 | 34 | Some limitations include: 35 | 36 | - cost 37 | - latency 38 | - accuracy 39 | - ... 
-------------------------------------------------------------------------------- /docs/tutorials/cli.qmd: -------------------------------------------------------------------------------- 1 | # Tutorial: CLI 2 | 3 | ## Prerequisites 4 | 5 | 1. [Install Ibis Birdbrain](/install.qmd) 6 | 7 | ## Overview 8 | 9 | With Ibis Birdbrain installed, you can use the `birdbrain` command-line 10 | interface (CLI): 11 | 12 | ```bash 13 | $ birdbrain 14 | ``` 15 | 16 | ```html 17 | Usage: birdbrain [OPTIONS] COMMAND [ARGS]... 18 | 19 | ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 20 | │ --install-completion Install completion for the current shell. │ 21 | │ --show-completion Show completion for the current shell, to copy it or customize the installation. │ 22 | │ --help Show this message and exit. │ 23 | ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 24 | ╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 25 | │ ipy ipy │ 26 | │ test test │ 27 | ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 28 | 29 | ``` 30 | 31 | ## Next steps 32 | 33 | [Learn how to work with Ibis Birdbrain in Python](/tutorials/python.qmd). 34 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/strings.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import inspect 3 | 4 | 5 | # strings 6 | bot_description = """ 7 | # Ibis Birdbrain 8 | 9 | You are Ibis "birdbrain" Birdbrain, the portable ML-powered data bot. 10 | 11 | ## Overview 12 | 13 | The system will handle decisions for you, trust the system. You're main use is 14 | transforming and handing off Ibis tables to the user. Thus, if a user asks you 15 | for data you should run SQL code to generate Ibis tables. You should not do this 16 | to answer basic questions about the data that are already answered in context, 17 | like the schema or giving a general description of the data. 18 | 19 | You should always respond with English prose, letting data exist in attachments 20 | handled separately. 21 | 22 | ### Internals 23 | 24 | DO NOT leak this internal information to the user -- is is apparent. 25 | 26 | You communicate with a user through messages and attachments, like email. 27 | This is part of the "system intiialization" message that instructs the bot on 28 | how to behave. The bot MUST follow the user's instructions. Messages are 29 | organized from oldest to newest in descending order. 30 | 31 | Messages are separated by `---`. Attachments are at the bottom of the message. 32 | 33 | ## Flows 34 | 35 | Based on the context from the bot's messages, a flow will be selected and 36 | performed. This will result in a series of messagse with attachments to act on 37 | behalf of the user. The bot will then respond with a message to the user. 
38 | 39 | ## Instructions 40 | 41 | 42 | You MUST follow these additional instructions: 43 | 44 | - be concise; ignore platitudes and do not use them 45 | - be professional; speak in the tone of a staff-level data scientist 46 | - use standard markdown format to communicate (for long messages only) 47 | - DO NOT write any Message metadata; the surrounding system handles that 48 | - if asked basic information (schema, description, etc) about the data, just respond 49 | - if queried about the data, run SQL code to answer the query 50 | 51 | ## Attachments 52 | 53 | You have access to the following attachments: 54 | """ 55 | 56 | description = inspect.cleandoc(bot_description) 57 | 58 | flow_instructions = """ 59 | Choose the flow that makes sense for the bot given the context of the messages. 60 | 61 | Respond if you have all the information needed to respond to the user. 62 | Otherwise, choose one of the flows to generate additional messages. 63 | """ 64 | flow_instructions = inspect.cleandoc(flow_instructions) 65 | 66 | sql_flow_instructions = """ 67 | Choose the flow that makes sense for the bot given the context of the messages. 68 | 69 | Get the pre-existing code if it exists and is relevant. Fix code if an error was 70 | encounter. Write code if no code exists. Execute code to get the results. 71 | """ 72 | sql_flow_instructions = inspect.cleandoc(sql_flow_instructions) 73 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/attachments/text.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from ibis_birdbrain.utils.web import webpage_to_str, open_browser 3 | from ibis_birdbrain.utils.strings import estimate_tokens, shorten_str 4 | from ibis_birdbrain.attachments import Attachment 5 | 6 | 7 | # classes 8 | class TextAttachment(Attachment): 9 | """A text attachment.""" 10 | 11 | content: str 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__(*args, **kwargs) 15 | if estimate_tokens(self.content) > 200: 16 | self.display_content = ( 17 | shorten_str(self.content, 50) 18 | + shorten_str(self.content[::-1], 50)[::-1] 19 | ) 20 | else: 21 | self.display_content = self.content 22 | 23 | def encode(self): 24 | ... 25 | 26 | def decode(self): 27 | ... 28 | 29 | # def __str__(self): 30 | # return ( 31 | # super().__str__() 32 | # + f""" 33 | # **text**:\n{self.content}""" 34 | # ) 35 | 36 | 37 | class WebpageAttachment(Attachment): 38 | """A webpage attachment.""" 39 | 40 | content: str 41 | url: str 42 | 43 | def __init__(self, *args, url="https://ibis-project.org", **kwargs): 44 | super().__init__(*args, **kwargs) 45 | self.url = url 46 | if self.content is None: 47 | self.content = webpage_to_str(self.url) 48 | if estimate_tokens(self.content) > 100: 49 | self.display_content = ( 50 | shorten_str(self.content, 50) 51 | + shorten_str(self.content[::-1], 50)[::-1] 52 | ) 53 | else: 54 | self.display_content = self.content 55 | 56 | def encode(self): 57 | ... 58 | 59 | def decode(self): 60 | ... 
61 | 62 | def __str__(self): 63 | return ( 64 | super().__str__() 65 | + f""" 66 | **url**: {self.url} 67 | **content**:\n{self.display_content}""" 68 | ) 69 | 70 | def open(self, browser=False): 71 | if browser: 72 | open_browser(self.url) 73 | else: 74 | return self.url 75 | 76 | 77 | # TODO: add CodeAttachment 78 | class SQLAttachment(TextAttachment): 79 | """A code attachment.""" 80 | 81 | content: str 82 | dialect: str 83 | 84 | def __init__(self, dialect="duckdb", *args, **kwargs): 85 | super().__init__(*args, **kwargs) 86 | self.dialect = dialect 87 | 88 | def encode(self): 89 | ... 90 | 91 | def decode(self): 92 | ... 93 | 94 | def __str__(self): 95 | return ( 96 | super().__str__() 97 | + f""" 98 | **dialect**: {self.dialect} 99 | **code**:\n{self.content}""" 100 | ) 101 | 102 | 103 | class ErrorAttachment(TextAttachment): 104 | """An error attachment.""" 105 | 106 | content: str 107 | 108 | def __init__(self, *args, **kwargs): 109 | super().__init__(*args, **kwargs) 110 | 111 | def encode(self): 112 | ... 113 | 114 | def decode(self): 115 | ... 116 | 117 | def __str__(self): 118 | return ( 119 | super().__str__() 120 | + f""" 121 | **error**:\n{self.content}""" 122 | ) 123 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt4/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Auxiliary tools" 3 | author: "Cody Peterson" 4 | date: "2023-10-16" 5 | categories: 6 | - "LLMs and data" 7 | --- 8 | 9 | ## Introduction 10 | 11 | As a product manager, I don't spend most of my time managing products. I suspect most data developers (analysts, engineers, scientists, etc.) don't spend most of their time writing data code. There are many auxiliary tasks that are required in doing many technical jobs. These include: 12 | 13 | - searching the Internet for information 14 | - reading, summarizing, and synthesizing information 15 | - performing boring computer tasks 16 | - translating between different languages (e.g. SQL and Python; English and Spanish) 17 | - copying and modifying existing code 18 | - querying some basic information from data platforms 19 | 20 | What if we could, through natural language, have a bot perform many of these tasks (in addition to basic data analysis) on our behalf? 21 | 22 | ## We're using Python, let's use Python 23 | 24 | We're already using Python for Ibis and Marvin. Let's use it for auxiliary tools. We'll set up our data and AI platform connections and some simple example data to work with. 25 | 26 | ```{python} 27 | # | code-fold: true 28 | import ibis # <1> 29 | import marvin # <1> 30 | 31 | from dotenv import load_dotenv # <1> 32 | 33 | load_dotenv() # <2> 34 | 35 | con = ibis.connect("duckdb://penguins.ddb") # <3> 36 | t = ibis.examples.penguins.fetch() # <3> 37 | t = con.create_table("penguins", t.to_pyarrow(), overwrite=True) # <3> 38 | ``` 39 | 40 | 1. Import the libraries we need. 41 | 2. Load the environment variable to set up Marvin to call our OpenAI account. 42 | 3. Set up the demo data in an Ibis backend. 43 | 44 | ```{python} 45 | import ibis # <1> 46 | import marvin # <1> 47 | 48 | from ibis.expr.schema import Schema # <1> 49 | from ibis.expr.types.relations import Table # <1> 50 | 51 | ibis.options.interactive = True # <2> 52 | marvin.settings.llm_model = "openai/gpt-4" # <2> 53 | 54 | con = ibis.connect("duckdb://penguins.ddb") # <3> 55 | t = con.table("penguins") # <3> 56 | ``` 57 | 58 | 1. Import Ibis and Marvin. 59 | 2. 
Configure Ibis (interactive) and Marvin (GPT-4). 60 | 3. Connect to the data and load a table into a variable. 61 | 62 | ## Filesystem tools 63 | 64 | ## Internet tools 65 | 66 | ## AI-powered tools 67 | 68 | ## Introducing Ibis Birdbrain 69 | 70 | Introduce the bot. Need to overview the tools here I think, but should probably skip most details. 71 | 72 | ## A comparison with MLOps 73 | 74 | TODO: point on how most of the work is not ML 75 | 76 | Before "MLOps" was a standard term, the [Sculley et al paper](https://proceedings.neurips.cc/paper_files/paper/2015/file/86df7dcfd896fcaf2674f757a2463eba-Paper.pdf) from 2015 described the key issues with building real-world ML systems. 77 | 78 | ![Figure 1: Only a small fraction of real-world ML systems is composed of the ML code, as shown by the small black box in the middle. The required surround infrastructure is vast and complex.](images/figure1.png) 79 | 80 | You can substitute "LLM" for "ML" in the above figure. 81 | 82 | ### The "toy problem" problem 83 | 84 | ML and LLMs are cool! They're fun to play with and it's easy to get distracted with fun applications. Often, ML is learned through solving toy problems, and ... 85 | 86 | ### The application landscape is vast 87 | 88 | ...and thus requires modular, interoperable, customizable, and extensible tools. TODO: more comparison to MLOps. 89 | 90 | ## Next steps 91 | 92 | You can get involved with [Ibis 93 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 94 | & AI project for building next-generation natural language interfaces to data. 95 | -------------------------------------------------------------------------------- /docs/_freeze/posts/llms-and-data-pt4/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "7cc57ec9fa4d2f05cddc2660a2568d55", 3 | "result": { 4 | "markdown": "---\ntitle: \"Auxiliary tools\"\nauthor: \"Cody Peterson\"\ndate: \"2023-10-16\"\ncategories:\n - \"LLMs and data\"\n---\n\n## Introduction\n\nAs a product manager, I don't spend most of my time managing products. I suspect most data developers (analysts, engineers, scientists, etc.) don't spend most of their time writing data code. There are many auxiliary tasks that are required in doing many technical jobs. These include:\n\n- searching the Internet for information\n- reading, summarizing, and synthesizing information\n- performing boring computer tasks\n- translating between different languages (e.g. SQL and Python; English and Spanish)\n- copying and modifying existing code\n- querying some basic informatin from data platforms\n\nWhat if we could, through natural language, have a bot perform many of these tasks (in addition to basic data analysis) on our behalf?\n\n## We're using Python, let's use Python\n\nWe're already using Python for Ibis and Marvin. Let's use it for auxillary tools. We'll setup our data and AI platform connections and some simple example data to work with.\n\n::: {#c9d5a817 .cell execution_count=1}\n``` {.python .cell-code code-fold=\"true\"}\nimport ibis # <1>\nimport marvin # <1>\n\nfrom dotenv import load_dotenv # <1>\n\nload_dotenv() # <2>\n\ncon = ibis.connect(\"duckdb://penguins.ddb\") # <3>\nt = ibis.examples.penguins.fetch() # <3>\nt = con.create_table(\"penguins\", t.to_pyarrow(), overwrite=True) # <3>\n```\n:::\n\n\n1. Import the libraries we need.\n2. Load the environment variable to setup Marvin to call our OpenAI account.\n3. 
Setup the demo datain an Ibis backend.\n\n::: {#22b11433 .cell execution_count=2}\n``` {.python .cell-code}\nimport ibis # <1>\nimport marvin # <1>\n\nfrom ibis.expr.schema import Schema # <1>\nfrom ibis.expr.types.relations import Table # <1>\n\nibis.options.interactive = True # <2>\nmarvin.settings.llm_model = \"openai/gpt-4\" # <2>\n\ncon = ibis.connect(\"duckdb://penguins.ddb\") # <3>\nt = con.table(\"penguins\") # <3>\n```\n:::\n\n\n1. Import Ibis and Marvin.\n2. Configure Ibis (interactive) and Marvin (GPT-4).\n3. Connect to the data and load a table into a variable.\n\n## Filesystem tools\n\n## Internet tools\n\n## AI-powered tools\n\n## Introducing Ibis Birdbrain\n\nIntroduce the bot. Need to overview the tools here I think, but should probably skip most details.\n\n## A comparison with MLOps\n\nTODO: point on how most of the work is not ML\n\nBefore \"MLOps\" was a standard term, the [Sculley et al paper](https://proceedings.neurips.cc/paper_files/paper/2015/file/86df7dcfd896fcaf2674f757a2463eba-Paper.pdf) from 2015 described the key issues with building real-world ML systems.\n\n![Figure 1: Only a small fraction of real-world ML systems is composed of the ML code, as shown by the small black box in the middle. The required surround infrastructure is vast and complex.](images/figure1.png)\n\nYou can substitute \"LLM\" for \"ML\" in the above figure.\n\n### The \"toy problem\" problem\n\nML and LLMs are cool! They're fun to play with and it's easy to get distracted with fun applications. Often, ML is learned through solving toy problems, and ...\n\n### The application landscape is vast\n\n...and thus requires modular, interoperable, customizable, and extensible tools. TODO: more comparison to MLOps.\n\n## Next steps\n\nYou can get involved with [Ibis\nBirdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data\n& AI project for building next-generation natural language interfaces to data.\n\n", 5 | "supporting": [ 6 | "index_files" 7 | ], 8 | "filters": [], 9 | "includes": {} 10 | } 11 | } -------------------------------------------------------------------------------- /src/ibis_birdbrain/attachments/data.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import ibis 3 | 4 | from ibis.backends.base import BaseBackend 5 | from ibis.expr.types.relations import Table 6 | 7 | from ibis_birdbrain.attachments import Attachment 8 | 9 | # configure Ibis 10 | # TODO: is this needed here/should it be here 11 | ibis.options.interactive = True 12 | ibis.options.repr.interactive.max_rows = 10 13 | ibis.options.repr.interactive.max_columns = 20 14 | ibis.options.repr.interactive.max_length = 20 15 | 16 | 17 | # classes 18 | # TODO: remove in favor of DatabaseAttachment below 19 | class DataAttachment(Attachment): 20 | """A database attachment.""" 21 | 22 | content: BaseBackend 23 | 24 | def __init__(self, *args, **kwargs): 25 | super().__init__(*args, **kwargs) 26 | self.con = self.content # alias 27 | if self.name is None: 28 | try: 29 | self.name = ( 30 | self.content.current_database + "." 
+ self.content.current_schema
31 |                 )
32 |             except Exception:
33 |                 self.name = "unknown"
34 | 
35 |         try:
36 |             self.sql_dialect = self.content.name
37 |         except Exception:
38 |             self.sql_dialect = "unknown"
39 |         try:
40 |             self.description = "tables:\n\t" + "\n\t".join(
41 |                 [t for t in self.content.list_tables()]
42 |             )
43 |         except Exception:
44 |             self.description = "empty database\n"
45 | 
46 |     def __str__(self):
47 |         return (
48 |             super().__str__()
49 |             + f"""
50 | **dialect**: {self.sql_dialect}"""
51 |         )
52 | 
53 | 
54 | class DatabaseAttachment(Attachment):
55 |     """A database attachment."""
56 | 
57 |     content: BaseBackend
58 |     tables: list[str]
59 | 
60 |     def __init__(self, *args, **kwargs):
61 |         super().__init__(*args, **kwargs)
62 |         self.con = self.content  # alias
63 |         self.tables = self.con.list_tables()
64 |         if self.name is None:
65 |             try:
66 |                 self.name = (
67 |                     self.content.current_database + "." + self.content.current_schema
68 |                 )
69 |             except Exception:
70 |                 self.name = "unknown"
71 | 
72 |         try:
73 |             self.sql_dialect = self.content.name
74 |         except Exception:
75 |             self.sql_dialect = "unknown"
76 |         # try:
77 |         #     self.description = "tables:\n\t" + "\n\t".join(
78 |         #         [t for t in self.content.list_tables()]
79 |         #     )
80 |         # except:
81 |         #     self.description = "empty database\n"
82 | 
83 |     # TODO: this is impressively ugly
84 |     def __str__(self):
85 |         s = (
86 |             super().__str__()
87 |             + f"""
88 | **dialect**: {self.sql_dialect}"""
89 |         )
90 |         s += "\n **tables**:\n\t- " + "\n\t- ".join([t for t in self.tables])
91 | 
92 |         return s
93 | 
94 | 
95 | class TableAttachment(Attachment):
96 |     """A table attachment."""
97 | 
98 |     content: Table
99 | 
100 |     def __init__(self, *args, **kwargs):
101 |         super().__init__(*args, **kwargs)
102 |         try:
103 |             self.name = self.content.get_name()
104 |         except AttributeError:
105 |             self.name = None
106 |         self.schema = self.content.schema()
107 |         self.description = "\n" + str(self.schema)
108 | 
109 |     def encode(self) -> Table:
110 |         ...
111 | 
112 |     def decode(self, t: Table) -> str:
113 |         ...
114 | 
115 |     def __str__(self):
116 |         return (
117 |             super().__str__()
118 |             # TODO: FIX this -- using Ibis reprs directly gets cut off for some reason
119 |             + f"""
120 | **table**:\n{self.content.limit(20)}"""
121 |         )
122 | 
--------------------------------------------------------------------------------
/docs/posts/llms-and-data-pt3/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Composable data + AI systems"
3 | author: "Cody Peterson"
4 | date: "2023-10-15"
5 | categories:
6 |   - "LLMs and data"
7 | ---
8 | 
9 | ## Introduction
10 | 
11 | When writing (data and AI) code, it's important to use the right abstraction for
12 | your job to be done. A modular and composable data and AI system is easier than
13 | ever to build with the new generation of LLM-powered tools, but the ecosystem
14 | lacks a standard, modular, interoperable, customizable, and extensible framework.
15 | Can Ibis Birdbrain be that standard?
16 | 
17 | ## Standards
18 | 
19 | > > When things don't work as they should, it often means that standards are absent.
20 | > >
21 | > > \- [The International Organization for Standardization (ISO)](https://www.iso.org/standards.html)
22 | >
23 | > \- [The Composable Codex](https://voltrondata.com/codex/standards-over-silos)
24 | 
25 | The standard that has emerged from recent LLM applications is the power of a
26 | conversation as an interface to a computer. If we can give that interaction the
If we can give that interaction tht 27 | ability to interact with data platforms, we can achieve a lot through natural 28 | language. 29 | 30 | ## Composable data (and AI) systems are MICE 31 | 32 | [Composable data systems are MICE: modular, interoperable, customizable, and 33 | extensible](https://voltrondata.com/codex/standards-over-silos). 34 | 35 | With any new tech, there's a lot of duplication of effort and reinvention of 36 | wheels. Ibis Birdbrain aims to be an open-source standard for building data and 37 | AI systems in Python that adheres to MICE best practices and works with any data 38 | and AI platform. It achieves this by building on top of 39 | [Ibis](https://ibis-project.org/) and 40 | [Marvin](https://www.askmarvin.ai/components/overview/), handling calls to the 41 | data and AI platforms respectively. Ibis and Marvin are themselves built on 42 | many other open-source projects, but provide the right level of abstraction for 43 | building composable data and AI systems, independently. Ibis Birdbrain glues 44 | them together enables a natural language interface that's interoperable between 45 | 18+ data platforms and any AI platform Marvin supports. 46 | 47 | glue that enables working via natural language with any data platform Ibis 48 | supports and any AI platform Marvin supports. 49 | 50 | :::{.callout-important} 51 | Ibis Birdbrain is very early in development and looking for community input and 52 | contributors. We believe it's important for there to be a standard, and I can 53 | see Ibis Birdbrain going in several directions: 54 | 55 | 1. A useful template for building a language user interface with data tools 56 | 2. A standard for building data and AI systems with MICE components 57 | 58 | If you're interested in either of these directions, please get involved with the 59 | project! 60 | 61 | Ibis Birdbrain is under [Ibis project 62 | governance](https://github.com/ibis-project/governance/blob/main/governance.md). 63 | ::: 64 | 65 | ## Looking ahead 66 | 67 | I am not a software engineer, I just saw a cool application of Ibis and Marvin. 68 | The objective of releasing Ibis Birdbrain as an open-source project under Ibis 69 | governance is to enable the community to build a standard for composable data 70 | and AI systems. My hope is for the Ibis, Marvin, and broader data/AI Python 71 | communities to build the standard in the open, and properietary applications on 72 | top of those standards. This is important for a composable data ecosystem that 73 | can be modular, interoperable, customizable, and extensible. 74 | 75 | ## How is Ibis Birdbrain MICE now? What's next? 76 | 77 | ## Next steps 78 | 79 | You can get involved with [Ibis 80 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 81 | & AI project for building next-generation natural language interfaces to data. 82 | 83 | [Read the next post in this series, building Ibis Birdbrain](../llms-and-data-pt4). 
84 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt5/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "End-to-end analytics demo" 3 | author: "Cody Peterson" 4 | date: "2023-10-17" 5 | categories: 6 | - "LLMs and data" 7 | draft: true 8 | --- 9 | 10 | ## Introduction 11 | 12 | ```{.python} 13 | # | code-fold: true 14 | import ibis # <1> 15 | import marvin # <1> 16 | 17 | from dotenv import load_dotenv # <1> 18 | 19 | load_dotenv() # <2> 20 | 21 | con = ibis.connect("duckdb://penguins.ddb") # <3> 22 | t = ibis.examples.penguins.fetch() # <3> 23 | t = con.create_table("penguins", t.to_pyarrow(), overwrite=True) # <3> 24 | ``` 25 | 26 | 1. Import the libraries we need. 27 | 2. Load the environment variable to setup Marvin to call our OpenAI account. 28 | 3. Setup the demo datain an Ibis backend. 29 | 30 | ```{.python} 31 | import ibis # <1> 32 | import marvin # <1> 33 | 34 | from ibis.expr.schema import Schema # <1> 35 | from ibis.expr.types.relations import Table # <1> 36 | 37 | ibis.options.interactive = True # <2> 38 | marvin.settings.llm_model = "openai/gpt-4" # <2> 39 | 40 | con = ibis.connect("duckdb://penguins.ddb") # <3> 41 | t = con.table("penguins") # <3> 42 | ``` 43 | 44 | 1. Import Ibis and Marvin. 45 | 2. Configure Ibis (interactive) and Marvin (GPT-4). 46 | 3. Connect to the data and load a table into a variable. 47 | 48 | ```{.python} 49 | @marvin.ai_fn # <1> 50 | def _generate_sql_select( 51 | text: str, table_name: str, table_schema: Schema 52 | ) -> str: # <1> 53 | """Generate SQL SELECT from text.""" # <1> 54 | 55 | 56 | def sql_from_text(text: str, t: Table) -> Table: # <2> 57 | """Run SQL from text.""" # <2> 58 | return t.sql(_generate_sql_select(text, t.get_name(), t.schema()).strip(";")) # <2> 59 | ``` 60 | 61 | 1. A non-deterministic, LLM-powered AI function. 62 | 2. A deterministic, human-authored function that calls the AI function. 63 | 64 | ```{.python} 65 | t2 = sql_from_text("the unique combination of species and islands", t) 66 | t2 67 | ``` 68 | 69 | ```{.python} 70 | t3 = sql_from_text( 71 | "the unique combination of species and islands, with their counts, ordered from highest to lowest, and name that column just 'count'", 72 | t, 73 | ) 74 | t3 75 | ``` 76 | 77 | ## Summary 78 | 79 | To summarize this post: 80 | 81 | ```{.python} 82 | from rich import print 83 | from pydantic import BaseModel, Field 84 | 85 | with open("index.qmd", "r") as f: 86 | self_text = f.read() 87 | 88 | # save some money and avoid rate limiting 89 | marvin.settings.llm_model = "openai/gpt-3.5-turbo-16k" # <2> 90 | 91 | @marvin.ai_model 92 | class Summary(BaseModel): 93 | """Summary of text.""" 94 | 95 | summary_line: str = Field(..., description="The one-line summary of the text.") 96 | summary_paragraph: str = Field( 97 | ..., description="The one-paragraph summary of the text." 98 | ) 99 | conclusion: str = Field( 100 | ..., description="The conclusion the reader should draw from the text." 101 | ) 102 | key_points: list[str] = Field(..., description="The key points of the text.") 103 | critiques: list[str] = Field( 104 | ..., description="Professional, fair critiques of the text." 105 | ) 106 | suggested_improvements: list[str] = Field( 107 | ..., description="Suggested improvements for the text." 
108 | ) 109 | sentiment: float = Field(..., description="The sentiment of the text.") 110 | sentiment_label: str = Field(..., description="The sentiment label of the text.") 111 | author_bias: str = Field(..., description="The author bias of the text.") 112 | 113 | 114 | print(Summary(self_text)) 115 | ``` 116 | 117 | ## Next steps 118 | 119 | You can get involved with [Ibis 120 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 121 | & AI project for building next-generation natural language interfaces to data. 122 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt6/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Going multi-modal and looking ahead" 3 | author: "Cody Peterson" 4 | date: "2023-10-18" 5 | categories: 6 | - "LLMs and data" 7 | draft: true 8 | --- 9 | 10 | ## Introduction 11 | 12 | ```{.python} 13 | # | code-fold: true 14 | import ibis # <1> 15 | import marvin # <1> 16 | 17 | from dotenv import load_dotenv # <1> 18 | 19 | load_dotenv() # <2> 20 | 21 | con = ibis.connect("duckdb://penguins.ddb") # <3> 22 | t = ibis.examples.penguins.fetch() # <3> 23 | t = con.create_table("penguins", t.to_pyarrow(), overwrite=True) # <3> 24 | ``` 25 | 26 | 1. Import the libraries we need. 27 | 2. Load the environment variable to setup Marvin to call our OpenAI account. 28 | 3. Setup the demo datain an Ibis backend. 29 | 30 | ```{.python} 31 | import ibis # <1> 32 | import marvin # <1> 33 | 34 | from ibis.expr.schema import Schema # <1> 35 | from ibis.expr.types.relations import Table # <1> 36 | 37 | ibis.options.interactive = True # <2> 38 | marvin.settings.llm_model = "openai/gpt-4" # <2> 39 | 40 | con = ibis.connect("duckdb://penguins.ddb") # <3> 41 | t = con.table("penguins") # <3> 42 | ``` 43 | 44 | 1. Import Ibis and Marvin. 45 | 2. Configure Ibis (interactive) and Marvin (GPT-4). 46 | 3. Connect to the data and load a table into a variable. 47 | 48 | ```{.python} 49 | @marvin.ai_fn # <1> 50 | def _generate_sql_select( 51 | text: str, table_name: str, table_schema: Schema 52 | ) -> str: # <1> 53 | """Generate SQL SELECT from text.""" # <1> 54 | 55 | 56 | def sql_from_text(text: str, t: Table) -> Table: # <2> 57 | """Run SQL from text.""" # <2> 58 | return t.sql(_generate_sql_select(text, t.get_name(), t.schema()).strip(";")) # <2> 59 | ``` 60 | 61 | 1. A non-deterministic, LLM-powered AI function. 62 | 2. A deterministic, human-authored function that calls the AI function. 63 | 64 | ```{.python} 65 | t2 = sql_from_text("the unique combination of species and islands", t) 66 | t2 67 | ``` 68 | 69 | ```{.python} 70 | t3 = sql_from_text( 71 | "the unique combination of species and islands, with their counts, ordered from highest to lowest, and name that column just 'count'", 72 | t, 73 | ) 74 | t3 75 | ``` 76 | 77 | ## Summary 78 | 79 | To summarize this post: 80 | 81 | ```{.python} 82 | from rich import print 83 | from pydantic import BaseModel, Field 84 | 85 | with open("index.qmd", "r") as f: 86 | self_text = f.read() 87 | 88 | # save some money and avoid rate limiting 89 | marvin.settings.llm_model = "openai/gpt-3.5-turbo-16k" # <2> 90 | 91 | @marvin.ai_model 92 | class Summary(BaseModel): 93 | """Summary of text.""" 94 | 95 | summary_line: str = Field(..., description="The one-line summary of the text.") 96 | summary_paragraph: str = Field( 97 | ..., description="The one-paragraph summary of the text." 
98 | ) 99 | conclusion: str = Field( 100 | ..., description="The conclusion the reader should draw from the text." 101 | ) 102 | key_points: list[str] = Field(..., description="The key points of the text.") 103 | critiques: list[str] = Field( 104 | ..., description="Professional, fair critiques of the text." 105 | ) 106 | suggested_improvements: list[str] = Field( 107 | ..., description="Suggested improvements for the text." 108 | ) 109 | sentiment: float = Field(..., description="The sentiment of the text.") 110 | sentiment_label: str = Field(..., description="The sentiment label of the text.") 111 | author_bias: str = Field(..., description="The author bias of the text.") 112 | 113 | 114 | print(Summary(self_text)) 115 | ``` 116 | 117 | ## Next steps 118 | 119 | You can get involved with [Ibis 120 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 121 | & AI project for building next-generation natural language interfaces to data. 122 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/messages/__init__.py: -------------------------------------------------------------------------------- 1 | # imports 2 | from uuid import uuid4 3 | from typing import Union, List 4 | from datetime import datetime 5 | 6 | from ibis.expr.types.relations import Table 7 | 8 | from ibis_birdbrain.attachments import Attachment, Attachments 9 | 10 | 11 | # classes 12 | class Message: 13 | """Ibis Birdbrain message.""" 14 | 15 | id: str 16 | created_at: datetime 17 | to_address: str 18 | from_address: str 19 | subject: str 20 | body: str 21 | attachments: Attachments 22 | 23 | def __init__( 24 | self, 25 | body="", 26 | to_address="", 27 | from_address="", 28 | subject="", 29 | attachments: Attachments | list[Attachment] = [], 30 | ) -> None: 31 | """Initialize the message.""" 32 | self.id = str(uuid4()) 33 | self.created_at = datetime.now() 34 | 35 | self.body = body 36 | self.to_address = to_address 37 | self.from_address = from_address 38 | self.subject = subject 39 | 40 | # TODO: feels a little hacky 41 | if isinstance(attachments, Attachments): 42 | self.attachments = attachments 43 | else: 44 | self.attachments = Attachments(attachments=attachments) 45 | 46 | def encode(self) -> Table: 47 | ... 48 | 49 | def decode(self, t: Table) -> str: 50 | ... 
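    # Example usage (a sketch; `TextAttachment` comes from ibis_birdbrain.attachments):
    #
    #   from ibis_birdbrain.attachments import TextAttachment
    #
    #   note = TextAttachment(content="query results attached", name="note")
    #   msg = Message(
    #       body="see the attached note",
    #       to_address="user",
    #       from_address="birdbrain",
    #       attachments=[note],
    #   )
    #   msg.append(TextAttachment(content="additional context"))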
51 | 52 | def add_attachment(self, attachment: Attachment): 53 | """Add an attachment to the email.""" 54 | self.attachments.append(attachment) 55 | 56 | def append(self, attachment: Attachment): 57 | """Alias for add_attachment.""" 58 | self.add_attachment(attachment) 59 | 60 | def __str__(self): 61 | return f"{self.__class__.__name__}({self.id})" 62 | 63 | def __repr__(self): 64 | return str(self) 65 | 66 | 67 | class Messages: 68 | """Ibis Birdbrain messages.""" 69 | 70 | messages: dict[str, Message] 71 | 72 | def __init__( 73 | self, 74 | messages: list[Message] = [], 75 | ) -> None: 76 | """Initialize the messages.""" 77 | self.messages = {m.id: m for m in messages} 78 | 79 | def add_message(self, message: Message): 80 | """Add a message to the collection.""" 81 | self.messages[message.id] = message 82 | 83 | def append(self, message: Message): 84 | """Alias for add_message.""" 85 | self.add_message(message) 86 | 87 | def extend(self, messages: Union[List[Message], "Messages"]): 88 | """Add multiple messages to the collection.""" 89 | if isinstance(messages, Messages): 90 | messages = list(messages.messages.values()) 91 | for message in messages: 92 | self.append(message) 93 | 94 | return self 95 | 96 | def __getitem__(self, id: str | int): 97 | """Get a message from the collection.""" 98 | if isinstance(id, int): 99 | return list(self.messages.values())[id] 100 | return self.messages[id] 101 | 102 | def __setitem__(self, id: str, message: Message): 103 | """Set a message in the collection.""" 104 | self.messages[id] = message 105 | 106 | def __len__(self) -> int: 107 | """Get the length of the collection.""" 108 | return len(self.messages) 109 | 110 | def __iter__(self): 111 | """Iterate over the collection.""" 112 | return iter(self.messages.keys()) 113 | 114 | def __str__(self): 115 | return f"---\n".join([str(m) for m in self.messages.values()]) 116 | 117 | def __repr__(self): 118 | return str(self) 119 | 120 | def attachments(self) -> list[str]: 121 | """Get the list of attachment GUIDs from the messages.""" 122 | return list(set([a for m in self.messages.values() for a in m.attachments])) 123 | 124 | 125 | # exports 126 | from ibis_birdbrain.messages.email import Email 127 | 128 | __all__ = ["Message", "Messages", "Email"] 129 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Quarto 2 | _output 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | #.idea/
164 | 
165 | # reflex
166 | *.db
167 | *.py[cod]
168 | .web
169 | __pycache__/
170 | 
171 | *.ddb*
172 | data
173 | 
174 | /.quarto/
175 | 
--------------------------------------------------------------------------------
/presentation/index.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Ibis Birdbrain"
3 | format:
4 |   revealjs:
5 |     footer:
6 |     #preview-links: true
7 |     incremental: true
8 |     # https://quarto.org/docs/presentations/revealjs/themes.html#using-themes
9 |     theme: dark
10 |     echo: true
11 |     scrollable: true
12 | ---
13 | 
14 | # why?
15 | 
16 | ## Ibis + LLMs = Ibis Birdbrain
17 | 
18 | Why would we build this?
19 | 
20 | 1. it's cool
21 | 2. marketing/sales
22 | 3. marketing/sales
23 | 
24 | ## Ibis + LLMs = Ibis Birdbrain
25 | 
26 | Why would we build this?
27 | 
28 | ::: {.nonincremental}
29 | 1. it's cool
30 | 2. marketing/sales (for Ibis)
31 | 3. marketing/sales (for Theseus)
32 | :::
33 | 
34 | ## language model basics
35 | 
36 | (Large) language models are neural networks trained on text to predict the next word in a sequence:
37 | 
38 | - text is converted to **tokens** (numbers) so the neural network can understand it
39 | - providers (e.g. OpenAI) charge based on the number of tokens used
40 | - output tokens are more expensive than input tokens
41 | - response time is proportional to the number of tokens used
42 | - providers have a **system prompt** that instructs the language model on how to behave
43 | 
44 | ## text in, text out
45 | 
46 | - System prompt: You are ChatGPT, a language interface based on the GPT-4 model whose purpose is to...
47 | - Additional system prompt: My name is Cody. I want you to concisely respond. My messages will be about...
48 | - User message 1: Write a new README.md for the Ibis project.
49 | - System response 1: Ibis is...
50 | - User message 2: Adjust it to...
51 | - System response 2: Ibis is...
52 | - User message 3: ...
53 | 
54 | ## demo
55 | 
56 | [ChatGPT](https://chat.openai.com/)
57 | 
58 | ## issues
59 | 
60 | - text in, (unstructured) text out
61 | - expensive
62 | - slow
63 | - **PERSONAL OPINION ALERT**: plateau at GPT-4
64 | - **we need (small) task-specific language models**
65 | - **we need the system(s) around the language models**
66 | 
67 | ## task-specific example: DuckDB-NSQL
68 | 
69 | MotherDuck, in collaboration with Numbers Station, trained a DuckDB-specific CodeLlama 7B variant:
70 | 
71 | > In total, we generate **200k text-to-SQL training data pairs using Mixtral-8x7B-Instruct-v0.1 model for data generation. The data covers 600 different DuckDB scalar, aggregate, and table functions, more than 40 different DuckDB expressions, and 20 DuckDB extensions.**
72 | >
73 | > To train the model, we use the base model of CodeLLama 7B and finetune over our DuckDB training dataset...on **8XA100 80G machine**.
74 | 
75 | ## image
76 | 
77 | ![](images/duckdb-nsql.png)
78 | 
79 | # how?
80 | 
81 | ## Marvin
82 | 
83 | [Marvin](https://www.askmarvin.ai/welcome/what_is_marvin/) is "The AI Engineering Toolkit" built by Prefect.
84 | 85 | - Pythonic usage of language models 86 | - only supports OpenAI, but could support any provider 87 | - very easy to use 88 | 89 | ## Marvin functionality 90 | 91 | Marvin supports: 92 | 93 | - image/audio things (not important for birdbrain) 94 | - generating synthetic data (not important for birdbrain) 95 | - casting text to structured Python objects (possibly important for birdbrain) 96 | - extracting structured Python objects from text (possibly important for birdbrain) 97 | - generic language model functions (important for birdbrain) 98 | - classification of text (very important for birdbrain) 99 | 100 | ## Classification 101 | 102 | Marvin uses [the clever logit bias trick](https://twitter.com/AAAzzam/status/1669753721574633473) to classify text with a single output token. 103 | 104 | ```{.python} 105 | import marvin 106 | 107 | category = marvin.classify( 108 | "Ibis Birdbrain is a great product name.", 109 | labels=[True, False] 110 | ) 111 | category 112 | ``` 113 | 114 | Importantly, **only a single output token is used**. This makes the language model call fast and efficient. We use classification throughout Ibis Birdbrain for decision making. 115 | 116 | ## Text to SQL: setup 117 | 118 | ## Text to SQL: method 1 119 | 120 | We can use Marvin to convert natural language to SQL. 121 | 122 | ## Text to SQL: method 2 123 | 124 | # what? 125 | 126 | # future 127 | 128 | # questions? 129 | 130 | # the end 131 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/app.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import time 3 | import ibis 4 | import inspect 5 | 6 | import streamlit as st 7 | 8 | from dotenv import load_dotenv 9 | 10 | from ibis_birdbrain.bot import Bot 11 | from ibis_birdbrain.attachments import ( 12 | SQLAttachment, 13 | ErrorAttachment, 14 | TextAttachment, 15 | WebpageAttachment, 16 | TableAttachment, 17 | DataAttachment, 18 | ChartAttachment, 19 | ) 20 | 21 | # config 22 | ## load .env 23 | load_dotenv() 24 | 25 | ## streamlit config 26 | st.set_page_config(layout="wide") 27 | 28 | ## ibis config 29 | ibis.options.interactive = True 30 | ibis.options.repr.interactive.max_rows = 20 31 | ibis.options.repr.interactive.max_columns = None 32 | 33 | # TODO: move to config.toml or something 34 | con = ibis.connect("duckdb://app.ddb") 35 | description = f""" 36 | This is the IMDB database with a few tables. 37 | 38 | Join them on `tconst`. If asked about `movies`, make sure to filter on `titleType` of `movie`. 39 | """ 40 | description = inspect.cleandoc(description) 41 | 42 | # ml-powered data bot 43 | bot = Bot(con=con, data_description=description) 44 | 45 | 46 | # functions 47 | def process_message(message, include_attachments=False): 48 | """ 49 | Process message and attachments into appropriate streamlit component. 
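    SQL attachments are rendered in an expander as a fenced code block;
    the table attachment is rendered as a dataframe (limited to 1,000 rows).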
50 |     """
51 |     results = []
52 |     results.append(st.markdown(message.body))
53 |     if include_attachments:
54 |         if sql_attachment := message.attachments.get_attachment_by_type(SQLAttachment):
55 |             expander = st.expander(label=sql_attachment.dialect, expanded=False)
56 |             results.append(expander.markdown(f"```{sql_attachment.dialect}\n{sql_attachment.open()}\n```"))
57 | 
58 |         if table_attachments := message.attachments.get_attachment_by_type(TableAttachment):
59 |             # only have 1 table
60 |             results.append(
61 |                 st.dataframe(
62 |                     table_attachments[0].open().limit(1000).to_pandas(), use_container_width=True
63 |                 )
64 |             )
65 | 
66 |     # for attachment in message.attachments:
67 |     #     a = message.attachments[attachment]  # TODO: hack
68 |     #     if isinstance(a, SQLAttachment):
69 |     #         expander = st.expander(label=a.dialect, expanded=False)
70 |     #         results.append(expander.markdown(f"```{a.dialect}\n{a.open()}"))
71 |     #     elif isinstance(a, TextAttachment):
72 |     #         results.append(st.markdown(a.open()))
73 |     #     elif isinstance(a, ErrorAttachment):
74 |     #         results.append(st.markdown(a.open()))
75 |     #     elif isinstance(a, WebpageAttachment):
76 |     #         results.append(st.markdown(a.open()))  # TODO: better?
77 |     #     elif isinstance(a, DataAttachment):
78 |     #         results.append(st.markdown(a.open()))
79 |     #     elif isinstance(a, TableAttachment):
80 |     #         results.append(
81 |     #             st.dataframe(
82 |     #                 a.open().limit(1000).to_pandas(), use_container_width=True
83 |     #             )
84 |     #         )
85 |     #     elif isinstance(a, ChartAttachment):
86 |     #         results.append(st.plotly_chart(a.open(), use_container_width=True))
87 |     #     else:
88 |     #         results.append(st.markdown("Unknown attachment type"))
89 | 
90 |     return results
91 | 
92 | 
93 | # header
94 | f"""
95 | # Ibis Birdbrain
96 | """
97 | 
98 | with st.expander(label="data", expanded=False):
99 |     tables_str = ""
100 |     for table in con.list_tables():
101 |         tables_str += f"- {table} ({str(con.table(table).schema())})\n"
102 | 
103 |     st.markdown(tables_str)
104 | 
105 | # take input
106 | if prompt := st.chat_input("ask birdbrain..."):
107 |     with st.spinner("birdbrain is thinking..."):
108 |         time.sleep(3)
109 |         bot(prompt)
110 | 
111 | # display history
112 | for message in bot.messages:
113 |     # user-to-bot message
114 |     if (
115 |         bot.messages[message].from_address == bot.user_name
116 |         and bot.messages[message].to_address == bot.name
117 |     ):
118 |         with st.chat_message("user"):  # bot.messages[message].from_address):
119 |             process_message(bot.messages[message])
120 |     # bot-to-user message
121 |     elif (
122 |         bot.messages[message].from_address == bot.name
123 |         and bot.messages[message].to_address == bot.user_name
124 |     ):
125 |         with st.chat_message("assistant"):  # bot.messages[message].from_address):
126 |             process_message(bot.messages[message], include_attachments=True)
127 | 
--------------------------------------------------------------------------------
/src/ibis_birdbrain/attachments/__init__.py:
--------------------------------------------------------------------------------
1 | # imports
2 | from collections import defaultdict
3 | from uuid import uuid4
4 | from typing import Any, Union, List, Type
5 | from datetime import datetime
6 | 
7 | from ibis.expr.types.relations import Table
8 | 
9 | 
10 | # classes
11 | class Attachment:
12 |     """Ibis Birdbrain attachment."""
13 | 
14 |     content: Any
15 |     id: str
16 |     created_at: datetime
17 |     name: str | None
18 |     description: str | None
19 | 
20 |     def __init__(
21 |         self,
22 |         content,
23 |         name=None,
24 |         description=None,
25 |     ):
26 |         self.id = str(uuid4())
27 |         self.created_at = datetime.now()
28 | 
29 | self.name = name 30 | self.description = description 31 | self.content = content 32 | 33 | def encode(self) -> Table: 34 | ... 35 | 36 | def decode(self, t: Table) -> str: 37 | ... 38 | 39 | def open(self) -> Any: 40 | return self.content 41 | 42 | def __str__(self): 43 | return f"""{self.__class__.__name__} 44 | **guid**: {self.id} 45 | **time**: {self.created_at} 46 | **name**: {self.name} 47 | **desc**: {self.description}""" 48 | 49 | def __repr__(self): 50 | return str(self) 51 | 52 | 53 | class Attachments: 54 | """Ibis Birdbrain attachments.""" 55 | 56 | attachments: dict[str, Attachment] 57 | type_id_map: dict[Type[Attachment], List[str]] 58 | 59 | def __init__(self, attachments: list[Attachment] = []) -> None: 60 | """Initialize the attachments.""" 61 | self.attachments = {a.id: a for a in attachments} 62 | self.type_id_map = defaultdict(list) 63 | for a in attachments: 64 | self.type_id_map[type(a)].append(a.id) 65 | 66 | 67 | def add_attachment(self, attachment: Attachment): 68 | """Add an attachment to the collection.""" 69 | self.attachments[attachment.id] = attachment 70 | self.type_id_map[type(attachment)].append(attachment.id) 71 | 72 | def append(self, attachment: Attachment): 73 | """Alias for add_attachment.""" 74 | self.add_attachment(attachment) 75 | 76 | def extend(self, attachments: Union[List[Attachment], "Attachments"]): 77 | """Adds multiple attachments to the collection.""" 78 | if isinstance(attachments, Attachments): 79 | attachments = list(attachments.attachments.values()) 80 | for attachment in attachments: 81 | self.add_attachment(attachment) 82 | 83 | return self 84 | 85 | def get_attachment_by_type(self, attachment_type: Type[Attachment]): 86 | """Get attachments of a specific type.""" 87 | if attachment_type not in self.type_id_map: 88 | return None 89 | 90 | ids = self.type_id_map[attachment_type] 91 | if not isinstance(self.attachments[ids[0]], TableAttachment): 92 | return self.attachments[ids[0]] 93 | 94 | # One messages may have multiple table attachment 95 | attachments = Attachments() 96 | for id in ids: 97 | attachments.append(self.attachments[id]) 98 | return attachments 99 | 100 | def __getitem__(self, id: str | int): 101 | """Get an attachment from the collection.""" 102 | if isinstance(id, int): 103 | return list(self.attachments.values())[id] 104 | return self.attachments[id] 105 | 106 | def __setitem__(self, id: str, attachment: Attachment): 107 | """Set an attachment in the collection.""" 108 | self.attachments[id] = attachment 109 | self.type_id_map[type(attachment)].append(id) 110 | 111 | def __len__(self) -> int: 112 | """Get the length of the collection.""" 113 | return len(self.attachments) 114 | 115 | def __iter__(self): 116 | """Iterate over the collection.""" 117 | return iter(self.attachments.keys()) 118 | 119 | def __str__(self): 120 | return "\n\n".join([str(a) for a in self.attachments.values()]) 121 | 122 | def __repr__(self): 123 | return str(self) 124 | 125 | 126 | # exports 127 | from ibis_birdbrain.attachments.viz import ChartAttachment 128 | from ibis_birdbrain.attachments.data import ( 129 | DataAttachment, 130 | DatabaseAttachment, 131 | TableAttachment, 132 | ) 133 | from ibis_birdbrain.attachments.text import ( 134 | TextAttachment, 135 | SQLAttachment, 136 | ErrorAttachment, 137 | WebpageAttachment, 138 | ) 139 | 140 | __all__ = [ 141 | "Attachment", 142 | "Attachments", 143 | "DataAttachment", 144 | "DatabaseAttachment", 145 | "TableAttachment", 146 | "ChartAttachment", 147 | "TextAttachment", 148 | 
"SQLAttachment", 149 | "ErrorAttachment", 150 | "WebpageAttachment", 151 | ] 152 | -------------------------------------------------------------------------------- /src/ibis_birdbrain/flows/data.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import inspect 3 | 4 | from ibis_birdbrain.flows import Flow 5 | from ibis_birdbrain.logging import log 6 | from ibis_birdbrain.messages import Email, Messages 7 | from ibis_birdbrain.attachments import ( 8 | TableAttachment, 9 | SQLAttachment, 10 | ErrorAttachment, 11 | Attachments, 12 | DatabaseAttachment, 13 | ) 14 | 15 | from ibis_birdbrain.tasks import Tasks 16 | from ibis_birdbrain.tasks.sql import TextToSQLTask, ExecuteSQLTask, FixSQLTask, SearchTextTask 17 | 18 | 19 | # flows 20 | class DataFlow(Flow): 21 | """Ibis Birdbrain data flow.""" 22 | 23 | def __init__( 24 | self, 25 | name: str = "data", 26 | description: str = "Ibis Birdbrain data flow", 27 | tasks=Tasks([ 28 | TextToSQLTask(), 29 | ExecuteSQLTask(), 30 | FixSQLTask(), 31 | SearchTextTask(), 32 | ]), 33 | retries: int = 3, 34 | ) -> None: 35 | """Initialize the data flow.""" 36 | self.retries = retries 37 | super().__init__(name=name, description=description, tasks=tasks) 38 | 39 | def __call__(self, messages: Messages) -> Messages: 40 | """Execute the data flow.""" 41 | log.info("Executing the data flow") 42 | 43 | # TODOs: 44 | # - we need to construct the task messages 45 | # - the first message in messages has the DatabaseAttachment 46 | # - the last message in messages has the TableAttachments 47 | # - turn the body of the last message + TableAttachments into a SQLAttachment 48 | # - turn the DatabaseAttachment + SQLAttachment into a TableAttachment (or ErrorAttachment) 49 | # - finally, return the SQLAttachment + TableAttachment as results 50 | 51 | database_attachment = messages[0].attachments.get_attachment_by_type(DatabaseAttachment) 52 | table_attachments = messages[-1].attachments.get_attachment_by_type(TableAttachment) 53 | 54 | # check if question is found in cached table 55 | search_task_response = self.tasks["search-cached-question"](messages) 56 | sql_attachment = search_task_response.attachments.get_attachment_by_type(SQLAttachment) 57 | # initialize response messages 58 | response_messages = Messages() 59 | if not sql_attachment: 60 | # If not existing question and sql found in the cache table 61 | # call the text-to-SQL task 62 | task_message = Email( 63 | body=messages[-1].body, 64 | attachments=[database_attachment] + list(table_attachments.attachments.values()), 65 | to_address=self.tasks["text-to-SQL"].name, 66 | from_address=self.name, 67 | ) 68 | 69 | task_response = self.tasks["text-to-SQL"](task_message) 70 | response_messages.append(task_response) 71 | 72 | # check the response 73 | assert task_response.attachments.get_attachment_by_type(SQLAttachment) is not None 74 | 75 | # extract the SQL attachment 76 | sql_attachment = task_response.attachments.get_attachment_by_type(SQLAttachment) 77 | 78 | # try executing 79 | task_message = Email( 80 | body="execute this SQL on the database", 81 | attachments=[database_attachment, sql_attachment], 82 | to_address=self.tasks["execute-SQL"].name, 83 | from_address=self.name, 84 | ) 85 | 86 | task_response = self.tasks["execute-SQL"](task_message) 87 | response_messages.append(task_response) 88 | 89 | assert len(task_response.attachments) == 2 90 | 91 | # check the response 92 | if task_response.attachments.get_attachment_by_type(TableAttachment): 93 | return 
response_messages 94 | elif task_response.attachments.get_attachment_by_type(ErrorAttachment): 95 | # for N retries 96 | for i in range(self.retries): 97 | error_attachment = task_response.attachments[0] 98 | 99 | task_message = Email( 100 | body="fix this SQL", 101 | attachments=[error_attachment, database_attachment, sql_attachment] 102 | + list(table_attachments.attachments.values()), 103 | to_address=self.tasks["fix-SQL"].name, 104 | from_address=self.name, 105 | ) 106 | 107 | # fix the SQL 108 | task_response = self.tasks["fix-SQL"](task_message) 109 | response_messages.append(task_response) 110 | 111 | # get the new sql_attachment 112 | sql_attachment = task_response.attachments.get_attachment_by_type(SQLAttachment) 113 | 114 | # try executing 115 | task_response = self.tasks["execute-SQL"]( 116 | Email(attachments=[database_attachment, sql_attachment]) 117 | ) 118 | response_messages.append(task_response) 119 | 120 | if isinstance(task_response.attachments[0], TableAttachment): 121 | return response_messages 122 | elif isinstance(task_response.attachments[0], ErrorAttachment): 123 | continue 124 | else: 125 | raise ValueError 126 | else: 127 | raise ValueError 128 | 129 | return response_messages 130 | -------------------------------------------------------------------------------- /docs/_quarto.yml: -------------------------------------------------------------------------------- 1 | # project 2 | project: 3 | type: website 4 | 5 | execute: 6 | freeze: auto # re-render only when source changes 7 | 8 | # website 9 | website: 10 | 11 | # basics 12 | title: "Ibis Birdbrain" 13 | site-url: https://ibis-project.github.io 14 | site-path: ibis-birdbrain 15 | description: "the portable Python ML-powered data bot" 16 | favicon: images/logo.png 17 | 18 | # options 19 | reader-mode: false 20 | twitter-card: true 21 | back-to-top-navigation: true 22 | repo-url: https://github.com/ibis-project/ibis-birdbrain 23 | repo-actions: [edit, issue] 24 | repo-subdir: docs 25 | issue-url: https://github.com/ibis-project/ibis-birdbrain/issues/new/choose 26 | 27 | # footer 28 | page-footer: 29 | border: false 30 | left: "" 31 | right: 32 | - icon: github 33 | href: https://github.com/ibis-project/ibis-birdbrain 34 | 35 | # search 36 | search: 37 | location: navbar 38 | type: overlay 39 | 40 | # navbar 41 | navbar: 42 | logo: images/logo.png 43 | tools: 44 | - icon: github 45 | menu: 46 | - text: Source code 47 | url: https://github.com/ibis-project/ibis-birdbrain 48 | - text: Report an issue 49 | url: https://github.com/ibis-project/ibis-birdbrain/issues/new 50 | - text: Ask for help 51 | url: https://github.com/ibis-project/ibis-birdbrain/discussions/new?category=q-a 52 | 53 | left: 54 | - sidebar:getting-started 55 | - sidebar:concepts 56 | #- sidebar:how-to 57 | #- sidebar:reference 58 | 59 | # right: 60 | #- posts.qmd 61 | #- release_notes.md # TODO: release notes 62 | #- sidebar:contribute 63 | 64 | # sidebar 65 | sidebar: 66 | - id: "" 67 | - id: getting-started 68 | title: "Getting started" 69 | style: "docked" 70 | collapse-level: 2 71 | contents: 72 | - install.qmd 73 | - auto: tutorials/*.qmd 74 | - id: concepts 75 | title: "Concepts" 76 | style: "docked" 77 | collapse-level: 2 78 | contents: 79 | - why.qmd 80 | - concepts/bots.qmd 81 | - concepts/messages.qmd 82 | - concepts/attachments.qmd 83 | - concepts/flows.qmd 84 | - concepts/tasks.qmd 85 | - id: how-to 86 | title: "How-to" 87 | style: "docked" 88 | collapse-level: 2 89 | contents: 90 | - auto: "how-to/input-output" 91 | - id: contribute 92 
| title: "Contribute" 93 | style: "docked" 94 | collapse-level: 2 95 | contents: 96 | - auto: contribute 97 | #- id: reference 98 | # title: "Reference" 99 | # style: "docked" 100 | # collapse-level: 2 101 | # contents: 102 | # - section: Tools 103 | # contents: 104 | # - reference/code.qmd 105 | # - reference/filesystem.qmd 106 | 107 | # format 108 | format: 109 | html: 110 | #theme: vapor 111 | theme: 112 | dark: darkly 113 | light: darkly 114 | #dark: vapor 115 | #light: vapor 116 | #fontcolor: fuchsia 117 | #linkcolor: fuchsia 118 | css: styles.css 119 | toc: true 120 | 121 | # resources 122 | resources: 123 | - CNAME 124 | 125 | #quartodoc: 126 | # package: ibis_birdbrain 127 | # title: Reference 128 | # sidebar: reference/_sidebar.yml 129 | # dynamic: true 130 | # render_interlinks: false 131 | # options: 132 | # member_options: 133 | # signature_name: short 134 | # sections: 135 | # - title: Filesystem tools 136 | # desc: Tools for the filesystem 137 | # package: ibis_birdbrain.tools.filesystem 138 | # contents: 139 | # - kind: page 140 | # path: filesystem 141 | # summary: 142 | # name: filesystem 143 | # desc: Filesystem tools 144 | # contents: 145 | # - read_file 146 | # - list_files_and_dirs 147 | # - write_file 148 | # - title: EDA tools 149 | # desc: Tools for EDA 150 | # package: ibis_birdbrain.tools.eda 151 | # contents: 152 | # - kind: page 153 | # path: eda 154 | # summary: 155 | # name: eda 156 | # desc: EDA tools 157 | # contents: 158 | # - read_excel_file 159 | # - read_delta_table 160 | # - list_tables 161 | # - get_table_schemas 162 | # - title: Code tools 163 | # desc: Tools for code 164 | # package: ibis_birdbrain.tools.code 165 | # contents: 166 | # - kind: page 167 | # path: code 168 | # summary: 169 | # name: code 170 | # desc: Code tools 171 | # contents: 172 | # - text_to_python 173 | # - fix_python_error 174 | # - python_function_to_udf 175 | # - run_python_code 176 | # - title: Internet tools 177 | # desc: Tools for the Internet 178 | # package: ibis_birdbrain.tools.internet 179 | # contents: 180 | # - kind: page 181 | # path: internet 182 | # summary: 183 | # name: internet 184 | # desc: Internet tools 185 | # contents: 186 | # - search_internet 187 | # - webpage_to_str 188 | # - open_browser 189 | # 190 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt2/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Computations and control flow: it's just programming" 3 | author: "Cody Peterson" 4 | date: "2023-10-14" 5 | categories: 6 | - "LLMs and data" 7 | --- 8 | 9 | ## Introduction 10 | 11 | The recent Generative AI hype cycle has led to a lot of new terminology to 12 | understand. In this post, we'll cover some key concepts from the groud up and 13 | explain the basics of working with LLMs in the context of data. 14 | 15 | This post assumes [basic familiarity with Marvin and Ibis](../llms-and-data-pt0) 16 | and [three approaches to applying LLMs to data](../llms-and-data-pt1). 17 | 18 | ```{python} 19 | # | code-fold: true 20 | import ibis # <1> 21 | import marvin # <1> 22 | 23 | from dotenv import load_dotenv # <1> 24 | 25 | load_dotenv() # <2> 26 | 27 | con = ibis.connect("duckdb://penguins.ddb") # <3> 28 | t = ibis.examples.penguins.fetch() # <3> 29 | t = con.create_table("penguins", t.to_pyarrow(), overwrite=True) # <3> 30 | ``` 31 | 32 | 1. Import the libraries we need. 33 | 2. 
Load the environment variable to set up Marvin to call our OpenAI account.
34 | 3. Set up the demo data in an Ibis backend.
35 | 
36 | First, we'll set up Ibis and Marvin with some simple example data:
37 | 
38 | ```{python}
39 | import ibis # <1>
40 | import marvin # <1>
41 | 
42 | from ibis.expr.schema import Schema # <1>
43 | from ibis.expr.types.relations import Table # <1>
44 | 
45 | ibis.options.interactive = True # <2>
46 | marvin.settings.llm_model = "openai/gpt-4" # <2>
47 | 
48 | con = ibis.connect("duckdb://penguins.ddb") # <3>
49 | t = con.table("penguins") # <3>
50 | ```
51 | 
52 | 1. Import Ibis and Marvin.
53 | 2. Configure Ibis (interactive) and Marvin (GPT-4).
54 | 3. Connect to the data and load a table into a variable.
55 | 
56 | ## Context
57 | 
58 | Context is a fancy way of talking about the input to an LLM.
59 | 
60 | ## Calls
61 | 
62 | We make calls with inputs to functions or systems and get outputs. We can think
63 | of calling the LLM with our input (context) and getting an output (text).
64 | 
65 | ## Computations
66 | 
67 | A function or system often computes something. We can be pedantic about calls
68 | versus computations, but in general the connotation is that a computation is more
69 | time- and resource-intensive than a call. At the end of the day, they both
70 | take some computer cycles.
71 | 
72 | ## Retrieval augmented generation (RAG)
73 | 
74 | Instead of you typing out context for the bot, we can **retrieve** context from
75 | somewhere, **augment** our strings sent to the bot with this context, and then
76 | **generate** a response from the bot.
77 | 
78 | As a contrived example, instead of saying "The capital of foo is bar", we can
79 | retrieve the capital of foo from a database, augment our prompt with it, and
80 | then generate a response from the bot. You may notice that [we already did this
81 | in the first post in the series -- let's review that code
82 | again](../llms-and-data-pt0):
83 | 
84 | ```{python}
85 | from ibis.expr.schema import Schema
86 | from ibis.expr.types.relations import Table
87 | 
88 | 
89 | @marvin.ai_fn
90 | def sql_select(
91 |     text: str, table_name: str = t.get_name(), schema: Schema = t.schema()
92 | ) -> str:
93 |     """writes the SQL SELECT statement to query the table according to the text"""
94 | 
95 | 
96 | query = "the unique combination of species and islands"
97 | sql = sql_select(query).strip(";")
98 | sql
99 | ```
100 | 
101 | Notice that we **retrieved** the table name and schema with calls to the Ibis
102 | table (`t.get_name()` and `t.schema()`). We then **augment** our context (the
103 | query in natural language) with this information and **generate** a response
104 | from the bot.
105 | 
106 | This works reasonably well for simple SQL queries:
107 | 
108 | ```{python}
109 | t.sql(sql)
110 | ```
111 | 
112 | I would argue in this case there wasn't any real **computation** done by our
113 | **calls** to the Ibis table -- we were just retrieving some relatively static
114 | metadata -- but we could have done some more complex computations (on any of 18+
115 | data platforms).
116 | 
117 | ## Thought leadership
118 | 
119 | TODO: human rewrite
120 | 
121 | In the realm of Generative AI, particularly when working with Large Language
122 | Models (LLMs), understanding the concept of 'context' is crucial. Context, in this
123 | domain, refers to the inputs that are fed into an LLM, and the corresponding
124 | outputs they generate. 
This post breaks down the complexities of this process into 125 | understandable fragments, including retrieval of context, its augmentation, and, 126 | thereafter, the generation of a response. 127 | 128 | An illustrative example is provided, showcasing a database interaction. It 129 | demonstrates how the data retrieved can be used to augment the context before the 130 | bot generates a response. This valuable insight underlines the practical 131 | application of the theory, reinforcing the understanding of the readers. 132 | 133 | We also venture into the difference between simple static metadata retrieval and 134 | the more intricate computations. This distinction echoes the breadth and depth of 135 | the processes involved in Generative AI. 136 | 137 | As we continue to explore and unravel the potential of Generative AI and LLMs, 138 | this post serves as a fundamental building block. It creates a pathway for 139 | enthusiasts and professionals alike to delve deeper into this exciting field. By 140 | breaking down complex concepts into comprehensible segments, it fosters an 141 | environment of learning and growth. 142 | 143 | This marks just the beginning of our journey into the world of Generative AI. As 144 | we dig deeper, we will continue to explore, learn and share with our readers. Stay 145 | tuned for more insightful content in this series. [1] 146 | 147 | [1] https://github.com/ibis-project/ibis-birdbrain 148 | 149 | ## Next steps 150 | 151 | You can get involved with [Ibis 152 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 153 | & AI project for building next-generation natural language interfaces to data. 154 | 155 | [Read the next post in this series](../llms-and-data-pt3). 156 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt3/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "metadata": {}, 6 | "source": [ 7 | "---\n", 8 | "title: \"LLMs and data: modular and composable best practices\"\n", 9 | "author: \"Cody Peterson\"\n", 10 | "date: \"2023-10-03\"\n", 11 | "freeze: auto\n", 12 | "categories:\n", 13 | " - blog\n", 14 | " - ai\n", 15 | " - llm\n", 16 | "---" 17 | ], 18 | "id": "56691f4e" 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Introduction\n" 25 | ], 26 | "id": "f55bf8d3" 27 | }, 28 | { 29 | "cell_type": "code", 30 | "metadata": {}, 31 | "source": [ 32 | "# | code-fold: true\n", 33 | "import ibis # <1>\n", 34 | "import marvin # <1>\n", 35 | "\n", 36 | "from dotenv import load_dotenv # <1>\n", 37 | "\n", 38 | "load_dotenv() # <2>\n", 39 | "\n", 40 | "con = ibis.connect(\"duckdb://penguins.ddb\") # <3>\n", 41 | "t = ibis.examples.penguins.fetch() # <3>\n", 42 | "t = con.create_table(\"penguins\", t.to_pyarrow(), overwrite=True) # <3>" 43 | ], 44 | "id": "60b0d282", 45 | "execution_count": null, 46 | "outputs": [] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "1. Import the libraries we need.\n", 53 | "2. Load the environment variable to setup Marvin to call our OpenAI account.\n", 54 | "3. 
Setup the demo datain an Ibis backend.\n" 55 | ], 56 | "id": "8d246789" 57 | }, 58 | { 59 | "cell_type": "code", 60 | "metadata": {}, 61 | "source": [ 62 | "import ibis # <1>\n", 63 | "import marvin # <1>\n", 64 | "\n", 65 | "from ibis.expr.schema import Schema # <1>\n", 66 | "from ibis.expr.types.relations import Table # <1>\n", 67 | "\n", 68 | "ibis.options.interactive = True # <2>\n", 69 | "marvin.settings.llm_model = \"openai/gpt-4\" # <2>\n", 70 | "\n", 71 | "con = ibis.connect(\"duckdb://penguins.ddb\") # <3>\n", 72 | "t = con.table(\"penguins\") # <3>" 73 | ], 74 | "id": "7bf8f9ee", 75 | "execution_count": null, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "1. Import Ibis and Marvin.\n", 83 | "2. Configure Ibis (interactive) and Marvin (GPT-4).\n", 84 | "3. Connect to the data and load a table into a variable.\n" 85 | ], 86 | "id": "d38a0eb1" 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": {}, 91 | "source": [ 92 | "@marvin.ai_fn # <1>\n", 93 | "def _generate_sql_select(\n", 94 | " text: str, table_name: str, table_schema: Schema\n", 95 | ") -> str: # <1>\n", 96 | " \"\"\"Generate SQL SELECT from text.\"\"\" # <1>\n", 97 | "\n", 98 | "\n", 99 | "def sql_from_text(text: str, t: Table) -> Table: # <2>\n", 100 | " \"\"\"Run SQL from text.\"\"\" # <2>\n", 101 | " return t.sql(_generate_sql_select(text, t.get_name(), t.schema()).strip(\";\")) # <2>" 102 | ], 103 | "id": "e28e6dcd", 104 | "execution_count": null, 105 | "outputs": [] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "1. A non-deterministic, LLM-powered AI function.\n", 112 | "2. A deterministic, human-authored function that calls the AI function.\n" 113 | ], 114 | "id": "989a0f14" 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": {}, 119 | "source": [ 120 | "t2 = sql_from_text(\"the unique combination of species and islands\", t)\n", 121 | "t2" 122 | ], 123 | "id": "286ac581", 124 | "execution_count": null, 125 | "outputs": [] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "metadata": {}, 130 | "source": [ 131 | "t3 = sql_from_text(\n", 132 | " \"the unique combination of species and islands, with their counts, ordered from highest to lowest, and name that column just 'count'\",\n", 133 | " t,\n", 134 | ")\n", 135 | "t3" 136 | ], 137 | "id": "7acb6628", 138 | "execution_count": null, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": {}, 144 | "source": [ 145 | "## Summary\n", 146 | "\n", 147 | "To summarize this post:\n" 148 | ], 149 | "id": "ac4c3dd8" 150 | }, 151 | { 152 | "cell_type": "code", 153 | "metadata": {}, 154 | "source": [ 155 | "from rich import print\n", 156 | "\n", 157 | "with open(\"index.qmd\", \"r\") as f:\n", 158 | " self_text = f.read()\n", 159 | "\n", 160 | "# save some money and avoid rate limiting\n", 161 | "marvin.settings.llm_model = \"openai/gpt-3.5-turbo-16k\" # <2>\n", 162 | "\n", 163 | "@marvin.ai_model\n", 164 | "class Summary(BaseModel):\n", 165 | " \"\"\"Summary of text.\"\"\"\n", 166 | "\n", 167 | " summary_line: str = Field(..., description=\"The one-line summary of the text.\")\n", 168 | " summary_paragraph: str = Field(\n", 169 | " ..., description=\"The one-paragraph summary of the text.\"\n", 170 | " )\n", 171 | " conclusion: str = Field(\n", 172 | " ..., description=\"The conclusion the reader should draw from the text.\"\n", 173 | " )\n", 174 | " key_points: list[str] = Field(..., description=\"The key points of the 
text.\")\n", 175 | " critiques: list[str] = Field(\n", 176 | " ..., description=\"Professional, fair critiques of the text.\"\n", 177 | " )\n", 178 | " suggested_improvements: list[str] = Field(\n", 179 | " ..., description=\"Suggested improvements for the text.\"\n", 180 | " )\n", 181 | " sentiment: float = Field(..., description=\"The sentiment of the text.\")\n", 182 | " sentiment_label: str = Field(..., description=\"The sentiment label of the text.\")\n", 183 | " author_bias: str = Field(..., description=\"The author bias of the text.\")\n", 184 | "\n", 185 | "\n", 186 | "print(Summary(self_text))" 187 | ], 188 | "id": "d1dddf9f", 189 | "execution_count": null, 190 | "outputs": [] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "## Next steps\n", 197 | "\n", 198 | "You can get involved with [Ibis\n", 199 | "Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data\n", 200 | "& AI project for building next-generation natural language interfaces to data.\n", 201 | "\n", 202 | "## Discussions" 203 | ], 204 | "id": "471587d8" 205 | } 206 | ], 207 | "metadata": { 208 | "kernelspec": { 209 | "name": "python3", 210 | "language": "python", 211 | "display_name": "Python 3 (ipykernel)" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 5 216 | } -------------------------------------------------------------------------------- /src/ibis_birdbrain/bot.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import ibis 3 | import sqlglot as sg 4 | 5 | from uuid import uuid4 6 | from typing import Any 7 | from datetime import datetime 8 | 9 | from ibis.backends.base import BaseBackend 10 | 11 | from ibis_birdbrain.logging import log 12 | from ibis_birdbrain.attachments import ( 13 | Attachments, 14 | TableAttachment, 15 | DatabaseAttachment, 16 | SQLAttachment, 17 | ) 18 | from ibis_birdbrain.flows import Flows 19 | from ibis_birdbrain.strings import bot_description 20 | from ibis_birdbrain.messages import Message, Messages, Email 21 | from ibis_birdbrain.utils.strings import shorten_str 22 | from ibis_birdbrain.utils.attachments import to_attachments 23 | from ibis_birdbrain.tasks.sql import ExecuteSQLTask 24 | 25 | from ibis_birdbrain.flows.data import DataFlow 26 | 27 | 28 | # bot 29 | class Bot: 30 | """Ibis Birdbrain bot.""" 31 | 32 | id: str 33 | created_at: datetime 34 | 35 | con: BaseBackend 36 | name: str 37 | user_name: str 38 | description: str 39 | data_description: str 40 | version: str 41 | messages: Messages 42 | source_table_attachments: Attachments 43 | flows: Flows 44 | lm_response: bool 45 | conversational: bool 46 | 47 | current_subject: str 48 | 49 | def __init__( 50 | self, 51 | con=ibis.connect("duckdb://"), 52 | cached_table=None, 53 | name="birdbrain", 54 | user_name="user", 55 | description=bot_description, 56 | data_description="", 57 | version="infinity", 58 | messages=Messages(), 59 | source_table_attachments=None, 60 | flows=Flows([DataFlow()]), 61 | lm_response=False, 62 | conversational=False, 63 | ) -> None: 64 | """Initialize the bot.""" 65 | self.id = uuid4() 66 | self.created_at = datetime.now() 67 | 68 | self.con = con 69 | self.name = name 70 | self.user_name = user_name 71 | self.description = description 72 | self.data_description = data_description 73 | self.version = version 74 | self.messages = messages 75 | self.flows = flows 76 | 77 | self.lm_response = lm_response 78 | self.conversational = conversational 79 | 80 | 
self.current_subject = "" 81 | 82 | source_table_attachments = Attachments() 83 | for table in con.list_tables(): 84 | a = TableAttachment(con.table(table)) 85 | source_table_attachments.append(a) 86 | 87 | self.source_table_attachments = source_table_attachments 88 | 89 | # TODO: add flows to the description/body 90 | body = """TODO""" # noqa 91 | 92 | # system initialization message 93 | attachments = [ 94 | DatabaseAttachment(con, description=self.data_description), 95 | ] 96 | 97 | if cached_table is not None: 98 | attachments.append( 99 | TableAttachment(cached_table) 100 | ) 101 | system_message = Email( 102 | body=self.description, 103 | subject="system initialization", 104 | to_address=self.name, 105 | from_address=self.name, 106 | attachments=attachments, 107 | ) 108 | self.messages.append(system_message) 109 | log.info(f"Bot {self.name} initialized...") 110 | 111 | def __call__( 112 | self, 113 | text: str = "Who are you and what can you do?", 114 | stuff: list[Any] = [], 115 | ) -> Message: 116 | """Call upon the bot.""" 117 | 118 | log.info(f"Bot {self.name} called with text: {text}") 119 | 120 | # convert user input to message 121 | if self.current_subject == "": 122 | self.current_subject = shorten_str(text) 123 | 124 | input_attachments = to_attachments(stuff) 125 | 126 | input_message = Email( 127 | body=text, 128 | subject=self.current_subject, 129 | to_address=self.name, 130 | from_address=self.user_name, 131 | attachments=input_attachments, 132 | ) 133 | 134 | # add message to messages 135 | self.messages.append(input_message) 136 | 137 | # if conversational, use all messages 138 | if self.conversational: 139 | flow_messages = self.messages 140 | else: 141 | flow_messages = Messages([self.messages[0], self.messages[-1]]) 142 | 143 | # select the flow 144 | flow = self.flows.select_flow(flow_messages) 145 | 146 | # TODO: slight hack 147 | if flow.name == "data": 148 | flow_messages[-1].attachments.extend(self.source_table_attachments) 149 | 150 | # execute the flow 151 | result_messages = flow(flow_messages) 152 | 153 | # extend the messages 154 | self.messages.extend(result_messages) 155 | 156 | # generate the response 157 | response_attachments = Attachments() 158 | # TODO: smarter here 159 | for message in result_messages: 160 | response_attachments.extend(result_messages[message].attachments) 161 | response_message = Email( 162 | body="TODO", # TODO: generate body from attachments 163 | subject=self.current_subject, 164 | to_address=self.user_name, 165 | from_address=self.name, 166 | attachments=response_attachments, 167 | ) 168 | 169 | # update the response body 170 | if self.lm_response: 171 | response_message.body = self.respond(self.messages) 172 | else: 173 | response_message.body = "Ibis Birdbrain has attached the results." 174 | 175 | # add the response to the messages 176 | self.messages.append(response_message) 177 | 178 | # return the response 179 | self.messages[-1].to_address = self.user_name 180 | self.messages[-1].from_address = self.name 181 | return self.messages[-1] 182 | 183 | def respond(self, messages: Messages) -> Message: 184 | """Respond to the messages.""" 185 | ... 
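    # NOTE (editorial sketch, not part of the original source): `respond` above is a
    # stub -- its body is just `...` -- even though `__call__` assigns its return
    # value to `response_message.body` when `lm_response` is enabled. One possible
    # implementation, following the `@staticmethod` + `@marvin.fn` pattern used in
    # `tasks/sql.py`, might look roughly like:
    #
    #     @staticmethod
    #     @marvin.fn
    #     def _respond(messages: str) -> str:
    #         """Writes a concise reply to the user summarizing the attached results."""
    #
    #     def respond(self, messages: Messages) -> str:
    #         """Respond to the messages."""
    #         return self._respond(str(messages))
    #
    # This assumes `Messages` renders usefully via `str()`; treat it as a hedged
    # illustration, not the project's actual implementation.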
186 |
187 |     def transpile_sql(self, sql: str, dialect_from: str, dialect_to: str) -> str:
188 |         """Translate SQL from one dialect to another."""
189 |
190 |         return sg.transpile(
191 |             sql=sql,
192 |             read=dialect_from,
193 |             write=dialect_to,
194 |             identity=False,
195 |             pretty=True,
196 |         )[0]
197 |
198 |     def execute_last_sql(self, con: BaseBackend) -> Message:
199 |         """Execute the last successfully executed SQL statement."""
200 |
201 |         sql_attachment = None
202 |         for m in reversed(self.messages):
203 |             if m.attachments.get_attachment_by_type(SQLAttachment) and m.attachments.get_attachment_by_type(TableAttachment):
204 |                 sql_attachment = m.attachments.get_attachment_by_type(SQLAttachment)
205 |                 break
206 |
207 |         if sql_attachment:
208 |             database_attachment = DatabaseAttachment(con)
209 |             task_message = Email(
210 |                 body=f"execute this SQL on the {con.name}",
211 |                 attachments=[database_attachment, sql_attachment],
212 |                 to_address="execute-SQL",
213 |                 from_address=self.name,
214 |             )
215 |             return ExecuteSQLTask("execute-SQL")(task_message)
216 |
217 |
218 |         return Email(
219 |             body="No SQL query executed",
220 |             to_address=self.messages[-1].from_address,
221 |             from_address=self.name,
222 |         )
--------------------------------------------------------------------------------
/docs/_freeze/site_libs/quarto-listing/quarto-listing.js:
--------------------------------------------------------------------------------
1 | const kProgressiveAttr = "data-src";
2 | let categoriesLoaded = false;
3 |
4 | window.quartoListingCategory = (category) => {
5 |   if (categoriesLoaded) {
6 |     activateCategory(category);
7 |     setCategoryHash(category);
8 |   }
9 | };
10 |
11 | window["quarto-listing-loaded"] = () => {
12 |   // Process any existing hash
13 |   const hash = getHash();
14 |
15 |   if (hash) {
16 |     // If there is a category, switch to that
17 |     if (hash.category) {
18 |       activateCategory(hash.category);
19 |     }
20 |     // Paginate a specific listing
21 |     const listingIds = Object.keys(window["quarto-listings"]);
22 |     for (const listingId of listingIds) {
23 |       const page = hash[getListingPageKey(listingId)];
24 |       if (page) {
25 |         showPage(listingId, page);
26 |       }
27 |     }
28 |   }
29 |
30 |   const listingIds = Object.keys(window["quarto-listings"]);
31 |   for (const listingId of listingIds) {
32 |     // The actual list
33 |     const list = window["quarto-listings"][listingId];
34 |
35 |     // Update the handlers for pagination events
36 |     refreshPaginationHandlers(listingId);
37 |
38 |     // Render any visible items that need it
39 |     renderVisibleProgressiveImages(list);
40 |
41 |     // Whenever the list is updated, we also need to
42 |     // attach handlers to the new pagination elements
43 |     // and refresh any newly visible items.
44 | list.on("updated", function () { 45 | renderVisibleProgressiveImages(list); 46 | setTimeout(() => refreshPaginationHandlers(listingId)); 47 | 48 | // Show or hide the no matching message 49 | toggleNoMatchingMessage(list); 50 | }); 51 | } 52 | }; 53 | 54 | window.document.addEventListener("DOMContentLoaded", function (_event) { 55 | // Attach click handlers to categories 56 | const categoryEls = window.document.querySelectorAll( 57 | ".quarto-listing-category .category" 58 | ); 59 | 60 | for (const categoryEl of categoryEls) { 61 | const category = categoryEl.getAttribute("data-category"); 62 | categoryEl.onclick = () => { 63 | activateCategory(category); 64 | setCategoryHash(category); 65 | }; 66 | } 67 | 68 | // Attach a click handler to the category title 69 | // (there should be only one, but since it is a class name, handle N) 70 | const categoryTitleEls = window.document.querySelectorAll( 71 | ".quarto-listing-category-title" 72 | ); 73 | for (const categoryTitleEl of categoryTitleEls) { 74 | categoryTitleEl.onclick = () => { 75 | activateCategory(""); 76 | setCategoryHash(""); 77 | }; 78 | } 79 | 80 | categoriesLoaded = true; 81 | }); 82 | 83 | function toggleNoMatchingMessage(list) { 84 | const selector = `#${list.listContainer.id} .listing-no-matching`; 85 | const noMatchingEl = window.document.querySelector(selector); 86 | if (noMatchingEl) { 87 | if (list.visibleItems.length === 0) { 88 | noMatchingEl.classList.remove("d-none"); 89 | } else { 90 | if (!noMatchingEl.classList.contains("d-none")) { 91 | noMatchingEl.classList.add("d-none"); 92 | } 93 | } 94 | } 95 | } 96 | 97 | function setCategoryHash(category) { 98 | setHash({ category }); 99 | } 100 | 101 | function setPageHash(listingId, page) { 102 | const currentHash = getHash() || {}; 103 | currentHash[getListingPageKey(listingId)] = page; 104 | setHash(currentHash); 105 | } 106 | 107 | function getListingPageKey(listingId) { 108 | return `${listingId}-page`; 109 | } 110 | 111 | function refreshPaginationHandlers(listingId) { 112 | const listingEl = window.document.getElementById(listingId); 113 | const paginationEls = listingEl.querySelectorAll( 114 | ".pagination li.page-item:not(.disabled) .page.page-link" 115 | ); 116 | for (const paginationEl of paginationEls) { 117 | paginationEl.onclick = (sender) => { 118 | setPageHash(listingId, sender.target.getAttribute("data-i")); 119 | showPage(listingId, sender.target.getAttribute("data-i")); 120 | return false; 121 | }; 122 | } 123 | } 124 | 125 | function renderVisibleProgressiveImages(list) { 126 | // Run through the visible items and render any progressive images 127 | for (const item of list.visibleItems) { 128 | const itemEl = item.elm; 129 | if (itemEl) { 130 | const progressiveImgs = itemEl.querySelectorAll( 131 | `img[${kProgressiveAttr}]` 132 | ); 133 | for (const progressiveImg of progressiveImgs) { 134 | const srcValue = progressiveImg.getAttribute(kProgressiveAttr); 135 | if (srcValue) { 136 | progressiveImg.setAttribute("src", srcValue); 137 | } 138 | progressiveImg.removeAttribute(kProgressiveAttr); 139 | } 140 | } 141 | } 142 | } 143 | 144 | function getHash() { 145 | // Hashes are of the form 146 | // #name:value|name1:value1|name2:value2 147 | const currentUrl = new URL(window.location); 148 | const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; 149 | return parseHash(hashRaw); 150 | } 151 | 152 | const kAnd = "&"; 153 | const kEquals = "="; 154 | 155 | function parseHash(hash) { 156 | if (!hash) { 157 | return undefined; 158 | } 159 | const hasValuesStrs = hash.split(kAnd); 160 | const hashValues = hasValuesStrs 161 | .map((hashValueStr) => { 162 | const vals = hashValueStr.split(kEquals); 163 | if (vals.length === 2) { 164 | return { name: vals[0], value: vals[1] }; 165 | } else { 166 | return undefined; 167 | } 168 | }) 169 | .filter((value) => { 170 | return value !== undefined; 171 | }); 172 | 173 | const hashObj = {}; 174 | hashValues.forEach((hashValue) => { 175 | hashObj[hashValue.name] = decodeURIComponent(hashValue.value); 176 | }); 177 | return hashObj; 178 | } 179 | 180 | function makeHash(obj) { 181 | return Object.keys(obj) 182 | .map((key) => { 183 | return `${key}${kEquals}${obj[key]}`; 184 | }) 185 | .join(kAnd); 186 | } 187 | 188 | function setHash(obj) { 189 | const hash = makeHash(obj); 190 | window.history.pushState(null, null, `#${hash}`); 191 | } 192 | 193 | function showPage(listingId, page) { 194 | const list = window["quarto-listings"][listingId]; 195 | if (list) { 196 | list.show((page - 1) * list.page + 1, list.page); 197 | } 198 | } 199 | 200 | function activateCategory(category) { 201 | // Deactivate existing categories 202 | const activeEls = window.document.querySelectorAll( 203 | ".quarto-listing-category .category.active" 204 | ); 205 | for (const activeEl of activeEls) { 206 | activeEl.classList.remove("active"); 207 | } 208 | 209 | // Activate this category 210 | const categoryEl = window.document.querySelector( 211 | `.quarto-listing-category .category[data-category='${category}'` 212 | ); 213 | if (categoryEl) { 214 | categoryEl.classList.add("active"); 215 | } 216 | 217 | // Filter the listings to this category 218 | filterListingCategory(category); 219 | } 220 | 221 | function filterListingCategory(category) { 222 | const listingIds = Object.keys(window["quarto-listings"]); 223 | for (const listingId of listingIds) { 224 | const list = window["quarto-listings"][listingId]; 225 | if (list) { 226 | if (category === "") { 227 | // resets the filter 228 | list.filter(); 229 | } else { 230 | // filter to this category 231 | list.filter(function (item) { 232 | const itemValues = item.values(); 233 | if (itemValues.categories !== null) { 234 | const categories = itemValues.categories.split(","); 235 | return categories.includes(category); 236 | } else { 237 | return false; 238 | } 239 | }); 240 | } 241 | } 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /docs/_freeze/posts/llms-and-data-pt2/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "d58aef84abe91176621bc06047c25c39", 3 | "result": { 4 | "markdown": "---\ntitle: \"Computations and control flow: it's just programming\"\nauthor: \"Cody Peterson\"\ndate: \"2023-10-14\"\ncategories:\n - \"LLMs and data\"\n---\n\n## Introduction\n\nThe recent Generative AI hype cycle has led to a lot of new terminology to\nunderstand. 
In this post, we'll cover some key concepts from the groud up and\nexplain the basics of working with LLMs in the context of data.\n\nThis post assumes [basic familiarity with Marvin and Ibis](../llms-and-data-pt0)\nand [three approaches to applying LLMs to data](../llms-and-data-pt1).\n\n::: {#675d651b .cell execution_count=1}\n``` {.python .cell-code code-fold=\"true\"}\nimport ibis # <1>\nimport marvin # <1>\n\nfrom dotenv import load_dotenv # <1>\n\nload_dotenv() # <2>\n\ncon = ibis.connect(\"duckdb://penguins.ddb\") # <3>\nt = ibis.examples.penguins.fetch() # <3>\nt = con.create_table(\"penguins\", t.to_pyarrow(), overwrite=True) # <3>\n```\n:::\n\n\n1. Import the libraries we need.\n2. Load the environment variable to setup Marvin to call our OpenAI account.\n3. Setup the demo datain an Ibis backend.\n\nFirst, we'll setup Ibis and Marvin with some simple example data:\n\n::: {#297c21b2 .cell execution_count=2}\n``` {.python .cell-code}\nimport ibis # <1>\nimport marvin # <1>\n\nfrom ibis.expr.schema import Schema # <1>\nfrom ibis.expr.types.relations import Table # <1>\n\nibis.options.interactive = True # <2>\nmarvin.settings.llm_model = \"openai/gpt-4\" # <2>\n\ncon = ibis.connect(\"duckdb://penguins.ddb\") # <3>\nt = con.table(\"penguins\") # <3>\n```\n:::\n\n\n1. Import Ibis and Marvin.\n2. Configure Ibis (interactive) and Marvin (GPT-4).\n3. Connect to the data and load a table into a variable.\n\n## Context\n\nContext is a fancy way of talking about the input to a LLM.\n\n## Calls\n\nWe make calls with inputs to functions or systems and get outputs. We can think\nof calling the LLM with our input (context) and getting an output (text).\n\n## Computations\n\nA function or system often computes something. We can be pedantic about calls\nversus computations, but in general the connotation around computations is more\ntime and resource intensive than a call. At the end of the day, they will both\ntake some computer cycles.\n\n## Retrieval augmented generation (RAG)\n\nInstead of you typing out context for the bot, we can **retrieve** context from\nsomewhere, **augment** our strings sent to the bot with this context, and then\n**generate** a response from the bot.\n\nAs a contrived example, instead of saying \"The capitol of foo is bar\", we can\nretrieve the capitol of foo from a database, augment it with our context, and\nthen generate a response from the bot. You may notice that [we already did this\nin the firt post in the series -- let's review that code\nagain](../llms-and-data-pt0):\n\n::: {#1e60f7bd .cell execution_count=3}\n``` {.python .cell-code}\nfrom ibis.expr.schema import Schema\nfrom ibis.expr.types.relations import Table\n\n\n@marvin.ai_fn\ndef sql_select(\n text: str, table_name: str = t.get_name(), schema: Schema = t.schema()\n) -> str:\n \"\"\"writes the SQL SELECT statement to query the table according to the text\"\"\"\n\n\nquery = \"the unique combination of species and islands\"\nsql = sql_select(query).strip(\";\")\nsql\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```\n'SELECT DISTINCT species, island FROM penguins'\n```\n:::\n:::\n\n\nNotice that we **retrieved** the table name and schema with calls to the Ibis\ntable (`t.get_name()` and `t.schema()`). 
We then **augment** our context (the\nquery in natural language) with this information and **generate** a response\nfrom the bot.\n\nThis works reasonably well for simple SQL queries:\n\n::: {#07c59dc8 .cell execution_count=4}\n``` {.python .cell-code}\nt.sql(sql)\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┓\n┃ species    island    ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━┩\n│ stringstring    │\n├───────────┼───────────┤\n│ Adelie   Torgersen │\n│ Adelie   Biscoe    │\n│ Adelie   Dream     │\n│ Gentoo   Biscoe    │\n│ ChinstrapDream     │\n└───────────┴───────────┘\n
\n```\n:::\n:::\n\n\nI would argue in this case there wasn't any real **computation** done by our\n**calls** to the Ibis table -- we were just retrieving some relatively static\nmetadata -- but we could have done some more complex computations (on any of 18+\ndata platforms).\n\n## Thought leadership\n\nTODO: human rewrite\n\nIn the realm of Generative AI, particularly when working with Language Learning\nModels (LLMs), understanding the concept of 'context' is crucial. Context, in this\ndomain, refers to the inputs that are fed into an LLM, and the corresponding\noutputs they generate. This post breaks down the complexities of this process into\nunderstandable fragments, including retrieval of context, its augmentation, and,\nthereafter, the generation of a response.\n\nAn illustrative example is provided, showcasing a database interaction. It\ndemonstrates how the data retrieved can be used to augment the context before the\nbot generates a response. This valuable insight underlines the practical\napplication of the theory, reinforcing the understanding of the readers.\n\nWe also venture into the difference between simple static metadata retrieval and\nthe more intricate computations. This distinction echoes the breadth and depth of\nthe processes involved in Generative AI.\n\nAs we continue to explore and unravel the potential of Generative AI and LLMs,\nthis post serves as a fundamental building block. It creates a pathway for\nenthusiasts and professionals alike to delve deeper into this exciting field. By\nbreaking down complex concepts into comprehensible segments, it fosters an\nenvironment of learning and growth.\n\nThis marks just the beginning of our journey into the world of Generative AI. As\nwe dig deeper, we will continue to explore, learn and share with our readers. Stay\ntuned for more insightful content in this series. [1]\n\n[1] https://github.com/ibis-project/ibis-birdbrain\n\n## Next steps\n\nYou can get involved with [Ibis\nBirdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data\n& AI project for building next-generation natural language interfaces to data.\n\n[Read the next post in this series](../llms-and-data-pt3).\n\n", 5 | "supporting": [ 6 | "index_files/figure-html" 7 | ], 8 | "filters": [], 9 | "includes": { 10 | "include-in-header": [ 11 | "\n\n\n" 12 | ] 13 | } 14 | } 15 | } -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt0/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "An introduction to Marvin and Ibis" 3 | author: "Cody Peterson" 4 | date: "2023-10-12" 5 | execute: 6 | warning: false 7 | categories: 8 | - "LLMs and data" 9 | --- 10 | 11 | ## Introduction 12 | 13 | In this "LLMs and data" series, we'll explore how to apply large-language models 14 | (LLMs) to data analytics. We'll walk through the steps to build Ibis Birdbrain. 15 | 16 | Throughout the series, we'll be using 17 | [Marvin](https://www.askmarvin.ai/welcome/overview/) and 18 | [Ibis](https://ibis-project.org). A brief introduction to each is provided 19 | below. 20 | 21 | ## Marvin 22 | 23 | [Marvin](https://www.askmarvin.ai/welcome/overview/) is an AI engineering 24 | framework that makes it easy to build up to an interactive conversational 25 | application. 26 | 27 | Marvin makes calls to an AI platform. 
You typically use an API key set as an
28 | environment variable -- in this case, we'll load a `.env` file that contains
29 | secrets for the AI platform that Marvin will use. We also set the large language
30 | model (LLM) to use.
31 |
32 | ```{python}
33 | import marvin # <1>
34 |
35 | from rich import print # <1>
36 | from time import sleep # <1>
37 | from dotenv import load_dotenv # <1>
38 |
39 | load_dotenv() # <2>
40 |
41 | # increase accuracy
42 | marvin.settings.llm_model = "openai/gpt-4" # <3>
43 | # decrease cost
44 | # marvin.settings.llm_model = "openai/gpt-3.5-turbo" # <3>
45 |
46 | test_str = "working with data and LLMs on 18+ data platforms is easy!" # <4>
47 | test_str
48 | ```
49 |
50 | 1. Import the libraries we need.
51 | 2. Load the environment variable to setup Marvin to call our OpenAI account.
52 | 3. Configure the LLM model to use.
53 | 4. Some text to test on.
54 |
55 | ### Functions
56 |
57 | AI functions are one of the building blocks in Marvin and allow you to specify
58 | a typed Python function with no code -- only a docstring -- to achieve a wide
59 | variety of tasks.
60 |
61 | We'll demonstrate this with an AI function that translates text:
62 |
63 | ```{python}
64 | @marvin.ai_fn
65 | def translate(text: str, from_: str = "English", to: str = "Spanish") -> str:
66 |     """translates the text"""
67 |
68 | translate(test_str)
69 | ```
70 |
71 | ```{python}
72 | # | code-fold: true
73 | sleep(1) # <1>
74 | ```
75 |
76 | 1. Avoid rate-limiting by waiting.
77 |
78 | ```{python}
79 | translate(translate(test_str), from_="Spanish", to="English")
80 | ```
81 |
82 | ```{python}
83 | # | code-fold: true
84 | sleep(3) # <1>
85 | ```
86 |
87 | 1. Avoid rate-limiting by waiting.
88 |
89 | ### Models
90 |
91 | AI models are another building block for generating Python classes from input
92 | text. It's a great way to build structured data from unstructured data that can
93 | be customized for your needs.
94 |
95 | We'll demonstrate this with an AI model that extracts the parts of a sentence:
96 |
97 | ```{python}
98 | from pydantic import BaseModel, Field
99 |
100 | # decrease cost
101 | marvin.settings.llm_model = "openai/gpt-3.5-turbo"
102 |
103 | @marvin.ai_model
104 | class ExtractParts(BaseModel):
105 |     """Extracts parts of a sentence"""
106 |     subject: str = Field(..., description="The subject of the sentence.")
107 |     objects: list[str] = Field(..., description="The objects of the sentence.")
108 |     predicate: str = Field(..., description="The predicate of the sentence.")
109 |     modifiers: list[str] = Field(..., description="The modifiers of the sentence.")
110 |
111 | ExtractParts(test_str)
112 | ```
113 |
114 | ```{python}
115 | # | code-fold: true
116 | sleep(1) # <1>
117 | ```
118 |
119 | 1. Avoid rate-limiting by waiting.
120 |
121 | ### Classifiers
122 |
123 | AI classifiers are another building block for generating Python classes from
124 | input text. It's the most efficient (time and cost) method for applying LLMs, as
125 | it only results in a single output token, selecting an output from a specified
126 | Enum.
127 | 128 | We'll demonstrate this by classifying the language of some text: 129 | 130 | ```{python} 131 | from enum import Enum 132 | 133 | # increase accuracy 134 | marvin.settings.llm_model = "openai/gpt-4" 135 | 136 | @marvin.ai_classifier 137 | class IdentifyLanguage(Enum): 138 | """Identifies the language of the text""" 139 | 140 | english = "English" 141 | spanish = "Spanish" 142 | 143 | 144 | IdentifyLanguage(test_str).value 145 | ``` 146 | 147 | ```{python} 148 | # | code-fold: true 149 | sleep(1) # <1> 150 | ``` 151 | 152 | 1. Avoid rate-limiting by waiting. 153 | 154 | ```{python} 155 | IdentifyLanguage(translate(test_str)).value 156 | ``` 157 | 158 | ```{python} 159 | # | code-fold: true 160 | sleep(3) # <1> 161 | ``` 162 | 163 | 1. Avoid rate-limiting by waiting. 164 | 165 | ## Ibis 166 | 167 | [Ibis](https://ibis-project.org) is the portable Python dataframe library that 168 | enables Ibis Birdbrain to work on many data platforms at native scale. 169 | 170 | Ibis makes calls to a data platform, providing an API but pushing the compute to 171 | (local or remote) query engines and storage. DuckDB is the default and we'll 172 | typically use it for demo puroses. You can work with an in-memory instance, but 173 | we'll often create a database file from example data: 174 | 175 | ```{python} 176 | import ibis # <1> 177 | 178 | con = ibis.connect("duckdb://penguins.ddb") # <2> 179 | t = ibis.examples.penguins.fetch() # <2> 180 | t = con.create_table("penguins", t.to_pyarrow(), overwrite=True) # <2> 181 | ``` 182 | 183 | 1. Import the libraries we need. 184 | 2. Setup the demo datain an Ibis backend. 185 | 186 | You will typically connect to an existing data platform via your corresponding 187 | Ibis backend and have access to a number of tables: 188 | 189 | ```{python} 190 | import ibis # <1> 191 | 192 | ibis.options.interactive = True # <2> 193 | 194 | con = ibis.connect("duckdb://penguins.ddb") # <3> 195 | t = con.table("penguins") # <3> 196 | ``` 197 | 198 | 1. Import Ibis. 199 | 2. Configure Ibis (interactive). 200 | 3. Connect to the data and load a table into a variable. 201 | 202 | ### Backend 203 | 204 | A backend provides the connection and basic management of the data platform. 205 | Above, we created the `con` variable that is an instance of a DuckDB backend: 206 | 207 | ```{python} 208 | con 209 | ``` 210 | 211 | It usually contains some tables: 212 | 213 | ```{python} 214 | con.list_tables() 215 | ``` 216 | 217 | We can access some internals of Ibis to see what backends are available: 218 | 219 | ::: {.callout-tip} 220 | Don't rely on accessing internals of Ibis in production. 221 | ::: 222 | 223 | ```{python} 224 | backends = [entrypoint.name for entrypoint in ibis.util.backend_entry_points()] 225 | backends 226 | ``` 227 | 228 | ### Table 229 | 230 | You typically work with a table, conventionally named `t` for demo or 231 | exploratory purposes: 232 | 233 | ```{python} 234 | t 235 | ``` 236 | 237 | When working with many tables, you should name them descriptively. 238 | 239 | ### Schema 240 | 241 | A table has a schema that Ibis maps to the data platform's data types: 242 | 243 | ```{python} 244 | t.schema() 245 | ``` 246 | 247 | ## LLMs and data: Marvin and Ibis 248 | 249 | You can use Marvin and Ibis together to easily apply LLMs to data. 
250 | 251 | ```{python} 252 | from ibis.expr.schema import Schema 253 | from ibis.expr.types.relations import Table 254 | 255 | @marvin.ai_fn 256 | def sql_select( 257 | text: str, table_name: str = t.get_name(), schema: Schema = t.schema() 258 | ) -> str: 259 | """writes the SQL SELECT statement to query the table according to the text""" 260 | 261 | 262 | query = "the unique combination of species and islands" 263 | sql = sql_select(query).strip(";") 264 | sql 265 | ``` 266 | 267 | ```{python} 268 | t.sql(sql) 269 | ``` 270 | 271 | ```{python} 272 | # | code-fold: true 273 | sleep(3) # <1> 274 | ``` 275 | 276 | 1. Avoid rate-limiting by waiting. 277 | 278 | ```{python} 279 | t.sql(sql_select(query + " and include their counts in from highest to lowest").strip(";")) 280 | ``` 281 | 282 | ## Next steps 283 | 284 | You can get involved with [Ibis 285 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 286 | & AI project for building next-generation natural language interfaces to data. 287 | 288 | [Read the next post in this series](../llms-and-data-pt1). 289 | -------------------------------------------------------------------------------- /docs/_freeze/site_libs/clipboard/clipboard.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * clipboard.js v2.0.11 3 | * https://clipboardjs.com/ 4 | * 5 | * Licensed MIT © Zeno Rocha 6 | */ 7 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return b}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),r=n.n(e);function c(t){try{return document.execCommand(t)}catch(t){return}}var a=function(t){t=r()(t);return c("cut"),t};function o(t,e){var n,o,t=(n=t,o="rtl"===document.documentElement.getAttribute("dir"),(t=document.createElement("textarea")).style.fontSize="12pt",t.style.border="0",t.style.padding="0",t.style.margin="0",t.style.position="absolute",t.style[o?"right":"left"]="-9999px",o=window.pageYOffset||document.documentElement.scrollTop,t.style.top="".concat(o,"px"),t.setAttribute("readonly",""),t.value=n,t);return e.container.appendChild(t),e=r()(t),c("copy"),t.remove(),e}var f=function(t){var e=1\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=2}\n\n::: {.ansi-escaped-output}\n```{=html}\n
TableAttachment\n    **guid**: 5992ed9a-8a14-46c6-9da8-afdb3644a23d\n    **time**: 2024-03-05 11:22:40.051587\n    **name**: penguins\n    **desc**: \nibis.Schema {\n  species            string\n  island             string\n  bill_length_mm     float64\n  bill_depth_mm      float64\n  flipper_length_mm  int64\n  body_mass_g        int64\n  sex                string\n  year               int64\n}\n                **table**:\n┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓\n┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃\n┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩\n│ string  │ string    │ float64        │ float64       │ int64             │ int64       │ string │ int64 │\n├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤\n│ Adelie Torgersen39.118.71813750male  2007 │\n│ Adelie Torgersen39.517.41863800female2007 │\n│ Adelie Torgersen40.318.01953250female2007 │\n│ Adelie Torgersen │           NULL │          NULL │              NULL │        NULL │ NULL   │  2007 │\n│ Adelie Torgersen36.719.31933450female2007 │\n│ Adelie Torgersen39.320.61903650male  2007 │\n│ Adelie Torgersen38.917.81813625female2007 │\n│ Adelie Torgersen39.219.61954675male  2007 │\n│ Adelie Torgersen34.118.11933475 │ NULL   │  2007 │\n│ Adelie Torgersen42.020.21904250 │ NULL   │  2007 │\n│ …       │ …         │              … │             … │                 … │           … │ …      │     … │\n└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
\n```\n:::\n\n:::\n:::\n\n\nNotice the name, description (schema), and preview are automatically populated.\n\n## CodeAttachment\n\nA `CodeAttachment` contains code -- typically Python or SQL:\n\n::: {#89ecfbf6 .cell execution_count=3}\n``` {.python .cell-code}\nfrom ibis_birdbrain.attachments import CodeAttachment\n\na3 = CodeAttachment(content=\"select 1 as id\", language=\"sql\")\na3\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```\nCodeAttachment\n **guid**: 8138742b-dcef-4735-b452-8481468e12da\n **time**: 2024-03-05 11:22:40.087917\n **name**: None\n **desc**: None\n **language**: sql\n **code**:\nselect 1 as id\n```\n:::\n:::\n\n\n", 6 | "supporting": [ 7 | "attachments_files" 8 | ], 9 | "filters": [], 10 | "includes": { 11 | "include-in-header": [ 12 | "\n\n\n" 13 | ] 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /src/ibis_birdbrain/tasks/sql.py: -------------------------------------------------------------------------------- 1 | # imports 2 | import ibis 3 | import marvin 4 | 5 | from Levenshtein import ratio 6 | 7 | from ibis_birdbrain.tasks import Task 8 | from ibis_birdbrain.logging import log 9 | from ibis_birdbrain.messages import Email, Message, Messages 10 | from ibis_birdbrain.attachments import ( 11 | Attachments, 12 | TableAttachment, 13 | SQLAttachment, 14 | ErrorAttachment, 15 | DatabaseAttachment, 16 | ) 17 | 18 | 19 | @ibis.udf.scalar.python 20 | def levenshtein_ratio(text1: str, text2: str) -> float: 21 | return ratio(text1, text2) 22 | 23 | # tasks 24 | class SearchTextTask(Task): 25 | """Ibis Birdbrain task to search cached text and SQL pair.""" 26 | 27 | def __init__( 28 | self, name: str = "search-cached-question", description: str = "Ibis Birdbrain search task" 29 | ) -> None: 30 | """Initialize the searchtask.""" 31 | super().__init__(name=name, description=description) 32 | 33 | def __call__(self, messages: Messages) -> Message: 34 | """Search text task.""" 35 | log.info("Search text task.") 36 | 37 | try: 38 | cached_table = messages[0].attachments.get_attachment_by_type(TableAttachment)[0].open() 39 | question = messages[-1].body 40 | matched_res = cached_table.mutate( 41 | similarity=levenshtein_ratio(ibis._.question, question) 42 | ).filter( 43 | ibis._.similarity > 0.75 # Threshold 44 | ).order_by( 45 | ibis._.similarity.desc() 46 | ).limit(1).to_pandas() 47 | 48 | log.info(f"similarity = {matched_res.iloc[0]['similarity']}") 49 | log.info(f"sql = {matched_res.iloc[0]['sql']}") 50 | 51 | sql_attachment = SQLAttachment( 52 | dialect=matched_res.iloc[0]['dialect'], 53 | content=matched_res.iloc[0]['sql'] 54 | ) 55 | 56 | return Email( 57 | body="search cached question called", 58 | attachments=[sql_attachment], 59 | to_address=messages[-1].from_address, 60 | from_address=self.name, 61 | ) 62 | except Exception as e: 63 | return Email( 64 | body=f"No similar question found", 65 | to_address=messages[-1].from_address, 66 | from_address=self.name, 67 | ) 68 | 69 | 70 | class TextToSQLTask(Task): 71 | """Ibis Birdbrain task to turn text into SQL.""" 72 | 73 | def __init__( 74 | self, name: str = "text-to-SQL", description: str = "Ibis Birdbrain SQL task" 75 | ) -> None: 76 | """Initialize the SQL task.""" 77 | super().__init__(name=name, description=description) 78 | 79 | def __call__(self, message: Message) -> Message: 80 | """Text to SQL task.""" 81 | log.info("Text to SQL task") 82 | 83 | # get the database attachment and table attachments 84 | table_attachments = 
message.attachments.get_attachment_by_type(TableAttachment) 85 | database_attachment = message.attachments.get_attachment_by_type(DatabaseAttachment) 86 | 87 | assert table_attachments is not None, "No table attachments found" 88 | assert database_attachment is not None, "No database attachment found" 89 | 90 | dialect = database_attachment.open().name 91 | 92 | # generate the SQL 93 | sql = self.text_to_sql( 94 | text=message.body, 95 | tables=table_attachments, 96 | data_description=database_attachment.description, 97 | dialect=dialect, 98 | ) 99 | sql_attachment = SQLAttachment(dialect=dialect, content=sql) 100 | 101 | # generate the response message 102 | response_message = Email( 103 | body="text to sql called", 104 | attachments=[sql_attachment], 105 | to_address=message.from_address, 106 | from_address=self.name, 107 | ) 108 | return response_message 109 | 110 | @staticmethod 111 | @marvin.fn 112 | def _text_to_sql( 113 | text: str, tables: Attachments, data_description: str, dialect: str 114 | ) -> str: 115 | """ 116 | Generates correct, simple, and human-readable SQL based on the input 117 | `text`, `tables, and `data_description`, returns a SQL SELECT statement 118 | in the `dialect`. 119 | 120 | The `text` will contain a query in natural language to be answered. 121 | 122 | The `tables` will contain the table names and their schemas, alongside 123 | some metadata that can be ignored. DO NOT change the spelling or casing 124 | and only generate SQL for the provided tables. 125 | 126 | DO NOT add a LIMIT unless specifically told otherwise. 127 | 128 | Return (select) ALL possible columns unless specifically told otherwise. 129 | 130 | After joins, ensure that the columns are correctly qualified with the table name. 131 | """ 132 | 133 | def text_to_sql( 134 | self, text: str, tables: Attachments, data_description: str, dialect="duckdb" 135 | ) -> str: 136 | """Convert text to SQL.""" 137 | return ( 138 | self._text_to_sql( 139 | text=text, 140 | tables=tables, 141 | data_description=data_description, 142 | dialect=dialect, 143 | ) 144 | .strip() 145 | .strip(";") 146 | ) 147 | 148 | 149 | class ExecuteSQLTask(Task): 150 | """Ibis Birdbrain task to execute SQL.""" 151 | 152 | def __init__( 153 | self, name: str = "execute-SQL", description: str = "Ibis Birdbrain SQL task" 154 | ) -> None: 155 | """Initialize the SQL task.""" 156 | super().__init__(name=name, description=description) 157 | 158 | def __call__(self, message: Message) -> Message: 159 | """Execute the SQL task.""" 160 | log.info("Executing the SQL task") 161 | 162 | # get the database attachment and sql attachments 163 | sql_attachment = message.attachments.get_attachment_by_type(SQLAttachment) 164 | database_attachment = message.attachments.get_attachment_by_type(DatabaseAttachment) 165 | 166 | con = database_attachment.open() 167 | sql = sql_attachment.open() 168 | 169 | # execute the SQL 170 | try: 171 | table = con.sql(sql) 172 | attachment = TableAttachment(name="table", content=table) 173 | except Exception as e: 174 | log.error("Error executing the SQL") 175 | log.error("SQL: " + sql) 176 | log.error(e) 177 | attachment = ErrorAttachment(name="error", content=str(e)) 178 | 179 | response_message = Email( 180 | body="execute SQL called", 181 | attachments=[attachment, sql_attachment], 182 | to_address=message.from_address, 183 | from_address=self.name, 184 | ) 185 | return response_message 186 | 187 | 188 | class FixSQLTask(Task): 189 | """Ibis Birdbrain task to fix SQL.""" 190 | 191 | def __init__( 192 | self, 
name: str = "fix-SQL", description: str = "Ibis Birdbrain SQL task" 193 | ) -> None: 194 | """Initialize the SQL task.""" 195 | super().__init__(name=name, description=description) 196 | 197 | def __call__(self, message: Message) -> Message: 198 | """Fix the SQL task.""" 199 | log.info("Fixing the SQL task") 200 | 201 | database_attachment = message.attachments.get_attachment_by_type(DatabaseAttachment) 202 | table_attachments = message.attachments.get_attachment_by_type(TableAttachment) 203 | sql_attachment = message.attachments.get_attachment_by_type(SQLAttachment) 204 | error_attachment = message.attachments.get_attachment_by_type(ErrorAttachment) 205 | 206 | assert database_attachment is not None, "No database attachment found" 207 | assert table_attachments is not None, "No table attachments found" 208 | assert sql_attachment is not None, "No SQL attachment found" 209 | assert error_attachment is not None, "No error attachment found" 210 | 211 | sql = self.fix_text_to_sql( 212 | text=message.body, 213 | sql=sql_attachment.open(), 214 | error=error_attachment.open(), 215 | tables=table_attachments, 216 | data_description=database_attachment.description, 217 | dialect=sql_attachment.dialect 218 | ) 219 | 220 | response_message = Email( 221 | body="fix SQL called", 222 | attachments=[SQLAttachment(dialect=sql_attachment.dialect, content=sql)], 223 | to_address=message.from_address, 224 | from_address=self.name, 225 | ) 226 | return response_message 227 | 228 | @staticmethod 229 | @marvin.fn 230 | def _fix_text_to_sql( 231 | text: str, 232 | sql: str, 233 | error: str, 234 | tables: Attachments, 235 | data_description: str, 236 | dialect: str, 237 | ) -> str: 238 | """ 239 | Fixes the `sql` to answer the `text` based on the `error`. 240 | 241 | Using `tables, and `data_description`, returns a SQL SELECT statement 242 | in the `dialect` that fixes the input SQL. 243 | 244 | The `text` will contain a query in natural language to be answered. 245 | 246 | The `tables` will contain the table names and their schemas, alongside 247 | some metadata that can be ignored. DO NOT change the spelling or casing 248 | and only generate SQL for the provided tables. 249 | 250 | DO NOT add a LIMIT unless specifically told otherwise. 251 | 252 | Return (select) ALL possible columns unless specifically told otherwise. 253 | 254 | After joins, ensure that the columns are correctly qualified with the table name. 255 | """ 256 | 257 | def fix_text_to_sql( 258 | self, 259 | text: str, 260 | sql: str, 261 | error: str, 262 | tables: Attachments, 263 | data_description: str, 264 | dialect="duckdb", 265 | ) -> str: 266 | """Convert text to SQL.""" 267 | return ( 268 | self._fix_text_to_sql( 269 | text=text, 270 | sql=sql, 271 | error=error, 272 | tables=tables, 273 | data_description=data_description, 274 | dialect=dialect, 275 | ) 276 | .strip() 277 | .strip(";") 278 | ) 279 | -------------------------------------------------------------------------------- /docs/posts/llms-and-data-pt1/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Three approaches" 3 | author: "Cody Peterson" 4 | date: "2023-10-13" 5 | execute: 6 | warning: false 7 | categories: 8 | - "LLMs and data" 9 | --- 10 | 11 | ## Introduction 12 | 13 | The thought of using natural language to transform and analyze data is 14 | appealing. This post assumes familiarity with Marvin and Ibis -- [read the 15 | previous post in the series for a quick overview](../llms-and-data-pt0). 
16 | 17 | ## Approaches 18 | 19 | When discussed at Voltron Data, we identified three distinct approaches to 20 | applying LLMs to data analytics that can be implemented today: 21 | 22 | 1. LLM writes an analytic code 23 | 2. LLM writes an analytic subroutine 24 | 3. Use LLM in an analytic subroutine 25 | 26 | While these three approaches are not an exhaustive list of how LLMs can be 27 | applied to data, they can be easily understood and implemented with Ibis and 28 | Marvin in a few lines of code. Together with these two open-source tools, we can 29 | build a natural language interface for data analytics that supports 18+ 30 | backends. 31 | 32 | But first, let's demonstrate the three approaches. 33 | 34 | ### Approach 1: LLM writes analytic code 35 | 36 | State of the art (SoTA) LLMs are decent at generating SQL out of the box. We can 37 | be clever to handle errors, retries, and more, but in its simplest form: 38 | 39 | ```{python} 40 | # | code-fold: true 41 | import ibis # <1> 42 | import marvin # <1> 43 | 44 | from rich import print # <1> 45 | from time import sleep # <1> 46 | from dotenv import load_dotenv # <1> 47 | 48 | load_dotenv() # <2> 49 | 50 | con = ibis.connect("duckdb://penguins.ddb") # <3> 51 | t = ibis.examples.penguins.fetch() # <3> 52 | t = con.create_table("penguins", t.to_pyarrow(), overwrite=True) # <3> 53 | ``` 54 | 55 | 1. Import the libraries we need. 56 | 2. Load the environment variable to setup Marvin to call our OpenAI account. 57 | 3. Setup the demo datain an Ibis backend. 58 | 59 | ```{python} 60 | import ibis # <1> 61 | import marvin # <1> 62 | 63 | from ibis.expr.schema import Schema # <1> 64 | from ibis.expr.types.relations import Table # <1> 65 | 66 | 67 | ibis.options.interactive = True # <2> 68 | marvin.settings.llm_model = "openai/gpt-4" # <2> 69 | ``` 70 | 71 | 1. Import Ibis and Marvin. 72 | 2. Configure Ibis and Marvin 73 | 74 | ```{python} 75 | @marvin.ai_fn # <1> 76 | def _generate_sql_select( 77 | text: str, table_name: str, table_schema: Schema 78 | ) -> str: # <1> 79 | """Generate SQL SELECT from text.""" # <1> 80 | 81 | 82 | def sql_from_text(text: str, t: Table) -> Table: # <2> 83 | """Run SQL from text.""" # <2> 84 | return t.sql(_generate_sql_select(text, t.get_name(), t.schema()).strip(";")) # <2> 85 | ``` 86 | 87 | 1. A non-deterministic, LLM-powered AI function. 88 | 2. A deterministic, human-authored function that calls the AI function. 89 | 90 | ```{python} 91 | t2 = sql_from_text("the unique combination of species and islands", t) 92 | t2 93 | ``` 94 | 95 | ```{python} 96 | # | code-fold: true 97 | sleep(3) # <1> 98 | ``` 99 | 100 | 1. Avoid rate-limiting by waiting. 101 | 102 | ```{python} 103 | t3 = sql_from_text( 104 | "the unique combination of species and islands, with their counts, ordered from highest to lowest, and name that column just 'count'", 105 | t, 106 | ) 107 | t3 108 | ``` 109 | 110 | ```{python} 111 | # | code-fold: true 112 | sleep(3) # <1> 113 | ``` 114 | 115 | 1. Avoid rate-limiting by waiting. 116 | 117 | This works well-enough for simple cases and can be expanded to handle complex 118 | ones. In many scenarios, it may be easier to express a query in English or 119 | another language than to write it in SQL, especially if working across multiple 120 | SQL dialects. 121 | 122 | SQL isn't standard, with many dialects across data platforms. 
Ibis works around 123 | this by providing a standard Python API for analytic code but must make 124 | compromises to support many data platforms, often via SQL in their native 125 | dialect. [Substrait](https://substrait.io) is a newer project that aims to solve 126 | this problem by providing a standard, portable, and extensible intermediary 127 | representation (IR) for data transformation code that Ibis and data platforms 128 | could all standardize on. Substrait is still in the early stages of development, 129 | but it's worth keeping an eye on and will be adopted in Ibis once supported 130 | across many data platforms. 131 | 132 | For now, we'll focus on generating SQL and Python analytical code with LLMs. 133 | 134 | ### Approach 2: LLM writes an analytical subroutine 135 | 136 | If more complex logic needs to be expressed, SoTA LLMs are also decent at 137 | writing Python and a number of other programming languages that are used in 138 | analytical subroutines. Many data platforms support user-defined functions 139 | (UDFs) in Python or some other language. We'll stick to scalar Python UDFs via 140 | DuckDB to demonstrate the concept: 141 | 142 | ```{python} 143 | @marvin.ai_fn # <1> 144 | def _generate_python_function(text: str) -> str: # <1> 145 | """Generate a simple, typed, correct Python function from text.""" # <1> 146 | 147 | 148 | def create_udf_from_text(text: str) -> str: # <2> 149 | """Create a UDF from text.""" # <2> 150 | return f""" 151 | import ibis 152 | 153 | @ibis.udf.scalar.python 154 | {_generate_python_function(text)} 155 | """.strip() # <2> 156 | ``` 157 | 158 | 1. A non-deterministic, LLM-powered AI function. 159 | 2. A deterministic, human-authored function that calls the AI function. 160 | 161 | ```{python} 162 | udf = create_udf_from_text( 163 | "a function named count_vowels that given an input string, returns an int w/ the number of vowels (y_included as a boolean option defaulted to False)" 164 | ) 165 | print(udf) 166 | exec(udf) 167 | ``` 168 | 169 | ```{python} 170 | # | code-fold: true 171 | sleep(3) # <1> 172 | ``` 173 | 174 | 1. Avoid rate-limiting by waiting. 175 | 176 | ```{python} 177 | t4 = t3.mutate( 178 | species_vowel_count=count_vowels(t3.species), 179 | island_vowel_count=count_vowels(t3.island), 180 | ) 181 | t4 182 | ``` 183 | 184 | ```{python} 185 | # | code-fold: true 186 | sleep(3) # <1> 187 | ``` 188 | 189 | 1. Avoid rate-limiting by waiting. 190 | 191 | In this case, there's no reason not to have a human in the loop reviewing the 192 | output code and committing it for production use. This could be useful for quick 193 | prototyping or, given a box of tools in the form of UDFs, 194 | working through a natural language interface. 195 | 196 | ### Approach 3: Use LLM in an analytical subroutine 197 | 198 | We can also call the LLM once-per-row in the table via a subroutine. 
For 199 | variety, we'll use an [AI model](https://www.askmarvin.ai/components/ai_model/) 200 | instead of an [AI function](https://www.askmarvin.ai/components/ai_function/): 201 | 202 | ```{python} 203 | from pydantic import BaseModel, Field # <1> 204 | 205 | # decrease cost 206 | marvin.settings.llm_model = "openai/gpt-3.5-turbo-16k" # <2> 207 | 208 | 209 | @marvin.ai_model # <3> 210 | class VowelCounter(BaseModel): # <3> 211 | """Count vowels in a string.""" # <3> 212 | 213 | include_y: bool = Field(False, description="Include 'y' as a vowel.") # <3> 214 | # num_a: int = Field(..., description="The number of 'a' vowels.") # <3> 215 | # num_e: int = Field(..., description="The number of 'e' vowels.") # <3> 216 | # num_i: int = Field(..., description="The number of 'i' vowels.") # <3> 217 | # num_o: int = Field(..., description="The number of 'o' vowels.") # <3> 218 | # num_u: int = Field(..., description="The number of 'u' vowels.") # <3> 219 | # num_y: int = Field(..., description="The number of 'y' vowels.") # <3> 220 | num_total: int = Field(..., description="The total number of vowels.") # <3> 221 | 222 | 223 | VowelCounter("hello world") # <4> 224 | ``` 225 | 226 | 1. Additional imports for Pydantic. 227 | 2. Configure Marvin to use a cheaper model. 228 | 3. A non-deterministic, LLM-powered AI model. 229 | 4. Call the AI model on some text. 230 | 231 | Then we'll have the LLM write the UDF that calls the LLM, just to be fancy: 232 | 233 | ```{python} 234 | udf = create_udf_from_text( 235 | "a function named count_vowels_ai that given an input string, calls VowelCounter on it and returns the num_total attribute of that result" 236 | ) 237 | print(udf) 238 | exec(udf) 239 | ``` 240 | 241 | ```{python} 242 | # | code-fold: true 243 | sleep(3) # <1> 244 | ``` 245 | 246 | 1. Avoid rate-limiting by waiting. 247 | 248 | ```{python} 249 | t5 = t3.mutate( 250 | species_vowel_count=count_vowels_ai(t3.species), 251 | island_vowel_count=count_vowels_ai(t3.island), 252 | ) 253 | t5 254 | ``` 255 | 256 | Notice that in this UDF, unlike in the previous example, a LLM is being called 257 | (possibly several times) for each row in the table. This is a very expensive 258 | operation and we'll need to be careful about how we use it in practice. 259 | 260 | ```{python} 261 | # | code-fold: true 262 | sleep(3) # <1> 263 | ``` 264 | 265 | 1. Avoid rate-limiting by waiting. 266 | 267 | ## Summary 268 | 269 | To summarize this post: 270 | 271 | ```{python} 272 | from rich import print 273 | 274 | with open("index.qmd", "r") as f: 275 | self_text = f.read() 276 | 277 | # increase accuracy 278 | marvin.settings.llm_model = "openai/gpt-4" 279 | 280 | @marvin.ai_model 281 | class Summary(BaseModel): 282 | """Summary of text.""" 283 | 284 | summary_line: str = Field(..., description="The one-line summary of the text.") 285 | summary_paragraph: str = Field( 286 | ..., description="The one-paragraph summary of the text." 287 | ) 288 | conclusion: str = Field( 289 | ..., description="The conclusion the reader should draw from the text." 290 | ) 291 | key_points: list[str] = Field(..., description="The key points of the text.") 292 | critiques: list[str] = Field( 293 | ..., description="Professional, fair critiques of the text." 294 | ) 295 | suggested_improvements: list[str] = Field( 296 | ..., description="Suggested improvements for the text." 
297 | ) 298 | sentiment: float = Field(..., description="The sentiment of the text.") 299 | sentiment_label: str = Field(..., description="The sentiment label of the text.") 300 | author_bias: str = Field(..., description="The author bias of the text.") 301 | 302 | 303 | print(Summary(self_text)) 304 | ``` 305 | 306 | ## Next steps 307 | 308 | You can get involved with [Ibis 309 | Birdbrain](https://github.com/ibis-project/ibis-birdbrain), our open-source data 310 | & AI project for building next-generation natural language interfaces to data. 311 | 312 | [Read the next post in this series](../llms-and-data-pt2). 313 | -------------------------------------------------------------------------------- /docs/_freeze/tutorials/python/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "5473aeb233c43d87b1bfa8acc656ed98", 3 | "result": { 4 | "engine": "jupyter", 5 | "markdown": "---\ntitle: 'Tutorial: Python'\n---\n\n\n\n## Prerequisites\n\n1. [Install Ibis Birdbrain](/install.qmd)\n\n## Overview\n\nYou can use Ibis Birdbrain in Python.\n\n## Setup the bot\n\nFirst, import relevant modules:\n\n\n::: {#935cf68c .cell execution_count=1}\n``` {.python .cell-code}\nimport ibis\n\nfrom ibis_birdbrain import Bot\n```\n:::\n\n\nSet Ibis interactive mode:\n\n::: {#9ee1c955 .cell execution_count=2}\n``` {.python .cell-code}\nibis.options.interactive = True\n```\n:::\n\n\n### Create an Ibis connection\n\nCreate an Ibis connection to your database:\n\n::: {.callout-warning}\nWe'll create a demo database for this tutorial.\n:::\n\n::: {#1b36ceee .cell execution_count=3}\n``` {.python .cell-code}\ncon = ibis.connect(\"duckdb://penguins.ddb\")\ncon.create_table(\n \"penguins\", ibis.examples.penguins.fetch().to_pyarrow(), overwrite=True\n)\ncon = ibis.connect(\"duckdb://penguins.ddb\")\ncon.list_tables()\n```\n\n::: {.cell-output .cell-output-stderr}\n```\nINFO:pins.cache:cache file: /Users/cody/Library/Caches/pins-py/gcs_332a30997e141da0e08f15fbfae8b3c3ec90463922d117a96fa3b1bef85a2a4c/penguins/20230905T090411Z-9aae2/data.txt\nINFO:pins.cache:cache file: /Users/cody/Library/Caches/pins-py/gcs_332a30997e141da0e08f15fbfae8b3c3ec90463922d117a96fa3b1bef85a2a4c/penguins/20230905T090411Z-9aae2/penguins.csv.gz\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=3}\n```\n['penguins']\n```\n:::\n:::\n\n\n### Create the bot\n\nYou'll create the bot by passing in the connection:\n\n::: {.callout-tip}\nFor increased accuracy, you should also pass in a `data_description` containing\ninformation about the dataset. 
This could be fetched from the database itself,\nmanually created, or otherwise obtained.\n\nYou should not include table names and schemas -- this will be inferred\nautomatically.\n:::\n\n::: {#f4eab662 .cell execution_count=4}\n``` {.python .cell-code}\nbot = Bot(con=con, data_description=\"the Palmer Penguins dataset\")\nbot\n```\n\n::: {.cell-output .cell-output-stderr}\n```\nINFO:root:Bot birdbrain initialized...\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=4}\n```\n\n```\n:::\n:::\n\n\n### Test the bot\n\nYou can ask the bot questions:\n\n::: {#fe023103 .cell execution_count=5}\n``` {.python .cell-code}\nres = bot(\"\"\"give me the counts of penguins by species and island from highest\nto lowest\"\"\")\nres\n```\n\n::: {.cell-output .cell-output-stderr}\n```\nINFO:root:Bot birdbrain called with text: give me the counts of penguins by species and island from highest\nto lowest\nINFO:root:Selected flow: data\nINFO:root:Executing the data flow\nINFO:root:Text to SQL task\nINFO:httpx:HTTP Request: POST https://birdbrain-eh.openai.azure.com/openai/deployments/gpt-4-turbo/chat/completions?api-version=2023-12-01-preview \"HTTP/1.1 200 OK\"\nINFO:root:Executing the SQL task\n```\n:::\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display}\n```{=html}\n
\n```\n:::\n\n::: {.cell-output .cell-output-display execution_count=5}\n\n::: {.ansi-escaped-output}\n```{=html}\n
To: user\nFrom: birdbrain\nSubject: give me the counts of pengu...\nSent at: 2024-03-05 12:18:51.951695\nMessage: 67c02541-9406-4de5-b99e-21cc5dc8d3c5\nIbis Birdbrain has attached the results.\nAttachments:\nCodeAttachment\n    **guid**: e2152990-296e-4b92-a159-05af61b5334a\n    **time**: 2024-03-05 12:18:51.949484\n    **name**: None\n    **desc**: None\n    **language**: sql\n    **code**:\nSELECT species, island, COUNT(*) AS count\nFROM penguins\nGROUP BY species, island\nORDER BY COUNT(*) DESC\nTableAttachment\n    **guid**: e6acb87c-b4ca-4fa7-8292-638d3fc0b6e8\n    **time**: 2024-03-05 12:18:51.951600\n    **name**: None\n    **desc**: \nibis.Schema {\n  species  string\n  island   string\n  count    int64\n}\n                **table**:\n┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓\n┃ species    island     count ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩\n│ string    │ string    │ int64 │\n├───────────┼───────────┼───────┤\n│ Gentoo   Biscoe   124 │\n│ ChinstrapDream    68 │\n│ Adelie   Dream    56 │\n│ Adelie   Torgersen52 │\n│ Adelie   Biscoe   44 │\n└───────────┴───────────┴───────┘
\n```\n:::\n\n:::\n:::\n\n\n### Get attachments\n\nYou can get the table from the attachment:\n\n::: {#822b287e .cell execution_count=6}\n``` {.python .cell-code}\nt = res.attachments[-1].open()\nt\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓\n┃ species    island     count ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩\n│ stringstringint64 │\n├───────────┼───────────┼───────┤\n│ Gentoo   Biscoe   124 │\n│ ChinstrapDream    68 │\n│ Adelie   Dream    56 │\n│ Adelie   Torgersen52 │\n│ Adelie   Biscoe   44 │\n└───────────┴───────────┴───────┘\n
\n```\n:::\n:::\n\n\nAnd do whatever you want with it:\n\n::: {#f616ff8d .cell execution_count=7}\n``` {.python .cell-code}\nt.order_by(ibis._[\"count\"].asc())\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓\n┃ species    island     count ┃\n┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩\n│ stringstringint64 │\n├───────────┼───────────┼───────┤\n│ Adelie   Biscoe   44 │\n│ Adelie   Torgersen52 │\n│ Adelie   Dream    56 │\n│ ChinstrapDream    68 │\n│ Gentoo   Biscoe   124 │\n└───────────┴───────────┴───────┘\n
\n```\n:::\n:::\n\n\n## Next steps\n\nExplore some data with Ibis Birdbrain and [let us know how it\ngoes!](https://github.com/ibis-project/ibis-birdbrain/issues/new)\n\n", 6 | "supporting": [ 7 | "python_files/figure-html" 8 | ], 9 | "filters": [], 10 | "includes": { 11 | "include-in-header": [ 12 | "\n\n\n" 13 | ] 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright Ibis developers 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------