├── pkg-py ├── tests │ ├── __init__.py │ ├── test_querychat.py │ ├── test_tools.py │ ├── test_init_with_pandas.py │ ├── test_data.py │ └── test_df_to_html.py ├── docs │ ├── _extensions │ │ └── machow │ │ │ └── interlinks │ │ │ ├── .gitignore │ │ │ ├── _extension.yml │ │ │ └── test.qmd │ ├── images │ │ ├── sidebot.png │ │ ├── querychat.png │ │ ├── quickstart.png │ │ ├── plotly-data-view.png │ │ ├── rich-data-views.png │ │ ├── multiple-datasets.png │ │ ├── quickstart-filter.png │ │ └── quickstart-summary.png │ ├── .gitignore │ ├── styles.scss │ ├── greet.qmd │ ├── _quarto.yml │ ├── context.qmd │ ├── models.qmd │ ├── tools.qmd │ └── data-sources.qmd ├── src │ └── querychat │ │ ├── data │ │ ├── tips.csv.gz │ │ ├── titanic.csv.gz │ │ └── __init__.py │ │ ├── express │ │ └── __init__.py │ │ ├── types │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── static │ │ ├── css │ │ │ └── styles.css │ │ └── js │ │ │ └── querychat.js │ │ ├── prompts │ │ ├── tool-reset-dashboard.md │ │ ├── tool-update-dashboard.md │ │ ├── tool-query.md │ │ └── prompt.md │ │ ├── _icons.py │ │ ├── _deprecated.py │ │ ├── _utils.py │ │ └── _querychat_module.py ├── examples │ ├── 01-hello-app.py │ ├── 02-prompt-app.py │ ├── 03-sidebar-express-app.py │ ├── greeting.md │ ├── 03-sidebar-core-app.py │ └── data_description.md ├── README.md ├── LICENSE └── CHANGELOG.md ├── pkg-r ├── .covrignore ├── LICENSE ├── .Rbuildignore ├── man │ ├── figures │ │ ├── logo.png │ │ ├── lifecycle-deprecated.svg │ │ ├── lifecycle-superseded.svg │ │ ├── lifecycle-experimental.svg │ │ └── lifecycle-stable.svg │ ├── is_data_source.Rd │ ├── deprecated.Rd │ ├── querychat-package.Rd │ ├── querychat-convenience.Rd │ ├── DataSource.Rd │ ├── DataFrameSource.Rd │ └── DBISource.Rd ├── pkgdown │ ├── favicon │ │ ├── favicon.ico │ │ ├── favicon-96x96.png │ │ ├── apple-touch-icon.png │ │ ├── web-app-manifest-192x192.png │ │ ├── web-app-manifest-512x512.png │ │ └── site.webmanifest │ ├── _brand.yml │ └── _pkgdown.yml ├── tests │ ├── testthat │ │ 
├── test-deprecated.R │ │ ├── test-querychat_module.R │ │ ├── apps │ │ │ └── basic │ │ │ │ └── app.R │ │ ├── _snaps │ │ │ ├── QueryChat.md │ │ │ └── DataSource.md │ │ └── helper-fixtures.R │ └── testthat.R ├── inst │ ├── examples-shiny │ │ ├── 01-hello-app │ │ │ └── app.R │ │ ├── 02-sidebar-app │ │ │ └── app.R │ │ └── sqlite │ │ │ ├── README.md │ │ │ └── app.R │ ├── prompts │ │ ├── tool-reset-dashboard.md │ │ ├── tool-update-dashboard.md │ │ ├── tool-query.md │ │ └── prompt.md │ └── htmldep │ │ ├── styles.css │ │ └── querychat.js ├── NAMESPACE ├── R │ ├── staticimports.R │ ├── utils-check.R │ ├── utils-ellmer.R │ ├── querychat-package.R │ ├── deprecated.R │ └── querychat_module.R ├── LICENSE.md ├── DESCRIPTION └── NEWS.md ├── animation.gif ├── docs ├── animation.gif ├── logo-r.svg └── logo-python.svg ├── .vscode └── settings.json ├── .github └── workflows │ ├── R-CMD-check.yml │ ├── py-release.yml │ ├── py-test.yml │ ├── docs-r-pkgdown.yml │ └── docs-py-quartodoc.yml ├── LICENSE.md ├── README.md ├── .claude └── R-TESTING.md ├── CLAUDE.md └── .gitignore /pkg-py/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pkg-r/.covrignore: -------------------------------------------------------------------------------- 1 | R/querychat-package.R 2 | -------------------------------------------------------------------------------- /pkg-r/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2025 2 | COPYRIGHT HOLDER: querychat authors 3 | -------------------------------------------------------------------------------- /pkg-r/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^pkgdown$ 2 | ^LICENSE\.md$ 3 | ^\.covrignore$ 4 | -------------------------------------------------------------------------------- /animation.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/animation.gif -------------------------------------------------------------------------------- /pkg-py/docs/_extensions/machow/interlinks/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.pdf 3 | *_files/ 4 | -------------------------------------------------------------------------------- /docs/animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/docs/animation.gif -------------------------------------------------------------------------------- /pkg-r/man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-r/man/figures/logo.png -------------------------------------------------------------------------------- /pkg-py/docs/images/sidebot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/sidebot.png -------------------------------------------------------------------------------- /pkg-py/docs/images/querychat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/querychat.png -------------------------------------------------------------------------------- /pkg-py/docs/images/quickstart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/quickstart.png -------------------------------------------------------------------------------- /pkg-r/pkgdown/favicon/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-r/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkg-py/docs/images/plotly-data-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/plotly-data-view.png -------------------------------------------------------------------------------- /pkg-py/docs/images/rich-data-views.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/rich-data-views.png -------------------------------------------------------------------------------- /pkg-py/src/querychat/data/tips.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/src/querychat/data/tips.csv.gz -------------------------------------------------------------------------------- /pkg-py/src/querychat/express/__init__.py: -------------------------------------------------------------------------------- 1 | from .._querychat import QueryChatExpress as QueryChat 2 | 3 | __all__ = ["QueryChat"] 4 | -------------------------------------------------------------------------------- /pkg-r/pkgdown/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-r/pkgdown/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /pkg-py/docs/images/multiple-datasets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/multiple-datasets.png 
-------------------------------------------------------------------------------- /pkg-py/docs/images/quickstart-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/quickstart-filter.png -------------------------------------------------------------------------------- /pkg-py/docs/images/quickstart-summary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/docs/images/quickstart-summary.png -------------------------------------------------------------------------------- /pkg-py/src/querychat/data/titanic.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-py/src/querychat/data/titanic.csv.gz -------------------------------------------------------------------------------- /pkg-r/pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-r/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkg-r/pkgdown/favicon/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-r/pkgdown/favicon/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /pkg-r/pkgdown/favicon/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/posit-dev/querychat/HEAD/pkg-r/pkgdown/favicon/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /pkg-py/examples/01-hello-app.py: 
-------------------------------------------------------------------------------- 1 | from querychat import QueryChat 2 | from querychat.data import titanic 3 | 4 | qc = QueryChat(titanic(), "titanic") 5 | app = qc.app() 6 | -------------------------------------------------------------------------------- /pkg-py/docs/_extensions/machow/interlinks/_extension.yml: -------------------------------------------------------------------------------- 1 | title: Interlinks 2 | author: Michael Chow 3 | version: 1.1.0 4 | quarto-required: ">=1.2.0" 5 | contributes: 6 | filters: 7 | - interlinks.lua 8 | -------------------------------------------------------------------------------- /pkg-py/docs/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | /_site 3 | /_inv 4 | *.quarto_ipynb 5 | objects.txt 6 | objects.json 7 | CHANGELOG.md 8 | 9 | # Ignore quartodoc artifacts, these are built in CI 10 | _sidebar-python.yml 11 | api/ 12 | reference/ 13 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/types/__init__.py: -------------------------------------------------------------------------------- 1 | from .._datasource import DataFrameSource, DataSource, SQLAlchemySource # noqa: A005 2 | from .._querychat_module import ServerValues 3 | 4 | __all__ = ( 5 | "DataFrameSource", 6 | "DataSource", 7 | "SQLAlchemySource", 8 | "ServerValues", 9 | ) 10 | -------------------------------------------------------------------------------- /pkg-py/examples/02-prompt-app.py: -------------------------------------------------------------------------------- 1 | 2 | from pathlib import Path 3 | from querychat import QueryChat 4 | from querychat.data import titanic 5 | 6 | greeting = Path(__file__).parent / "greeting.md" 7 | data_desc = Path(__file__).parent / "data_description.md" 8 | 9 | qc = QueryChat( 10 | titanic(), 11 | "titanic", 12 | greeting=greeting, 13 | data_description=data_desc, 
14 | ) 15 | 16 | qc.app() 17 | -------------------------------------------------------------------------------- /pkg-r/tests/testthat/test-deprecated.R: -------------------------------------------------------------------------------- 1 | test_that("deprecated functions throw deprecation warnings", { 2 | lifecycle::expect_defunct(querychat_init()) 3 | lifecycle::expect_defunct(querychat_sidebar()) 4 | lifecycle::expect_defunct(querychat_ui()) 5 | lifecycle::expect_defunct(querychat_server()) 6 | lifecycle::expect_defunct(querychat_greeting()) 7 | lifecycle::expect_defunct(querychat_data_source()) 8 | }) 9 | -------------------------------------------------------------------------------- /pkg-r/inst/examples-shiny/01-hello-app/app.R: -------------------------------------------------------------------------------- 1 | library(querychat) 2 | library(palmerpenguins) 3 | 4 | # Create a QueryChat object and generate a complete app with $app() 5 | qc <- QueryChat$new(penguins) 6 | qc$app() 7 | 8 | # That's it! 
The app includes: 9 | # - A sidebar with the chat interface 10 | # - SQL query display with syntax highlighting 11 | # - Data table showing filtered results 12 | # - Reset button to clear queries 13 | -------------------------------------------------------------------------------- /pkg-py/docs/_extensions/machow/interlinks/test.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | filters: 3 | - interlinks.lua 4 | interlinks: 5 | autolink: true 6 | aliases: 7 | quartodoc: null 8 | #sources: 9 | # test: 10 | # url: https://example.com 11 | --- 12 | 13 | * `some_func` 14 | * `some_func()` 15 | * `some_func(a=1)` 16 | * `some_func()`{.qd-no-link} 17 | * `some_func + some_func` 18 | * `a.b.c` 19 | * `~a.b.c` 20 | * `a.b.c()` 21 | * `quartodoc.Auto()` 22 | * `Auto()` -------------------------------------------------------------------------------- /pkg-r/man/is_data_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSource.R 3 | \name{is_data_source} 4 | \alias{is_data_source} 5 | \title{Check if object is a DataSource} 6 | \usage{ 7 | is_data_source(x) 8 | } 9 | \arguments{ 10 | \item{x}{Object to check} 11 | } 12 | \value{ 13 | TRUE if x is a DataSource, FALSE otherwise 14 | } 15 | \description{ 16 | Check if object is a DataSource 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /pkg-r/tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(querychat) 11 | 12 | test_check("querychat") 13 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/__init__.py: -------------------------------------------------------------------------------- 1 | from ._deprecated import greeting, init, sidebar, system_prompt 2 | from ._deprecated import mod_server as server 3 | from ._deprecated import mod_ui as ui 4 | from ._querychat import QueryChat 5 | 6 | __all__ = ( 7 | "QueryChat", 8 | # TODO(lifecycle): Remove these deprecated functions when we reach v1.0 9 | "greeting", 10 | "init", 11 | "server", 12 | "sidebar", 13 | "system_prompt", 14 | "ui", 15 | ) 16 | -------------------------------------------------------------------------------- /pkg-r/inst/prompts/tool-reset-dashboard.md: -------------------------------------------------------------------------------- 1 | Reset the dashboard to its original state 2 | 3 | Resets the dashboard to use the original unfiltered dataset and clears any custom title. 4 | 5 | If the user asks to reset the dashboard, simply call this tool with no other response. The reset action will be obvious to the user. 6 | 7 | If the user asks to start over, call this tool and then provide a new set of suggestions for next steps. Include suggestions that encourage exploration of the data in new directions. 
8 | -------------------------------------------------------------------------------- /pkg-r/pkgdown/favicon/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } -------------------------------------------------------------------------------- /pkg-r/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(DBISource) 4 | export(DataFrameSource) 5 | export(DataSource) 6 | export(QueryChat) 7 | export(querychat) 8 | export(querychat_app) 9 | export(querychat_data_source) 10 | export(querychat_greeting) 11 | export(querychat_init) 12 | export(querychat_server) 13 | export(querychat_sidebar) 14 | export(querychat_ui) 15 | if (getRversion() < "4.3.0") importFrom("S7", "@") 16 | import(rlang) 17 | importFrom(R6,R6Class) 18 | importFrom(bslib,sidebar) 19 | importFrom(lifecycle,deprecated) 20 | -------------------------------------------------------------------------------- /pkg-r/inst/htmldep/styles.css: -------------------------------------------------------------------------------- 1 | .querychat shiny-chat-message table td, 2 | .querychat shiny-chat-message table th { 3 | border: var(--bs-border-width) var(--bs-border-style) var(--bs-border-color); 4 | padding: 3px; 5 | } 6 | 7 | .querychat shiny-chat-message table td { 8 | font-family: var(--bs-font-monospace); 9 | } 10 | 11 | /* querychat takes up the full sidebar, so move the collapse toggle out of the way */ 12 | 
.bslib-sidebar-layout:has(.querychat-sidebar):not(.sidebar-collapsed)>.collapse-toggle { 13 | right: 4px; 14 | top: 4px; 15 | } 16 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/static/css/styles.css: -------------------------------------------------------------------------------- 1 | .querychat shiny-chat-message table td, 2 | .querychat shiny-chat-message table th { 3 | border: var(--bs-border-width) var(--bs-border-style) var(--bs-border-color); 4 | padding: 3px; 5 | } 6 | 7 | .querychat shiny-chat-message table td { 8 | font-family: var(--bs-font-monospace); 9 | } 10 | 11 | /* querychat takes up the full sidebar, so move the collapse toggle out of the way */ 12 | .bslib-sidebar-layout:has(.querychat-sidebar):not(.sidebar-collapsed)>.collapse-toggle { 13 | right: 4px; 14 | top: 4px; 15 | } 16 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.autoComplete.extraPaths": ["${workspaceFolder}/pkg-py"], 3 | "[python]": { 4 | "editor.formatOnSave": true, 5 | "editor.codeActionsOnSave": { 6 | "source.fixAll": "explicit", 7 | "source.organizeImports": "explicit" 8 | }, 9 | "editor.defaultFormatter": "charliermarsh.ruff", 10 | }, 11 | "flake8.args": [ 12 | "--max-line-length=120" 13 | ], 14 | "python.testing.pytestArgs": [ 15 | "pkg-py" 16 | ], 17 | "python.testing.unittestEnabled": false, 18 | "python.testing.pytestEnabled": true 19 | } 20 | -------------------------------------------------------------------------------- /pkg-py/README.md: -------------------------------------------------------------------------------- 1 | # querychat for Python 2 | 3 | Please see [the package documentation site](https://posit-dev.github.io/querychat/py/index.html) for installation, setup, and usage. 
4 | 5 | If you are looking for querychat Python examples, 6 | you can find them in the `examples/` directory. 7 | 8 | ## Installation 9 | 10 | You can install the package from PyPI using pip: 11 | 12 | ```bash 13 | pip install querychat 14 | ``` 15 | 16 | Or you can install querychat directly from GitHub: 17 | 18 | ```bash 19 | pip install "querychat @ git+https://github.com/posit-dev/querychat" 20 | ``` 21 | -------------------------------------------------------------------------------- /pkg-r/R/staticimports.R: -------------------------------------------------------------------------------- 1 | # Generated by staticimports; do not edit by hand. 2 | # ====================================================================== 3 | # Imported from pkg:staticimports 4 | # ====================================================================== 5 | 6 | raw_to_utf8 <- function(data) { 7 | res <- rawToChar(data) 8 | Encoding(res) <- "UTF-8" 9 | res 10 | } 11 | 12 | read_raw <- function(file) { 13 | readBin(file, "raw", n = file.info(file, extra_cols = FALSE)$size) 14 | } 15 | 16 | # Read file as UTF-8 17 | read_utf8 <- function(file) { 18 | res <- read_raw(file) 19 | raw_to_utf8(res) 20 | } 21 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/prompts/tool-reset-dashboard.md: -------------------------------------------------------------------------------- 1 | Reset the dashboard to its original state 2 | 3 | Resets the dashboard to use the original unfiltered dataset and clears any custom title. 4 | 5 | If the user asks to reset the dashboard, simply call this tool with no other response. The reset action will be obvious to the user. 6 | 7 | If the user asks to start over, call this tool and then provide a new set of suggestions for next steps. Include suggestions that encourage exploration of the data in new directions. 
8 | 9 | Returns 10 | ------- 11 | : 12 | Confirmation that the dashboard has been reset to show all data. 13 | -------------------------------------------------------------------------------- /pkg-r/inst/htmldep/querychat.js: -------------------------------------------------------------------------------- 1 | (function () { 2 | if (!window.Shiny) return; 3 | 4 | window.addEventListener("click", function (event) { 5 | if (event.target.tagName.toLowerCase() !== "button") return; 6 | if (!event.target.matches(".querychat-update-dashboard-btn")) return; 7 | 8 | const chatContainer = event.target.closest("shiny-chat-container"); 9 | if (!chatContainer) return; 10 | 11 | const chatId = chatContainer.id; 12 | const { query, title } = event.target.dataset; 13 | 14 | window.Shiny.setInputValue( 15 | chatId + "_update", 16 | { query, title }, 17 | { priority: "event" } 18 | ); 19 | }); 20 | })(); 21 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/static/js/querychat.js: -------------------------------------------------------------------------------- 1 | (function () { 2 | if (!window.Shiny) return; 3 | 4 | window.addEventListener("click", function (event) { 5 | if (event.target.tagName.toLowerCase() !== "button") return; 6 | if (!event.target.matches(".querychat-update-dashboard-btn")) return; 7 | 8 | const chatContainer = event.target.closest("shiny-chat-container"); 9 | if (!chatContainer) return; 10 | 11 | const chatId = chatContainer.id; 12 | const { query, title } = event.target.dataset; 13 | 14 | window.Shiny.setInputValue( 15 | chatId + "_update", 16 | { query, title }, 17 | { priority: "event" } 18 | ); 19 | }); 20 | })(); -------------------------------------------------------------------------------- /pkg-r/tests/testthat/test-querychat_module.R: -------------------------------------------------------------------------------- 1 | test_that("Shiny app example loads without errors", { 2 | 
skip_if_not_installed("DT") 3 | skip_if_not_installed("RSQLite") 4 | skip_if_not_installed("shinytest2") 5 | 6 | # Create a simplified test app with mocked ellmer 7 | test_app_dir <- withr::local_tempdir() 8 | test_app_file <- file.path(test_app_dir, "app.R") 9 | dir.create(dirname(test_app_file), showWarnings = FALSE) 10 | 11 | file.copy(test_path("apps/basic/app.R"), test_app_file) 12 | 13 | # Test that the app can be loaded without immediate errors 14 | expect_no_error({ 15 | # Try to parse and evaluate the app code 16 | source(test_app_file, local = TRUE) 17 | }) 18 | }) 19 | -------------------------------------------------------------------------------- /pkg-py/examples/03-sidebar-express-app.py: -------------------------------------------------------------------------------- 1 | from shiny.express import render, ui 2 | from querychat.express import QueryChat 3 | from querychat.data import titanic 4 | 5 | # 1. Provide data source to QueryChat 6 | qc = QueryChat(titanic(), "titanic") 7 | 8 | # 2. Add sidebar chat control 9 | qc.sidebar() 10 | 11 | # 3. Add a card with reactive title and data frame 12 | with ui.card(): 13 | with ui.card_header(): 14 | @render.text 15 | def title(): 16 | return qc.title() or "Titanic Dataset" 17 | 18 | @render.data_frame 19 | def data_table(): 20 | return qc.df() 21 | 22 | # 4. Set some page options (optional) 23 | ui.page_opts( 24 | fillable=True, 25 | title="Titanic Dataset Explorer" 26 | ) 27 | -------------------------------------------------------------------------------- /pkg-py/examples/greeting.md: -------------------------------------------------------------------------------- 1 | Hello! Welcome to your Titanic data dashboard. I'm here to help you filter, sort, and analyze the data. Here are a few ideas to get you started: 2 | 3 | * Explore the data 4 | * Show me all passengers who survived 5 | * Show only first class passengers 6 | * Analyze statistics 7 | * What is the average age of passengers? 
8 | * How many children were on board? 9 | * Compare and dig deeper 10 | * Which class had the highest survival rate? 11 | * Show the fare distribution by embarkation town 12 | 13 | Let me know what you'd like to explore! 14 | -------------------------------------------------------------------------------- /pkg-r/man/deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deprecated.R 3 | \name{deprecated} 4 | \alias{deprecated} 5 | \alias{querychat_init} 6 | \alias{querychat_sidebar} 7 | \alias{querychat_ui} 8 | \alias{querychat_server} 9 | \alias{querychat_greeting} 10 | \alias{querychat_data_source} 11 | \title{Deprecated functions} 12 | \usage{ 13 | querychat_init(...) 14 | 15 | querychat_sidebar(...) 16 | 17 | querychat_ui(...) 18 | 19 | querychat_server(...) 20 | 21 | querychat_greeting(...) 22 | 23 | querychat_data_source(...) 24 | } 25 | \description{ 26 | These functions have been replaced by the new \code{QueryChat} R6 class API. 27 | Please update your code to use the new class-based approach. 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /pkg-py/examples/03-sidebar-core-app.py: -------------------------------------------------------------------------------- 1 | from shiny import App, render, ui 2 | from querychat import QueryChat 3 | from querychat.data import titanic 4 | 5 | # 1. Provide data source to QueryChat 6 | qc = QueryChat(titanic(), "titanic") 7 | 8 | app_ui = ui.page_sidebar( 9 | # 2. Create sidebar chat control 10 | qc.sidebar(), 11 | ui.card( 12 | ui.card_header(ui.output_text("title")), 13 | ui.output_data_frame("data_table"), 14 | fill=True, 15 | ), 16 | fillable=True 17 | ) 18 | 19 | 20 | def server(input, output, session): 21 | # 3. Add server logic (to get reactive data frame and title) 22 | qc_vals = qc.server() 23 | 24 | # 4. 
Use the filtered/sorted data frame reactively 25 | @render.data_frame 26 | def data_table(): 27 | return qc_vals.df() 28 | 29 | @render.text 30 | def title(): 31 | return qc_vals.title() or "Titanic Dataset" 32 | 33 | 34 | app = App(app_ui, server) 35 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/rstudio/shiny-workflows 2 | # 3 | # NOTE: This Shiny team GHA workflow is overkill for most R packages. 4 | # For most R packages it is better to use https://github.com/r-lib/actions 5 | on: 6 | push: 7 | branches: [main, rc-**] 8 | paths: 9 | - 'pkg-r/**' 10 | - '.github/workflows/R-CMD-check.yml' 11 | pull_request: 12 | paths: 13 | - 'pkg-r/**' 14 | - '.github/workflows/R-CMD-check.yml' 15 | schedule: 16 | - cron: "0 8 * * 1" # every Monday at 08:00 UTC 17 | 18 | name: Package checks 19 | 20 | jobs: 21 | # website: 22 | # uses: rstudio/shiny-workflows/.github/workflows/website.yaml@v1 23 | # with: 24 | # working-directory: ./pkg-r 25 | routine: 26 | uses: rstudio/shiny-workflows/.github/workflows/routine.yaml@v1 27 | with: 28 | format-r-code: true 29 | working-directory: ./pkg-r 30 | R-CMD-check: 31 | uses: rstudio/shiny-workflows/.github/workflows/R-CMD-check.yaml@v1 32 | with: 33 | working-directory: ./pkg-r 34 | -------------------------------------------------------------------------------- /pkg-r/pkgdown/_brand.yml: -------------------------------------------------------------------------------- 1 | color: 2 | palette: 3 | blue: "#007bc2" 4 | navy: "#193D56" 5 | indigo: "#4b00c1" 6 | purple: "#74149c" 7 | pink: "#bf007f" 8 | red: "#c10000" 9 | orange: "#f45100" 10 | yellow: "#f9b928" 11 | green: "#00891a" 12 | teal: "#00bf7f" 13 | cyan: "#03c7e8" 14 | white: "#ffffff" 15 | black: "#1D1F21" 16 | 17 | foreground: black 18 | background: white 19 | primary: blue 20 | secondary: gray 21
| success: green 22 | info: cyan 23 | warning: yellow 24 | danger: red 25 | light: "#f8f8f8" 26 | dark: "#212529" 27 | 28 | typography: 29 | fonts: 30 | - family: Open Sans 31 | source: bunny 32 | - family: Hubot Sans 33 | source: bunny 34 | - family: Source Code Pro 35 | source: bunny 36 | 37 | headings: 38 | family: Hubot Sans 39 | color: navy 40 | weight: 400 41 | monospace: Source Code Pro 42 | monospace-inline: 43 | color: pink 44 | background-color: transparent 45 | size: 0.95em 46 | 47 | defaults: 48 | bootstrap: 49 | defaults: 50 | navbar-bg: $brand-navy 51 | code-color-dark: "#fa88d4" 52 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 Posit Software, PBC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pkg-py/LICENSE: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 Posit Software, PBC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pkg-r/LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 querychat authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pkg-r/pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://posit-dev.github.io/querychat 2 | 3 | destination: "../docs/r" 4 | 5 | # development: 6 | # mode: auto 7 | 8 | authors: 9 | Posit Software, PBC: 10 | href: https://www.posit.co 11 | html: >- 12 | Posit 13 | Joe Cheng: 14 | href: https://github.com/jcheng5 15 | Carson Sievert: 16 | href: https://cpsievert.me 17 | Garrick Aden-Buie: 18 | href: https://garrickadenbuie.com 19 | Barret Schloerke: 20 | href: https://schloerke.com/ 21 | Daniel Chen: 22 | href: https://chendaniely.github.io 23 | 24 | template: 25 | bootstrap: 5 26 | light-switch: true 27 | theme: github-light 28 | theme-dark: github-dark 29 | bslib: 30 | brand: pkgdown/_brand.yml 31 | 32 | navbar: 33 | structure: 34 | left: [get-started, articles, reference, news] 35 | right: [search, github, lightswitch] 36 | components: 37 | home: ~ 38 | 39 | reference: 40 | - title: Chat interfaces 41 | contents: 42 | - querychat 43 | - QueryChat 44 | 45 | - title: Data Sources 46 | contents: 47 | - DataSource 48 | - DataFrameSource 49 | - DBISource 50 | -------------------------------------------------------------------------------- /pkg-r/inst/prompts/tool-update-dashboard.md: -------------------------------------------------------------------------------- 1 | Filter and sort the dashboard data 2 | 3 | This tool executes a {{db_type}} SQL SELECT `query` to filter or sort the data used in the dashboard. 4 | 5 | **Returns:** A confirmation that the dashboard was updated successfully, or the error that occurred when running the SQL query. The results of the query will update the data shown in the dashboard. 6 | 7 | **When to use:** Call this tool whenever the user requests filtering, sorting, or data manipulation on the dashboard with questions like "Show me..." or "Which records have...". 
This tool is appropriate for any request that involves showing a subset of the data or reordering it. 8 | 9 | **When not to use:** Do NOT use this tool for general questions about the data that can be answered with a single value or summary statistic. For those questions, use the `querychat_query` tool instead. 10 | 11 | **Important constraints:** 12 | 13 | - All original schema columns must be present in the SELECT output 14 | - Use a single SQL query. You can use CTEs but you cannot chain multiple queries 15 | - For statistical filters (stddev, percentiles), use CTEs to calculate thresholds within the query 16 | - Assume the user will only see the original columns in the dataset 17 | 18 | -------------------------------------------------------------------------------- /pkg-r/man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /pkg-r/man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /pkg-py/examples/data_description.md: -------------------------------------------------------------------------------- 1 | # Data Dictionary 2 | 3 | - **survival**: Survival status 4 | - 0 = No 5 | - 1 = Yes 6 | 7 | - **pclass**: Ticket class 8 | - 1 = 1st class 9 | - 2 = 2nd class 10 | - 3 = 3rd class 11 | 12 | - **sex**: Sex of the passenger 13 | 14 | - **age**: Age in years 15 | 16 | - **sibsp**: Number of siblings or spouses aboard the Titanic 17 | 18 | - **parch**: Number of parents or children aboard the 
Titanic 19 | 20 | - **ticket**: Ticket number 21 | 22 | - **fare**: Passenger fare 23 | 24 | - **cabin**: Cabin number 25 | 26 | - **embarked**: Port of embarkation 27 | - C = Cherbourg 28 | - Q = Queenstown 29 | - S = Southampton 30 | 31 | ## Variable Notes 32 | 33 | - **pclass** is a proxy for socio-economic status (SES): 34 | - 1st = Upper class 35 | - 2nd = Middle class 36 | - 3rd = Lower class 37 | 38 | - **age**: 39 | - If less than 1 year old, age is fractional. 40 | - Estimated ages are represented as `xx.5`. 41 | 42 | - **sibsp**: Family relations are defined as: 43 | - Sibling = brother, sister, stepbrother, stepsister 44 | - Spouse = husband, wife (mistresses and fiancés were ignored) 45 | 46 | - **parch**: Family relations are defined as: 47 | - Parent = mother, father 48 | - Child = daughter, son, stepdaughter, stepson 49 | - Some children traveled only with a nanny, so `parch = 0` for them. -------------------------------------------------------------------------------- /pkg-r/man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /.github/workflows/py-release.yml: -------------------------------------------------------------------------------- 1 | name: Python - Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | env: 8 | PYTHON_VERSION: 3.12 9 | 10 | jobs: 11 | pypi-release: 12 | name: Build and release Python package 13 | runs-on: ubuntu-latest 14 | 15 | if: startsWith(github.ref, 'refs/tags/py/v') 16 | 17 | environment: 18 | name: pypi 19 | url: https://pypi.org/project/querychat/ 20 | 21 | permissions: # for trusted publishing 22 | id-token: write 23 | 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - name: 🚀 Install uv 28 | uses: 
astral-sh/setup-uv@v3 29 | 30 | - name: 🐍 Set up Python ${{ env.PYTHON_VERSION }} 31 | run: uv python install ${{ env.PYTHON_VERSION }} 32 | 33 | - name: 📦 Install the project 34 | run: uv sync --python ${{ env.PYTHON_VERSION }} --all-extras --all-groups 35 | 36 | - name: 🧪 Check tests 37 | run: make py-check-tests 38 | 39 | - name: 📝 Check types 40 | run: make py-check-types 41 | 42 | - name: 📐 Check formatting 43 | run: make py-check-format 44 | - name: 🧳 Build package 45 | run: make py-build 46 | 47 | # TODO: https://pypi.org/manage/project/querychat/settings/publishing/ 48 | - name: 🚢 Publish release on PyPI 49 | uses: pypa/gh-action-pypi-publish@release/v1 50 | with: 51 | packages-dir: ./dist 52 | -------------------------------------------------------------------------------- /pkg-r/man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /pkg-r/R/utils-check.R: -------------------------------------------------------------------------------- 1 | # SQL table name validation ---------------------------------------------- 2 | 3 | #' Check SQL table name validity 4 | #' 5 | #' Validates that a string is a valid SQL table name. A valid SQL table name 6 | #' must begin with a letter and contain only letters, numbers, and underscores. 7 | #' 8 | #' @param x The value to check 9 | #' @param ... These dots are for future extensions and must be empty. 10 | #' @param allow_null Logical. If `TRUE`, `NULL` is accepted as a valid value. 11 | #' @param arg Argument name to use in error messages 12 | #' @param call Calling environment for error messages 13 | #' 14 | #' @return Invisibly returns `NULL` if validation passes. Otherwise throws an error. 
15 | #' @keywords internal 16 | #' @noRd 17 | check_sql_table_name <- function( 18 | x, 19 | ..., 20 | allow_null = FALSE, 21 | arg = caller_arg(x), 22 | call = caller_env() 23 | ) { 24 | check_dots_empty() 25 | 26 | # Check if NULL is allowed 27 | if (allow_null && is.null(x)) { 28 | return(invisible(NULL)) 29 | } 30 | 31 | # First check it's a string 32 | check_string(x, allow_null = allow_null, arg = arg, call = call) 33 | 34 | # Then validate SQL table name pattern 35 | if (!grepl("^[a-zA-Z][a-zA-Z0-9_]*$", x)) { 36 | cli::cli_abort( 37 | c( 38 | "{.arg {arg}} must be a valid SQL table name", 39 | "i" = "Table names must begin with a letter and contain only letters, numbers, and underscores", 40 | "x" = "You provided: {.val {x}}" 41 | ), 42 | call = call 43 | ) 44 | } 45 | 46 | invisible(NULL) 47 | } 48 | -------------------------------------------------------------------------------- /pkg-py/docs/styles.scss: -------------------------------------------------------------------------------- 1 | /*-- scss:defaults --*/ 2 | 3 | $font-family-sans-serif: 'Public Sans', sans-serif; 4 | $font-family-monospace: 'Fira Code', monospace; 5 | $headings-font-family: 'Hubot Sans', sans-serif; 6 | $display-font-family: 'Hubot Sans', sans-serif; 7 | $headings-color: #193D56; 8 | 9 | /*-- scss:rules --*/ 10 | 11 | @import url('https://fonts.googleapis.com/css2?family=Public+Sans:ital,wght@0,100..900;1,100..900&display=swap'); 12 | @import url('https://fonts.googleapis.com/css?family=Fira Code'); 13 | @import url('https://fonts.googleapis.com/css?family=Hubot Sans'); 14 | 15 | .header { 16 | font-family: $headings-font-family; 17 | color: $headings-color; 18 | } 19 | 20 | /* css styles */ 21 | 22 | .cell-output pre code { 23 | white-space: pre-wrap; 24 | } 25 | 26 | /* Undo somebody's aggressive CSS */ 27 | pre { 28 | font-family: var(--bs-font-monospace); 29 | } 30 | 31 | 32 | /* sidebar */ 33 | .sidebar-item-container { 34 | font-size: 1rem; 35 | 36 | .text-start { 37 | 
font-weight: 600; 38 | } 39 | } 40 | 41 | .sidebar-item-section { 42 | padding-top: 0.5rem; 43 | } 44 | 45 | // make it even more noticeable 46 | .sidebar-link { 47 | &:hover { 48 | font-weight: 500; 49 | } 50 | 51 | &.active { 52 | position: relative; 53 | 54 | &::before { 55 | content: "\23F5"; 56 | position: absolute; 57 | left: -0.9em; 58 | font-size: 1em; 59 | color: var(--bs-primary); 60 | } 61 | } 62 | } 63 | 64 | 65 | /* Get code output to look like a sourceCode block */ 66 | pre:has(> code) { 67 | background-color: rgba(233, 236, 239, 0.65); 68 | border-radius: .25em; 69 | padding: .4em; 70 | } -------------------------------------------------------------------------------- /pkg-py/src/querychat/prompts/tool-update-dashboard.md: -------------------------------------------------------------------------------- 1 | Filter and sort the dashboard data 2 | 3 | This tool executes a {{db_type}} SQL SELECT query to filter or sort the data used in the dashboard. 4 | 5 | **When to use:** Call this tool whenever the user requests filtering, sorting, or data manipulation on the dashboard with questions like "Show me..." or "Which records have...". This tool is appropriate for any request that involves showing a subset of the data or reordering it. 6 | 7 | **When not to use:** Do NOT use this tool for general questions about the data that can be answered with a single value or summary statistic. For those questions, use the `querychat_query` tool instead. 8 | 9 | **Important constraints:** 10 | 11 | - All original schema columns must be present in the SELECT output 12 | - Use a single SQL query. 
You can use CTEs but you cannot chain multiple queries 13 | - For statistical filters (stddev, percentiles), use CTEs to calculate thresholds within the query 14 | - Assume the user will only see the original columns in the dataset 15 | 16 | 17 | Parameters 18 | ---------- 19 | query : 20 | A {{db_type}} SQL SELECT query that MUST return all existing schema columns (use SELECT * or explicitly list all columns). May include additional computed columns, subqueries, CTEs, WHERE clauses, ORDER BY, and any {{db_type}}-supported SQL functions. 21 | title : 22 | A brief title for display purposes, summarizing the intent of the SQL query. 23 | 24 | Returns 25 | ------- 26 | : 27 | A confirmation that the dashboard was updated successfully, or the error that occurred when running the SQL query. The results of the query will update the data shown in the dashboard. 28 | 29 | -------------------------------------------------------------------------------- /pkg-r/R/utils-ellmer.R: -------------------------------------------------------------------------------- 1 | interpolate_package <- function(path, ..., .envir = parent.frame()) { 2 | # This helper replicates ellmer::interpolate_package() to work with load_all() 3 | stopifnot( 4 | "`path` must be a single string" = is.character(path), 5 | "`path` must be a single string" = length(path) == 1 6 | ) 7 | 8 | path <- system.file("prompts", path, package = "querychat") 9 | stopifnot( 10 | "`path` does not exist" = nzchar(path), 11 | "`path` does not exist" = file.exists(path) 12 | ) 13 | 14 | ellmer::interpolate_file(path, ..., .envir = .envir) 15 | } 16 | 17 | 18 | as_querychat_client <- function(client = NULL) { 19 | if (is.null(client)) { 20 | client <- querychat_client_option() 21 | } 22 | 23 | if (is.null(client)) { 24 | # Use OpenAI with ellmer's default model 25 | return(ellmer::chat_openai()) 26 | } 27 | 28 | if (is_function(client)) { 29 | # `client` as a function was the first interface we supported and expected 30 | # 
`system_prompt` as an argument. This avoids breaking existing code. 31 | client <- client(system_prompt = NULL) 32 | } 33 | 34 | if (is_string(client)) { 35 | client <- ellmer::chat(client) 36 | } 37 | 38 | if (!inherits(client, "Chat")) { 39 | cli::cli_abort( 40 | "{.arg client} must be an {.pkg ellmer} {.cls Chat} object or a function that returns one" 41 | ) 42 | } 43 | 44 | client 45 | } 46 | 47 | querychat_client_option <- function() { 48 | opt <- getOption("querychat.client", NULL) 49 | if (!is.null(opt)) { 50 | return(opt) 51 | } 52 | 53 | env <- Sys.getenv("QUERYCHAT_CLIENT", "") 54 | if (nzchar(env)) { 55 | return(env) 56 | } 57 | 58 | NULL 59 | } 60 | -------------------------------------------------------------------------------- /pkg-r/inst/prompts/tool-query.md: -------------------------------------------------------------------------------- 1 | Execute a SQL query and return the results 2 | 3 | This tool executes a {{db_type}} SQL SELECT query against the database and returns the raw result data for analysis. 4 | 5 | **Returns:** The tabular data results from executing the SQL query. The query results will be visible to the user in the interface, so you must interpret and explain the data in natural language after receiving it. 6 | 7 | **When to use:** Call this tool whenever the user asks a question that requires data analysis, aggregation, or calculations. Use this for questions like: 8 | - "What is the average...?" 9 | - "How many records...?" 10 | - "Which item has the highest/lowest...?" 11 | - "What's the total sum of...?" 12 | - "What percentage of ...?" 13 | 14 | Always use SQL for counting, averaging, summing, and other calculations—NEVER attempt manual calculations on your own. Use this tool repeatedly if needed to avoid any kind of manual calculation. 15 | 16 | **When not to use:** Do NOT use this tool for filtering or sorting the dashboard display. If the user wants to "Show me..." or "Filter to..." 
certain records in the dashboard, use the `querychat_update_dashboard` tool instead. 17 | 18 | **Important guidelines:** 19 | 20 | - Queries must be valid {{db_type}} SQL SELECT statements 21 | - Optimize for readability over efficiency—use clear column aliases and SQL comments to explain complex logic 22 | - Subqueries and CTEs are acceptable and encouraged for complex calculations 23 | - After receiving results, provide an explanation of the answer and an overview of how you arrived at it, if not already explained in SQL comments 24 | - The user can see your SQL query, they will follow up with detailed explanations if needed 25 | -------------------------------------------------------------------------------- /.github/workflows/py-test.yml: -------------------------------------------------------------------------------- 1 | name: Test - Python 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ["main", "rc-*"] 7 | paths: 8 | - 'pkg-py/**' 9 | - 'pyproject.toml' 10 | - '.github/workflows/py-test.yml' 11 | pull_request: 12 | types: [opened, synchronize, reopened, ready_for_review] 13 | paths: 14 | - 'pkg-py/**' 15 | - 'pyproject.toml' 16 | - '.github/workflows/py-test.yml' 17 | release: 18 | types: [published] 19 | 20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | test: 25 | runs-on: ubuntu-latest 26 | strategy: 27 | matrix: 28 | config: 29 | - { python-version: "3.10", test_google: false, test_azure: false } 30 | - { python-version: "3.11", test_google: false, test_azure: false } 31 | - { python-version: "3.12", test_google: true, test_azure: true } 32 | - { python-version: "3.13", test_google: false, test_azure: false } 33 | - { python-version: "3.14", test_google: false, test_azure: false } 34 | fail-fast: false 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | 39 | - name: 🚀 Install uv 40 | uses: astral-sh/setup-uv@v3 41 | 42 | - name: 🐍 Set up Python ${{ matrix.config.python-version }} 43 | run: uv python install 
${{matrix.config.python-version }} 44 | 45 | - name: 📦 Install the project 46 | run: uv sync --python ${{matrix.config.python-version }} --all-extras --all-groups 47 | 48 | - name: 🧪 Check tests 49 | run: make py-check-tests 50 | 51 | - name: 📝 Check types 52 | run: make py-check-types 53 | 54 | - name: 📐 Check formatting 55 | run: make py-check-format 56 | -------------------------------------------------------------------------------- /pkg-r/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: querychat 2 | Title: Filter and Query Data Frames in 'shiny' Using an LLM Chat Interface 3 | Version: 0.1.0.9000 4 | Authors@R: c( 5 | person("Garrick", "Aden-Buie", , "garrick@posit.co", role = c("aut", "cre"), 6 | comment = c(ORCID = "0000-0002-7111-0077")), 7 | person("Joe", "Cheng", , "joe@posit.co", role = c("aut", "ccp")), 8 | person("Carson", "Sievert", , "carson@posit.co", role = "aut", 9 | comment = c(ORCID = "0000-0002-4958-2844")), 10 | person("Posit Software, PBC", role = c("cph", "fnd")) 11 | ) 12 | Description: Adds an LLM-powered chatbot to your 'shiny' app, that can 13 | turn your users' natural language questions into SQL queries that run 14 | against your data, and return the result as a reactive dataframe. Use 15 | it to drive reactive calculations, visualizations, downloads, etc. 
16 | License: MIT + file LICENSE 17 | URL: https://posit-dev.github.io/querychat/pkg-r, 18 | https://posit-dev.github.io/querychat, 19 | https://github.com/posit-dev/querychat 20 | BugReports: https://github.com/posit-dev/querychat/issues 21 | Depends: 22 | R (>= 4.1.0) 23 | Imports: 24 | bslib, 25 | cli, 26 | DBI, 27 | duckdb, 28 | ellmer (>= 0.3.0), 29 | htmltools, 30 | lifecycle, 31 | promises, 32 | R6, 33 | rlang (>= 1.1.0), 34 | S7, 35 | shiny, 36 | shinychat (>= 0.2.0.9000), 37 | utils, 38 | whisker 39 | Suggests: 40 | bsicons, 41 | DT, 42 | palmerpenguins, 43 | RSQLite, 44 | shinytest2, 45 | testthat (>= 3.0.0), 46 | withr 47 | Remotes: 48 | posit-dev/shinychat/pkg-r 49 | Config/testthat/edition: 3 50 | Config/testthat/parallel: true 51 | Encoding: UTF-8 52 | Roxygen: list(markdown = TRUE) 53 | RoxygenNote: 7.3.3 54 | -------------------------------------------------------------------------------- /pkg-py/tests/test_querychat.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pandas as pd 4 | import pytest 5 | from querychat import QueryChat 6 | 7 | 8 | @pytest.fixture(autouse=True) 9 | def set_dummy_api_key(): 10 | """Set a dummy OpenAI API key for testing.""" 11 | old_api_key = os.environ.get("OPENAI_API_KEY") 12 | os.environ["OPENAI_API_KEY"] = "sk-dummy-api-key-for-testing" 13 | yield 14 | if old_api_key is not None: 15 | os.environ["OPENAI_API_KEY"] = old_api_key 16 | else: 17 | del os.environ["OPENAI_API_KEY"] 18 | 19 | 20 | @pytest.fixture 21 | def sample_df(): 22 | """Create a sample pandas DataFrame for testing.""" 23 | return pd.DataFrame( 24 | { 25 | "id": [1, 2, 3], 26 | "name": ["Alice", "Bob", "Charlie"], 27 | "age": [25, 30, 35], 28 | }, 29 | ) 30 | 31 | 32 | def test_querychat_init(sample_df): 33 | """Test that QueryChat (Express mode) initializes correctly.""" 34 | qc = QueryChat( 35 | data_source=sample_df, 36 | table_name="test_table", 37 | greeting="Hello!", 38 | ) 39 | 40 | # 
Verify basic attributes are set 41 | assert qc is not None 42 | assert qc.id == "test_table" 43 | 44 | # Even without server initialization, we should be able to query the data source 45 | result = qc.data_source.execute_query( 46 | "SELECT * FROM test_table WHERE id = 2", 47 | ) 48 | 49 | assert len(result) == 1 50 | assert result.iloc[0]["name"] == "Bob" 51 | 52 | 53 | def test_querychat_custom_id(sample_df): 54 | """Test that QueryChat accepts custom ID.""" 55 | qc = QueryChat( 56 | data_source=sample_df, 57 | table_name="test_table", 58 | id="custom_id", 59 | greeting="Hello!", 60 | ) 61 | 62 | assert qc.id == "custom_id" 63 | -------------------------------------------------------------------------------- /pkg-r/tests/testthat/apps/basic/app.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(bslib, warn.conflicts = FALSE) 3 | library(querychat) 4 | library(DBI) 5 | library(RSQLite) 6 | 7 | # Mock chat function to avoid LLM API calls 8 | MockChat <- R6::R6Class( 9 | "MockChat", 10 | inherit = asNamespace("ellmer")[["Chat"]], 11 | public = list( 12 | stream_async = function(message, ...) { 13 | "Welcome! This is a mock response for testing." 
14 | } 15 | ) 16 | ) 17 | 18 | # Create test database 19 | temp_db <- tempfile(fileext = ".db") 20 | conn <- dbConnect(RSQLite::SQLite(), temp_db) 21 | dbWriteTable(conn, "iris", iris, overwrite = TRUE) 22 | dbDisconnect(conn) 23 | 24 | # Setup database source and QueryChat instance 25 | db_conn <- dbConnect(RSQLite::SQLite(), temp_db) 26 | 27 | # Create QueryChat instance 28 | qc <- QueryChat$new( 29 | data_source = db_conn, 30 | table_name = "iris", 31 | greeting = "Welcome to the test app!", 32 | client = MockChat$new(ellmer::Provider("test", "test", "test")) 33 | ) 34 | 35 | ui <- page_sidebar( 36 | title = "Test Database App", 37 | sidebar = qc$sidebar(), 38 | h2("Data"), 39 | DT::DTOutput("data_table"), 40 | h3("SQL Query"), 41 | verbatimTextOutput("sql_query") 42 | ) 43 | 44 | server <- function(input, output, session) { 45 | qc_vals <- qc$server() 46 | 47 | output$data_table <- DT::renderDT( 48 | { 49 | qc_vals$df() 50 | }, 51 | options = list(pageLength = 5) 52 | ) 53 | 54 | output$sql_query <- renderText({ 55 | query <- qc_vals$sql() 56 | if (is.null(query) || !nzchar(query)) "No filter applied" else query 57 | }) 58 | 59 | session$onSessionEnded(function() { 60 | if (DBI::dbIsValid(db_conn)) { 61 | DBI::dbDisconnect(db_conn) 62 | } 63 | unlink(temp_db) 64 | }) 65 | } 66 | 67 | shinyApp(ui = ui, server = server) 68 | -------------------------------------------------------------------------------- /.github/workflows/docs-r-pkgdown.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/posit-dev/shinychat/blob/main/.github/workflows/pkgdown.yaml 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | paths: 7 | - 'pkg-r/man/**/*' 8 | - 'pkg-r/vignettes/**/*' 9 | - 'pkg-r/pkgdown/**/*' 10 | - '.github/workflows/docs-r-pkgdown.yml' 11 | pull_request: 12 | paths: 13 | - 'pkg-r/man/**/*' 14 | - 'pkg-r/vignettes/**/*' 15 | - 'pkg-r/pkgdown/**/*' 16 | - '.github/workflows/docs-r-pkgdown.yml' 17 | release: 18 | types: [published] 19 | workflow_dispatch: 20 | 21 | name: docs-r-pkgdown.yml 22 | 23 | permissions: 24 | contents: write 25 | 26 | jobs: 27 | r-docs-pkgdown: 28 | runs-on: ubuntu-latest 29 | # Only restrict concurrency for non-PR jobs 30 | concurrency: 31 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 32 | env: 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | permissions: 35 | contents: write 36 | 37 | # Only run on release events for tags that start with "r/v*" 38 | if: github.event_name != 'release' || startsWith(github.ref, 'refs/tags/r/v') 39 | 40 | steps: 41 | - uses: actions/checkout@v4 42 | 43 | - uses: r-lib/actions/setup-pandoc@v2 44 | 45 | - uses: r-lib/actions/setup-r@v2 46 | with: 47 | use-public-rspm: true 48 | 49 | - uses: r-lib/actions/setup-r-dependencies@v2 50 | with: 51 | extra-packages: any::pkgdown, local::. 
52 | needs: website 53 | working-directory: pkg-r 54 | 55 | - name: Build site 56 | run: make r-docs 57 | 58 | - name: Deploy to GitHub pages 🚀 59 | if: github.event_name != 'pull_request' 60 | uses: JamesIves/github-pages-deploy-action@v4.5.0 61 | with: 62 | clean: false 63 | branch: gh-pages 64 | folder: docs 65 | -------------------------------------------------------------------------------- /docs/logo-r.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /pkg-r/tests/testthat/_snaps/QueryChat.md: -------------------------------------------------------------------------------- 1 | # QueryChat$new() / errors with invalid argument types 2 | 3 | Code 4 | QueryChat$new(test_df, table_name = "test", id = 123) 5 | Condition 6 | Error in `initialize()`: 7 | ! `id` must be a single string or `NULL`, not the number 123. 8 | 9 | --- 10 | 11 | Code 12 | QueryChat$new(test_df, table_name = "test", greeting = 123) 13 | Condition 14 | Error in `initialize()`: 15 | ! `greeting` must be a single string or `NULL`, not the number 123. 16 | 17 | --- 18 | 19 | Code 20 | QueryChat$new(test_df, table_name = "test", categorical_threshold = "not_a_number") 21 | Condition 22 | Error in `initialize()`: 23 | ! `categorical_threshold` must be a whole number, not the string "not_a_number". 24 | 25 | --- 26 | 27 | Code 28 | QueryChat$new(test_df, table_name = "test", cleanup = "not_logical") 29 | Condition 30 | Error in `initialize()`: 31 | ! `cleanup` must be `TRUE`, `FALSE`, or `NA`, not the string "not_logical". 32 | 33 | # QueryChat$server() errors when called outside Shiny context 34 | 35 | Code 36 | qc$server() 37 | Condition 38 | Error in `qc$server()`: 39 | ! 
`$server()` must be called within a Shiny server function 40 | 41 | # normalize_data_source() / errors with invalid data source types 42 | 43 | Code 44 | normalize_data_source("not_a_data_source", "table_name") 45 | Condition 46 | Error in `normalize_data_source()`: 47 | ! `data_source` must be a , , or , not a string. 48 | 49 | --- 50 | 51 | Code 52 | normalize_data_source(list(a = 1, b = 2), "table_name") 53 | Condition 54 | Error in `normalize_data_source()`: 55 | ! `data_source` must be a , , or , not a list. 56 | 57 | --- 58 | 59 | Code 60 | normalize_data_source(NULL, "table_name") 61 | Condition 62 | Error in `normalize_data_source()`: 63 | ! `data_source` must be a , , or , not NULL. 64 | 65 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/_icons.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from shiny import ui 4 | 5 | ICON_NAMES = Literal["arrow-counterclockwise", "funnel-fill", "terminal-fill", "table"] 6 | 7 | 8 | def bs_icon(name: ICON_NAMES) -> ui.HTML: 9 | """Get Bootstrap icon SVG by name.""" 10 | if name not in BS_ICONS: 11 | raise ValueError(f"Unknown Bootstrap icon: {name}") 12 | return ui.HTML(BS_ICONS[name]) 13 | 14 | 15 | BS_ICONS = { 16 | "arrow-counterclockwise": '', 17 | "funnel-fill": '', 18 | "terminal-fill": '', 19 | "table": '', 20 | } 21 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/prompts/tool-query.md: -------------------------------------------------------------------------------- 1 | Execute a SQL query and return the results 2 | 3 | This tool executes a {{db_type}} SQL SELECT query against the database and returns the raw result data for analysis. 4 | 5 | **When to use:** Call this tool whenever the user asks a question that requires data analysis, aggregation, or calculations. Use this for questions like: 6 | - "What is the average...?" 
7 | - "How many records...?" 8 | - "Which item has the highest/lowest...?" 9 | - "What's the total sum of...?" 10 | - "What percentage of ...?" 11 | 12 | Always use SQL for counting, averaging, summing, and other calculations—NEVER attempt manual calculations on your own. Use this tool repeatedly if needed to avoid any kind of manual calculation. 13 | 14 | **When not to use:** Do NOT use this tool for filtering or sorting the dashboard display. If the user wants to "Show me..." or "Filter to..." certain records in the dashboard, use the `querychat_update_dashboard` tool instead. 15 | 16 | **Important guidelines:** 17 | 18 | - Queries must be valid {{db_type}} SQL SELECT statements 19 | - Optimize for readability over efficiency—use clear column aliases and SQL comments to explain complex logic 20 | - Subqueries and CTEs are acceptable and encouraged for complex calculations 21 | - After receiving results, provide an explanation of the answer and an overview of how you arrived at it, if not already explained in SQL comments 22 | - The user can see your SQL query, they will follow up with detailed explanations if needed 23 | 24 | Parameters 25 | ---------- 26 | query : 27 | A valid {{db_type}} SQL SELECT statement. Must follow the database schema provided in the system prompt. Use clear column aliases (e.g., 'AVG(price) AS avg_price') and include SQL comments for complex logic. Subqueries and CTEs are encouraged for readability. 28 | _intent : 29 | A brief, user-friendly description of what this query calculates or retrieves. 30 | 31 | Returns 32 | ------- 33 | : 34 | The tabular data results from executing the SQL query. The query results will be visible to the user in the interface, so you must interpret and explain the data in natural language after receiving it. 
def _read_packaged_csv(filename: str) -> pd.DataFrame:
    """
    Read a gzipped CSV file that ships inside the ``querychat.data`` package.

    Parameters
    ----------
    filename
        Name of the resource inside ``querychat.data``, e.g. ``"tips.csv.gz"``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.

    """
    resource = files("querychat.data") / filename
    # `files()` returns a Traversable, which is only guaranteed to be a real
    # filesystem path for unpacked installs. Reading through its own `open()`
    # keeps working when the package is imported from an archive, where
    # `str(resource)` is not a path pandas can open.
    with resource.open("rb") as handle:
        return pd.read_csv(handle, compression="gzip")


def titanic() -> pd.DataFrame:
    """
    Load the Titanic dataset.

    This dataset contains information about passengers on the Titanic, including
    whether they survived, their class, age, sex, and other demographic information.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with 891 rows and 15 columns containing Titanic passenger data.

    Examples
    --------
    >>> from querychat.data import titanic
    >>> from querychat import QueryChat
    >>> df = titanic()
    >>> qc = QueryChat(df, "titanic")
    >>> app = qc.app()

    """
    return _read_packaged_csv("titanic.csv.gz")


def tips() -> pd.DataFrame:
    """
    Load the tips dataset.

    This dataset contains information about restaurant tips, including the total
    bill, tip amount, and information about the party (sex, smoker status, day,
    time, and party size).

    Returns
    -------
    pandas.DataFrame
        A DataFrame with 244 rows and 7 columns containing restaurant tip data.

    Examples
    --------
    >>> from querychat.data import tips
    >>> from querychat import QueryChat
    >>> df = tips()
    >>> qc = QueryChat(df, "tips")
    >>> app = qc.app()

    """
    return _read_packaged_csv("tips.csv.gz")
56 | run: uv sync --python ${{ env.PYTHON_VERSION }} --all-extras --all-groups 57 | 58 | - name: 🔌 Activate venv 59 | run: | 60 | source .venv/bin/activate 61 | echo "$VIRTUAL_ENV/bin" >> $GITHUB_PATH 62 | echo "VIRTUAL_ENV=$VIRTUAL_ENV" >> $GITHUB_ENV 63 | 64 | - name: 🏭 Update Python docs 65 | run: | 66 | make py-docs-api 67 | quarto render pkg-py/docs 68 | 69 | - name: 🚢 Deploy to GitHub pages 70 | if: github.event_name != 'pull_request' 71 | uses: JamesIves/github-pages-deploy-action@v4.5.0 72 | with: 73 | clean: false 74 | folder: docs 75 | branch: gh-pages 76 | -------------------------------------------------------------------------------- /pkg-r/tests/testthat/helper-fixtures.R: -------------------------------------------------------------------------------- 1 | # Test fixture constructors for data source tests 2 | 3 | # Simple data frame with id, name, and value columns 4 | new_test_df <- function(rows = 5) { 5 | data.frame( 6 | id = seq_len(rows), 7 | name = c("A", "B", "C", "D", "E")[seq_len(rows)], 8 | value = c(10, 20, 30, 40, 50)[seq_len(rows)], 9 | stringsAsFactors = FALSE 10 | ) 11 | } 12 | 13 | # Data frame with multiple numeric columns for testing min/max ranges 14 | new_metrics_df <- function() { 15 | data.frame( 16 | id = 1:5, 17 | score = c(10.5, 20.3, 15.7, 30.1, 25.9), 18 | count = c(100, 200, 150, 50, 75), 19 | stringsAsFactors = FALSE 20 | ) 21 | } 22 | 23 | # Data frame with mixed types including boolean 24 | new_mixed_types_df <- function() { 25 | data.frame( 26 | id = 1:5, 27 | name = c("A", "B", "C", "D", "E"), 28 | active = c(TRUE, FALSE, TRUE, TRUE, FALSE), 29 | stringsAsFactors = FALSE 30 | ) 31 | } 32 | 33 | # Data frame for testing user data 34 | new_users_df <- function() { 35 | data.frame( 36 | id = 1:5, 37 | name = c("Alice", "Bob", "Charlie", "Diana", "Eve"), 38 | age = c(25, 30, 35, 28, 32), 39 | stringsAsFactors = FALSE 40 | ) 41 | } 42 | 43 | # Data frame with all data types for type testing 44 | new_types_df <- function() { 
def test_querychat_tool_starts_open_default_behavior(monkeypatch):
    """With the variable unset, only the reset tool starts collapsed."""
    monkeypatch.delenv("QUERYCHAT_TOOL_DETAILS", raising=False)

    for tool, expected in (("query", True), ("update", True), ("reset", False)):
        assert querychat_tool_starts_open(tool) is expected


def test_querychat_tool_starts_open_expanded(monkeypatch):
    """The 'expanded' setting opens every tool."""
    monkeypatch.setenv("QUERYCHAT_TOOL_DETAILS", "expanded")

    for tool in ("query", "update", "reset"):
        assert querychat_tool_starts_open(tool) is True


def test_querychat_tool_starts_open_collapsed(monkeypatch):
    """The 'collapsed' setting closes every tool."""
    monkeypatch.setenv("QUERYCHAT_TOOL_DETAILS", "collapsed")

    for tool in ("query", "update", "reset"):
        assert querychat_tool_starts_open(tool) is False


def test_querychat_tool_starts_open_default_setting(monkeypatch):
    """An explicit 'default' setting matches the unset behavior."""
    monkeypatch.setenv("QUERYCHAT_TOOL_DETAILS", "default")

    for tool, expected in (("query", True), ("update", True), ("reset", False)):
        assert querychat_tool_starts_open(tool) is expected


def test_querychat_tool_starts_open_case_insensitive(monkeypatch):
    """The setting is matched regardless of letter case."""
    spellings = (("EXPANDED", True), ("Collapsed", False), ("DeFaUlT", True))
    for raw_value, expected in spellings:
        monkeypatch.setenv("QUERYCHAT_TOOL_DETAILS", raw_value)
        assert querychat_tool_starts_open("query") is expected


def test_querychat_tool_starts_open_invalid_setting(monkeypatch):
    """An unrecognized value warns once and falls back to the default."""
    monkeypatch.setenv("QUERYCHAT_TOOL_DETAILS", "invalid")

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones already raised once elsewhere.
        warnings.simplefilter("always")
        outcome = querychat_tool_starts_open("query")

    assert len(caught) == 1
    assert "Invalid value" in str(caught[0].message)
    assert outcome is True  # Falls back to default behavior
-------------------------------------------------------------------------------- 1 | # querychat: Chat with your data in any language 2 | 3 | querychat is a multilingual package that allows you to chat with your data using natural language queries. It's available for: 4 | 5 | - [R - Shiny](pkg-r/README.md) 6 | - [Python - Shiny for Python](pkg-py/README.md) 7 | 8 | ## Overview 9 | 10 | Imagine typing questions like these directly into your dashboard, and seeing the results in realtime: 11 | 12 | * "Show only penguins that are not species Gentoo and have a bill length greater than 50mm." 13 | * "Show only blue states with an incidence rate greater than 100 per 100,000 people." 14 | * "What is the average mpg of cars with 6 cylinders?" 15 | 16 | querychat is a drop-in component for Shiny that allows users to query a data frame using natural language. The results are available as a reactive data frame, so they can be easily used from Shiny outputs, reactive expressions, downloads, etc. 17 | 18 | | ![Animation of a dashboard being filtered by a chatbot in the sidebar](animation.gif) | 19 | |-| 20 | 21 | [Live demo](https://jcheng.shinyapps.io/sidebot/) 22 | 23 | **This is not as terrible an idea as you might think!** We need to be very careful when bringing LLMs into data analysis, as we all know that they are prone to hallucinations and other classes of errors. querychat is designed to excel in reliability, transparency, and reproducibility by using this one technique: denying it raw access to the data, and forcing it to write SQL queries instead. 24 | 25 | ## How it works 26 | 27 | ### Powered by LLMs 28 | 29 | querychat's natural language chat experience is powered by LLMs (like GPT-4o, Claude 3.5 Sonnet, etc.) that support function/tool calling capabilities. 30 | 31 | ### Powered by SQL 32 | 33 | querychat doesn't send the raw data to the LLM, asking it to guess summary statistics. 
Instead, the LLM generates precise SQL queries to filter the data or directly calculate statistics. This is crucial for ensuring reliability, transparency, and reproducibility: 34 | 35 | - **Reliability:** Today's LLMs are excellent at writing SQL, but bad at direct calculation. 36 | - **Transparency:** querychat always displays the SQL to the user, so it can be vetted instead of blindly trusted. 37 | - **Reproducibility:** The SQL query can be easily copied and reused. 38 | 39 | Currently, querychat uses DuckDB for its SQL engine when working with data frames. For database sources, it uses the native SQL dialect of the connected database. 40 | 41 | ## Language-specific Documentation 42 | 43 | For detailed information on how to use querychat in your preferred language, see the language-specific READMEs: 44 | 45 | - [R Documentation](pkg-r/README.md) 46 | - [Python Documentation](pkg-py/README.md) -------------------------------------------------------------------------------- /pkg-r/R/querychat-package.R: -------------------------------------------------------------------------------- 1 | #' querychat: Chat with Your Data Using Natural Language 2 | #' 3 | #' @description 4 | #' querychat provides an interactive chat interface for querying data using 5 | #' natural language. It translates your questions into SQL queries, executes 6 | #' them against your data, and displays the results. The package works with 7 | #' both data frames and database connections.
8 | #' 9 | #' @section Quick Start: 10 | #' The easiest way to get started is with the [QueryChat] R6 class: 11 | #' 12 | #' ```r 13 | #' library(querychat) 14 | #' 15 | #' # Create a QueryChat object (table name inferred from variable) 16 | #' qc <- QueryChat$new(mtcars) 17 | #' 18 | #' # Option 1: Run a complete app with sensible defaults 19 | #' qc$app() 20 | #' 21 | #' # Option 2: Build a custom Shiny app 22 | #' ui <- page_sidebar( 23 | #' qc$sidebar(), 24 | #' dataTableOutput("data") 25 | #' ) 26 | #' 27 | #' server <- function(input, output, session) { 28 | #' qc$server() 29 | #' output$data <- renderDataTable(qc$df()) 30 | #' } 31 | #' 32 | #' shinyApp(ui, server) 33 | #' ``` 34 | #' 35 | #' @section Key Features: 36 | #' - **Natural language queries**: Ask questions in plain English 37 | #' - **SQL transparency**: See the generated SQL queries 38 | #' - **Multiple data sources**: Works with data frames and database connections 39 | #' - **Customizable**: Add data descriptions, extra instructions, and custom greetings 40 | #' - **LLM agnostic**: Works with OpenAI, Anthropic, Google, and other providers via ellmer 41 | #' 42 | #' @section Main Components: 43 | #' - [QueryChat]: The main R6 class for creating chat interfaces 44 | #' - [DataSource], [DataFrameSource], [DBISource]: R6 classes for data sources 45 | #' 46 | #' @section Examples: 47 | #' To see examples included with the package, run: 48 | #' 49 | #' ```r 50 | #' shiny::runExample(package = "querychat") 51 | #' ``` 52 | #' 53 | #' This provides a list of available examples. 
To run a specific example, like 54 | #' '01-hello-app', use: 55 | #' 56 | #' ```r 57 | #' shiny::runExample("01-hello-app", package = "querychat") 58 | #' ``` 59 | #' 60 | #' 61 | #' @keywords internal 62 | "_PACKAGE" 63 | 64 | ## usethis namespace: start 65 | #' @importFrom lifecycle deprecated 66 | #' @importFrom R6 R6Class 67 | #' @importFrom bslib sidebar 68 | #' @import rlang 69 | ## usethis namespace: end 70 | NULL 71 | 72 | # @staticimports pkg:staticimports 73 | # read_utf8 74 | 75 | # enable usage of @name in package code 76 | #' @rawNamespace if (getRversion() < "4.3.0") importFrom("S7", "@") 77 | NULL 78 | 79 | release_bullets <- function() { 80 | c( 81 | "Run `staticimports::import()` to update static imports", 82 | "Enable `development.mode: auto` in `_pkgdown.yml` and remove this release bullet." 83 | ) 84 | } 85 | 86 | suppress_rcmdcheck <- function() { 87 | duckdb::duckdb 88 | S7::S7_class 89 | } 90 | -------------------------------------------------------------------------------- /pkg-r/man/querychat-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/querychat-package.R 3 | \docType{package} 4 | \name{querychat-package} 5 | \alias{querychat-package} 6 | \title{querychat: Chat with Your Data Using Natural Language} 7 | \description{ 8 | querychat provides an interactive chat interface for querying data using 9 | natural language. It translates your questions into SQL queries, executes 10 | them against your data, and displays the results. The package works with 11 | both data frames and database connections. 12 | } 13 | \section{Quick Start}{ 14 | 15 | The easiest way to get started is with the \link{QueryChat} R6 class: 16 | 17 | \if{html}{\out{
}}\preformatted{library(querychat) 18 | 19 | # Create a QueryChat object (table name inferred from variable) 20 | qc <- QueryChat$new(mtcars) 21 | 22 | # Option 1: Run a complete app with sensible defaults 23 | qc$app() 24 | 25 | # Option 2: Build a custom Shiny app 26 | ui <- page_sidebar( 27 | qc$sidebar(), 28 | dataTableOutput("data") 29 | ) 30 | 31 | server <- function(input, output, session) \{ 32 | qc$server() 33 | output$data <- renderDataTable(qc$df()) 34 | \} 35 | 36 | shinyApp(ui, server) 37 | }\if{html}{\out{
}} 38 | } 39 | 40 | \section{Key Features}{ 41 | 42 | \itemize{ 43 | \item \strong{Natural language queries}: Ask questions in plain English 44 | \item \strong{SQL transparency}: See the generated SQL queries 45 | \item \strong{Multiple data sources}: Works with data frames and database connections 46 | \item \strong{Customizable}: Add data descriptions, extra instructions, and custom greetings 47 | \item \strong{LLM agnostic}: Works with OpenAI, Anthropic, Google, and other providers via ellmer 48 | } 49 | } 50 | 51 | \section{Main Components}{ 52 | 53 | \itemize{ 54 | \item \link{QueryChat}: The main R6 class for creating chat interfaces 55 | \item \link{DataSource}, \link{DataFrameSource}, \link{DBISource}: R6 classes for data sources 56 | } 57 | } 58 | 59 | \section{Examples}{ 60 | 61 | To see examples included with the package, run: 62 | 63 | \if{html}{\out{
}}\preformatted{shiny::runExample(package = "querychat") 64 | }\if{html}{\out{
}} 65 | 66 | This provides a list of available examples. To run a specific example, like 67 | '01-hello-app', use: 68 | 69 | \if{html}{\out{
}}\preformatted{shiny::runExample("01-hello-app", package = "querychat") 70 | }\if{html}{\out{
}} 71 | } 72 | 73 | \seealso{ 74 | Useful links: 75 | \itemize{ 76 | \item \url{https://posit-dev.github.io/querychat/pkg-r} 77 | \item \url{https://posit-dev.github.io/querychat} 78 | } 79 | 80 | } 81 | \author{ 82 | \strong{Maintainer}: Joe Cheng \email{joe@posit.co} 83 | 84 | Other contributors: 85 | \itemize{ 86 | \item Posit Software, PBC [copyright holder, funder] 87 | } 88 | 89 | } 90 | \keyword{internal} 91 | -------------------------------------------------------------------------------- /pkg-py/docs/greet.qmd: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | title: Greet users 4 | --- 5 | 6 | ### Provide a greeting 7 | 8 | When the querychat UI first appears, you will usually want it to greet the user with some basic instructions. By default, these instructions are auto-generated every time a user arrives. In a production setting with multiple users/visitors, this is slow, wasteful, and non-deterministic. Instead, you should create a greeting file and pass it when creating your `QueryChat` object: 9 | 10 | ```{.python filename="titanic-app.py"} 11 | from querychat import QueryChat 12 | from querychat.data import titanic 13 | from pathlib import Path 14 | 15 | app_dir = Path(__file__).parent 16 | 17 | qc = QueryChat(titanic(), "titanic", greeting=app_dir / "greeting.md") 18 | app = qc.app() 19 | ``` 20 | 21 | You can provide suggestions to the user by using the `<span class="suggestion"> </span>` tag: 22 | 23 | ```markdown 24 | * **Filter and sort the data:** 25 | * <span class="suggestion">Show only survivors</span> 26 | * <span class="suggestion">Filter to first class passengers under 30</span> 27 | * <span class="suggestion">Sort by fare from highest to lowest</span> 28 | 29 | * **Answer questions about the data:** 30 | * <span class="suggestion">What was the survival rate by gender?</span> 31 | * <span class="suggestion">What's the average age of children who survived?</span> 32 | * <span class="suggestion">How many passengers were traveling alone?</span> 33 | ``` 34 | 35 | These suggestions appear in the greeting and automatically populate the chat text box when clicked.
36 | You can see this behavior in our [`querychat template`](https://shiny.posit.co/py/templates/querychat/). 37 | 38 | ### Generate a greeting 39 | 40 | If you need help coming up with a greeting, you can use the `.generate_greeting()` method: 41 | 42 | ```{.python filename="penguins-greeting.py"} 43 | from palmerpenguins import load_penguins 44 | from querychat import QueryChat 45 | from pathlib import Path 46 | 47 | # Create QueryChat object with your dataset 48 | qc = QueryChat(load_penguins(), "penguins") 49 | 50 | # Generate a greeting (this calls the LLM) 51 | greeting_text = qc.generate_greeting() 52 | #> Hello! I'm here to help you explore and analyze the penguins dataset. 53 | #> Here are some example prompts you can try: 54 | #> ... 55 | 56 | # Save it for reuse 57 | with open("penguins_greeting.md", "w") as f: 58 | f.write(greeting_text) 59 | ``` 60 | 61 | This approach generates a greeting once and saves it for reuse, avoiding the latency and cost of generating it for every user. 62 | 63 | ```{.python filename="penguins-app.py"} 64 | from palmerpenguins import load_penguins 65 | from querychat import QueryChat 66 | from pathlib import Path 67 | 68 | # Then use the saved greeting in your app 69 | app_dir = Path(__file__).parent 70 | qc = QueryChat( 71 | load_penguins(), 72 | "penguins", 73 | greeting=app_dir / "penguins_greeting.md", 74 | ) 75 | app = qc.app() 76 | ``` 77 | -------------------------------------------------------------------------------- /pkg-r/inst/examples-shiny/02-sidebar-app/app.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(bslib) 3 | library(querychat) 4 | library(palmerpenguins) 5 | 6 | # Define a custom greeting for the penguins app 7 | greeting <- r"( 8 | # Welcome to the Palmer Penguins Explorer! 🐧 9 | 10 | I can help you explore and analyze the Palmer Penguins dataset. Ask me questions 11 | about the penguins, and I'll generate SQL queries to get the answers. 
12 | 13 | Try asking: 14 | - Show me the first 10 rows of the penguins dataset 15 | - What's the average bill length by species? 16 | - Which species has the largest body mass? 17 | - Create a summary of measurements grouped by species and island 18 | )" 19 | 20 | # Create QueryChat object with custom options 21 | qc <- QueryChat$new( 22 | penguins, 23 | greeting = greeting, 24 | data_description = paste( 25 | "The Palmer Penguins dataset contains measurements of bill", 26 | "dimensions, flipper length, body mass, sex, and species", 27 | "(Adelie, Chinstrap, and Gentoo) collected from three islands in", 28 | "the Palmer Archipelago, Antarctica." 29 | ), 30 | extra_instructions = paste( 31 | "When showing results, always explain what the data represents", 32 | "and highlight any interesting patterns you observe." 33 | ) 34 | ) 35 | 36 | # Define custom UI with sidebar 37 | ui <- page_sidebar( 38 | title = "Palmer Penguins Chat Explorer", 39 | sidebar = qc$sidebar(), 40 | 41 | card( 42 | fill = FALSE, 43 | card_header("Current SQL Query"), 44 | verbatimTextOutput("sql_query") 45 | ), 46 | 47 | card( 48 | full_screen = TRUE, 49 | card_header( 50 | "Current Data View", 51 | tooltip( 52 | bsicons::bs_icon("question-circle-fill", class = "mx-1"), 53 | "The table below shows the current filtered data based on your chat queries" 54 | ), 55 | tooltip( 56 | bsicons::bs_icon("info-circle-fill"), 57 | "The penguins dataset contains measurements on 344 penguins." 
def _sample_people_frame():
    """Build the three-row pandas DataFrame shared by the tests below."""
    return pd.DataFrame(
        {
            "id": [1, 2, 3],
            "name": ["Alice", "Bob", "Charlie"],
            "age": [25, 30, 35],
        },
    )


@pytest.fixture(autouse=True)
def set_dummy_api_key():
    """Install a placeholder OpenAI API key for each test, then restore the old value."""
    env_var = "OPENAI_API_KEY"
    previous = os.environ.get(env_var)
    os.environ[env_var] = "sk-dummy-api-key-for-testing"
    yield
    # Teardown: put the environment back exactly as it was before the test.
    if previous is None:
        del os.environ[env_var]
    else:
        os.environ[env_var] = previous


def test_init_with_pandas_dataframe():
    """Test that QueryChat() can accept a pandas DataFrame."""
    # Constructing the object must not raise for a plain pandas input.
    chat = QueryChat(
        data_source=_sample_people_frame(),
        table_name="test_table",
        greeting="hello!",
    )

    assert chat is not None


def test_init_with_narwhals_dataframe():
    """Test that QueryChat() can accept a narwhals DataFrame."""
    # Wrap the pandas frame in narwhals before handing it over.
    wrapped = nw.from_native(_sample_people_frame())

    chat = QueryChat(
        data_source=wrapped,
        table_name="test_table",
        greeting="hello!",
    )

    assert chat is not None


def test_init_with_narwhals_lazyframe_direct_query():
    """Test that QueryChat() can accept a narwhals LazyFrame and execute queries."""
    lazy_frame = nw.from_native(_sample_people_frame()).lazy()

    chat = QueryChat(
        data_source=lazy_frame,  # TODO(@gadebuie): Fix this type error
        table_name="test_table",
        greeting="hello!",
    )

    assert chat is not None
    assert hasattr(chat, "data_source")

    # The data source must be queryable directly with SQL.
    rows = chat.data_source.execute_query(
        "SELECT * FROM test_table WHERE id = 2",
    )
    assert len(rows) == 1
    assert rows.iloc[0]["name"] == "Bob"
1 | # Database Setup Examples for querychat 2 | 3 | This document provides examples of how to set up querychat with various database types. 4 | 5 | ## SQLite 6 | 7 | ```r 8 | library(DBI) 9 | library(RSQLite) 10 | library(querychat) 11 | 12 | # Connect to SQLite database 13 | conn <- dbConnect(RSQLite::SQLite(), "path/to/your/database.db") 14 | 15 | # Create QueryChat instance 16 | qc <- QueryChat$new( 17 | conn, 18 | "your_table_name", 19 | greeting = "Welcome! Ask me about your data.", 20 | data_description = "Description of your data..." 21 | ) 22 | 23 | # Launch the app 24 | qc$app() 25 | ``` 26 | 27 | ## PostgreSQL 28 | 29 | ```r 30 | library(DBI) 31 | library(RPostgreSQL) # or library(RPostgres) 32 | library(querychat) 33 | 34 | # Connect to PostgreSQL 35 | conn <- dbConnect( 36 | RPostgreSQL::PostgreSQL(), # or RPostgres::Postgres() 37 | dbname = "your_database", 38 | host = "localhost", 39 | port = 5432, 40 | user = "your_username", 41 | password = "your_password" 42 | ) 43 | 44 | # Create QueryChat instance 45 | qc <- QueryChat$new(conn, "your_table_name") 46 | 47 | # Launch the app 48 | qc$app() 49 | ``` 50 | 51 | ## MySQL 52 | 53 | ```r 54 | library(DBI) 55 | library(RMySQL) 56 | library(querychat) 57 | 58 | # Connect to MySQL 59 | conn <- dbConnect( 60 | RMySQL::MySQL(), 61 | dbname = "your_database", 62 | host = "localhost", 63 | user = "your_username", 64 | password = "your_password" 65 | ) 66 | 67 | # Create QueryChat instance 68 | qc <- QueryChat$new(conn, "your_table_name") 69 | 70 | # Launch the app 71 | qc$app() 72 | ``` 73 | 74 | ## Connection Management 75 | 76 | When using database sources in custom Shiny apps, make sure to properly manage connections: 77 | 78 | ```r 79 | server <- function(input, output, session) { 80 | # Initialize QueryChat server 81 | qc$server() 82 | 83 | # Your custom outputs here 84 | output$table <- renderTable(qc$df()) 85 | 86 | # Clean up connection when session ends 87 | session$onSessionEnded(function() { 88 | if 
(dbIsValid(conn)) { 89 | dbDisconnect(conn) 90 | } 91 | }) 92 | } 93 | ``` 94 | 95 | ## Security Considerations 96 | 97 | - Only SELECT queries are allowed - no INSERT, UPDATE, or DELETE operations 98 | - All SQL queries are visible to users for transparency 99 | - Use appropriate database user permissions (read-only recommended) 100 | - Consider connection pooling for production applications 101 | - Validate that users only have access to intended tables 102 | 103 | ## Error Handling 104 | 105 | The database source implementation includes robust error handling: 106 | 107 | - Validates table existence during creation 108 | - Handles database connection issues gracefully 109 | - Provides informative error messages for invalid queries 110 | - Falls back gracefully when statistical queries fail 111 | 112 | ## Performance Tips 113 | 114 | - Use appropriate database indexes for columns commonly used in queries 115 | - Consider limiting row counts for very large tables 116 | - Database connections are reused for better performance 117 | - Schema information is cached to avoid repeated metadata queries -------------------------------------------------------------------------------- /.claude/R-TESTING.md: -------------------------------------------------------------------------------- 1 | # R Testing Guide 2 | 3 | ## Test Organization 4 | 5 | Use testthat 3rd-edition style tests. For tests covering a single behavior, use standard `test_that()` style. For testing classes, methods and functions, use **BDD style** with `describe()` and `it()` blocks. 6 | 7 | Test files should be placed in `pkg-r/tests/testthat/`. Test file names should directly match the R source file, e.g. `R/{name}.R` --> `tests/testthat/test-{name}.R`. 8 | 9 | ### Key BDD Principles 10 | 11 | 1. **Flat structure**: No nested `describe()` blocks 12 | 2. **Group by method/function**: One `describe()` block per method or function being tested 13 | 3. 
**Shared fixtures**: Set up at the top of `describe()` blocks, not inside `it()` blocks, when possible 14 | 4. **Self-contained tests**: Each `it()` block should be runnable independently after running the shared setup 15 | 5. Only use `describe()` blocks for grouped behavior-oriented tests. Use `test_that()` for single or unit tests 16 | 17 | ### Describe Block Structure 18 | 19 | ```r 20 | describe("ClassName$method()", { 21 | # Shared fixtures here 22 | test_data <- new_test_df() 23 | 24 | it("describes what the method does", { 25 | # Test implementation 26 | }) 27 | }) 28 | ``` 29 | 30 | ## Fixture Helpers 31 | 32 | Common test fixtures are stored in `pkg-r/tests/testthat/helper-fixtures.R`. 33 | 34 | ### Usage Pattern 35 | 36 | ```r 37 | describe("DataSource$execute_query()", { 38 | # Shared fixture at top 39 | test_df <- new_test_df() 40 | df_source <- local_data_frame_source(test_df) 41 | 42 | it("executes basic queries", { 43 | result <- df_source$execute_query("SELECT * FROM test_table") 44 | expect_s3_class(result, "data.frame") 45 | }) 46 | }) 47 | ``` 48 | 49 | ## Cleanup 50 | 51 | - Use `withr::defer()` for cleanup when not using local_* helpers 52 | - Use `withr::local_*` functions for temporary state (options, envvars, files) 53 | - The fixture helpers handle cleanup automatically via `withr::defer()` 54 | 55 | ## Test Descriptions 56 | 57 | - `describe()`: Use method/function names like `"ClassName$method()"` or `"function_name()"` 58 | - `it()`: Describe behavior at the right level - what does it do, not how 59 | - Group related assertions in a single `it()` block rather than splitting into many small tests 60 | 61 | ### Good Examples 62 | 63 | ```r 64 | describe("querychat_tool_starts_open()", { 65 | it("uses the tool default when options are unset", { 66 | withr::local_options(querychat.tool_details = NULL) 67 | 68 | expect_true(querychat_tool_starts_open("query")) 69 | expect_true(querychat_tool_starts_open("update")) 70 | 
expect_false(querychat_tool_starts_open("reset")) 71 | }) 72 | }) 73 | ``` 74 | 75 | ## Common patterns 76 | 77 | ### Testing Errors 78 | 79 | Prefer snapshot testing around expected errors so that the error message is captured in the snapshot. 80 | 81 | ```r 82 | # Don't do this 83 | expect_error(foo_will_error()) 84 | 85 | # Do this 86 | expect_snapshot(error = TRUE, foo_will_error()) 87 | ``` 88 | 89 | ## Running Tests 90 | 91 | ```bash 92 | # All tests (from repo root) 93 | make r-check-tests 94 | 95 | # Single file (e.g. to test tests/testthat/test-data-source.R) 96 | testthat::test(filter = "data-source", reporter = "check") 97 | ``` 98 | -------------------------------------------------------------------------------- /pkg-r/inst/examples-shiny/sqlite/app.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(bslib) 3 | library(querychat) 4 | library(DBI) 5 | library(RSQLite) 6 | library(palmerpenguins) 7 | 8 | # Create a sample SQLite database for demonstration 9 | # In a real app, you would connect to your existing database 10 | temp_db <- tempfile(fileext = ".db") 11 | onStop(function() { 12 | if (file.exists(temp_db)) { 13 | unlink(temp_db) 14 | } 15 | }) 16 | 17 | conn <- dbConnect(RSQLite::SQLite(), temp_db) 18 | 19 | # Create sample data in the database 20 | dbWriteTable(conn, "penguins", palmerpenguins::penguins, overwrite = TRUE) 21 | 22 | # Define a custom greeting for the database app 23 | greeting <- " 24 | # Welcome to the Database Query Assistant! 🐧 25 | 26 | I can help you explore and analyze the Palmer Penguins dataset from the connected database. 27 | Ask me questions about the penguins, and I'll generate SQL queries to get the answers. 28 | 29 | Try asking: 30 | - Show me the first 10 rows of the penguins dataset 31 | - What's the average bill length by species? 32 | - Which species has the largest body mass? 
33 | - Create a summary of measurements grouped by species and island 34 | " 35 | 36 | # Create QueryChat object with database connection 37 | qc <- QueryChat$new( 38 | conn, 39 | "penguins", 40 | greeting = greeting, 41 | data_description = "This database contains the Palmer Penguins dataset with measurements of bill dimensions, flipper length, body mass, sex, and species (Adelie, Chinstrap, and Gentoo) collected from three islands in the Palmer Archipelago, Antarctica.", 42 | extra_instructions = "When showing results, always explain what the data represents and highlight any interesting patterns you observe." 43 | ) 44 | 45 | ui <- page_sidebar( 46 | title = "Database Query Chat", 47 | sidebar = qc$sidebar(), 48 | 49 | card( 50 | fill = FALSE, 51 | card_header("Current SQL Query"), 52 | verbatimTextOutput("sql_query") 53 | ), 54 | 55 | card( 56 | full_screen = TRUE, 57 | card_header( 58 | "Current Data View", 59 | tooltip( 60 | bsicons::bs_icon("question-circle-fill", class = "mx-1"), 61 | "The table below shows the current filtered data based on your chat queries" 62 | ), 63 | tooltip( 64 | bsicons::bs_icon("info-circle-fill"), 65 | "The penguins dataset contains measurements on 344 penguins." 
66 | ) 67 | ), 68 | DT::DTOutput("data_table"), 69 | card_footer( 70 | markdown( 71 | "Data source: [palmerpenguins package](https://allisonhorst.github.io/palmerpenguins/)" 72 | ) 73 | ) 74 | ) 75 | ) 76 | 77 | server <- function(input, output, session) { 78 | qc_vals <- qc$server() 79 | 80 | output$data_table <- DT::renderDT( 81 | { 82 | qc_vals$df() 83 | }, 84 | fillContainer = TRUE, 85 | options = list(pageLength = 10, scrollX = TRUE) 86 | ) 87 | 88 | output$sql_query <- renderText({ 89 | query <- qc_vals$sql() 90 | if (is.null(query) || !nzchar(query)) { 91 | "No filter applied - showing all data" 92 | } else { 93 | query 94 | } 95 | }) 96 | } 97 | 98 | shinyApp(ui = ui, server = server) 99 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/_deprecated.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING, Any, Optional, Union 4 | 5 | from shiny import Inputs, Outputs, Session, module, ui 6 | 7 | if TYPE_CHECKING: 8 | from pathlib import Path 9 | 10 | import chatlas 11 | import sqlalchemy 12 | from narwhals.stable.v1.typing import IntoFrame 13 | 14 | from ._datasource import DataSource 15 | 16 | 17 | def init( 18 | data_source: IntoFrame | sqlalchemy.Engine, 19 | table_name: str, 20 | *, 21 | greeting: Optional[str | Path] = None, 22 | data_description: Optional[str | Path] = None, 23 | extra_instructions: Optional[str | Path] = None, 24 | prompt_template: Optional[str | Path] = None, 25 | system_prompt_override: Optional[str] = None, 26 | client: Optional[Union[chatlas.Chat, str]] = None, 27 | ): 28 | """ 29 | Initialize querychat with any compliant data source. 30 | 31 | **Deprecated.** Use `QueryChat()` instead. 32 | """ 33 | raise RuntimeError("init() is deprecated. 
Use QueryChat() instead.") 34 | 35 | 36 | @module.ui 37 | def mod_ui(**kwargs) -> ui.TagList: 38 | """ 39 | Create the UI for the querychat component. 40 | 41 | **Deprecated.** Use `QueryChat.ui()` instead. 42 | """ 43 | raise RuntimeError("mod_ui() is deprecated. Use QueryChat.ui() instead.") 44 | 45 | 46 | @module.server 47 | def mod_server( 48 | input: Inputs, 49 | output: Outputs, 50 | session: Session, 51 | querychat_config: Any, 52 | ): 53 | """ 54 | Initialize the querychat server. 55 | 56 | **Deprecated.** Use `QueryChat.server()` instead. 57 | """ 58 | raise RuntimeError("mod_server() is deprecated. Use QueryChat.server() instead.") 59 | 60 | 61 | def sidebar( 62 | id: str, 63 | width: int = 400, 64 | height: str = "100%", 65 | **kwargs, 66 | ) -> ui.Sidebar: 67 | """ 68 | Create a sidebar containing the querychat UI. 69 | 70 | **Deprecated.** Use `QueryChat.sidebar()` instead. 71 | """ 72 | raise RuntimeError("sidebar() is deprecated. Use QueryChat.sidebar() instead.") 73 | 74 | 75 | def system_prompt( 76 | data_source: DataSource, 77 | *, 78 | data_description: Optional[str | Path] = None, 79 | extra_instructions: Optional[str | Path] = None, 80 | categorical_threshold: int = 20, 81 | prompt_template: Optional[str | Path] = None, 82 | ) -> str: 83 | """ 84 | Create a system prompt for the chat model based on a data source's schema 85 | and optional additional context and instructions. 86 | 87 | **Deprecated.** Use `QueryChat.set_system_prompt()` instead. 88 | """ 89 | raise RuntimeError( 90 | "system_prompt() is deprecated. Use QueryChat.set_system_prompt() instead." 91 | ) 92 | 93 | 94 | def greeting( 95 | querychat_config, 96 | *, 97 | generate: bool = True, 98 | stream: bool = False, 99 | **kwargs, 100 | ) -> str | None: 101 | """ 102 | Generate or retrieve a greeting message. 103 | 104 | **Deprecated.** Use `QueryChat.generate_greeting()` instead. 105 | """ 106 | raise RuntimeError( 107 | "greeting() is deprecated. 
Use QueryChat.generate_greeting() instead." 108 | ) 109 | -------------------------------------------------------------------------------- /pkg-r/NEWS.md: -------------------------------------------------------------------------------- 1 | # querychat (development version) 2 | 3 | * **Breaking change:** The `$sql()` method now returns `NULL` instead of `""` (empty string) when no query has been set, aligning with the behavior of `$title()` for consistency. Most code using `isTruthy()` or similar falsy checks will continue working without changes. Code that explicitly checks `sql() == ""` should be updated to use falsy checks (e.g., `!isTruthy(sql())`) or explicit null checks (`is.null(sql())`). (#146) 4 | 5 | * Tool detail cards can now be expanded or collapsed by default when querychat runs a query or updates the dashboard via the `querychat.tool_details` R option or the `QUERYCHAT_TOOL_DETAILS` environment variable. Valid values are `"expanded"`, `"collapsed"`, or `"default"`. (#137) 6 | 7 | * Added bookmarking support to `QueryChat$server()` and `querychat_app()`. When bookmarking is enabled (via `bookmark_store = "url"` or `"server"` in `querychat_app()` or `$app_obj()`, or via `enable_bookmarking = TRUE` in `$server()`), the chat state (including current query, title, and chat history) will be saved and restored with Shiny bookmarks. (#107) 8 | 9 | * Nearly the entire functional API (i.e., `querychat_init()`, `querychat_sidebar()`, `querychat_server()`, etc) has been hard deprecated in favor of a simpler OOP-based API. Namely, the new `QueryChat$new()` class is now the main entry point (instead of `querychat_init()`) and has methods to replace old functions (e.g., `$sidebar()`, `$server()`, etc). (#109) 10 | * In addition, `querychat_data_source()` was renamed to `as_querychat_data_source()`, and remains exported for a developer extension point, but users no longer have to explicitly create a data source. 
(#109) 11 | 12 | * Added `prompt_template` support for `querychat_system_prompt()`. (Thank you, @oacar! #37, #45) 13 | 14 | * `querychat_init()` now accepts a `client`, replacing the previous `create_chat_func` argument. (#60) 15 | 16 | The `client` can be: 17 | 18 | * an `ellmer::Chat` object, 19 | * a function that returns an `ellmer::Chat` object, 20 | * or a provider-model string, e.g. `"openai/gpt-4.1"`, to be passed to `ellmer::chat()`. 21 | 22 | If `client` is not provided, querychat will use 23 | 24 | * the `querychat.client` R option, which can be any of the above options, 25 | * the `QUERYCHAT_CLIENT` environment variable, which should be a provider-model string, 26 | * or the default model from `ellmer::chat_openai()`. 27 | 28 | * `querychat_server()` now uses a `shiny::ExtendedTask` for streaming the chat response, which allows the dashboard to update and remain responsive while the chat response is streaming in. (#63) 29 | 30 | * querychat now requires `ellmer` version 0.3.0 or later and uses rich tool cards for dashboard updates and database queries. (#65) 31 | 32 | * New `querychat_app()` function lets you quickly launch a Shiny app with a querychat chat interface. (#66) 33 | 34 | * `querychat_ui()` now adds a `.querychat` class to the chat container and `querychat_sidebar()` adds a `.querychat-sidebar` class to the sidebar, allowing for easier customization via CSS. (#68) 35 | 36 | * querychat now uses a separate tool to reset the dashboard. (#80) 37 | 38 | * `querychat_greeting()` can be used to generate a greeting message for your querychat bot. (#87) 39 | 40 | * querychat's system prompt and tool descriptions were rewritten for clarity and future extensibility. (#90) 41 | -------------------------------------------------------------------------------- /pkg-py/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [UNRELEASED] 9 | 10 | 11 | 12 | ## [0.3.0] - 2025-12-10 13 | 14 | ### Breaking Changes 15 | 16 | * The entire functional API (i.e., `init()`, `sidebar()`, `server()`, etc) has been hard deprecated in favor of a simpler OOP-based API. Namely, the new `QueryChat()` class is now the main entry point (instead of `init()`) and has methods to replace old functions (e.g., `.sidebar()`, `.server()`, etc). (#101) 17 | 18 | * The `.sql()` method now returns `None` instead of `""` (empty string) when no query has been set, aligning with the behavior of `.title()` for consistency. Most code using the `or` operator or `req()` for falsy checks will continue working without changes. Code that explicitly checks `sql() == ""` should be updated to use falsy checks (`if not sql()`) or explicit null checks (`if sql() is None`). (#146) 19 | 20 | ### New features 21 | 22 | * New `QueryChat.app()` method enables quicker/easier chatting with a dataset. (#104) 23 | 24 | * Enabled bookmarking by default in both `.app()` and `.server()` methods. In the latter case, you'll also need to specify the `bookmark_store` (either in `shiny.App()` or `shiny.express.app_opts()`) for it to take effect. (#104) 25 | 26 | * The current SQL query and title can now be programmatically set through the `.sql()` and `.title()` methods of `QueryChat()`. (#98, #101) 27 | 28 | * New `querychat.data` module provides sample datasets (`titanic()` and `tips()`) to make it easier to get started without external dependencies. (#118) 29 | 30 | * Added a `.generate_greeting()` method to help you create a greeting message for your querychat bot. (#87) 31 | 32 | * Added `querychat_reset_dashboard()` tool for easily resetting the dashboard filters when asked by the user.
(#81) 33 | 34 | ### Improvements 35 | 36 | * Added rich tool UI support using the shinychat development version and chatlas >= 0.11.1. (#67) 37 | 38 | * querychat's system prompt and tool descriptions were rewritten for clarity and future extensibility. (#90) 39 | 40 | * Tool detail cards can now be expanded or collapsed by default when querychat runs a query or updates the dashboard via the `QUERYCHAT_TOOL_DETAILS` environment variable. Valid values are `"expanded"`, `"collapsed"`, or `"default"`. (#137) 41 | 42 | ## [0.2.2] - 2025-09-04 43 | 44 | * Fixed another issue with data sources that aren't already narwhals DataFrames. (#83) 45 | 46 | ## [0.2.1] - 2025-09-04 47 | 48 | * Fixed an issue with the query tool when used with SQLAlchemy data sources. (@npelikan #79) 49 | 50 | ## [0.2.0] - 2025-09-02 51 | 52 | * `querychat.init()` now accepts a `client` argument, replacing the previous `create_chat_callback` argument. (#60) 53 | 54 | The `client` can be: 55 | 56 | * a `chatlas.Chat` object, 57 | * a function that returns a `chatlas.Chat` object, 58 | * or a provider-model string, e.g. `"openai/gpt-4.1"`, to be passed to `chatlas.ChatAuto()`. 59 | 60 | If `client` is not provided, querychat will use the `QUERYCHAT_CLIENT` environment variable, which should be a provider-model string. If the envvar is not set, querychat uses OpenAI with the default model from `chatlas.ChatOpenAI()`. 61 | 62 | * `querychat.ui()` now adds a `.querychat` class to the chat container and `querychat.sidebar()` adds a `.querychat-sidebar` class to the sidebar, allowing for easier customization via CSS. (#68) 63 | 64 | ## [0.1.0] - 2025-05-24 65 | 66 | This is the first release of the `querychat` package.
67 | -------------------------------------------------------------------------------- /pkg-py/docs/_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | output-dir: ../../docs/py 4 | pre-render: 5 | cp ../CHANGELOG.md CHANGELOG.md 6 | 7 | website: 8 | title: "querychat" 9 | site-url: https://posit-dev.github.io/querychat/py 10 | description: Explore data using natural language 11 | page-navigation: true 12 | 13 | bread-crumbs: true 14 | open-graph: true 15 | twitter-card: true 16 | 17 | repo-url: https://github.com/posit-dev/querychat/ 18 | repo-actions: [issue, edit] 19 | repo-subdir: pkg-py/docs 20 | 21 | page-footer: 22 | left: | 23 | Proudly supported by 24 | [![](https://posit.co/wp-content/uploads/2024/06/Posit-Logos-2024_horiz-full-color.svg){fig-alt="Posit" width=65px}](https://posit.co) 25 | 26 | navbar: 27 | background: "#193D56" 28 | search: true 29 | title: 'QueryChat' 30 | #title: 'querychat websiteQueryChat' 31 | 32 | right: 33 | - text: API Reference 34 | href: reference/index.qmd 35 | - text: Changelog 36 | href: /CHANGELOG.html 37 | - icon: github 38 | href: https://github.com/posit-dev/querychat 39 | aria-label: GitHub repository 40 | 41 | sidebar: 42 | - id: get-started 43 | title: Get Started 44 | style: floating 45 | align: left 46 | contents: 47 | - index.qmd 48 | - section: "Overview" 49 | contents: 50 | - models.qmd 51 | - data-sources.qmd 52 | - context.qmd 53 | - build.qmd 54 | - greet.qmd 55 | - tools.qmd 56 | 57 | 58 | format: 59 | html: 60 | theme: 61 | - styles.scss 62 | toc: true 63 | 64 | lightbox: auto 65 | 66 | metadata-files: 67 | - reference/_sidebar.yml 68 | 69 | quartodoc: 70 | package: querychat 71 | render_interlinks: true 72 | sidebar: reference/_sidebar.yml 73 | css: reference/_styles-quartodoc.css 74 | sections: 75 | - title: The Querychat class 76 | desc: The starting point for any QueryChat session 77 | contents: 78 | - name: QueryChat 79 | 
include_inherited: true 80 | - name: express.QueryChat 81 | include_inherited: true 82 | 83 | - title: Reactive values 84 | desc: Session-specific reactive values representing the current query 85 | contents: 86 | - types.ServerValues 87 | 88 | - title: Data Sources 89 | desc: The underlying logic for managing data sources 90 | contents: 91 | - name: types.DataSource 92 | signature_name: short 93 | - name: types.DataFrameSource 94 | signature_name: short 95 | - name: types.SQLAlchemySource 96 | signature_name: short 97 | 98 | - title: Tools 99 | desc: The underlying tools provided to the LLM 100 | contents: 101 | - name: tools.tool_query 102 | signature_name: short 103 | - name: tools.tool_update_dashboard 104 | signature_name: short 105 | - name: tools.tool_reset_dashboard 106 | signature_name: short 107 | 108 | filters: 109 | - "interlinks" 110 | 111 | interlinks: 112 | fast: true 113 | sources: 114 | pydantic: 115 | url: https://docs.pydantic.dev/latest/ 116 | python: 117 | url: https://docs.python.org/3/ 118 | 119 | editor: 120 | render-on-save: true 121 | markdown: 122 | canonical: true 123 | wrap: sentence 124 | -------------------------------------------------------------------------------- /pkg-py/tests/test_data.py: -------------------------------------------------------------------------------- 1 | """Tests for the querychat.data module.""" 2 | 3 | import pandas as pd 4 | from querychat.data import tips, titanic 5 | 6 | 7 | def test_titanic_returns_dataframe(): 8 | """Test that titanic() returns a pandas DataFrame.""" 9 | df = titanic() 10 | assert isinstance(df, pd.DataFrame) 11 | 12 | 13 | def test_titanic_has_expected_shape(): 14 | """Test that the Titanic dataset has the expected number of rows and columns.""" 15 | df = titanic() 16 | assert df.shape == (891, 15), f"Expected (891, 15) but got {df.shape}" 17 | 18 | 19 | def test_titanic_has_expected_columns(): 20 | """Test that the Titanic dataset has the expected column names.""" 21 | df = titanic() 
22 | expected_columns = [ 23 | "survived", 24 | "pclass", 25 | "sex", 26 | "age", 27 | "sibsp", 28 | "parch", 29 | "fare", 30 | "embarked", 31 | "class", 32 | "who", 33 | "adult_male", 34 | "deck", 35 | "embark_town", 36 | "alive", 37 | "alone", 38 | ] 39 | assert list(df.columns) == expected_columns 40 | 41 | 42 | def test_titanic_data_integrity(): 43 | """Test basic data integrity of the Titanic dataset.""" 44 | df = titanic() 45 | 46 | # Check that survived column has only 0 and 1 values 47 | assert set(df["survived"].dropna().unique()) <= {0, 1} 48 | 49 | # Check that pclass has only 1, 2, 3 50 | assert set(df["pclass"].dropna().unique()) <= {1, 2, 3} 51 | 52 | # Check that sex has only 'male' and 'female' 53 | assert set(df["sex"].dropna().unique()) <= {"male", "female"} 54 | 55 | # Check that fare is non-negative 56 | assert (df["fare"].dropna() >= 0).all() 57 | 58 | 59 | def test_titanic_creates_new_copy(): 60 | """Test that titanic() returns a new copy each time it's called.""" 61 | df1 = titanic() 62 | df2 = titanic() 63 | 64 | # They should not be the same object 65 | assert df1 is not df2 66 | 67 | # But they should have the same data 68 | assert df1.equals(df2) 69 | 70 | 71 | def test_tips_returns_dataframe(): 72 | """Test that tips() returns a pandas DataFrame.""" 73 | df = tips() 74 | assert isinstance(df, pd.DataFrame) 75 | 76 | 77 | def test_tips_has_expected_shape(): 78 | """Test that the tips dataset has the expected number of rows and columns.""" 79 | df = tips() 80 | assert df.shape == (244, 7), f"Expected (244, 7) but got {df.shape}" 81 | 82 | 83 | def test_tips_has_expected_columns(): 84 | """Test that the tips dataset has the expected column names.""" 85 | df = tips() 86 | expected_columns = [ 87 | "total_bill", 88 | "tip", 89 | "sex", 90 | "smoker", 91 | "day", 92 | "time", 93 | "size", 94 | ] 95 | assert list(df.columns) == expected_columns 96 | 97 | 98 | def test_tips_data_integrity(): 99 | """Test basic data integrity of the tips 
dataset.""" 100 | df = tips() 101 | 102 | # Check that total_bill is positive 103 | assert (df["total_bill"] > 0).all() 104 | 105 | # Check that tip is non-negative 106 | assert (df["tip"] >= 0).all() 107 | 108 | # Check that sex has only expected values 109 | assert set(df["sex"].dropna().unique()) <= {"Male", "Female"} 110 | 111 | # Check that smoker has only expected values 112 | assert set(df["smoker"].dropna().unique()) <= {"Yes", "No"} 113 | 114 | # Check that size is positive 115 | assert (df["size"] > 0).all() 116 | 117 | 118 | def test_tips_creates_new_copy(): 119 | """Test that tips() returns a new copy each time it's called.""" 120 | df1 = tips() 121 | df2 = tips() 122 | 123 | # They should not be the same object 124 | assert df1 is not df2 125 | 126 | # But they should have the same data 127 | assert df1.equals(df2) 128 | -------------------------------------------------------------------------------- /pkg-py/tests/test_df_to_html.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import tempfile 3 | from pathlib import Path 4 | 5 | import pandas as pd 6 | import pytest 7 | from querychat._datasource import DataFrameSource, SQLAlchemySource 8 | from querychat._utils import df_to_html 9 | from sqlalchemy import create_engine 10 | 11 | 12 | @pytest.fixture 13 | def sample_dataframe(): 14 | """Create a sample pandas DataFrame for testing.""" 15 | return pd.DataFrame( 16 | { 17 | "id": [1, 2, 3, 4, 5], 18 | "name": ["Alice", "Bob", "Charlie", "Diana", "Eve"], 19 | "age": [25, 30, 35, 28, 32], 20 | "salary": [50000, 60000, 70000, 55000, 65000], 21 | }, 22 | ) 23 | 24 | 25 | @pytest.fixture 26 | def sample_sqlite(): 27 | """Create a temporary SQLite database with test data.""" 28 | temp_db = tempfile.NamedTemporaryFile(delete=False, suffix=".db") # noqa: SIM115 29 | temp_db.close() 30 | 31 | conn = sqlite3.connect(temp_db.name) 32 | cursor = conn.cursor() 33 | 34 | cursor.execute(""" 35 | CREATE TABLE 
employees ( 36 | id INTEGER PRIMARY KEY, 37 | name TEXT, 38 | age INTEGER, 39 | salary REAL 40 | ) 41 | """) 42 | 43 | test_data = [ 44 | (1, "Alice", 25, 50000), 45 | (2, "Bob", 30, 60000), 46 | (3, "Charlie", 35, 70000), 47 | (4, "Diana", 28, 55000), 48 | (5, "Eve", 32, 65000), 49 | ] 50 | 51 | cursor.executemany( 52 | "INSERT INTO employees (id, name, age, salary) VALUES (?, ?, ?, ?)", 53 | test_data, 54 | ) 55 | 56 | conn.commit() 57 | conn.close() 58 | 59 | engine = create_engine(f"sqlite:///{temp_db.name}") 60 | yield engine 61 | 62 | # Cleanup 63 | Path(temp_db.name).unlink() 64 | 65 | 66 | def test_df_to_html_with_dataframe_source_result(sample_dataframe): 67 | """Test that df_to_html() works with results from DataFrameSource.execute_query().""" 68 | source = DataFrameSource(sample_dataframe, "employees") 69 | 70 | # Execute query to get pandas DataFrame 71 | result_df = source.execute_query("SELECT * FROM employees WHERE age > 25") 72 | 73 | # This should succeed after the fix 74 | html_output = df_to_html(result_df) 75 | 76 | # Verify the HTML contains expected content 77 | assert isinstance(html_output, str) 78 | assert " 25") 91 | 92 | # This should succeed after the fix 93 | html_output = df_to_html(result_df) 94 | 95 | # Verify the HTML contains expected content 96 | assert isinstance(html_output, str) 97 | assert " Optional[Literal["expanded", "collapsed", "default"]]: 58 | """ 59 | Get and validate the tool details setting from environment variable. 
60 | 61 | Returns 62 | ------- 63 | Optional[str] 64 | The validated value of QUERYCHAT_TOOL_DETAILS environment variable 65 | (one of 'expanded', 'collapsed', or 'default'), or None if not set 66 | or invalid 67 | 68 | """ 69 | setting = os.environ.get("QUERYCHAT_TOOL_DETAILS") 70 | if setting is None: 71 | return None 72 | 73 | setting_lower = setting.lower() 74 | valid_settings = ("expanded", "collapsed", "default") 75 | 76 | if setting_lower not in valid_settings: 77 | warnings.warn( 78 | f"Invalid value for QUERYCHAT_TOOL_DETAILS: {setting!r}. " 79 | "Must be one of: 'expanded', 'collapsed', or 'default'", 80 | UserWarning, 81 | stacklevel=2, 82 | ) 83 | return None 84 | 85 | return setting_lower 86 | 87 | 88 | def querychat_tool_starts_open(action: Literal["update", "query", "reset"]) -> bool: 89 | """ 90 | Determine whether a tool card should be open based on action and setting. 91 | 92 | Parameters 93 | ---------- 94 | action : str 95 | The action type ('update', 'query', or 'reset') 96 | 97 | Returns 98 | ------- 99 | bool 100 | True if the tool card should be open, False otherwise 101 | 102 | """ 103 | setting = get_tool_details_setting() 104 | 105 | if setting is None: 106 | return action != "reset" 107 | 108 | if setting == "expanded": 109 | return True 110 | elif setting == "collapsed": 111 | return False 112 | else: # setting == "default" 113 | return action != "reset" 114 | 115 | 116 | def df_to_html(df: IntoFrame, maxrows: int = 5) -> str: 117 | """ 118 | Convert a DataFrame to an HTML table for display in chat. 
119 | 120 | Parameters 121 | ---------- 122 | df : IntoFrame 123 | The DataFrame to convert 124 | maxrows : int, default=5 125 | Maximum number of rows to display 126 | 127 | Returns 128 | ------- 129 | str 130 | HTML string representation of the table 131 | 132 | """ 133 | ndf = nw.from_native(df) 134 | 135 | if isinstance(ndf, (nw.LazyFrame, nw.DataFrame)): 136 | df_short = ndf.lazy().head(maxrows).collect() 137 | nrow_full = ndf.lazy().select(nw.len()).collect().item() 138 | else: 139 | raise TypeError( 140 | "Must be able to convert `df` into a Narwhals DataFrame or LazyFrame", 141 | ) 142 | 143 | # Generate HTML table 144 | table_html = df_short.to_pandas().to_html( 145 | index=False, 146 | classes="table table-striped", 147 | ) 148 | 149 | # Add note about truncated rows if needed 150 | if len(df_short) != nrow_full: 151 | rows_notice = ( 152 | f"\n\n(Showing only the first {maxrows} rows out of {nrow_full}.)\n" 153 | ) 154 | else: 155 | rows_notice = "" 156 | 157 | return table_html + rows_notice 158 | -------------------------------------------------------------------------------- /pkg-r/man/DataSource.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSource.R 3 | \name{DataSource} 4 | \alias{DataSource} 5 | \title{Data Source Base Class} 6 | \description{ 7 | An abstract R6 class defining the interface that custom QueryChat data 8 | sources must implement. This class should not be instantiated directly; 9 | instead, use one of its concrete implementations like \link{DataFrameSource} or 10 | \link{DBISource}. 11 | } 12 | \section{Public fields}{ 13 | \if{html}{\out{
}} 14 | \describe{ 15 | \item{\code{table_name}}{Name of the table to be used in SQL queries} 16 | } 17 | \if{html}{\out{
}} 18 | } 19 | \section{Methods}{ 20 | \subsection{Public methods}{ 21 | \itemize{ 22 | \item \href{#method-DataSource-get_db_type}{\code{DataSource$get_db_type()}} 23 | \item \href{#method-DataSource-get_schema}{\code{DataSource$get_schema()}} 24 | \item \href{#method-DataSource-execute_query}{\code{DataSource$execute_query()}} 25 | \item \href{#method-DataSource-test_query}{\code{DataSource$test_query()}} 26 | \item \href{#method-DataSource-get_data}{\code{DataSource$get_data()}} 27 | \item \href{#method-DataSource-cleanup}{\code{DataSource$cleanup()}} 28 | \item \href{#method-DataSource-clone}{\code{DataSource$clone()}} 29 | } 30 | } 31 | \if{html}{\out{
}} 32 | \if{html}{\out{}} 33 | \if{latex}{\out{\hypertarget{method-DataSource-get_db_type}{}}} 34 | \subsection{Method \code{get_db_type()}}{ 35 | Get the database type 36 | \subsection{Usage}{ 37 | \if{html}{\out{
}}\preformatted{DataSource$get_db_type()}\if{html}{\out{
}} 38 | } 39 | 40 | \subsection{Returns}{ 41 | A string describing the database type (e.g., "DuckDB", "SQLite") 42 | } 43 | } 44 | \if{html}{\out{
}} 45 | \if{html}{\out{}} 46 | \if{latex}{\out{\hypertarget{method-DataSource-get_schema}{}}} 47 | \subsection{Method \code{get_schema()}}{ 48 | Get schema information about the table 49 | \subsection{Usage}{ 50 | \if{html}{\out{
}}\preformatted{DataSource$get_schema(categorical_threshold = 20)}\if{html}{\out{
}} 51 | } 52 | 53 | \subsection{Arguments}{ 54 | \if{html}{\out{
}} 55 | \describe{ 56 | \item{\code{categorical_threshold}}{Maximum number of unique values for a text 57 | column to be considered categorical} 58 | } 59 | \if{html}{\out{
}} 60 | } 61 | \subsection{Returns}{ 62 | A string containing schema information formatted for LLM prompts 63 | } 64 | } 65 | \if{html}{\out{
}} 66 | \if{html}{\out{}} 67 | \if{latex}{\out{\hypertarget{method-DataSource-execute_query}{}}} 68 | \subsection{Method \code{execute_query()}}{ 69 | Execute a SQL query and return results 70 | \subsection{Usage}{ 71 | \if{html}{\out{
}}\preformatted{DataSource$execute_query(query)}\if{html}{\out{
}} 72 | } 73 | 74 | \subsection{Arguments}{ 75 | \if{html}{\out{
}} 76 | \describe{ 77 | \item{\code{query}}{SQL query string to execute} 78 | } 79 | \if{html}{\out{
}} 80 | } 81 | \subsection{Returns}{ 82 | A data frame containing query results 83 | } 84 | } 85 | \if{html}{\out{
}} 86 | \if{html}{\out{}} 87 | \if{latex}{\out{\hypertarget{method-DataSource-test_query}{}}} 88 | \subsection{Method \code{test_query()}}{ 89 | Test a SQL query by fetching only one row 90 | \subsection{Usage}{ 91 | \if{html}{\out{
}}\preformatted{DataSource$test_query(query)}\if{html}{\out{
}} 92 | } 93 | 94 | \subsection{Arguments}{ 95 | \if{html}{\out{
}} 96 | \describe{ 97 | \item{\code{query}}{SQL query string to test} 98 | } 99 | \if{html}{\out{
}} 100 | } 101 | \subsection{Returns}{ 102 | A data frame containing one row of results (or empty if no matches) 103 | } 104 | } 105 | \if{html}{\out{
}} 106 | \if{html}{\out{}} 107 | \if{latex}{\out{\hypertarget{method-DataSource-get_data}{}}} 108 | \subsection{Method \code{get_data()}}{ 109 | Get the unfiltered data as a data frame 110 | \subsection{Usage}{ 111 | \if{html}{\out{
}}\preformatted{DataSource$get_data()}\if{html}{\out{
}} 112 | } 113 | 114 | \subsection{Returns}{ 115 | A data frame containing all data from the table 116 | } 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-DataSource-cleanup}{}}} 121 | \subsection{Method \code{cleanup()}}{ 122 | Clean up resources (close connections, etc.) 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{DataSource$cleanup()}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Returns}{ 128 | NULL (invisibly) 129 | } 130 | } 131 | \if{html}{\out{
}} 132 | \if{html}{\out{}} 133 | \if{latex}{\out{\hypertarget{method-DataSource-clone}{}}} 134 | \subsection{Method \code{clone()}}{ 135 | The objects of this class are cloneable with this method. 136 | \subsection{Usage}{ 137 | \if{html}{\out{
}}\preformatted{DataSource$clone(deep = FALSE)}\if{html}{\out{
}} 138 | } 139 | 140 | \subsection{Arguments}{ 141 | \if{html}{\out{
}} 142 | \describe{ 143 | \item{\code{deep}}{Whether to make a deep clone.} 144 | } 145 | \if{html}{\out{
}} 146 | } 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /docs/logo-python.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 23 | 25 | 26 | 28 | image/svg+xml 29 | 31 | 32 | 33 | 34 | 61 | 69 | 70 | 72 | 74 | 78 | 82 | 83 | 85 | 89 | 93 | 94 | 104 | 114 | 115 | 119 | 123 | 124 | -------------------------------------------------------------------------------- /pkg-py/docs/data-sources.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Data Sources 3 | lightbox: true 4 | --- 5 | 6 | `querychat` supports many types of data sources, including: 7 | 8 | 1. Any [narwhals-compatible](https://narwhals-dev.github.io/narwhals/) data frame. 9 | 2. Any [SQLAlchemy](https://www.sqlalchemy.org/) database. 10 | 3. A custom [DataSource](reference/types.DataSource.qmd) interface/protocol. 11 | 12 | The sections below describe how to use each type of data source with `querychat`. 13 | 14 | 15 | ## Data frames 16 | 17 | You can use any [narwhals-compatible](https://narwhals-dev.github.io/narwhals/) data frame as a data source in `querychat`. This includes popular data frame libraries like [pandas](https://pandas.pydata.org/), [polars](https://www.pola.rs/), [pyarrow](https://arrow.apache.org/docs/python/), and many more. 
18 | 19 | ::: {.panel-tabset .panel-pills} 20 | 21 | ### Pandas 22 | 23 | ```{.python filename="pandas-app.py"} 24 | import pandas as pd 25 | from querychat import QueryChat 26 | 27 | mtcars = pd.read_csv( 28 | "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv" 29 | ) 30 | 31 | qc = QueryChat(mtcars, "mtcars") 32 | app = qc.app() 33 | ``` 34 | 35 | ### Polars 36 | 37 | ```{.python filename="polars-app.py"} 38 | import polars as pl 39 | from querychat import QueryChat 40 | 41 | mtcars = pl.read_csv( 42 | "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv" 43 | ) 44 | 45 | qc = QueryChat(mtcars, "mtcars") 46 | app = qc.app() 47 | ``` 48 | 49 | ### Pyarrow 50 | 51 | ```{.python filename="pyarrow-app.py"} 52 | import pyarrow as pa 53 | import pyarrow.csv as pv 54 | from querychat import QueryChat 55 | 56 | mtcars = pv.read_csv( 57 | "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv" 58 | ).to_table() 59 | 60 | qc = QueryChat(mtcars, "mtcars") 61 | app = qc.app() 62 | ``` 63 | 64 | ::: 65 | 66 | If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `pandas.DataFrame` by default. 67 | 68 | ## Databases 69 | 70 | You can also connect `querychat` directly to any database supported by [SQLAlchemy](https://www.sqlalchemy.org/). This includes popular databases like SQLite, DuckDB, PostgreSQL, MySQL, and many more. 71 | 72 | Assuming you have a database set up and accessible, you can pass a SQLAlchemy [database URL](https://docs.sqlalchemy.org/en/20/core/engines.html) to `create_engine()`, and then pass the resulting engine to `QueryChat`. Below are some examples for common databases. 
73 | 74 | 75 | ::: {.panel-tabset} 76 | 77 | ### DuckDB 78 | 79 | ```shell 80 | pip install duckdb duckdb-engine 81 | ``` 82 | 83 | ```{.python filename="duckdb-app.py"} 84 | from pathlib import Path 85 | from sqlalchemy import create_engine 86 | from querychat import QueryChat 87 | 88 | # Assumes my_database.duckdb is in the same directory as this script 89 | db_path = Path(__file__).parent / "my_database.duckdb" 90 | engine = create_engine(f"duckdb:///{db_path}") 91 | 92 | qc = QueryChat(engine, "my_table") 93 | app = qc.app() 94 | ``` 95 | 96 | ### SQLite 97 | 98 | ```{.python filename="sqlite-app.py"} 99 | from pathlib import Path 100 | from sqlalchemy import create_engine 101 | from querychat import QueryChat 102 | 103 | # Assumes my_database.db is in the same directory as this script 104 | db_path = Path(__file__).parent / "my_database.db" 105 | engine = create_engine(f"sqlite:///{db_path}") 106 | 107 | qc = QueryChat(engine, "my_table") 108 | app = qc.app() 109 | ``` 110 | 111 | 112 | ### PostgreSQL 113 | 114 | ```shell 115 | pip install psycopg2-binary 116 | ``` 117 | 118 | ```{.python filename="postgresql-app.py"} 119 | from sqlalchemy import create_engine 120 | from querychat import QueryChat 121 | 122 | engine = create_engine("postgresql+psycopg2://user:password@localhost:5432/mydatabase") 123 | qc = QueryChat(engine, "my_table") 124 | app = qc.app() 125 | ``` 126 | 127 | ### MySQL 128 | 129 | ```shell 130 | pip install pymysql 131 | ``` 132 | 133 | ```{.python filename="mysql-app.py"} 134 | from sqlalchemy import create_engine 135 | from querychat import QueryChat 136 | 137 | engine = create_engine("mysql+pymysql://user:password@localhost:3306/mydatabase") 138 | qc = QueryChat(engine, "my_table") 139 | app = qc.app() 140 | ``` 141 | 142 | ::: 143 | 144 | 145 | If you don't have a database set up, you can easily create a local DuckDB database from a CSV file using the following code: 146 | 147 | ```{.python filename="create-duckdb.py"} 148 | import
duckdb 149 | 150 | conn = duckdb.connect("my_database.duckdb") 151 | 152 | conn.execute(""" 153 | CREATE TABLE my_table AS 154 | SELECT * FROM read_csv_auto('path/to/your/file.csv') 155 | """) 156 | ``` 157 | 158 | Or, if you have a pandas DataFrame, you can create the DuckDB database like so: 159 | 160 | ```{.python filename="create-duckdb-from-pandas.py"} 161 | import duckdb 162 | import pandas as pd 163 | from querychat.data import titanic 164 | 165 | conn = duckdb.connect("my_database.duckdb") 166 | conn.register('titanic_df', titanic()) 167 | conn.execute(""" 168 | CREATE TABLE titanic AS 169 | SELECT * FROM titanic_df 170 | """) 171 | ``` 172 | 173 | Then you can connect to this database using the DuckDB example above (changing the table name as appropriate). 174 | 175 | ## Custom sources 176 | 177 | If you have a custom data source that doesn't fit into the above categories, you can implement the [DataSource](reference/types.DataSource.qmd) interface/protocol. This requires implementing methods for getting schema information and executing queries. -------------------------------------------------------------------------------- /pkg-r/tests/testthat/_snaps/DataSource.md: -------------------------------------------------------------------------------- 1 | # DataSource base class / throws not_implemented_error for all abstract methods 2 | 3 | Code 4 | base_source$get_db_type() 5 | Condition 6 | Error in `base_source$get_db_type()`: 7 | !
`execute_query()` must be implemented by subclass 24 | 25 | --- 26 | 27 | Code 28 | base_source$test_query("SELECT * FROM test LIMIT 1") 29 | Condition 30 | Error in `base_source$test_query()`: 31 | ! `test_query()` must be implemented by subclass 32 | 33 | --- 34 | 35 | Code 36 | base_source$get_data() 37 | Condition 38 | Error in `base_source$get_data()`: 39 | ! `get_data()` must be implemented by subclass 40 | 41 | --- 42 | 43 | Code 44 | base_source$cleanup() 45 | Condition 46 | Error in `base_source$cleanup()`: 47 | ! `cleanup()` must be implemented by subclass 48 | 49 | # DataFrameSource$new() / errors with non-data.frame input 50 | 51 | Code 52 | DataFrameSource$new(list(a = 1, b = 2), "test_table") 53 | Condition 54 | Error in `initialize()`: 55 | ! `df` must be a data frame, not a list. 56 | 57 | --- 58 | 59 | Code 60 | DataFrameSource$new(c(1, 2, 3), "test_table") 61 | Condition 62 | Error in `initialize()`: 63 | ! `df` must be a data frame, not a double vector. 64 | 65 | --- 66 | 67 | Code 68 | DataFrameSource$new(NULL, "test_table") 69 | Condition 70 | Error in `initialize()`: 71 | ! `df` must be a data frame, not `NULL`. 72 | 73 | # DataFrameSource$new() / errors with invalid table names 74 | 75 | Code 76 | DataFrameSource$new(test_df, "123_invalid") 77 | Condition 78 | Error in `initialize()`: 79 | ! `table_name` must be a valid SQL table name 80 | i Table names must begin with a letter and contain only letters, numbers, and underscores 81 | x You provided: "123_invalid" 82 | Code 83 | DataFrameSource$new(test_df, "table-name") 84 | Condition 85 | Error in `initialize()`: 86 | ! `table_name` must be a valid SQL table name 87 | i Table names must begin with a letter and contain only letters, numbers, and underscores 88 | x You provided: "table-name" 89 | Code 90 | DataFrameSource$new(test_df, "table name") 91 | Condition 92 | Error in `initialize()`: 93 | ! 
`table_name` must be a valid SQL table name 94 | i Table names must begin with a letter and contain only letters, numbers, and underscores 95 | x You provided: "table name" 96 | Code 97 | DataFrameSource$new(test_df, "") 98 | Condition 99 | Error in `initialize()`: 100 | ! `table_name` must be a valid SQL table name 101 | i Table names must begin with a letter and contain only letters, numbers, and underscores 102 | x You provided: "" 103 | Code 104 | DataFrameSource$new(test_df, NULL) 105 | Condition 106 | Error in `initialize()`: 107 | ! `table_name` must be a single string, not `NULL`. 108 | 109 | # DBISource$new() / errors with non-DBI connection 110 | 111 | Code 112 | DBISource$new(list(fake = "connection"), "test_table") 113 | Condition 114 | Error in `initialize()`: 115 | ! `conn` must be a , not a list 116 | 117 | --- 118 | 119 | Code 120 | DBISource$new(NULL, "test_table") 121 | Condition 122 | Error in `initialize()`: 123 | ! `conn` must be a , not NULL 124 | 125 | --- 126 | 127 | Code 128 | DBISource$new("not a connection", "test_table") 129 | Condition 130 | Error in `initialize()`: 131 | ! `conn` must be a , not a string 132 | 133 | # DBISource$new() / errors with invalid table_name types 134 | 135 | Code 136 | DBISource$new(db$conn, 123) 137 | Condition 138 | Error in `initialize()`: 139 | ! `table_name` must be a single character string or a `DBI::Id()` object 140 | 141 | --- 142 | 143 | Code 144 | DBISource$new(db$conn, c("table1", "table2")) 145 | Condition 146 | Error in `initialize()`: 147 | ! `table_name` must be a single character string or a `DBI::Id()` object 148 | 149 | --- 150 | 151 | Code 152 | DBISource$new(db$conn, list(name = "table")) 153 | Condition 154 | Error in `initialize()`: 155 | ! 
`table_name` must be a single character string or a `DBI::Id()` object 156 | 157 | # DBISource$new() / errors when table does not exist 158 | 159 | Code 160 | DBISource$new(db$conn, "non_existent_table") 161 | Condition 162 | Error in `initialize()`: 163 | ! Table "`non_existent_table`" not found in database 164 | i If you're using a table in a catalog or schema, pass a `DBI::Id()` object to `table_name` 165 | 166 | # assemble_system_prompt() / errors with non-DataSource input 167 | 168 | Code 169 | assemble_system_prompt(list(not = "a data source"), data_description = "Test") 170 | Condition 171 | Error in `assemble_system_prompt()`: 172 | ! `source` must be a object, not a list 173 | 174 | --- 175 | 176 | Code 177 | assemble_system_prompt(data.frame(x = 1:3), data_description = "Test") 178 | Condition 179 | Error in `assemble_system_prompt()`: 180 | ! `source` must be a object, not a data frame 181 | 182 | -------------------------------------------------------------------------------- /pkg-r/inst/prompts/prompt.md: -------------------------------------------------------------------------------- 1 | You are a data dashboard chatbot that operates in a sidebar interface. Your role is to help users interact with their data through filtering, sorting, and answering questions. 2 | 3 | You have access to a {{db_type}} SQL database with the following schema: 4 | 5 | 6 | {{schema}} 7 | 8 | 9 | {{#data_description}} 10 | Here is additional information about the data: 11 | 12 | 13 | {{data_description}} 14 | 15 | {{/data_description}} 16 | 17 | For security reasons, you may only query this specific table. 18 | 19 | {{#is_duck_db}} 20 | ### DuckDB SQL Tips 21 | 22 | **Percentile functions:** In standard SQL, `percentile_cont` and `percentile_disc` are "ordered set" aggregate functions that use the `WITHIN GROUP (ORDER BY sort_expression)` syntax. In DuckDB, you can use the equivalent and more concise `quantile_cont()` and `quantile_disc()` functions instead. 
23 | 24 | **When writing DuckDB queries, prefer the `quantile_*` functions** as they are more concise and idiomatic. Both syntaxes are valid in DuckDB. 25 | 26 | Example: 27 | ```sql 28 | -- Standard SQL syntax (works but verbose) 29 | percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) 30 | 31 | -- Preferred DuckDB syntax (more concise) 32 | quantile_cont(salary, 0.5) 33 | ``` 34 | 35 | {{/is_duck_db}} 36 | ## Your Capabilities 37 | 38 | You can handle three types of requests: 39 | 40 | ### 1. Filtering and Sorting Data 41 | 42 | When the user asks you to filter or sort the dashboard, e.g. "Show me..." or "Which ____ have the highest ____?" or "Filter to only include ____": 43 | 44 | - Write a {{db_type}} SQL SELECT query 45 | - Call `querychat_update_dashboard` with the query and a descriptive title 46 | - The query MUST return all columns from the schema (you can use `SELECT *`) 47 | - Use a single SQL query even if complex (subqueries and CTEs are fine) 48 | - Optimize for **readability over efficiency** 49 | - Include SQL comments to explain complex logic 50 | - No confirmation messages are needed: the user will see your query in the dashboard. 51 | 52 | The user may ask to "reset" or "start over"; that means clearing the filter and title. Do this by calling `querychat_reset_dashboard()`. 53 | 54 | ### 2. Answering Questions About Data 55 | 56 | When the user asks you a question about the data, e.g. "What is the average ____?" or "How many ____ are there?" or "Which ____ has the highest ____?": 57 | 58 | - Use the `querychat_query` tool to run SQL queries 59 | - Always use SQL for calculations (counting, averaging, etc.) - NEVER do manual calculations 60 | - Provide both the answer and a comprehensive explanation of how you arrived at it 61 | - Users can see your SQL queries and will ask you to explain the code if needed 62 | - If you cannot complete the request using SQL, politely decline and explain why 63 | 64 | ### 3. 
Providing Suggestions for Next Steps 65 | 66 | #### Suggestion Syntax 67 | 68 | Use `<span class="suggestion">` tags to create clickable prompt buttons in the UI. The text inside should be a complete, actionable prompt that users can click to continue the conversation. 69 | 70 | #### Syntax Examples 71 | 72 | **List format (most common):** 73 | ```md 74 | * <span class="suggestion">Show me examples of …</span> 75 | * <span class="suggestion">What are the key differences between …</span> 76 | * <span class="suggestion">Explain how …</span> 77 | ``` 78 | 79 | **Inline in prose:** 80 | ```md 81 | You might want to <span class="suggestion">explore the advanced features</span> or <span class="suggestion">show me a practical example</span>. 82 | ``` 83 | 84 | **Nested lists:** 85 | ```md 86 | * Analyze the data 87 | * <span class="suggestion">What's the average …?</span> 88 | * <span class="suggestion">How many …?</span> 89 | * Filter and sort 90 | * <span class="suggestion">Show records from the year …</span> 91 | * <span class="suggestion">Sort the ____ by ____ …</span> 92 | ``` 93 | 94 | #### When to Include Suggestions 95 | 96 | **Always provide suggestions:** 97 | - At the start of a conversation 98 | - When beginning a new line of exploration 99 | - After completing a topic (to suggest new directions) 100 | 101 | **Use best judgment for:** 102 | - Mid-conversation responses (include when they add clear value) 103 | - Follow-up answers (include if multiple paths forward exist) 104 | 105 | **Avoid when:** 106 | - The user has asked a very specific question requiring only a direct answer 107 | - The conversation is clearly wrapping up 108 | 109 | #### Guidelines 110 | 111 | - Suggestions can appear **anywhere** in your response—not just at the end 112 | - Use list format at the end for 2-4 follow-up options (most common pattern) 113 | - Use inline suggestions within prose when contextually appropriate 114 | - Write suggestions as complete, natural prompts (not fragments) 115 | - Only suggest actions you can perform with your tools and capabilities 116 | - Never duplicate the suggestion text in your response 117 | - Never use generic phrases like "If you'd like to..." or "Would you like to explore..."
— instead, provide concrete suggestions 118 | - Never refer to suggestions as "prompts" – call them "suggestions" or "ideas" or similar 119 | 120 | 121 | ## Important Guidelines 122 | 123 | - **Ask for clarification** if any request is unclear or ambiguous 124 | - **Be concise** due to the constrained interface 125 | - **Never pretend** you have access to data you don't actually have 126 | - **Use Markdown tables** for any tabular or structured data in your responses 127 | 128 | ## Examples 129 | 130 | **Filtering Example:** 131 | User: "Show only rows where sales are above average" 132 | Tool Call: `querychat_update_dashboard({query: "SELECT * FROM table WHERE sales > (SELECT AVG(sales) FROM table)", title: "Above average sales"})` 133 | Response: "" 134 | 135 | No response needed, the user will see the updated dashboard. 136 | 137 | **Question Example:** 138 | User: "What's the average revenue?" 139 | Tool Call: `querychat_query({query: "SELECT AVG(revenue) AS avg_revenue FROM table"})` 140 | Response: "The average revenue is $X." 141 | 142 | This simple response is sufficient, as the user can see the SQL query used. 143 | 144 | {{#extra_instructions}} 145 | ## Additional Instructions 146 | 147 | {{extra_instructions}} 148 | {{/extra_instructions}} 149 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/prompts/prompt.md: -------------------------------------------------------------------------------- 1 | You are a data dashboard chatbot that operates in a sidebar interface. Your role is to help users interact with their data through filtering, sorting, and answering questions. 2 | 3 | You have access to a {{db_type}} SQL database with the following schema: 4 | 5 | 6 | {{schema}} 7 | 8 | 9 | {{#data_description}} 10 | Here is additional information about the data: 11 | 12 | 13 | {{data_description}} 14 | 15 | {{/data_description}} 16 | 17 | For security reasons, you may only query this specific table. 
18 | 19 | {{#is_duck_db}} 20 | ### DuckDB SQL Tips 21 | 22 | **Percentile functions:** In standard SQL, `percentile_cont` and `percentile_disc` are "ordered set" aggregate functions that use the `WITHIN GROUP (ORDER BY sort_expression)` syntax. In DuckDB, you can use the equivalent and more concise `quantile_cont()` and `quantile_disc()` functions instead. 23 | 24 | **When writing DuckDB queries, prefer the `quantile_*` functions** as they are more concise and idiomatic. Both syntaxes are valid in DuckDB. 25 | 26 | Example: 27 | ```sql 28 | -- Standard SQL syntax (works but verbose) 29 | percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) 30 | 31 | -- Preferred DuckDB syntax (more concise) 32 | quantile_cont(salary, 0.5) 33 | ``` 34 | 35 | {{/is_duck_db}} 36 | ## Your Capabilities 37 | 38 | You can handle three types of requests: 39 | 40 | ### 1. Filtering and Sorting Data 41 | 42 | When the user asks you to filter or sort the dashboard, e.g. "Show me..." or "Which ____ have the highest ____?" or "Filter to only include ____": 43 | 44 | - Write a {{db_type}} SQL SELECT query 45 | - Call `querychat_update_dashboard` with the query and a descriptive title 46 | - The query MUST return all columns from the schema (you can use `SELECT *`) 47 | - Use a single SQL query even if complex (subqueries and CTEs are fine) 48 | - Optimize for **readability over efficiency** 49 | - Include SQL comments to explain complex logic 50 | - No confirmation messages are needed: the user will see your query in the dashboard. 51 | 52 | The user may ask to "reset" or "start over"; that means clearing the filter and title. Do this by calling `querychat_reset_dashboard()`. 53 | 54 | ### 2. Answering Questions About Data 55 | 56 | When the user asks you a question about the data, e.g. "What is the average ____?" or "How many ____ are there?" 
or "Which ____ has the highest ____?": 57 | 58 | - Use the `querychat_query` tool to run SQL queries 59 | - Always use SQL for calculations (counting, averaging, etc.) - NEVER do manual calculations 60 | - Provide both the answer and a comprehensive explanation of how you arrived at it 61 | - Users can see your SQL queries and will ask you to explain the code if needed 62 | - If you cannot complete the request using SQL, politely decline and explain why 63 | 64 | ### 3. Providing Suggestions for Next Steps 65 | 66 | #### Suggestion Syntax 67 | 68 | Use `<span class="suggestion">` tags to create clickable prompt buttons in the UI. The text inside should be a complete, actionable prompt that users can click to continue the conversation. 69 | 70 | #### Syntax Examples 71 | 72 | **List format (most common):** 73 | ```md 74 | * <span class="suggestion">Show me examples of …</span> 75 | * <span class="suggestion">What are the key differences between …</span> 76 | * <span class="suggestion">Explain how …</span> 77 | ``` 78 | 79 | **Inline in prose:** 80 | ```md 81 | You might want to <span class="suggestion">explore the advanced features</span> or <span class="suggestion">show me a practical example</span>. 82 | ``` 83 | 84 | **Nested lists:** 85 | ```md 86 | * Analyze the data 87 | * <span class="suggestion">What's the average …?</span> 88 | * <span class="suggestion">How many …?</span>
89 | * Filter and sort 90 | * <span class="suggestion">Show records from the year …</span> 91 | * <span class="suggestion">Sort the ____ by ____ …</span> 92 | ``` 93 | 94 | #### When to Include Suggestions 95 | 96 | **Always provide suggestions:** 97 | - At the start of a conversation 98 | - When beginning a new line of exploration 99 | - After completing a topic (to suggest new directions) 100 | 101 | **Use best judgment for:** 102 | - Mid-conversation responses (include when they add clear value) 103 | - Follow-up answers (include if multiple paths forward exist) 104 | 105 | **Avoid when:** 106 | - The user has asked a very specific question requiring only a direct answer 107 | - The conversation is clearly wrapping up 108 | 109 | #### Guidelines 110 | 111 | - Suggestions can appear **anywhere** in your response—not just at the end 112 | - Use list format at the end for 2-4 follow-up options (most common pattern) 113 | - Use inline suggestions within prose when contextually appropriate 114 | - Write suggestions as complete, natural prompts (not fragments) 115 | - Only suggest actions you can perform with your tools and capabilities 116 | - Never duplicate the suggestion text in your response 117 | - Never use generic phrases like "If you'd like to..." or "Would you like to explore..."
— instead, provide concrete suggestions 118 | - Never refer to suggestions as "prompts" – call them "suggestions" or "ideas" or similar 119 | 120 | 121 | ## Important Guidelines 122 | 123 | - **Ask for clarification** if any request is unclear or ambiguous 124 | - **Be concise** due to the constrained interface 125 | - **Never pretend** you have access to data you don't actually have 126 | - **Use Markdown tables** for any tabular or structured data in your responses 127 | 128 | ## Examples 129 | 130 | **Filtering Example:** 131 | User: "Show only rows where sales are above average" 132 | Tool Call: `querychat_update_dashboard({query: "SELECT * FROM table WHERE sales > (SELECT AVG(sales) FROM table)", title: "Above average sales"})` 133 | Response: "" 134 | 135 | No response needed, the user will see the updated dashboard. 136 | 137 | **Question Example:** 138 | User: "What's the average revenue?" 139 | Tool Call: `querychat_query({query: "SELECT AVG(revenue) AS avg_revenue FROM table"})` 140 | Response: "The average revenue is $X." 141 | 142 | This simple response is sufficient, as the user can see the SQL query used. 
143 | 144 | {{#extra_instructions}} 145 | ## Additional Instructions 146 | 147 | {{extra_instructions}} 148 | {{/extra_instructions}} 149 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | animation.screenflow/ 3 | README_files/ 4 | README.html 5 | .DS_Store 6 | python-package/examples/titanic.db 7 | .quarto 8 | *.db 9 | 10 | docs/r 11 | docs/py 12 | 13 | !pkg-py/docs 14 | 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | share/python-wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | cover/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | db.sqlite3-journal 77 | 78 | # Flask stuff: 79 | instance/ 80 | .webassets-cache 81 | 82 | # Scrapy stuff: 83 | .scrapy 84 | 85 | # Sphinx documentation 86 | docs/_build/ 87 | 88 | # PyBuilder 89 | .pybuilder/ 90 | target/ 91 | 92 | # Jupyter Notebook 93 | .ipynb_checkpoints 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | # For a library or package, you might want to ignore these files since the code is 101 | # intended to run in multiple environments; otherwise, check them in: 102 | # .python-version 103 | 104 | # pipenv 105 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 106 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 107 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 108 | # install all needed dependencies. 109 | #Pipfile.lock 110 | 111 | # UV 112 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 113 | # This is especially recommended for binary packages to ensure reproducibility, and is more 114 | # commonly ignored for libraries. 115 | #uv.lock 116 | 117 | # poetry 118 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 119 | # This is especially recommended for binary packages to ensure reproducibility, and is more 120 | # commonly ignored for libraries. 
121 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 122 | #poetry.lock 123 | 124 | # pdm 125 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 126 | #pdm.lock 127 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 128 | # in version control. 129 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 130 | .pdm.toml 131 | .pdm-python 132 | .pdm-build/ 133 | 134 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 135 | __pypackages__/ 136 | 137 | # Celery stuff 138 | celerybeat-schedule 139 | celerybeat.pid 140 | 141 | # SageMath parsed files 142 | *.sage.py 143 | 144 | # Environments 145 | .env 146 | .venv 147 | env/ 148 | venv/ 149 | ENV/ 150 | env.bak/ 151 | venv.bak/ 152 | 153 | # Spyder project settings 154 | .spyderproject 155 | .spyproject 156 | 157 | # Rope project settings 158 | .ropeproject 159 | 160 | # mkdocs documentation 161 | /site 162 | 163 | # mypy 164 | .mypy_cache/ 165 | .dmypy.json 166 | dmypy.json 167 | 168 | # Pyre type checker 169 | .pyre/ 170 | 171 | # pytype static type analyzer 172 | .pytype/ 173 | 174 | # Cython debug symbols 175 | cython_debug/ 176 | 177 | # PyCharm 178 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 179 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 180 | # and can be added to the global gitignore or merged into this file. For a more nuclear 181 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
182 | #.idea/ 183 | 184 | # Visual Studio Code 185 | # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 186 | # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore 187 | # and can be added to the global gitignore or merged into this file. However, if you prefer, 188 | # you could uncomment the following to ignore the enitre vscode folder 189 | # .vscode/ 190 | 191 | # Ruff stuff: 192 | .ruff_cache/ 193 | 194 | # PyPI configuration file 195 | .pypirc 196 | 197 | # Cursor 198 | # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to 199 | # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data 200 | # refer to https://docs.cursor.com/context/ignore-files 201 | .cursorignore 202 | .cursorindexingignore 203 | 204 | # History files 205 | .Rhistory 206 | .Rapp.history 207 | 208 | # Session Data files 209 | .RData 210 | .RDataTmp 211 | 212 | # User-specific files 213 | .Ruserdata 214 | 215 | # Example code in package build process 216 | *-Ex.R 217 | 218 | # Output files from R CMD build 219 | /*.tar.gz 220 | 221 | # Output files from R CMD check 222 | /*.Rcheck/ 223 | 224 | # RStudio files 225 | .Rproj.user/ 226 | 227 | # produced vignettes 228 | vignettes/*.html 229 | vignettes/*.pdf 230 | 231 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 232 | .httr-oauth 233 | 234 | # knitr and R markdown default cache directories 235 | *_cache/ 236 | /cache/ 237 | 238 | # Temporary files created by R markdown 239 | *.utf8.md 240 | *.knit.md 241 | 242 | # R Environment Variables 243 | .Renviron 244 | 245 | # pkgdown site 246 | #docs/ 247 | 248 | # translation temp files 249 | po/*~ 250 | 251 | # RStudio Connect folder 252 | rsconnect/ 253 | python-package/CLAUDE.md 254 | 255 | uv.lock 256 | _dev 257 | 258 | # R ignores 259 | /.quarto/ 260 | .Rprofile 261 | renv/ 262 | renv.lock 263 | 264 | # Claude 265 
| .claude/settings.local.json 266 | 267 | /.luarc.json 268 | -------------------------------------------------------------------------------- /pkg-r/man/DataFrameSource.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSource.R 3 | \name{DataFrameSource} 4 | \alias{DataFrameSource} 5 | \title{Data Frame Source} 6 | \description{ 7 | A DataSource implementation that wraps a data frame using DuckDB for SQL 8 | query execution. 9 | } 10 | \details{ 11 | This class creates an in-memory DuckDB connection and registers the provided 12 | data frame as a table. All SQL queries are executed against this DuckDB table. 13 | } 14 | \examples{ 15 | \dontrun{ 16 | # Create a data frame source 17 | df_source <- DataFrameSource$new(mtcars, "mtcars") 18 | 19 | # Get database type 20 | df_source$get_db_type() # Returns "DuckDB" 21 | 22 | # Execute a query 23 | result <- df_source$execute_query("SELECT * FROM mtcars WHERE mpg > 25") 24 | 25 | # Clean up when done 26 | df_source$cleanup() 27 | } 28 | 29 | ## ------------------------------------------------ 30 | ## Method `DataFrameSource$new` 31 | ## ------------------------------------------------ 32 | 33 | \dontrun{ 34 | source <- DataFrameSource$new(iris, "iris") 35 | } 36 | } 37 | \section{Super class}{ 38 | \code{\link[querychat:DataSource]{querychat::DataSource}} -> \code{DataFrameSource} 39 | } 40 | \section{Methods}{ 41 | \subsection{Public methods}{ 42 | \itemize{ 43 | \item \href{#method-DataFrameSource-new}{\code{DataFrameSource$new()}} 44 | \item \href{#method-DataFrameSource-get_db_type}{\code{DataFrameSource$get_db_type()}} 45 | \item \href{#method-DataFrameSource-get_schema}{\code{DataFrameSource$get_schema()}} 46 | \item \href{#method-DataFrameSource-execute_query}{\code{DataFrameSource$execute_query()}} 47 | \item \href{#method-DataFrameSource-test_query}{\code{DataFrameSource$test_query()}} 48 
| \item \href{#method-DataFrameSource-get_data}{\code{DataFrameSource$get_data()}} 49 | \item \href{#method-DataFrameSource-cleanup}{\code{DataFrameSource$cleanup()}} 50 | \item \href{#method-DataFrameSource-clone}{\code{DataFrameSource$clone()}} 51 | } 52 | } 53 | \if{html}{\out{
}} 54 | \if{html}{\out{}} 55 | \if{latex}{\out{\hypertarget{method-DataFrameSource-new}{}}} 56 | \subsection{Method \code{new()}}{ 57 | Create a new DataFrameSource 58 | \subsection{Usage}{ 59 | \if{html}{\out{
}}\preformatted{DataFrameSource$new(df, table_name)}\if{html}{\out{
}} 60 | } 61 | 62 | \subsection{Arguments}{ 63 | \if{html}{\out{
}} 64 | \describe{ 65 | \item{\code{df}}{A data frame.} 66 | 67 | \item{\code{table_name}}{Name to use for the table in SQL queries. Must be a 68 | valid table name (start with letter, contain only letters, numbers, 69 | and underscores)} 70 | } 71 | \if{html}{\out{
}} 72 | } 73 | \subsection{Returns}{ 74 | A new DataFrameSource object 75 | } 76 | \subsection{Examples}{ 77 | \if{html}{\out{
}} 78 | \preformatted{\dontrun{ 79 | source <- DataFrameSource$new(iris, "iris") 80 | } 81 | } 82 | \if{html}{\out{
}} 83 | 84 | } 85 | 86 | } 87 | \if{html}{\out{
}} 88 | \if{html}{\out{}} 89 | \if{latex}{\out{\hypertarget{method-DataFrameSource-get_db_type}{}}} 90 | \subsection{Method \code{get_db_type()}}{ 91 | Get the database type 92 | \subsection{Usage}{ 93 | \if{html}{\out{
}}\preformatted{DataFrameSource$get_db_type()}\if{html}{\out{
}} 94 | } 95 | 96 | \subsection{Returns}{ 97 | The string "DuckDB" 98 | } 99 | } 100 | \if{html}{\out{
}} 101 | \if{html}{\out{}} 102 | \if{latex}{\out{\hypertarget{method-DataFrameSource-get_schema}{}}} 103 | \subsection{Method \code{get_schema()}}{ 104 | Get schema information for the data frame 105 | \subsection{Usage}{ 106 | \if{html}{\out{
}}\preformatted{DataFrameSource$get_schema(categorical_threshold = 20)}\if{html}{\out{
}} 107 | } 108 | 109 | \subsection{Arguments}{ 110 | \if{html}{\out{
}} 111 | \describe{ 112 | \item{\code{categorical_threshold}}{Maximum number of unique values for a text 113 | column to be considered categorical (default: 20)} 114 | } 115 | \if{html}{\out{
}} 116 | } 117 | \subsection{Returns}{ 118 | A string describing the schema 119 | } 120 | } 121 | \if{html}{\out{
}} 122 | \if{html}{\out{}} 123 | \if{latex}{\out{\hypertarget{method-DataFrameSource-execute_query}{}}} 124 | \subsection{Method \code{execute_query()}}{ 125 | Execute a SQL query 126 | \subsection{Usage}{ 127 | \if{html}{\out{
}}\preformatted{DataFrameSource$execute_query(query)}\if{html}{\out{
}} 128 | } 129 | 130 | \subsection{Arguments}{ 131 | \if{html}{\out{
}} 132 | \describe{ 133 | \item{\code{query}}{SQL query string. If NULL or empty, returns all data} 134 | } 135 | \if{html}{\out{
}} 136 | } 137 | \subsection{Returns}{ 138 | A data frame with query results 139 | } 140 | } 141 | \if{html}{\out{
}} 142 | \if{html}{\out{}} 143 | \if{latex}{\out{\hypertarget{method-DataFrameSource-test_query}{}}} 144 | \subsection{Method \code{test_query()}}{ 145 | Test a SQL query by fetching only one row 146 | \subsection{Usage}{ 147 | \if{html}{\out{
}}\preformatted{DataFrameSource$test_query(query)}\if{html}{\out{
}} 148 | } 149 | 150 | \subsection{Arguments}{ 151 | \if{html}{\out{
}} 152 | \describe{ 153 | \item{\code{query}}{SQL query string} 154 | } 155 | \if{html}{\out{
}} 156 | } 157 | \subsection{Returns}{ 158 | A data frame with one row of results 159 | } 160 | } 161 | \if{html}{\out{
}} 162 | \if{html}{\out{}} 163 | \if{latex}{\out{\hypertarget{method-DataFrameSource-get_data}{}}} 164 | \subsection{Method \code{get_data()}}{ 165 | Get all data from the table 166 | \subsection{Usage}{ 167 | \if{html}{\out{
}}\preformatted{DataFrameSource$get_data()}\if{html}{\out{
}} 168 | } 169 | 170 | \subsection{Returns}{ 171 | A data frame containing all data 172 | } 173 | } 174 | \if{html}{\out{
}} 175 | \if{html}{\out{}} 176 | \if{latex}{\out{\hypertarget{method-DataFrameSource-cleanup}{}}} 177 | \subsection{Method \code{cleanup()}}{ 178 | Close the DuckDB connection 179 | \subsection{Usage}{ 180 | \if{html}{\out{
}}\preformatted{DataFrameSource$cleanup()}\if{html}{\out{
}} 181 | } 182 | 183 | \subsection{Returns}{ 184 | NULL (invisibly) 185 | } 186 | } 187 | \if{html}{\out{
}} 188 | \if{html}{\out{}} 189 | \if{latex}{\out{\hypertarget{method-DataFrameSource-clone}{}}} 190 | \subsection{Method \code{clone()}}{ 191 | The objects of this class are cloneable with this method. 192 | \subsection{Usage}{ 193 | \if{html}{\out{
}}\preformatted{DataFrameSource$clone(deep = FALSE)}\if{html}{\out{
}} 194 | } 195 | 196 | \subsection{Arguments}{ 197 | \if{html}{\out{
}} 198 | \describe{ 199 | \item{\code{deep}}{Whether to make a deep clone.} 200 | } 201 | \if{html}{\out{
}} 202 | } 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /pkg-py/src/querychat/_querychat_module.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import copy 4 | import warnings 5 | from dataclasses import dataclass 6 | from pathlib import Path 7 | from typing import TYPE_CHECKING, Union 8 | 9 | import shinychat 10 | from shiny import module, reactive, ui 11 | 12 | from .tools import tool_query, tool_reset_dashboard, tool_update_dashboard 13 | 14 | if TYPE_CHECKING: 15 | from collections.abc import Callable 16 | 17 | import chatlas 18 | import pandas as pd 19 | from shiny import Inputs, Outputs, Session 20 | from shiny.bookmark import BookmarkState, RestoreState 21 | 22 | from ._datasource import DataSource 23 | 24 | ReactiveString = reactive.Value[str] 25 | """A reactive string value.""" 26 | ReactiveStringOrNone = reactive.Value[Union[str, None]] 27 | """A reactive string (or None) value.""" 28 | 29 | CHAT_ID = "chat" 30 | 31 | 32 | @module.ui 33 | def mod_ui(**kwargs): 34 | css_path = Path(__file__).parent / "static" / "css" / "styles.css" 35 | js_path = Path(__file__).parent / "static" / "js" / "querychat.js" 36 | 37 | tag = shinychat.chat_ui(CHAT_ID, **kwargs) 38 | tag.add_class("querychat") 39 | 40 | return ui.TagList( 41 | ui.head_content( 42 | ui.include_css(css_path), 43 | ui.include_js(js_path), 44 | ), 45 | tag, 46 | ) 47 | 48 | 49 | @dataclass 50 | class ServerValues: 51 | """ 52 | Session-specific reactive values and client returned by QueryChat.server(). 53 | 54 | This dataclass contains all the session-specific reactive state for a QueryChat 55 | instance. Each session gets its own ServerValues to ensure proper isolation 56 | between concurrent sessions. 57 | 58 | Attributes 59 | ---------- 60 | df 61 | A reactive Calc that returns the current filtered data frame. 
If no SQL 62 | query has been set, this returns the unfiltered data from the data source. 63 | Call it like `.df()` to reactively read the current data frame. 64 | sql 65 | A reactive Value containing the current SQL query string. Access the value 66 | by calling `.sql()`, or set it with `.sql.set("SELECT ...")`. 67 | Returns `None` if no query has been set. 68 | title 69 | A reactive Value containing the current title for the query. The LLM 70 | provides this title when generating a new SQL query. Access it with 71 | `.title()`, or set it with `.title.set("...")`. Returns 72 | `None` if no title has been set. 73 | client 74 | The session-specific chat client instance. This is a deep copy of the 75 | base client configured for this specific session, containing the chat 76 | history and tool registrations for this session only. 77 | 78 | """ 79 | 80 | df: Callable[[], pd.DataFrame] 81 | sql: ReactiveStringOrNone 82 | title: ReactiveStringOrNone 83 | client: chatlas.Chat 84 | 85 | 86 | @module.server 87 | def mod_server( 88 | input: Inputs, 89 | output: Outputs, 90 | session: Session, 91 | *, 92 | data_source: DataSource, 93 | greeting: str | None, 94 | client: chatlas.Chat, 95 | enable_bookmarking: bool, 96 | ): 97 | # Reactive values to store state 98 | sql = ReactiveStringOrNone(None) 99 | title = ReactiveStringOrNone(None) 100 | has_greeted = reactive.value[bool](False) # noqa: FBT003 101 | 102 | # Set up the chat object for this session 103 | chat = copy.deepcopy(client) 104 | 105 | # Create the tool functions 106 | update_dashboard_tool = tool_update_dashboard(data_source, sql, title) 107 | reset_dashboard_tool = tool_reset_dashboard(sql, title) 108 | query_tool = tool_query(data_source) 109 | 110 | # Register tools with annotations for the UI 111 | chat.register_tool(update_dashboard_tool) 112 | chat.register_tool(query_tool) 113 | chat.register_tool(reset_dashboard_tool) 114 | 115 | # Execute query when SQL changes 116 | @reactive.calc 117 | def 
filtered_df(): 118 | query = sql.get() 119 | if not query: 120 | return data_source.get_data() 121 | else: 122 | return data_source.execute_query(query) 123 | 124 | # Chat UI logic 125 | chat_ui = shinychat.Chat(CHAT_ID) 126 | 127 | # Handle user input 128 | @chat_ui.on_user_submit 129 | async def _(user_input: str): 130 | stream = await chat.stream_async(user_input, echo="none", content="all") 131 | await chat_ui.append_message_stream(stream) 132 | 133 | @reactive.effect 134 | async def greet_on_startup(): 135 | if has_greeted(): 136 | return 137 | 138 | if greeting: 139 | await chat_ui.append_message(greeting) 140 | elif greeting is None: 141 | warnings.warn( 142 | "No greeting provided to `QueryChat()`. Using the LLM `client` to generate one now. " 143 | "For faster startup, lower cost, and determinism, consider providing a greeting " 144 | "to `QueryChat()` and `.generate_greeting()` to generate one beforehand.", 145 | GreetWarning, 146 | stacklevel=2, 147 | ) 148 | stream = await chat.stream_async(GREETING_PROMPT, echo="none") 149 | await chat_ui.append_message_stream(stream) 150 | 151 | has_greeted.set(True) 152 | 153 | # Handle update button clicks 154 | @reactive.effect 155 | @reactive.event(input.chat_update) 156 | def _(): 157 | update = input.chat_update() 158 | if update is None: 159 | return 160 | if not isinstance(update, dict): 161 | return 162 | 163 | new_query = update.get("query") 164 | new_title = update.get("title") 165 | if new_query is not None: 166 | sql.set(new_query) 167 | if new_title is not None: 168 | title.set(new_title) 169 | 170 | if enable_bookmarking: 171 | chat_ui.enable_bookmarking(client) 172 | 173 | @session.bookmark.on_bookmark 174 | def _on_bookmark(x: BookmarkState) -> None: 175 | vals = x.values # noqa: PD011 176 | vals["querychat_sql"] = sql.get() 177 | vals["querychat_title"] = title.get() 178 | vals["querychat_has_greeted"] = has_greeted.get() 179 | 180 | @session.bookmark.on_restore 181 | def _on_restore(x: RestoreState) 
-> None: 182 | vals = x.values # noqa: PD011 183 | if "querychat_sql" in vals: 184 | sql.set(vals["querychat_sql"]) 185 | if "querychat_title" in vals: 186 | title.set(vals["querychat_title"]) 187 | if "querychat_has_greeted" in vals: 188 | has_greeted.set(vals["querychat_has_greeted"]) 189 | 190 | return ServerValues(df=filtered_df, sql=sql, title=title, client=chat) 191 | 192 | 193 | GREETING_PROMPT: str = "Please give me a friendly greeting. Include a few sample prompts in a two-level bulleted list." 194 | 195 | 196 | class GreetWarning(Warning): 197 | """Warning raised when no greeting is provided to QueryChat.""" 198 | -------------------------------------------------------------------------------- /pkg-r/man/DBISource.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSource.R 3 | \name{DBISource} 4 | \alias{DBISource} 5 | \title{DBI Source} 6 | \description{ 7 | A DataSource implementation for DBI database connections (SQLite, PostgreSQL, 8 | MySQL, etc.). 9 | } 10 | \details{ 11 | This class wraps a DBI connection and provides SQL query execution against 12 | a specified table in the database. 
13 | } 14 | \examples{ 15 | \dontrun{ 16 | # Connect to a database 17 | conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") 18 | DBI::dbWriteTable(conn, "mtcars", mtcars) 19 | 20 | # Create a DBI source 21 | db_source <- DBISource$new(conn, "mtcars") 22 | 23 | # Get database type 24 | db_source$get_db_type() # Returns "SQLite" 25 | 26 | # Execute a query 27 | result <- db_source$execute_query("SELECT * FROM mtcars WHERE mpg > 25") 28 | 29 | # Note: cleanup() will disconnect the connection 30 | # If you want to keep the connection open, don't call cleanup() 31 | } 32 | 33 | ## ------------------------------------------------ 34 | ## Method `DBISource$new` 35 | ## ------------------------------------------------ 36 | 37 | \dontrun{ 38 | conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") 39 | DBI::dbWriteTable(conn, "iris", iris) 40 | source <- DBISource$new(conn, "iris") 41 | } 42 | } 43 | \section{Super class}{ 44 | \code{\link[querychat:DataSource]{querychat::DataSource}} -> \code{DBISource} 45 | } 46 | \section{Methods}{ 47 | \subsection{Public methods}{ 48 | \itemize{ 49 | \item \href{#method-DBISource-new}{\code{DBISource$new()}} 50 | \item \href{#method-DBISource-get_db_type}{\code{DBISource$get_db_type()}} 51 | \item \href{#method-DBISource-get_schema}{\code{DBISource$get_schema()}} 52 | \item \href{#method-DBISource-execute_query}{\code{DBISource$execute_query()}} 53 | \item \href{#method-DBISource-test_query}{\code{DBISource$test_query()}} 54 | \item \href{#method-DBISource-get_data}{\code{DBISource$get_data()}} 55 | \item \href{#method-DBISource-cleanup}{\code{DBISource$cleanup()}} 56 | \item \href{#method-DBISource-clone}{\code{DBISource$clone()}} 57 | } 58 | } 59 | \if{html}{\out{
}} 60 | \if{html}{\out{}} 61 | \if{latex}{\out{\hypertarget{method-DBISource-new}{}}} 62 | \subsection{Method \code{new()}}{ 63 | Create a new DBISource 64 | \subsection{Usage}{ 65 | \if{html}{\out{
}}\preformatted{DBISource$new(conn, table_name)}\if{html}{\out{
}} 66 | } 67 | 68 | \subsection{Arguments}{ 69 | \if{html}{\out{
}} 70 | \describe{ 71 | \item{\code{conn}}{A DBI connection object} 72 | 73 | \item{\code{table_name}}{Name of the table in the database. Can be a character 74 | string or a \code{\link[DBI:Id]{DBI::Id()}} object for tables in catalogs/schemas} 75 | } 76 | \if{html}{\out{
}} 77 | } 78 | \subsection{Returns}{ 79 | A new DBISource object 80 | } 81 | \subsection{Examples}{ 82 | \if{html}{\out{
}} 83 | \preformatted{\dontrun{ 84 | conn <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") 85 | DBI::dbWriteTable(conn, "iris", iris) 86 | source <- DBISource$new(conn, "iris") 87 | } 88 | } 89 | \if{html}{\out{
}} 90 | 91 | } 92 | 93 | } 94 | \if{html}{\out{
}} 95 | \if{html}{\out{}} 96 | \if{latex}{\out{\hypertarget{method-DBISource-get_db_type}{}}} 97 | \subsection{Method \code{get_db_type()}}{ 98 | Get the database type 99 | \subsection{Usage}{ 100 | \if{html}{\out{
}}\preformatted{DBISource$get_db_type()}\if{html}{\out{
}} 101 | } 102 | 103 | \subsection{Returns}{ 104 | A string identifying the database type 105 | } 106 | } 107 | \if{html}{\out{
}} 108 | \if{html}{\out{}} 109 | \if{latex}{\out{\hypertarget{method-DBISource-get_schema}{}}} 110 | \subsection{Method \code{get_schema()}}{ 111 | Get schema information for the database table 112 | \subsection{Usage}{ 113 | \if{html}{\out{
}}\preformatted{DBISource$get_schema(categorical_threshold = 20)}\if{html}{\out{
}} 114 | } 115 | 116 | \subsection{Arguments}{ 117 | \if{html}{\out{
}} 118 | \describe{ 119 | \item{\code{categorical_threshold}}{Maximum number of unique values for a text 120 | column to be considered categorical (default: 20)} 121 | } 122 | \if{html}{\out{
}} 123 | } 124 | \subsection{Returns}{ 125 | A string describing the schema 126 | } 127 | } 128 | \if{html}{\out{
}} 129 | \if{html}{\out{}} 130 | \if{latex}{\out{\hypertarget{method-DBISource-execute_query}{}}} 131 | \subsection{Method \code{execute_query()}}{ 132 | Execute a SQL query 133 | \subsection{Usage}{ 134 | \if{html}{\out{
}}\preformatted{DBISource$execute_query(query)}\if{html}{\out{
}} 135 | } 136 | 137 | \subsection{Arguments}{ 138 | \if{html}{\out{
}} 139 | \describe{ 140 | \item{\code{query}}{SQL query string. If NULL or empty, returns all data} 141 | } 142 | \if{html}{\out{
}} 143 | } 144 | \subsection{Returns}{ 145 | A data frame with query results 146 | } 147 | } 148 | \if{html}{\out{
}} 149 | \if{html}{\out{}} 150 | \if{latex}{\out{\hypertarget{method-DBISource-test_query}{}}} 151 | \subsection{Method \code{test_query()}}{ 152 | Test a SQL query by fetching only one row 153 | \subsection{Usage}{ 154 | \if{html}{\out{
}}\preformatted{DBISource$test_query(query)}\if{html}{\out{
}} 155 | } 156 | 157 | \subsection{Arguments}{ 158 | \if{html}{\out{
}} 159 | \describe{ 160 | \item{\code{query}}{SQL query string} 161 | } 162 | \if{html}{\out{
}} 163 | } 164 | \subsection{Returns}{ 165 | A data frame with one row of results 166 | } 167 | } 168 | \if{html}{\out{
}} 169 | \if{html}{\out{}} 170 | \if{latex}{\out{\hypertarget{method-DBISource-get_data}{}}} 171 | \subsection{Method \code{get_data()}}{ 172 | Get all data from the table 173 | \subsection{Usage}{ 174 | \if{html}{\out{
}}\preformatted{DBISource$get_data()}\if{html}{\out{
}} 175 | } 176 | 177 | \subsection{Returns}{ 178 | A data frame containing all data 179 | } 180 | } 181 | \if{html}{\out{
}} 182 | \if{html}{\out{}} 183 | \if{latex}{\out{\hypertarget{method-DBISource-cleanup}{}}} 184 | \subsection{Method \code{cleanup()}}{ 185 | Disconnect from the database 186 | \subsection{Usage}{ 187 | \if{html}{\out{
}}\preformatted{DBISource$cleanup()}\if{html}{\out{
}} 188 | } 189 | 190 | \subsection{Returns}{ 191 | NULL (invisibly) 192 | } 193 | } 194 | \if{html}{\out{
}} 195 | \if{html}{\out{}} 196 | \if{latex}{\out{\hypertarget{method-DBISource-clone}{}}} 197 | \subsection{Method \code{clone()}}{ 198 | The objects of this class are cloneable with this method. 199 | \subsection{Usage}{ 200 | \if{html}{\out{
}}\preformatted{DBISource$clone(deep = FALSE)}\if{html}{\out{
}} 201 | } 202 | 203 | \subsection{Arguments}{ 204 | \if{html}{\out{
}} 205 | \describe{ 206 | \item{\code{deep}}{Whether to make a deep clone.} 207 | } 208 | \if{html}{\out{
}} 209 | } 210 | } 211 | } 212 | --------------------------------------------------------------------------------