├── .github
│   └── workflows
│       ├── build.yml
│       ├── bump_beta_rc.yml
│       └── tag_release.yml
├── .gitignore
├── LICENSE
├── README.md
├── jupylite_duckdb
│   ├── __init__.py
│   ├── _version.py
│   ├── jdw.py
│   └── jdw_magic.py
├── notebooks
│   ├── example.ipynb
│   ├── example_iris.ipynb
│   └── example_python.ipynb
├── pyscript
│   ├── pyscript_example.html
│   └── pyscript_repl.html
├── setup.py
└── wasm_example.html

--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
name: Build on Tag
# On commit: builds a "dev" version and pushes to TestPyPI
# On release created: builds from the tag and stores assets in the Release
# TODO: On release created, push to PyPI


on:
  push:
    tags:
      - 'v*'

  #branches: ["main"]

# On a push, builds with a bumped release + dev suffix, and pushes to TestPyPI
# On a release, builds according to the tag (v0.1.1, or v0.1.1rc1), pushes to PyPI and stores assets
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#release

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
        with: # needed for tags for dunamai
          fetch-depth: 0
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        run: |
          pip install --upgrade pip
          pip install --user dunamai pytest
      - name: If push, bump dev version
        if: github.event_name == 'push' && ! startsWith(github.event.ref, 'refs/tags/v')
        run: |
          # Since this is a branch push, append dev0 so the version is distinct from a tag build
          export RDISTANCE=`dunamai from git --format "{distance}"`
          export RVERSION=$(dunamai from git --bump --no-metadata)

          if [ $RDISTANCE -eq 0 ]
          then
            export RVERSION=$RVERSION.dev0
          fi

          echo "RVERSION=$RVERSION" >> $GITHUB_ENV
      - name: If a v-tag, use the unbumped version
        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
        run: |
          echo ${{ github.event.ref }}
          git tag
          dunamai from git --debug
          echo "RVERSION=$(dunamai from git)" >> $GITHUB_ENV
      - name: Store additional env
        run: |
          echo "RCOMMIT=$(git rev-parse HEAD)" >> $GITHUB_ENV
          echo "RCOMMITSHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
      - name: Create _version file
        run: |
          export VFILE=$(basename ${{ github.repository }})/_version.py
          echo __version__=\"${{ env.RVERSION }}\" > $VFILE
          echo __commit__=\"${{ env.RCOMMIT }}\" >> $VFILE
          echo __commit_short__=\"${{ env.RCOMMITSHORT }}\" >> $VFILE
          echo Debug Version path is $VFILE
          echo Debug Version content is
          cat $VFILE
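      # The generated jupylite_duckdb/_version.py looks like (illustrative values):
      #   __version__="0.1.2b1"
      #   __commit__="<full sha>"
      #   __commit_short__="<short sha>"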
      - name: Install dependencies from Project
        run: |
          if [ -f requirements_dev.txt ]; then pip install --user -r requirements_dev.txt; fi
          pip install --user .
      - name: Ruff Check
        run: |
          pip install ruff
          ruff check $(basename ${{ github.repository }}) --config pyproject.toml
      - name: Test with pytest
        run: |
          pytest
      - name: Build
        run: |
          pip wheel --no-deps -w dist .
          zip wheels.zip dist/*
      - name: Debug Info
        run: |
          ls dist/*.whl
          ls -l
      - name: Publish to TestPyPI
        if: github.event_name == 'push'
        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
        with:
          user: __token__
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          repository_url: https://test.pypi.org/legacy/
      - name: Release Create Draft
        id: create_release
        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_JDW_GH }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          draft: true
          prerelease: true
      - name: Release Upload Assets
        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_AS_GH }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: wheels.zip
          asset_name: wheels.zip
          asset_content_type: application/zip

--------------------------------------------------------------------------------
/.github/workflows/bump_beta_rc.yml:
--------------------------------------------------------------------------------
# This is a helper to create a release tag at the appropriate RC/beta number.
# You can also just tag the release manually.

name: Bump Beta X.Y[b|rc|c]

# Bumps the base version (0.1.1 -> 0.1.2) and appends the next {stage}{revision}:
# if there is no stage, "b1" is used;
# otherwise, {stage}{revision+1}.
# Examples:
#   v0.1.1    -> v0.1.2b1
#   v0.1.2b1  -> v0.1.2b2
#   v0.1.2rc1 -> v0.1.2rc2
#
# Important: The "v" prefix is mandatory if you tag manually.
#
# https://peps.python.org/pep-0440/
# Convention:
#   b for beta
#   rc for release candidate. We don't need to use this.
#   c for correction: should be manually tagged post release.
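#
# Manual alternative (a sketch; the version shown is illustrative — note the
# mandatory "v" prefix):
#   git tag -a v0.1.2b1 -m "beta 1"
#   git push origin v0.1.2b1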

on:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0 # needed for tags for dunamai
          token: ${{ secrets.PAT_JDW_GH }} # needed to create tag and trigger a new workflow
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Get RC Version
        run: |

          pip install --upgrade pip
          pip install --user dunamai

          export RVERSIONBASE=$(dunamai from git --format "{base}" --bump)
          export RVERSIONSTAGE=$(dunamai from git --format "{stage}{revision}" --bump)

          if [ -z $RVERSIONBASE ]
          then
            echo First Release, setting 0.0.0
            export RVERSIONBASE=0.0.0
          fi

          if [ -z $RVERSIONSTAGE ]
          then
            echo First b1, setting b1
            export RVERSIONSTAGE=b1
          fi

          export RVERSION=${RVERSIONBASE}${RVERSIONSTAGE}
          echo $RVERSION

          # Store in GITHUB_ENV for later steps; the "v" prefix is added by the tag step
          echo "RVERSION=$RVERSION" >> $GITHUB_ENV

      - name: Create tag
        run: |
          git config user.name "Git Action"
          git config user.email "gitaction@iqmo.com"
          git tag -a v${{ env.RVERSION }} -m "Generated from GH Action"
          git push origin v${{ env.RVERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_JDW_GH }}

--------------------------------------------------------------------------------
/.github/workflows/tag_release.yml:
--------------------------------------------------------------------------------
# This is a helper to tag a final release at the current base version (X.Y).
# You can also just tag the release manually.

name: Tag Release X.Y

on:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0 # needed for tags for dunamai
          token: ${{ secrets.PAT_JDW_GH }} # needed to create tag and trigger a new workflow
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Get Version
        run: |

          pip install --upgrade pip
          pip install --user dunamai

          export RVERSION=$(dunamai from git --format "{base}")
          echo Version: $RVERSION

          # Store in GITHUB_ENV for later steps; the "v" prefix is added by the tag step
          echo "RVERSION=$RVERSION" >> $GITHUB_ENV

      - name: Create tag
        run: |
          git config user.name "Git Action"
          git config user.email "gitaction@iqmo.com"
          git tag -a v${{ env.RVERSION }} -m "Generated from GH Action"
          git push origin v${{ env.RVERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_JDW_GH }}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
*.code-workspace

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2023, iqmo-org

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Experimental
This is experimental and unstable.

# Pyodide + DuckDB

This is a proof of concept for executing duckdb_wasm from a Pyodide kernel. This unlocks a few paths for using DuckDB, such as PyScript & JupyterLite.

*The project should probably be called Pyoduckwasm or something like that... it started with JupyterLite as the end goal.*
# Demonstration:
- [Static PyScript Example](https://raw.githack.com/iqmo-org/jupylite_duckdb/main/pyscript/pyscript_example.html)
- [PyScript REPL](https://raw.githack.com/iqmo-org/jupylite_duckdb/main/pyscript/pyscript_repl.html)
- [pyodide console](https://pyodide.org/en/stable/console.html)
```
import micropip
await micropip.install('pandas')
await micropip.install('jupylite-duckdb')
import jupylite_duckdb as jd
conn = await jd.connect()
r1 = await jd.query("pragma version", conn)
r2 = await jd.query("create or replace table xyz as select * from 'https://raw.githubusercontent.com/Teradata/kylo/master/samples/sample-data/parquet/userdata2.parquet'", conn)
r3 = await jd.query("select gender, count(*) as c from xyz group by gender", conn)
print(r1)
print(r2)
print(r3)
```

- [JupyterLite](https://iqmo-org.github.io/jupyterlite_run/lab/index.html)
- JupyterLite [Code Console REPL](https://iqmo-org.github.io/jupyterlite_run/repl/?kernel=python&code=print%28%22Installing%20packages%22%29%0A%25pip%20install%20jupylite-duckdb%20--pre%0A%25pip%20install%20plotly%0Aprint%28%22Creating%20DuckDB%20Instance%22%29%0Aimport%20jupylite_duckdb%20as%20duckdb%0Aawait%20duckdb.connect%28%29%0Aprint%28%22Printing%20DuckDB%20Version%22%29%0Adf%20%3D%20await%20duckdb.query%28%22pragma%20version%22%29%0Adisplay%28df%29%0A%0Aimport%20plotly.express%20as%20px%0Ar4%20%3D%20await%20duckdb.query%28%22select%20%2A%20from%20read_csv_auto%28%27https%3A%2F%2Fraw.githubusercontent.com%2Fmwaskom%2Fseaborn-data%2Fmaster%2Firis.csv%27%29%22%29%0Apx.scatter%28r4%2C%20x%3D%22sepal_length%22%2C%20y%3D%22petal_length%22%2C%20color%3D%22species%22%29%0A)

Note: reloading seems somewhat unreliable with Pyodide; CTRL-F5 works more reliably.

Limitations:
- API: duckdb.connect() and duckdb.query()
- DataFrames are not (yet) registered in the DuckDB database.
- Data is copied from the duckdb_wasm Arrow result to a Python list[dict], and then to a DataFrame. PyArrow is not available (yet) in Pyodide.

# Observations:
- It takes about a minute to run the JupyterLite examples. Most of this time is spent before any DuckDB work. Some of it could be shaved off with a custom Pyodide build, but PyScript is much faster.
- JupyterLite was unreliable with page reloads; I ended up having to clear the cache a lot.
- Not thrilled with PyScript removing the top-level await... will probably just auto-wrap it (like IPython %autoawait)

## jupyterlite_duckdb_wasm
Python wrapper to run DuckDB_WASM within JupyterLite with a Pyodide kernel.
See [notebooks](https://github.com/iqmo-org/jupylite_duckdb/tree/main/notebooks) for examples of running this within [jupyterlite](https://jupyter.org/try-jupyter/lab/)

## Cell Magic %%dql
Following the example of [magic_duckdb](https://github.com/iqmo-org/magic_duckdb), there's an initial proof of concept of a %%dql cell magic for JupyterLite.
See [Magic Example](https://github.com/iqmo-org/jupylite_duckdb/blob/main/notebooks/examples_magics.ipynb)
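
For instance, a notebook cell might look like the following (a minimal sketch; `-o` names the variable that receives the result DataFrame, and it assumes the magics were loaded via `from jupylite_duckdb import jdw_magic` and a connection was created with `await duckdb.connect()`):

```
%%dql -o df
pragma version
```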

## Pyodide Console

See the [pyodide console](https://pyodide.org/en/stable/console.html) snippet under Demonstration above.

## Various Issues, Todos and Ideas
- Implement a proof of concept version of dataframe registration
- Evaluate startup time reduction, perhaps a custom Pyodide build
- Handle errors: detect and display errors in Jupyter; too much stuff is buried in the console, such as CORS errors
- Invalidate the pip browser cache (as/if needed); annoying for development purposes
- Think through the async/await/transform_cell approach and whether there's a better solution.
- Zero-copy data exchange (js/duckdb Arrow -> python/DataFrame and python/df -> js/duckdb): blocked by PyArrow support
- If you're adding local .py files, use [importlib.invalidate_caches()](https://pyodide.org/en/stable/usage/faq.html#why-can-t-i-import-a-file-i-just-wrote-to-the-file-system). Even then, it was flaky to import.
- Careful with caching... %pip install will pull from the browser cache. I had to clear it frequently within dev tools
- To clear local storage, which is annoyingly persistent: https://superuser.com/questions/519628/clear-html5-local-storage-on-a-specific-page
- %autoawait, which is enabled by default, is part of why this works in notebooks. The %%dql cell magic patches transform_cell to push an await into the cell transformation: https://ipython.readthedocs.io/en/stable/interactive/autoawait.html

--------------------------------------------------------------------------------
/jupylite_duckdb/__init__.py:
--------------------------------------------------------------------------------
from jupylite_duckdb.jdw import connect, query  # type: ignore
from jupylite_duckdb._version import __version__  # type: ignore

--------------------------------------------------------------------------------
/jupylite_duckdb/_version.py:
--------------------------------------------------------------------------------
__version__="0.0.18a4"

--------------------------------------------------------------------------------
/jupylite_duckdb/jdw.py:
--------------------------------------------------------------------------------
import js
from pandas import DataFrame
from typing import Optional
from js import globalThis

# TODO Laundry List
# - Pass Arrow from duckdb_wasm to Python efficiently, although this depends on
#   PyArrow support in Pyodide
# - Register pandas dataframes with duckdb_wasm: maybe use get_table_names to determine
#   what needs to be registered, then register
# - Update connect to take connect(file)

CONNECTION = None

DEBUG = False

async def future_to_df(result_promise):
    try:
        obj = await result_promise
        # toArray() yields JS row objects; copy them into a list of dicts and
        # then a DataFrame (no zero-copy path until PyArrow is available)
        a = obj.toArray()
        data = [dict(v) for v in a.object_values()]

        df = DataFrame(data)
        return df
    except Exception as e:
        print(e)
        return None

async def query(sql: str, connection=None, return_future=False) -> Optional[DataFrame]:
    """Executes sql and returns the result as a DataFrame.

    Uses the given connection, falling back to the global CONNECTION; if both
    are None, a temporary in-memory database is created for the query."""
    if connection is None:
        connection = CONNECTION  # if both are None, then a temp db & connection is used
    try:
        if connection is not None:
            result_fut = connection.query(sql)
        else:
            if DEBUG:
                print("Creating a new connection to a temporary database...")
            js_function = js.Function('obj', '''
            async function executeSqlDuckdb() {
                let c = undefined
                if(obj.connection == undefined) {
                    const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
                    const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
                    const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

                    const worker_url = URL.createObjectURL(
                        new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
                    );
                    const worker = new Worker(worker_url);
                    const logger = new duckdb.ConsoleLogger();
                    const db = new duckdb.AsyncDuckDB(logger, worker);
                    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
                    await db.open({
                        path: ':memory:',
                        query: {
                            castBigIntToDouble: true,
                            castDecimalToDouble: true
                        },
                    });
                    c = await db.connect();
                }
                else {
                    c = obj.connection
                }

                console.log('Running SQL: ', obj.sql)
                const sql = obj.sql;
                const result = await c.query(sql);

                console.log('Result:', result);

                return await result
            }
            return executeSqlDuckdb()
            ''')
            js_obj = js.Object()
            js_obj.sql = sql
            js_obj.connection = connection

            result_fut = js_function(js_obj)

        if return_future:
            return result_fut
        else:
            return await future_to_df(result_fut)
    except Exception as e:
        print(e)
        return None

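# Usage from a Pyodide/IPython context (top-level await works with %autoawait):
#
#   conn = await connect()
#   df = await query("select 42 as answer", conn)
#
# With return_future=True, query() returns the raw JS promise instead of a
# DataFrame; pass it to future_to_df() later to materialize the result.
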
async def connect() -> object:
    # Other approaches:
    #   Store the function in globalThis, instead of creating a new function
    #   on connect,
    #   then access it via: from js import globalThis; globalThis.connectDuckDb()
    #
    #   // globalThis.connectDuckDb = connectDuckDb;

    js_function = js.Function('obj', '''
    async function connectDuckdb() {
        console.log("Connecting to " + obj.connectionstr);
        const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
        const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
        const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

        const worker_url = URL.createObjectURL(
            new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
        );
        const worker = new Worker(worker_url);
        const logger = new duckdb.ConsoleLogger();
        const db = new duckdb.AsyncDuckDB(logger, worker);
        await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
        const c = await db.connect(obj.connectionstr);

        console.log('Result:', c);

        return c;

    }
    return connectDuckdb()
    ''')

    js_obj = js.Object()
    js_obj.connectionstr = ":memory:"

    result_promise = js_function(js_obj)

    thisconnection = await result_promise

    global CONNECTION
    CONNECTION = thisconnection

    return thisconnection

def register_iiafes():
    # Registers fire-and-forget IIFE wrappers on globalThis; see connect_sync
    # and query_sync below.
    syncWrapperConnect_js = js.Function('obj', '''
    function syncWrapperConnect(obj) {
        //delete globalThis.connection;
        (async () => {
            async function executeSqlDuckdb() {
                console.log("Connecting to " + obj.connectionstr);
                const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
                const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
                const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

                const worker_url = URL.createObjectURL(
                    new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
                );
                const worker = new Worker(worker_url);
                const logger = new duckdb.ConsoleLogger();
                const db = new duckdb.AsyncDuckDB(logger, worker);
                await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
                const c = await db.connect(obj.connectionstr);

                console.log('Result:', c);

                globalThis.connection = c;
                return c;
            }
            try {
                const result = await executeSqlDuckdb();
                // Process the result
                console.log('Result:', result);

            } catch (error) {
                // Handle errors
                console.error('Error:', error);
            }
        }
        )();
    }
    globalThis.syncWrapperConnect=syncWrapperConnect
    ''')

    syncWrapperConnect_js()  # function stored on globalThis.syncWrapperConnect

    syncWrapper_query = js.Function('obj', '''
    function syncWrapperQuery(obj) {
        globalThis.result=null;
        (async () => {
            async function executeSqlDuckdb() {
                //delete globalThis.result;
                let c = undefined;
                if(obj.connection == undefined) {
                    const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
                    const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
                    const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

                    const worker_url = URL.createObjectURL(
                        new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
                    );
                    const worker = new Worker(worker_url);
                    const logger = new duckdb.ConsoleLogger();
                    const db = new duckdb.AsyncDuckDB(logger, worker);
                    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
                    c = await db.connect();
                }
                else {
                    c = obj.connection
                }

                console.log('Running SQL: ', obj.sql)
                const sql = obj.sql;
                const result = await c.query(sql);

                console.log('Result:', result);
                globalThis.objresult=result;
                return result;
            }
            try {
                const result = await executeSqlDuckdb();
                // Process the result
                console.log('Result:', result);

            } catch (error) {
                // Handle errors
                console.error('Error:', error);
            }
        }
        )();
    }
    globalThis.syncWrapperQuery=syncWrapperQuery
    ''')
    syncWrapper_query()  # function stored on globalThis.syncWrapperQuery

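# Usage sketch for the fire-and-forget wrappers (register_iiafes() must run first):
#
#   register_iiafes()
#   connect_sync()               # starts the async connect; globalThis.connection
#                                # is set once the browser event loop completes it
#   df = query_sync("select 1")  # only valid after the async work has finished
#
# Note: these wrappers do not block, so globalThis.connection/objresult may not
# be populated immediately after the call returns.
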
def connect_sync(connstr: str = ":memory:"):

    js_obj_conn = js.Object()
    js_obj_conn.connectionstr = connstr
    globalThis.syncWrapperConnect(js_obj_conn)


def query_sync(sql: str) -> DataFrame:

    js_obj_q = js.Object()
    js_obj_q.connection = globalThis.connection
    js_obj_q.sql = sql

    globalThis.syncWrapperQuery(js_obj_q)

    # Assumes the async wrapper has already populated globalThis.objresult;
    # see the fire-and-forget note above.
    a = globalThis.objresult.toArray()
    data = [dict(v) for v in a.object_values()]
    df = DataFrame(data)

    return df

--------------------------------------------------------------------------------
/jupylite_duckdb/jdw_magic.py:
--------------------------------------------------------------------------------
# Still a work in progress:
# The basic problem is properly handling the Pyodide Future.
# It couldn't be handled in the usual ways (event loop), and the simplest solution
# is to "await" the operation.
# But, since cell/line magics don't support async, we needed a way to await them.
#
# The monkey patch approach patches the IPython line and cell transformers to rewrite the magics directly
# to the syntax we want: "await (params)"
# instead of the usual behavior, which is to rewrite a cell magic to: get_ipython().run_cell_magic(line, cell)
#
# So, the cell/line magic here is only used for registration purposes... but is never called.
#
# The last complexity is that there are three paths for rewriting:
#   Cell magics: cells starting with %%magic
#   Line magics: lines starting with %magic
#   Line magics with assignment: lines starting with xyz = %magic
#
# Only the first two cases are dealt with here. The assignment case is a bit weird. For now, use %dql -o instead of xyz = %dql.
#
# ref: https://github.com/ipython/ipython/blob/main/IPython/core/interactiveshell.py
#      https://github.com/ipython/ipython/blob/main/IPython/core/inputtransformer2.py
#
import warnings

from IPython.core.magic import register_line_magic, register_cell_magic
from IPython.core import magic_arguments
from IPython.display import display
import ipywidgets as widgets
import asyncio
import jupylite_duckdb as jd
import functools
from IPython.core.getipython import get_ipython

warnings.filterwarnings("ignore", category=DeprecationWarning)

DEBUG = True

async def display_result(result, output, outputvar=None):
    with output:
        try:
            if result is None:
                display("Empty Result")
            else:
                if DEBUG:
                    display(f"Output type: {type(result)}")
                display(result)
                if outputvar is not None:
                    get_ipython().user_ns[outputvar] = result  # type: ignore
        except Exception as e:
            print(e)

@register_line_magic
@register_cell_magic
@magic_arguments.magic_arguments()
@magic_arguments.argument('-o', '--output', nargs=1, help="Output.", type=str)
@magic_arguments.argument('remainder', nargs='*', help='Everything else')
async def dql(line="", cell=""):
    outputvar = None
    if line:
        args = magic_arguments.parse_argstring(dql, line)
        if args.output:
            outputvar = args.output[0]

    if cell:
        query = cell
    else:
        query = " ".join(args.remainder)

    result = await jd.query(query)
    if outputvar:
        get_ipython().user_ns[outputvar] = result
        return None
    else:
        return result


def transform_dql_cell(orig_cell: str) -> str:
    # Use find_cell_magic because we don't know the namespace
    lines = orig_cell.split("\n")

    first_line = lines[0]
    first_line = first_line.replace("%%dql", "")  # .replace("-o", "").strip()
    if len(lines) == 1:
        rest = ""
    else:
        rest = "\\n".join(lines[1:])
        rest = rest.replace("'", "\\'")

    result = f"await get_ipython().find_cell_magic('dql')(line='{first_line}', cell='{rest}')"
    return result

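# For illustration, transform_dql_cell turns a cell such as
#
#   %%dql -o df
#   select 1 as a
#
# into (roughly):
#
#   await get_ipython().find_cell_magic('dql')(line=' -o df', cell='select 1 as a')
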
def patch_transformer():

    shell = get_ipython()
    transformermanager = shell.input_transformer_manager

    if not hasattr(transformermanager, "_orig_transform_cell"):
        transformermanager._orig_transform_cell = transformermanager.transform_cell

    def jd_transform_cell(*args, **kwargs) -> str:
        orig_cell = args[0]

        if orig_cell.startswith("%%dql"):
            return transform_dql_cell(orig_cell)
        else:
            result = get_ipython().input_transformer_manager._orig_transform_cell(*args, **kwargs)
            if "%dql" in orig_cell:
                result = result.replace("get_ipython().run_line_magic('dql',", "await get_ipython().find_line_magic('dql')(line=")
            # print(result)
            return result

    transformermanager.transform_cell = jd_transform_cell

def patch_should_run_async():
    shell = get_ipython()

    if not hasattr(shell, "_orig_should_run_async"):
        shell._orig_should_run_async = shell.should_run_async

    def jd_should_run_async(*args, **kwargs) -> bool:
        orig_cell = args[0]
        if not orig_cell.startswith("%%") and "%dql" in orig_cell:
            return True
        else:
            return shell._orig_should_run_async(*args, **kwargs)

    shell.should_run_async = jd_should_run_async

patch_transformer()
patch_should_run_async()

--------------------------------------------------------------------------------
/notebooks/example.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install\n",
    "%pip install pandas ipywidgets nbformat>=4.2.0\n",
    "%pip install jupylite_duckdb --pre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import and Load Magics\n",
    "import pandas as pd\n",
    "import jupylite_duckdb as duckdb\n",
    "\n",
    "# Import needed to load the magics:\n",
    "from jupylite_duckdb import jdw_magic\n",
    "\n",
    "print(duckdb.__version__)\n",
    "print(pd.__version__)\n",
    "\n",
    "# Connect. If you skip this step, a transient/temp DB is used for each cell magic.\n",
    "await duckdb.connect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%dql\n",
    "-- Display duckdb version\n",
    "pragma version\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%dql -o abcdf\n",
    "create or replace table xyz as select * from 'https://raw.githubusercontent.com/Teradata/kylo/master/samples/sample-data/parquet/userdata2.parquet';\n",
    "select gender, count(*) from xyz group by gender"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%dql -o abcdf\n",
    "CREATE OR REPLACE TABLE abc\n",
    " as \n",
    "SELECT *, v*x from range(10) t(v), range(5) s(x);\n",
    "select * from abc;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "abcdf.describe()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/notebooks/example_iris.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Demonstrates XGBoost over the Iris dataset using DuckDB and JupyterLite + Pyodide.\n",
    "\n",
    "The DuckDB usage in this example is minimal; it's just used to load the CSV, but imagine you had a more complex query & dataset.\n",
    "\n",
    "Note: In a Jupyter environment, you'd swap jupylite_duckdb for duckdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install\n",
    "%pip install pandas ipywidgets nbformat>=4.2.0\n",
    "%pip install jupylite_duckdb --pre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This is the only JupyterLite / Pyodide specific block.\n",
    "import jupylite_duckdb as jd\n",
    "\n",
    "# Connect (create an in-memory duckdb instance)\n",
    "conn = await jd.connect()\n",
    "\n",
    "# Get the duckdb version\n",
    "r1 = await jd.query(\"pragma version\", conn)\n",
    "display(r1)\n",
    "\n",
    "# Load the Iris dataset\n",
    "r4 = await jd.query(\"select * from read_csv_auto('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')\", conn)\n",
    "display(r4.describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display input data\n",
    "\n",
    "import plotly.express as px\n",
    "px.scatter(r4, x=\"sepal_length\", y=\"petal_length\", color=\"species\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# OneHotEncode\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.compose import make_column_transformer\n",
    "import pandas as pd\n",
    "transformer = make_column_transformer(\n",
    "    # make_column_selector(dtype_exclude=numpy.number)\n",
    "    (OneHotEncoder(drop=\"first\"), [\"species\"]),\n",
    "    remainder=\"passthrough\",\n",
    ")  # type: ignore\n",
    "\n",
    "transformed = transformer.fit_transform(r4)\n",
    "r4_encoded = pd.DataFrame(\n",
    "    transformed, columns=transformer.get_feature_names_out()\n",
    ")\n",
    "\n",
    "r4_encoded = r4_encoded.rename(columns={col: col.replace(\"remainder__\", \"\") for col in r4_encoded if col.startswith(\"remainder__\")})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train / Test Split\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "train, _test = train_test_split(r4_encoded, test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X / y\n",
    "y_col = \"sepal_length\"\n",
    "x_cols = [col for col in train if col != y_col]\n",
    "\n",
    "train_X = train[x_cols]\n",
    "train_y = train[[y_col]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# Create an XGBoost regressor\n",
    "xgb_reg = xgb.XGBRegressor(objective='reg:squarederror')\n",
    "\n",
    "# Fit the model on the training data\n",
    "xgb_reg.fit(train_X, train_y)\n",
    "\n",
    "# Make predictions on the training data\n",
    "train_preds = xgb_reg.predict(train_X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict over Train\n",
    "train_preds = xgb_reg.predict(train_X)\n",
    "train[\"prediction\"] = train_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict over Test\n",
    "test = _test\n",
    "test_X = test[x_cols]\n",
    "test_y = test[[y_col]]\n",
    "\n",
    "test_preds = xgb_reg.predict(test_X)\n",
    "test[\"prediction\"] = test_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the mean squared error on the training data\n",
    "mse = mean_squared_error(train_y, train_preds)\n",
    "print(\"Training MSE:\", mse)\n",
    "\n",
    "# Calculate the mean squared error on the test data\n",
    "mse = mean_squared_error(test_y, test_preds)\n",
    "print(\"Test MSE:\", mse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the Test Fit vs Training Fit\n",
    "\n",
    "import plotly.graph_objs as go\n",
    "combined_data = pd.concat([train, test], ignore_index=True)\n",
    "combined_data[\"dataset\"] = [\"train\"] * len(train) + [\"test\"] * len(test)\n",
    "\n",
    "# Create a scatter plot of the actual and predicted values\n",
    "trace1 = go.Scatter(\n",
    "    x=train[y_col],\n",
    "    y=train[\"prediction\"],\n",
    "    mode=\"markers\",\n",
    "    name=\"Train\"\n",
    ")\n",
    "\n",
    "trace2 =
go.Scatter(\n",
    "    x=test[y_col],\n",
    "    y=test[\"prediction\"],\n",
    "    mode=\"markers\",\n",
    "    name=\"Test\"\n",
    ")\n",
    "\n",
    "layout = go.Layout(\n",
    "    title=\"Accuracy against Train vs Test\",\n",
    "    xaxis=dict(title=y_col),\n",
    "    yaxis=dict(title=\"Prediction\")\n",
    ")\n",
    "\n",
    "fig = go.Figure(data=[trace1, trace2], layout=layout)\n",
    "fig.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "iqmo2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/notebooks/example_python.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Useful tools for local development\n",
    "# %load_ext autoreload\n",
    "# %autoreload 2\n",
    "\n",
    "%pip install pandas\n",
    "%pip install jupylite-duckdb\n",
    "\n",
    "# Alternative install commands:\n",
    "#\n",
    "# import micropip\n",
    "# await micropip.install(['pandas'])\n",
    "#\n",
    "# import piplite\n",
    "# await piplite.install([\"pandas\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import jupylite_duckdb as duckdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Jupyter has \"autoawait\" enabled by default,\n",
    "# which allows these top-level \"awaits\",\n",
    "# which are needed because the JavaScript actions behind this are asynchronous\n",
    "\n",
    "conn = await duckdb.connect()\n",
    "r1 = await duckdb.query(\"pragma version\")\n",
    "r2 = await duckdb.query(\"create or replace table xyz as select * from 'https://raw.githubusercontent.com/Teradata/kylo/master/samples/sample-data/parquet/userdata2.parquet'\")\n",
    "r3 = await duckdb.query(\"select gender, count(*) as c from xyz group by gender\")\n",
    "\n",
    "display(r1)\n",
    "display(r2)\n",
    "display(r3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "iqmo2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/pyscript/pyscript_example.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
  <head>
    <title>DuckDB Example</title>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="https://pyscript.net/latest/pyscript.css" />
    <script defer src="https://pyscript.net/latest/pyscript.js"></script>
  </head>
  <body>
    <py-config type="toml">
      packages = [
        "jupylite_duckdb==0.0.18a3"
      ]
    </py-config>
    <py-script>
import asyncio
import jupylite_duckdb as duckdb

# Wrap the top-level logic in an async function, so we can
# await the async js functions
async def duckdb_demo():
    print("Loading duckdb wasm and connecting to the database")
    await duckdb.connect()
    print("Executing query to get DuckDB Version")
    df = await duckdb.query("pragma version")
    print(df)

    query2 = "select * from read_csv_auto('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')"
    df2 = await duckdb.query(query2)
    print(df2.describe())

# Run
print("Pyodide is loaded, Modules installed")
asyncio.ensure_future(duckdb_demo())
    </py-script>
  </body>
</html>
--------------------------------------------------------------------------------
/pyscript/pyscript_repl.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
  <head>
    <title>DuckDB Example</title>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="https://pyscript.net/latest/pyscript.css" />
    <script defer src="https://pyscript.net/latest/pyscript.js"></script>
  </head>
  <body>
    <h1>Custom REPL</h1>
    <py-config type="toml">
      packages = [
        "bokeh",
        "numpy",
        "jupylite_duckdb==0.0.18a3"
      ]
      plugins = [
        "https://pyscript.net/latest/plugins/python/py_tutor.py"
      ]

      [[fetch]]
      files = ["https://pyscript.net/examples/utils.py", "https://pyscript.net/examples/antigravity.py"]
    </py-config>
    <py-repl>
import asyncio
import jupylite_duckdb as duckdb

async def duckdb_demo():
    ##############################
    # Ignore the async stuff above and below. Just write your code here.

    query = """
    create table iris as select * from read_csv_auto('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv');
    select * from iris;
    """

    print("Loading duckdb wasm and connecting to the database")
    await duckdb.connect()
    print("Executing query to get DuckDB Version")
    df = await duckdb.query("pragma version")
    print(df)

    df2 = await duckdb.query(query)
    print(df2.describe())
    display(df2.head(3))
##############################################################################################################################
# Run
print("Pyodide is loaded, Modules installed")
asyncio.ensure_future(duckdb_demo())
    </py-repl>
    <p>Shift-Enter to Execute. First execution has a slight delay / hang</p>
  </body>
</html>
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()

with open("jupylite_duckdb/_version.py", "r") as file:
    code = file.read()
    exec(code)
_version = __version__  # type: ignore # noqa

setup(
    name="jupylite_duckdb",
    version=_version,  # type: ignore # noqa
    description="Python wrapper to run DuckDB_WASM within JupyterLite with a Pyodide kernel",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/iqmo-org/",
    author="iqmo",
    author_email="info@iqmo.com",
    classifiers=[],
    keywords="jupyterlite duckdb wasm",
    packages=find_packages(exclude=["tests"]),
    include_package_data=True,
    install_requires=["pandas"]  # , "nbformat>=4.2.0", "ipywidgets"
)

--------------------------------------------------------------------------------
/wasm_example.html:
--------------------------------------------------------------------------------
<!-- Standalone DuckDB-WASM demo page, titled "SQL Query Executor" with a
     "DuckDB WASM Example" heading, an input form, and an inline script that
     loads duckdb-wasm and runs queries. The original markup and JavaScript
     did not survive this dump. -->
--------------------------------------------------------------------------------