├── .github
│   └── workflows
│       ├── build.yml
│       ├── bump_beta_rc.yml
│       └── tag_release.yml
├── .gitignore
├── LICENSE
├── README.md
├── jupylite_duckdb
│   ├── __init__.py
│   ├── _version.py
│   ├── jdw.py
│   └── jdw_magic.py
├── notebooks
│   ├── example.ipynb
│   ├── example_iris.ipynb
│   └── example_python.ipynb
├── pyscript
│   ├── pyscript_example.html
│   └── pyscript_repl.html
├── setup.py
└── wasm_example.html

--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
name: Build on Tag
# On commit: builds a "dev" version and pushes to TestPyPI
# On release created: builds from the tag and stores assets in the Release
# TODO: On release created, push to PyPI


on:
  push:
    tags:
      - 'v*'

  #branches: ["main"]

# On a push, builds with a bumped release + dev suffix, and pushes to TestPyPI
# On a release, builds according to the tag (v0.1.1, or v0.1.1rc1), pushes to PyPI and stores assets
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#release

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
        with: # needed for tags for dunamai
          fetch-depth: 0
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Python dependencies
        run: |
          pip install --upgrade pip
          pip install --user dunamai pytest
      - name: If push, bump dev version
        if: github.event_name == 'push' && ! startsWith(github.event.ref, 'refs/tags/v')
        run: |
          # Since this is a branch push, append dev0 so the version is distinct from a tag build
          export RDISTANCE=`dunamai from git --format "{distance}"`
          export RVERSION=$(dunamai from git --bump --no-metadata)

          if [ $RDISTANCE -eq 0 ]
          then
            export RVERSION=$RVERSION.dev0
          fi

          echo "RVERSION=$RVERSION" >> $GITHUB_ENV
      - name: If a v-tag, use the unbumped version
        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
        run: |
          echo ${{ github.event.ref }}
          git tag
          dunamai from git --debug
          echo "RVERSION=$(dunamai from git)" >> $GITHUB_ENV
      - name: Store additional env
        run: |
          echo "RCOMMIT=$(git rev-parse HEAD)" >> $GITHUB_ENV
          echo "RCOMMITSHORT=$(git rev-parse --short HEAD)" >> $GITHUB_ENV
      - name: Create _version file
        run: |
          export VFILE=$(basename ${{ github.repository }})/_version.py
          echo __version__=\"${{ env.RVERSION }}\" > $VFILE
          echo __commit__=\"${{ env.RCOMMIT }}\" >> $VFILE
          echo __commit_short__=\"${{ env.RCOMMITSHORT }}\" >> $VFILE
          echo Debug Version path is $VFILE
          echo Debug Version content is
          cat $VFILE
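      # The generated jupylite_duckdb/_version.py looks like (illustrative values):
      #   __version__="0.1.2b1"
      #   __commit__="<full sha>"
      #   __commit_short__="<short sha>"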
      - name: Install dependencies from Project
        run: |
          if [ -f requirements_dev.txt ]; then pip install --user -r requirements_dev.txt; fi
          pip install --user .
      - name: Ruff Check
        run: |
          pip install ruff
          ruff check $(basename ${{ github.repository }}) --config pyproject.toml
      - name: Test with pytest
        run: |
          pytest
      - name: Build
        run: |
          pip wheel --no-deps -w dist .
          zip wheels.zip dist/*
      - name: Debug Info
        run: |
          ls dist/*.whl
          ls -l
      - name: Publish to TestPyPI
        if: github.event_name == 'push'
        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
        with:
          user: __token__
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          repository_url: https://test.pypi.org/legacy/
      - name: Release Create Draft
        id: create_release
        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_JDW_GH }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          draft: true
          prerelease: true
      - name: Release Upload Assets
        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_AS_GH }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: wheels.zip
          asset_name: wheels.zip
          asset_content_type: application/zip

--------------------------------------------------------------------------------
/.github/workflows/bump_beta_rc.yml:
--------------------------------------------------------------------------------
# This is a helper to create a release tag at the appropriate RC/beta number.
# You can also just tag the release manually.

name: Bump Beta X.Y[b|rc|c]

# Bumps the base version (0.1.1 -> 0.1.2) and appends the next {stage}{revision}:
# if there is no stage, "b1" is used;
# otherwise, {stage}{revision+1}.
# Examples:
#   v0.1.1    -> v0.1.2b1
#   v0.1.2b1  -> v0.1.2b2
#   v0.1.2rc1 -> v0.1.2rc2
#
# Important: The "v" prefix is mandatory if you tag manually.
#
# https://peps.python.org/pep-0440/
# Convention:
#   b for beta
#   rc for release candidate. We don't need to use this.
#   c for correction: should be manually tagged post release.
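#
# Manual alternative (a sketch; the version shown is illustrative — note the
# mandatory "v" prefix):
#   git tag -a v0.1.2b1 -m "beta 1"
#   git push origin v0.1.2b1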

on:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0 # needed for tags for dunamai
          token: ${{ secrets.PAT_JDW_GH }} # needed to create tag and trigger a new workflow
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Get RC Version
        run: |

          pip install --upgrade pip
          pip install --user dunamai

          export RVERSIONBASE=$(dunamai from git --format "{base}" --bump)
          export RVERSIONSTAGE=$(dunamai from git --format "{stage}{revision}" --bump)

          if [ -z $RVERSIONBASE ]
          then
            echo First Release, setting 0.0.0
            export RVERSIONBASE=0.0.0
          fi

          if [ -z $RVERSIONSTAGE ]
          then
            echo First b1, setting b1
            export RVERSIONSTAGE=b1
          fi

          export RVERSION=${RVERSIONBASE}${RVERSIONSTAGE}
          echo $RVERSION

          # Store in GITHUB_ENV for later steps; the "v" prefix is added by the tag step
          echo "RVERSION=$RVERSION" >> $GITHUB_ENV

      - name: Create tag
        run: |
          git config user.name "Git Action"
          git config user.email "gitaction@iqmo.com"
          git tag -a v${{ env.RVERSION }} -m "Generated from GH Action"
          git push origin v${{ env.RVERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_JDW_GH }}

--------------------------------------------------------------------------------
/.github/workflows/tag_release.yml:
--------------------------------------------------------------------------------
# This is a helper to tag a final release at the current base version (X.Y).
# You can also just tag the release manually.

name: Tag Release X.Y

on:
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0 # needed for tags for dunamai
          token: ${{ secrets.PAT_JDW_GH }} # needed to create tag and trigger a new workflow
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Get Version
        run: |

          pip install --upgrade pip
          pip install --user dunamai

          export RVERSION=$(dunamai from git --format "{base}")
          echo Version: $RVERSION

          # Store in GITHUB_ENV for later steps; the "v" prefix is added by the tag step
          echo "RVERSION=$RVERSION" >> $GITHUB_ENV

      - name: Create tag
        run: |
          git config user.name "Git Action"
          git config user.email "gitaction@iqmo.com"
          git tag -a v${{ env.RVERSION }} -m "Generated from GH Action"
          git push origin v${{ env.RVERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.PAT_JDW_GH }}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
*.code-workspace

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2023, iqmo-org

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Experimental
This is experimental and unstable.

# Pyodide + DuckDB

This is a proof of concept for executing duckdb_wasm from a Pyodide kernel. This unlocks a few paths for using DuckDB, such as PyScript & JupyterLite.

*The project should probably be called Pyoduckwasm or something like that... it started with JupyterLite as the end goal.*
# Demonstration:
- [Static PyScript Example](https://raw.githack.com/iqmo-org/jupylite_duckdb/main/pyscript/pyscript_example.html)
- [PyScript REPL](https://raw.githack.com/iqmo-org/jupylite_duckdb/main/pyscript/pyscript_repl.html)
- [pyodide console](https://pyodide.org/en/stable/console.html)
```
import micropip
await micropip.install('pandas')
await micropip.install('jupylite-duckdb')
import jupylite_duckdb as jd
conn = await jd.connect()
r1 = await jd.query("pragma version", conn)
r2 = await jd.query("create or replace table xyz as select * from 'https://raw.githubusercontent.com/Teradata/kylo/master/samples/sample-data/parquet/userdata2.parquet'", conn)
r3 = await jd.query("select gender, count(*) as c from xyz group by gender", conn)
print(r1)
print(r2)
print(r3)
```

- [JupyterLite](https://iqmo-org.github.io/jupyterlite_run/lab/index.html)
- JupyterLite [Code Console REPL](https://iqmo-org.github.io/jupyterlite_run/repl/?kernel=python&code=print%28%22Installing%20packages%22%29%0A%25pip%20install%20jupylite-duckdb%20--pre%0A%25pip%20install%20plotly%0Aprint%28%22Creating%20DuckDB%20Instance%22%29%0Aimport%20jupylite_duckdb%20as%20duckdb%0Aawait%20duckdb.connect%28%29%0Aprint%28%22Printing%20DuckDB%20Version%22%29%0Adf%20%3D%20await%20duckdb.query%28%22pragma%20version%22%29%0Adisplay%28df%29%0A%0Aimport%20plotly.express%20as%20px%0Ar4%20%3D%20await%20duckdb.query%28%22select%20%2A%20from%20read_csv_auto%28%27https%3A%2F%2Fraw.githubusercontent.com%2Fmwaskom%2Fseaborn-data%2Fmaster%2Firis.csv%27%29%22%29%0Apx.scatter%28r4%2C%20x%3D%22sepal_length%22%2C%20y%3D%22petal_length%22%2C%20color%3D%22species%22%29%0A)

Note: reloading seems somewhat unreliable with Pyodide; CTRL-F5 works more reliably.

Limitations:
- API: duckdb.connect() and duckdb.query()
- DataFrames are not (yet) registered in the DuckDB database.
- Data is copied from the duckdb_wasm Arrow result to a Python list[dict], and then to a DataFrame. PyArrow is not available (yet) in Pyodide.

# Observations:
- It takes about a minute to run the JupyterLite examples. Most of this time is spent before any DuckDB work. Some of it could be shaved off with a custom Pyodide build, but PyScript is much faster.
- JupyterLite was unreliable with page reloads; I ended up having to clear the cache a lot.
- Not thrilled with PyScript removing the top-level await... will probably just auto-wrap it (like IPython %autoawait)

## jupyterlite_duckdb_wasm
Python wrapper to run DuckDB_WASM within JupyterLite with a Pyodide kernel.
See [notebooks](https://github.com/iqmo-org/jupylite_duckdb/tree/main/notebooks) for examples of running this within [jupyterlite](https://jupyter.org/try-jupyter/lab/)

## Cell Magic %%dql
Following the example of [magic_duckdb](https://github.com/iqmo-org/magic_duckdb), there's an initial proof of concept of a %%dql cell magic for JupyterLite.
See [Magic Example](https://github.com/iqmo-org/jupylite_duckdb/blob/main/notebooks/examples_magics.ipynb)
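
For instance, a notebook cell might look like the following (a minimal sketch; `-o` names the variable that receives the result DataFrame, and it assumes the magics were loaded via `from jupylite_duckdb import jdw_magic` and a connection was created with `await duckdb.connect()`):

```
%%dql -o df
pragma version
```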

## Pyodide Console

See the [pyodide console](https://pyodide.org/en/stable/console.html) snippet under Demonstration above.

## Various Issues, Todos and Ideas
- Implement a proof of concept version of dataframe registration
- Evaluate startup time reduction, perhaps a custom Pyodide build
- Handle errors: detect and display errors in Jupyter; too much stuff is buried in the console, such as CORS errors
- Invalidate the pip browser cache (as/if needed); annoying for development purposes
- Think through the async/await/transform_cell approach and whether there's a better solution.
- Zero-copy data exchange (js/duckdb Arrow -> python/DataFrame and python/df -> js/duckdb): blocked by PyArrow support
- If you're adding local .py files, use [importlib.invalidate_caches()](https://pyodide.org/en/stable/usage/faq.html#why-can-t-i-import-a-file-i-just-wrote-to-the-file-system). Even then, it was flaky to import.
- Careful with caching... %pip install will pull from the browser cache. I had to clear it frequently within dev tools
- To clear local storage, which is annoyingly persistent: https://superuser.com/questions/519628/clear-html5-local-storage-on-a-specific-page
- %autoawait, which is enabled by default, is part of why this works in notebooks. The %%dql cell magic patches transform_cell to push an await into the cell transformation: https://ipython.readthedocs.io/en/stable/interactive/autoawait.html

--------------------------------------------------------------------------------
/jupylite_duckdb/__init__.py:
--------------------------------------------------------------------------------
from jupylite_duckdb.jdw import connect, query  # type: ignore
from jupylite_duckdb._version import __version__  # type: ignore

--------------------------------------------------------------------------------
/jupylite_duckdb/_version.py:
--------------------------------------------------------------------------------
__version__="0.0.18a4"

--------------------------------------------------------------------------------
/jupylite_duckdb/jdw.py:
--------------------------------------------------------------------------------
import js
from pandas import DataFrame
from typing import Optional
from js import globalThis

# TODO Laundry List
# - Pass Arrow from duckdb_wasm to Python efficiently, although this depends on
#   PyArrow support in Pyodide
# - Register pandas dataframes with duckdb_wasm: maybe use get_table_names to determine
#   what needs to be registered, then register
# - Update connect to take connect(file)

CONNECTION = None

DEBUG = False

async def future_to_df(result_promise):
    try:
        obj = await result_promise
        # toArray() yields JS row objects; copy them into a list of dicts and
        # then a DataFrame (no zero-copy path until PyArrow is available)
        a = obj.toArray()
        data = [dict(v) for v in a.object_values()]

        df = DataFrame(data)
        return df
    except Exception as e:
        print(e)
        return None

async def query(sql: str, connection=None, return_future=False) -> Optional[DataFrame]:
    """Executes sql and returns the result as a DataFrame.

    Uses the given connection, falling back to the global CONNECTION; if both
    are None, a temporary in-memory database is created for the query."""
    if connection is None:
        connection = CONNECTION  # if both are None, then a temp db & connection is used
    try:
        if connection is not None:
            result_fut = connection.query(sql)
        else:
            if DEBUG:
                print("Creating a new connection to a temporary database...")
            js_function = js.Function('obj', '''
            async function executeSqlDuckdb() {
                let c = undefined
                if(obj.connection == undefined) {
                    const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
                    const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
                    const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

                    const worker_url = URL.createObjectURL(
                        new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
                    );
                    const worker = new Worker(worker_url);
                    const logger = new duckdb.ConsoleLogger();
                    const db = new duckdb.AsyncDuckDB(logger, worker);
                    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
                    await db.open({
                        path: ':memory:',
                        query: {
                            castBigIntToDouble: true,
                            castDecimalToDouble: true
                        },
                    });
                    c = await db.connect();
                }
                else {
                    c = obj.connection
                }

                console.log('Running SQL: ', obj.sql)
                const sql = obj.sql;
                const result = await c.query(sql);

                console.log('Result:', result);

                return await result
            }
            return executeSqlDuckdb()
            ''')
            js_obj = js.Object()
            js_obj.sql = sql
            js_obj.connection = connection

            result_fut = js_function(js_obj)

        if return_future:
            return result_fut
        else:
            return await future_to_df(result_fut)
    except Exception as e:
        print(e)
        return None

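# Usage from a Pyodide/IPython context (top-level await works with %autoawait):
#
#   conn = await connect()
#   df = await query("select 42 as answer", conn)
#
# With return_future=True, query() returns the raw JS promise instead of a
# DataFrame; pass it to future_to_df() later to materialize the result.
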
async def connect() -> object:
    # Other approaches:
    #   Store the function in globalThis, instead of creating a new function
    #   on connect,
    #   then access it via: from js import globalThis; globalThis.connectDuckDb()
    #
    #   // globalThis.connectDuckDb = connectDuckDb;

    js_function = js.Function('obj', '''
    async function connectDuckdb() {
        console.log("Connecting to " + obj.connectionstr);
        const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
        const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
        const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

        const worker_url = URL.createObjectURL(
            new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
        );
        const worker = new Worker(worker_url);
        const logger = new duckdb.ConsoleLogger();
        const db = new duckdb.AsyncDuckDB(logger, worker);
        await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
        const c = await db.connect(obj.connectionstr);

        console.log('Result:', c);

        return c;

    }
    return connectDuckdb()
    ''')

    js_obj = js.Object()
    js_obj.connectionstr = ":memory:"

    result_promise = js_function(js_obj)

    thisconnection = await result_promise

    global CONNECTION
    CONNECTION = thisconnection

    return thisconnection

def register_iiafes():
    # Registers fire-and-forget IIFE wrappers on globalThis; see connect_sync
    # and query_sync below.
    syncWrapperConnect_js = js.Function('obj', '''
    function syncWrapperConnect(obj) {
        //delete globalThis.connection;
        (async () => {
            async function executeSqlDuckdb() {
                console.log("Connecting to " + obj.connectionstr);
                const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
                const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
                const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

                const worker_url = URL.createObjectURL(
                    new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
                );
                const worker = new Worker(worker_url);
                const logger = new duckdb.ConsoleLogger();
                const db = new duckdb.AsyncDuckDB(logger, worker);
                await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
                const c = await db.connect(obj.connectionstr);

                console.log('Result:', c);

                globalThis.connection = c;
                return c;
            }
            try {
                const result = await executeSqlDuckdb();
                // Process the result
                console.log('Result:', result);

            } catch (error) {
                // Handle errors
                console.error('Error:', error);
            }
        }
        )();
    }
    globalThis.syncWrapperConnect=syncWrapperConnect
    ''')

    syncWrapperConnect_js()  # function stored on globalThis.syncWrapperConnect

    syncWrapper_query = js.Function('obj', '''
    function syncWrapperQuery(obj) {
        globalThis.result=null;
        (async () => {
            async function executeSqlDuckdb() {
                //delete globalThis.result;
                let c = undefined;
                if(obj.connection == undefined) {
                    const duckdb = await import('https://cdn.skypack.dev/@duckdb/duckdb-wasm');
                    const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
                    const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);

                    const worker_url = URL.createObjectURL(
                        new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
                    );
                    const worker = new Worker(worker_url);
                    const logger = new duckdb.ConsoleLogger();
                    const db = new duckdb.AsyncDuckDB(logger, worker);
                    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
                    c = await db.connect();
                }
                else {
                    c = obj.connection
                }

                console.log('Running SQL: ', obj.sql)
                const sql = obj.sql;
                const result = await c.query(sql);

                console.log('Result:', result);
                globalThis.objresult=result;
                return result;
            }
            try {
                const result = await executeSqlDuckdb();
                // Process the result
                console.log('Result:', result);

            } catch (error) {
                // Handle errors
                console.error('Error:', error);
            }
        }
        )();
    }
    globalThis.syncWrapperQuery=syncWrapperQuery
    ''')
    syncWrapper_query()  # function stored on globalThis.syncWrapperQuery

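# Usage sketch for the fire-and-forget wrappers (register_iiafes() must run first):
#
#   register_iiafes()
#   connect_sync()               # starts the async connect; globalThis.connection
#                                # is set once the browser event loop completes it
#   df = query_sync("select 1")  # only valid after the async work has finished
#
# Note: these wrappers do not block, so globalThis.connection/objresult may not
# be populated immediately after the call returns.
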
def connect_sync(connstr: str = ":memory:"):

    js_obj_conn = js.Object()
    js_obj_conn.connectionstr = connstr
    globalThis.syncWrapperConnect(js_obj_conn)


def query_sync(sql: str) -> DataFrame:

    js_obj_q = js.Object()
    js_obj_q.connection = globalThis.connection
    js_obj_q.sql = sql

    globalThis.syncWrapperQuery(js_obj_q)

    # Assumes the async wrapper has already populated globalThis.objresult;
    # see the fire-and-forget note above.
    a = globalThis.objresult.toArray()
    data = [dict(v) for v in a.object_values()]
    df = DataFrame(data)

    return df

--------------------------------------------------------------------------------
/jupylite_duckdb/jdw_magic.py:
--------------------------------------------------------------------------------
# Still a work in progress:
# The basic problem is properly handling the Pyodide Future.
# It couldn't be handled in the usual ways (event loop), and the simplest solution
# is to "await" the operation.
# But, since cell/line magics don't support async, we needed a way to await them.
#
# The monkey patch approach patches the IPython line and cell transformers to rewrite the magics directly
# to the syntax we want: "await (params)"
# instead of the usual behavior, which is to rewrite a cell magic to: get_ipython().run_cell_magic(line, cell)
#
# So, the cell/line magic here is only used for registration purposes... but is never called.
#
# The last complexity is that there are three paths for rewriting:
#   Cell magics: cells starting with %%magic
#   Line magics: lines starting with %magic
#   Line magics with assignment: lines starting with xyz = %magic
#
# Only the first two cases are dealt with here. The assignment case is a bit weird. For now, use %dql -o instead of xyz = %dql.
#
# ref: https://github.com/ipython/ipython/blob/main/IPython/core/interactiveshell.py
#      https://github.com/ipython/ipython/blob/main/IPython/core/inputtransformer2.py
#
import warnings

from IPython.core.magic import register_line_magic, register_cell_magic
from IPython.core import magic_arguments
from IPython.display import display
import ipywidgets as widgets
import asyncio
import jupylite_duckdb as jd
import functools
from IPython.core.getipython import get_ipython

warnings.filterwarnings("ignore", category=DeprecationWarning)

DEBUG = True

async def display_result(result, output, outputvar=None):
    with output:
        try:
            if result is None:
                display("Empty Result")
            else:
                if DEBUG:
                    display(f"Output type: {type(result)}")
                display(result)
                if outputvar is not None:
                    get_ipython().user_ns[outputvar] = result  # type: ignore
        except Exception as e:
            print(e)

@register_line_magic
@register_cell_magic
@magic_arguments.magic_arguments()
@magic_arguments.argument('-o', '--output', nargs=1, help="Output.", type=str)
@magic_arguments.argument('remainder', nargs='*', help='Everything else')
async def dql(line="", cell=""):
    outputvar = None
    if line:
        args = magic_arguments.parse_argstring(dql, line)
        if args.output:
            outputvar = args.output[0]

    if cell:
        query = cell
    else:
        query = " ".join(args.remainder)

    result = await jd.query(query)
    if outputvar:
        get_ipython().user_ns[outputvar] = result
        return None
    else:
        return result


def transform_dql_cell(orig_cell: str) -> str:
    # Use find_cell_magic because we don't know the namespace
    lines = orig_cell.split("\n")

    first_line = lines[0]
    first_line = first_line.replace("%%dql", "")  # .replace("-o", "").strip()
    if len(lines) == 1:
        rest = ""
    else:
        rest = "\\n".join(lines[1:])
        rest = rest.replace("'", "\\'")

    result = f"await get_ipython().find_cell_magic('dql')(line='{first_line}', cell='{rest}')"
    return result

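# For illustration, transform_dql_cell turns a cell such as
#
#   %%dql -o df
#   select 1 as a
#
# into (roughly):
#
#   await get_ipython().find_cell_magic('dql')(line=' -o df', cell='select 1 as a')
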
def patch_transformer():

    shell = get_ipython()
    transformermanager = shell.input_transformer_manager

    if not hasattr(transformermanager, "_orig_transform_cell"):
        transformermanager._orig_transform_cell = transformermanager.transform_cell

    def jd_transform_cell(*args, **kwargs) -> str:
        orig_cell = args[0]

        if orig_cell.startswith("%%dql"):
            return transform_dql_cell(orig_cell)
        else:
            result = get_ipython().input_transformer_manager._orig_transform_cell(*args, **kwargs)
            if "%dql" in orig_cell:
                result = result.replace("get_ipython().run_line_magic('dql',", "await get_ipython().find_line_magic('dql')(line=")
            # print(result)
            return result

    transformermanager.transform_cell = jd_transform_cell

def patch_should_run_async():
    shell = get_ipython()

    if not hasattr(shell, "_orig_should_run_async"):
        shell._orig_should_run_async = shell.should_run_async

    def jd_should_run_async(*args, **kwargs) -> bool:
        orig_cell = args[0]
        if not orig_cell.startswith("%%") and "%dql" in orig_cell:
            return True
        else:
            return shell._orig_should_run_async(*args, **kwargs)

    shell.should_run_async = jd_should_run_async

patch_transformer()
patch_should_run_async()

--------------------------------------------------------------------------------
/notebooks/example.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install\n",
    "%pip install pandas ipywidgets nbformat>=4.2.0\n",
    "%pip install jupylite_duckdb --pre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import and Load Magics\n",
    "import pandas as pd\n",
    "import jupylite_duckdb as duckdb\n",
    "\n",
    "# Import needed to load the magics:\n",
    "from jupylite_duckdb import jdw_magic\n",
    "\n",
    "print(duckdb.__version__)\n",
    "print(pd.__version__)\n",
    "\n",
    "# Connect. If you skip this step, a transient/temp DB is used for each cell magic.\n",
    "await duckdb.connect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%dql\n",
    "-- Display duckdb version\n",
    "pragma version\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%dql -o abcdf\n",
    "create or replace table xyz as select * from 'https://raw.githubusercontent.com/Teradata/kylo/master/samples/sample-data/parquet/userdata2.parquet';\n",
    "select gender, count(*) from xyz group by gender"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%dql -o abcdf\n",
    "CREATE OR REPLACE TABLE abc\n",
    " as \n",
    "SELECT *, v*x from range(10) t(v), range(5) s(x);\n",
    "select * from abc;"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "abcdf.describe()"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/notebooks/example_iris.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Demonstrates XGBoost over the Iris dataset using DuckDB and JupyterLite + Pyodide.\n",
    "\n",
    "The DuckDB usage in this example is minimal; it's just used to load the CSV, but imagine you had a more complex query & dataset.\n",
    "\n",
    "Note: In a Jupyter environment, you'd swap jupylite_duckdb for duckdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install\n",
    "%pip install pandas ipywidgets nbformat>=4.2.0\n",
    "%pip install jupylite_duckdb --pre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This is the only JupyterLite / Pyodide specific block.\n",
    "import jupylite_duckdb as jd\n",
    "\n",
    "# Connect (create an in-memory duckdb instance)\n",
    "conn = await jd.connect()\n",
    "\n",
    "# Get the duckdb version\n",
    "r1 = await jd.query(\"pragma version\", conn)\n",
    "display(r1)\n",
    "\n",
    "# Load the Iris dataset\n",
    "r4 = await jd.query(\"select * from read_csv_auto('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')\", conn)\n",
    "display(r4.describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display input data\n",
    "\n",
    "import plotly.express as px\n",
    "px.scatter(r4, x=\"sepal_length\", y=\"petal_length\", color=\"species\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# OneHotEncode\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "from sklearn.compose import make_column_transformer\n",
    "import pandas as pd\n",
    "transformer = make_column_transformer(\n",
    "    # make_column_selector(dtype_exclude=numpy.number)\n",
    "    (OneHotEncoder(drop=\"first\"), [\"species\"]),\n",
    "    remainder=\"passthrough\",\n",
    ")  # type: ignore\n",
    "\n",
    "transformed = transformer.fit_transform(r4)\n",
    "r4_encoded = pd.DataFrame(\n",
    "    transformed, columns=transformer.get_feature_names_out()\n",
    ")\n",
    "\n",
    "r4_encoded = r4_encoded.rename(columns={col: col.replace(\"remainder__\", \"\") for col in r4_encoded if col.startswith(\"remainder__\")})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train / Test Split\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "train, _test = train_test_split(r4_encoded, test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X / y\n",
    "y_col = \"sepal_length\"\n",
    "x_cols = [col for col in train if col != y_col]\n",
    "\n",
    "train_X = train[x_cols]\n",
    "train_y = train[[y_col]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# Create an XGBoost regressor\n",
    "xgb_reg = xgb.XGBRegressor(objective='reg:squarederror')\n",
    "\n",
    "# Fit the model on the training data\n",
    "xgb_reg.fit(train_X, train_y)\n",
    "\n",
    "# Make predictions on the training data\n",
    "train_preds = xgb_reg.predict(train_X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict over Train\n",
    "train_preds = xgb_reg.predict(train_X)\n",
    "train[\"prediction\"] = train_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict over Test\n",
    "test = _test\n",
    "test_X = test[x_cols]\n",
    "test_y = test[[y_col]]\n",
    "\n",
    "test_preds = xgb_reg.predict(test_X)\n",
    "test[\"prediction\"] = test_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calculate the mean squared error on the training data\n",
    "mse = mean_squared_error(train_y, train_preds)\n",
    "print(\"Training MSE:\", mse)\n",
    "\n",
    "# Calculate the mean squared error on the test data\n",
    "mse = mean_squared_error(test_y, test_preds)\n",
    "print(\"Test MSE:\", mse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the Test Fit vs Training Fit\n",
    "\n",
    "import plotly.graph_objs as go\n",
    "combined_data = pd.concat([train, test], ignore_index=True)\n",
    "combined_data[\"dataset\"] = [\"train\"] * len(train) + [\"test\"] * len(test)\n",
    "\n",
    "# Create a scatter plot of the actual and predicted values\n",
    "trace1 = go.Scatter(\n",
    "    x=train[y_col],\n",
    "    y=train[\"prediction\"],\n",
    "    mode=\"markers\",\n",
    "    name=\"Train\"\n",
    ")\n",
    "\n",
    "trace2 =
go.Scatter(\n",
    "    x=test[y_col],\n",
    "    y=test[\"prediction\"],\n",
    "    mode=\"markers\",\n",
    "    name=\"Test\"\n",
    ")\n",
    "\n",
    "layout = go.Layout(\n",
    "    title=\"Accuracy against Train vs Test\",\n",
    "    xaxis=dict(title=y_col),\n",
    "    yaxis=dict(title=\"Prediction\")\n",
    ")\n",
    "\n",
    "fig = go.Figure(data=[trace1, trace2], layout=layout)\n",
    "fig.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "iqmo2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/notebooks/example_python.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Useful tools for local development\n",
    "# %load_ext autoreload\n",
    "# %autoreload 2\n",
    "\n",
    "%pip install pandas\n",
    "%pip install jupylite-duckdb\n",
    "\n",
    "# Alternative install commands:\n",
    "#\n",
    "# import micropip\n",
    "# await micropip.install(['pandas'])\n",
    "#\n",
    "# import piplite\n",
    "# await piplite.install([\"pandas\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import jupylite_duckdb as duckdb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Jupyter has \"autoawait\" enabled by default,\n",
    "# which allows these top-level \"awaits\",\n",
    "# which are needed because the JavaScript actions behind this are asynchronous\n",
    "\n",
    "conn = await duckdb.connect()\n",
    "r1 = await duckdb.query(\"pragma version\")\n",
    "r2 = await duckdb.query(\"create or replace table xyz as select * from 'https://raw.githubusercontent.com/Teradata/kylo/master/samples/sample-data/parquet/userdata2.parquet'\")\n",
    "r3 = await duckdb.query(\"select gender, count(*) as c from xyz group by gender\")\n",
    "\n",
    "display(r1)\n",
    "display(r2)\n",
    "display(r3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "iqmo2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

--------------------------------------------------------------------------------
/pyscript/pyscript_example.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
  <head>
    <title>DuckDB Example</title>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="https://pyscript.net/latest/pyscript.css" />
    <script defer src="https://pyscript.net/latest/pyscript.js"></script>
  </head>
  <body>
    <py-config type="toml">
      packages = [
        "jupylite_duckdb==0.0.18a3"
      ]
    </py-config>
    <py-script>
import asyncio
import jupylite_duckdb as duckdb

# Wrap the top-level logic in an async function, so we can
# await the async js functions
async def duckdb_demo():
    print("Loading duckdb wasm and connecting to the database")
    await duckdb.connect()
    print("Executing query to get DuckDB Version")
    df = await duckdb.query("pragma version")
    print(df)

    query2 = "select * from read_csv_auto('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')"
    df2 = await duckdb.query(query2)
    print(df2.describe())

# Run
print("Pyodide is loaded, Modules installed")
asyncio.ensure_future(duckdb_demo())
    </py-script>
  </body>
</html>
--------------------------------------------------------------------------------
/pyscript/pyscript_repl.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
  <head>
    <title>DuckDB Example</title>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="https://pyscript.net/latest/pyscript.css" />
    <script defer src="https://pyscript.net/latest/pyscript.js"></script>
  </head>
  <body>
    <h1>Custom REPL</h1>
    <py-config type="toml">
      packages = [
        "bokeh",
        "numpy",
        "jupylite_duckdb==0.0.18a3"
      ]
      plugins = [
        "https://pyscript.net/latest/plugins/python/py_tutor.py"
      ]

      [[fetch]]
      files = ["https://pyscript.net/examples/utils.py", "https://pyscript.net/examples/antigravity.py"]
    </py-config>
    <py-repl>
import asyncio
import jupylite_duckdb as duckdb

async def duckdb_demo():
    ##############################
    # Ignore the async stuff above and below. Just write your code here.

    query = """
    create table iris as select * from read_csv_auto('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv');
    select * from iris;
    """

    print("Loading duckdb wasm and connecting to the database")
    await duckdb.connect()
    print("Executing query to get DuckDB Version")
    df = await duckdb.query("pragma version")
    print(df)

    df2 = await duckdb.query(query)
    print(df2.describe())
    display(df2.head(3))
##############################################################################################################################
# Run
print("Pyodide is loaded, Modules installed")
asyncio.ensure_future(duckdb_demo())
    </py-repl>
    <p>Shift-Enter to Execute. First execution has a slight delay / hang</p>
  </body>
</html>
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()

with open("jupylite_duckdb/_version.py", "r") as file:
    code = file.read()
    exec(code)
_version = __version__  # type: ignore # noqa

setup(
    name="jupylite_duckdb",
    version=_version,  # type: ignore # noqa
    description="Python wrapper to run DuckDB_WASM within JupyterLite with a Pyodide kernel",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/iqmo-org/",
    author="iqmo",
    author_email="info@iqmo.com",
    classifiers=[],
    keywords="jupyterlite duckdb wasm",
    packages=find_packages(exclude=["tests"]),
    include_package_data=True,
    install_requires=["pandas"]  # , "nbformat>=4.2.0", "ipywidgets"
)

--------------------------------------------------------------------------------
/wasm_example.html:
--------------------------------------------------------------------------------
<!-- Standalone DuckDB-WASM demo page, titled "SQL Query Executor" with a
     "DuckDB WASM Example" heading, an input form, and an inline script that
     loads duckdb-wasm and runs queries. The original markup and JavaScript
     did not survive this dump. -->
--------------------------------------------------------------------------------