├── .github
│   └── workflows
│       ├── publish.yml
│       └── test.yml
├── .gitignore
├── README.md
├── datasette_export_notebook
│   ├── __init__.py
│   ├── templates
│   │   └── export_notebook.html
│   └── utils.py
├── setup.py
└── tests
    └── test_export_notebook.py

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
name: Publish Python Package

on:
  release:
    types: [created]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        pip install -e '.[test]'
    - name: Run tests
      run: |
        pytest
  deploy:
    runs-on: ubuntu-latest
    needs: [test]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.11'
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        pip install build twine
    - name: Publish
      env:
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
      run: |
        python -m build
        twine upload dist/*

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Test

on: [push]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
        datasette-version: ["<=1.0a0", ">=1.0a0"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        pip install -e '.[test]'
        pip install 'datasette${{ matrix.datasette-version }}'
    - name: Run tests
      run: |
        pytest

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.venv
__pycache__/
*.py[cod]
*$py.class
venv
.eggs
.pytest_cache
*.egg-info
.DS_Store
.vscode

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# datasette-export-notebook

[![PyPI](https://img.shields.io/pypi/v/datasette-export-notebook.svg)](https://pypi.org/project/datasette-export-notebook/)
[![Changelog](https://img.shields.io/github/v/release/simonw/datasette-export-notebook?include_prereleases&label=changelog)](https://github.com/simonw/datasette-export-notebook/releases)
[![Tests](https://github.com/simonw/datasette-export-notebook/workflows/Test/badge.svg)](https://github.com/simonw/datasette-export-notebook/actions?query=workflow%3ATest)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette-export-notebook/blob/main/LICENSE)

Datasette plugin providing instructions for exporting data to a
[Jupyter](https://jupyter.org/) or [Observable](https://observablehq.com/) notebook.

## Installation

Install this plugin in the same environment as Datasette.

    $ datasette install datasette-export-notebook

## Usage

Once installed, the plugin will add a `.Notebook` export option to every table and query. Clicking on this link will show instructions for exporting the data to Jupyter or Observable.
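For example, the Jupyter instructions boil down to a snippet along these lines (the URL here is illustrative; the page fills in the real URL for your table, with `?_shape=array` appended):

    import pandas

    df = pandas.read_json(
        "https://example.com/database/table.json?_shape=array"
    )

For tables with more than one page of results the page can also offer a streaming CSV variant based on `pandas.read_csv()` against the `?_stream=on` CSV export, with a `dtype=` mapping for non-string columns. That option depends on Datasette's `allow_csv_stream` setting being enabled.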
## Demo

You can see this plugin in action on the [latest-with-plugins.datasette.io](https://latest-with-plugins.datasette.io/) Datasette instance - for example on [/github/commits.Notebook](https://latest-with-plugins.datasette.io/github/commits.Notebook).

## Development

To set up this plugin locally, first check out the code. Then create a new virtual environment:

    cd datasette-export-notebook
    python3 -m venv venv
    source venv/bin/activate

Or if you are using `pipenv`:

    pipenv shell

Now install the dependencies and test dependencies:

    pip install -e '.[test]'

To run the tests:

    pytest

--------------------------------------------------------------------------------
/datasette_export_notebook/__init__.py:
--------------------------------------------------------------------------------
from datasette import hookimpl
from datasette.utils.asgi import Response
from .utils import detect_types
import json


async def render_notebook(datasette, request, data, rows):
    # Build a link back to the original table/query page, plus a JSON export
    # URL for it using ?_shape=array to get a plain list of row objects.
    original_path = request.path.replace(".Notebook", "")
    back_url = original_path
    json_url = original_path + ".json"
    if request.query_string:
        back_url += "?" + request.query_string
        json_url += "?" + request.query_string + "&_shape=array"
    else:
        json_url += "?_shape=array"
    json_url = datasette.absolute_url(
        request,
        json_url,
    )
    total_count = None
    count = len(rows)
    if "filtered_table_rows_count" in data:
        total_count = data["filtered_table_rows_count"]

    # Only offer a streaming CSV export if there are more pages of results.
    csv_stream_url = None
    if data.get("next"):
        csv_path = original_path + ".csv"
        if request.query_string:
            csv_path += "?" + request.query_string + "&_stream=on"
        else:
            csv_path += "?_stream=on"
        csv_stream_url = datasette.absolute_url(request, csv_path)

    return Response.html(
        await datasette.render_template(
            "export_notebook.html",
            {
                "cors_enabled": datasette.cors,
                "allow_csv_stream": datasette.setting("allow_csv_stream"),
                "back_url": back_url,
                "csv_stream_url": csv_stream_url,
                "pandas_stream_code": pandas_stream_code(
                    csv_stream_url, detect_types(rows)
                ),
                "json_url": json_url,
                "count": count,
                "total_count": total_count,
                "has_next": bool(data.get("next")),
                "json": json,
            },
        )
    )


def pandas_stream_code(csv_stream_url, column_types):
    # Build the pandas.read_csv() snippet shown on the export page, adding a
    # dtype= mapping for any columns that are not plain strings.
    dtype = ""
    non_string_types = {
        col: type for col, type in column_types.items() if type != "str"
    }
    if non_string_types:
        lines = [", dtype={"]
        for column, type in non_string_types.items():
            lines.append("    {}: {},".format(json.dumps(column), type))
        lines.append("}")
        dtype = "\n".join(lines)
    return "df = pandas.read_csv(\n    {}{})".format(json.dumps(csv_stream_url), dtype)
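
# For a table with integer "rowid" and "id" columns the snippet built above
# comes out like this (URL illustrative):
#
#   df = pandas.read_csv(
#       "http://localhost/db/big.csv?_stream=on", dtype={
#       "rowid": int,
#       "id": int,
#   })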


@hookimpl
def register_output_renderer(datasette):
    return {
        "extension": "Notebook",
        "render": render_notebook,
    }

--------------------------------------------------------------------------------
/datasette_export_notebook/templates/export_notebook.html:
--------------------------------------------------------------------------------
{% extends "base.html" %}

{% block title %}Export {{ count }} rows to a notebook{% endblock %}

{% block extra_head %}
<style>
/* style rules elided from this dump */
</style>
{% endblock %}

{% block content %}
<h1>Export {{ count }} rows to a notebook</h1>

<p><a href="{{ back_url }}">Back to the rows</a></p>

<p>You can export this data to a Jupyter or Observable notebook by copying and pasting the following:</p>

<h2>Jupyter</h2>

<p>Make sure you have Pandas. Import it in a cell like this:</p>

<pre>import pandas</pre>

<p>If this shows an error you can run <code>%pip install pandas</code> in a notebook cell to install it.</p>

<p>Now paste the following into a cell to load the {{ count }} row{% if count != 1 %}s{% endif %} into a DataFrame called <code>df</code>:</p>

<pre>df = pandas.read_json(
    {{ json.dumps(json_url) }}
)</pre>

<p>Run <code>df</code> in a new cell to see the table.</p>

{% if allow_csv_stream and csv_stream_url and has_next %}

<p>You can export all {% if total_count %}{{ "{:,}".format(total_count) }} {% endif %}rows using a single streaming CSV export like this:</p>

<pre>{{ pandas_stream_code }}</pre>
{% endif %}

<h2>Observable</h2>

{% if cors_enabled %}

<p>Import the data into a variable called <code>rows</code> like this:</p>

<pre>rows = d3.json(
  {{ json.dumps(json_url) }}
)</pre>

{% if allow_csv_stream and csv_stream_url and has_next %}

<p>You can export all {% if total_count %}{{ "{:,}".format(total_count) }} {% endif %}rows using a single streaming CSV export like this:</p>

<pre>rows = d3.csv(
  {{ json.dumps(csv_stream_url) }},
  d3.autoType
)</pre>
{% endif %}
{% else %}

<p>Export to Observable is only available if Datasette is running with the <code>--cors</code> option.</p>

{% endif %}

<!-- inline <script> block elided from this dump -->

{% endblock %}

--------------------------------------------------------------------------------
/datasette_export_notebook/utils.py:
--------------------------------------------------------------------------------
def detect_types(rows):
    """
    Returns {colname: string_type} for these rows, where
    string_type is one of "int", "str" or "float"
    """
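    # Examples (hypothetical rows):
    #   detect_types([{"id": 1, "name": "bob"}]) -> {"id": "int", "name": "str"}
    #   detect_types([{"f": 3.5}, {"f": None}]) -> {"f": "float"}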
    columns = {}
    for row in rows:
        for column, value in dict(row).items():
            if value is not None:
                columns.setdefault(column, set()).add(type(value).__name__)

    # Only suggest a type if the column had just that type (or that + null)
    suggestions = {}
    for column, types in columns.items():
        if len(types) == 1:
            suggestions[column] = list(types)[0]

    return suggestions

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup
import os

VERSION = "1.0.1"


def get_long_description():
    with open(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"),
        encoding="utf8",
    ) as fp:
        return fp.read()


setup(
    name="datasette-export-notebook",
    description="Datasette plugin providing instructions for exporting data to Jupyter or Observable",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    author="Simon Willison",
    url="https://github.com/simonw/datasette-export-notebook",
    project_urls={
        "Issues": "https://github.com/simonw/datasette-export-notebook/issues",
        "CI": "https://github.com/simonw/datasette-export-notebook/actions",
        "Changelog": "https://github.com/simonw/datasette-export-notebook/releases",
    },
    license="Apache License, Version 2.0",
    version=VERSION,
    packages=["datasette_export_notebook"],
    entry_points={"datasette": ["export_notebook = datasette_export_notebook"]},
    install_requires=["datasette"],
    extras_require={"test": ["pytest", "pytest-asyncio", "sqlite-utils"]},
    tests_require=["datasette-export-notebook[test]"],
    package_data={"datasette_export_notebook": ["templates/*.html"]},
    python_requires=">=3.6",
)

--------------------------------------------------------------------------------
/tests/test_export_notebook.py:
--------------------------------------------------------------------------------
from datasette.app import Datasette
import pytest
import sqlite_utils
import sqlite3
from datasette_export_notebook.utils import detect_types


@pytest.fixture
def db_path(tmpdir):
    db_path = str(tmpdir / "db.db")
    db = sqlite_utils.Database(db_path)
    db["blah"].insert_all({"id": i} for i in range(80))
    db["big"].insert_all({"id": i} for i in range(800))
    return db_path


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "path,expected_json_url,expected_csv_url",
    [
        ("/db/blah.Notebook", "http://localhost/db/blah.json?_shape=array", ""),
        (
            "/db/big.Notebook",
            "http://localhost/db/big.json?_shape=array",
            "http://localhost/db/big.csv?_stream=on",
        ),
    ],
)
async def test_export_notebook(
    db_path,
    path,
    expected_json_url,
    expected_csv_url,
):
    datasette = Datasette([db_path], cors=True)
    response = await datasette.client.get(path)
    assert 200 == response.status_code
    assert "--cors" not in response.text
    # Quotes in the rendered snippets are HTML-escaped to &#34; by Jinja.
    assert (
        """
df = pandas.read_json(
    &#34;{}&#34;
)
""".strip().format(
            expected_json_url
        )
        in response.text
    )
    assert (
        """
rows = d3.json(
  &#34;{}&#34;
)""".strip().format(
            expected_json_url
        )
        in response.text
    )
    if not expected_csv_url:
        assert ".csv" not in response.text
    else:
        assert (
            "df = pandas.read_csv(\n    &#34;{}&#34;".format(expected_csv_url)
            in response.text
        )
        assert "rows = d3.csv(\n  &#34;{}&#34;".format(expected_csv_url) in response.text


@pytest.mark.asyncio
async def test_export_notebook_pandas_stream_with_types(db_path):
    datasette = Datasette([db_path], cors=True)
    response = await datasette.client.get("/db/big.Notebook")
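    # Jinja autoescapes the rendered template, so double quotes inside the
    # generated snippet show up as &#34; in response.text.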
    expected = (
        'df = pandas.read_csv(\n'
        "    &#34;http://localhost/db/big.csv?_stream=on&#34;, dtype={\n"
        "    &#34;rowid&#34;: int,\n"
        "    &#34;id&#34;: int,\n"
        "})"
    )
    assert expected in response.text


@pytest.mark.asyncio
async def test_notebook_no_cors(db_path):
    datasette = Datasette([db_path])
    response = await datasette.client.get("/db/big.Notebook")
    assert (
        "Export to Observable is only available if Datasette is running with the"
        in response.text
    )


@pytest.mark.asyncio
async def test_notebook_no_csv(db_path):
    datasette = Datasette([db_path], settings={"allow_csv_stream": False})
    response = await datasette.client.get("/db/big.Notebook")
    assert ".csv" not in response.text


@pytest.mark.parametrize(
    "rows,expected_types",
    [
        ([{"id": 1}], {"id": "int"}),
        ([{"id": None}], {}),
        ([{"id": 1, "name": "bob"}], {"id": "int", "name": "str"}),
        ([{"f": 3.5}, {"f": None}], {"f": "float"}),
    ],
)
def test_detect_types(rows, expected_types):
    db = sqlite_utils.Database(memory=True)
    db["t"].insert_all(rows)
    db.conn.row_factory = sqlite3.Row
    rows = db.conn.execute("select * from t").fetchall()
    column_types = detect_types(rows)
    assert column_types == expected_types

--------------------------------------------------------------------------------