├── .github
│   └── workflows
│       ├── publish.yml
│       └── test.yml
├── .gitignore
├── README.md
├── datasette_export_notebook
│   ├── __init__.py
│   ├── templates
│   │   └── export_notebook.html
│   └── utils.py
├── setup.py
└── tests
    └── test_export_notebook.py

--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
name: Publish Python Package

on:
  release:
    types: [created]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        pip install -e '.[test]'
    - name: Run tests
      run: |
        pytest
  deploy:
    runs-on: ubuntu-latest
    needs: [test]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.11'
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        pip install build twine
    - name: Publish
      env:
        TWINE_USERNAME: __token__
        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
      run: |
        python -m build
        twine upload dist/*

--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
name: Test

on: [push]

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
        datasette-version: ["<=1.0a0", ">=1.0a0"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip
        cache-dependency-path: setup.py
    - name: Install dependencies
      run: |
        pip install -e '.[test]'
        pip install 'datasette${{ matrix.datasette-version }}'
    - name: Run tests
      run: |
        pytest

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.venv
__pycache__/
*.py[cod]
*$py.class
venv
.eggs
.pytest_cache
*.egg-info
.DS_Store
.vscode

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# datasette-export-notebook

[![PyPI](https://img.shields.io/pypi/v/datasette-export-notebook.svg)](https://pypi.org/project/datasette-export-notebook/)
[![Changelog](https://img.shields.io/github/v/release/simonw/datasette-export-notebook?include_prereleases&label=changelog)](https://github.com/simonw/datasette-export-notebook/releases)
[![Tests](https://github.com/simonw/datasette-export-notebook/workflows/Test/badge.svg)](https://github.com/simonw/datasette-export-notebook/actions?query=workflow%3ATest)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/datasette-export-notebook/blob/main/LICENSE)

Datasette plugin providing instructions for exporting data to a
[Jupyter](https://jupyter.org/) or [Observable](https://observablehq.com/) notebook.

## Installation

Install this plugin in the same environment as Datasette.

    $ datasette install datasette-export-notebook

## Usage

Once installed, the plugin will add a `.Notebook` export option to every table and query. Clicking on this link will show instructions for exporting the data to Jupyter or Observable.
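For example, the Jupyter instructions boil down to a snippet along these lines (the URL here is illustrative; the page fills in the real URL for your table, with `?_shape=array` appended):

    import pandas

    df = pandas.read_json(
        "https://example.com/database/table.json?_shape=array"
    )

For tables with more than one page of results the page can also offer a streaming CSV variant based on `pandas.read_csv()` against the `?_stream=on` CSV export, with a `dtype=` mapping for non-string columns. That option depends on Datasette's `allow_csv_stream` setting being enabled.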
## Demo

You can see this plugin in action on the [latest-with-plugins.datasette.io](https://latest-with-plugins.datasette.io/) Datasette instance - for example on [/github/commits.Notebook](https://latest-with-plugins.datasette.io/github/commits.Notebook).

## Development

To set up this plugin locally, first check out the code. Then create a new virtual environment:

    cd datasette-export-notebook
    python3 -m venv venv
    source venv/bin/activate

Or if you are using `pipenv`:

    pipenv shell

Now install the dependencies and test dependencies:

    pip install -e '.[test]'

To run the tests:

    pytest

--------------------------------------------------------------------------------
/datasette_export_notebook/__init__.py:
--------------------------------------------------------------------------------
from datasette import hookimpl
from datasette.utils.asgi import Response
from .utils import detect_types
import json


async def render_notebook(datasette, request, data, rows):
    # Build a link back to the original table/query page, plus a JSON export
    # URL for it using ?_shape=array to get a plain list of row objects.
    original_path = request.path.replace(".Notebook", "")
    back_url = original_path
    json_url = original_path + ".json"
    if request.query_string:
        back_url += "?" + request.query_string
        json_url += "?" + request.query_string + "&_shape=array"
    else:
        json_url += "?_shape=array"
    json_url = datasette.absolute_url(
        request,
        json_url,
    )
    total_count = None
    count = len(rows)
    if "filtered_table_rows_count" in data:
        total_count = data["filtered_table_rows_count"]

    # Only offer a streaming CSV export if there are more pages of results.
    csv_stream_url = None
    if data.get("next"):
        csv_path = original_path + ".csv"
        if request.query_string:
            csv_path += "?" + request.query_string + "&_stream=on"
        else:
            csv_path += "?_stream=on"
        csv_stream_url = datasette.absolute_url(request, csv_path)

    return Response.html(
        await datasette.render_template(
            "export_notebook.html",
            {
                "cors_enabled": datasette.cors,
                "allow_csv_stream": datasette.setting("allow_csv_stream"),
                "back_url": back_url,
                "csv_stream_url": csv_stream_url,
                "pandas_stream_code": pandas_stream_code(
                    csv_stream_url, detect_types(rows)
                ),
                "json_url": json_url,
                "count": count,
                "total_count": total_count,
                "has_next": bool(data.get("next")),
                "json": json,
            },
        )
    )


def pandas_stream_code(csv_stream_url, column_types):
    # Build the pandas.read_csv() snippet shown on the export page, adding a
    # dtype= mapping for any columns that are not plain strings.
    dtype = ""
    non_string_types = {
        col: type for col, type in column_types.items() if type != "str"
    }
    if non_string_types:
        lines = [", dtype={"]
        for column, type in non_string_types.items():
            lines.append("    {}: {},".format(json.dumps(column), type))
        lines.append("}")
        dtype = "\n".join(lines)
    return "df = pandas.read_csv(\n    {}{})".format(json.dumps(csv_stream_url), dtype)
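
# For a table with integer "rowid" and "id" columns the snippet built above
# comes out like this (URL illustrative):
#
#   df = pandas.read_csv(
#       "http://localhost/db/big.csv?_stream=on", dtype={
#       "rowid": int,
#       "id": int,
#   })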


@hookimpl
def register_output_renderer(datasette):
    return {
        "extension": "Notebook",
        "render": render_notebook,
    }

--------------------------------------------------------------------------------
/datasette_export_notebook/templates/export_notebook.html:
--------------------------------------------------------------------------------
{% extends "base.html" %}

{% block title %}Export {{ count }} rows to a notebook{% endblock %}

{% block extra_head %}
<style>
/* style rules elided from this dump */
</style>
{% endblock %}

{% block content %}
<h1>Export {{ count }} rows to a notebook</h1>

<p><a href="{{ back_url }}">Back to the rows</a></p>

<p>You can export this data to a Jupyter or Observable notebook by copying and pasting the following:</p>

<h2>Jupyter</h2>

<p>Make sure you have Pandas. Import it in a cell like this:</p>

<pre>import pandas</pre>

<p>If this shows an error you can run <code>%pip install pandas</code> in a notebook cell to install it.</p>

<p>Now paste the following into a cell to load the {{ count }} row{% if count != 1 %}s{% endif %} into a DataFrame called <code>df</code>:</p>

<pre>df = pandas.read_json(
    {{ json.dumps(json_url) }}
)</pre>

<p>Run <code>df</code> in a new cell to see the table.</p>

{% if allow_csv_stream and csv_stream_url and has_next %}

<p>You can export all {% if total_count %}{{ "{:,}".format(total_count) }} {% endif %}rows using a single streaming CSV export like this:</p>

<pre>{{ pandas_stream_code }}</pre>
{% endif %}

<h2>Observable</h2>

{% if cors_enabled %}

<p>Import the data into a variable called <code>rows</code> like this:</p>

<pre>rows = d3.json(
  {{ json.dumps(json_url) }}
)</pre>

{% if allow_csv_stream and csv_stream_url and has_next %}

<p>You can export all {% if total_count %}{{ "{:,}".format(total_count) }} {% endif %}rows using a single streaming CSV export like this:</p>

<pre>rows = d3.csv(
  {{ json.dumps(csv_stream_url) }},
  d3.autoType
)</pre>
{% endif %}
{% else %}

<p>Export to Observable is only available if Datasette is running with the <code>--cors</code> option.</p>

{% endif %}

<!-- inline <script> block elided from this dump -->

{% endblock %}

--------------------------------------------------------------------------------
/datasette_export_notebook/utils.py:
--------------------------------------------------------------------------------
def detect_types(rows):
    """
    Returns {colname: string_type} for these rows, where
    string_type is one of "int", "str" or "float"
    """
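    # Examples (hypothetical rows):
    #   detect_types([{"id": 1, "name": "bob"}]) -> {"id": "int", "name": "str"}
    #   detect_types([{"f": 3.5}, {"f": None}]) -> {"f": "float"}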
    columns = {}
    for row in rows:
        for column, value in dict(row).items():
            if value is not None:
                columns.setdefault(column, set()).add(type(value).__name__)

    # Only suggest a type if the column had just that type (or that + null)
    suggestions = {}
    for column, types in columns.items():
        if len(types) == 1:
            suggestions[column] = list(types)[0]

    return suggestions

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup
import os

VERSION = "1.0.1"


def get_long_description():
    with open(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"),
        encoding="utf8",
    ) as fp:
        return fp.read()


setup(
    name="datasette-export-notebook",
    description="Datasette plugin providing instructions for exporting data to Jupyter or Observable",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    author="Simon Willison",
    url="https://github.com/simonw/datasette-export-notebook",
    project_urls={
        "Issues": "https://github.com/simonw/datasette-export-notebook/issues",
        "CI": "https://github.com/simonw/datasette-export-notebook/actions",
        "Changelog": "https://github.com/simonw/datasette-export-notebook/releases",
    },
    license="Apache License, Version 2.0",
    version=VERSION,
    packages=["datasette_export_notebook"],
    entry_points={"datasette": ["export_notebook = datasette_export_notebook"]},
    install_requires=["datasette"],
    extras_require={"test": ["pytest", "pytest-asyncio", "sqlite-utils"]},
    tests_require=["datasette-export-notebook[test]"],
    package_data={"datasette_export_notebook": ["templates/*.html"]},
    python_requires=">=3.6",
)

--------------------------------------------------------------------------------
/tests/test_export_notebook.py:
--------------------------------------------------------------------------------
from datasette.app import Datasette
import pytest
import sqlite_utils
import sqlite3
from datasette_export_notebook.utils import detect_types


@pytest.fixture
def db_path(tmpdir):
    db_path = str(tmpdir / "db.db")
    db = sqlite_utils.Database(db_path)
    db["blah"].insert_all({"id": i} for i in range(80))
    db["big"].insert_all({"id": i} for i in range(800))
    return db_path


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "path,expected_json_url,expected_csv_url",
    [
        ("/db/blah.Notebook", "http://localhost/db/blah.json?_shape=array", ""),
        (
            "/db/big.Notebook",
            "http://localhost/db/big.json?_shape=array",
            "http://localhost/db/big.csv?_stream=on",
        ),
    ],
)
async def test_export_notebook(
    db_path,
    path,
    expected_json_url,
    expected_csv_url,
):
    datasette = Datasette([db_path], cors=True)
    response = await datasette.client.get(path)
    assert 200 == response.status_code
    assert "--cors" not in response.text
    # Quotes in the rendered snippets are HTML-escaped to &#34; by Jinja.
    assert (
        """
df = pandas.read_json(
    &#34;{}&#34;
)
""".strip().format(
            expected_json_url
        )
        in response.text
    )
    assert (
        """
rows = d3.json(
  &#34;{}&#34;
)""".strip().format(
            expected_json_url
        )
        in response.text
    )
    if not expected_csv_url:
        assert ".csv" not in response.text
    else:
        assert (
            "df = pandas.read_csv(\n    &#34;{}&#34;".format(expected_csv_url)
            in response.text
        )
        assert "rows = d3.csv(\n  &#34;{}&#34;".format(expected_csv_url) in response.text


@pytest.mark.asyncio
async def test_export_notebook_pandas_stream_with_types(db_path):
    datasette = Datasette([db_path], cors=True)
    response = await datasette.client.get("/db/big.Notebook")
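    # Jinja autoescapes the rendered template, so double quotes inside the
    # generated snippet show up as &#34; in response.text.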
    expected = (
        'df = pandas.read_csv(\n'
        "    &#34;http://localhost/db/big.csv?_stream=on&#34;, dtype={\n"
        "    &#34;rowid&#34;: int,\n"
        "    &#34;id&#34;: int,\n"
        "})"
    )
    assert expected in response.text


@pytest.mark.asyncio
async def test_notebook_no_cors(db_path):
    datasette = Datasette([db_path])
    response = await datasette.client.get("/db/big.Notebook")
    assert (
        "Export to Observable is only available if Datasette is running with the"
        in response.text
    )


@pytest.mark.asyncio
async def test_notebook_no_csv(db_path):
    datasette = Datasette([db_path], settings={"allow_csv_stream": False})
    response = await datasette.client.get("/db/big.Notebook")
    assert ".csv" not in response.text


@pytest.mark.parametrize(
    "rows,expected_types",
    [
        ([{"id": 1}], {"id": "int"}),
        ([{"id": None}], {}),
        ([{"id": 1, "name": "bob"}], {"id": "int", "name": "str"}),
        ([{"f": 3.5}, {"f": None}], {"f": "float"}),
    ],
)
def test_detect_types(rows, expected_types):
    db = sqlite_utils.Database(memory=True)
    db["t"].insert_all(rows)
    db.conn.row_factory = sqlite3.Row
    rows = db.conn.execute("select * from t").fetchall()
    column_types = detect_types(rows)
    assert column_types == expected_types

--------------------------------------------------------------------------------