├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── new_feature.md └── workflows │ ├── ci.yml │ ├── create-release.yaml │ └── push.yaml ├── .gitignore ├── COPYING ├── Dockerfile ├── HACKING.md ├── MANIFEST.in ├── Makefile ├── README.md ├── SECURITY.md ├── alembic.ini ├── csvbase ├── VERSION ├── __init__.py ├── bgwork │ ├── __init__.py │ ├── core.py │ ├── main.py │ ├── sql_scheduler.py │ └── task_registry.py ├── cli.py ├── comments_svc.py ├── config.py ├── constants.py ├── conv.py ├── data │ ├── __init__.py │ ├── prohibited-usernames │ └── spdx-licences.csv ├── datadog.py ├── db.py ├── email.py ├── exc.py ├── follow │ ├── __init__.py │ ├── git.py │ └── update.py ├── http.py ├── json.py ├── logging.py ├── markdown.py ├── models.py ├── py.typed ├── repcache.py ├── sentry.py ├── sesh.py ├── streams.py ├── svc.py ├── table_io.py ├── temp.py ├── userdata │ ├── __init__.py │ ├── core.py │ └── pguserdata.py ├── value_objs.py ├── version.py └── web │ ├── __init__.py │ ├── app.py │ ├── avatars.py │ ├── billing │ ├── __init__.py │ ├── bp.py │ ├── svc.py │ ├── templates │ │ └── pricing.html │ └── value_objs.py │ ├── blog │ ├── __init__.py │ ├── bp.py │ ├── cli.py │ ├── svc.py │ └── value_objs.py │ ├── faq │ ├── __init__.py │ ├── bp.py │ └── entries │ │ ├── basic-auth.md │ │ ├── big.md │ │ ├── csvbase-client-cache.md │ │ ├── duckdb.md │ │ ├── edit-faq.md │ │ ├── email-addresses.md │ │ ├── excel.md │ │ ├── formats.md │ │ ├── git.md │ │ ├── github-pat.md │ │ ├── google-sheets.md │ │ ├── gravatar.md │ │ ├── markdown.md │ │ ├── pandas.md │ │ ├── table-api.md │ │ ├── table-url.md │ │ ├── tools-other.md │ │ ├── usage.md │ │ └── what-is-it.md │ ├── func.py │ ├── main │ ├── __init__.py │ ├── bp.py │ ├── comments_views.py │ └── create_table.py │ ├── schemaorg.py │ ├── static │ ├── .gitignore │ ├── bootstrap-enables.js │ ├── comments.js │ ├── dark-mode-hack.js │ ├── duckdb-screenshot.png │ ├── faq │ │ ├── csvbase-tracking-table.png │ │ ├── excel-01-from-web.png │ │ 
├── excel-02-from-web-dialog.png │ │ ├── excel-03-navigator.png │ │ ├── excel-04-refresh.png │ │ ├── excel-06-connection-properties.png │ │ ├── github-pat-overview.png │ │ ├── google-sheets-date-fix.png │ │ ├── google-sheets-dates-fixed.png │ │ ├── google-sheets-importdata.png │ │ ├── google-sheets-permissions.png │ │ └── google-sheets-table.png │ ├── github-screenshot.png │ ├── icons │ │ ├── praise.svg │ │ ├── praised.svg │ │ ├── private-table.svg │ │ └── public-table.svg │ ├── logo │ │ ├── 128x128.png │ │ └── 192x192.png │ ├── privacy.txt │ ├── site.css │ ├── table-screenshot.png │ └── terms.txt │ ├── templates │ ├── about.html │ ├── app_base.html │ ├── base.html │ ├── blog.html │ ├── blog_base.html │ ├── captcha_macros.html │ ├── change-password.html │ ├── comment-edit.html │ ├── comment_macros.html │ ├── convert.html │ ├── copy.html │ ├── create-table-confirm.html │ ├── create-table-git.html │ ├── email-verification-sent.html │ ├── email-verified.html │ ├── email │ │ └── verify-email.txt │ ├── error-dynamic.html │ ├── faq │ │ ├── faq-entry.html │ │ └── faq-index.html │ ├── form_macros.html │ ├── index.html │ ├── new-blank-table.html │ ├── new-table.html │ ├── other_macros.html │ ├── post.html │ ├── register.html │ ├── row-add.html │ ├── row-view-or-edit.html │ ├── row.html │ ├── row_delete_check.html │ ├── row_macros.html │ ├── sign_in.html │ ├── sitemap.xml │ ├── table.html │ ├── table_api.html │ ├── table_details.html │ ├── table_macros.html │ ├── table_settings.html │ ├── table_view.html │ ├── table_wait.html │ ├── thread.html │ ├── user-settings.html │ ├── user.html │ └── value_macros.html │ ├── turnstile.py │ └── verify_emails.py ├── docker-compose.yml ├── examples ├── load-moocows.sh └── moocows.csv ├── init-schemas.sql ├── logo └── logo.xcf ├── migrations ├── env.py ├── script.py.mako └── versions │ ├── 173e920c9600_add_celery_schedule_entries.py │ ├── 1dfc9b3a690e_create_settings_json.py │ ├── 1ec343a3a7bd_add_praise_table.py │ ├── 
23e66e106c1e_correct_unique_column_table_pkey.py │ ├── 3c8dab82577e_add_stripe_subscription.py │ ├── 47062cc1c8e0_add_unique_columns_table.py │ ├── 5247a5a65c3c_use_a_repo_url_column.py │ ├── 57b35f243fb0_adjust_comment_tables.py │ ├── 5d8f357eca61_add_comment_references_tables.py │ ├── 63cd716e7107_add_copies.py │ ├── 6c7715349588_rename_deleted_to_comments_and_threads.py │ ├── 6d59d431ee77_rename_created_updated_fields.py │ ├── 757b465597b4_add_email_verification_columns.py │ ├── 75a882d6c74e_add_backends.py │ ├── 7dd1bbf902b5_add_mailing_list_column.py │ ├── 878b845f7368_add_stripe_payment_reference_and_.py │ ├── 8951426b65be_add_data_licences.py │ ├── 98e5779863fd_add_github_follows.py │ ├── 9ad42a1ac714_add_thread_and_comments_tables.py │ ├── a0f88c5755b3_initial_migration.py │ ├── bc116d837946_add_table_licences.py │ ├── cb67ce467141_check_constraint_blank_emails.py │ ├── cb79e639ef74_created_licences_table.py │ ├── cf3ddc8fb918_add_last_changed_to_table.py │ ├── eb87fcc5d860_add_user_bio.py │ └── ef0fa56f3fc7_add_prohibited_usernames_table.py ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── requirements-test.txt ├── requirements.txt ├── tests ├── __init__.py ├── conftest.py ├── email_utils.py ├── test-data │ ├── .gitignore │ ├── WID.csv │ ├── abc123.csv │ ├── blank-headers.csv │ ├── empty.csv │ └── headers-only.csv ├── test_billing.py ├── test_blog.py ├── test_comments_crud.py ├── test_config.py ├── test_conv.py ├── test_convert.py ├── test_cors.py ├── test_create_table_from_git.py ├── test_create_table_via_form_post.py ├── test_email.py ├── test_error_pages.py ├── test_faq.py ├── test_http_cache_headers.py ├── test_json.py ├── test_markdown.py ├── test_pandas_compat.py ├── test_pguserdata.py ├── test_praise.py ├── test_quota.py ├── test_readme_crud.py ├── test_repcache.py ├── test_row_crud.py ├── test_row_pagination.py ├── test_seo.py ├── test_sql_scheduler.py ├── test_streams.py ├── test_table_crud.py ├── test_table_html.py ├── test_table_io.py ├── 
test_table_pagination.py ├── test_temp.py ├── test_turnstile.py ├── test_update_external_tables.py ├── test_upstream.py ├── test_user_page.py ├── test_user_settings.py ├── test_value_objs.py ├── test_web.py ├── test_web_session.py ├── utils.py └── value_objs.py └── tox.ini /.dockerignore: -------------------------------------------------------------------------------- 1 | bootstrap-5.3.1-dist.zip 2 | .venv 3 | build 4 | csvbase.egg-info 5 | dist 6 | .tox 7 | Dockerfile -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: File a bug report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Description 11 | 12 | # Steps to reproduce 13 | 14 | 1. Do X 15 | 2. Then Y 16 | 3. And Z 17 | 18 | # Expected result 19 | 20 | An A 21 | 22 | # Actual result 23 | 24 | But in fact B 25 | 26 | # Additional details 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new_feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: User story 3 | about: A plan for implementing some new feature or change 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Brief overview 11 | 12 | AS A **[persona name]** 13 | 14 | I WANT **[some feature or change to be made]** 15 | 16 | SO THAT **[brief description of goal]** 17 | 18 | # Additional details 19 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-20.04 10 | 11 | env: 12 | GITHUB_ACTIONS: 'yes' 13 | PGPASSWORD: 'csvbase' 14 | 15 | services: 16 | 
postgres: 17 | image: postgres:13 18 | env: 19 | POSTGRES_USER: csvbase 20 | POSTGRES_PASSWORD: csvbase 21 | POSTGRES_DB: csvbase 22 | ports: 23 | - 5432:5432 24 | options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Write config file 29 | run: | 30 | cat < ~/.csvbase.toml 31 | secret_key = "snakeoil" 32 | db_url = "postgresql+psycopg2://csvbase:csvbase@localhost/csvbase" 33 | stripe_api_key = "sample" 34 | stripe_price_id = "price_123" 35 | turnstile_site_key = "1x00000000000000000000AA" 36 | turnstile_secret_key = "1x0000000000000000000000000000000A" 37 | smtp_host = "localhost:2525" 38 | EOF 39 | - name: Set up Python 3.9 40 | uses: actions/setup-python@v5 41 | with: 42 | python-version: 3.9 43 | - name: Install required ubuntu packages 44 | run: sudo apt-get update && sudo apt-get install -y libsystemd-dev postgresql-client 45 | - name: Create schemas 46 | run: psql -h localhost -d csvbase -U csvbase -f init-schemas.sql 47 | - name: Use cache of pip downloads 48 | uses: actions/cache@v4 49 | with: 50 | path: ~/.cache/pip 51 | key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} 52 | restore-keys: | 53 | ${{ runner.os }}-pip- 54 | - name: Install tox 55 | run: pip install tox 56 | - name: make static-deps 57 | run: make static-deps 58 | - name: Use cache of tox 59 | uses: actions/cache@v4 60 | with: 61 | path: .tox 62 | key: ${{ runner.os }}-tox 63 | restore-keys: | 64 | ${{ runner.os }}-pip- 65 | - name: Make 66 | run: make 67 | -------------------------------------------------------------------------------- /.github/workflows/create-release.yaml: -------------------------------------------------------------------------------- 1 | name: Create Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | release: 10 | name: Create Release 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | - name: 
Create Release 17 | id: create_release 18 | uses: softprops/action-gh-release@v1 19 | -------------------------------------------------------------------------------- /.github/workflows/push.yaml: -------------------------------------------------------------------------------- 1 | # Disabled as unsupported and was failing. If you want to get it working 2 | # again, PR's most welcome. 3 | # on: 4 | # workflow_dispatch: 5 | # push: 6 | # branches: 7 | # - main 8 | # paths: 9 | # - 'Dockerfile' 10 | # - '.github/workflows/push.yml' 11 | 12 | name: Push docker image to ghcr.io 13 | 14 | jobs: 15 | push: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - 19 | name: Checkout 20 | uses: actions/checkout@v3 21 | - 22 | name: Docker meta 23 | id: meta 24 | uses: docker/metadata-action@v4 25 | with: 26 | images: | 27 | ghcr.io/${{ github.repository }} 28 | tags: | 29 | type=raw,value=1.0.${{ github.run_number }},priority=1000 30 | type=ref,event=branch 31 | type=sha 32 | type=raw,value=latest 33 | - 34 | name: Set up QEMU 35 | uses: docker/setup-qemu-action@v2 36 | - 37 | name: Set up Docker Buildx 38 | uses: docker/setup-buildx-action@v2 39 | - 40 | name: Login to Container Registry 41 | uses: docker/login-action@v2 42 | with: 43 | registry: ghcr.io 44 | username: ${{ github.repository_owner }} 45 | password: ${{ secrets.GITHUB_TOKEN }} 46 | - 47 | name: Docker build and push 48 | uses: docker/build-push-action@v3 49 | with: 50 | context: . 
51 | file: Dockerfile 52 | tags: ${{ steps.meta.outputs.tags }} 53 | labels: ${{ steps.meta.outputs.labels }} 54 | push: true 55 | cache-from: type=gha 56 | cache-to: type=gha,mode=max 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .tox 2 | .venv 3 | *.pyc 4 | bootstrap-5.3.1-dist.zip 5 | *.egg-info 6 | dist 7 | build -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim-bullseye 2 | WORKDIR /app 3 | RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ 4 | --mount=type=cache,target=/var/lib/apt,sharing=locked \ 5 | apt-get update && \ 6 | apt-get -y install libpq5 libpq-dev python3-dev libsystemd-dev build-essential pkg-config curl unzip 7 | 8 | COPY requirements.txt ./ 9 | RUN --mount=type=cache,target=/root/.cache/pip python -m pip install -r requirements.txt 10 | 11 | COPY . . 12 | 13 | RUN pygmentize -S default -f html -a .highlight > csvbase/web/static/codehilite.css 14 | RUN pygmentize -S lightbulb -f html -a .highlight > csvbase/web/static/codehilite-dark.css 15 | RUN make static-deps 16 | 17 | RUN --mount=type=cache,target=/root/.cache/pip python -m pip install -e . 18 | 19 | ENV FLASK_APP=csvbase.web.app:init_app() 20 | ENV FLASK_DEBUG=0 21 | EXPOSE 6001 22 | CMD ["gunicorn", "csvbase.web.app:init_app()", "-b", ":6001"] 23 | 24 | -------------------------------------------------------------------------------- /HACKING.md: -------------------------------------------------------------------------------- 1 | # Hacking on csvbase 2 | 3 | ## Contributions 4 | 5 | Gratefully accepted! 6 | 7 | But please be patient in PRs - csvbase is a natural product and response times 8 | may vary. 9 | 10 | You can always [write to me for help](mailto:cal@calpaterson.com). 
11 | 12 | ## Getting started, with Docker 13 | 14 | Running `docker compose up` should bring up a working instance of the system. 15 | 16 | ## Getting started, with virtual environments 17 | 18 | You can use virtual environments such as python's 19 | [virtualenv](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#installing-virtualenv) 20 | or 21 | [anaconda](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html): 22 | 23 | with virtualenv: 24 | 25 | ```sh 26 | virtualenv venv 27 | source venv/bin/activate 28 | pip install -e .[tests] 29 | ``` 30 | 31 | and anaconda: 32 | 33 | ```sh 34 | conda create -n csvbase python=3.8 35 | conda activate csvbase 36 | pip install -e .[tests] 37 | ``` 38 | 39 | Depending on your base system, you may also need these prerequisites for `pip 40 | install` to work, which are operating system packages, not python: 41 | 42 | - [systemd utility library - development 43 | files](https://packages.debian.org/sid/libsystemd-dev) 44 | - [header files for libpq5 (PostgreSQL 45 | library)](https://packages.debian.org/sid/libpq-dev) 46 | 47 | On most debian/ubuntu systems, this command suffices: 48 | 49 | ```sh 50 | sudo apt install libsystemd-dev libpq-dev 51 | ``` 52 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft csvbase/web/templates 2 | graft csvbase/web/email 3 | graft csvbase/web/billing/templates 4 | graft csvbase/web/static 5 | graft csvbase/web/faq/entries 6 | include requirements.txt 7 | include requirements-test.txt 8 | include csvbase/VERSION 9 | include csvbase/data/prohibited-usernames 10 | include csvbase/data/spdx-licences.csv 11 | global-exclude *.pyc -------------------------------------------------------------------------------- /Makefile: 
-------------------------------------------------------------------------------- 1 | export FLASK_APP = csvbase.web.app:init_app() 2 | export FLASK_ENV = development 3 | 4 | version :=$(file < csvbase/VERSION) 5 | 6 | .PHONY: tox serve serve-gunicorn release default static-deps 7 | 8 | default: tox 9 | 10 | static-deps: csvbase/web/static/codehilite.css csvbase/web/static/codehilite-dark.css csvbase/web/static/bootstrap.min.css csvbase/web/static/bootstrap.bundle.js tests/test-data/sitemap.xsd 11 | 12 | .venv: .venv/touchfile 13 | 14 | .venv/touchfile: pyproject.toml requirements.txt 15 | test -d .venv || python3 -m venv .venv 16 | . .venv/bin/activate; python -m pip install . 17 | touch $@ 18 | 19 | csvbase/web/static/codehilite.css: .venv/touchfile 20 | . .venv/bin/activate; pygmentize -S default -f html -a .highlight > $@ 21 | 22 | csvbase/web/static/codehilite-dark.css: .venv/touchfile 23 | . .venv/bin/activate; pygmentize -S lightbulb -f html -a .highlight > $@ 24 | 25 | serve: .venv static-deps 26 | . .venv/bin/activate; flask run -p 6001 27 | 28 | serve-gunicorn: .venv static-deps 29 | . 
.venv/bin/activate; gunicorn -w 1 '$FLASK_APP' --access-logfile=- -t 30 -b :6001 30 | 31 | tox: static-deps 32 | tox -e py39 33 | 34 | bootstrap-5.3.1-dist.zip: 35 | curl -O -L https://github.com/twbs/bootstrap/releases/download/v5.3.1/bootstrap-5.3.1-dist.zip 36 | 37 | csvbase/web/static/bootstrap.min.css: bootstrap-5.3.1-dist.zip 38 | unzip -p bootstrap-5.3.1-dist.zip bootstrap-5.3.1-dist/css/bootstrap.min.css > $@ 39 | 40 | csvbase/web/static/bootstrap.bundle.js: bootstrap-5.3.1-dist.zip 41 | unzip -p bootstrap-5.3.1-dist.zip bootstrap-5.3.1-dist/js/bootstrap.bundle.js > $@ 42 | 43 | tests/test-data/sitemap.xsd: 44 | curl -s -L https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd > $@ 45 | 46 | dump-schema: 47 | pg_dump -d csvbase --schema-only --schema=metadata 48 | 49 | release: dist/csvbase-$VERSION-py3-none-any.whl 50 | 51 | dist/csvbase-$VERSION-py3-none-any.whl: static-deps 52 | . .venv/bin/activate; python -m pip install build==1.2.1 53 | . .venv/bin/activate; python -m build 54 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security policy 2 | 3 | ## Reporting a vulnerability 4 | 5 | Please DO NOT open an issue on Github for a security issue. 6 | 7 | Please avoid publicly disclosing a security issue for at least 90 days after 8 | discovering it. 9 | 10 | Instead, email cal@calpaterson.com to co-ordinate a fix and public disclosure 11 | of the issue. 12 | 13 | I'm happy to give you full credit for any issues you discover. 
14 | -------------------------------------------------------------------------------- /csvbase/VERSION: -------------------------------------------------------------------------------- 1 | 2024.9.5 -------------------------------------------------------------------------------- /csvbase/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/__init__.py -------------------------------------------------------------------------------- /csvbase/bgwork/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import * # noqa: F403 2 | -------------------------------------------------------------------------------- /csvbase/bgwork/core.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | 3 | from celery import Celery, Task 4 | from flask import Flask 5 | 6 | from csvbase.config import Config 7 | from csvbase.db import get_db_url 8 | 9 | logger = getLogger(__name__) 10 | 11 | celery = Celery("csvbase.bgwork") 12 | 13 | 14 | def initialise_celery(flask_app: Flask, config: Config) -> None: 15 | celery.conf["broker_url"] = config.celery_broker_url 16 | celery.conf["worker_hijack_root_logger"] = False 17 | 18 | # This retrying on startup is liable to cause confusion. If the broker is 19 | # initially down, best to just crash. 20 | celery.conf["broker_connection_retry_on_startup"] = False 21 | 22 | celery.conf["beat_scheduler"] = "csvbase.bgwork.sql_scheduler:SQLAlchemyScheduler" 23 | celery.conf["beat_sqlalchemy_scheduler_db_url"] = get_db_url() 24 | 25 | # Make sure the flask app context is pushed for all tasks. 
26 | class FlaskContextTask(Task): 27 | def __call__(self, *args, **kwargs): 28 | with flask_app.app_context(): 29 | return self.run(*args, **kwargs) 30 | 31 | celery.task_cls = FlaskContextTask # type: ignore 32 | celery.set_default() 33 | logger.info("initialised celery") 34 | -------------------------------------------------------------------------------- /csvbase/bgwork/main.py: -------------------------------------------------------------------------------- 1 | """This module is intended only to be run by the celery binary.""" 2 | 3 | from csvbase.config import get_config 4 | from csvbase.web.app import init_app 5 | from .core import initialise_celery, celery # noqa: F401 6 | from . import task_registry # noqa: F401 7 | 8 | flask_app = init_app() 9 | initialise_celery(flask_app, get_config()) 10 | -------------------------------------------------------------------------------- /csvbase/config.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | from pathlib import Path 3 | from typing import Optional 4 | from dataclasses import dataclass 5 | 6 | import toml 7 | 8 | logger = getLogger(__name__) 9 | 10 | 11 | @dataclass 12 | class Config: 13 | """A typecheckable config object. 14 | 15 | In order to keep the benefits of typechecking, don't pass this into places 16 | where the typechecker can't find it - ie: jinja templates. 
17 | 18 | """ 19 | 20 | db_url: str 21 | environment: str 22 | blog_ref: Optional[str] 23 | secret_key: Optional[str] 24 | sentry_dsn: Optional[str] 25 | stripe_api_key: Optional[str] 26 | stripe_price_id: Optional[str] 27 | enable_datadog: bool 28 | x_accel_redirect: bool 29 | smtp_host: Optional[str] 30 | memcache_server: Optional[str] 31 | 32 | # configuration for Cloudflare turnstile (a captcha tool) 33 | turnstile_site_key: Optional[str] 34 | turnstile_secret_key: Optional[str] 35 | 36 | celery_broker_url: Optional[str] = "redis://localhost/3" 37 | 38 | 39 | __config__: Optional[Config] = None 40 | 41 | 42 | def load_config(config_file: Path) -> Config: 43 | """Loads the configuration at the given path. 44 | 45 | Currently this doesn't really validate the config - but that is planned. 46 | 47 | """ 48 | logger.info("loading config from %s", config_file) 49 | if config_file.exists(): 50 | with open(config_file, encoding="utf-8") as config_f: 51 | as_dict = toml.load(config_f) 52 | else: 53 | logger.warning("config file ('%s') not found, using defaults", config_file) 54 | as_dict = {} 55 | return Config( 56 | db_url=as_dict.get("db_url", "postgresql:///csvbase"), 57 | environment=as_dict.get("environment", "local"), 58 | blog_ref=as_dict.get("blog_ref"), 59 | secret_key=as_dict.get("secret_key"), 60 | sentry_dsn=as_dict.get("sentry_dsn"), 61 | stripe_price_id=as_dict.get("stripe_price_id"), 62 | stripe_api_key=as_dict.get("stripe_api_key"), 63 | enable_datadog=as_dict.get("enable_datadog", False), 64 | x_accel_redirect=as_dict.get("x_accel_redirect", False), 65 | turnstile_site_key=as_dict.get("turnstile_site_key"), 66 | turnstile_secret_key=as_dict.get("turnstile_secret_key"), 67 | smtp_host=as_dict.get("smtp_host"), 68 | memcache_server=as_dict.get("memcache_server"), 69 | ) 70 | 71 | 72 | def default_config_file() -> Path: 73 | """Returns the location of the default config file""" 74 | return Path.home() / ".csvbase.toml" 75 | 76 | 77 | def get_config() -> 
Config: 78 | """Returns the config. 79 | 80 | The the config does not change while the program is running, but in order 81 | to make it easy to test, don't call this function from the top-level (that 82 | makes it hard to mock). 83 | 84 | """ 85 | global __config__ 86 | if __config__ is None: 87 | __config__ = load_config(default_config_file()) 88 | 89 | return __config__ 90 | -------------------------------------------------------------------------------- /csvbase/constants.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from uuid import UUID 3 | 4 | # 128KB, a reasonable size for buffers 5 | COPY_BUFFER_SIZE = 128 * 1024 6 | 7 | MIN_UUID = UUID("0" * 32) 8 | 9 | MAX_UUID = UUID("f" * 32) 10 | 11 | FAR_FUTURE = datetime(9999, 1, 1) 12 | -------------------------------------------------------------------------------- /csvbase/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/data/__init__.py -------------------------------------------------------------------------------- /csvbase/data/prohibited-usernames: -------------------------------------------------------------------------------- 1 | # -*- mode: conf -*- 2 | # these should all be singular, pluralised versions are added programmatially 3 | # when loaded 4 | about 5 | api 6 | archive 7 | articles 8 | billing 9 | blog 10 | blog-static 11 | column 12 | column-type 13 | comment 14 | content 15 | content-type 16 | conversion 17 | convert 18 | cross-table 19 | csv 20 | csvbase 21 | data 22 | data-licence 23 | dataset 24 | docs 25 | download 26 | export 27 | faq 28 | feed 29 | fk 30 | foreign 31 | foreign-key 32 | form 33 | form-builder 34 | function 35 | graphql 36 | html 37 | join 38 | json 39 | keyset 40 | licences 41 | live 42 | new 43 | new-table 44 | newest 45 | ok 46 | page 47 | paste 48 
| praise 49 | pricing 50 | readme 51 | ready 52 | register 53 | rest 54 | row 55 | row-id 56 | schema 57 | sign-in 58 | sign-out 59 | status 60 | sub 61 | subscription 62 | system 63 | table 64 | tableset 65 | test 66 | thread 67 | trigger 68 | type 69 | user 70 | view 71 | webhook -------------------------------------------------------------------------------- /csvbase/datadog.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | from .config import get_config 3 | 4 | logger = getLogger(__name__) 5 | 6 | 7 | def configure_datadog(): 8 | config = get_config() 9 | if config.enable_datadog: 10 | logger.info("enabling datadog") 11 | import ddtrace.auto # noqa: F401 12 | else: 13 | logger.info("not enabling datadog") 14 | -------------------------------------------------------------------------------- /csvbase/db.py: -------------------------------------------------------------------------------- 1 | from flask_sqlalchemy import SQLAlchemy 2 | 3 | from csvbase.config import get_config 4 | 5 | db = SQLAlchemy(engine_options={"future": True}, session_options={"future": True}) 6 | 7 | 8 | def get_db_url() -> str: 9 | return get_config().db_url 10 | -------------------------------------------------------------------------------- /csvbase/email.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | from typing import Optional, Generator 3 | from smtplib import SMTP 4 | from email.message import EmailMessage 5 | import urllib.parse 6 | import contextlib 7 | import email.policy 8 | 9 | from csvbase.config import get_config 10 | 11 | logger = getLogger(__name__) 12 | 13 | # A short timeout because this is designed to send to a local MTA 14 | SMTP_TIMEOUT = 1 15 | 16 | 17 | def get_smtp_host_port() -> tuple[str, int]: 18 | parsed = urllib.parse.urlparse(f"//{get_config().smtp_host}") 19 | return parsed.hostname, parsed.port # type: ignore 20 | 
21 | 22 | @contextlib.contextmanager 23 | def make_smtp_sesh() -> Generator[SMTP, None, None]: 24 | host, port = get_smtp_host_port() 25 | try: 26 | smtp_sesh = SMTP(host, port, timeout=SMTP_TIMEOUT) 27 | logger.debug("SMTP connection created to %s:%d", host, port) 28 | yield smtp_sesh 29 | finally: 30 | smtp_sesh.quit() 31 | logger.debug("Closed SMTP connection with %s:%d", host, port) 32 | 33 | 34 | def email_is_enabled() -> bool: 35 | return get_config().smtp_host is not None 36 | 37 | 38 | def validate(message: EmailMessage) -> None: 39 | """Check that this email meets the requirements for outgoing emails.""" 40 | if "message-id" not in message: 41 | raise RuntimeError("Must set a message id") 42 | message.policy = email.policy.default 43 | 44 | 45 | def send(message: EmailMessage, smtp_sesh: Optional[SMTP] = None) -> None: 46 | """Send an email.""" 47 | validate(message) 48 | if email_is_enabled(): 49 | if smtp_sesh is None: 50 | with make_smtp_sesh() as smtp_sesh: 51 | smtp_sesh.send_message(message) 52 | else: 53 | smtp_sesh.send_message(message) 54 | logger.info("Sent email: %s", message["message-id"]) 55 | 56 | 57 | class Outbox: 58 | """A "transactional outbox" that allows queuing up email to send and then 59 | sending them all at once at the end. 
60 | 61 | """ 62 | 63 | def __init__(self, smtp_sesh: SMTP) -> None: 64 | self.smtp_sesh = smtp_sesh 65 | self.stack: list[EmailMessage] = [] 66 | 67 | def enqueue(self, message: EmailMessage) -> None: 68 | validate(message) 69 | self.stack.append(message) 70 | 71 | def flush(self) -> None: 72 | while len(self.stack) > 0: 73 | message = self.stack.pop(0) 74 | send(message, self.smtp_sesh) 75 | -------------------------------------------------------------------------------- /csvbase/follow/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /csvbase/follow/update.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence 2 | from logging import getLogger 3 | 4 | from sqlalchemy.orm import Session 5 | 6 | from ..value_objs import ( 7 | ROW_ID_COLUMN, 8 | Column, 9 | Table, 10 | UpstreamFile, 11 | ) 12 | from .. 
import svc, streams, table_io 13 | from ..userdata import PGUserdataAdapter 14 | 15 | logger = getLogger(__name__) 16 | 17 | 18 | def update_external_table( 19 | sesh: Session, 20 | backend: PGUserdataAdapter, 21 | table: Table, 22 | upstream_file: UpstreamFile, 23 | ) -> None: 24 | logger.info("updating %s/%s", table.username, table.table_name) 25 | str_buf = streams.byte_buf_to_str_buf(upstream_file.filelike) 26 | dialect, csv_columns = streams.peek_csv(str_buf, table.columns) 27 | rows = table_io.csv_to_rows(str_buf, csv_columns, dialect) 28 | key_column_names = svc.get_key(sesh, table.table_uuid) 29 | key: Sequence[Column] 30 | if len(key_column_names) > 0: 31 | key = [c for c in table.user_columns() if c.name in key_column_names] 32 | else: 33 | key = (ROW_ID_COLUMN,) 34 | backend.upsert_table_data(table, csv_columns, rows, key=key) 35 | svc.set_version(sesh, table.table_uuid, upstream_file.version) 36 | -------------------------------------------------------------------------------- /csvbase/http.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | BASIC_TIMEOUT = (6.1, 10) 4 | 5 | http_sesh = requests.Session() 6 | -------------------------------------------------------------------------------- /csvbase/json.py: -------------------------------------------------------------------------------- 1 | from typing import Union, Sequence, Dict, Any, Optional 2 | from datetime import date 3 | import functools 4 | 5 | from . 
@functools.lru_cache(maxsize=128)
def json_to_value(
    column_type: ColumnType, json_value: JsonType
) -> Optional["PythonType"]:
    """Convert a 'json value' (ie: something returned from Python's json
    parser) into a value ready to be put into a Row.

    Raises UnconvertableValueException when the json value cannot be
    represented in the given column type.
    """
    if json_value is None:
        return None
    if isinstance(json_value, str):
        if column_type is ColumnType.TEXT:
            return json_value
        if column_type is ColumnType.DATE:
            try:
                return date.fromisoformat(json_value)
            except ValueError as e:
                raise exc.UnconvertableValueException(column_type, json_value) from e
        raise exc.UnconvertableValueException(column_type, json_value)
    # NOTE: bool must be tested before the numeric case because in Python
    # bool is a subclass of int
    if isinstance(json_value, bool) and column_type is ColumnType.BOOLEAN:
        return json_value
    if isinstance(json_value, (float, int)):
        if column_type is ColumnType.FLOAT:
            return float(json_value)
        if column_type is ColumnType.INTEGER:
            return int(json_value)
        raise exc.UnconvertableValueException(column_type, str(json_value))
    # anything else (eg a dict or a list) cannot go in a cell
    raise exc.UnconvertableValueException(column_type, str(json_value))
# logging module doesn't provide an easy way to get this
LOG_LEVELS = [
    "CRITICAL",
    "ERROR",
    "WARNING",
    "INFO",
    "DEBUG",
]

# module-level flag so that repeated calls to configure_logging are no-ops
CONFIGURED = False

logger = logging.getLogger(__name__)


def configure_logging(level: str = "INFO"):
    """Configure our logging - stderr by default but logging nicely to the
    journal under systemd."""
    global CONFIGURED
    if CONFIGURED:
        return
    # systemd sets INVOCATION_ID in the environment of units it launches
    if "INVOCATION_ID" in environ:
        from systemd.journal import JournalHandler

        # the journal records timestamps/levels itself, so just the message
        basic_config_kwargs: Dict[str, Any] = {
            "level": level,
            "format": "%(message)s",
            "handlers": [JournalHandler()],
        }
    else:
        basic_config_kwargs = {
            "level": level,
            "format": "%(asctime)s %(levelname)-8s %(name)-35s - %(message)s",
            "stream": stderr,
        }
    logging.basicConfig(**basic_config_kwargs)
    CONFIGURED = True
    logger.info("logging was configured (level = %s)", level)
def set_user(user: User) -> None:
    """Set the user in sentry.

    This allows knowing how many people were affected by a bug."""
    config = get_config()
    if config.sentry_dsn is None:
        # sentry isn't enabled, nothing to record
        return
    # wanted to avoid setting username/email but so hard to tell who is
    # experiencing what bug, so set it for now
    user_dict = {
        "id": str(user.user_uuid),
        "username": user.username,
    }
    if user.email is not None:
        user_dict["email"] = user.email
    sentry_sdk.set_user(user_dict)
def store_temp_file(
    filelike: IO[bytes], duration: timedelta = DEFAULT_RETENTION
) -> str:
    """Gzip the given filelike into the temp dir and return an opaque id.

    The file can be fetched again (within `duration`) via
    retrieve_temp_file; expired files are cleaned up opportunistically.
    """
    cleanup_temp_files()
    file_id = secrets.token_urlsafe()
    expires_at = datetime.now(timezone.utc) + duration
    # probably no reason to care about ntfs here, but colons make it go bonkers
    # so omit them
    expiry_stamp = expires_at.isoformat().replace(":", "_")
    temp_path = get_temp_dir() / f"expires{expiry_stamp}__{file_id}.gz"
    with rewind(filelike), gzip.open(temp_path, mode="wb") as gz_file:
        shutil.copyfileobj(filelike, gz_file)
    return file_id
from typing import Any, List

from .pguserdata import PGUserdataAdapter

# NOTE: __all__ must contain *names* (strings), not the objects themselves;
# with the class object in the list, `from csvbase.userdata import *` would
# raise TypeError ("attribute name must be string").
__all__: List[str] = ["PGUserdataAdapter"]
def get_version() -> str:
    """Return csvbase's version string, read from the packaged VERSION file."""
    # the file handle returned by .open() is itself a context manager, so no
    # need for contextlib.closing here
    version_resource = importlib_resources.files("csvbase").joinpath("VERSION")
    with version_resource.open("r") as text_f:
        return text_f.read().strip()
@bp.get("/avatars/<username>")
def image(username: str) -> FlaskResponse:
    """Serve a user's avatar, proxied from gravatar.

    Fix: the route previously read "/avatars/" with no URL converter, so
    Flask could never supply the `username` argument to this view; the
    `<username>` placeholder is required.
    """
    sesh = get_sesh()
    user = svc.user_by_name(sesh, username)
    gravatar_response = get_gravatar(user)
    response = make_response(gravatar_response.content)
    # pass gravatar's content type through, with a conservative fallback
    response.headers.set(
        "Content-Type",
        gravatar_response.headers.get("Content-Type", "application/octet-stream"),
    )

    # cache it for a few minutes, and get CDNs to coalesce reqs (no support for
    # this in werkzeug yet)
    response.headers.set("Cache-control", "max-age=300, stale-while-revalidate=300")
    return response
8 |
9 |
10 |
11 | logo 12 |

Become a supporter

13 |
14 |
15 |
16 |
17 |
18 |

19 | csvbase is open 20 | source software, available under the terms of the AGPLv3 or 21 | later. 22 |

23 |

24 | It is also free to use for all open data. 25 |

26 |

27 | If you find csvbase useful, or simply think it is cool: please consider 28 | becoming a paid supporter for €3 a month. 29 |

30 |

31 | There is also a 10% discount for paying annually, as this helps with our 32 | cashflow. 33 |

34 |

Supporting csvbase: 35 |

    36 |
  • Helps cover operating expenses
  • 37 |
  • Supports further development
  • 38 |
  • Allows the creation of private tables
  • 39 |
40 |

41 |
42 |
43 | 44 |
45 |
46 |
47 | {% if has_subscription %} 48 |

You are already subscribed. Manage your subscription. 49 | {% else %} 50 |

51 | 55 |
56 | {% endif %} 57 |
58 |

Have a question? Feel free to get in contact

59 |

60 |
61 |
62 |
63 |
@enum.unique
class StripeSubscriptionStatus(enum.Enum):
    """Our model of Stripe's own statuses for subscriptions.

    https://stripe.com/docs/api/subscriptions/object#subscription_object-status

    """

    # auto() numbers members 1..8 in definition order, matching the
    # previous explicit values
    ACTIVE = enum.auto()
    PAST_DUE = enum.auto()
    UNPAID = enum.auto()
    CANCELED = enum.auto()
    INCOMPLETE = enum.auto()
    INCOMPLETE_EXPIRED = enum.auto()
    TRIALING = enum.auto()
    PAUSED = enum.auto()
import svc 2 | 3 | import click 4 | 5 | from csvbase.web.app import init_app 6 | from csvbase.sesh import get_sesh 7 | 8 | 9 | @click.command() 10 | def make_blog_table(): 11 | with init_app().app_context(): 12 | sesh = get_sesh() 13 | svc.make_blog_table(sesh) 14 | sesh.commit() 15 | -------------------------------------------------------------------------------- /csvbase/web/blog/value_objs.py: -------------------------------------------------------------------------------- 1 | from datetime import date 2 | from typing import Optional 3 | from uuid import UUID 4 | from dataclasses import dataclass 5 | 6 | 7 | @dataclass 8 | class Post: 9 | id: int 10 | title: str 11 | uuid: UUID 12 | description: str 13 | draft: bool 14 | markdown: str 15 | cover_image_url: str 16 | cover_image_alt: str 17 | posted: Optional[date] = None 18 | thread_slug: Optional[str] = None 19 | 20 | def render_posted(self) -> str: 21 | if self.posted is not None: 22 | return self.posted.isoformat() 23 | else: 24 | return "(not posted yet)" 25 | -------------------------------------------------------------------------------- /csvbase/web/faq/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/basic-auth.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | ## When an API key is needed and when it one not 12 | 13 | Most usage of csvbase will never need an API key. 14 | 15 | If you are just using `GET` on public tables there is no need to supply an API 16 | key. You do not even have to register. 17 | 18 | If you are doing write operations - eg `PUT` (create/overwrite), `POST` 19 | (append) or `DELETE` (self-explanatory) then you do need to provide an API key. 20 | 21 | If the table is *private* then even `GET` requests require an API key. 
22 | 23 | ## Where to find your API key 24 | 25 | Your API key is on your user page - `https://csvbase.com/` (but visible only to you). 26 | 27 | It's a 32 character string and looks like this: `0123456789abcdef0123456789abcdef`. 28 | 29 | ## Basic Auth 30 | 31 | csvbase authentication is done via HTTP Basic Auth. 32 | 33 | Your username is your username. 34 | 35 | Your **API Key** is your password. Please do not put your site password in the 36 | password field. 37 | 38 | Usually, tools that you use allow you to supply a table URL that includes the 39 | username and password. For example: 40 | 41 | `https://calpaterson:0123456789abcdef0123456789abcdef@csvbase.com/calpaterson/countries` 42 | 43 | Occasionally, some tools will have a separate field where you enter the 44 | username and password. 45 | 46 | ## The `~/.netrc` file 47 | 48 | On unix systems, you can put your username and API key into your `~/.netrc` 49 | file. Many tools will pick up credentials in this file and use them 50 | automatically. 51 | 52 | Curl will use credentials from this file if you pass the `-n` argument. 53 | 54 | Here is a sample file: 55 | 56 | ```ini 57 | machine csvbase.com 58 | login calpaterson 59 | password 0123456789abcdef0123456789abcdef 60 | ``` 61 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/big.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | ## When is a table considered big on csvbase? 10 | 11 | csvbase considers a table as "big" if it has more than 1,048,576 rows. 12 | 13 | ## What is different about big tables? 14 | 15 | You need to be patient when working with big tables! 16 | 17 | Big tables are still supported but many operations on them happen 18 | asynchronously. For example, you might have to wait before downloading the 19 | table in a certain file format (while csvbase generates it for you). 
20 | 21 | Occasionally, csvbase might return the HTTP status code 503 for things to do 22 | with big tables. That doesn't mean that anything is broken, only the csvbase 23 | is still working on it. 24 | 25 | The HTTP header `Retry-After` will be set to give API clients a hint of how 26 | long to wait before retrying the request. 27 | 28 | ## Why 1,048,576 rows? 29 | 30 | There is no agreed definition of "Big Data" but one useful definition is "too 31 | big for Excel". Microsoft Excel imposes a hard limit of 1,048,576 rows per 32 | sheet. 33 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/csvbase-client-cache.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | The cache built into csvbase-client caches downloaded files, usually in 10 | `~/.cache/csvbase-client`. 11 | 12 | Before returning any data to you, the client first checks whether the cached 13 | data is up-to-date with the server, so no stale data is ever returned from the 14 | cache. 15 | 16 | The size of the cache is currently limited at 100mb. 17 | 18 | You can see the contents via: 19 | 20 | ```bash 21 | csvbase-client cache show 22 | ``` 23 | 24 | And you can wipe the cache with 25 | 26 | ``` 27 | csvbase-client cache wipe 28 | ``` 29 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/duckdb.md: -------------------------------------------------------------------------------- 1 | 9 | 10 | ## Reading csvbase tables into the `duckdb` shell 11 | 12 | When using the DuckDB shell (ie: you've run `duckdb`), you can read any table 13 | from csvbase with a line like the following: 14 | 15 | ```sql 16 | select * from read_parquet("https://csvbase.com/meripaterson/stock-exchanges.parquet"); 17 | ``` 18 | 19 | The above uses DuckDB's [httpfs](https://duckdb.org/docs/extensions/httpfs/overview.html) extension. 
It also uses csvbase's Parquet output format, which works well with DuckDB. 20 | 21 | You can also use plain csv: 22 | 23 | ```sql 24 | select * from read_csv_auto("https://csvbase.com/meripaterson/stock-exchanges.csv"); 25 | ``` 26 | 27 | But that is, generally, slower. 28 | 29 | ## Read and write access from the `duckdb` Python driver 30 | 31 | If you're using the Python driver for duckdb you can also use `csvbase-client` 32 | to write back to csvbase. 33 | 34 | First, install both duckdb ([the Python 35 | library](https://pypi.org/project/duckdb/)) and 36 | [csvbase-client](https://pypi.org/project/csvbase-client/). 37 | 38 | ```bash 39 | # install duckdb and the csvbase-client 40 | pip install duckdb csvbase-client 41 | ``` 42 | 43 | Then, in Python: 44 | 45 | ```python 46 | import duckdb, fsspec 47 | 48 | # teach DuckDB the csvbase:// url scheme 49 | duckdb.register_filesystem(fsspec.filesystem('csvbase')) 50 | 51 | # create a duckdb table called "stock_exchanges" 52 | duckdb.sql(""" 53 | CREATE TABLE stock_exchanges 54 | AS FROM read_csv_auto('csvbase://meripaterson/stock-exchanges') 55 | """) 56 | 57 | # write that local duckdb table back to my own csvbase account as a public table 58 | duckdb.sql(""" 59 | COPY stock_exchanges TO 60 | 'csvbase://calpaterson/duckdb-example?public=true' (HEADER, DELIMITER ',') 61 | """) 62 | ``` 63 | 64 | Note the following: 65 | 66 | 1. To avoid accidents, tables are created as private by default, so add 67 | `?public=true` when first posting to create a public table 68 | 2. 
Currently the csvbase-client [works only with csv, not 69 | parquet](https://github.com/calpaterson/csvbase-client/issues/1) 70 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/edit-faq.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | The files behind this FAQ are kept in the csvbase repo on github, inside 10 | [csvbase/web/faq/entries](https://github.com/calpaterson/csvbase/tree/main/csvbase/web/faq/entries). 11 | 12 | The format is markdown, with a leading HTML comment containing TOML metadata. 13 | 14 | If you find an error or want to add something, please feel free to submit [an 15 | issue](https://github.com/calpaterson/csvbase/issues/new/choose) or a even 16 | [pull request](https://github.com/calpaterson/csvbase/compare). 17 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/email-addresses.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | Providing an email is optional on csvbase. 10 | 11 | Your email address is used for: 12 | 1. (Manual) password reset 13 | 2. Displaying your Gravatar 14 | - Opt-in, for privacy reasons 15 | 16 | csvbase does not currently send any emails. 17 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/excel.md: -------------------------------------------------------------------------------- 1 | 9 | 10 | ## Loading data 11 | 12 | 1. On the Ribbon, go to the **Data** tab, then select **From web** 13 | 14 | ![](/static/faq/excel-01-from-web.png) 15 | 16 | 2. Enter in the table url, with `.xlsx` added on the end and click "Ok". 17 | 18 | ![](/static/faq/excel-02-from-web-dialog.png) 19 | 20 | 3. Check the preview and click "Load data" 21 | 22 | ![](/static/faq/excel-03-navigator.png) 23 | 24 | This will load the table as a separate sheet in your workbook. 
25 | 26 | You can refresh the table from csvbase by clicking **Refresh** on the tab on 27 | the Ribbon. 28 | 29 | ![](/static/faq/excel-04-refresh.png) 30 | 31 | Or, alternatively, you can configure the frequency of the refresh with the 32 | connection properties: 33 | 34 | ![](/static/faq/excel-06-connection-properties.png) 35 | 36 | This allows you to create spreadsheets that always stay up to date. 37 | 38 | ## Sheet naming conventions 39 | 40 | Excel enforces some limitations on sheet names (`/` is not allowed) so csvbase 41 | names the sheets within the XLSX format by the following convention: 42 | 43 | `;` 44 | 45 | And they are truncated if they exceed 31 characters. 46 | 47 | ## Size limits 48 | 49 | Excel does not support sheets longer than 1,048,576 rows so such tables are not 50 | available in XLSX format. 51 | 52 | ## Writing data from Excel 53 | 54 | csvbase supports writing data back, but as far as we know Excel does not. If 55 | you know of a way, please [open a bug report on 56 | github](https://github.com/calpaterson/csvbase/issues). 57 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/formats.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | csvbase supports the following formats: 12 | 13 | | Format name | File extension | HTTP content type | Paged | 14 | |-----------------------------------------------------------------------------------------|----------------|---------------------------------------------------------------------|-------| 15 | | HTML | `.html` | `text/html` | Yes | 16 | | [JSON](https://en.wikipedia.org/wiki/JSON) | `.json` | `application/json` | Yes | 17 | | CSV ([Comma separated variables](https://en.wikipedia.org/wiki/Comma-separated_values)) | `.csv`. 
| `text/csv` | No | 18 | | [Parquet](https://en.wikipedia.org/wiki/Apache_Parquet) | `.parquet` | `application/parquet` [non-standard] | No | 19 | | [JSON lines](https://jsonlines.org/) | `.jsonl` | `application/x-jsonlines` [non-standard] | No | 20 | | [Microsoft Excel](https://en.wikipedia.org/wiki/Office_Open_XML) | `.xlsx` | `application/vnd.openxmlformats-officedocument.spreadsheetml.sheet` | No | 21 | 22 | To download a table in a given format, you have three options: 23 | 1. Use the dropdown in the web UI 24 | 2. Append the file extension to the url 25 | - eg [csvbase.com/meripaterson/stock-exchanges.**xlsx**](https://csvbase.com/meripaterson/stock-exchanges.xlsx) 26 | 3. Set the HTTP Accept header (in your HTTP client) 27 | - eg `curl -H 'Accept: application/x-jsonlines' https://csvbase.com/meripaterson/stock-exchanges` 28 | 29 | For "paged" formats (like JSON) you will need to go through the dataset page by 30 | page to read all of it. For unpaged formats (like Parquet or Microsoft Excel) 31 | you will just recieve the entire table as a single file. 32 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/git.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | csvbase can provide read (and write) access to csv files stored in a git repo. 10 | 11 | ![screenshot of a csvbase table tracking a github 12 | file](/static/faq/csvbase-tracking-table.png) 13 | 14 | To create a table with a git upstream, visit the [new table (from 15 | git)](/new-table/git) page and fill out the form. 16 | 17 | When you edit table with a git upstream (via the API, via the website, however) 18 | that change will be commited and pushed to your git repo. 19 | 20 | If your github repo is public and read-only access is all you need, that's it. 
21 | 22 | However if your repo is private or you want to be able to edit your data on 23 | csvbase you will need to provide authentication in your repository url, for 24 | example as: 25 | 26 | `https://calpaterson:github_pat_ABCD1234@github.com/calpaterson/csvbase.git` 27 | 28 | To find out how to generate a suitable personal access token for github, [see 29 | that FAQ](/github-pat). 30 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/github-pat.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | csvbase can provide read (and write) access to csv files stored in a git repo. 10 | 11 | If your repository is private or if you want to be able to edit your data on 12 | csvbase, you will need to provide a ["fine grained personal access 13 | token"](https://github.com/settings/tokens?type=beta). 14 | 15 | When [generating a new 16 | token](https://github.com/settings/personal-access-tokens/new) csvbase requires 17 | the following *Repository permissions*: 18 | 19 | 1. **Contents** - "Repository contents, commits, branches, downloads, releases, and 20 | merges." 21 | - **Read & write** allows csvbase to both read the repo, and write to it 22 | 2. Webhooks - "Manage the post-receive hooks for a repository." 23 | - **Read & write** allows csvbase to add webhooks ([in the near 24 | future](https://github.com/calpaterson/csvbase/issues/125)) to stay up to 25 | date instantly instead of having to periodically poll for changes 26 | 3. **Metadata** 27 | - **Read-only** is default and mandatory for all tokens. csvbase does not 28 | currently have any use for this permission. 
29 | 30 | ![github PAT overview](/static/faq/github-pat-overview.png) 31 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/google-sheets.md: -------------------------------------------------------------------------------- 1 | 9 | 10 | ## Loading data 11 | 12 | Google Sheets provides the special function 13 | [`IMPORTDATA`](https://support.google.com/docs/answer/3093335) which allows 14 | loading csv files from urls. Use it like this: 15 | 16 | ``` 17 | =IMPORTDATA("https://csvbase.com/meripaterson/stock-exchanges.csv") 18 | ``` 19 | 20 | ![screenshot of IMPORTDATA](/static/faq/google-sheets-importdata.png) 21 | 22 | This function loads the data into the cells below (and to the right of) the 23 | cell in which it is entered: 24 | 25 | ![screenshot of a csvbase table loaded in google sheets](/static/faq/google-sheets-table.png) 26 | 27 | Depending on the permissions on your Google sheet, you may be prompted to click 28 | though a dialog box to allow access to external sources: 29 | 30 | ![screenshot of a permissions-check dialog box](/static/faq/google-sheets-permissions.png) 31 | 32 | ## Dates 33 | 34 | Unfortunately Google Sheets does not parse ISO dates correctly from csv files 35 | (a very similar issue to Excel). 36 | 37 | This is very easily fixed by setting the cell format as so: 38 | 39 | ![screenshot of setting the cell formatting to "Date"](/static/faq/google-sheets-date-fix.png) 40 | 41 | Resulting in dates appearing correctly in the sheet: 42 | 43 | ![screenshot of corrected dates](/static/faq/google-sheets-dates-fixed.png) 44 | 45 | ## Update frequency 46 | 47 | Data loaded with `IMPORTDATA` will [update every hour](https://support.google.com/docs/answer/58515?hl=en#zippy=%2Cchoose-how-often-formulas-calculate). 48 | 49 | ## Writing data from Google Sheets 50 | 51 | This is currently not possible. 
52 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/gravatar.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | csvbase avatars are taken from Gravatar, an avatar hosting system run by 10 | Wordpress. 11 | 12 | To set up a Gravatar, visit [gravatar.com](https://gravatar.com/). Then set 13 | the email you used on Gravatar as your csvbase email in your user settings and 14 | tick "Use my Gravatar" on the same page. 15 | 16 | ## Disabled by default 17 | 18 | Gravatars are supported, but disabled by default as a privacy feature. This 19 | helps avoid users accidentally exposing a photo of themselves just because they 20 | entered their email address into csvbase. 21 | 22 | Additionally, csvbase reverse proxies all requests for Avatars to avoid 23 | disclosing the SHA256 hash of your email address. 24 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/markdown.md: -------------------------------------------------------------------------------- 1 | 8 | 9 | csvbase supports Markdown (including some HTML tags). 10 | 11 | The specific version of Markdown is ["Github Flavoured 12 | Markdown"](https://github.github.com/gfm/). 13 | 14 | ## Tables 15 | 16 | You can markup tables like this 17 | 18 | ```markdown 19 | | Column 1 | Column 2 | 20 | | -------- | -------- | 21 | | a | 1 | 22 | | b | 2 | 23 | | c | 3 | 24 | ``` 25 | 26 | See [the GFM specification](https://github.github.com/gfm/#tables-extension-) 27 | for more details and features. 28 | 29 | ## References 30 | 31 | ### Comments 32 | 33 | You can reference other comments in the same thread like this: 34 | 35 | ```markdown 36 | I note in passing comment #3 37 | ``` 38 | 39 | Which will be rendered as a permalink to comment #3 in the same thread (and 40 | your comment will be forward-linked on comment #3 as a reply). 
41 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/pandas.md: -------------------------------------------------------------------------------- 1 | 9 | 10 | ## Reading csvbase tables into Pandas 11 | 12 | When using Pandas, you can read any table by copying the url into Panda's 13 | [`read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html) 14 | function. 15 | 16 | ```python 17 | import pandas as pd 18 | 19 | df = pd.read_csv("https://csvbase.com/meripaterson/stock-exchanges", index_col=0) 20 | ``` 21 | 22 | `index_col=0` just makes the csvbase row id the index. 23 | 24 | You can also use Parquet if you have the relevant parquet libraries installed 25 | (generally, that means [pyarrow](https://pypi.org/project/pyarrow/)) but it is 26 | best to install that via the parquet "extra": 27 | 28 | ```bash 29 | pip install pandas[parquet] 30 | ``` 31 | 32 | Then read data with Panda's 33 | [`read_parquet`](https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html) 34 | funciton. 35 | 36 | ```python 37 | df = pd.read_parquet("https://csvbase.com/meripaterson/stock-exchanges.parquet") 38 | ``` 39 | 40 | ## Reading and writing via `csvbase-client` 41 | 42 | Simple reads are done fine with just Pandas, however if you want to do writes 43 | or benefit from [caching](/faq/csvbase-client-cache) it is best to use the 44 | `csvbase-client` library. 
45 | 46 | ```bash 47 | pip install csvbase-client 48 | ``` 49 | 50 | Then you can do reads using the `csvbase://` url scheme (you do not need to 51 | import anything - Pandas will pick it up automatically): 52 | 53 | ```python 54 | df = pd.read_csv("csvbase://meripaterson/stock-exchanges") 55 | ``` 56 | 57 | Writes are done the same way: 58 | 59 | ```python 60 | df.to_csv("csvbase://myuser/stock-exchanges") 61 | ``` 62 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/table-api.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | csvbase has a REST API. 12 | 13 | Each [table URL](table-url) doubles as an API endpoint. 14 | 15 | ## Verbs 16 | 17 | - `GET` retrieves the table 18 | - set the `Accept` header to determine the [format](formats) 19 | - `PUT` creates a new table at `//` 20 | - `DELETE` deletes the table 21 | - `POST` appends new rows 22 | 23 | ## Authentication 24 | 25 | Authentication isn't required for all requests but is required: 26 | - for private tables (available to [supporters](/billing/pricing) 27 | - and when using the verb `PUT`, `DELETE` or `POST` 28 | 29 | Use HTTP [basic auth](basic-auth) to provide your username and API key. 30 | 31 | ## `csvbase_row_id` and id collation 32 | 33 | All csvbase tables contain a column (which is added automatically if not 34 | present) named `csvbase_row_id`. 35 | 36 | This column contains autoincremented, unique integers that are used to refer to 37 | specific rows both from outside of csvbase and internally. 38 | 39 | If this a value in this row is null, a new, unique row id will be generated. 
40 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/table-url.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | Every table on csvbase has a url like the following: 12 | 13 | https://csvbase.com// 14 | 15 | For example: 16 | 17 | https://csvbase.com/meripaterson/stock-exchanges 18 | 19 | ## Getting alternate formats 20 | 21 | To get the file in another format, you just add the relevant file extension to 22 | the table url. For Excel: 23 | 24 | ``` 25 | https://csvbase.com//.xlsx 26 | ``` 27 | 28 | For example: 29 | 30 | ``` 31 | https://csvbase.com/meripaterson/stock-exchanges.xlsx 32 | ``` 33 | 34 | Several different [formats](formats) are supported. 35 | 36 | ## Naming restrictions 37 | 38 | Table names are only allowed to contain the character, A-Z, a-z and - and they 39 | must start with a letter. 40 | 41 | In short they must match the regex: 42 | 43 | ``` 44 | [A-Za-z][-A-Za-z0-9]+ 45 | ``` 46 | 47 | ## Table API 48 | 49 | Each table url supports various different verbs that allow you to [use it as an 50 | API](table-api). 51 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/tools-other.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | csvbase has a [very simple interface](table-url) and doesn't usually require explicit 12 | support from a tool. 13 | 14 | If there is a place to enter a url, just enter the table's url with the file 15 | extension for the [format](formats) that tool will be expecting (eg `.csv`). 16 | 17 | Many tools work out of the box this way. 18 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/usage.md: -------------------------------------------------------------------------------- 1 | 9 | 10 | ## How many tables can I create? 
11 | 12 | There is no limit on number of public tables a user can create. 13 | 14 | Supporters can also create private tables. 15 | 16 | ## What are the size limits? 17 | 18 | Tables must have fewer than 1,600 columns. 19 | 20 | They can be unlimited size, but you can only upload 50 megabytes of data at 21 | once - so larger tables will need to be built up iteratively via appending. 22 | 23 | ## How long are tables kept? 24 | 25 | Tables are kept forever. 26 | -------------------------------------------------------------------------------- /csvbase/web/faq/entries/what-is-it.md: -------------------------------------------------------------------------------- 1 | 10 | 11 | csvbase is a [website for sharing table data](/about). 12 | 13 | "Table data" means (labelled) columns and (indexed) rows. 14 | 15 | Each table has it's [own url](table-url), following the format: 16 | 17 | https://csvbase.com// 18 | 19 | That url serves both as the web page for the table, and also for [it's 20 | API](/table-api). 21 | 22 | csvbase is called *csv*base because one of the easiest ways to interact with it 23 | is via csv files. 
To get the csv file for any table, just add `.csv` to the 24 | url, so 25 | 26 | [https://csvbase.com/meripaterson/stock-exchanges](https://csvbase.com/meripaterson/stock-exchanges) 27 | 28 | becomes 29 | 30 | https://csvbase.com/meripaterson/stock-exchanges.csv 31 | 32 | Many other [formats are available](formats), including: 33 | - JSON 34 | - Parquet 35 | - Microsoft Excel 36 | -------------------------------------------------------------------------------- /csvbase/web/main/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/main/__init__.py -------------------------------------------------------------------------------- /csvbase/web/static/.gitignore: -------------------------------------------------------------------------------- 1 | bootstrap.min.css 2 | codehilite.css 3 | codehilite-dark.css 4 | bootstrap.bundle.js -------------------------------------------------------------------------------- /csvbase/web/static/bootstrap-enables.js: -------------------------------------------------------------------------------- 1 | // This file is just for enabling various bootstrap features 2 | // https://getbootstrap.com/docs/5.1/components/tooltips/#example-enable-tooltips-everywhere 3 | var tooltipTriggerList = [].slice.call(document.querySelectorAll('[data-bs-toggle="tooltip"]')); 4 | var tooltipList = tooltipTriggerList.map(function (tooltipTriggerEl) { 5 | return new bootstrap.Tooltip(tooltipTriggerEl) 6 | }); 7 | -------------------------------------------------------------------------------- /csvbase/web/static/comments.js: -------------------------------------------------------------------------------- 1 | document.addEventListener('DOMContentLoaded', () => { 2 | const commentTextarea = document.querySelector('#comment-textarea'); 3 | 4 | if (commentTextarea.value.trim() != ""){ 5 | // Make the textarea fit the 
text inside. CSS will be able to do this 6 | // soon with content-sizing: fixed 7 | commentTextarea.style.height = ""; // Reset any existing height styles 8 | commentTextarea.style.height = (commentTextarea.scrollHeight + 5) + "px"; 9 | commentTextarea.setSelectionRange(commentTextarea.value.length, commentTextarea.value.length); 10 | } 11 | }); 12 | -------------------------------------------------------------------------------- /csvbase/web/static/dark-mode-hack.js: -------------------------------------------------------------------------------- 1 | // Bootstrap does not currently have a way to do this automatically so custom JS required 2 | if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches){ 3 | document.querySelector('html').setAttribute('data-bs-theme', 'dark'); 4 | document.getElementById('codehilite-stylesheet').href = "/static/codehilite-dark.css"; 5 | } 6 | -------------------------------------------------------------------------------- /csvbase/web/static/duckdb-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/duckdb-screenshot.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/csvbase-tracking-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/csvbase-tracking-table.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/excel-01-from-web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/excel-01-from-web.png 
-------------------------------------------------------------------------------- /csvbase/web/static/faq/excel-02-from-web-dialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/excel-02-from-web-dialog.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/excel-03-navigator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/excel-03-navigator.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/excel-04-refresh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/excel-04-refresh.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/excel-06-connection-properties.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/excel-06-connection-properties.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/github-pat-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/github-pat-overview.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/google-sheets-date-fix.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/google-sheets-date-fix.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/google-sheets-dates-fixed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/google-sheets-dates-fixed.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/google-sheets-importdata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/google-sheets-importdata.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/google-sheets-permissions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/google-sheets-permissions.png -------------------------------------------------------------------------------- /csvbase/web/static/faq/google-sheets-table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/faq/google-sheets-table.png -------------------------------------------------------------------------------- /csvbase/web/static/github-screenshot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/github-screenshot.png -------------------------------------------------------------------------------- /csvbase/web/static/icons/praise.svg: -------------------------------------------------------------------------------- 1 | Body -------------------------------------------------------------------------------- /csvbase/web/static/icons/praised.svg: -------------------------------------------------------------------------------- 1 | Accessibility -------------------------------------------------------------------------------- /csvbase/web/static/icons/private-table.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /csvbase/web/static/icons/public-table.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /csvbase/web/static/logo/128x128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/logo/128x128.png -------------------------------------------------------------------------------- /csvbase/web/static/logo/192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/logo/192x192.png -------------------------------------------------------------------------------- /csvbase/web/static/privacy.txt: -------------------------------------------------------------------------------- 1 | # csvbase privacy policy 2 | 3 | This policy was last updated on 2023-08-25 4 | 5 | ## Contact details 6 | 7 | Data controller: Cal 
Paterson 8 | 9 | Email address: cal@calpaterson.com 10 | 11 | ## Personal information we collect 12 | 13 | We currently collect the following personal information from users who choose 14 | to register: 15 | 16 | - a username 17 | - a timezone, if you change from the default of UTC 18 | - and, optionally, an email address 19 | 20 | If you experience a crash (eg a 500 error) we collect extra information about 21 | your configuration, strictly for the purposes of debugging that crash. That is 22 | shared with Sentry (https://sentry.io/) and they retain it for 30 days. 23 | 24 | For subscription purposes, our payment provider Stripe collects your payment 25 | data, including credit card details, name, and address. We do not have access 26 | to this data; it's solely used to process your payments. 27 | 28 | ## How we get the personal information and why we have it 29 | 30 | We collect personal information directly during registration. 31 | 32 | We use the information that you have given us to give your account a unique 33 | name. The email address is used if you forget your password. 34 | 35 | Sentry collects data about users who experience a crash. And Stripe only 36 | collect data you provide to them. 37 | 38 | We do not share this information with any other other parties. 39 | 40 | ## Legal basis for processing 41 | 42 | Our processing of your personal information is based on your consent, which you 43 | can withdraw at any time by contacting us. 44 | 45 | ## Storage 46 | 47 | Your information is securely stored and not accessible to anyone else. 48 | 49 | ## Your data protection rights 50 | 51 | Under data protection law, you have certain rights, including: 52 | 53 | - your right of access your right to rectification your right to erasure your 54 | - right to restriction of processing your right to object to processing your 55 | - right to data portability 56 | 57 | You aren't required to pay in order to exercise your rights. 
If you make a 58 | request we have one month to respond. 59 | 60 | ## How to complain 61 | 62 | You can contact us to make a complaint. 63 | 64 | You can also contact the Information Commissioner's Office if you are unhappy 65 | with how we have used your data. The contact details for the ICO are below: 66 | 67 | Information Commissioner’s Office 68 | Wycliffe House, Water Lane 69 | Wilmslow Cheshire 70 | SK9 5AF 71 | Phone: 0303 123 1113 72 | Website: https://www.ico.org.uk 73 | -------------------------------------------------------------------------------- /csvbase/web/static/site.css: -------------------------------------------------------------------------------- 1 | /* Navbar bg colour */ 2 | #csvbase-nav { 3 | background-color: --bs-body-bg; 4 | } 5 | 6 | .table-card { 7 | margin-bottom: 1em; 8 | } 9 | 10 | /* Stuff to do with praising */ 11 | #praise-button:hover { 12 | background-color: var(--bs-warning); 13 | border-color: var(--bs-warning); 14 | color: var(--bs-body-color); 15 | } 16 | 17 | .icon { 18 | display: inline-block; 19 | overflow: hidden; 20 | height: 1.5em; 21 | width: 1.5em; 22 | margin-right: 0.1em; 23 | } 24 | 25 | #table-icon { 26 | height: 1em; 27 | width: 1em; 28 | } 29 | 30 | @media (prefers-color-scheme: dark) { 31 | #table-icon { 32 | filter: invert(0.8); 33 | } 34 | } 35 | 36 | .icon-invert { 37 | filter: invert(0.8); 38 | } 39 | 40 | .icon-hidden { 41 | display: none; 42 | } 43 | 44 | #praise-button:hover .icon { 45 | display: none; 46 | } 47 | 48 | #praise-button:hover .icon-hidden { 49 | display: inline-block; 50 | } 51 | 52 | #praise-button:hover #praise-span { 53 | display: none; 54 | } 55 | 56 | #praise-button:hover:after { 57 | content: "Praise"; 58 | } 59 | 60 | /* reducing bottom margin off the explain alert */ 61 | #explain-alert { 62 | text-align: center; 63 | padding: 0; 64 | } 65 | 66 | #explain-alert > p { 67 | padding: 0; 68 | margin: 0; 69 | } 70 | 71 | 72 | .blog-post-description { 73 | font-style: oblique; 74 | } 75 
| 76 | .blog-post-draft { 77 | font-weight: bold; 78 | text-align: center; 79 | } 80 | 81 | /* FIXME: remove this from wherever it is used */ 82 | .blog-post-blockquote { 83 | margin: 1em; 84 | border-left: 0.2em solid grey; 85 | padding-left: 1em; 86 | font-style: oblique; 87 | } 88 | 89 | blockquote { 90 | margin: 1em; 91 | border-left: 0.2em solid grey; 92 | padding-left: 1em; 93 | font-style: oblique; 94 | } 95 | 96 | .blog-rss { 97 | font-style: oblique; 98 | } 99 | 100 | textarea { 101 | resize: both; 102 | } 103 | 104 | .table-topline { 105 | font-size: large; 106 | } 107 | 108 | .table-caption { 109 | font-style: oblique; 110 | } 111 | 112 | .blog-post img { 113 | width: 100%; 114 | } 115 | 116 | .blog-post figure > img { 117 | width: 80%; 118 | display: block; 119 | margin: auto; 120 | } 121 | 122 | .blog-post-date { 123 | float: right; 124 | } 125 | 126 | .blog-post-2 pre { 127 | white-space: pre-wrap; 128 | } 129 | 130 | .form-type-hint { 131 | font-size: smaller; 132 | } 133 | 134 | input.table-pastable { 135 | font-family: monospace; 136 | font-size: smaller; 137 | } 138 | 139 | .faq-entry-md img { 140 | max-width: 100%; 141 | } 142 | 143 | .hidden-praise-form { 144 | display: none; 145 | } 146 | 147 | #praise-button > img { 148 | max-height: 1.2em; 149 | max-width: 1.2em; 150 | } 151 | 152 | .footer-logo { 153 | margin: auto; 154 | } 155 | 156 | /* Tweaks for threads */ 157 | .comment img { 158 | max-width: 100%; 159 | } 160 | 161 | .comment-avatar { 162 | max-height: 2em; 163 | border-radius: 10px; 164 | margin-right: 0.3em; 165 | margin-left: 0.3em; 166 | } 167 | -------------------------------------------------------------------------------- /csvbase/web/static/table-screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/csvbase/web/static/table-screenshot.png 
-------------------------------------------------------------------------------- /csvbase/web/static/terms.txt: -------------------------------------------------------------------------------- 1 | # csvbase terms of service 2 | 3 | These terms were last updated on 2023-03-23. 4 | 5 | ## Contact details 6 | 7 | Cal Paterson 8 | 9 | cal@calpaterson.com 10 | 11 | ## Outline 12 | 13 | csvbase is a data sharing website. 14 | 15 | ## Data you post on csvbase 16 | 17 | All data you post on csvbase must be legal in both the UK and the EU. 18 | 19 | You must not post personally identifiable information of any kind. 20 | 21 | We do not take ownership or copyright of data you post on csvbase. 22 | 23 | If you select a licence ("PDDL", "ODB-By", etc) when uploading your data you 24 | agree to provide your data to us under that same licence. 25 | 26 | If you do not select a specific licence you, as the copyright holder (and you 27 | must be the copyright holder in this scenario), agree to allow us at least to 28 | redistribute your data to others according to whether you make it public or 29 | private. 30 | 31 | ## Changing your mind 32 | 33 | You can change your mind at any time by deleting your data. 34 | 35 | You can also make your data private at any time. 36 | 37 | You can also change the licence you have selected at any time. 38 | 39 | ## Removal of data 40 | 41 | Data may be removed for any reason or for no reason. This is to allow for 42 | moderation and anti-spam. 43 | -------------------------------------------------------------------------------- /csvbase/web/templates/app_base.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "base.html" %} 3 | 4 | {# This template is for the majority of app pages and will consider 5 | current_user and should sign in/username in the navbar. 
#} 6 | 7 | {% block navbar_elements %} 8 | {{ super() }} 9 | 28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /csvbase/web/templates/blog.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "blog_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

The csvbase blog

9 |
10 |
11 | 12 | {% if posts|length > 0 %} 13 |
14 |
15 |

Latest post

16 |
17 |
18 |
19 |
20 | {{ render_post(posts[0]) }} 21 |
22 |
23 | {% endif %} 24 | 25 | {% if posts|length > 1 %} 26 |
27 |
28 |

Previously

29 | {% for post in posts[1:] %} 30 | {{ render_post(post) }} 31 | {% endfor %} 32 | {% if not posts %} 33 |

No posts yet!

34 | {% endif %} 35 |
36 |
37 | {% endif %} 38 | 39 |
40 |
41 |

Register an account and stay up to date

42 |

Get alerted about new posts via RSS, or register an account and leave the checkbox ticked:

43 |
44 |
45 | 46 |
47 |
48 |
49 | {{ other_macros.username_form_field() }} 50 |
51 |
52 | {{ other_macros.email_form_field() }} 53 |
54 |
55 | {{ other_macros.password_form_field() }} 56 | {{ other_macros.mailing_list_form_field(checked=True) }} 57 | 58 |
59 |
60 |
61 | 62 |
63 | {% endblock %} 64 | 65 | {% macro render_post(post) %} 66 |

{{ post.title }} 67 | 68 |

69 | {% endmacro %} 70 | -------------------------------------------------------------------------------- /csvbase/web/templates/blog_base.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block head %} 5 | {{ super() }} 6 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /csvbase/web/templates/captcha_macros.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% macro turnstile_script_tag() %} 3 | {% if turnstile_site_key %} 4 | 5 | {% endif %} 6 | {% endmacro %} 7 | 8 | {% macro turnstile_magic_div() %} 9 | {% if turnstile_site_key %} 10 |
11 | {% endif %} 12 | {% endmacro %} 13 | -------------------------------------------------------------------------------- /csvbase/web/templates/change-password.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% import 'other_macros.html' as other_macros %} 5 | 6 | {% block main %} 7 |
8 |

Change password

9 | 10 |
11 |
12 |
13 | 14 |
15 |
16 | 17 |
18 |
19 | 20 |
21 |
22 | 23 |
24 |
25 | 26 |
27 |
28 | 29 |
30 |
31 | 32 |
33 |
34 | 35 |
36 |
37 | 38 | 39 |
40 | 41 |
42 | {% endblock %} 43 | -------------------------------------------------------------------------------- /csvbase/web/templates/comment-edit.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% import 'comment_macros.html' as comment_macros %} 5 | 6 | {% block head %} 7 | {{ super() }} 8 | 9 | {% endblock %} 10 | 11 | {% block main %} 12 |
13 |
14 |

Editing comment #{{comment.comment_id}}

15 | {{ comment_macros.render_comment(comment, for_owner=True) }} 16 |
17 | 18 |
19 |
20 |
21 |
22 |
26 |
27 | 32 |
33 |
34 | 35 | Back to thread 38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 | {% endblock main %} 46 | -------------------------------------------------------------------------------- /csvbase/web/templates/convert.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% from 'value_macros.html' import id_input_validation_attrs %} 5 | 6 | {% block main %} 7 |
8 |
9 |
10 |
11 |

Convert a file

12 | 13 |
14 |
15 | 16 |
17 | 18 |
19 |
20 | 21 |
22 |
23 | 29 |
30 |
31 | 32 |
33 |
34 | 35 |
36 |
37 | 43 |
44 |
45 | 46 | 47 | {% if not current_user %} 48 |
49 |
50 |
51 | Register at the same time: 52 | (If you already have an account, sign in.) 53 |
54 |
55 |
56 | 57 | {{ other_macros.username_form_field() }} 58 | 59 | {{ other_macros.email_form_field() }} 60 | 61 | {{ other_macros.password_form_field() }} 62 |
63 |
64 | 65 |
66 |
67 | {% else %} 68 |
69 |
70 | 71 |
72 |
73 | {% endif %} 74 | 75 |
76 |
77 |
78 |
79 |
80 | {% endblock %} 81 | -------------------------------------------------------------------------------- /csvbase/web/templates/copy.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% from 'value_macros.html' import id_input_validation_attrs %} 5 | 6 | {% import 'other_macros.html' as other_macros %} 7 | 8 | {% block main %} 9 |
10 |
11 |
12 |

Copy {{table.table_name}}/{{table.username}}

13 | 14 |
This will create a complete copy of the table and 15 | data as it currently is, under your own user.
16 | 17 |
20 | 21 | {{ other_macros.table_name_field(label="New table name", value=table.table_name) }} 22 | 23 |
24 | 25 | 26 |
27 | 28 | 29 | {% if not current_user %} 30 |
31 |
32 |
33 | You need to register first 34 | (If you already have an account, sign in.) 35 |
36 |
37 |
38 |
39 |
40 | 41 |
42 |
43 | {% else %} 44 | 45 |
46 | 47 |
48 |
49 | {% endif %} 50 | 51 |
52 |
53 | 54 | 55 | {% endblock %} 56 | -------------------------------------------------------------------------------- /csvbase/web/templates/create-table-confirm.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

Confirm table structure

9 |

Column types

10 |
11 |
12 | {% for column in columns %} 13 |
14 |
15 | 16 |
17 |
18 | 23 |
24 |
25 | {% endfor %} 26 | 27 |
28 |

Unique key

29 |
30 |
31 |

Optionally, select which columns form a unique key. When updating the table, this will be used to avoid cycling row ids.

32 | {% for column in columns %} 33 |
34 | 40 | 41 |
42 | {% endfor %} 43 |
44 |
45 | 46 |
47 |
48 |
49 | 50 |
51 |
52 |
53 |
54 | 55 |
56 |
57 |
58 | {% endblock %} 59 | -------------------------------------------------------------------------------- /csvbase/web/templates/create-table-git.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "new-table.html" %} 3 | 4 | {% block before_form %} 5 |
6 | Creating a table from git is currently experimental, and works only with Github. 7 |
8 | {% endblock %} 9 | 10 | {% block form_section %} 11 |
12 | 20 | 21 |
Example: "https://github.com/calpaterson/csvbase". Currently only Github is supported.
22 |
23 |
24 | 32 | 33 |
Usually "main" or "master"
34 |
35 |
36 | 44 | 45 |
Example: "path/to/a.csv"
46 |
47 |
48 |
49 | 50 |
51 |
52 | 55 |
56 |
The table will be kept updated when the file changes in git
57 |
58 | {% endblock %} 59 | -------------------------------------------------------------------------------- /csvbase/web/templates/email-verification-sent.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

Verification email sent

9 | 10 |

Click the link in the email sent to {{user.email}}.

11 |
12 |
13 |
14 | 15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /csvbase/web/templates/email-verified.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

Email verified

9 | 10 |

Thanks - we're now allowed to send you email.

11 |
12 |
13 |
14 | 15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /csvbase/web/templates/email/verify-email.txt: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | Hi {{user.username}}, 3 | 4 | Please click the following link to verify your email address: 5 | 6 | {{ verification_url }} 7 | 8 | Regards, 9 | csvbase 10 | -------------------------------------------------------------------------------- /csvbase/web/templates/error-dynamic.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | 3 | {# This is a dynamic error page, and is served in cases where the app seems to #} 4 | {# working - so usually for 400 series errors. #} 5 | 6 | {% extends "app_base.html" %} 7 | 8 | {% block main %} 9 |
10 |
11 |
12 |

{{ http_code }} error

13 |
14 |
15 |
16 |
17 |
18 | Sorry - {{ message }}. 19 |
20 |
21 |
22 |
23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /csvbase/web/templates/faq/faq-entry.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

{{ entry.title }}

9 |
10 |
11 |
12 |
13 |

{{ entry.description }}

14 |
15 |
16 |
17 |
18 | {{ rendered|safe }} 19 |
20 |
21 |
22 | 23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /csvbase/web/templates/faq/faq-index.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

FAQ

9 |
10 |
11 | {% for category, entries in entries_by_category.items() %} 12 |
13 |
14 |

{{ category }}

15 | 20 |
21 |
22 | {% endfor %} 23 |
24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /csvbase/web/templates/form_macros.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | 3 | {% macro show_firefox_mobile_warning(user_agent) %} 4 | {% if user_agent.browser.family == "Firefox Mobile" %} 5 | 11 | {% endif %} 12 | {% endmacro %} 13 | 14 | 15 | {% macro render_licence_combobox(ordered_licences, current_licence=None) %} 16 |
17 | 18 |
19 |
20 | 26 |
27 |
(Optionally) Specify which licence this data is released under.
28 | {% endmacro %} 29 | -------------------------------------------------------------------------------- /csvbase/web/templates/index.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% import 'table_macros.html' as table_macros %} 5 | 6 | {% block main %} 7 |
8 | 21 | 22 |
23 | {% for table, page in tables_and_pages %} 24 | {{ table_macros.table_card(table, page) }} 25 | {% endfor %} 26 |
27 |
28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /csvbase/web/templates/new-blank-table.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "new-table.html" %} 3 | 4 | {% block form_section %} 5 |
6 | 7 |
8 | {% for col_name, col_type in cols %} 9 |
10 |
11 | 17 |
18 |
19 | 24 |
25 |
26 | 36 |
37 |
38 | {% endfor %} 39 | 40 |
41 | 50 |
51 | {% endblock %} 52 | -------------------------------------------------------------------------------- /csvbase/web/templates/other_macros.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | 3 | {% from 'value_macros.html' import id_input_validation_attrs %} 4 | 5 | {% macro table_name_field(label="Table name", value=None) %} 6 |
7 | 17 | 18 |
Start with a letter, and then letters, numbers and dashes (-).
19 |
20 | {% endmacro %} 21 | 22 | 23 | {% macro username_form_field(autofocus=False) %} 24 |
25 | 35 | 36 |
Usernames should start with a letter, and 37 | then have only letters, numbers and dashes (-).
38 |
39 | {% endmacro %} 40 | 41 | {% macro password_form_field() %} 42 |
43 | 48 | 49 |
50 | {% endmacro %} 51 | 52 | {% macro email_form_field() %} 53 |
54 | 60 | 61 | 62 |
63 | {% endmacro %} 64 | 65 | {% macro mailing_list_form_field(checked=False) %} 66 |
67 | 68 | 71 |
An alternative to RSS. Don't leave your email blank.
72 |
73 | {% endmacro %} 74 | -------------------------------------------------------------------------------- /csvbase/web/templates/register.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "base.html" %} 3 | 4 | {% from 'value_macros.html' import id_input_validation_attrs %} 5 | {% import 'other_macros.html' as other_macros %} 6 | {% import 'captcha_macros.html' as captcha_macros %} 7 | {% import 'form_macros.html' as form_macros %} 8 | 9 | {% block head %} 10 | {{ super() }} 11 | {{ captcha_macros.turnstile_script_tag() }} 12 | {% endblock %} 13 | 14 | {% block main %} 15 |
16 |
17 |
18 |

Register a new account

19 |
20 | {{ form_macros.show_firefox_mobile_warning(user_agent) }} 21 | {% if whence %} 22 | 23 | {% endif %} 24 | 25 | {{ other_macros.username_form_field() }} 26 | 27 | {{ other_macros.email_form_field() }} 28 | 29 | {{ other_macros.password_form_field() }} 30 | 31 | {{ other_macros.mailing_list_form_field() }} 32 | 33 | {{ captcha_macros.turnstile_magic_div() }} 34 | 35 | 36 |
37 | 38 |

If you already have an 39 | account, sign in 40 | instead 41 |

42 |
43 |
44 | {% endblock %} 45 | -------------------------------------------------------------------------------- /csvbase/web/templates/row-add.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% import 'row_macros.html' as row_macros %} 5 | 6 | {% block main %} 7 | 14 |
15 |
18 | {% for col in table.user_columns() %} 19 |
20 |
21 | 22 |
23 |
24 | {{ row_macros.column_value_input(col, None) }} 25 |
26 |
27 | {% endfor %} 28 |
29 | 30 |
31 |
32 | 33 |
34 | {% endblock %} 35 | -------------------------------------------------------------------------------- /csvbase/web/templates/row-view-or-edit.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "row.html" %} 3 | 4 | {% import 'row_macros.html' as row_macros %} 5 | 6 | {% block body_contents %} 7 |
8 | {% if current_username != table.username %} 9 | {{ row_macros.row_table(row) }} 10 | {% else %} 11 |
12 |
15 | {% for col, v in row.items() %} 16 |
17 |
18 | {% if col.name == "csvbase_row_id" %} 19 | 20 | {% else %} 21 | 22 | {% endif %} 23 | {{ col.type_.pretty_type() }} 24 |
25 |
26 | {{ row_macros.column_value_input(col, v) }} 27 |
28 | {% if col.name == "csvbase_row_id" %} 29 |
30 |
This cannot be changed
31 |
32 | {% endif %} 33 |
34 | {% endfor %} 35 |
36 | 37 | Delete row 39 |
40 |
41 |
42 | {% endif %} 43 |
44 | {% endblock %} 45 | -------------------------------------------------------------------------------- /csvbase/web/templates/row.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 | 13 | 14 | {% block body_contents %} 15 | {% endblock %} 16 | {% endblock %} 17 | -------------------------------------------------------------------------------- /csvbase/web/templates/row_delete_check.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "row.html" %} 3 | 4 | {% import 'row_macros.html' as row_macros %} 5 | 6 | {% block body_contents %} 7 |
8 |

Delete confirmation

9 |

Are you sure you want to delete this row? This cannot be undone.

10 |
13 |
14 | Cancel 16 | 20 |
21 | {{ row_macros.row_table(row) }} 22 |
23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /csvbase/web/templates/sign_in.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "base.html" %} 3 | 4 | {% from 'value_macros.html' import id_input_validation_attrs %} 5 | {% import 'other_macros.html' as other_macros %} 6 | {% import 'form_macros.html' as form_macros %} 7 | 8 | {% block main %} 9 |
10 |
11 |
12 |

Sign in to your account

13 | 14 | {{ form_macros.show_firefox_mobile_warning(user_agent) }} 15 | {% if whence %} 16 | 17 | {% endif %} 18 | 19 | {{ other_macros.username_form_field() }} 20 | {{ other_macros.password_form_field() }} 21 | 22 | 23 | 24 | 25 |

If you don't already have an account, why not register?

26 |
27 |
28 |
29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /csvbase/web/templates/sitemap.xml: -------------------------------------------------------------------------------- 1 | {#- -*- mode: jinja2 -*- -#} 2 | 3 | 4 | {% for url, lastmod in urls %} 5 | 6 | {{ url }} 7 | {% if lastmod %} 8 | {{ lastmod.isoformat() }} 9 | {% endif %} 10 | 11 | {% endfor %} 12 | 13 | -------------------------------------------------------------------------------- /csvbase/web/templates/table_details.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "table.html" %} 3 | 4 | {% block tab_contents %} 5 |
6 |

Columns

7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | {% for col in table.columns %} 18 | 19 | {% if col.name == "csvbase_row_id" %} 20 | 21 | 22 | 23 | {% else %} 24 | 25 | 26 | 27 | {% endif %} 28 | 29 | {% endfor %} 30 | 31 |
Column nameColumn typeMissing data?
Row ID (csvbase_row_id){{col.type_.pretty_name()}}Not allowed{{ col.name }}{{col.type_.pretty_name()}}Allowed
32 |
33 |
34 | {% endblock %} 35 | 36 | -------------------------------------------------------------------------------- /csvbase/web/templates/table_settings.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "table.html" %} 3 | 4 | {% import 'form_macros.html' as form_macros %} 5 | 6 | {% block tab_contents %} 7 |
8 |
9 |

Edit settings

10 |
11 |
12 |
13 | 14 | 22 |
A short caption outlining what the table contains.
23 |
24 |
25 | 26 |
27 |
28 | 29 | 36 |
Space to explain more. Markdown supported (but not HTML).
37 |
38 |
39 | 40 |
41 | {{ form_macros.render_licence_combobox(ordered_licences, table.licence) }} 42 |
43 | 44 |
45 |
46 | 52 | 53 |
54 |
55 | 56 |
57 |
58 | 59 |
60 |
61 |
62 |
63 | 64 | {% if current_username == table.username %} 65 |

Delete table

66 | 69 |
70 |
71 |
72 | 79 |
80 |
81 | 82 |
83 |
84 |
85 | {% endif %} 86 |
87 | {% endblock %} 88 | 89 | -------------------------------------------------------------------------------- /csvbase/web/templates/table_wait.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% block main %} 5 |
6 |
7 |
8 |

Please wait...

9 |
10 |
11 |
12 |
13 |

14 | That table is big and the representation you want is still being generated. 15 |

16 |

17 | The file you want will download automatically when it's ready. 18 |

19 |
20 |
21 |
22 | {% endblock %} 23 | -------------------------------------------------------------------------------- /csvbase/web/templates/thread.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | {% import 'comment_macros.html' as comment_macros %} 4 | {% import 'captcha_macros.html' as captcha_macros %} 5 | 6 | {% block head %} 7 | {{ super() }} 8 | {# No need to actually load stuff if user not logged in #} 9 | {% if current_user %} 10 | 11 | {{ captcha_macros.turnstile_script_tag() }} 12 | {% endif %} 13 | {% endblock %} 14 | 15 | {% block main %} 16 |
17 |
18 |

{{comment_page.thread.title}}

19 | 20 | {{ comment_macros.render_pagination(comment_page.thread.slug, current_page, max_page) }} 21 | {{ comment_macros.render_comment_page(comment_page, current_user) }} 22 | {{ comment_macros.render_pagination(comment_page.thread.slug, current_page, max_page) }} 23 |
24 | 25 |
26 |
27 |

Add comment

28 | {{ comment_macros.render_submit_comment_form(comment_page.thread.slug, current_user, comment_markdown) }} 29 |
30 |
31 | 32 |
33 | {% endblock main %} 34 | -------------------------------------------------------------------------------- /csvbase/web/templates/user-settings.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% import 'other_macros.html' as other_macros %} 5 | 6 | {% block main %} 7 |
8 |

Edit user settings

9 | 10 |
11 |
12 |
13 | 14 |
15 |
16 | 17 |
18 |
19 |
About yourself (markdown is supported)
20 |
21 |
22 | 23 |
24 |
25 | 26 |
27 |
28 | 29 |
30 |
31 |
Optional. Can help if you forget your password.
32 |
33 |
34 | 35 |
36 |
37 | 38 |
39 |
40 | 46 |
47 | 48 |
49 |
Datetimes will be shown in your timezone, and your timezone will be the default for datetimes you enter.
50 |
51 |
52 | 53 |
54 | {{ other_macros.mailing_list_form_field(checked=user.settings.mailing_list) }} 55 |
56 | 57 |
58 |
59 | 60 | 63 |
Use the Gravatar associated with your email address as your site avatar. (Off by default for privacy reasons.)
64 |
65 |
66 | 67 | 68 | 69 |
70 |
71 | {% endblock %} 72 | -------------------------------------------------------------------------------- /csvbase/web/templates/user.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | {% extends "app_base.html" %} 3 | 4 | {% import 'table_macros.html' as table_macros %} 5 | 6 | {% macro pagination() %} 7 | 19 | {% endmacro %} 20 | 21 | 22 | {% block main %} 23 |
24 |

{{ user.username }}

25 | 26 |

Registered {{ user.registered|dateformat }}

27 | 28 | {% if current_username == user.username %} 29 | 37 | {% endif %} 38 | 39 | {% if user_bio != None %} 40 |

About me

41 | {{ user_bio|safe }} 42 | {% endif %} 43 | 44 |

My tables

45 | 46 | {% if table_page.has_next or table_page.has_prev %} 47 | {{ pagination() }} 48 | {% endif %} 49 | {% for table, page in tables_and_pages %} 50 |
51 | {{ table_macros.table_card(table, page) }} 52 |
53 | {% endfor %} 54 | {% if table_page.has_next or table_page.has_prev %} 55 | {{ pagination() }} 56 | {% endif %} 57 | 58 | {% if current_username == user.username %} 59 |

Account controls

60 | Sign out 61 | {% endif %} 62 |
63 | {% endblock %} 64 | -------------------------------------------------------------------------------- /csvbase/web/templates/value_macros.html: -------------------------------------------------------------------------------- 1 | {# -*- mode: jinja2 -*- #} 2 | 3 | {% macro render_human_datetime(dt) %} 4 | {{ dt|timedeltaformat }} 5 | {% endmacro %} 6 | 7 | 8 | {%- macro id_input_validation_attrs() -%} 9 | {# HTML5 validation attributes for csvbase id parts (usernames and table names) #} 10 | {# Note this uses A-Za-z and NOT A-z (which allows underscore) #} 11 | pattern="^[A-Za-z][\-A-Za-z0-9]+$" title="Start with a letter and then letters, numbers and dashes (-)." 12 | {%- endmacro -%} 13 | 14 | -------------------------------------------------------------------------------- /csvbase/web/turnstile.py: -------------------------------------------------------------------------------- 1 | """Support for Cloudflare Turnstile, a captcha system.""" 2 | 3 | from logging import getLogger 4 | 5 | import werkzeug 6 | 7 | from csvbase import exc 8 | from csvbase.config import get_config 9 | from csvbase.http import http_sesh 10 | 11 | VERIFY_URL = "https://challenges.cloudflare.com/turnstile/v0/siteverify" 12 | 13 | # Wait about 6s to connect, 24 secs for first byte 14 | TIMEOUT = (6.1, 24) 15 | 16 | logger = getLogger(__name__) 17 | 18 | 19 | def get_turnstile_token_from_form(form: werkzeug.datastructures.MultiDict) -> str: 20 | token = form.get("cf-turnstile-response", None) 21 | if token is None: 22 | raise exc.InvalidRequest() 23 | else: 24 | return token 25 | 26 | 27 | def validate_turnstile_token(turnstile_token: str) -> None: 28 | """Check that a turnstile token is valid.""" 29 | # FIXME: It is possibly worthwhile adding a way to make this "fail open". 30 | # Currently it is "fail closed" (ie if we can't connect to 31 | # challenges.cloudflare.com then we fail. 
32 | 33 | secret_key = get_config().turnstile_secret_key 34 | if secret_key is None: 35 | logger.warning("turnstile key not set, not checking token") 36 | return 37 | 38 | body = { 39 | "secret": secret_key, 40 | "response": turnstile_token, 41 | } 42 | resp = http_sesh.post( 43 | VERIFY_URL, 44 | data=body, 45 | timeout=TIMEOUT, 46 | ) 47 | response_doc = resp.json() 48 | logger.info("got response doc %s", response_doc) 49 | if not response_doc.get("success", False): 50 | error_codes = response_doc.get("error-codes") 51 | logger.error("captcha check failed for reasons: '%s'", error_codes) 52 | if ( 53 | "invalid-input-response" in error_codes 54 | or "timeout-or-duplicate" in error_codes 55 | ): 56 | raise exc.CaptchaFailureException() 57 | else: 58 | raise RuntimeError(f"Cloudflare turnstile error: {error_codes}") 59 | -------------------------------------------------------------------------------- /csvbase/web/verify_emails.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from email.message import EmailMessage 3 | 4 | from werkzeug.wrappers.response import Response 5 | from flask import render_template, request, url_for, Blueprint, redirect, make_response 6 | 7 | from csvbase import email, svc 8 | from csvbase.sesh import get_sesh 9 | from csvbase.web.func import get_current_user_or_401 10 | 11 | bp = Blueprint("verify_emails", __name__) 12 | 13 | 14 | @bp.route("/verify-email", methods=["GET", "POST"]) 15 | def send_verification_email() -> Response: 16 | sesh = get_sesh() 17 | current_user = get_current_user_or_401() 18 | email_address = current_user.email 19 | if email_address is None: 20 | raise RuntimeError("no email set") 21 | if request.method == "POST": 22 | verification_code: bytes = svc.generate_email_verification_code( 23 | sesh, current_user.user_uuid 24 | ) 25 | urlsafe_code: str = base64.urlsafe_b64encode(verification_code).decode("utf-8") 26 | 27 | verification_url = url_for( 28 | 
"verify_emails.verify_email", 29 | urlsafe_email_verification_code=urlsafe_code, 30 | _external=True, 31 | ) 32 | 33 | em = EmailMessage() 34 | em.set_content( 35 | render_template( 36 | "email/verify-email.txt", 37 | verification_url=verification_url, 38 | user=current_user, 39 | ) 40 | ) 41 | em["Subject"] = "Verify your email address" 42 | em["To"] = email_address 43 | em["From"] = f"csvbase@{request.host}" 44 | em["Message-ID"] = f"" 45 | 46 | email.validate(em) 47 | sesh.commit() 48 | email.send(em) 49 | return redirect(url_for("verify_emails.send_verification_email")) 50 | else: 51 | return make_response( 52 | render_template("email-verification-sent.html", user=current_user) 53 | ) 54 | 55 | 56 | @bp.route("/verify-email/", methods=["GET"]) 57 | def verify_email(urlsafe_email_verification_code: str) -> Response: 58 | sesh = get_sesh() 59 | current_user = get_current_user_or_401() 60 | email_verification_code = base64.urlsafe_b64decode( 61 | urlsafe_email_verification_code.encode("utf-8") 62 | ) 63 | svc.verify_email(sesh, current_user.user_uuid, email_verification_code) 64 | sesh.commit() 65 | return make_response(render_template("email-verified.html")) 66 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | networks: 2 | csvbase: 3 | name: csvbase 4 | 5 | services: 6 | postgres: 7 | image: postgres:13 8 | ports: 9 | - "5432:5432" 10 | environment: 11 | POSTGRES_PASSWORD: password 12 | POSTGRES_DB: csvbase 13 | networks: 14 | csvbase: 15 | volumes: 16 | - ./init-schemas.sql:/docker-entrypoint-initdb.d/init-schemas.sql 17 | healthcheck: 18 | test: ["CMD-SHELL", "pg_isready -U postgres"] 19 | interval: 5s 20 | timeout: 5s 21 | retries: 5 22 | 23 | migrations: 24 | build: 25 | context: . 
26 | environment: 27 | CSVBASE_TOML: | 28 | db_url = "postgresql://postgres:password@postgres:5432/csvbase" 29 | networks: 30 | csvbase: 31 | command: 32 | /bin/bash -c "echo \"$$CSVBASE_TOML\" > ~/.csvbase.toml && alembic upgrade head" 33 | depends_on: 34 | postgres: 35 | condition: service_healthy 36 | 37 | csvbase: 38 | build: . 39 | environment: 40 | CSVBASE_TOML: | 41 | db_url = "postgresql://postgres:password@postgres:5432/csvbase" 42 | command: 43 | /bin/bash -c "echo \"$$CSVBASE_TOML\" > ~/.csvbase.toml && gunicorn 'csvbase.web.app:init_app()' -b :6001" 44 | ports: 45 | - "6001:6001" 46 | networks: 47 | csvbase: 48 | depends_on: 49 | postgres: 50 | condition: service_healthy 51 | migrations: 52 | condition: service_completed_successfully 53 | -------------------------------------------------------------------------------- /examples/load-moocows.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd "$(dirname "$0")" || exit 4 | 5 | curl -X PUT 'http://calpaterson:password@localhost:6001/calpaterson/moocows' -H 'Content-Type: text/csv' --data-binary @moocows.csv 6 | -------------------------------------------------------------------------------- /examples/moocows.csv: -------------------------------------------------------------------------------- 1 | cow name,cow id 2 | molly,1 3 | jenny,2 4 | tabby,3 -------------------------------------------------------------------------------- /init-schemas.sql: -------------------------------------------------------------------------------- 1 | -- This SQL script creates the two schemas that are used for it's own tables 2 | -- and the tables of users respectively 3 | CREATE SCHEMA IF NOT EXISTS metadata; 4 | CREATE SCHEMA IF NOT EXISTS userdata; 5 | 6 | -- And a separate schema for celery beat 7 | CREATE SCHEMA IF NOT EXISTS celery; 8 | -------------------------------------------------------------------------------- /logo/logo.xcf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/logo/logo.xcf -------------------------------------------------------------------------------- /migrations/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config 4 | from sqlalchemy import pool 5 | 6 | from alembic import context 7 | 8 | # this is the Alembic Config object, which provides 9 | # access to the values within the .ini file in use. 10 | config = context.config 11 | 12 | from csvbase.config import get_config # noqa: E402 13 | 14 | csvbase_config = get_config() 15 | 16 | config.set_main_option("sqlalchemy.url", csvbase_config.db_url) 17 | 18 | # Interpret the config file for Python logging. 19 | # This line sets up loggers basically. 20 | if config.config_file_name is not None: 21 | fileConfig(config.config_file_name) 22 | 23 | # add your model's MetaData object here 24 | # for 'autogenerate' support 25 | # from myapp import mymodel 26 | # target_metadata = mymodel.Base.metadata 27 | from csvbase.models import metadata # noqa: E402 28 | 29 | target_metadata = metadata 30 | 31 | # other values from the config, defined by the needs of env.py, 32 | # can be acquired: 33 | # my_important_option = config.get_main_option("my_important_option") 34 | # ... etc. 35 | 36 | 37 | def run_migrations_offline(): 38 | """Run migrations in 'offline' mode. 39 | 40 | This configures the context with just a URL 41 | and not an Engine, though an Engine is acceptable 42 | here as well. By skipping the Engine creation 43 | we don't even need a DBAPI to be available. 44 | 45 | Calls to context.execute() here emit the given string to the 46 | script output. 
47 | 48 | """ 49 | url = config.get_main_option("sqlalchemy.url") 50 | context.configure( 51 | url=url, 52 | target_metadata=target_metadata, 53 | literal_binds=True, 54 | dialect_opts={"paramstyle": "named"}, 55 | ) 56 | 57 | with context.begin_transaction(): 58 | context.run_migrations() 59 | 60 | 61 | def run_migrations_online() -> None: 62 | """Run migrations in 'online' mode. 63 | 64 | In this scenario we need to create an Engine 65 | and associate a connection with the context. 66 | 67 | """ 68 | connectable = engine_from_config( 69 | config.get_section(config.config_ini_section), # type: ignore 70 | prefix="sqlalchemy.", 71 | poolclass=pool.NullPool, 72 | ) 73 | 74 | with connectable.connect() as connection: 75 | context.configure( 76 | connection=connection, 77 | target_metadata=target_metadata, 78 | include_schemas=True, 79 | include_name=include_name, 80 | ) 81 | 82 | with context.begin_transaction(): 83 | context.run_migrations() 84 | 85 | 86 | def include_name(name, type_, parent_names): 87 | # This is necessary to fast skip the massive userdata schema when running 88 | # locally to auto-generate migrations as a starting point 89 | if parent_names.get("schema_name") == "userdata": 90 | return False 91 | else: 92 | return True 93 | 94 | 95 | if context.is_offline_mode(): 96 | run_migrations_offline() 97 | else: 98 | run_migrations_online() 99 | -------------------------------------------------------------------------------- /migrations/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from alembic import op 9 | import sqlalchemy as sa 10 | ${imports if imports else ""} 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = ${repr(up_revision)} 14 | down_revision = ${repr(down_revision)} 15 | branch_labels = ${repr(branch_labels)} 16 | depends_on = ${repr(depends_on)} 17 | 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /migrations/versions/173e920c9600_add_celery_schedule_entries.py: -------------------------------------------------------------------------------- 1 | """Add celery schedule entries 2 | 3 | Revision ID: 173e920c9600 4 | Revises: 9ad42a1ac714 5 | Create Date: 2024-06-29 12:22:57.392839+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "173e920c9600" 15 | down_revision = "9ad42a1ac714" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "schedule_entries", 23 | sa.Column("celery_app_name", sa.String(), nullable=False), 24 | sa.Column("name", sa.String(), nullable=False), 25 | sa.Column("created", sa.DateTime(timezone=True), nullable=False), 26 | sa.Column("updated", sa.DateTime(timezone=True), nullable=False), 27 | sa.Column("pickled_schedule_entry", sa.PickleType(), nullable=False), 28 | sa.PrimaryKeyConstraint( 29 | "celery_app_name", "name", name=op.f("pk_schedule_entries") 30 | ), 31 | schema="celery", 32 | ) 33 | 34 | 35 | def downgrade(): 36 | op.drop_table("schedule_entries", schema="celery") 37 | -------------------------------------------------------------------------------- /migrations/versions/1dfc9b3a690e_create_settings_json.py: -------------------------------------------------------------------------------- 1 | """Create settings json 2 | 3 | Revision ID: 1dfc9b3a690e 4 | Revises: 5d8f357eca61 5 | Create Date: 2024-09-08 12:40:49.583955+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 
11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "1dfc9b3a690e" 15 | down_revision = "5d8f357eca61" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.add_column( 22 | "users", 23 | sa.Column("settings", postgresql.JSONB(astext_type=sa.Text()), nullable=True), 24 | schema="metadata", 25 | ) 26 | op.execute( 27 | """ 28 | UPDATE 29 | metadata.users 30 | SET 31 | settings = jsonb_build_object('timezone', timezone, 'mailing_list', mailing_list) 32 | """ 33 | ) 34 | op.alter_column("users", "settings", nullable=False, schema="metadata") 35 | op.drop_column("users", "mailing_list", schema="metadata") 36 | op.drop_column("users", "timezone", schema="metadata") 37 | 38 | 39 | def downgrade(): 40 | op.add_column( 41 | "users", 42 | sa.Column("timezone", sa.VARCHAR(), autoincrement=False, nullable=True), 43 | schema="metadata", 44 | ) 45 | op.add_column( 46 | "users", 47 | sa.Column("mailing_list", sa.BOOLEAN(), autoincrement=False, nullable=True), 48 | schema="metadata", 49 | ) 50 | op.execute( 51 | """ 52 | UPDATE metadata.users 53 | SET timezone = settings->>'timezone', 54 | mailing_list = (settings->>'mailing_list')::boolean; 55 | """ 56 | ) 57 | op.alter_column("users", "mailing_list", nullable=False, schema="metadata") 58 | op.alter_column("users", "timezone", nullable=False, schema="metadata") 59 | op.drop_column("users", "settings", schema="metadata") 60 | -------------------------------------------------------------------------------- /migrations/versions/1ec343a3a7bd_add_praise_table.py: -------------------------------------------------------------------------------- 1 | """Add praise table 2 | 3 | Revision ID: 1ec343a3a7bd 4 | Revises: 8951426b65be 5 | Create Date: 2022-04-27 14:49:59.214171+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by 
Alembic. 14 | revision = "1ec343a3a7bd" 15 | down_revision = "8951426b65be" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "praise", 23 | sa.Column( 24 | "praise_id", sa.BigInteger(), sa.Identity(always=False), nullable=False 25 | ), 26 | sa.Column("table_uuid", postgresql.UUID(as_uuid=True), nullable=False), 27 | sa.Column("user_uuid", postgresql.UUID(as_uuid=True), nullable=False), 28 | sa.Column( 29 | "praised", 30 | sa.TIMESTAMP(timezone=True), 31 | server_default=sa.text("CURRENT_TIMESTAMP"), 32 | nullable=False, 33 | ), 34 | sa.ForeignKeyConstraint( 35 | ["table_uuid"], 36 | ["metadata.tables.table_uuid"], 37 | name=op.f("fk_praise_table_uuid_tables"), 38 | ), 39 | sa.ForeignKeyConstraint( 40 | ["user_uuid"], 41 | ["metadata.users.user_uuid"], 42 | name=op.f("fk_praise_user_uuid_users"), 43 | ), 44 | sa.PrimaryKeyConstraint("praise_id", name=op.f("pk_praise")), 45 | sa.UniqueConstraint( 46 | "user_uuid", "table_uuid", name=op.f("uq_praise_user_uuid") 47 | ), 48 | schema="metadata", 49 | ) 50 | op.create_index( 51 | op.f("ix_metadata_praise_praised"), 52 | "praise", 53 | ["praised"], 54 | unique=False, 55 | schema="metadata", 56 | ) 57 | op.create_index( 58 | op.f("ix_metadata_praise_table_uuid"), 59 | "praise", 60 | ["table_uuid"], 61 | unique=False, 62 | schema="metadata", 63 | ) 64 | op.create_index( 65 | op.f("ix_metadata_praise_user_uuid"), 66 | "praise", 67 | ["user_uuid"], 68 | unique=False, 69 | schema="metadata", 70 | ) 71 | 72 | 73 | def downgrade(): 74 | op.drop_index( 75 | op.f("ix_metadata_praise_user_uuid"), table_name="praise", schema="metadata" 76 | ) 77 | op.drop_index( 78 | op.f("ix_metadata_praise_table_uuid"), table_name="praise", schema="metadata" 79 | ) 80 | op.drop_index( 81 | op.f("ix_metadata_praise_praised"), table_name="praise", schema="metadata" 82 | ) 83 | op.drop_table("praise", schema="metadata") 84 | 
-------------------------------------------------------------------------------- /migrations/versions/23e66e106c1e_correct_unique_column_table_pkey.py: -------------------------------------------------------------------------------- 1 | """Correct unique column table pkey 2 | 3 | Revision ID: 23e66e106c1e 4 | Revises: 47062cc1c8e0 5 | Create Date: 2024-05-10 10:15:27.928882+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | 11 | 12 | # revision identifiers, used by Alembic. 13 | revision = "23e66e106c1e" 14 | down_revision = "47062cc1c8e0" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | op.drop_constraint( 21 | "pk_unique_columns", "unique_columns", type_="primary", schema="metadata" 22 | ) 23 | op.create_primary_key( 24 | "pk_unique_columns", 25 | table_name="unique_columns", 26 | columns=["table_uuid", "column_name"], 27 | schema="metadata", 28 | ) 29 | 30 | 31 | def downgrade(): 32 | op.drop_constraint( 33 | "pk_unique_columns", "unique_columns", type_="primary", schema="metadata" 34 | ) 35 | op.create_primary_key( 36 | "pk_unique_columns", 37 | table_name="unique_columns", 38 | columns=["table_uuid"], 39 | schema="metadata", 40 | ) 41 | -------------------------------------------------------------------------------- /migrations/versions/47062cc1c8e0_add_unique_columns_table.py: -------------------------------------------------------------------------------- 1 | """Add unique columns table 2 | 3 | Revision ID: 47062cc1c8e0 4 | Revises: 5247a5a65c3c 5 | Create Date: 2024-05-03 13:18:24.362186+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "47062cc1c8e0" 15 | down_revision = "5247a5a65c3c" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "unique_columns", 23 | sa.Column("table_uuid", postgresql.UUID(as_uuid=True), nullable=False), 24 | sa.Column("column_name", sa.String(), nullable=False), 25 | sa.ForeignKeyConstraint( 26 | ["table_uuid"], 27 | ["metadata.tables.table_uuid"], 28 | name=op.f("fk_unique_columns_table_uuid_tables"), 29 | ), 30 | sa.PrimaryKeyConstraint("table_uuid", name=op.f("pk_unique_columns")), 31 | schema="metadata", 32 | ) 33 | 34 | 35 | def downgrade(): 36 | op.drop_table("unique_columns", schema="metadata") 37 | -------------------------------------------------------------------------------- /migrations/versions/5247a5a65c3c_use_a_repo_url_column.py: -------------------------------------------------------------------------------- 1 | """Use a repo_url column 2 | 3 | Revision ID: 5247a5a65c3c 4 | Revises: 7dd1bbf902b5 5 | Create Date: 2024-05-02 07:27:51.387374+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = "5247a5a65c3c" 14 | down_revision = "7dd1bbf902b5" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | op.drop_column("github_follows", "org", schema="metadata") 21 | op.alter_column( 22 | "github_follows", "repo", new_column_name="https_repo_url", schema="metadata" 23 | ) 24 | 25 | 26 | def downgrade(): 27 | op.add_column( 28 | "github_follows", 29 | sa.Column("org", sa.VARCHAR(), autoincrement=False, nullable=False), 30 | schema="metadata", 31 | ) 32 | op.alter_column( 33 | "github_follows", "https_repo_url", new_column_name="repo", schema="metadata" 34 | ) 35 | -------------------------------------------------------------------------------- /migrations/versions/57b35f243fb0_adjust_comment_tables.py: -------------------------------------------------------------------------------- 1 | """Adjust comment tables 2 | 3 | Revision ID: 57b35f243fb0 4 | Revises: eb87fcc5d860 5 | Create Date: 2024-08-24 12:13:52.277857+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "57b35f243fb0" 15 | down_revision = "eb87fcc5d860" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.add_column( 22 | "threads", 23 | sa.Column("thread_slug", sa.String(), nullable=False), 24 | schema="metadata", 25 | ) 26 | op.create_unique_constraint( 27 | op.f("uq_threads_thread_slug"), "threads", ["thread_slug"], schema="metadata" 28 | ) 29 | op.execute("ALTER TABLE metadata.comments ALTER COLUMN comment_id DROP IDENTITY") 30 | op.drop_constraint("pk_comments", "comments", schema="metadata") 31 | op.create_primary_key( 32 | op.f("pk_comments"), "comments", ["thread_id", "comment_id"], schema="metadata" 33 | ) 34 | 35 | 36 | def downgrade(): 37 | op.drop_constraint( 38 | op.f("uq_threads_thread_slug"), "threads", schema="metadata", type_="unique" 39 | ) 40 | op.drop_column("threads", "thread_slug", schema="metadata") 41 | op.alter_column( 42 | "comments", 43 | "comment_id", 44 | existing_type=sa.BIGINT(), 45 | server_default=sa.Identity( 46 | always=False, 47 | start=1, 48 | increment=1, 49 | minvalue=1, 50 | maxvalue=9223372036854775807, 51 | cycle=False, 52 | cache=1, 53 | ), 54 | existing_nullable=False, 55 | schema="metadata", 56 | ) 57 | op.drop_constraint("pk_comments", "comments", schema="metadata") 58 | op.create_primary_key( 59 | op.f("pk_comments"), "comments", ["comment_id"], schema="metadata" 60 | ) 61 | -------------------------------------------------------------------------------- /migrations/versions/63cd716e7107_add_copies.py: -------------------------------------------------------------------------------- 1 | """Add copies 2 | 3 | Revision ID: 63cd716e7107 4 | Revises: 3c8dab82577e 5 | Create Date: 2023-09-10 22:12:50.520564+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "63cd716e7107" 15 | down_revision = "3c8dab82577e" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "copies", 23 | sa.Column( 24 | "copy_id", sa.BigInteger(), sa.Identity(always=False), nullable=False 25 | ), 26 | sa.Column("from_uuid", postgresql.UUID(as_uuid=True), nullable=False), 27 | sa.Column("to_uuid", postgresql.UUID(as_uuid=True), nullable=False), 28 | sa.Column("created", sa.DateTime(timezone=True), nullable=False), 29 | sa.ForeignKeyConstraint( 30 | ["from_uuid"], 31 | ["metadata.tables.table_uuid"], 32 | name=op.f("fk_copies_from_uuid_tables"), 33 | ), 34 | sa.ForeignKeyConstraint( 35 | ["to_uuid"], 36 | ["metadata.tables.table_uuid"], 37 | name=op.f("fk_copies_to_uuid_tables"), 38 | ), 39 | sa.PrimaryKeyConstraint("from_uuid", "to_uuid", name=op.f("pk_copies")), 40 | schema="metadata", 41 | ) 42 | op.create_index( 43 | op.f("ix_metadata_copies_copy_id"), 44 | "copies", 45 | ["copy_id"], 46 | unique=True, 47 | schema="metadata", 48 | ) 49 | op.create_index( 50 | op.f("ix_metadata_copies_created"), 51 | "copies", 52 | ["created"], 53 | unique=False, 54 | schema="metadata", 55 | ) 56 | op.create_index( 57 | op.f("ix_metadata_copies_from_uuid"), 58 | "copies", 59 | ["from_uuid"], 60 | unique=False, 61 | schema="metadata", 62 | ) 63 | op.create_index( 64 | op.f("ix_metadata_copies_to_uuid"), 65 | "copies", 66 | ["to_uuid"], 67 | unique=False, 68 | schema="metadata", 69 | ) 70 | 71 | 72 | def downgrade(): 73 | op.drop_index( 74 | op.f("ix_metadata_copies_to_uuid"), table_name="copies", schema="metadata" 75 | ) 76 | op.drop_index( 77 | op.f("ix_metadata_copies_from_uuid"), table_name="copies", schema="metadata" 78 | ) 79 | op.drop_index( 80 | op.f("ix_metadata_copies_created"), table_name="copies", schema="metadata" 81 | ) 82 | op.drop_index( 83 | op.f("ix_metadata_copies_copy_id"), table_name="copies", schema="metadata" 84 | ) 85 | op.drop_table("copies", schema="metadata") 86 | 
-------------------------------------------------------------------------------- /migrations/versions/6c7715349588_rename_deleted_to_comments_and_threads.py: -------------------------------------------------------------------------------- 1 | """Rename deleted to comments and threads 2 | 3 | Revision ID: 6c7715349588 4 | Revises: 6d59d431ee77 5 | Create Date: 2024-08-27 06:49:41.937417+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 14 | revision = "6c7715349588" 15 | down_revision = "6d59d431ee77" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.add_column( 22 | "comments", 23 | sa.Column("deleted", sa.DateTime(timezone=True), nullable=True), 24 | schema="metadata", 25 | ) 26 | op.add_column( 27 | "threads", 28 | sa.Column("deleted", sa.DateTime(timezone=True), nullable=True), 29 | schema="metadata", 30 | ) 31 | 32 | 33 | def downgrade(): 34 | op.drop_column("threads", "deleted", schema="metadata") 35 | op.drop_column("comments", "deleted", schema="metadata") 36 | -------------------------------------------------------------------------------- /migrations/versions/757b465597b4_add_email_verification_columns.py: -------------------------------------------------------------------------------- 1 | """Add email verification columns 2 | 3 | Revision ID: 757b465597b4 4 | Revises: bc116d837946 5 | Create Date: 2024-09-24 12:41:04.535437+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "757b465597b4" 15 | down_revision = "bc116d837946" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.add_column( 22 | "user_emails", 23 | sa.Column("verification_code", sa.LargeBinary(), nullable=True), 24 | schema="metadata", 25 | ) 26 | op.add_column( 27 | "user_emails", 28 | sa.Column( 29 | "verification_code_expiry", sa.DateTime(timezone=True), nullable=True 30 | ), 31 | schema="metadata", 32 | ) 33 | op.add_column( 34 | "user_emails", 35 | sa.Column( 36 | "verification_email_last_sent", sa.DateTime(timezone=True), nullable=True 37 | ), 38 | schema="metadata", 39 | ) 40 | op.add_column( 41 | "user_emails", 42 | sa.Column("verified", sa.DateTime(timezone=True), nullable=True), 43 | schema="metadata", 44 | ) 45 | 46 | 47 | def downgrade(): 48 | op.drop_column("user_emails", "verified", schema="metadata") 49 | op.drop_column("user_emails", "verification_email_last_sent", schema="metadata") 50 | op.drop_column("user_emails", "verification_code_expiry", schema="metadata") 51 | op.drop_column("user_emails", "verification_code", schema="metadata") 52 | -------------------------------------------------------------------------------- /migrations/versions/75a882d6c74e_add_backends.py: -------------------------------------------------------------------------------- 1 | """Add backends 2 | 3 | Revision ID: 75a882d6c74e 4 | Revises: cb67ce467141 5 | Create Date: 2024-04-16 10:44:10.475647+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "75a882d6c74e" 15 | down_revision = "cb67ce467141" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "table_backends", 23 | sa.Column("backend_id", sa.SmallInteger(), autoincrement=False, nullable=False), 24 | sa.Column("backend_name", sa.String(), nullable=False), 25 | sa.PrimaryKeyConstraint("backend_id", name=op.f("pk_table_backends")), 26 | schema="metadata", 27 | ) 28 | op.execute( 29 | "INSERT INTO metadata.table_backends (backend_id, backend_name) VALUES (1, 'postgres');" 30 | ) 31 | op.add_column( 32 | "tables", 33 | sa.Column("backend_id", sa.SmallInteger(), nullable=True), 34 | schema="metadata", 35 | ) 36 | op.execute("UPDATE metadata.tables SET backend_id = 1") 37 | op.alter_column("tables", "backend_id", schema="metadata", nullable=False) 38 | op.create_foreign_key( 39 | op.f("fk_tables_backend_id_table_backends"), 40 | "tables", 41 | "table_backends", 42 | ["backend_id"], 43 | ["backend_id"], 44 | source_schema="metadata", 45 | referent_schema="metadata", 46 | ) 47 | 48 | 49 | def downgrade(): 50 | op.drop_constraint( 51 | op.f("fk_tables_backend_id_table_backends"), 52 | "tables", 53 | schema="metadata", 54 | type_="foreignkey", 55 | ) 56 | op.drop_column("tables", "backend_id", schema="metadata") 57 | op.drop_table("table_backends", schema="metadata") 58 | -------------------------------------------------------------------------------- /migrations/versions/7dd1bbf902b5_add_mailing_list_column.py: -------------------------------------------------------------------------------- 1 | """Add mailing list column 2 | 3 | Revision ID: 7dd1bbf902b5 4 | Revises: 98e5779863fd 5 | Create Date: 2024-04-25 13:28:38.648512+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "7dd1bbf902b5" 15 | down_revision = "98e5779863fd" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.add_column( 22 | "users", 23 | sa.Column("mailing_list", sa.Boolean(), nullable=True), 24 | schema="metadata", 25 | ) 26 | op.execute("update metadata.users set mailing_list = false") 27 | op.alter_column("users", "mailing_list", nullable=False, schema="metadata") 28 | 29 | 30 | def downgrade(): 31 | op.drop_column("users", "mailing_list", schema="metadata") 32 | -------------------------------------------------------------------------------- /migrations/versions/8951426b65be_add_data_licences.py: -------------------------------------------------------------------------------- 1 | """Add data licences 2 | 3 | Revision ID: 8951426b65be 4 | Revises: a0f88c5755b3 5 | Create Date: 2022-04-21 21:47:28.080908+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.sql import table, column 12 | 13 | 14 | # revision identifiers, used by Alembic. 
15 | revision = "8951426b65be" 16 | down_revision = "a0f88c5755b3" 17 | branch_labels = None 18 | depends_on = None 19 | 20 | data_licence_table = table( 21 | "data_licences", 22 | column("licence_id", sa.SmallInteger), 23 | column("licence_name", sa.String), 24 | schema="metadata", 25 | ) 26 | 27 | data_licences = [ 28 | (0, "UNKNOWN"), 29 | (1, "ALL_RIGHTS_RESERVED"), 30 | (2, "PDDL"), 31 | (3, "ODC_BY"), 32 | (4, "ODBL"), 33 | (5, "OGL"), 34 | ] 35 | 36 | 37 | def upgrade(): 38 | op.bulk_insert( 39 | data_licence_table, 40 | [{"licence_id": id, "licence_name": name} for id, name in data_licences], 41 | ) 42 | 43 | 44 | def downgrade(): 45 | op.get_bind().execute(sa.text("delete from metadata.data_licences")) 46 | -------------------------------------------------------------------------------- /migrations/versions/98e5779863fd_add_github_follows.py: -------------------------------------------------------------------------------- 1 | """Add github follows 2 | 3 | Revision ID: 98e5779863fd 4 | Revises: 75a882d6c74e 5 | Create Date: 2024-04-20 23:33:29.522264+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "98e5779863fd" 15 | down_revision = "75a882d6c74e" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "github_follows", 23 | sa.Column("table_uuid", postgresql.UUID(as_uuid=True), nullable=False), 24 | sa.Column("last_sha", postgresql.BYTEA(), nullable=False), 25 | sa.Column("last_modified", sa.DateTime(timezone=True), nullable=False), 26 | sa.Column("org", sa.String(), nullable=False), 27 | sa.Column("repo", sa.String(), nullable=False), 28 | sa.Column("branch", sa.String(), nullable=False), 29 | sa.Column("path", sa.String(), nullable=False), 30 | sa.ForeignKeyConstraint( 31 | ["table_uuid"], 32 | ["metadata.tables.table_uuid"], 33 | name=op.f("fk_github_follows_table_uuid_tables"), 34 | ), 35 | sa.PrimaryKeyConstraint("table_uuid", name=op.f("pk_github_follows")), 36 | schema="metadata", 37 | ) 38 | 39 | 40 | def downgrade(): 41 | op.drop_table("github_follows", schema="metadata") 42 | -------------------------------------------------------------------------------- /migrations/versions/bc116d837946_add_table_licences.py: -------------------------------------------------------------------------------- 1 | """Add table licences 2 | 3 | Revision ID: bc116d837946 4 | Revises: cb79e639ef74 5 | Create Date: 2024-09-17 12:24:51.691549+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "bc116d837946" 15 | down_revision = "cb79e639ef74" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "table_licences", 23 | sa.Column("table_uuid", sa.UUID(), nullable=False), 24 | sa.Column("licence_id", sa.SmallInteger(), nullable=False), 25 | sa.ForeignKeyConstraint( 26 | ["licence_id"], 27 | ["metadata.licences.licence_id"], 28 | name=op.f("fk_table_licences_licence_id_licences"), 29 | ), 30 | sa.ForeignKeyConstraint( 31 | ["table_uuid"], 32 | ["metadata.tables.table_uuid"], 33 | name=op.f("fk_table_licences_table_uuid_tables"), 34 | ), 35 | sa.PrimaryKeyConstraint( 36 | "table_uuid", "licence_id", name=op.f("pk_table_licences") 37 | ), 38 | sa.UniqueConstraint("table_uuid", name=op.f("uq_table_licences_table_uuid")), 39 | schema="metadata", 40 | ) 41 | op.drop_constraint( 42 | "fk_tables_licence_id_data_licences", 43 | "tables", 44 | type_="foreignkey", 45 | schema="metadata", 46 | ) 47 | op.alter_column( 48 | "tables", 49 | "licence_id", 50 | existing_type=sa.SMALLINT(), 51 | nullable=True, 52 | schema="metadata", 53 | ) 54 | 55 | 56 | def downgrade(): 57 | op.drop_table("table_licences", schema="metadata") 58 | op.alter_column( 59 | "tables", 60 | "licence_id", 61 | existing_type=sa.SMALLINT(), 62 | nullable=False, 63 | schema="metadata", 64 | ) 65 | op.create_foreign_key( 66 | "fk_tables_licence_id_data_licences", 67 | "tables", 68 | "data_licences", 69 | ["licence_id"], 70 | ["licence_id"], 71 | source_schema="metadata", 72 | referent_schema="metadata", 73 | ) 74 | -------------------------------------------------------------------------------- /migrations/versions/cb67ce467141_check_constraint_blank_emails.py: -------------------------------------------------------------------------------- 1 | """check constraint blank emails 2 | 3 | Revision ID: cb67ce467141 4 | Revises: 63cd716e7107 5 | Create Date: 2023-10-08 14:33:09.338971+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | 11 
| 12 | # revision identifiers, used by Alembic. 13 | revision = "cb67ce467141" 14 | down_revision = "63cd716e7107" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | op.execute("DELETE FROM metadata.user_emails WHERE email_address = ''") 21 | op.create_check_constraint( 22 | op.f("ck_user_emails_email_address_not_blank"), 23 | "user_emails", 24 | "email_address ~ '@'", 25 | schema="metadata", 26 | ) 27 | 28 | 29 | def downgrade(): 30 | op.drop_constraint( 31 | op.f("ck_user_emails_email_address_not_blank"), 32 | "user_emails", 33 | "check", 34 | schema="metadata", 35 | ) 36 | -------------------------------------------------------------------------------- /migrations/versions/cb79e639ef74_created_licences_table.py: -------------------------------------------------------------------------------- 1 | """Created licences table 2 | 3 | Revision ID: cb79e639ef74 4 | Revises: 1dfc9b3a690e 5 | Create Date: 2024-09-13 11:21:42.681121+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "cb79e639ef74" 15 | down_revision = "1dfc9b3a690e" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "licences", 23 | sa.Column("licence_id", sa.SmallInteger(), autoincrement=True, nullable=False), 24 | sa.Column("spdx_id", sa.String(), nullable=False), 25 | sa.Column("licence_name", sa.String(), nullable=False), 26 | sa.PrimaryKeyConstraint("licence_id", name=op.f("pk_licences")), 27 | sa.UniqueConstraint("spdx_id", name=op.f("uq_licences_spdx_id")), 28 | schema="metadata", 29 | ) 30 | 31 | 32 | def downgrade(): 33 | op.drop_table("licences", schema="metadata") 34 | -------------------------------------------------------------------------------- /migrations/versions/cf3ddc8fb918_add_last_changed_to_table.py: -------------------------------------------------------------------------------- 1 | """Add last_changed to table 2 | 3 | Revision ID: cf3ddc8fb918 4 | Revises: ef0fa56f3fc7 5 | Create Date: 2023-01-14 10:14:01.637543+00:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | # revision identifiers, used by Alembic. 
13 | revision = "cf3ddc8fb918" 14 | down_revision = "ef0fa56f3fc7" 15 | branch_labels = None 16 | depends_on = None 17 | 18 | 19 | def upgrade(): 20 | op.add_column( 21 | "tables", 22 | sa.Column("last_changed", sa.DateTime(timezone=True)), 23 | schema="metadata", 24 | ) 25 | op.execute("UPDATE metadata.tables SET last_changed = created") 26 | op.alter_column("tables", "last_changed", nullable=False, schema="metadata") 27 | 28 | 29 | def downgrade(): 30 | op.drop_column("tables", "last_changed", schema="metadata") 31 | -------------------------------------------------------------------------------- /migrations/versions/eb87fcc5d860_add_user_bio.py: -------------------------------------------------------------------------------- 1 | """Add user bio 2 | 3 | Revision ID: eb87fcc5d860 4 | Revises: 173e920c9600 5 | Create Date: 2024-07-29 13:12:37.007565+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | from sqlalchemy.dialects import postgresql 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "eb87fcc5d860" 15 | down_revision = "173e920c9600" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "user_bios", 23 | sa.Column("user_uuid", postgresql.UUID(as_uuid=True), nullable=False), 24 | sa.Column("user_bio_markdown", sa.String(length=10000), nullable=False), 25 | sa.ForeignKeyConstraint( 26 | ["user_uuid"], 27 | ["metadata.users.user_uuid"], 28 | name=op.f("fk_user_bios_user_uuid_users"), 29 | ), 30 | sa.PrimaryKeyConstraint("user_uuid", name=op.f("pk_user_bios")), 31 | schema="metadata", 32 | ) 33 | 34 | 35 | def downgrade(): 36 | op.drop_table("user_bios", schema="metadata") 37 | -------------------------------------------------------------------------------- /migrations/versions/ef0fa56f3fc7_add_prohibited_usernames_table.py: -------------------------------------------------------------------------------- 1 | """Add prohibited usernames table 2 | 3 | Revision ID: ef0fa56f3fc7 4 | Revises: 1ec343a3a7bd 5 | Create Date: 2022-05-11 21:03:11.069249+01:00 6 | 7 | """ 8 | 9 | from alembic import op 10 | import sqlalchemy as sa 11 | 12 | 13 | # revision identifiers, used by Alembic. 
14 | revision = "ef0fa56f3fc7" 15 | down_revision = "1ec343a3a7bd" 16 | branch_labels = None 17 | depends_on = None 18 | 19 | 20 | def upgrade(): 21 | op.create_table( 22 | "prohibited_usernames", 23 | sa.Column("username", sa.String(), nullable=False), 24 | sa.PrimaryKeyConstraint("username", name=op.f("pk_prohibited_usernames")), 25 | schema="metadata", 26 | ) 27 | 28 | 29 | def downgrade(): 30 | op.drop_table("prohibited_usernames", schema="metadata") 31 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.9 3 | check_untyped_defs = True 4 | no_implicit_optional = True 5 | warn_unused_configs = True 6 | warn_unused_ignores = True 7 | exclude = build 8 | 9 | [mypy-flask_cors.*] 10 | ignore_missing_imports = True 11 | 12 | [mypy-passlib.context] 13 | ignore_missing_imports = True 14 | 15 | [mypy-flask_babel.*] 16 | ignore_missing_imports = True 17 | 18 | [mypy-feedgen.*] 19 | ignore_missing_imports = True 20 | 21 | [mypy-lxml.*] 22 | ignore_missing_imports = True 23 | 24 | [mypy-feedparser.*] 25 | ignore_missing_imports = True 26 | 27 | [mypy-dateutil.*] 28 | ignore_missing_imports = True 29 | 30 | [mypy-giturlparse] 31 | ignore_missing_imports = True 32 | 33 | [mypy-celery.beat.*] 34 | ignore_missing_imports = True 35 | 36 | [mypy-pandas.*] 37 | ignore_missing_imports = True 38 | 39 | [mypy-systemd.journal] 40 | ignore_missing_imports = True 41 | 42 | [mypy-pgcopy] 43 | ignore_missing_imports = True 44 | 45 | [mypy-xlsxwriter] 46 | ignore_missing_imports = True 47 | 48 | [mypy-pyarrow.*] 49 | ignore_missing_imports = True 50 | 51 | [mypy-exceptiongroup.*] 52 | ignore_missing_imports = True 53 | 54 | [mypy-user_agents] 55 | ignore_missing_imports = True -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "csvbase" 7 | dynamic = ["version", "dependencies", "optional-dependencies"] 8 | 9 | [project.scripts] 10 | csvbase-make-tables = "csvbase.cli:make_tables" 11 | csvbase-make-blog-table = "csvbase.web.blog.cli:make_blog_table" 12 | csvbase-load-prohibited-usernames = "csvbase.cli:load_prohibited_usernames" 13 | csvbase-config = "csvbase.cli:config_cli" 14 | csvbase-update-stripe-subscriptions = "csvbase.cli:update_stripe_subscriptions" 15 | csvbase-repcache-populate = "csvbase.cli:repcache_populate" 16 | csvbase-create-thread = "csvbase.cli:create_thread" 17 | csvbase-populate-licences = "csvbase.cli:populate_licences" 18 | 19 | [tool.setuptools.packages.find] 20 | exclude = ["tests", "migrations"] 21 | 22 | [tool.setuptools.dynamic] 23 | version = {file = "csvbase/VERSION"} 24 | dependencies = {file = ["requirements.txt"]} 25 | optional-dependencies.tests = {file = ["requirements-test.txt"]} -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- 1 | aiosmtpd~=1.4.6 2 | black~=24.3.0 3 | bpython~=0.24 4 | cssselect~=1.2.0 5 | feedparser~=6.0.11 6 | mypy~=1.9.0 7 | openpyxl~=3.1.2 8 | pandas 9 | pygments~=2.17.1 10 | pytest-flask~=1.3.0 11 | pytest~=8.1.1 12 | requests-mock~=1.12.1 13 | ruff~=0.8.0 14 | types-bleach==5.0.2 15 | types-cryptography 16 | types-setuptools==65.1.0 17 | types-toml==0.10.8.5 18 | types-requests~=2.32.0 19 | celery-types 20 | -------------------------------------------------------------------------------- 
/requirements.txt: -------------------------------------------------------------------------------- 1 | alembic[tz]==1.13.1 2 | argon2-cffi==23.1.0 3 | bleach==6.2.0 4 | charset-normalizer==3.4.0 5 | celery[redis]==5.4.0 6 | click==8.1.7 7 | ddtrace==2.8.1 8 | giturlparse==0.12.0 9 | importlib_resources==6.4.0 10 | python-dateutil==2.9.0 11 | feedgen==1.0.0 12 | flask-babel==4.0.0 13 | flask-cors==5.0.0 14 | flask-sqlalchemy==3.1.1 15 | flask==3.0.3 16 | gunicorn==22.0.0 17 | humanize==4.10.0 18 | inflect==7.2.0 19 | jinja2-humanize-extension==0.4.0 20 | marko[codehilite]==2.0.3 21 | passlib==1.7.4 22 | pgcopy==1.6.0 23 | platformdirs==4.3.6 24 | psycopg2==2.9.10 25 | pyarrow==17.0.0 26 | pydantic==2.7.0 27 | requests==2.32.3 28 | sentry-sdk[flask]==1.45.0 29 | sqlalchemy==2.0.32 30 | stripe==10.7.0 31 | systemd-python==235; sys_platform == "linux" 32 | toml==0.10.2 33 | typing-extensions==4.10.0 34 | user-agents==2.2.0 35 | werkzeug==3.0.6 36 | xlsxwriter==3.2.0 37 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/email_utils.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import typing 3 | from logging import getLogger 4 | import time 5 | from email import message_from_bytes 6 | from email.message import EmailMessage 7 | import email.policy 8 | from unittest.mock import patch 9 | import contextlib 10 | from typing import Generator 11 | 12 | from csvbase.email import get_smtp_host_port 13 | from csvbase.config import get_config 14 | 15 | from aiosmtpd.controller import Controller 16 | from aiosmtpd.smtp import SMTP, Session, Envelope 17 | 18 | logger = getLogger(__name__) 19 | 20 | 21 | class StoringHandler: 22 | """A handler for aiosmtp which just stores emails (as stdlib 
EmailMessage 23 | objects) in a instance dict. 24 | 25 | """ 26 | 27 | def __init__(self): 28 | self.received: dict[str, EmailMessage] = {} 29 | self.sleep_duration = 0.01 30 | 31 | async def handle_DATA( 32 | self, server: SMTP, session: Session, envelope: Envelope 33 | ) -> str: 34 | message = typing.cast( 35 | EmailMessage, 36 | message_from_bytes( 37 | envelope.original_content or b"", 38 | _class=EmailMessage, 39 | policy=email.policy.default, 40 | ), 41 | ) 42 | # the message id needs to be stripped here, due to a bug in the stdlib 43 | # where whitespace is being left in front of long fields when they are 44 | # unwrapped 45 | # https://github.com/python/cpython/issues/124452 46 | self.received[message["Message-ID"].strip()] = message 47 | logger.info("Received message: '%s'", message) 48 | return "250 Message accepted for delivery" 49 | 50 | def join(self, expected: int = 1) -> None: 51 | """Wait until at least the expected number of emails have arrived (and been parsed)""" 52 | for _ in range(10): 53 | if len(self.received) == expected: 54 | logger.info("Received {self.recieved} emails") 55 | return None 56 | logger.warning( 57 | "Not enough email has arrived, sleeping for %f", self.sleep_duration 58 | ) 59 | time.sleep(self.sleep_duration) 60 | else: 61 | raise RuntimeError("no email was delivered") 62 | 63 | 64 | def get_free_local_port() -> int: 65 | """Returns a (currently) free local port. 66 | 67 | Not 100% effective, but very effective. Used to prevent problems with the 68 | same port being used twice in tests. 
69 | 70 | """ 71 | with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 72 | s.bind(('', 0)) 73 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 74 | return s.getsockname()[1] 75 | 76 | @contextlib.contextmanager 77 | def randomise_smtp_port() -> Generator[None, None, None]: 78 | free_local_port = get_free_local_port() 79 | with patch.object(get_config(), "smtp_host", f"localhost:{free_local_port}"): 80 | yield 81 | 82 | 83 | if __name__ == "__main__": 84 | _, port = get_smtp_host_port() 85 | controller = Controller(StoringHandler(), port=port) 86 | controller.start() 87 | input(f"SMTP server running on port {port}. Press Return to stop server and exit.") 88 | controller.stop() 89 | -------------------------------------------------------------------------------- /tests/test-data/.gitignore: -------------------------------------------------------------------------------- 1 | sitemap.xsd -------------------------------------------------------------------------------- /tests/test-data/WID.csv: -------------------------------------------------------------------------------- 1 | Country,Country Code,Short name of variable,Type(s) of variable,Variable category,Variable level(s),Years,Age groups,Population categories,WID.world code,Percentiles,Years available for these percentiles,Simple description of variable,Technical description of variable,Method used for computation,Source,data_quality 2 | Afghanistan,AF,National income,"Average, Macroeconomic variable",Income macro variable,"1, 2",1950-2017,Aggregate on all ages,Individuals,"anninc992i, mnninc999i",Macro variable (not applicable),Macro variable (not applicable),"National income aims to measure the total income available to the residents of a given country. It is equal to the gross domestic product (the total value of goods and services produced on the territory of a given country during a given year), minus fixed capital used in production processes (e.g. 
replacement of obsolete machines or maintenance of roads) plus the net foreign income earned by residents in the rest of the world.// National income has many limitations. However it is the only income concept that has an internationally agreed definition (established by the United Nations System of National Accounts, see SNA 2008). So we use it as our reference concept (with tax havens correction). To be improved.The national economy - in the national accounts sense - includes all domestic sectors, i.e. all entities that are resident of a given country (in the sense of their economic activity), whether they belong to the private sector, the corporate sector, the governement sector.",[National income]=[Net domestic product]+[Net foreign income],"From 1950 to 1978, we computed net foreign income based on its share in GDP in 1979.","WID.world computations using: [URL][URL_LINK]http://data.imf.org/BOP[/URL_LINK][URL_TEXT]IMF Balance of Payments Statistics[/URL_TEXT][/URL]; [URL][URL_LINK]http://unstats.un.org/unsd/snaama/Introduction.asp[/URL_LINK][URL_TEXT]United Nations National Accounts Main Aggregates Database[/URL_TEXT][/URL]; [URL][URL_LINK]http://www.ggdc.net/maddison/other_books/Contours_World_Economy.pdf[/URL_LINK][URL_TEXT]Maddison, Angus (2007). Contours of the World Economy 1-2030 AD.[/URL_TEXT][/URL]; [URL][URL_LINK]https://www.imf.org/external/pubs/ft/weo/2018/01/weodata/index.aspx/[/URL_LINK][URL_TEXT]IMF World Economic Outlook (04/2018)[/URL_TEXT][/URL]; value from the next year as a % of GDP", 3 | Alabama,US-AL,Fiscal income,Average,Income distributed variable,1,1917-2015,Aggregate on population aged over 20,Tax unit,afiinc992t,"p0p100, p0p90, p0p99, p90p100, p90p95, p95p100, p95p99, p99.5p100, p99.5p99.9, p99.99p100, p99.9p100, p99.9p99.99, p99p100, p99p99.5",Percentiles available for some years only,"Fiscal income is defined as the sum of all income items reported on income tax returns, before any deduction. 
It includes labour income, capital income and mixed income. The concept of fiscal income varies with national tax legislations, so in order to make international comparisons it is preferable to use the concept of national income.",[Fiscal income ]=[Fiscal labour income [total fiscal income ranking]]+[Fiscal capital income [total fiscal income ranking]],,"Frank, Sommeiller, Price & Saez (2015)", -------------------------------------------------------------------------------- /tests/test-data/abc123.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | "one",2,3.0 -------------------------------------------------------------------------------- /tests/test-data/blank-headers.csv: -------------------------------------------------------------------------------- 1 | ,a,,b,c, 2 | 1,2,3,4,5,6 -------------------------------------------------------------------------------- /tests/test-data/empty.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/tests/test-data/empty.csv -------------------------------------------------------------------------------- /tests/test-data/headers-only.csv: -------------------------------------------------------------------------------- 1 | a,b,c -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import toml 3 | 4 | from csvbase.config import load_config, Config 5 | 6 | 7 | def test_config_file_not_exist(tmpdir): 8 | config = load_config(Path(tmpdir / "config.toml")) 9 | assert config == Config( 10 | db_url="postgresql:///csvbase", 11 | environment="local", 12 | blog_ref=None, 13 | secret_key=None, 14 | sentry_dsn=None, 15 | stripe_api_key=None, 16 | stripe_price_id=None, 17 | enable_datadog=False, 18 | 
x_accel_redirect=False, 19 | turnstile_site_key=None, 20 | turnstile_secret_key=None, 21 | smtp_host=None, 22 | memcache_server=None, 23 | ) 24 | 25 | 26 | def test_config_file_basic(tmpdir): 27 | config = {"db_url": "postgresql:///csvboth", "environment": "test"} 28 | config_file = Path(tmpdir / "config.toml") 29 | with open(config_file, "w") as config_f: 30 | toml.dump(config, config_f) 31 | 32 | assert load_config(config_file) == Config( 33 | db_url="postgresql:///csvboth", 34 | environment="test", 35 | blog_ref=None, 36 | secret_key=None, 37 | sentry_dsn=None, 38 | stripe_api_key=None, 39 | stripe_price_id=None, 40 | enable_datadog=False, 41 | x_accel_redirect=False, 42 | turnstile_site_key=None, 43 | turnstile_secret_key=None, 44 | smtp_host=None, 45 | memcache_server=None, 46 | ) 47 | -------------------------------------------------------------------------------- /tests/test_convert.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import io 3 | from functools import partial 4 | 5 | from werkzeug.datastructures import FileStorage 6 | import pandas as pd 7 | from pandas.testing import assert_frame_equal 8 | import pytest 9 | 10 | from csvbase.streams import rewind 11 | from csvbase.value_objs import ContentType 12 | 13 | SAMPLE_DATAFRAME = pd.DataFrame( 14 | {"id": [100, 200, 300], "value": ["a", "b", "c"]} 15 | ).set_index("id") 16 | 17 | 18 | @pytest.mark.parametrize( 19 | "from_format, to_format", 20 | [ 21 | (ContentType.CSV, ContentType.CSV), 22 | (ContentType.CSV, ContentType.PARQUET), 23 | (ContentType.CSV, ContentType.XLSX), 24 | (ContentType.CSV, ContentType.JSON_LINES), 25 | (ContentType.PARQUET, ContentType.CSV), 26 | ], 27 | ) 28 | def test_convert__a_to_b(client, test_user, from_format, to_format): 29 | methods = { 30 | ContentType.CSV: SAMPLE_DATAFRAME.to_csv, 31 | ContentType.PARQUET: SAMPLE_DATAFRAME.to_parquet, 32 | } 33 | reverse_methods = { 34 | ContentType.CSV: 
pd.read_csv, 35 | ContentType.PARQUET: pd.read_parquet, 36 | ContentType.XLSX: pd.read_excel, 37 | ContentType.JSON_LINES: partial(pd.read_json, lines=True), 38 | } 39 | 40 | get_resp = client.get("/convert") 41 | assert get_resp.status_code == 200 42 | 43 | buf = io.BytesIO() 44 | with rewind(buf): 45 | methods[from_format](buf) 46 | 47 | filename = Path("test").with_suffix("." + from_format.file_extension()) 48 | 49 | post_resp = client.post( 50 | "/convert", 51 | data={ 52 | "from-format": from_format.value, 53 | "to-format": to_format.value, 54 | "file": (FileStorage(buf, str(filename))), 55 | }, 56 | content_type="multipart/form-data", 57 | ) 58 | 59 | assert post_resp.status_code == 200 60 | expected_filename = filename.with_suffix("." + to_format.file_extension()) 61 | assert ( 62 | post_resp.headers["Content-Disposition"] 63 | == f'attachment; filename="{expected_filename}"' 64 | ) 65 | 66 | actual_dataframe = reverse_methods[to_format](io.BytesIO(post_resp.data)).set_index( 67 | "id" 68 | ) 69 | assert_frame_equal(SAMPLE_DATAFRAME, actual_dataframe) 70 | 71 | 72 | @pytest.mark.xfail(reason="not implemented") 73 | def test_convert__unreadable_file(): 74 | assert False 75 | 76 | 77 | @pytest.mark.xfail(reason="not implemented") 78 | def test_convert__unknown_content_type(): 79 | assert False 80 | -------------------------------------------------------------------------------- /tests/test_cors.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import ANY 2 | 3 | from flask import url_for 4 | 5 | import pytest 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "method, endpoint, endpoint_args", 10 | [ 11 | ("GET", "csvbase.table_view", {"username": "test", "table_name": "test"}), 12 | ("POST", "csvbase.create_row", {"username": "test", "table_name": "test"}), 13 | ( 14 | "PUT", 15 | "csvbase.row_view", 16 | {"username": "test", "table_name": "test", "row_id": 1}, 17 | ), 18 | ( 19 | "GET", 20 | 
"csvbase.row_view", 21 | {"username": "test", "table_name": "test", "row_id": 1}, 22 | ), 23 | ( 24 | "DELETE", 25 | "csvbase.row_view", 26 | {"username": "test", "table_name": "test", "row_id": 1}, 27 | ), 28 | ("PUT", "csvbase.table_view", {"username": "test", "table_name": "test"}), 29 | ], 30 | ) 31 | def test_cors__allowed_urls(client, method, endpoint, endpoint_args): 32 | url = url_for(endpoint, **endpoint_args) 33 | resp = client.options(url, headers={"Access-Control-Request-Method": method}) 34 | assert resp.status_code == 200 35 | access_control_headers = { 36 | k: v for k, v in resp.headers.items() if k.startswith("Access-Control-") 37 | } 38 | 39 | assert access_control_headers == { 40 | "Access-Control-Allow-Origin": "*", 41 | "Access-Control-Max-Age": str(8 * 60 * 60), 42 | "Access-Control-Allow-Methods": ANY, 43 | } 44 | assert method in access_control_headers["Access-Control-Allow-Methods"] 45 | -------------------------------------------------------------------------------- /tests/test_email.py: -------------------------------------------------------------------------------- 1 | from email.message import EmailMessage 2 | 3 | import pytest 4 | 5 | from csvbase import email 6 | 7 | from . 
import utils 8 | 9 | 10 | def make_message() -> EmailMessage: 11 | message = EmailMessage() 12 | message["From"] = "sender@example.com" 13 | message["To"] = "receiver@example.come" 14 | message["Subject"] = "Test" 15 | message.set_content("Test") 16 | 17 | return message 18 | 19 | 20 | def test_email__send_email(mock_smtpd): 21 | message_id = f"<{utils.random_string()}@example.com>" 22 | message = make_message() 23 | message.add_header("Message-ID", message_id) 24 | 25 | email.send(message) 26 | mock_smtpd.join() 27 | 28 | assert message_id in mock_smtpd.received 29 | 30 | 31 | def test_email__send_email_without_message_id(mock_smtpd): 32 | message = make_message() 33 | with pytest.raises(RuntimeError): 34 | email.send(message) 35 | 36 | 37 | def test_email__outbox(mock_smtpd): 38 | message_ids = [] 39 | with email.make_smtp_sesh() as smtp_sesh: 40 | outbox = email.Outbox(smtp_sesh) 41 | for _ in range(5): 42 | message = make_message() 43 | message_id = f"<{utils.random_string()}@example.com>" 44 | message.add_header("Message-ID", message_id) 45 | outbox.enqueue(message) 46 | message_ids.append(message_id) 47 | assert len(mock_smtpd.received) == 0 48 | outbox.flush() 49 | mock_smtpd.join(5) 50 | assert list(mock_smtpd.received.keys()) == message_ids 51 | -------------------------------------------------------------------------------- /tests/test_error_pages.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from .utils import random_string 4 | 5 | 6 | @pytest.mark.parametrize( 7 | "verb, url, expected_status_code", 8 | [ 9 | pytest.param("get", f"/{random_string()}", 404, id="non existent user"), 10 | pytest.param("get", "/user/table/madeup", 404, id="non existent page"), 11 | pytest.param("delete", "/", 405, id="not a supported verb"), 12 | ], 13 | ) 14 | @pytest.mark.parametrize( 15 | "accept", 16 | [ 17 | pytest.param("text/html", id="browser"), 18 | pytest.param("*/*", id="curl"), 19 | ], 20 | ) 21 | def test_404_error_pages(client, verb, url, expected_status_code, accept):
22 | if expected_status_code == 404: 23 | expected_message = "does not exist" 24 | else: 25 | expected_message = "that verb is not allowed" 26 | 27 | resp = client.open(url, method=verb, headers={"Accept": accept}) 28 | 29 | assert resp.status_code == expected_status_code 30 | if accept == "text/html": 31 | assert ( 32 | expected_message in resp.text 33 | ) # checking that our error page was generated 34 | else: 35 | assert expected_message in resp.json["error"] 36 | -------------------------------------------------------------------------------- /tests/test_faq.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | import importlib_resources 3 | 4 | from .utils import random_string 5 | 6 | import pytest 7 | 8 | slugs = [ 9 | entry_fp.stem 10 | for entry_fp in importlib_resources.files("csvbase.web.faq.entries").iterdir() 11 | if entry_fp.suffix == ".md" 12 | ] 13 | 14 | expected_max_age = int(timedelta(days=1).total_seconds()) 15 | 16 | 17 | def test_faq_index(client): 18 | resp = client.get("/faq") 19 | assert resp.status_code == 200 20 | assert resp.cache_control.max_age == expected_max_age 21 | 22 | 23 | @pytest.mark.parametrize("slug", slugs) 24 | def test_faq_entries__all(client, slug): 25 | resp = client.get(f"/faq/{slug}") 26 | assert resp.status_code == 200 27 | assert resp.cache_control.max_age == expected_max_age 28 | 29 | 30 | def test_faq_entries__missing(client): 31 | resp = client.get(f"/faq/{random_string()}") 32 | assert resp.status_code == 404 33 | -------------------------------------------------------------------------------- /tests/test_http_cache_headers.py: -------------------------------------------------------------------------------- 1 | def test_cache_headers_for_landing(client): 2 | resp = client.get("/") 3 | assert resp.headers["Cache-Control"] == "no-store" 4 | -------------------------------------------------------------------------------- /tests/test_json.py: 
-------------------------------------------------------------------------------- 1 | from datetime import date 2 | import pytest 3 | 4 | from csvbase import exc 5 | from csvbase.value_objs import Column, ColumnType 6 | from csvbase.json import value_to_json, json_to_value 7 | 8 | text_column = Column("some_text", ColumnType.TEXT) 9 | integer_column = Column("an_int", ColumnType.INTEGER) 10 | float_column = Column("a_float", ColumnType.FLOAT) 11 | boolean_column = Column("a_boolean", ColumnType.BOOLEAN) 12 | date_column = Column("a_date", ColumnType.DATE) 13 | 14 | 15 | @pytest.mark.parametrize( 16 | "value, expected_json", 17 | [ 18 | (date(2018, 1, 3), "2018-01-03"), 19 | (None, None), 20 | ], 21 | ) 22 | def test_value_to_json(value, expected_json): 23 | actual_json = value_to_json(value) 24 | assert expected_json == actual_json 25 | 26 | 27 | @pytest.mark.parametrize( 28 | "column, json_value, expected_value", 29 | [ 30 | (text_column, "some text", "some text"), 31 | (integer_column, 1.0, 1.0), 32 | (integer_column, 1, 1), 33 | (float_column, 1.0, 1.0), 34 | (float_column, 1, 1.0), 35 | (boolean_column, True, True), 36 | (boolean_column, False, False), 37 | (date_column, "2018-01-03", date(2018, 1, 3)), 38 | ], 39 | ) 40 | def test_json_to_value(column, json_value, expected_value): 41 | actual_value = json_to_value(column.type_, json_value) 42 | assert expected_value == actual_value 43 | 44 | 45 | @pytest.mark.parametrize( 46 | "column", [text_column, integer_column, float_column, boolean_column, date_column] 47 | ) 48 | def test_json_to_value_with_nulls(column): 49 | assert json_to_value(column.type_, None) is None 50 | 51 | 52 | @pytest.mark.parametrize( 53 | "column, json_value", 54 | [ 55 | (text_column, 1), 56 | (integer_column, "2018-01-02"), 57 | (boolean_column, "nope"), 58 | (date_column, "2018/01/03"), 59 | ], 60 | ) 61 | def test_json_to_value_with_wrong_type(column, json_value): 62 | with pytest.raises(exc.UnconvertableValueException): 63 | 
json_to_value(column.type_, json_value) 64 | -------------------------------------------------------------------------------- /tests/test_markdown.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | from csvbase import markdown 3 | 4 | 5 | def test_quote(): 6 | input_string = dedent( 7 | """\ 8 | hello 9 | 10 | this is 11 | a test 12 | 13 | of quoting""" 14 | ) 15 | 16 | expected = dedent( 17 | """\ 18 | > hello 19 | > 20 | > this is 21 | > a test 22 | > 23 | > of quoting""" 24 | ) 25 | 26 | actual = markdown.quote_markdown(input_string) 27 | 28 | # doing this makes this file less sensitive to trailing newlines 29 | for actual_line, expected_line in zip(actual.splitlines(), expected.splitlines()): 30 | assert actual_line.strip() == expected_line.strip() 31 | 32 | 33 | def text_extract_references__no_reference(): 34 | assert markdown.extract_references("Hello, World!") == [] 35 | 36 | 37 | def test_extract_references__simple_reference(): 38 | assert markdown.extract_references("Yeah, so about #8 - I think that") == ["#8"] 39 | 40 | 41 | def test_extract_references__multiple_references(): 42 | inp_markdown = """#8 43 | Yes you're right but what about #9 44 | """ 45 | 46 | assert markdown.extract_references(inp_markdown) == ["#8", "#9"] 47 | -------------------------------------------------------------------------------- /tests/test_praise.py: -------------------------------------------------------------------------------- 1 | from csvbase.web.main.bp import get_praise_id_if_exists 2 | from .utils import current_user 3 | 4 | 5 | def test_praise__praise(sesh, client, test_user, ten_rows): 6 | with current_user(test_user): 7 | resp = client.post(f"/{test_user.username}/{ten_rows.table_name}/praise") 8 | assert resp.status_code == 302 9 | assert ( 10 | resp.headers["Location"] == f"/{test_user.username}/{ten_rows.table_name}" 11 | ) 12 | 13 | praise_id = get_praise_id_if_exists(sesh, ten_rows) 14 | 
assert praise_id is not None 15 | 16 | 17 | def test_praise__unpraise(sesh, client, test_user, ten_rows): 18 | with current_user(test_user): 19 | client.post(f"/{test_user.username}/{ten_rows.table_name}/praise") 20 | praise_id = get_praise_id_if_exists(sesh, ten_rows) 21 | 22 | resp = client.post( 23 | f"/{test_user.username}/{ten_rows.table_name}/praise", 24 | data={"praise-id": praise_id}, 25 | ) 26 | assert resp.status_code == 302 27 | assert ( 28 | resp.headers["Location"] == f"/{test_user.username}/{ten_rows.table_name}" 29 | ) 30 | 31 | 32 | def test_praise__not_signed_in(client, test_user, ten_rows): 33 | resp = client.post(f"/{test_user.username}/{ten_rows.table_name}/praise") 34 | assert resp.status_code == 401 35 | -------------------------------------------------------------------------------- /tests/test_quota.py: -------------------------------------------------------------------------------- 1 | """Tests for usage and quota functionality""" 2 | 3 | from csvbase.userdata import PGUserdataAdapter 4 | 5 | 6 | def test_usage(sesh, test_user, ten_rows): 7 | backend = PGUserdataAdapter(sesh) 8 | byte_count = backend.byte_count(ten_rows.table_uuid) 9 | assert byte_count >= 1 10 | -------------------------------------------------------------------------------- /tests/test_readme_crud.py: -------------------------------------------------------------------------------- 1 | from csvbase.value_objs import ContentType 2 | 3 | from .utils import current_user 4 | 5 | 6 | def test_read__happy(client, test_user, ten_rows): 7 | response = client.get(f"/{test_user.username}/{ten_rows.table_name}/readme") 8 | assert response.status_code == 200 9 | assert response.content_type == ContentType.MARKDOWN.value 10 | 11 | 12 | def test_write__happy(client, test_user, ten_rows): 13 | url = f"/{test_user.username}/{ten_rows.table_name}/readme" 14 | new_readme = "hello, *world*" 15 | 16 | with current_user(test_user): 17 | put_response = client.put(url, data=new_readme) 18 | 
assert put_response.status_code == 200 19 | 20 | get_response = client.get(url) 21 | assert get_response.status_code == 200 22 | assert get_response.text == new_readme 23 | 24 | 25 | def test_write__too_big(client, test_user, ten_rows): 26 | url = f"/{test_user.username}/{ten_rows.table_name}/readme" 27 | new_readme = "f" * 10_001 28 | 29 | with current_user(test_user): 30 | put_response = client.put(url, data=new_readme) 31 | assert put_response.status_code == 400 32 | -------------------------------------------------------------------------------- /tests/test_repcache.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | 3 | from csvbase.value_objs import ContentType 4 | from csvbase.repcache import RepCache 5 | 6 | from .utils import random_uuid, random_df 7 | 8 | 9 | def test_repcache__miss(): 10 | repcache = RepCache(random_uuid(), ContentType.CSV, datetime.now(timezone.utc)) 11 | 12 | assert not repcache.exists() 13 | 14 | 15 | def test_repcache__hit(): 16 | table_uuid = random_uuid() 17 | content_type = ContentType.CSV 18 | last_changed = datetime.now(timezone.utc) 19 | 20 | repcache = RepCache(table_uuid, content_type, last_changed) 21 | 22 | contents = b"a,b,c\n1,2,3" 23 | 24 | with repcache.open("wb") as rep_file: 25 | assert repcache.write_in_progress() 26 | rep_file.write(contents) 27 | 28 | assert repcache.exists() 29 | assert not repcache.write_in_progress() 30 | 31 | with repcache.open("rb") as rep_file: 32 | assert rep_file.read() == contents 33 | 34 | 35 | def test_repcache__update_wipes_out_old_reps(): 36 | table_uuid = random_uuid() 37 | content_type = ContentType.CSV 38 | 39 | initial_dt = datetime(2018, 1, 3, tzinfo=timezone.utc) 40 | initial_repcache = RepCache(table_uuid, content_type, initial_dt) 41 | initial_contents = b"a,b,c\n1,2,3" 42 | 43 | with initial_repcache.open("wb") as rep_file: 44 | rep_file.write(initial_contents) 45 | 46 | assert 
initial_repcache.exists() 47 | 48 | update_dt = datetime(2018, 1, 4, tzinfo=timezone.utc) 49 | update_contents = b"a,b,c\n4,5,6" 50 | update_repcache = RepCache(table_uuid, content_type, update_dt) 51 | 52 | with update_repcache.open("wb") as rep_file: 53 | rep_file.write(update_contents) 54 | 55 | assert not initial_repcache.exists() 56 | assert update_repcache.exists() 57 | 58 | 59 | def test_repcache__sizes(): 60 | table_uuid = random_uuid() 61 | last_changed = datetime.now(timezone.utc) 62 | df = random_df() 63 | 64 | csv_repcache = RepCache(table_uuid, ContentType.CSV, last_changed) 65 | with csv_repcache.open("wb") as rep_file: 66 | df.to_csv(rep_file) 67 | 68 | parquet_repcache = RepCache(table_uuid, ContentType.PARQUET, last_changed) 69 | with parquet_repcache.open("wb") as rep_file: 70 | df.to_parquet(rep_file) 71 | 72 | sizes = RepCache.sizes(table_uuid, last_changed) 73 | assert {ContentType.CSV, ContentType.PARQUET} == set(sizes.keys()) 74 | assert {int} == set(type(v) for v in sizes.values()) 75 | 76 | 77 | def test_repcache__path(): 78 | table_uuid = random_uuid() 79 | last_changed = datetime(2018, 1, 3, tzinfo=timezone.utc) 80 | repcache = RepCache(table_uuid, ContentType.CSV, last_changed) 81 | 82 | df = random_df() 83 | 84 | with repcache.open("wb") as rep_file: 85 | df.to_csv(rep_file) 86 | 87 | expected = f"{table_uuid}/2018-01-03T00_00_00+00_00.csv" 88 | actual = repcache.path() 89 | assert expected == actual 90 | -------------------------------------------------------------------------------- /tests/test_sql_scheduler.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone, timedelta 2 | 3 | import pytest 4 | from celery import Celery 5 | from celery.beat import ScheduleEntry 6 | 7 | from csvbase.db import get_db_url 8 | from csvbase.bgwork.sql_scheduler import SQLAlchemyScheduler 9 | 10 | from .utils import random_string 11 | 12 | 13 | @pytest.fixture(scope="function") 14 
| def celery_app(): 15 | celery = Celery(random_string()) 16 | celery.conf["beat_sqlalchemy_scheduler_db_url"] = get_db_url() 17 | celery.conf["beat_scheduler"] = "csvbase.bgwork.sql_scheduler:SQLScheduler" 18 | return celery 19 | 20 | 21 | def test_sql_schedule__get_initial_schedule(celery_app): 22 | scheduler = SQLAlchemyScheduler(celery_app) 23 | 24 | schedule = scheduler.get_schedule() 25 | assert set(schedule.keys()) == {"celery.backend_cleanup"} 26 | 27 | 28 | def make_schedule_entry(celery_app) -> ScheduleEntry: 29 | name = f"{random_string()}()" 30 | last_run_at = datetime.now(timezone.utc) 31 | options = {"expires": 100} 32 | return ScheduleEntry( 33 | app=celery_app, 34 | name=name, 35 | task=name, 36 | args=(), 37 | kwargs={}, 38 | options=options, 39 | schedule=None, # FIXME: this should be real 40 | last_run_at=last_run_at, 41 | total_run_count=0, 42 | ) 43 | 44 | 45 | def test_sql_schedule__test_persistence_works(celery_app): 46 | @celery_app.task 47 | def example_task(): 48 | return 49 | 50 | celery_app.add_periodic_task( 51 | timedelta(minutes=30).total_seconds(), example_task.s() 52 | ) 53 | 54 | scheduler = SQLAlchemyScheduler(celery_app) 55 | schedule = scheduler.get_schedule() 56 | assert set(schedule.keys()) == { 57 | "celery.backend_cleanup", 58 | "tests.test_sql_scheduler.example_task()", 59 | } 60 | -------------------------------------------------------------------------------- /tests/test_streams.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | from io import StringIO 4 | 5 | import pytest 6 | 7 | from csvbase import exc 8 | from csvbase.value_objs import Column, ColumnType 9 | from csvbase.streams import peek_csv, rewind 10 | 11 | test_data = Path(__file__).resolve().parent / "test-data" 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "input_filename, expected_columns", 16 | [ 17 | ( 18 | "abc123.csv", 19 | [ 20 | Column("a", ColumnType.TEXT), 21 | 
Column("b", ColumnType.INTEGER), 22 | Column("c", ColumnType.FLOAT), 23 | ], 24 | ), 25 | ( 26 | "blank-headers.csv", 27 | [ 28 | Column("col1", ColumnType.INTEGER), 29 | Column("a", ColumnType.INTEGER), 30 | Column("col3", ColumnType.INTEGER), 31 | Column("b", ColumnType.INTEGER), 32 | Column("c", ColumnType.INTEGER), 33 | Column("col6", ColumnType.INTEGER), 34 | ], 35 | ), 36 | ( 37 | "headers-only.csv", 38 | [ 39 | Column("a", ColumnType.TEXT), 40 | Column("b", ColumnType.TEXT), 41 | Column("c", ColumnType.TEXT), 42 | ], 43 | ), 44 | ], 45 | ) 46 | def test_peek_csv(input_filename, expected_columns): 47 | input_path = test_data / input_filename 48 | with input_path.open() as input_f: 49 | _, actual_columns = peek_csv(input_f) 50 | 51 | assert actual_columns == expected_columns 52 | 53 | 54 | @pytest.mark.parametrize( 55 | "input_filename, expected_exception, expected_message", 56 | [("empty.csv", exc.CSVParseError, "blank csv")], 57 | ) 58 | def test_peek_csv_with_junk(input_filename, expected_exception, expected_message): 59 | input_path = test_data / input_filename 60 | with input_path.open() as input_f: 61 | with pytest.raises(expected_exception) as e: 62 | peek_csv(input_f) 63 | assert e.msg == expected_message # type: ignore 64 | 65 | 66 | def test_rewind__happy(): 67 | buf = StringIO("hello") 68 | with rewind(buf): 69 | first_three = buf.read(3) 70 | assert first_three == "hel" 71 | assert buf.tell() == 3 72 | 73 | assert buf.tell() == 0 74 | 75 | 76 | def test_rewind__seekback_raise(): 77 | buf = StringIO("hello") 78 | buf.seek(os.SEEK_END) 79 | with pytest.raises(RuntimeError): 80 | with rewind(buf): 81 | pass 82 | 83 | 84 | def test_rewind__partial(): 85 | buf = StringIO("hello") 86 | buf.seek(3) 87 | with rewind(buf, to=buf.tell(), allow_seekback=True): 88 | assert buf.read() == "lo" 89 | assert buf.read() == "lo" 90 | -------------------------------------------------------------------------------- /tests/test_table_html.py: 
-------------------------------------------------------------------------------- 1 | from .utils import current_user 2 | 3 | 4 | def test_table_rest_api_docs__public(ten_rows, test_user, client): 5 | resp = client.get(f"/{test_user.username}/{ten_rows.table_name}/docs") 6 | assert resp.status_code == 200 7 | 8 | 9 | def test_table_rest_api_docs__private(private_table, test_user, client): 10 | with current_user(test_user): 11 | resp = client.get(f"/{test_user.username}/{private_table}/docs") 12 | assert resp.status_code == 200 13 | -------------------------------------------------------------------------------- /tests/test_table_io.py: -------------------------------------------------------------------------------- 1 | import io 2 | import csv 3 | import pandas as pd 4 | import string 5 | 6 | import pytest 7 | 8 | from csvbase.exc import CSVParseError 9 | from csvbase.value_objs import Column, ColumnType 10 | from csvbase import table_io 11 | from csvbase.streams import rewind 12 | 13 | 14 | def test_scientific_notation_not_put_into_csv(): 15 | columns = [Column("a_float", type_=ColumnType.FLOAT)] 16 | rows = [[9.999999974e-07]] 17 | 18 | buf = table_io.rows_to_csv(columns, rows) 19 | csv_str = buf.read() 20 | 21 | assert csv_str == b"a_float\r\n0.000001\r\n" 22 | 23 | 24 | integer_col = Column("i", ColumnType.INTEGER) 25 | 26 | 27 | @pytest.mark.parametrize( 28 | "csv_str, columns, expected_locations", 29 | [ 30 | pytest.param( 31 | "i\na", 32 | [integer_col], 33 | [table_io.CSVParseErrorLocation(1, integer_col, "a")], 34 | id="text in int column", 35 | ) 36 | ], 37 | ) 38 | def test_csv_to_rows__errors(csv_str, columns, expected_locations): 39 | buf = io.StringIO(csv_str) 40 | with pytest.raises(CSVParseError) as e: 41 | list(table_io.csv_to_rows(buf, columns, csv.excel)) 42 | assert e.value.error_locations == expected_locations 43 | 44 | 45 | def test_csv_to_rows__many_errors(): 46 | df = pd.DataFrame(dict(a=list(string.ascii_letters))) 47 | buf = io.StringIO() 
48 | with rewind(buf): 49 | df.to_csv(buf, index=False) 50 | with pytest.raises(CSVParseError): 51 | list(table_io.csv_to_rows(buf, [Column("a", ColumnType.INTEGER)], csv.excel)) 52 | -------------------------------------------------------------------------------- /tests/test_table_pagination.py: -------------------------------------------------------------------------------- 1 | from csvbase.value_objs import Column, ColumnType, BinaryOp 2 | from csvbase import svc 3 | 4 | import pytest 5 | 6 | from .utils import create_table 7 | 8 | 9 | @pytest.fixture() 10 | def user_with_tables(test_user, sesh): 11 | for n in range(1, 11): 12 | table_name = f"table-{n}" 13 | create_table( 14 | sesh, 15 | test_user, 16 | [Column(name="column", type_=ColumnType.TEXT)], 17 | table_name=table_name, 18 | ) 19 | sesh.commit() 20 | return test_user 21 | 22 | 23 | def test_first_page(sesh, user_with_tables): 24 | page = svc.table_page(sesh, user_with_tables.user_uuid, user_with_tables, count=2) 25 | table_names = [t.table_name for t in page.tables] 26 | assert table_names == ["table-10", "table-9"] 27 | assert not page.has_prev 28 | assert page.has_next 29 | 30 | 31 | def test_second_page(sesh, user_with_tables): 32 | first_page = svc.table_page( 33 | sesh, user_with_tables.user_uuid, user_with_tables, count=2 34 | ) 35 | last_on_first_page = first_page.tables[-1] 36 | 37 | second_page = svc.table_page( 38 | sesh, 39 | user_with_tables.user_uuid, 40 | user_with_tables, 41 | count=2, 42 | key=(last_on_first_page.last_changed, last_on_first_page.table_uuid), 43 | ) 44 | table_names = [t.table_name for t in second_page.tables] 45 | assert table_names == ["table-8", "table-7"] 46 | assert second_page.has_prev 47 | assert second_page.has_next 48 | 49 | 50 | def test_back_to_first_page(sesh, user_with_tables): 51 | first_page = svc.table_page( 52 | sesh, user_with_tables.user_uuid, user_with_tables, count=2 53 | ) 54 | last_on_first_page = first_page.tables[-1] 55 | 56 | second_page = 
svc.table_page( 57 | sesh, 58 | user_with_tables.user_uuid, 59 | user_with_tables, 60 | count=2, 61 | key=(last_on_first_page.last_changed, last_on_first_page.table_uuid), 62 | ) 63 | first_on_second_page = second_page.tables[0] 64 | 65 | back_to_first_page = svc.table_page( 66 | sesh, 67 | user_with_tables.user_uuid, 68 | user_with_tables, 69 | count=2, 70 | key=(first_on_second_page.last_changed, first_on_second_page.table_uuid), 71 | op=BinaryOp.GT, 72 | ) 73 | 74 | table_names = [t.table_name for t in back_to_first_page.tables] 75 | assert table_names == ["table-10", "table-9"] 76 | assert not back_to_first_page.has_prev 77 | assert back_to_first_page.has_next 78 | 79 | 80 | @pytest.mark.xfail(reason="test not implemented") 81 | def test_pagination_over_the_top(): 82 | assert False 83 | 84 | 85 | @pytest.mark.xfail(reason="test not implemented") 86 | def test_pagination_under_the_bottom(): 87 | assert False 88 | 89 | 90 | def test_paging_on_empty_table(sesh, test_user): 91 | page = svc.table_page(sesh, test_user.user_uuid, test_user, count=2) 92 | assert page.tables == [] 93 | assert not page.has_next 94 | assert not page.has_prev 95 | -------------------------------------------------------------------------------- /tests/test_temp.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | from datetime import timedelta 3 | 4 | import pytest 5 | from csvbase import temp, exc 6 | from .utils import random_string 7 | 8 | 9 | def test_temp__set_and_get(): 10 | contents = f"{random_string()}\n".encode("utf-8") 11 | buf = BytesIO(contents) 12 | 13 | file_id = temp.store_temp_file(buf) 14 | with temp.retrieve_temp_file(file_id) as f: 15 | assert f.read() == contents 16 | 17 | 18 | def test_temp__missing(): 19 | with pytest.raises(exc.MissingTempFile): 20 | with temp.retrieve_temp_file("nonsense") as f: 21 | f.read() 22 | 23 | 24 | def test_temp__expiry(): 25 | contents = f"{random_string()}\n".encode("utf-8") 
26 | buf = BytesIO(contents) 27 | 28 | file_id = temp.store_temp_file(buf, duration=timedelta(seconds=-1)) 29 | with pytest.raises(exc.MissingTempFile): 30 | with temp.retrieve_temp_file(file_id) as f: 31 | f.read() 32 | -------------------------------------------------------------------------------- /tests/test_turnstile.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pytest 3 | 4 | from csvbase import exc 5 | from csvbase.web.turnstile import validate_turnstile_token 6 | 7 | from .utils import mock_turnstile, random_string, TURNSTILE_URL 8 | 9 | 10 | def test_turnstile__happy(app, requests_mocker): 11 | mock_turnstile(requests_mocker) 12 | validate_turnstile_token(random_string()) 13 | 14 | 15 | def test_turnstile__non_json_response(requests_mocker): 16 | """Make sure that the turnstile API being down results in an exception""" 17 | requests_mocker.post(TURNSTILE_URL, text="501 service down") 18 | with pytest.raises(requests.exceptions.JSONDecodeError): 19 | validate_turnstile_token(random_string()) 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "error_codes", 24 | [ 25 | ["invalid-input-response"], 26 | ["timeout-or-duplicate"], 27 | ], 28 | ) 29 | def test_turnstile__non_success_for_user_reason(error_codes, requests_mocker): 30 | requests_mocker.post( 31 | TURNSTILE_URL, json={"success": False, "error-codes": error_codes} 32 | ) 33 | with pytest.raises(exc.CaptchaFailureException): 34 | validate_turnstile_token(random_string()) 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "error_codes", 39 | [ 40 | ["missing-input-secret"], 41 | ["invalid-input-secret"], 42 | ["missing-input-response"], 43 | ["bad-request"], 44 | ["internal-error"], 45 | ["anything-else"], 46 | ], 47 | ) 48 | def test_turnstile__non_success_for_other_reason(app, error_codes, requests_mocker): 49 | requests_mocker.post( 50 | TURNSTILE_URL, json={"success": False, "error-codes": error_codes} 51 | ) 52 | with 
pytest.raises(RuntimeError) as e: 53 | validate_turnstile_token(random_string()) 54 | for error_code in error_codes: 55 | assert error_code in str(e.value) 56 | -------------------------------------------------------------------------------- /tests/test_update_external_tables.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/calpaterson/csvbase/38f2a836c19e7438ef829c5c9964405b29dcde90/tests/test_update_external_tables.py -------------------------------------------------------------------------------- /tests/test_upstream.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from platformdirs import user_cache_dir 4 | 5 | from csvbase.follow.git import get_repo_path 6 | 7 | import pytest 8 | 9 | 10 | @pytest.mark.parametrize( 11 | "url, branch, expected_dirname", 12 | [ 13 | ( 14 | "https://github.com/calpaterson/csvbase.git", 15 | "main", 16 | "github_com_calpaterson_csvbase_git_main_8b3d4d1deb66f6eaec6fc5e6dd9ac9d00cbd4b64e5cc23850c417a21c6ce264cc786816b02dc4798554a5a57b318ef4f986396036c62b5aa1afd11d26d56ccf7", 17 | ), 18 | pytest.param( 19 | "https://github.com/calpaterson/" + ("f" * 2000), 20 | "main", 21 | "github_com_calpaterson_ffffffffffffffffffffffffffffffffffffffffffffffff_265f962f2f89654153b90113677147e870576168fd59bc598ebafdabf9fb5c9297f3b1895798d5866b1e05c442c940c6e85e9ced102734637080ab2d531b4e5b", 22 | id="huge url", 23 | ), 24 | ], 25 | ) 26 | def test_git_repo_path(url, branch, expected_dirname): 27 | expected = Path(user_cache_dir("csvbase")) / "git-repos" / expected_dirname 28 | actual = get_repo_path(url, branch) 29 | assert expected == actual 30 | assert len(actual.name) <= 200 31 | -------------------------------------------------------------------------------- /tests/value_objs.py: -------------------------------------------------------------------------------- 1 | from base64 import b64encode 2 | from 
dataclasses import dataclass 3 | 4 | from csvbase.value_objs import User 5 | 6 | 7 | @dataclass 8 | class ExtendedUser(User): 9 | password: str 10 | 11 | def basic_auth(self) -> str: 12 | """The HTTP Basic Auth header value for this user""" 13 | hex_api_key = self.hex_api_key() 14 | user_pass = f"{self.username}:{hex_api_key}".encode("utf-8") 15 | encoded = b64encode(user_pass).decode("utf-8") 16 | return f"Basic {encoded}" 17 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py39,py311 3 | skipsdist = True 4 | [testenv] 5 | passenv = CSVBASE_DB_URL,CSVBASE_SECRET_KEY 6 | commands = 7 | pip install --upgrade setuptools pip wheel 8 | python -m pip install -e .[tests] 9 | mypy . 10 | alembic upgrade head 11 | csvbase-populate-licences 12 | python -m pytest 13 | black --check . 14 | ruff check . 15 | [flake8] 16 | max-line-length = 88 --------------------------------------------------------------------------------