├── tests ├── __init__.py ├── urls.py ├── data │ ├── secondary_db.csv │ ├── names.csv │ ├── pipes.csv │ ├── backwards.csv │ ├── quote.csv │ ├── matching_headers.csv │ ├── foreignkeys.csv │ ├── nulls.csv │ └── blanknulls.csv ├── fields.py ├── router.py ├── conftest.py ├── models.py └── test_queries.py ├── postgres_copy ├── py.typed ├── __init__.py ├── copy_to.py ├── psycopg_compat.py ├── managers.py └── copy_from.py ├── .github ├── CODEOWNERS └── workflows │ ├── docs.yml │ └── continuous-deployment.yaml ├── MANIFEST.in ├── README.md ├── docs ├── conf.py ├── Makefile └── index.rst ├── .gitignore ├── LICENSE ├── .pre-commit-config.yaml ├── pyproject.toml ├── CODE_OF_CONDUCT.md └── CLAUDE.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /postgres_copy/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @palewire 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | -------------------------------------------------------------------------------- /tests/urls.py: -------------------------------------------------------------------------------- 1 | # This file is required by Django but not used in tests 2 | urlpatterns = [] 3 | -------------------------------------------------------------------------------- /tests/data/secondary_db.csv: -------------------------------------------------------------------------------- 1 | TEXT 2 | SECONDARY TEXT 1 3 | SECONDARY TEXT 2 4 | SECONDARY TEXT 3 5 | -------------------------------------------------------------------------------- /tests/data/names.csv: -------------------------------------------------------------------------------- 1 | NAME,NUMBER,DATE 2 | ben,1,2012-01-01 3 | joe,2,2012-01-02 4 | jane,3,2012-01-03 5 | -------------------------------------------------------------------------------- /tests/data/pipes.csv: -------------------------------------------------------------------------------- 1 | NAME|NUMBER|DATE 2 | ben|1|2012-01-01 3 | joe|2|2012-01-02 4 | jane|3|2012-01-03 5 | -------------------------------------------------------------------------------- /tests/data/backwards.csv: -------------------------------------------------------------------------------- 1 | NUMBER,NAME,DATE 2 | 1,ben,2012-01-01 3 | 2,joe,2012-01-02 4 | 3,jane,2012-01-03 5 | -------------------------------------------------------------------------------- /tests/data/quote.csv: -------------------------------------------------------------------------------- 1 | NAME NUMBER DATE 2 | `b``en` 1 2012-01-01 3 | `jo e` 2 2012-01-02 4 | jan"e 3 2012-01-03 5 | -------------------------------------------------------------------------------- /tests/data/matching_headers.csv: -------------------------------------------------------------------------------- 1 | name,number,dt 2 | ben,1,2012-01-01 3 | joe,2,2012-01-02 4 | jane,3,2012-01-03 5 | -------------------------------------------------------------------------------- /tests/data/foreignkeys.csv: -------------------------------------------------------------------------------- 1 | 
NAME,NUMBER,DATE,PARENT 2 | ben,1,2012-01-01,3 3 | joe,2,2012-01-02,2 4 | jane,3,2012-01-03,1 5 | -------------------------------------------------------------------------------- /tests/data/nulls.csv: -------------------------------------------------------------------------------- 1 | NAME,NUMBER,DATE 2 | ben,1,2012-01-01 3 | joe,2,2012-01-02 4 | jane,3,2012-01-03 5 | nullboy,,2012-01-04 6 | badboy,x,2012-01-05 7 | -------------------------------------------------------------------------------- /tests/data/blanknulls.csv: -------------------------------------------------------------------------------- 1 | NAME,NUMBER,DATE,COLOR 2 | ben,1,2012-01-01,red 3 | joe,2,2012-01-02,green 4 | jane,3,2012-01-03,orange 5 | nullboy,,2012-01-04, 6 | badboy,x,2012-01-05,blue 7 | -------------------------------------------------------------------------------- /tests/fields.py: -------------------------------------------------------------------------------- 1 | from django.db.models.fields import IntegerField 2 | 3 | 4 | class MyIntegerField(IntegerField): 5 | copy_template = """ 6 | CASE 7 | WHEN "%(name)s" = 'x' THEN null 8 | ELSE "%(name)s"::int 9 | END 10 | """ 11 | -------------------------------------------------------------------------------- /postgres_copy/__init__.py: -------------------------------------------------------------------------------- 1 | from .copy_from import CopyMapping 2 | from .copy_to import CopyToQuery, SQLCopyToCompiler 3 | from .managers import CopyManager, CopyQuerySet 4 | 5 | __all__ = ( 6 | "CopyManager", 7 | "CopyMapping", 8 | "CopyQuerySet", 9 | "CopyToQuery", 10 | "SQLCopyToCompiler", 11 | ) 12 | -------------------------------------------------------------------------------- /tests/router.py: -------------------------------------------------------------------------------- 1 | class CustomRouter: 2 | def db_for_read(self, model, **hints): 3 | if model.__name__ == "SecondaryMockObject": 4 | return "secondary" 5 | return None 6 | 7 | def db_for_write(self, model, **hints): 8 | if model.__name__ == "SecondaryMockObject": 9 | return "secondary" 10 | return None 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Links 2 | 3 | - Documentation: [palewi.re/docs/django-postgres-copy/](https://palewi.re/docs/django-postgres-copy/) 4 | - Issues: [github.com/palewire/django-postgres-copy/issues](https://github.com/palewire/django-postgres-copy/issues) 5 | - Packaging: [pypi.python.org/pypi/django-postgres-copy](https://pypi.python.org/pypi/django-postgres-copy) 6 | - Testing: [github.com/palewire/django-postgres-copy/actions](https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml) 7 | 8 | ### Contributing 9 | 10 | To set up a development environment, run `uv sync --all-extras` after forking and cloning the repository. 
11 | To run tests, use `uv run pytest tests`. 12 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | """Sphinx configuration.""" 2 | 3 | from __future__ import annotations 4 | 5 | import os 6 | import sys 7 | from datetime import datetime 8 | from typing import Any 9 | 10 | sys.path.insert(0, os.path.abspath("..")) 11 | 12 | source_suffix = ".rst" 13 | master_doc = "index" 14 | 15 | project = "django-postgres-copy" 16 | year = datetime.now().year 17 | copyright = f"{year} palewire" 18 | 19 | exclude_patterns = ["_build"] 20 | 21 | html_theme = "palewire" 22 | html_sidebars: dict[Any, Any] = {} 23 | html_theme_options: dict[Any, Any] = { 24 | "canonical_url": f"https://palewi.re/docs/{project}/", 25 | "nosidebar": True, 26 | } 27 | 28 | pygments_style = "sphinx" 29 | 30 | extensions = [ 31 | "sphinx.ext.autodoc", 32 | "sphinx.ext.napoleon", 33 | ] 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | MANIFEST 23 | 24 | # Unit test / coverage reports 25 | htmlcov/ 26 | .tox/ 27 | .coverage 28 | .coverage.* 29 | .cache 30 | nosetests.xml 31 | coverage.xml 32 | *.cover 33 | .hypothesis/ 34 | .pytest_cache/ 35 | 36 | # Environments 37 | .env 38 | .venv 39 | env/ 40 | venv/ 41 | ENV/ 42 | env.bak/ 43 | venv.bak/ 44 | 45 | # uv specific 46 | .uv/ 47 | .venv/ 48 | 49 | # Pipenv 50 | Pipfile.lock 51 | 52 | # Django 53 | *.log 54 | local_settings.py 55 | db.sqlite3 56 | db.sqlite3-journal 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | 61 | # VS Code 62 | .vscode/ 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 California Civic Data Coalition 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: 'https://github.com/pre-commit/pre-commit-hooks' 5 | rev: v5.0.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | args: 12 | - '--maxkb=100000' 13 | - id: fix-byte-order-marker 14 | - id: check-case-conflict 15 | - id: check-json 16 | - id: mixed-line-ending 17 | - id: check-ast 18 | - id: check-merge-conflict 19 | 20 | - repo: 'https://github.com/astral-sh/ruff-pre-commit' 21 | rev: v0.12.0 22 | hooks: 23 | - id: ruff 24 | args: 25 | - '--fix' 26 | - id: ruff-format 27 | 28 | - repo: 'https://github.com/asottile/blacken-docs' 29 | rev: 1.19.1 30 | hooks: 31 | - id: blacken-docs 32 | additional_dependencies: 33 | - black 34 | 35 | - repo: 'https://github.com/asottile/pyupgrade' 36 | rev: v3.20.0 37 | hooks: 38 | - id: pyupgrade 39 | args: 40 | - '--py39-plus' 41 | 42 | - repo: 'https://github.com/pre-commit/mirrors-mypy' 43 | rev: v1.16.1 44 | hooks: 45 | - id: mypy 46 | exclude: tests/.* 47 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: "Build documentation" 2 | 3 | on: 4 | push: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | build: 9 | name: Build 10 | runs-on: ubuntu-latest 11 | steps: 12 | - id: checkout 13 | name: Checkout 14 | uses: actions/checkout@v4 15 | 16 | - id: install-uv 17 | name: Install uv 18 | uses: astral-sh/setup-uv@v6 19 | with: 20 | version: "latest" 21 | enable-cache: true 22 | cache-dependency-glob: '**/pyproject.toml' 23 | 24 | - id: install-python 25 | name: Install Python 26 | run: uv python install 3.13 27 | 28 | - id: install-python-dependencies 29 | name: Install Python dependencies 30 | run: uv sync --extra docs 31 | 32 | - id: build-sphinx-documentation 33 | name: Build Sphinx documentation 34 | run: uv run sphinx-build -M html ./docs ./_build/ 35 | 36 | - id: upload-release-candidate 37 | name: Upload release candidate 38 | uses: actions/upload-artifact@v4 39 | with: 40 | name: release-candidate 41 | path: ./_build/html/ 42 | 43 | deploy: 44 | name: Deploy 45 | runs-on: ubuntu-latest 46 | needs: build 47 | if: ${{ github.ref_name == 'main' }} 48 | steps: 49 | - name: Download release candidate 50 | uses: actions/download-artifact@v4 51 | with: 52 | name: release-candidate 53 | path: ./docs/ 54 | 55 | - id: configure-aws 56 | name: Configure AWS Credentials 57 | uses: aws-actions/configure-aws-credentials@v4 58 | with: 59 | aws-access-key-id: ${{ secrets.PALEWIRE_DOCS_AWS_ACCESS_KEY_ID }} 60 | aws-secret-access-key: ${{ secrets.PALEWIRE_DOCS_AWS_SECRET_ACCESS_KEY }} 61 | aws-region: us-east-1 62 | 63 | - id: upload-to-s3 64 | name: Upload documentation to Amazon S3 65 | uses: datadesk/delivery-deploy-action@v1 66 | with: 67 | bucket: ${{ secrets.PALEWIRE_DOCS_AWS_BUCKET }} 68 | base-path: django-postgres-copy/ 69 | dir: ./docs/ 70 | should-cache: false 71 | use-accelerate-endpoint: false 72 | public: true 73 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 |
from pathlib import Path 3 | from django.conf import settings 4 | 5 | ROOT_DIR = Path(__file__).parent.parent 6 | PG_USER = os.environ.get("PG_USER", "postgres") 7 | 8 | 9 | def pytest_configure(): 10 | settings.configure( 11 | DATABASES={ 12 | "default": { 13 | "HOST": "localhost", 14 | "PORT": 5432, 15 | "NAME": "test", 16 | "USER": PG_USER, 17 | "ENGINE": "django.db.backends.postgresql_psycopg2", 18 | }, 19 | "other": { 20 | "HOST": "localhost", 21 | "PORT": 5432, 22 | "NAME": "test_alternative", 23 | "USER": PG_USER, 24 | "ENGINE": "django.db.backends.postgresql_psycopg2", 25 | }, 26 | "sqlite": {"NAME": "sqlite", "ENGINE": "django.db.backends.sqlite3"}, 27 | "secondary": { 28 | "HOST": "localhost", 29 | "PORT": 5432, 30 | "NAME": "test_secondary", 31 | "USER": PG_USER, 32 | "ENGINE": "django.db.backends.postgresql_psycopg2", 33 | }, 34 | }, 35 | INSTALLED_APPS=("tests",), 36 | DATABASE_ROUTERS=["tests.router.CustomRouter"], 37 | DEFAULT_AUTO_FIELD="django.db.models.BigAutoField", 38 | LOGGING={ 39 | "version": 1, 40 | "disable_existing_loggers": False, 41 | "handlers": { 42 | "file": { 43 | "level": "DEBUG", 44 | "class": "logging.FileHandler", 45 | "filename": ROOT_DIR / "tests.log", 46 | }, 47 | }, 48 | "formatters": { 49 | "verbose": { 50 | "format": "%(levelname)s|%(asctime)s|%(module)s|%(message)s", 51 | "datefmt": "%d/%b/%Y %H:%M:%S", 52 | } 53 | }, 54 | "loggers": { 55 | "postgres_copy": { 56 | "handlers": ["file"], 57 | "level": "DEBUG", 58 | "propagate": True, 59 | }, 60 | }, 61 | }, 62 | ) 63 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "django-postgres-copy" 7 | description = "Quickly import and export delimited data with Django support for PostgreSQL's COPY command" 8 | readme = "README.md" 9 | authors = [ 10 | {name = "Ben Welsh", email = "b@palewi.re"}, 11 | ] 12 | license = "MIT" 13 | classifiers = [ 14 | "Development Status :: 5 - Production/Stable", 15 | "Operating System :: OS Independent", 16 | "Intended Audience :: Developers", 17 | "Programming Language :: Python", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Programming Language :: Python :: 3.13", 24 | "Framework :: Django", 25 | "Framework :: Django :: 4.2", 26 | "Framework :: Django :: 5.1", 27 | "Framework :: Django :: 5.2", 28 | ] 29 | requires-python = ">=3.9" 30 | dependencies = [] 31 | dynamic = ["version"] 32 | 33 | [project.urls] 34 | Documentation = "https://palewi.re/docs/django-postgres-copy/" 35 | Source = "https://github.com/palewire/django-postgres-copy" 36 | Tracker = "https://github.com/palewire/django-postgres-copy/issues" 37 | Tests = "https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml" 38 | 39 | [tool.setuptools] 40 | packages = ["postgres_copy"] 41 | 42 | [tool.setuptools.package-data] 43 | postgres_copy = ["py.typed"] 44 | 45 | [tool.setuptools_scm] 46 | 47 | [tool.pytest] 48 | python_files = ["test*.py", "test_*.py", "*_test.py"] 49 | 50 | [tool.flake8] 51 | max-line-length = 119 52 | ignore = ["D100", "D101", "D102", "D103", "D104", "D106", "D107", "D200", "D205", "D400", "D401", "SIM115", 
"B006"] 53 | 54 | [tool.mypy] 55 | python_version = "3.13" 56 | warn_return_any = true 57 | warn_unused_configs = true 58 | disallow_untyped_defs = true 59 | disallow_incomplete_defs = true 60 | check_untyped_defs = true 61 | disallow_untyped_decorators = true 62 | no_implicit_optional = true 63 | strict_optional = true 64 | warn_redundant_casts = true 65 | warn_unused_ignores = true 66 | warn_no_return = true 67 | warn_unreachable = true 68 | exclude = "^(tests|docs)/.*" 69 | 70 | [[tool.mypy.overrides]] 71 | module = "django.*" 72 | ignore_missing_imports = true 73 | 74 | [[tool.mypy.overrides]] 75 | module = "psycopg.*" 76 | ignore_missing_imports = true 77 | 78 | [project.optional-dependencies] 79 | dev = [ 80 | "coverage", 81 | "mock", 82 | "pre-commit", 83 | "pytest-runner", 84 | "pytest-cov", 85 | "pytest-env", 86 | "pytest-django", 87 | "setuptools-scm", 88 | "twine", 89 | "wheel", 90 | ] 91 | mypy = [ 92 | "mypy", 93 | "types-psycopg2", 94 | "django-stubs", 95 | ] 96 | docs = [ 97 | "sphinx", 98 | "sphinx-autobuild", 99 | "sphinx-palewire-theme", 100 | "myst-parser", 101 | ] 102 | 103 | 104 | [tool.pytest.ini_options] 105 | addopts = "-p no:warnings --cov=postgres_copy --cov-branch -cov-report=term-missing:skip-covered --cov-context=test --reuse-db --nomigrations" 106 | testpaths = ["tests"] 107 | python_files = "test_*.py" 108 | 109 | [tool.coverage.run] 110 | source = ["postgres_copy"] 111 | relative_files = true 112 | omit = [ 113 | "*/__pycache__/*", 114 | "tests", 115 | ] 116 | -------------------------------------------------------------------------------- /.github/workflows/continuous-deployment.yaml: -------------------------------------------------------------------------------- 1 | name: Testing and distribution 2 | on: 3 | push: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | lint-python: 9 | name: Lint Python code 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout 13 | uses: actions/checkout@v4 14 | 15 | - name: Check with Ruff 16 | uses: astral-sh/ruff-action@v3 17 | with: 18 | args: 'check --exit-zero --verbose' 19 | 20 | - name: Format with Ruff 21 | uses: astral-sh/ruff-action@v3 22 | with: 23 | args: 'format --check --verbose' 24 | 25 | test-python: 26 | name: "Test Python" 27 | runs-on: ubuntu-latest 28 | services: 29 | postgres: 30 | image: postgres:latest 31 | env: 32 | POSTGRES_USER: postgres 33 | POSTGRES_PASSWORD: postgres 34 | POSTGRES_DB: postgres 35 | ports: 36 | - 5432:5432 37 | # needed because the postgres container does not provide a healthcheck 38 | options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 39 | strategy: 40 | matrix: 41 | python: ['3.9', '3.10', '3.11', '3.12', '3.13'] 42 | django: ['4.2', '5.1', '5.2'] 43 | psycopg: ['psycopg2', 'psycopg'] 44 | exclude: 45 | - python: '3.9' 46 | django: '5.1' 47 | - python: '3.9' 48 | django: '5.2' 49 | steps: 50 | - name: Checkout 51 | uses: actions/checkout@v4 52 | 53 | - id: install-uv 54 | name: Install uv 55 | uses: astral-sh/setup-uv@v6 56 | with: 57 | version: "latest" 58 | enable-cache: true 59 | cache-dependency-glob: '**/pyproject.toml' 60 | 61 | - id: install-python 62 | name: Install Python 63 | run: uv python install ${{ matrix.python }} 64 | 65 | - name: Install Python dependencies 66 | run: | 67 | uv sync --extra dev --python ${{ matrix.python }} 68 | uv pip install ${{ matrix.psycopg }} django==${{ matrix.django }} 69 | 70 | - name: Test 71 | run: uv run pytest tests --reuse-db --nomigrations 72 | env: 73 | PGPASSWORD: 
postgres 74 | 75 | test-build: 76 | name: Build Python package 77 | runs-on: ubuntu-latest 78 | needs: [test-python,lint-python] 79 | steps: 80 | - name: Checkout 81 | uses: actions/checkout@v4 82 | 83 | - id: install-uv 84 | name: Install uv 85 | uses: astral-sh/setup-uv@v6 86 | with: 87 | version: "latest" 88 | enable-cache: true 89 | cache-dependency-glob: '**/pyproject.toml' 90 | 91 | - id: install-python 92 | name: Install Python 93 | run: uv python install 3.13 94 | 95 | - id: build 96 | name: Build releases 97 | run: uv build --sdist --wheel 98 | shell: bash 99 | 100 | - id: save 101 | name: Save artifact 102 | uses: actions/upload-artifact@v4 103 | with: 104 | name: release-candidate 105 | path: ./dist 106 | if-no-files-found: error 107 | 108 | tag-release: 109 | name: Tagged PyPI release 110 | runs-on: ubuntu-latest 111 | needs: [test-build] 112 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 113 | steps: 114 | - name: Fetch artifact 115 | uses: actions/download-artifact@v4 116 | with: 117 | name: release-candidate 118 | path: ./dist 119 | 120 | - name: Publish release 121 | uses: pypa/gh-action-pypi-publish@release/v1 122 | with: 123 | user: __token__ 124 | password: ${{ secrets.PYPI_API_TOKEN }} 125 | verbose: true 126 | -------------------------------------------------------------------------------- /postgres_copy/copy_to.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Handlers for working with PostgreSQL's COPY TO command. 4 | """ 5 | 6 | import logging 7 | import typing 8 | from io import BytesIO 9 | 10 | from django.db import connections 11 | from django.db.models.sql.compiler import SQLCompiler 12 | from django.db.models.sql.query import Query 13 | 14 | from .psycopg_compat import copy_to 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class SQLCopyToCompiler(SQLCompiler): 20 | """ 21 | Custom SQL compiler for creating a COPY TO query (postgres backend only). 22 | """ 23 | 24 | def setup_query(self, **kwargs: typing.Any) -> None: 25 | """ 26 | Extend the default SQLCompiler.setup_query to add re-ordering of items in select. 27 | """ 28 | super().setup_query(**kwargs) 29 | if self.query.copy_to_fields: 30 | self.select = [] 31 | for field in self.query.copy_to_fields: 32 | # raises error if field is not available 33 | expression = self.query.resolve_ref(field) 34 | selection = ( 35 | expression, 36 | self.compile(expression), 37 | field if field in self.query.annotations else None, 38 | ) 39 | self.select.append(selection) 40 | 41 | def execute_sql( 42 | self, 43 | csv_path_or_obj: typing.Optional[typing.Union[str, typing.BinaryIO]] = None, 44 | ) -> typing.Optional[bytes]: 45 | """ 46 | Run the COPY TO query. 
47 | """ 48 | logger.debug(f"Copying data to {csv_path_or_obj}") 49 | 50 | params = self.as_sql()[1] 51 | 52 | # use stdout to avoid file permission issues 53 | with connections[self.using].cursor() as c: 54 | # grab the SELECT query 55 | select_sql = self.as_sql()[0] 56 | # then the COPY TO query 57 | copy_to_sql = "COPY ({}) TO STDOUT {} CSV" 58 | copy_to_sql = copy_to_sql.format(select_sql, self.query.copy_to_delimiter) 59 | # Optional extras 60 | options_list = [ 61 | self.query.copy_to_header, 62 | self.query.copy_to_null_string, 63 | self.query.copy_to_quote_char, 64 | self.query.copy_to_force_quote, 65 | self.query.copy_to_encoding, 66 | self.query.copy_to_escape, 67 | ] 68 | options_sql = " ".join([o for o in options_list if o]).strip() 69 | if options_sql: 70 | copy_to_sql = copy_to_sql + " " + options_sql 71 | # then execute 72 | logger.debug(copy_to_sql) 73 | 74 | # If a file-like object was provided, write it out there. 75 | if hasattr(csv_path_or_obj, "write"): 76 | copy_to( 77 | c.cursor, 78 | copy_to_sql, 79 | params, 80 | typing.cast(typing.BinaryIO, csv_path_or_obj), 81 | ) 82 | return None 83 | # If a file path was provided, write it out there. 84 | elif csv_path_or_obj and isinstance(csv_path_or_obj, str): 85 | with open(csv_path_or_obj, "wb") as stdout: 86 | copy_to(c.cursor, copy_to_sql, params, stdout) 87 | return None 88 | # If there's no csv_path, return the output as a string. 89 | else: 90 | stdout_buffer = BytesIO() 91 | copy_to(c.cursor, copy_to_sql, params, stdout_buffer) 92 | return stdout_buffer.getvalue() 93 | 94 | 95 | class CopyToQuery(Query): 96 | """ 97 | Represents a "copy to" SQL query. 98 | """ 99 | 100 | def get_compiler( 101 | self, 102 | using: typing.Optional[str] = None, 103 | connection: typing.Optional[typing.Any] = None, 104 | ) -> SQLCopyToCompiler: 105 | """ 106 | Return a SQLCopyToCompiler object. 
107 | """ 108 | return SQLCopyToCompiler(self, connection, using) 109 | -------------------------------------------------------------------------------- /tests/models.py: -------------------------------------------------------------------------------- 1 | import django 2 | from django.db import models 3 | 4 | from postgres_copy import CopyManager, CopyMapping 5 | 6 | from .fields import MyIntegerField 7 | 8 | 9 | class MockObject(models.Model): 10 | name = models.CharField(max_length=500) 11 | number = MyIntegerField(null=True, db_column="num") 12 | dt = models.DateField(null=True) 13 | parent = models.ForeignKey( 14 | "MockObject", on_delete=models.CASCADE, null=True, default=None 15 | ) 16 | objects = CopyManager() 17 | 18 | class Meta: 19 | app_label = "tests" 20 | unique_together = ("name", "number") 21 | 22 | def __init__(self, *args, **kwargs): 23 | super().__init__(*args, **kwargs) 24 | if django.VERSION < (5, 1): 25 | self._meta.index_together = ("name", "number") 26 | else: 27 | self._meta.indexes = [models.Index(fields=["name", "number"])] 28 | 29 | def copy_name_template(self): 30 | return 'upper("%(name)s")' 31 | 32 | 33 | class MockFKObject(models.Model): 34 | id = models.IntegerField(primary_key=True) 35 | name = models.CharField(max_length=500) 36 | number = MyIntegerField(null=True, db_column="num") 37 | dt = models.DateField(null=True) 38 | parent = models.ForeignKey( 39 | "MockFKObject", on_delete=models.CASCADE, null=True, default=None 40 | ) 41 | objects = CopyManager() 42 | 43 | class Meta: 44 | app_label = "tests" 45 | 46 | def copy_name_template(self): 47 | return 'upper("%(name)s")' 48 | 49 | 50 | class MockBlankObject(models.Model): 51 | name = models.CharField(max_length=500) 52 | number = MyIntegerField(null=True, db_column="num") 53 | dt = models.DateField(null=True) 54 | color = models.CharField(max_length=50, blank=True) 55 | parent = models.ForeignKey( 56 | "MockObject", on_delete=models.CASCADE, null=True, default=None 57 | ) 58 | objects = CopyManager() 59 | 60 | class Meta: 61 | app_label = "tests" 62 | 63 | def copy_name_template(self): 64 | return 'upper("%(name)s")' 65 | 66 | 67 | class ExtendedMockObject(models.Model): 68 | static_val = models.IntegerField() 69 | name = models.CharField(max_length=500) 70 | number = MyIntegerField(null=True, db_column="num") 71 | dt = models.DateField(null=True) 72 | static_string = models.CharField(max_length=5) 73 | objects = CopyManager() 74 | 75 | class Meta: 76 | app_label = "tests" 77 | 78 | def copy_name_template(self): 79 | return 'upper("%(name)s")' 80 | 81 | 82 | class LimitedMockObject(models.Model): 83 | name = models.CharField(max_length=500) 84 | dt = models.DateField(null=True) 85 | objects = CopyManager() 86 | 87 | class Meta: 88 | app_label = "tests" 89 | 90 | def copy_name_template(self): 91 | return 'upper("%(name)s")' 92 | 93 | 94 | class OverloadMockObject(models.Model): 95 | name = models.CharField(max_length=500) 96 | upper_name = models.CharField(max_length=500) 97 | lower_name = models.CharField(max_length=500) 98 | number = MyIntegerField(null=True, db_column="num") 99 | dt = models.DateField(null=True) 100 | objects = CopyManager() 101 | 102 | class Meta: 103 | app_label = "tests" 104 | 105 | def copy_upper_name_template(self): 106 | return 'upper("%(name)s")' 107 | 108 | def copy_lower_name_template(self): 109 | return 'lower("%(name)s")' 110 | 111 | 112 | class HookedCopyMapping(CopyMapping): 113 | def pre_copy(self, cursor): 114 | self.ran_pre_copy = True 115 | 116 | def
post_copy(self, cursor): 117 | self.ran_post_copy = True 118 | 119 | def pre_insert(self, cursor): 120 | self.ran_pre_insert = True 121 | 122 | def post_insert(self, cursor): 123 | self.ran_post_insert = True 124 | 125 | 126 | class SecondaryMockObject(models.Model): 127 | text = models.CharField(max_length=500) 128 | objects = CopyManager() 129 | 130 | 131 | class UniqueMockObject(models.Model): 132 | name = models.CharField(max_length=500, unique=True) 133 | objects = CopyManager() 134 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 
58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | b@palewi.re. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /postgres_copy/psycopg_compat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compatibility layer between psycopg2 and psycopg3 (psycopg) for COPY operations. 
3 | 4 | This module provides a unified interface for PostgreSQL COPY operations that works with 5 | both psycopg2 and psycopg3 database drivers. It automatically detects which driver is 6 | available and provides appropriate implementations of copy_to and copy_from functions. 7 | 8 | The main differences between psycopg2 and psycopg3 COPY operations: 9 | 1. psycopg2 uses copy_expert method which takes an SQL string with parameters already inlined 10 | 2. psycopg3 uses a copy method that returns a context manager and accepts parameters separately 11 | 3. psycopg3 handles encoding differently, requiring explicit decoding for text destinations 12 | 13 | This module abstracts away these differences, allowing code to work with either driver 14 | without modification. 15 | """ 16 | 17 | from __future__ import annotations 18 | 19 | import typing 20 | 21 | 22 | # Define a protocol for cursor objects that have the methods we need 23 | class CursorProtocol(typing.Protocol): 24 | """Protocol for database cursor objects.""" 25 | 26 | def copy_expert(self, sql: str, file: typing.TextIO | typing.BinaryIO) -> None: ... 27 | def copy( 28 | self, sql: str, params: typing.Sequence[typing.Any] | None = None 29 | ) -> typing.Any: ... 30 | 31 | 32 | # Define a protocol for file-like objects 33 | class FilelikeProtocol(typing.Protocol): 34 | """Protocol for file-like objects.""" 35 | 36 | def read(self, size: int = -1) -> str | bytes: ... 37 | def write(self, data: str | bytes) -> int: ... 38 | 39 | 40 | try: 41 | # Try to import psycopg (version 3) 42 | import psycopg # noqa: F401 just detect the presence of psycopg(3) 43 | from io import TextIOBase 44 | 45 | # Buffer size for reading data in chunks 46 | BUFFER_SIZE = 128 * 1024 47 | 48 | # Type alias for text or binary file-like objects 49 | FileObj = typing.Union[typing.TextIO, typing.BinaryIO] 50 | 51 | def copy_to( 52 | cursor: CursorProtocol, 53 | sql: str, 54 | params: typing.Sequence[typing.Any], 55 | destination: FileObj, 56 | ) -> None: 57 | """ 58 | Copy data from the database to a file-like object using psycopg3. 
59 | 60 | Args: 61 | cursor: A psycopg3 cursor object 62 | sql: SQL query string with placeholders 63 | params: Parameters for the SQL query 64 | destination: A file-like object to write the data to 65 | 66 | The function handles both text and binary destinations appropriately: 67 | - For text destinations (TextIOBase), it decodes the binary data from PostgreSQL 68 | - For binary destinations, it passes the data through unchanged 69 | """ 70 | # psycopg3 returns binary data that needs to be decoded for text destinations 71 | is_text = isinstance(destination, TextIOBase) 72 | 73 | # Use the psycopg3 copy context manager 74 | with cursor.copy(sql, params) as copy: 75 | # Read data in chunks until there's no more 76 | while True: 77 | data = copy.read() 78 | if not data: 79 | break 80 | 81 | # Decode the data if necessary and write to the destination 82 | if is_text: 83 | # For text destinations, we need to decode to str 84 | text_dest = typing.cast(typing.TextIO, destination) 85 | # Handle both bytes and memoryview objects 86 | if isinstance(data, memoryview): 87 | data = data.tobytes() 88 | text_dest.write(data.decode("utf-8")) 89 | else: 90 | # For binary destinations, we keep as bytes 91 | binary_dest = typing.cast(typing.BinaryIO, destination) 92 | # Handle both bytes and memoryview objects 93 | if isinstance(data, memoryview): 94 | data = data.tobytes() 95 | binary_dest.write(data) 96 | 97 | def copy_from(cursor: CursorProtocol, sql: str, source: FileObj) -> None: 98 | """ 99 | Copy data from a file-like object to the database using psycopg3. 100 | 101 | Args: 102 | cursor: A psycopg3 cursor object 103 | sql: SQL COPY statement string 104 | source: A file-like object to read the data from 105 | 106 | The function reads data from the source in chunks and writes it to 107 | the database using the psycopg3 copy protocol. 108 | """ 109 | # Use the psycopg3 copy context manager 110 | with cursor.copy(sql) as copy: 111 | # Read data in chunks and write to the database 112 | while True: 113 | data = source.read(BUFFER_SIZE) 114 | if not data: 115 | break 116 | copy.write(data) 117 | 118 | except ImportError: 119 | # Fall back to psycopg2 if psycopg3 is not available 120 | from psycopg2.extensions import adapt 121 | 122 | def copy_to( 123 | cursor: CursorProtocol, 124 | sql: str, 125 | params: typing.Sequence[typing.Any], 126 | destination: typing.TextIO | typing.BinaryIO, 127 | ) -> None: 128 | """ 129 | Copy data from the database to a file-like object using psycopg2. 130 | 131 | Args: 132 | cursor: A psycopg2 cursor object 133 | sql: SQL query string with placeholders 134 | params: Parameters for the SQL query 135 | destination: A file-like object to write the data to 136 | 137 | The function adapts the parameters to SQL syntax and inlines them into the query, 138 | then uses psycopg2's copy_expert method to execute the COPY operation. 139 | """ 140 | # psycopg2 requires parameters to be adapted and inlined into the SQL 141 | adapted_params = tuple(adapt(p) for p in params) 142 | inlined_sql = sql % adapted_params 143 | 144 | # Use psycopg2's copy_expert method 145 | cursor.copy_expert(inlined_sql, destination) 146 | 147 | def copy_from( 148 | cursor: CursorProtocol, 149 | sql: str, 150 | source: typing.TextIO | typing.BinaryIO, 151 | ) -> None: 152 | """ 153 | Copy data from a file-like object to the database using psycopg2. 
154 | 155 | Args: 156 | cursor: A psycopg2 cursor object 157 | sql: SQL COPY statement string 158 | source: A file-like object to read the data from 159 | 160 | The function uses psycopg2's copy_expert method to execute the COPY operation. 161 | """ 162 | # Use psycopg2's copy_expert method 163 | cursor.copy_expert(sql, source) 164 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
58 | 59 | livehtml: 60 | sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | 62 | dirhtml: 63 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 64 | @echo 65 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 66 | 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | pickle: 73 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 74 | @echo 75 | @echo "Build finished; now you can process the pickle files." 76 | 77 | json: 78 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 79 | @echo 80 | @echo "Build finished; now you can process the JSON files." 81 | 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | qthelp: 89 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 90 | @echo 91 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 92 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 93 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/django-postgres-copy.qhcp" 94 | @echo "To view the help file:" 95 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/django-postgres-copy.qhc" 96 | 97 | applehelp: 98 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 99 | @echo 100 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 101 | @echo "N.B. You won't be able to view it unless you put it in" \ 102 | "~/Library/Documentation/Help or install it in your application" \ 103 | "bundle." 104 | 105 | devhelp: 106 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 107 | @echo 108 | @echo "Build finished." 109 | @echo "To view the help file:" 110 | @echo "# mkdir -p $$HOME/.local/share/devhelp/django-postgres-copy" 111 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/django-postgres-copy" 112 | @echo "# devhelp" 113 | 114 | epub: 115 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 116 | @echo 117 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 118 | 119 | latex: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo 122 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 123 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 124 | "(use \`make latexpdf' here to do that automatically)." 125 | 126 | latexpdf: 127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 128 | @echo "Running LaTeX files through pdflatex..." 129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 131 | 132 | latexpdfja: 133 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 134 | @echo "Running LaTeX files through platex and dvipdfmx..." 135 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 136 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 137 | 138 | text: 139 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 140 | @echo 141 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 142 | 143 | man: 144 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 145 | @echo 146 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
147 | 148 | texinfo: 149 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 150 | @echo 151 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 152 | @echo "Run \`make' in that directory to run these through makeinfo" \ 153 | "(use \`make info' here to do that automatically)." 154 | 155 | info: 156 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 157 | @echo "Running Texinfo files through makeinfo..." 158 | make -C $(BUILDDIR)/texinfo info 159 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 160 | 161 | gettext: 162 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 163 | @echo 164 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 165 | 166 | changes: 167 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 168 | @echo 169 | @echo "The overview file is in $(BUILDDIR)/changes." 170 | 171 | linkcheck: 172 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 173 | @echo 174 | @echo "Link check complete; look for any errors in the above output " \ 175 | "or in $(BUILDDIR)/linkcheck/output.txt." 176 | 177 | doctest: 178 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 179 | @echo "Testing of doctests in the sources finished, look at the " \ 180 | "results in $(BUILDDIR)/doctest/output.txt." 181 | 182 | coverage: 183 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 184 | @echo "Testing of coverage in the sources finished, look at the " \ 185 | "results in $(BUILDDIR)/coverage/python.txt." 186 | 187 | xml: 188 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 189 | @echo 190 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 191 | 192 | pseudoxml: 193 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 194 | @echo 195 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 196 | -------------------------------------------------------------------------------- /postgres_copy/managers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import logging 4 | import typing 5 | 6 | from django.db import connection, models 7 | from django.db.models.fields import Field 8 | from django.db.transaction import TransactionManagementError 9 | from django.db.backends.base.schema import BaseDatabaseSchemaEditor 10 | 11 | from .copy_from import CopyMapping 12 | from .copy_to import CopyToQuery 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class ConstraintQuerySet(models.QuerySet): 18 | """ 19 | Utilities for temporarily dropping and restoring constraints and indexes. 20 | """ 21 | 22 | @property 23 | def constrained_fields(self) -> typing.List[Field]: 24 | """ 25 | Returns list of model's fields with db_constraint set to True. 26 | """ 27 | return [ 28 | f 29 | for f in self.model._meta.fields 30 | if hasattr(f, "db_constraint") and f.db_constraint 31 | ] 32 | 33 | @property 34 | def indexed_fields(self) -> typing.List[Field]: 35 | """ 36 | Returns list of model's fields with db_index set to True. 37 | """ 38 | return [f for f in self.model._meta.fields if f.db_index] 39 | 40 | def edit_schema( 41 | self, 42 | schema_editor: BaseDatabaseSchemaEditor, 43 | method_name: str, 44 | args: typing.Tuple, 45 | ) -> None: 46 | """ 47 | Edits the schema without throwing errors. 48 | 49 | This allows for the add and drop methods to be run frequently and without fear. 
50 | """ 51 | try: 52 | getattr(schema_editor, method_name)(*args) 53 | except Exception: 54 | logger.debug(f"Edit of {schema_editor}.{method_name} failed. Skipped") 55 | pass 56 | 57 | def drop_constraints(self) -> None: 58 | """ 59 | Drop constraints on the model and its fields. 60 | """ 61 | logger.debug(f"Dropping constraints from {self.model.__name__}") 62 | with connection.schema_editor() as schema_editor: 63 | # Remove any "unique_together" constraints 64 | # NOTE: "unique_together" may be deprecated in the future 65 | if getattr(self.model._meta, "unique_together", False): 66 | logger.debug( 67 | "Dropping unique_together of {}".format( 68 | self.model._meta.unique_together 69 | ) 70 | ) 71 | args = (self.model, self.model._meta.unique_together, ()) 72 | self.edit_schema(schema_editor, "alter_unique_together", args) 73 | 74 | # Remove any field constraints 75 | for field in self.constrained_fields: 76 | logger.debug(f"Dropping constraints from {field}") 77 | field_copy = field.__copy__() 78 | field_copy.db_constraint = False 79 | args = (self.model, field, field_copy) 80 | self.edit_schema(schema_editor, "alter_field", args) 81 | 82 | def drop_indexes(self) -> None: 83 | """ 84 | Drop indexes on the model and its fields. 85 | """ 86 | logger.debug(f"Dropping indexes from {self.model.__name__}") 87 | with connection.schema_editor() as schema_editor: 88 | if getattr(self.model._meta, "index_together", False): 89 | # Remove any "index_together" constraints 90 | # NOTE: "index_together" has been removed in Django 5.1 91 | logger.debug( 92 | f"Dropping index_together of {self.model._meta.index_together}" 93 | ) 94 | args = (self.model, self.model._meta.index_together, ()) 95 | self.edit_schema(schema_editor, "alter_index_together", args) 96 | 97 | # Remove any field indexes 98 | for field in self.indexed_fields: 99 | logger.debug(f"Dropping index from {field}") 100 | field_copy = field.__copy__() 101 | field_copy.db_index = False 102 | args = (self.model, field, field_copy) 103 | self.edit_schema(schema_editor, "alter_field", args) 104 | 105 | def restore_constraints(self) -> None: 106 | """ 107 | Restore constraints on the model and its fields. 108 | """ 109 | logger.debug(f"Adding constraints to {self.model.__name__}") 110 | with connection.schema_editor() as schema_editor: 111 | # Add any "unique_together" constraints to the database 112 | # NOTE: "unique_together" may be deprecated in the future 113 | if getattr(self.model._meta, "unique_together", False): 114 | logger.debug( 115 | "Adding unique_together of {}".format( 116 | self.model._meta.unique_together 117 | ) 118 | ) 119 | args = (self.model, (), self.model._meta.unique_together) 120 | self.edit_schema(schema_editor, "alter_unique_together", args) 121 | 122 | # Add any constraints to the fields 123 | for field in self.constrained_fields: 124 | logger.debug(f"Adding constraints to {field}") 125 | field_copy = field.__copy__() 126 | field_copy.db_constraint = False 127 | args = (self.model, field_copy, field) 128 | self.edit_schema(schema_editor, "alter_field", args) 129 | 130 | def restore_indexes(self) -> None: 131 | """ 132 | Restore indexes on the model and its fields. 133 | """ 134 | logger.debug(f"Adding indexes to {self.model.__name__}") 135 | with connection.schema_editor() as schema_editor: 136 | if getattr(self.model._meta, "index_together", False): 137 | # Add any "index_together" constraints to the database.
138 | # NOTE: "index_together" has been removed in Django 5.1 139 | logger.debug( 140 | "Restoring index_together of {}".format( 141 | self.model._meta.index_together 142 | ) 143 | ) 144 | args = (self.model, (), self.model._meta.index_together) 145 | self.edit_schema(schema_editor, "alter_index_together", args) 146 | 147 | # Add any indexes to the fields 148 | for field in self.indexed_fields: 149 | logger.debug(f"Restoring index to {field}") 150 | field_copy = field.__copy__() 151 | field_copy.db_index = False 152 | args = (self.model, field_copy, field) 153 | self.edit_schema(schema_editor, "alter_field", args) 154 | 155 | 156 | class CopyQuerySet(ConstraintQuerySet): 157 | """ 158 | Subclass of QuerySet that adds from_csv and to_csv methods. 159 | """ 160 | 161 | def from_csv( 162 | self, 163 | csv_path: typing.Union[str, typing.BinaryIO, typing.TextIO], 164 | mapping: typing.Optional[typing.Dict[str, str]] = None, 165 | drop_constraints: bool = True, 166 | drop_indexes: bool = True, 167 | silent: bool = True, 168 | **kwargs: typing.Any, 169 | ) -> int: 170 | """ 171 | Copy CSV file from the provided path to the current model using the provided mapping. 172 | """ 173 | # Dropping constraints or indices will fail with an opaque error for all but 174 | # very trivial databases which wouldn't benefit from this optimization anyway. 175 | # So, to avoid confusion, we prevent the user from even trying. 176 | if drop_constraints or drop_indexes: 177 | try: 178 | connection.validate_no_atomic_block() 179 | except TransactionManagementError: 180 | raise TransactionManagementError( 181 | "You are attempting to drop constraints or " 182 | "indexes inside a transaction block, which is " 183 | "very likely to fail. If it doesn't fail, you " 184 | "wouldn't gain any significant benefit from it " 185 | "anyway. Either remove the transaction block, or set " 186 | "drop_constraints=False and drop_indexes=False." 187 | ) 188 | 189 | # Create a mapping dictionary if none was provided 190 | mapping_dict = mapping if mapping is not None else {} 191 | 192 | # Create the CopyMapping object 193 | copy_mapping = CopyMapping(self.model, csv_path, mapping_dict, **kwargs) 194 | 195 | if drop_constraints: 196 | self.drop_constraints() 197 | if drop_indexes: 198 | self.drop_indexes() 199 | 200 | insert_count = copy_mapping.save(silent=silent) 201 | 202 | if drop_constraints: 203 | self.restore_constraints() 204 | if drop_indexes: 205 | self.restore_indexes() 206 | 207 | return insert_count 208 | 209 | def to_csv( 210 | self, 211 | csv_path: typing.Optional[typing.Union[str, typing.BinaryIO]] = None, 212 | *fields: str, 213 | **kwargs: typing.Any, 214 | ) -> typing.Optional[bytes]: 215 | """ 216 | Copy current QuerySet to CSV at provided path or file-like object.
217 | """ 218 | try: 219 | # For Django 2.0 forward 220 | query = self.query.chain(CopyToQuery) 221 | except AttributeError: 222 | # For Django 1.11 backward 223 | query = self.query.clone(CopyToQuery) 224 | 225 | # Get fields 226 | query.copy_to_fields = fields 227 | 228 | # Delimiter 229 | query.copy_to_delimiter = "DELIMITER '{}'".format(kwargs.get("delimiter", ",")) 230 | 231 | # Header 232 | with_header = kwargs.get("header", True) 233 | query.copy_to_header = "HEADER" if with_header else "" 234 | 235 | # Null string 236 | null_string = kwargs.get("null") 237 | query.copy_to_null_string = f"NULL '{null_string}'" if null_string else "" 238 | 239 | # Quote character 240 | quote_char = kwargs.get("quote") 241 | query.copy_to_quote_char = f"QUOTE '{quote_char}'" if quote_char else "" 242 | 243 | # Force quote on columns 244 | force_quote = kwargs.get("force_quote") 245 | if force_quote: 246 | # If it's a list of fields, pass them in with commas 247 | if isinstance(force_quote, list): 248 | query.copy_to_force_quote = "FORCE QUOTE {}".format( 249 | ", ".join(column for column in force_quote) 250 | ) 251 | # If it's True or a * force quote everything 252 | elif force_quote is True or force_quote == "*": 253 | query.copy_to_force_quote = "FORCE QUOTE *" 254 | # Otherwise, assume it's a string and pass it through 255 | else: 256 | query.copy_to_force_quote = f"FORCE QUOTE {force_quote}" 257 | else: 258 | query.copy_to_force_quote = "" 259 | 260 | # Encoding 261 | set_encoding = kwargs.get("encoding") 262 | query.copy_to_encoding = f"ENCODING '{set_encoding}'" if set_encoding else "" 263 | 264 | # Escape character 265 | escape_char = kwargs.get("escape") 266 | query.copy_to_escape = f"ESCAPE '{escape_char}'" if escape_char else "" 267 | 268 | # Run the query 269 | compiler = query.get_compiler(self.db, connection=connection) 270 | data = compiler.execute_sql(csv_path) 271 | 272 | # If no csv_path is provided, then the query will come back as bytes. 273 | if csv_path is None and isinstance(data, bytes): 274 | # So return that. 275 | return data 276 | 277 | # Otherwise return None 278 | return None 279 | 280 | 281 | CopyManager = models.Manager.from_queryset(CopyQuerySet) 282 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # Django Postgres Copy 2 | 3 | This document provides a comprehensive overview of the `django-postgres-copy` repository, explaining its purpose, architecture, and how to use it effectively. 4 | 5 | ## Repository Overview 6 | 7 | `django-postgres-copy` is a Django package that provides a simple interface for using PostgreSQL's `COPY` command to efficiently import and export data between CSV files and Django models. The `COPY` command is significantly faster than using Django's ORM for bulk operations, especially for large datasets. 8 | 9 | ## Motivation 10 | 11 | The creators of this library are data journalists who frequently download, clean, and analyze new data. This involves writing many data loaders. 
Traditionally, this was done by looping through each row and saving it to the database using Django's ORM `create` method: 12 | 13 | ```python 14 | import csv 15 | from myapp.models import MyModel 16 | 17 | data = csv.DictReader(open("./data.csv")) 18 | for row in data: 19 | MyModel.objects.create(name=row["NAME"], number=row["NUMBER"]) 20 | ``` 21 | 22 | This approach works but is inefficient for large files because Django executes a database query for each row, which can take a long time to complete. 23 | 24 | PostgreSQL's built-in `COPY` command can import and export data with a single query, making it much faster. This package makes using `COPY` as easy as any other database operation in Django. 25 | 26 | ## Installation 27 | 28 | The package can be installed from the Python Package Index with `pip`: 29 | 30 | ```bash 31 | pip install django-postgres-copy 32 | ``` 33 | 34 | You will need to have Django, PostgreSQL, and a database adapter (like `psycopg2` or `psycopg3`) already installed. 35 | 36 | ## Key Components 37 | 38 | ### 1. Core Functionality 39 | 40 | The package provides two main operations: 41 | - **Import from CSV**: Load data from CSV files into Django models 42 | - **Export to CSV**: Export data from Django models to CSV files 43 | 44 | ### 2. Main Modules 45 | 46 | - **`managers.py`**: Contains the `CopyManager` and `CopyQuerySet` classes that extend Django's standard manager and queryset with CSV import/export capabilities. 47 | - **`copy_from.py`**: Handles importing data from CSV files to database tables using the `CopyMapping` class. 48 | - **`copy_to.py`**: Handles exporting data from database tables to CSV files using custom SQL compilers. 49 | - **`psycopg_compat.py`**: Provides compatibility between psycopg2 and psycopg3 database drivers for COPY operations. 50 | 51 | ### 3. Database Driver Compatibility 52 | 53 | The package supports both psycopg2 and psycopg3 database drivers through a compatibility layer in `psycopg_compat.py`. This allows users to migrate to the newer driver at their own pace while maintaining the same API. 54 | 55 | ## Architecture 56 | 57 | ### CopyManager and CopyQuerySet 58 | 59 | The `CopyManager` is a custom Django model manager that extends the standard manager with CSV import/export capabilities. It uses the `CopyQuerySet` class, which adds the `from_csv` and `to_csv` methods to Django's standard queryset. 60 | 61 | ```python 62 | # Usage example 63 | from django.db import models 64 | from postgres_copy import CopyManager 65 | 66 | class MyModel(models.Model): 67 | name = models.CharField(max_length=100) 68 | objects = CopyManager()  # Use the custom manager 69 | ``` 70 | 71 | ### CopyMapping 72 | 73 | The `CopyMapping` class handles the process of mapping CSV columns to Django model fields and loading the data into the database. It uses a four-step process: 74 | 75 | 1. **Create**: Create a temporary table with the same structure as the CSV file 76 | 2. **Copy**: Copy data from the CSV file into the temporary table 77 | 3. **Insert**: Insert data from the temporary table into the Django model's table 78 | 4. **Drop**: Drop the temporary table 79 | 80 | This approach allows for efficient data loading and validation before committing to the actual database table. 81 | 82 | ### Database Driver Compatibility 83 | 84 | The `psycopg_compat.py` module provides a compatibility layer between psycopg2 and psycopg3 database drivers. It automatically detects which driver is available and provides appropriate implementations of `copy_to` and `copy_from` functions.
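To make the dispatch concrete, here is a minimal sketch of how such a shim can work. This is illustrative only; the names and details of the real `psycopg_compat.py` may differ, and the real module may also need to unwrap Django's cursor wrapper to reach the raw driver cursor.

```python
# Illustrative sketch of a driver-dispatch shim, not the actual module code.
try:
    import psycopg  # psycopg3  # noqa: F401

    HAS_PSYCOPG3 = True
except ImportError:
    HAS_PSYCOPG3 = False


def copy_from(cursor, sql, source_file):
    """Stream a file into the database with COPY ... FROM STDIN."""
    if HAS_PSYCOPG3:
        # psycopg3: cursor.copy() returns a context manager we write chunks into.
        with cursor.copy(sql) as copy:
            while True:
                chunk = source_file.read(8192)
                if not chunk:
                    break
                copy.write(chunk)
    else:
        # psycopg2: copy_expert() takes the SQL and a readable file object.
        cursor.copy_expert(sql, source_file)
```

Hiding the difference behind one function keeps `copy_from.py` and `copy_to.py` free of driver-specific branches.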
85 | 86 | The main differences between the drivers that this module handles: 87 | 1. psycopg2 uses `copy_expert` method which takes an SQL string with parameters already inlined 88 | 2. psycopg3 uses a `copy` method that returns a context manager and accepts parameters separately 89 | 3. psycopg3 handles encoding differently, requiring explicit decoding for text destinations 90 | 91 | ## Usage Examples 92 | 93 | ### Importing Data from CSV 94 | 95 | ```python 96 | # Basic import 97 | MyModel.objects.from_csv( 98 | "path/to/file.csv", 99 | mapping={"name": "NAME_COLUMN", "number": "NUMBER_COLUMN", "date": "DATE_COLUMN"}, 100 | ) 101 | 102 | # With custom options 103 | MyModel.objects.from_csv( 104 | "path/to/file.csv", 105 | mapping={"name": "NAME", "number": "NUMBER"}, 106 | delimiter=";", 107 | null="NULL", 108 | encoding="utf-8", 109 | ) 110 | 111 | # If CSV headers match model fields, mapping is optional 112 | MyModel.objects.from_csv("path/to/file.csv") 113 | ``` 114 | 115 | #### Import Method Parameters 116 | 117 | The `from_csv` method accepts the following parameters: 118 | 119 | - `csv_path_or_obj`: The path to the CSV file or a Python file object 120 | - `mapping`: (Optional) Dictionary mapping model fields to CSV headers 121 | - `drop_constraints`: (Default: True) Whether to drop constraints during import 122 | - `drop_indexes`: (Default: True) Whether to drop indexes during import 123 | - `using`: Database to use for import 124 | - `delimiter`: (Default: ',') Character separating values in the CSV 125 | - `quote_character`: Character used for quoting 126 | - `null`: String representing NULL values 127 | - `force_not_null`: List of columns that should ignore NULL string matches 128 | - `force_null`: List of columns that should convert empty quoted strings to NULL 129 | - `encoding`: Character encoding of the CSV 130 | - `ignore_conflicts`: (Default: False) Whether to ignore constraint violations 131 | - `static_mapping`: Dictionary of static values to set for each row 132 | - `temp_table_name`: Name for the temporary table used during import 133 | 134 | ### Exporting Data to CSV 135 | 136 | ```python 137 | # Basic export 138 | MyModel.objects.to_csv("path/to/output.csv") 139 | 140 | # With filtering and custom options 141 | MyModel.objects.filter(active=True).to_csv( 142 | "path/to/output.csv", 143 | "name", 144 | "number", # Only export these fields 145 | delimiter=";", 146 | header=True, 147 | quote='"', 148 | ) 149 | 150 | # Export to string (no file path provided) 151 | csv_data = MyModel.objects.to_csv() 152 | 153 | # Export with annotations 154 | MyModel.objects.annotate(name_count=Count("name")).to_csv("path/to/output.csv") 155 | ``` 156 | 157 | #### Export Method Parameters 158 | 159 | The `to_csv` method accepts the following parameters: 160 | 161 | - `csv_path`: Path to output file or file-like object (optional - returns string if not provided) 162 | - `*fields`: Field names to include in the export (all fields by default) 163 | - `delimiter`: (Default: ',') Character to use as delimiter 164 | - `header`: (Default: True) Whether to include header row 165 | - `null`: String to use for NULL values 166 | - `encoding`: Character encoding for the output file 167 | - `escape`: Escape character to use 168 | - `quote`: Quote character to use 169 | - `force_quote`: Fields to force quote (field name, list of fields, True, or "*") 170 | 171 | ### Advanced Features 172 | 173 | #### Static Mapping 174 | 175 | You can provide static values for fields that don't exist in the CSV: 176 
| 177 | ```python 178 | MyModel.objects.from_csv( 179 | "path/to/file.csv", 180 | mapping={"name": "NAME", "number": "NUMBER"}, 181 | static_mapping={"created_by": "import_script"}, 182 | ) 183 | ``` 184 | 185 | #### Custom Field Processing 186 | 187 | You can customize how fields are processed during import by defining a `copy_template` attribute on your model fields: 188 | 189 | ```python 190 | class MyIntegerField(models.IntegerField): 191 | copy_template = """ 192 | CASE 193 | WHEN "%(name)s" = 'x' THEN null 194 | ELSE "%(name)s"::int 195 | END 196 | """ 197 | ``` 198 | 199 | Or by defining a method on your model: 200 | 201 | ```python 202 | class MyModel(models.Model): 203 | name = models.CharField(max_length=100) 204 | 205 | def copy_name_template(self): 206 | return 'upper("%(name)s")' 207 | ``` 208 | 209 | A common use case is transforming date formats: 210 | 211 | ```python 212 | def copy_mydatefield_template(self): 213 | return """ 214 | CASE 215 | WHEN "%(name)s" = '' THEN NULL 216 | ELSE to_date("%(name)s", 'MM/DD/YYYY') /* The source CSV's date pattern */ 217 | END 218 | """ 219 | ``` 220 | 221 | It's important to handle empty strings by converting them to NULL in date fields to avoid "year out of range" errors. 222 | 223 | #### Hooks 224 | 225 | You can extend the `CopyMapping` class to add custom behavior at different stages of the import process: 226 | 227 | ```python 228 | class CustomCopyMapping(CopyMapping): 229 | def pre_copy(self, cursor): 230 | # Run before copying data 231 | pass 232 | 233 | def post_copy(self, cursor): 234 | # Run after copying data 235 | pass 236 | 237 | def pre_insert(self, cursor): 238 | # Run before inserting data 239 | pass 240 | 241 | def post_insert(self, cursor): 242 | # Run after inserting data 243 | pass 244 | ``` 245 | 246 | ### Working with Related Models 247 | 248 | When exporting data, you can include fields from related models using Django's double underscore notation: 249 | 250 | ```python 251 | # Models 252 | class Hometown(models.Model): 253 | name = models.CharField(max_length=500) 254 | objects = CopyManager() 255 | 256 | 257 | class Person(models.Model): 258 | name = models.CharField(max_length=500) 259 | number = models.IntegerField() 260 | hometown = models.ForeignKey(Hometown, on_delete=models.CASCADE) 261 | objects = CopyManager() 262 | 263 | 264 | # Export with related fields 265 | Person.objects.to_csv("path/to/export.csv", "name", "number", "hometown__name") 266 | ``` 267 | 268 | ## Performance Considerations 269 | 270 | - The package temporarily drops constraints and indexes during import to improve performance 271 | - For large imports, it's recommended to run the import outside of a transaction block 272 | - The package uses PostgreSQL's `COPY` command which is much faster than Django's ORM for bulk operations 273 | - Importing data happens in a four-step process (create temp table, copy data, insert into model table, drop temp table) 274 | 275 | ## Testing 276 | 277 | The package includes comprehensive tests for all functionality, including: 278 | - Basic import/export operations 279 | - Custom field processing 280 | - Error handling 281 | - Multi-database support 282 | - psycopg2 and psycopg3 compatibility 283 | 284 | ## Limitations 285 | 286 | - Only works with PostgreSQL databases 287 | - Requires direct file access (for file-based imports) 288 | - May not handle very complex data transformations without custom field processing 289 | 290 | ## Contributing 291 | 292 | To set up a development environment: 293 | 1. 
Fork and clone the repository 294 | 2. Run `uv sync --all-extras` to install dependencies 295 | 3. Run `uv run pytest tests` to run tests 296 | 297 | ## License 298 | 299 | The package is released under the MIT License. 300 | 301 | ## Resources 302 | 303 | - Documentation: [palewi.re/docs/django-postgres-copy/](https://palewi.re/docs/django-postgres-copy/) 304 | - Issues: [github.com/palewire/django-postgres-copy/issues](https://github.com/palewire/django-postgres-copy/issues) 305 | - Packaging: [pypi.python.org/pypi/django-postgres-copy](https://pypi.python.org/pypi/django-postgres-copy) 306 | - Testing: [github.com/palewire/django-postgres-copy/actions](https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml) 307 | -------------------------------------------------------------------------------- /postgres_copy/copy_from.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Handlers for working with PostgreSQL's COPY command. 4 | """ 5 | 6 | import csv 7 | import logging 8 | import os 9 | import sys 10 | import typing 11 | from collections import OrderedDict 12 | from io import TextIOWrapper 13 | 14 | from django.contrib.humanize.templatetags.humanize import intcomma 15 | from django.core.exceptions import FieldDoesNotExist 16 | from django.db import NotSupportedError, connections, router 17 | from django.db.models import Field, Model 18 | from django.db.backends.utils import CursorWrapper 19 | 20 | from .psycopg_compat import copy_from 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | 25 | class CopyMapping: 26 | """ 27 | Maps a comma-delimited file to a Django model and loads it into a PostgreSQL database using the COPY command. 28 | """ 29 | 30 | def __init__( 31 | self, 32 | model: typing.Type[Model], 33 | csv_path_or_obj: typing.Union[str, typing.BinaryIO, typing.TextIO], 34 | mapping: typing.Dict[str, str], 35 | using: typing.Optional[str] = None, 36 | delimiter: str = ",", 37 | quote_character: typing.Optional[str] = None, 38 | null: typing.Optional[str] = None, 39 | force_not_null: typing.Optional[typing.List[str]] = None, 40 | force_null: typing.Optional[typing.List[str]] = None, 41 | encoding: typing.Optional[str] = None, 42 | ignore_conflicts: bool = False, 43 | static_mapping: typing.Optional[typing.Dict[str, str]] = None, 44 | temp_table_name: typing.Optional[str] = None, 45 | ) -> None: 46 | # Set the required arguments 47 | self.model = model 48 | self.csv_path_or_obj = csv_path_or_obj 49 | 50 | # If the CSV is not a file object already ... 51 | if hasattr(csv_path_or_obj, "read"): 52 | self.csv_file = csv_path_or_obj 53 | else: 54 | # We know it's a string path at this point 55 | csv_path = csv_path_or_obj 56 | # ... verify the path exists ... 57 | if not os.path.exists(csv_path): 58 | raise ValueError("CSV path does not exist") 59 | # ... then open it up.
60 | self.csv_file = open(csv_path) 61 | 62 | # Hook in the other optional settings 63 | self.quote_character = quote_character 64 | self.delimiter = delimiter 65 | self.null = null 66 | self.force_not_null = force_not_null 67 | self.force_null = force_null 68 | self.encoding = encoding 69 | self.supports_ignore_conflicts = True 70 | self.ignore_conflicts = ignore_conflicts 71 | if static_mapping is not None: 72 | self.static_mapping = OrderedDict(static_mapping) 73 | else: 74 | self.static_mapping = OrderedDict() 75 | 76 | # Line up the database connection 77 | if using is not None: 78 | self.using = using 79 | else: 80 | self.using = router.db_for_write(model) 81 | self.conn = connections[self.using] 82 | self.backend = self.conn.ops 83 | 84 | # Verify it is PostgreSQL 85 | if self.conn.vendor != "postgresql": 86 | raise TypeError("Only PostgreSQL backends supported") 87 | 88 | # Check if it is PSQL 9.5 or greater, which determines if ignore_conflicts is supported 89 | self.supports_ignore_conflicts = self.is_postgresql_9_5() 90 | if self.ignore_conflicts and not self.supports_ignore_conflicts: 91 | raise NotSupportedError( 92 | "This database backend does not support ignoring conflicts." 93 | ) 94 | 95 | # Pull the CSV headers 96 | self.headers = self.get_headers() 97 | 98 | # Map them to the model 99 | self.mapping = self.get_mapping(mapping) 100 | 101 | # Make sure everything is legit 102 | self.validate_mapping() 103 | 104 | # Configure the name of our temporary table to COPY into 105 | self.temp_table_name = temp_table_name or "temp_%s" % self.model._meta.db_table 106 | 107 | def save(self, silent: bool = False, stream: typing.TextIO = sys.stdout) -> int: 108 | """ 109 | Saves the contents of the CSV file to the database. 110 | 111 | Override this method and use `self.create(cursor)`, 112 | `self.copy(cursor)`, `self.insert(cursor)`, and `self.drop(cursor)` 113 | if you need functionality other than the default create/copy/insert/drop 114 | workflow. 115 | 116 | silent: 117 | By default, non-fatal error notifications are printed to stdout, 118 | but this keyword may be set to disable these notifications. 119 | 120 | stream: 121 | Status information will be written to this file handle. Defaults to 122 | using `sys.stdout`, but any object with a `write` method is 123 | supported. 124 | """ 125 | logger.debug(f"Loading CSV to {self.model.__name__}") 126 | if not silent: 127 | stream.write(f"Loading CSV to {self.model.__name__}\n") 128 | 129 | # Connect to the database 130 | with self.conn.cursor() as c: 131 | self.create(c) 132 | self.copy(c) 133 | insert_count = self.insert(c) 134 | self.drop(c) 135 | 136 | if not silent: 137 | stream.write(f"{intcomma(insert_count)} records loaded\n") 138 | 139 | return insert_count 140 | 141 | def is_postgresql_9_5(self) -> bool: 142 | pg_version = getattr(self.conn, "pg_version", 0) 143 | return pg_version >= 90500 144 | 145 | def get_field(self, name: str) -> typing.Optional[Field]: 146 | """ 147 | Returns any fields on the database model matching the provided name.
148 | """ 149 | try: 150 | return self.model._meta.get_field(name) 151 | except FieldDoesNotExist: 152 | return None 153 | 154 | def get_mapping(self, mapping: typing.Dict[str, str]) -> typing.Dict[str, str]: 155 | """ 156 | Returns a generated mapping based on the CSV header 157 | """ 158 | if mapping: 159 | return OrderedDict(mapping) 160 | return {name: name for name in self.headers} 161 | 162 | def get_headers(self) -> typing.List[str]: 163 | """ 164 | Returns the column headers from the csv as a list. 165 | """ 166 | logger.debug(f"Retrieving headers from {self.csv_file}") 167 | 168 | # Check if it's a text or binary file 169 | is_binary = hasattr(self.csv_file, "mode") and "b" in getattr( 170 | self.csv_file, "mode", "" 171 | ) 172 | 173 | if is_binary: 174 | # For binary files, we need to wrap it in a TextIOWrapper 175 | encoding = self.encoding or "utf-8" 176 | text_file = TextIOWrapper( 177 | typing.cast(typing.BinaryIO, self.csv_file), encoding=encoding 178 | ) 179 | csv_reader = csv.reader(text_file, delimiter=self.delimiter) 180 | headers = next(csv_reader) 181 | # Detach the wrapper so the file stays open 182 | text_file.detach() 183 | else: 184 | # For text files or file-like objects without a mode attribute 185 | try: 186 | # Try to read directly 187 | csv_reader = csv.reader( 188 | typing.cast(typing.Iterable[str], self.csv_file), 189 | delimiter=self.delimiter, 190 | ) 191 | headers = next(csv_reader) 192 | except (csv.Error, TypeError, AttributeError): 193 | # If that fails, try the binary approach as a fallback 194 | if hasattr(self.csv_file, "seek"): 195 | self.csv_file.seek(0) 196 | encoding = self.encoding or "utf-8" 197 | text_file = TextIOWrapper( 198 | typing.cast(typing.BinaryIO, self.csv_file), encoding=encoding 199 | ) 200 | csv_reader = csv.reader(text_file, delimiter=self.delimiter) 201 | headers = next(csv_reader) 202 | text_file.detach() 203 | 204 | # Move back to the top of the file if possible 205 | if hasattr(self.csv_file, "seek"): 206 | self.csv_file.seek(0) 207 | 208 | return headers 209 | 210 | def validate_mapping(self) -> None: 211 | """ 212 | Verify that the mapping provided by the user is acceptable. 213 | 214 | Raises errors if something goes wrong. Returns nothing if everything is kosher. 215 | """ 216 | # Make sure all of the CSV headers in the mapping actually exist 217 | for map_header in self.mapping.values(): 218 | if map_header not in self.headers: 219 | raise ValueError(f"Header '{map_header}' not found in CSV file") 220 | 221 | # Make sure all the model fields in the mapping actually exist 222 | for map_field in self.mapping.keys(): 223 | if not self.get_field(map_field): 224 | raise FieldDoesNotExist(f"Model does not include {map_field} field") 225 | 226 | # Make sure any static mapping columns exist 227 | for static_field in self.static_mapping.keys(): 228 | if not self.get_field(static_field): 229 | raise ValueError(f"Model does not include {static_field} field") 230 | 231 | # 232 | # CREATE commands 233 | # 234 | 235 | def prep_create(self) -> str: 236 | """ 237 | Creates a CREATE statement that makes a new temporary table. 238 | 239 | Returns SQL that can be run. 
240 | """ 241 | sql = """CREATE TEMPORARY TABLE "%(table_name)s" (%(field_list)s);""" 242 | options = dict(table_name=self.temp_table_name) 243 | field_list = [] 244 | 245 | # Loop through all the fields and CSV headers together 246 | for header in self.headers: 247 | # Format the SQL create statement 248 | string = '"%s" text' % header 249 | 250 | # Add the string to the list 251 | field_list.append(string) 252 | 253 | # Join all the field strings together 254 | options["field_list"] = ", ".join(field_list) 255 | 256 | # Mash together the SQL and pass it out 257 | return sql % options 258 | 259 | def create(self, cursor: CursorWrapper) -> None: 260 | """ 261 | Generate and run create sql for the temp table. 262 | Runs a DROP on same prior to CREATE to avoid collisions. 263 | 264 | cursor: 265 | A cursor object on the db 266 | """ 267 | logger.debug("Running CREATE command") 268 | self.drop(cursor) 269 | create_sql = self.prep_create() 270 | logger.debug(create_sql) 271 | cursor.execute(create_sql) 272 | 273 | # 274 | # COPY commands 275 | # 276 | 277 | def prep_copy(self) -> str: 278 | """ 279 | Creates a COPY statement that loads the CSV into a temporary table. 280 | 281 | Returns SQL that can be run. 282 | """ 283 | sql = """ 284 | COPY "%(db_table)s" (%(header_list)s) 285 | FROM STDIN 286 | WITH CSV HEADER %(extra_options)s; 287 | """ 288 | options = { 289 | "db_table": self.temp_table_name, 290 | "extra_options": "", 291 | "header_list": ", ".join([f'"{h}"' for h in self.headers]), 292 | } 293 | if self.quote_character: 294 | options["extra_options"] += f" QUOTE '{self.quote_character}'" 295 | if self.delimiter: 296 | options["extra_options"] += f" DELIMITER '{self.delimiter}'" 297 | if self.null is not None: 298 | options["extra_options"] += f" NULL '{self.null}'" 299 | if self.force_not_null is not None: 300 | options["extra_options"] += " FORCE NOT NULL {}".format( 301 | ",".join(f'"{s}"' for s in self.force_not_null) 302 | ) 303 | if self.force_null is not None: 304 | options["extra_options"] += " FORCE NULL {}".format( 305 | ",".join('"%s"' % s for s in self.force_null) 306 | ) 307 | if self.encoding: 308 | options["extra_options"] += f" ENCODING '{self.encoding}'" 309 | return sql % options 310 | 311 | def pre_copy(self, cursor: CursorWrapper) -> None: 312 | pass 313 | 314 | def copy(self, cursor: CursorWrapper) -> None: 315 | """ 316 | Generate and run the COPY command to copy data from csv to temp table. 317 | 318 | Calls `self.pre_copy(cursor)` and `self.post_copy(cursor)` respectively 319 | before and after running copy 320 | 321 | cursor: 322 | A cursor object on the db 323 | """ 324 | # Run pre-copy hook 325 | self.pre_copy(cursor) 326 | 327 | logger.debug("Running COPY command") 328 | copy_sql = self.prep_copy() 329 | logger.debug(copy_sql) 330 | copy_from( 331 | cursor, 332 | copy_sql, 333 | typing.cast(typing.Union[typing.TextIO, typing.BinaryIO], self.csv_file), 334 | ) 335 | 336 | # At this point all data has been loaded to the temp table 337 | if hasattr(self.csv_file, "close"): 338 | self.csv_file.close() 339 | 340 | # Run post-copy hook 341 | self.post_copy(cursor) 342 | 343 | def post_copy(self, cursor: CursorWrapper) -> None: 344 | pass 345 | 346 | # 347 | # INSERT commands 348 | # 349 | 350 | def insert_suffix(self) -> str: 351 | """ 352 | Preps the suffix to the insert query. 
353 | """ 354 | if self.ignore_conflicts: 355 | return """ 356 | ON CONFLICT DO NOTHING; 357 | """ 358 | else: 359 | return ";" 360 | 361 | def prep_insert(self) -> str: 362 | """ 363 | Creates a INSERT statement that reorders and cleans up 364 | the fields from the temporary table for insertion into the 365 | Django model. 366 | 367 | Returns SQL that can be run. 368 | """ 369 | sql = """ 370 | INSERT INTO "%(model_table)s" (%(model_fields)s) ( 371 | SELECT %(temp_fields)s 372 | FROM "%(temp_table)s")%(insert_suffix)s 373 | """ 374 | options = dict( 375 | model_table=self.model._meta.db_table, 376 | temp_table=self.temp_table_name, 377 | insert_suffix=self.insert_suffix(), 378 | ) 379 | 380 | # 381 | # The model fields to be inserted into 382 | # 383 | 384 | model_fields = [] 385 | for field_name in self.mapping.keys(): 386 | field = self.get_field(field_name) 387 | if field is not None: 388 | model_fields.append('"%s"' % field.get_attname_column()[1]) 389 | 390 | for k in self.static_mapping.keys(): 391 | model_fields.append('"%s"' % k) 392 | 393 | options["model_fields"] = ", ".join(model_fields) 394 | 395 | # 396 | # The temp fields to SELECT from 397 | # 398 | 399 | temp_fields = [] 400 | for field_name, header in self.mapping.items(): 401 | # Pull the field object from the model 402 | field = self.get_field(field_name) 403 | if field is not None: 404 | field_type = field.db_type(self.conn) 405 | if field_type in ["serial", "bigserial"]: 406 | field_type = "integer" 407 | 408 | # Format the SQL 409 | string = f'cast("{header}" as {field_type})' 410 | 411 | # Apply a datatype template override, if it exists 412 | if hasattr(field, "copy_template"): 413 | string = field.copy_template % dict(name=header) 414 | 415 | # Apply a field specific template override, if it exists 416 | template_method = "copy_%s_template" % field.name 417 | if hasattr(self.model, template_method): 418 | template = getattr(self.model(), template_method)() 419 | string = template % dict(name=header) 420 | 421 | # Add field to list 422 | temp_fields.append(string) 423 | 424 | # Tack on static fields 425 | for v in self.static_mapping.values(): 426 | temp_fields.append("'%s'" % v) 427 | 428 | # Join it all together 429 | options["temp_fields"] = ", ".join(temp_fields) 430 | 431 | # Pass it out 432 | return sql % options 433 | 434 | def pre_insert(self, cursor: CursorWrapper) -> None: 435 | pass 436 | 437 | def insert(self, cursor: CursorWrapper) -> int: 438 | """ 439 | Generate and run the INSERT command to move data from the temp table 440 | to the concrete table. 441 | 442 | Calls `self.pre_copy(cursor)` and `self.post_copy(cursor)` respectively 443 | before and after running copy 444 | 445 | returns: the count of rows inserted 446 | 447 | cursor: 448 | A cursor object on the db 449 | """ 450 | # Pre-insert hook 451 | self.pre_insert(cursor) 452 | 453 | logger.debug("Running INSERT command") 454 | insert_sql = self.prep_insert() 455 | logger.debug(insert_sql) 456 | cursor.execute(insert_sql) 457 | insert_count = cursor.rowcount 458 | logger.debug(f"{insert_count} rows inserted") 459 | 460 | # Post-insert hook 461 | self.post_insert(cursor) 462 | 463 | # Return the row count 464 | return insert_count if isinstance(insert_count, int) else 0 465 | 466 | def post_insert(self, cursor: CursorWrapper) -> None: 467 | pass 468 | 469 | # 470 | # DROP commands 471 | # 472 | 473 | def prep_drop(self) -> str: 474 | """ 475 | Creates a DROP statement that gets rid of the temporary table. 476 | 477 | Return SQL that can be run. 
478 | """ 479 | return 'DROP TABLE IF EXISTS "%s";' % self.temp_table_name 480 | 481 | def drop(self, cursor: CursorWrapper) -> None: 482 | """ 483 | Generate and run the DROP command for the temp table. 484 | 485 | cursor: 486 | A cursor object on the db 487 | """ 488 | logger.debug("Running DROP command") 489 | drop_sql = self.prep_drop() 490 | logger.debug(drop_sql) 491 | cursor.execute(drop_sql) 492 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | django-postgres-copy 3 | ==================== 4 | 5 | Quickly import and export delimited data with Django support for PostgreSQL's COPY command 6 | 7 | .. contents:: Table of contents 8 | :depth: 1 9 | :local: 10 | 11 | Why and what for? 12 | ================= 13 | 14 | `The people `_ who made this library are data journalists. We are often downloading, cleaning and analyzing new data. 15 | 16 | That means we write a load of loaders. In the past we did this by looping through each row and saving it to the database using the Django's ORM `create method `_. 17 | 18 | .. code-block:: python 19 | 20 | import csv 21 | from myapp.models import MyModel 22 | 23 | 24 | data = csv.DictReader(open("./data.csv")) 25 | for row in data: 26 | MyModel.objects.create(name=row["NAME"], number=row["NUMBER"]) 27 | 28 | That works, but if you have a big file Django will rack up a database query for each row. That can take a long time to finish. 29 | 30 | Lucky for us, PostgreSQL has a built-in tool called `COPY `_ that hammers data in and out the database with one quick query. 31 | 32 | This package tries to make using COPY as easy as any other database routine supported by Django. It is implemented by a custom `model manager `_. 33 | 34 | Here's how it imports a CSV to a database table. 35 | 36 | .. code-block:: python 37 | 38 | from myapp.models import MyModel 39 | 40 | 41 | MyModel.objects.from_csv( 42 | "./data.csv", # The path to a source file (a Python file object is also acceptable) 43 | dict(name="NAME", number="NUMBER"), # A crosswalk of model fields to CSV headers. 44 | ) 45 | 46 | And here's how it exports a database table to a CSV. 47 | 48 | .. code-block:: python 49 | 50 | from myapp.models import MyModel 51 | 52 | 53 | MyModel.objects.to_csv("./data.csv") 54 | 55 | 56 | Installation 57 | ============ 58 | 59 | The package can be installed from the Python Package Index with `pip`. 60 | 61 | .. code-block:: bash 62 | 63 | $ pip install django-postgres-copy 64 | 65 | You will have to have Django, PostgreSQL and an adapter between the two, such as `psycopg2 `_ or `psycopg 3 `_), installed to put this library to use. 66 | 67 | 68 | An example 69 | ========== 70 | 71 | It all starts with a CSV file you'd like to load into your database. This library is intended to be used with large files but here's something simple as an example. 72 | 73 | .. code-block:: text 74 | 75 | name,number,date 76 | ben,1,2012-01-01 77 | joe,2,2012-01-02 78 | jane,3,2012-01-03 79 | 80 | A Django model that corresponds to the data might look something like this. It should have our custom manager attached. 81 | 82 | .. 
code-block:: python 83 | :emphasize-lines: 2,9 84 | 85 | from django.db import models 86 | from postgres_copy import CopyManager 87 | 88 | 89 | class Person(models.Model): 90 | name = models.CharField(max_length=500) 91 | number = models.IntegerField(null=True) 92 | date = models.DateField(null=True) 93 | objects = CopyManager() 94 | 95 | If the model hasn't been created in your database, that needs to happen. 96 | 97 | .. code-block:: bash 98 | 99 | $ python manage.py migrate 100 | 101 | 102 | How to import data 103 | ------------------ 104 | 105 | Here's how to create a script to import CSV data into the model. Our favorite way to do this is to write a `custom Django management command `_. 106 | 107 | .. code-block:: python 108 | :emphasize-lines: 1,8-11 109 | 110 | from myapp.models import Person 111 | from django.core.management.base import BaseCommand 112 | 113 | 114 | class Command(BaseCommand): 115 | 116 | def handle(self, *args, **kwargs): 117 | # Since the CSV headers match the model fields, 118 | # you only need to provide the file's path (or a Python file object) 119 | insert_count = Person.objects.from_csv("/path/to/my/import.csv") 120 | print(f"{insert_count} records inserted") 121 | 122 | Run your loader. 123 | 124 | .. code-block:: bash 125 | 126 | $ python manage.py myimportcommand 127 | 128 | 129 | How to export data 130 | ------------------ 131 | 132 | .. code-block:: python 133 | :emphasize-lines: 1,8-10 134 | 135 | from myapp.models import Person 136 | from django.core.management.base import BaseCommand 137 | 138 | 139 | class Command(BaseCommand): 140 | 141 | def handle(self, *args, **kwargs): 142 | # All this method needs is the path to your CSV. 143 | # (If you don't provide one, the method will return the CSV as a string.) 144 | Person.objects.to_csv("/path/to/my/export.csv") 145 | 146 | Run your exporter and that's it. 147 | 148 | .. code-block:: bash 149 | 150 | $ python manage.py myexportcommand 151 | 152 | You can even export your queryset after any filters or other tricks. This will work: 153 | 154 | .. code-block:: python 155 | 156 | Person.objects.exclude(name="BEN").to_csv("/path/to/my/export.csv") 157 | 158 | And so will something like this: 159 | 160 | .. code-block:: python 161 | 162 | Person.objects.annotate(name_count=Count("name")).to_csv("/path/to/my/export.csv") 163 | 164 | 165 | Import options 166 | ============== 167 | 168 | The ``from_csv`` manager method has the following arguments and keyword options. It returns the number of records added. 169 | 170 | ..
method:: from_csv(csv_path_or_obj[, mapping=None, drop_constraints=True, drop_indexes=True, using=None, delimiter=',', quote_character=None, null=None, force_not_null=None, force_null=None, encoding=None, ignore_conflicts=False, static_mapping=None, temp_table_name=None]) 171 | 172 | 173 | =================== ========================================================= 174 | Argument Description 175 | =================== ========================================================= 176 | ``csv_path_or_obj`` The path to the delimited data file, or a Python file 177 | object containing delimited data 178 | =================== ========================================================= 179 | 180 | 181 | ===================== ======================================================= 182 | Keyword Argument Description 183 | ===================== ======================================================= 184 | ``mapping`` An (optional) dictionary: keys are strings corresponding 185 | to the model fields, and values correspond to string 186 | field names for the CSV header. If not provided, the 187 | mapping is generated based on the CSV file header. 188 | 189 | ``drop_constraints`` A boolean that indicates whether or not constraints 190 | on the table and fields should be dropped prior to 191 | loading, then restored afterward. Default is True. 192 | This is done to boost speed. 193 | 194 | ``drop_indexes`` A boolean that indicates whether or not indexes 195 | on the table and fields should be dropped prior to 196 | loading, then restored afterward. Default is True. 197 | This is done to boost speed. 198 | 199 | ``delimiter`` The character that separates values in the data file. 200 | By default it is ",". This must be a single one-byte 201 | character. 202 | 203 | ``quote_character`` Specifies the quoting character to be used when a 204 | data value is quoted. The default is double-quote. 205 | This must be a single one-byte character. 206 | 207 | ``null`` Specifies the string that represents a null value. 208 | The default is an unquoted empty string. 209 | 210 | 211 | ``force_not_null`` Specifies which columns should ignore matches 212 | against the null string. Empty values in these columns 213 | will remain zero-length strings rather than becoming 214 | nulls. The default is None. If passed, this must be 215 | a list of column names. 216 | 217 | ``force_null`` Specifies which columns should register matches 218 | against the null string, even if it has been quoted. 219 | In the default case where the null string is empty, 220 | this converts a quoted empty string into NULL. The 221 | default is None. If passed, this must be a list of 222 | column names. 223 | 224 | ``encoding`` Specifies the character set encoding of the strings 225 | in the CSV data source. For example, ``'latin-1'``, 226 | ``'utf-8'``, and ``'cp437'`` are all valid encoding 227 | parameters. 228 | 229 | ``ignore_conflicts`` Specify True to ignore unique constraint or exclusion 230 | constraint violation errors. The default is False. 231 | 232 | ``using`` Sets the database to use when importing data. 233 | Default is None, which will use the ``'default'`` 234 | database. 235 | 236 | ``static_mapping`` Set model attributes not in the CSV the same 237 | for every row in the database by providing a dictionary 238 | with the name of the columns as keys and the static 239 | inputs as values. 240 | 241 | ``temp_table_name`` Set the name of the temporary database table used 242 | to stage data during import.
If not provided, a name 243 | will be generated on the fly. The generated name is 244 | not guaranteed to be unique, which could negatively 245 | impact parallel import operations. 246 | ===================== ======================================================= 247 | 248 | 249 | Transforming data 250 | ----------------- 251 | 252 | By default, the COPY command cannot transform data on-the-fly as it is loaded into the database. 253 | 254 | This library first loads the data into a temporary table before inserting all records into the model table. So it is possible to use PostgreSQL's built-in SQL methods to modify values during the insert. 255 | 256 | As an example, imagine a CSV that includes a column of yes and no values that you wanted to store in the database as 1 or 0 in an integer field. 257 | 258 | .. code-block:: text 259 | 260 | NAME,VALUE 261 | ben,yes 262 | joe,no 263 | 264 | A model to store the data as you'd prefer might look like this. 265 | 266 | .. code-block:: python 267 | 268 | from django.db import models 269 | from postgres_copy import CopyManager 270 | 271 | 272 | class Person(models.Model): 273 | name = models.CharField(max_length=500) 274 | value = models.IntegerField() 275 | objects = CopyManager() 276 | 277 | But if the CSV file was loaded directly into the database, you would receive a data type error when the 'yes' and 'no' strings were inserted into the integer field. 278 | 279 | This library offers two ways you can transform that data during the insert. 280 | 281 | 282 | Custom-field transformations 283 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 284 | 285 | One approach is to create a custom Django field. 286 | 287 | You can provide a SQL statement for how to transform the data during the insert into the model table. The transformation must include a string interpolation keyed to "name", where the name of the database column will be slotted. 288 | 289 | This example uses a `CASE statement `_ to transform the data. 290 | 291 | .. code-block:: python 292 | 293 | from django.db.models.fields import IntegerField 294 | 295 | 296 | class MyIntegerField(IntegerField): 297 | copy_template = """ 298 | CASE 299 | WHEN "%(name)s" = 'yes' THEN 1 300 | WHEN "%(name)s" = 'no' THEN 0 301 | END 302 | """ 303 | 304 | Back in the models file the custom field can be substituted for the default. 305 | 306 | .. code-block:: python 307 | :emphasize-lines: 3,8 308 | 309 | from django.db import models 310 | from postgres_copy import CopyManager 311 | from myapp.fields import MyIntegerField 312 | 313 | 314 | class Person(models.Model): 315 | name = models.CharField(max_length=500) 316 | value = MyIntegerField() 317 | objects = CopyManager() 318 | 319 | 320 | Run your loader and it should finish fine. 321 | 322 | 323 | Model-method transformations 324 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 325 | 326 | A second approach is to provide a SQL string for how to transform a field during the insert on the model itself. This lets you specify different transformations for different fields of the same type. 327 | 328 | You must name the method so that the field name is sandwiched between ``copy_`` and ``_template``. It must return a SQL statement with a string interpolation keyed to "name", where the name of the database column will be slotted. 329 | 330 | For the example above, the model might be modified to look like this. 331 | 332 | ..
code-block:: python 333 | :emphasize-lines: 10-16 334 | 335 | from django.db import models 336 | from postgres_copy import CopyManager 337 | 338 | 339 | class Person(models.Model): 340 | name = models.CharField(max_length=500) 341 | value = models.IntegerField() 342 | objects = CopyManager() 343 | 344 | def copy_value_template(self): 345 | return """ 346 | CASE 347 | WHEN "%(name)s" = 'yes' THEN 1 348 | WHEN "%(name)s" = 'no' THEN 0 349 | END 350 | """ 351 | 352 | And that's it. 353 | 354 | Here's another example of a common issue, transforming the CSV's date format to one PostgreSQL and Django will understand. 355 | 356 | .. code-block:: python 357 | 358 | def copy_mydatefield_template(self): 359 | return """ 360 | CASE 361 | WHEN "%(name)s" = '' THEN NULL 362 | ELSE to_date("%(name)s", 'MM/DD/YYYY') /* The source CSV's date pattern can be set here. */ 363 | END 364 | """ 365 | 366 | It's important to handle empty strings (by converting them to NULL) in this example. PostgreSQL will accept empty strings, but Django won't be able to ingest the field and you'll get a strange "year out of range" error when you call something like ``MyModel.objects.all()``. 367 | 368 | 369 | Inserting static values 370 | ----------------------- 371 | 372 | If your model has columns that are not in the CSV, you can set static values for what is inserted using the ``static_mapping`` keyword argument. It will insert the provided values into every row in the database. 373 | 374 | An example could be if you want to include the name of the source CSV file along with each row. 375 | 376 | Your model might look like this: 377 | 378 | .. code-block:: python 379 | :emphasize-lines: 8 380 | 381 | from django.db import models 382 | from postgres_copy import CopyManager 383 | 384 | 385 | class Person(models.Model): 386 | name = models.CharField(max_length=500) 387 | number = models.IntegerField() 388 | source_csv = models.CharField(max_length=500) 389 | objects = CopyManager() 390 | 391 | 392 | And your loader would look like this: 393 | 394 | .. code-block:: python 395 | :emphasize-lines: 11-13 396 | 397 | from myapp.models import Person 398 | from django.core.management.base import BaseCommand 399 | 400 | 401 | class Command(BaseCommand): 402 | 403 | def handle(self, *args, **kwargs): 404 | Person.objects.from_csv( 405 | "/path/to/my/data.csv", 406 | dict(name="NAME", number="NUMBER"), 407 | static_mapping={"source_csv": "data.csv"}, 408 | ) 409 | 410 | 411 | Extending with hooks 412 | -------------------- 413 | 414 | The ``from_csv`` method connects with a lower-level ``CopyMapping`` class with optional hooks that run before and after the COPY statement. They run first when the CSV is loaded into a temporary table, and then again before and after the INSERT statement that slots data into your model's table. 415 | 416 | If you have extra steps or more complicated logic you'd like to work into a loading routine, ``CopyMapping`` and its hooks provide an opportunity to extend the base library. 417 | 418 | To try them out, subclass ``CopyMapping`` and fill in as many of the optional hook methods below as you need. 419 | 420 | ..
code-block:: python 421 | 422 | from postgres_copy import CopyMapping 423 | 424 | 425 | class HookedCopyMapping(CopyMapping): 426 | def pre_copy(self, cursor): 427 | print("pre_copy!") 428 | # Doing whatever you'd like here 429 | 430 | def post_copy(self, cursor): 431 | print("post_copy!") 432 | # And here 433 | 434 | def pre_insert(self, cursor): 435 | print("pre_insert!") 436 | # And here 437 | 438 | def post_insert(self, cursor): 439 | print("post_insert!") 440 | # And finally here 441 | 442 | 443 | Now you can run that subclass directly rather than via a manager. The only differences are that the model is the first argument to ``CopyMapping``, which creates an object that is executed with a call to its ``save`` method. 444 | 445 | .. code-block:: python 446 | :emphasize-lines: 2,9-16 447 | 448 | 449 | from myapp.models import Person 450 | from myapp.loaders import HookedCopyMapping 451 | from django.core.management.base import BaseCommand 452 | 453 | 454 | class Command(BaseCommand): 455 | 456 | def handle(self, *args, **kwargs): 457 | # Note that we're using HookedCopyMapping here 458 | c = HookedCopyMapping( 459 | Person, 460 | "/path/to/my/data.csv", 461 | dict(name="NAME", number="NUMBER"), 462 | ) 463 | # Then save it. 464 | c.save() 465 | 466 | 467 | Export options 468 | ============== 469 | 470 | The ``to_csv`` manager method accepts one optional leading argument, the path where the CSV should be exported. It also allows users to optionally limit or expand the fields written out by providing them as additional parameters. Other options allow for configuration of the output file. 471 | 472 | .. method:: to_csv(csv_path [, *fields, delimiter=',', header=True, null=None, encoding=None, escape=None, quote=None, force_quote=None]) 473 | 474 | 475 | ================= ========================================================= 476 | Argument Description 477 | ================= ========================================================= 478 | ``csv_path`` The path to a file to write out the CSV. Also accepts 479 | file-like objects. Optional. If you don't provide one, 480 | the comma-delimited data is returned as a string. 481 | 482 | ``fields`` Strings corresponding to the model fields to be exported. 483 | All fields on the model are exported by default. Fields 484 | on related models can be included with Django's double 485 | underscore notation. Optional. 486 | 487 | ``delimiter`` String that will be used as a delimiter for the CSV 488 | file. Optional. 489 | 490 | ``header`` Boolean that determines if the header should be exported. 491 | Optional. 492 | 493 | ``null`` String to populate exported null values with. Default 494 | is an empty string. Optional. 495 | 496 | ``encoding`` The character encoding that should be used for the file 497 | being written. Optional. 498 | 499 | ``escape`` The escape character to be used. Optional. 500 | 501 | ``quote`` The quote character to be used. Optional. 502 | 503 | ``force_quote`` Force fields to be quoted in the CSV. Default is None. 504 | A field name or list of field names can be submitted. 505 | Pass in True or "*" to quote all fields. Optional. 506 | ================= ========================================================= 507 | 508 | 509 | Reducing the exported fields 510 | ---------------------------- 511 | 512 | You can reduce the number of fields exported by providing the ones you want as additional arguments to the ``to_csv`` method. 513 | 514 | Your model might look like this: 515 | 516 | ..
code-block:: python 517 | 518 | from django.db import models 519 | from postgres_copy import CopyManager 520 | 521 | 522 | class Person(models.Model): 523 | name = models.CharField(max_length=500) 524 | number = models.IntegerField() 525 | objects = CopyManager() 526 | 527 | You could export only the name field by providing it as an extra parameter. 528 | 529 | .. code-block:: python 530 | :emphasize-lines: 10 531 | 532 | from myapp.models import Person 533 | from django.core.management.base import BaseCommand 534 | 535 | 536 | class Command(BaseCommand): 537 | 538 | def handle(self, *args, **kwargs): 539 | Person.objects.to_csv("/path/to/my/export.csv", "name") 540 | 541 | 542 | Increasing the exported fields 543 | ------------------------------ 544 | 545 | In cases where your model is connected to other tables with a foreign key, you can increase the number of fields exported to include related tables using Django's double underscore notation. 546 | 547 | Your models might look like this: 548 | 549 | .. code-block:: python 550 | 551 | from django.db import models 552 | from postgres_copy import CopyManager 553 | 554 | 555 | class Hometown(models.Model): 556 | name = models.CharField(max_length=500) 557 | objects = CopyManager() 558 | 559 | 560 | class Person(models.Model): 561 | name = models.CharField(max_length=500) 562 | number = models.IntegerField() 563 | hometown = models.ForeignKey(Hometown, on_delete=models.CASCADE) 564 | objects = CopyManager() 565 | 566 | You can reach across to related tables during an export by adding their fields to the export method. 567 | 568 | 569 | .. code-block:: python 570 | :emphasize-lines: 12 571 | 572 | from myapp.models import Person 573 | from django.core.management.base import BaseCommand 574 | 575 | 576 | class Command(BaseCommand): 577 | 578 | def handle(self, *args, **kwargs): 579 | Person.objects.to_csv( 580 | "/path/to/my/export.csv", "name", "number", "hometown__name" 581 | ) 582 | 583 | 584 | Open-source resources 585 | ===================== 586 | 587 | * Code: `github.com/palewire/django-postgres-copy `_ 588 | * Issues: `github.com/palewire/django-postgres-copy/issues `_ 589 | * Packaging: `pypi.python.org/pypi/django-postgres-copy `_ 590 | * Testing: `github.com/palewire/django-postgres-copy/actions `_ 591 | -------------------------------------------------------------------------------- /tests/test_queries.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import io 3 | import os 4 | from datetime import date 5 | from unittest import mock 6 | 7 | import pytest 8 | from django.core.exceptions import FieldDoesNotExist 9 | from django.db import transaction 10 | from django.db.models import Count 11 | from django.db.transaction import TransactionManagementError 12 | from django.test import TestCase 13 | 14 | from postgres_copy import CopyMapping 15 | 16 | from .models import ( 17 | ExtendedMockObject, 18 | HookedCopyMapping, 19 | LimitedMockObject, 20 | MockBlankObject, 21 | MockFKObject, 22 | MockObject, 23 | OverloadMockObject, 24 | SecondaryMockObject, 25 | UniqueMockObject, 26 | ) 27 | 28 | try: 29 | from psycopg.errors import Error 30 | except ImportError: 31 | from psycopg2.errors import Error 32 | 33 | 34 | class BaseTest(TestCase): 35 | databases = ["default", "sqlite", "other", "secondary"] 36 | 37 | def setUp(self): 38 | self.data_dir = os.path.join(os.path.dirname(__file__), "data") 39 | self.name_path = os.path.join(self.data_dir, "names.csv") 40 | self.foreign_path = os.path.join(self.data_dir,
"foreignkeys.csv") 41 | self.pipe_path = os.path.join(self.data_dir, "pipes.csv") 42 | self.quote_path = os.path.join(self.data_dir, "quote.csv") 43 | self.blank_null_path = os.path.join(self.data_dir, "blanknulls.csv") 44 | self.null_path = os.path.join(self.data_dir, "nulls.csv") 45 | self.backwards_path = os.path.join(self.data_dir, "backwards.csv") 46 | self.matching_headers_path = os.path.join(self.data_dir, "matching_headers.csv") 47 | self.secondarydb_path = os.path.join(self.data_dir, "secondary_db.csv") 48 | 49 | def tearDown(self): 50 | MockObject.objects.all().delete() 51 | MockFKObject.objects.all().delete() 52 | ExtendedMockObject.objects.all().delete() 53 | LimitedMockObject.objects.all().delete() 54 | OverloadMockObject.objects.all().delete() 55 | SecondaryMockObject.objects.all().delete() 56 | 57 | 58 | class PostgresCopyToTest(BaseTest): 59 | def setUp(self): 60 | super().setUp() 61 | self.export_path = os.path.join(os.path.dirname(__file__), "export.csv") 62 | self.export_files = [io.StringIO(), io.BytesIO()] 63 | 64 | def tearDown(self): 65 | super().tearDown() 66 | if os.path.exists(self.export_path): 67 | os.remove(self.export_path) 68 | 69 | def _load_objects( 70 | self, file_path, mapping=dict(name="NAME", number="NUMBER", dt="DATE") 71 | ): 72 | MockObject.objects.from_csv(file_path, mapping) 73 | 74 | def _load_secondary_objects(self, file_path, mapping=dict(text="TEXT")): 75 | SecondaryMockObject.objects.from_csv(file_path, mapping) 76 | 77 | # These tests are using simple enough databases that they can safely proceed 78 | # with uploading objects from CSV despite being within a transaction block. 79 | # In particular, Django wraps all tests in a transaction so that database 80 | # changes can be rolled back. Therefore, we bypass validate_no_atomic_block 81 | # here and elsewhere. 
82 | @mock.patch("django.db.connection.validate_no_atomic_block") 83 | def test_export(self, _): 84 | self._load_objects(self.name_path) 85 | MockObject.objects.to_csv(self.export_path) 86 | self.assertTrue(os.path.exists(self.export_path)) 87 | reader = csv.DictReader(open(self.export_path)) 88 | self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader]) 89 | 90 | @mock.patch("django.db.connection.validate_no_atomic_block") 91 | def test_export_to_file(self, _): 92 | self._load_objects(self.name_path) 93 | for f in self.export_files: 94 | MockObject.objects.to_csv(f) 95 | reader = csv.DictReader(f) 96 | self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader]) 97 | 98 | @mock.patch("django.db.connection.validate_no_atomic_block") 99 | def test_export_to_str(self, _): 100 | self._load_objects(self.name_path) 101 | first_id = MockObject.objects.order_by("id").first().id 102 | export = MockObject.objects.to_csv() 103 | self.assertEqual( 104 | export, 105 | f"""id,name,num,dt,parent_id 106 | {first_id},BEN,1,2012-01-01, 107 | {first_id + 1},JOE,2,2012-01-02, 108 | {first_id + 2},JANE,3,2012-01-03, 109 | """.encode(), 110 | ) 111 | 112 | @mock.patch("django.db.connection.validate_no_atomic_block") 113 | def test_export_header_setting(self, _): 114 | self._load_objects(self.name_path) 115 | MockObject.objects.to_csv(self.export_path) 116 | reader = csv.DictReader(open(self.export_path)) 117 | self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader]) 118 | MockObject.objects.to_csv(self.export_path, header=True) 119 | reader = csv.DictReader(open(self.export_path)) 120 | self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader]) 121 | MockObject.objects.to_csv(self.export_path, header=False) 122 | reader = csv.DictReader(open(self.export_path)) 123 | with self.assertRaises(KeyError): 124 | [i["name"] for i in reader] 125 | self.assertTrue(["JOE", "JANE"], [i["BEN"] for i in reader]) 126 | 127 | @mock.patch("django.db.connection.validate_no_atomic_block") 128 | def test_export_delimiter(self, _): 129 | self._load_objects(self.name_path) 130 | MockObject.objects.to_csv(self.export_path, delimiter=";") 131 | self.assertTrue(os.path.exists(self.export_path)) 132 | reader = csv.DictReader(open(self.export_path), delimiter=";") 133 | self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader]) 134 | 135 | @mock.patch("django.db.connection.validate_no_atomic_block") 136 | def test_export_null_string(self, _): 137 | self._load_objects(self.blank_null_path) 138 | MockObject.objects.to_csv(self.export_path) 139 | self.assertTrue(os.path.exists(self.export_path)) 140 | reader = csv.DictReader(open(self.export_path)) 141 | self.assertTrue(["1", "2", "3", "", ""], [i["num"] for i in reader]) 142 | 143 | MockObject.objects.to_csv(self.export_path, null="NULL") 144 | self.assertTrue(os.path.exists(self.export_path)) 145 | reader = csv.DictReader(open(self.export_path)) 146 | self.assertTrue(["1", "2", "3", "NULL", ""], [i["num"] for i in reader]) 147 | 148 | @mock.patch("django.db.connection.validate_no_atomic_block") 149 | def test_export_quote_character_and_force_quoting(self, _): 150 | self._load_objects(self.name_path) 151 | 152 | # Single column being force_quoted with pipes 153 | MockObject.objects.to_csv(self.export_path, quote="|", force_quote="NAME") 154 | self.assertTrue(os.path.exists(self.export_path)) 155 | reader = csv.DictReader(open(self.export_path)) 156 | self.assertTrue(["|BEN|", "|JOE|", "|JANE|"], [i["name"] for i in reader]) 157 | 158 | # 

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_export_quote_character_and_force_quoting(self, _):
        self._load_objects(self.name_path)

        # Single column being force_quoted with pipes
        MockObject.objects.to_csv(self.export_path, quote="|", force_quote="NAME")
        self.assertTrue(os.path.exists(self.export_path))
        reader = csv.DictReader(open(self.export_path))
        self.assertEqual(["|BEN|", "|JOE|", "|JANE|"], [i["name"] for i in reader])

        # Multiple columns passed as a list and force_quoted with pipes
        MockObject.objects.to_csv(
            self.export_path, quote="|", force_quote=["NAME", "DT"]
        )
        self.assertTrue(os.path.exists(self.export_path))
        reader = csv.DictReader(open(self.export_path))
        self.assertEqual(
            [
                ("|BEN|", "|2012-01-01|"),
                ("|JOE|", "|2012-01-02|"),
                ("|JANE|", "|2012-01-03|"),
            ],
            [(i["name"], i["dt"]) for i in reader],
        )

        # All columns force_quoted with pipes
        MockObject.objects.to_csv(self.export_path, quote="|", force_quote=True)
        self.assertTrue(os.path.exists(self.export_path))
        reader = csv.DictReader(open(self.export_path))
        row = next(reader)
        # Skip the id column and the trailing NULL parent_id, which
        # FORCE_QUOTE leaves unquoted.
        self.assertEqual(["|BEN|", "|1|", "|2012-01-01|"], list(row.values())[1:4])

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_export_encoding(self, _):
        self._load_objects(self.name_path)

        # The function should pass on valid inputs ('utf-8', 'Unicode', 'LATIN2').
        # If these don't raise an error, they passed.
        MockObject.objects.to_csv(self.export_path, encoding="utf-8")
        MockObject.objects.to_csv(self.export_path, encoding="Unicode")
        MockObject.objects.to_csv(self.export_path, encoding="LATIN2")

        # The function should fail on known invalid inputs ('ASCII', 'utf-16')
        with pytest.raises(Error) as exc_info:
            # Since `to_csv` causes a db error, we need an atomic block to make
            # sure the db connection is restored, so that e.g. the next
            # assertion and our teardown can run.
            with transaction.atomic():
                MockObject.objects.to_csv(self.export_path, encoding="utf-16")
        assert "must be a valid encoding" in str(exc_info.value)

        with pytest.raises(Error) as exc_info2:
            # Since `to_csv` causes a db error, we need an atomic block to make
            # sure the db connection is restored, so that e.g. our teardown
            # can run.
            with transaction.atomic():
                MockObject.objects.to_csv(self.export_path, encoding="ASCII")
        assert "must be a valid encoding" in str(exc_info2.value)
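
    # PostgreSQL validates the ENCODING option itself: "Unicode" and "LATIN2"
    # are recognized server encoding names, while "utf-16" and "ASCII" are
    # not, which appears to be why the two failing calls above are rejected
    # on the server side.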

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_export_escape_character(self, _):
        self._load_objects(self.name_path)

        # The function should not fail on known valid inputs
        MockObject.objects.to_csv(self.export_path, escape="-")

        # The function should fail on known invalid inputs
        with pytest.raises(Error) as exc_info:
            # Since `to_csv` causes a db error, we need an atomic block to make
            # sure the db connection is restored, so that e.g. our teardown
            # can run.
            with transaction.atomic():
                MockObject.objects.to_csv(self.export_path, escape="--")
        assert "escape must be a single" in str(exc_info.value)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_filter(self, _):
        self._load_objects(self.name_path)
        MockObject.objects.filter(name="BEN").to_csv(self.export_path)
        reader = csv.DictReader(open(self.export_path))
        self.assertEqual(["BEN"], [i["name"] for i in reader])

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_fewer_fields(self, _):
        self._load_objects(self.name_path)
        MockObject.objects.to_csv(self.export_path, "name")
        reader = csv.DictReader(open(self.export_path))
        for row in reader:
            self.assertIn(row["name"], ["BEN", "JOE", "JANE"])
            self.assertEqual(len(row.keys()), 1)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_related_fields(self, _):
        MockFKObject.objects.from_csv(
            self.foreign_path,
            mapping=dict(
                id="NUMBER", name="NAME", number="NUMBER", dt="DATE", parent="PARENT"
            ),
        )
        MockFKObject.objects.to_csv(
            self.export_path, "name", "parent__id", "parent__name"
        )
        reader = csv.DictReader(open(self.export_path))
        for row in reader:
            self.assertIn(row["parent_id"], ["1", "2", "3"])
            self.assertEqual(len(row.keys()), 3)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_annotate(self, _):
        self._load_objects(self.name_path)
        MockObject.objects.annotate(name_count=Count("name")).to_csv(self.export_path)
        reader = csv.DictReader(open(self.export_path))
        for row in reader:
            self.assertIn("name_count", row)
            self.assertEqual(row["name_count"], "1")

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_extra(self, _):
        self._load_objects(self.name_path)
        MockObject.objects.extra(select={"lower": 'LOWER("name")'}).to_csv(
            self.export_path
        )
        reader = csv.DictReader(open(self.export_path))
        for row in reader:
            self.assertIn("lower", row)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_export_multi_db(self, _):
        self._load_objects(self.name_path)
        self._load_secondary_objects(self.secondarydb_path)

        MockObject.objects.to_csv(self.export_path)
        self.assertTrue(os.path.exists(self.export_path))
        reader = csv.DictReader(open(self.export_path))
        self.assertEqual(["BEN", "JOE", "JANE"], [i["name"] for i in reader])

        SecondaryMockObject.objects.to_csv(self.export_path)
        self.assertTrue(os.path.exists(self.export_path))
        reader = csv.DictReader(open(self.export_path))
        items = [i["text"] for i in reader]
        self.assertEqual(len(items), 3)
        self.assertEqual(
            ["SECONDARY TEXT 1", "SECONDARY TEXT 2", "SECONDARY TEXT 3"], items
        )
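

# A minimal sketch of the loading API the next class exercises, with
# hypothetical model and file names:
#
#   MyModel.objects.from_csv("data.csv", dict(name="NAME", number="NUMBER"))
#   row_count = MyModel.objects.count()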


class PostgresCopyFromTest(BaseTest):
    def test_bad_call(self):
        with self.assertRaises(TypeError):
            CopyMapping()

    def test_bad_csv(self):
        with self.assertRaises(ValueError):
            CopyMapping(
                MockObject,
                "/foobar.csv",
                dict(name="NAME", number="NUMBER", dt="DATE"),
                using="sqlite",
            )

    def test_bad_backend(self):
        with self.assertRaises(TypeError):
            CopyMapping(
                MockObject,
                self.name_path,
                dict(name="NAME", number="NUMBER", dt="DATE"),
                using="sqlite",
            )

    def test_bad_header(self):
        with self.assertRaises(ValueError):
            CopyMapping(
                MockObject,
                self.name_path,
                dict(name="NAME1", number="NUMBER", dt="DATE"),
            )

    def test_bad_field(self):
        with self.assertRaises(FieldDoesNotExist):
            CopyMapping(
                MockObject,
                self.name_path,
                dict(name1="NAME", number="NUMBER", dt="DATE"),
            )

    def test_limited_fields(self):
        CopyMapping(
            LimitedMockObject,
            self.name_path,
            dict(name="NAME", dt="DATE"),
        )

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_simple_save_with_fileobject(self, _):
        with open(self.name_path) as f:
            MockObject.objects.from_csv(
                f, dict(name="NAME", number="NUMBER", dt="DATE")
            )
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_save_with_binary_fileobject(self, _):
        with open(self.name_path, "rb") as f:
            MockObject.objects.from_csv(
                f, dict(name="NAME", number="NUMBER", dt="DATE")
            )
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    def test_atomic_block(self):
        # from_csv refuses to run inside an open transaction
        with transaction.atomic():
            with self.assertRaises(TransactionManagementError):
                with open(self.name_path) as f:
                    MockObject.objects.from_csv(
                        f, dict(name="NAME", number="NUMBER", dt="DATE")
                    )

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_simple_save(self, _):
        insert_count = MockObject.objects.from_csv(
            self.name_path, dict(name="NAME", number="NUMBER", dt="DATE")
        )
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
        self.assertEqual(insert_count, 3)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_loud_save(self, _):
        MockObject.objects.from_csv(
            self.name_path,
            mapping=dict(name="NAME", number="NUMBER", dt="DATE"),
            silent=False,
        )
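
    # When no mapping is passed, from_csv appears to match CSV headers against
    # model field names directly: matching_headers.csv (name/number/dt) loads
    # without a mapping below, while names.csv (NAME/NUMBER/DATE) raises
    # FieldDoesNotExist.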

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_match_heading(self, _):
        MockObject.objects.from_csv(self.matching_headers_path)
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_bad_match_heading(self, _):
        with self.assertRaises(FieldDoesNotExist):
            MockObject.objects.from_csv(self.name_path)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_limited_save(self, _):
        LimitedMockObject.objects.from_csv(self.name_path, dict(name="NAME", dt="DATE"))
        self.assertEqual(LimitedMockObject.objects.count(), 3)
        self.assertEqual(LimitedMockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_save_foreign_key(self, _):
        MockFKObject.objects.from_csv(
            self.foreign_path,
            dict(id="NUMBER", name="NAME", number="NUMBER", dt="DATE", parent="PARENT"),
        )
        self.assertEqual(MockFKObject.objects.count(), 3)
        self.assertEqual(MockFKObject.objects.get(name="BEN").parent_id, 3)
        self.assertEqual(MockFKObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_save_foreign_key_by_id(self, _):
        MockFKObject.objects.from_csv(
            self.foreign_path,
            dict(
                id="NUMBER", name="NAME", number="NUMBER", dt="DATE", parent_id="PARENT"
            ),
        )
        self.assertEqual(MockFKObject.objects.count(), 3)
        self.assertEqual(MockFKObject.objects.get(name="BEN").parent_id, 3)
        self.assertEqual(MockFKObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_save_pk_field_type(self, _):
        # Django casts PK fields to "serial"
        MockObject.objects.from_csv(
            self.name_path,
            dict(id="NUMBER", name="NAME", dt="DATE"),
        )
        self.assertEqual(MockObject.objects.count(), 3)

    def test_silent_save(self):
        c = CopyMapping(
            MockObject,
            self.name_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
        )
        c.save(silent=True)
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_pipe_save(self, _):
        MockObject.objects.from_csv(
            self.pipe_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            delimiter="|",
        )
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_quote_save(self, _):
        MockObject.objects.from_csv(
            self.quote_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            delimiter="\t",
            quote_character="`",
        )
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(number=1).name, "B`EN")
        self.assertEqual(MockObject.objects.get(number=2).name, "JO\tE")
        self.assertEqual(MockObject.objects.get(number=3).name, 'JAN"E')

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_null_save(self, _):
        MockObject.objects.from_csv(
            self.null_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            null="",
        )
        self.assertEqual(MockObject.objects.count(), 5)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertIsNone(MockObject.objects.get(name="NULLBOY").number)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
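
    # PostgreSQL's COPY options differ here: FORCE_NOT_NULL stops the named
    # columns from being matched against the null string (so blanks load as
    # empty strings), while FORCE_NULL matches even quoted values against it
    # (so blanks load as NULL).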

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_force_not_null_save(self, _):
        MockBlankObject.objects.from_csv(
            self.blank_null_path,
            dict(name="NAME", number="NUMBER", dt="DATE", color="COLOR"),
            force_not_null=("COLOR",),
        )
        self.assertEqual(MockBlankObject.objects.count(), 5)
        self.assertEqual(MockBlankObject.objects.get(name="BEN").color, "red")
        self.assertEqual(MockBlankObject.objects.get(name="NULLBOY").color, "")
        self.assertEqual(MockBlankObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_force_null_save(self, _):
        MockObject.objects.from_csv(
            self.null_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            force_null=("NUMBER",),
        )
        self.assertEqual(MockObject.objects.count(), 5)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertIsNone(MockObject.objects.get(name="NULLBOY").number)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_backwards_save(self, _):
        MockObject.objects.from_csv(
            self.backwards_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
        )
        self.assertEqual(MockObject.objects.count(), 3)
        self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_field_override_save(self, _):
        MockObject.objects.from_csv(
            self.null_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
        )
        self.assertEqual(MockObject.objects.count(), 5)
        self.assertIsNone(MockObject.objects.get(name="BADBOY").number)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_encoding_save(self, _):
        MockObject.objects.from_csv(
            self.null_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            encoding="UTF-8",
        )
        self.assertEqual(MockObject.objects.count(), 5)
        self.assertIsNone(MockObject.objects.get(name="BADBOY").number)
        self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_ignore_conflicts(self, _):
        UniqueMockObject.objects.from_csv(
            self.name_path, dict(name="NAME"), ignore_conflicts=True
        )
        UniqueMockObject.objects.from_csv(
            self.name_path, dict(name="NAME"), ignore_conflicts=True
        )

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_static_values(self, _):
        ExtendedMockObject.objects.from_csv(
            self.name_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            static_mapping=dict(static_val=1, static_string="test"),
        )
        self.assertEqual(ExtendedMockObject.objects.filter(static_val=1).count(), 3)
        self.assertEqual(
            ExtendedMockObject.objects.filter(static_string="test").count(), 3
        )

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_bad_static_values(self, _):
        with self.assertRaises(ValueError):
            ExtendedMockObject.objects.from_csv(
                self.name_path,
                dict(name="NAME", number="NUMBER", dt="DATE"),
                encoding="UTF-8",
                static_mapping=dict(static_bad=1),
            )
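
    # In the overload test below a single CSV column feeds several model
    # fields at once; each target field appears to apply its own
    # copy_template, which is how one NAME column yields name, lower_name
    # and upper_name values.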
name="NAME", 574 | lower_name="NAME", 575 | upper_name="NAME", 576 | number="NUMBER", 577 | dt="DATE", 578 | ), 579 | ) 580 | self.assertEqual(OverloadMockObject.objects.count(), 3) 581 | self.assertEqual(OverloadMockObject.objects.get(name="ben").number, 1) 582 | self.assertEqual(OverloadMockObject.objects.get(lower_name="ben").number, 1) 583 | self.assertEqual(OverloadMockObject.objects.get(upper_name="BEN").number, 1) 584 | self.assertEqual( 585 | OverloadMockObject.objects.get(name="ben").dt, date(2012, 1, 1) 586 | ) 587 | omo = OverloadMockObject.objects.first() 588 | self.assertEqual(omo.name.lower(), omo.lower_name) 589 | 590 | def test_missing_overload_field(self): 591 | with self.assertRaises(FieldDoesNotExist): 592 | CopyMapping( 593 | OverloadMockObject, 594 | self.name_path, 595 | dict(name="NAME", number="NUMBER", dt="DATE", missing="NAME"), 596 | ) 597 | 598 | def test_save_steps(self): 599 | c = CopyMapping( 600 | MockObject, 601 | self.name_path, 602 | dict(name="NAME", number="NUMBER", dt="DATE"), 603 | ) 604 | cursor = c.conn.cursor() 605 | 606 | c.create(cursor) 607 | cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name) 608 | self.assertEqual(cursor.fetchone()[0], 0) 609 | cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table) 610 | self.assertEqual(cursor.fetchone()[0], 0) 611 | 612 | c.copy(cursor) 613 | cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name) 614 | self.assertEqual(cursor.fetchone()[0], 3) 615 | cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table) 616 | self.assertEqual(cursor.fetchone()[0], 0) 617 | 618 | c.insert(cursor) 619 | cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table) 620 | self.assertEqual(cursor.fetchone()[0], 3) 621 | 622 | c.drop(cursor) 623 | self.assertEqual(cursor.statusmessage, "DROP TABLE") 624 | cursor.close() 625 | 626 | def test_save_steps_with_temp_table_name_override(self): 627 | c = CopyMapping( 628 | MockObject, 629 | self.name_path, 630 | dict(name="NAME", number="NUMBER", dt="DATE"), 631 | temp_table_name="overridden_temp_table_name", 632 | ) 633 | cursor = c.conn.cursor() 634 | 635 | c.create(cursor) 636 | cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name) 637 | self.assertEqual(cursor.fetchone()[0], 0) 638 | cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table) 639 | self.assertEqual(cursor.fetchone()[0], 0) 640 | 641 | c.copy(cursor) 642 | cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name) 643 | self.assertEqual(cursor.fetchone()[0], 3) 644 | cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table) 645 | self.assertEqual(cursor.fetchone()[0], 0) 646 | 647 | c.insert(cursor) 648 | cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table) 649 | self.assertEqual(cursor.fetchone()[0], 3) 650 | 651 | c.drop(cursor) 652 | self.assertEqual(cursor.statusmessage, "DROP TABLE") 653 | cursor.close() 654 | 655 | def test_hooks(self): 656 | c = HookedCopyMapping( 657 | MockObject, 658 | self.name_path, 659 | dict(name="NAME", number="NUMBER", dt="DATE"), 660 | ) 661 | cursor = c.conn.cursor() 662 | 663 | c.create(cursor) 664 | self.assertRaises(AttributeError, lambda: c.ran_pre_copy) 665 | self.assertRaises(AttributeError, lambda: c.ran_post_copy) 666 | self.assertRaises(AttributeError, lambda: c.ran_pre_insert) 667 | self.assertRaises(AttributeError, lambda: c.ran_post_insert) 668 | 669 | c.copy(cursor) 670 | self.assertTrue(c.ran_pre_copy) 671 | 

    def test_save_steps(self):
        c = CopyMapping(
            MockObject,
            self.name_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
        )
        cursor = c.conn.cursor()

        c.create(cursor)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
        self.assertEqual(cursor.fetchone()[0], 0)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
        self.assertEqual(cursor.fetchone()[0], 0)

        c.copy(cursor)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
        self.assertEqual(cursor.fetchone()[0], 3)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
        self.assertEqual(cursor.fetchone()[0], 0)

        c.insert(cursor)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
        self.assertEqual(cursor.fetchone()[0], 3)

        c.drop(cursor)
        self.assertEqual(cursor.statusmessage, "DROP TABLE")
        cursor.close()

    def test_save_steps_with_temp_table_name_override(self):
        c = CopyMapping(
            MockObject,
            self.name_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
            temp_table_name="overridden_temp_table_name",
        )
        cursor = c.conn.cursor()

        c.create(cursor)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
        self.assertEqual(cursor.fetchone()[0], 0)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
        self.assertEqual(cursor.fetchone()[0], 0)

        c.copy(cursor)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
        self.assertEqual(cursor.fetchone()[0], 3)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
        self.assertEqual(cursor.fetchone()[0], 0)

        c.insert(cursor)
        cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
        self.assertEqual(cursor.fetchone()[0], 3)

        c.drop(cursor)
        self.assertEqual(cursor.statusmessage, "DROP TABLE")
        cursor.close()

    def test_hooks(self):
        c = HookedCopyMapping(
            MockObject,
            self.name_path,
            dict(name="NAME", number="NUMBER", dt="DATE"),
        )
        cursor = c.conn.cursor()

        c.create(cursor)
        # The hook flags are only set once each stage has run
        self.assertRaises(AttributeError, lambda: c.ran_pre_copy)
        self.assertRaises(AttributeError, lambda: c.ran_post_copy)
        self.assertRaises(AttributeError, lambda: c.ran_pre_insert)
        self.assertRaises(AttributeError, lambda: c.ran_post_insert)

        c.copy(cursor)
        self.assertTrue(c.ran_pre_copy)
        self.assertTrue(c.ran_post_copy)
        self.assertRaises(AttributeError, lambda: c.ran_pre_insert)
        self.assertRaises(AttributeError, lambda: c.ran_post_insert)

        c.insert(cursor)
        self.assertTrue(c.ran_pre_copy)
        self.assertTrue(c.ran_post_copy)
        self.assertTrue(c.ran_pre_insert)
        self.assertTrue(c.ran_post_insert)

        c.drop(cursor)
        cursor.close()


class MultiDbTest(BaseTest):
    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_from_csv(self, _):
        MockObject.objects.from_csv(
            self.name_path, dict(name="NAME", number="NUMBER", dt="DATE"), using="other"
        )
        self.assertEqual(MockObject.objects.count(), 0)
        self.assertEqual(MockObject.objects.using("other").count(), 3)
        self.assertEqual(MockObject.objects.using("other").get(name="BEN").number, 1)
        self.assertEqual(
            MockObject.objects.using("other").get(name="BEN").dt, date(2012, 1, 1)
        )
        MockObject.objects.using("other").all().delete()

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_to_csv(self, _):
        # First with the default database
        mapping = dict(name="NAME", number="NUMBER", dt="DATE")
        MockObject.objects.from_csv(self.name_path, mapping)
        export_path = os.path.join(os.path.dirname(__file__), "default.csv")
        MockObject.objects.to_csv(export_path)
        self.assertTrue(os.path.exists(export_path))
        reader = csv.DictReader(open(export_path))
        self.assertEqual(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
        os.remove(export_path)

    @mock.patch("django.db.connection.validate_no_atomic_block")
    def test_to_csv_from_alt_db(self, _):
        # Next with the other database
        mapping = dict(name="NAME", number="NUMBER", dt="DATE")
        MockObject.objects.from_csv(self.name_path, mapping, using="other")
        export_path = os.path.join(os.path.dirname(__file__), "other.csv")
        MockObject.objects.using("other").to_csv(export_path)
        self.assertTrue(os.path.exists(export_path))
        reader = csv.DictReader(open(export_path))
        self.assertEqual(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
        MockObject.objects.using("other").all().delete()
        os.remove(export_path)
--------------------------------------------------------------------------------