├── tests
    ├── __init__.py
    ├── urls.py
    ├── data
    │   ├── secondary_db.csv
    │   ├── names.csv
    │   ├── pipes.csv
    │   ├── backwards.csv
    │   ├── quote.csv
    │   ├── matching_headers.csv
    │   ├── foreignkeys.csv
    │   ├── nulls.csv
    │   └── blanknulls.csv
    ├── fields.py
    ├── router.py
    ├── conftest.py
    ├── models.py
    └── test_queries.py
├── postgres_copy
    ├── py.typed
    ├── __init__.py
    ├── copy_to.py
    ├── psycopg_compat.py
    ├── managers.py
    └── copy_from.py
├── .github
    ├── CODEOWNERS
    └── workflows
    │   ├── docs.yml
    │   └── continuous-deployment.yaml
├── MANIFEST.in
├── README.md
├── docs
    ├── conf.py
    ├── Makefile
    └── index.rst
├── .gitignore
├── LICENSE
├── .pre-commit-config.yaml
├── pyproject.toml
├── CODE_OF_CONDUCT.md
└── CLAUDE.md


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/postgres_copy/py.typed:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | *       @palewire
2 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | 


--------------------------------------------------------------------------------
/tests/urls.py:
--------------------------------------------------------------------------------
1 | # This file is required by Django but not used in tests
2 | urlpatterns = []
3 | 


--------------------------------------------------------------------------------
/tests/data/secondary_db.csv:
--------------------------------------------------------------------------------
1 | TEXT
2 | SECONDARY TEXT 1
3 | SECONDARY TEXT 2
4 | SECONDARY TEXT 3
5 | 


--------------------------------------------------------------------------------
/tests/data/names.csv:
--------------------------------------------------------------------------------
1 | NAME,NUMBER,DATE
2 | ben,1,2012-01-01
3 | joe,2,2012-01-02
4 | jane,3,2012-01-03
5 | 


--------------------------------------------------------------------------------
/tests/data/pipes.csv:
--------------------------------------------------------------------------------
1 | NAME|NUMBER|DATE
2 | ben|1|2012-01-01
3 | joe|2|2012-01-02
4 | jane|3|2012-01-03
5 | 


--------------------------------------------------------------------------------
/tests/data/backwards.csv:
--------------------------------------------------------------------------------
1 | NUMBER,NAME,DATE
2 | 1,ben,2012-01-01
3 | 2,joe,2012-01-02
4 | 3,jane,2012-01-03
5 | 


--------------------------------------------------------------------------------
/tests/data/quote.csv:
--------------------------------------------------------------------------------
1 | NAME	NUMBER	DATE
2 | `b``en`	1	2012-01-01
3 | `jo	e`	2	2012-01-02
4 | jan"e	3	2012-01-03
5 | 


--------------------------------------------------------------------------------
/tests/data/matching_headers.csv:
--------------------------------------------------------------------------------
1 | name,number,dt
2 | ben,1,2012-01-01
3 | joe,2,2012-01-02
4 | jane,3,2012-01-03
5 | 


--------------------------------------------------------------------------------
/tests/data/foreignkeys.csv:
--------------------------------------------------------------------------------
1 | NAME,NUMBER,DATE,PARENT
2 | ben,1,2012-01-01,3
3 | joe,2,2012-01-02,2
4 | jane,3,2012-01-03,1
5 | 


--------------------------------------------------------------------------------
/tests/data/nulls.csv:
--------------------------------------------------------------------------------
1 | NAME,NUMBER,DATE
2 | ben,1,2012-01-01
3 | joe,2,2012-01-02
4 | jane,3,2012-01-03
5 | nullboy,,2012-01-04
6 | badboy,x,2012-01-05
7 | 


--------------------------------------------------------------------------------
/tests/data/blanknulls.csv:
--------------------------------------------------------------------------------
1 | NAME,NUMBER,DATE,COLOR
2 | ben,1,2012-01-01,red
3 | joe,2,2012-01-02,green
4 | jane,3,2012-01-03,orange
5 | nullboy,,2012-01-04,
6 | badboy,x,2012-01-05,blue
7 | 


--------------------------------------------------------------------------------
/tests/fields.py:
--------------------------------------------------------------------------------
 1 | from django.db.models.fields import IntegerField
 2 | 
 3 | 
 4 | class MyIntegerField(IntegerField):
 5 |     copy_template = """
 6 |         CASE
 7 |             WHEN "%(name)s" = 'x' THEN null
 8 |             ELSE "%(name)s"::int
 9 |         END
10 |     """
11 | 


--------------------------------------------------------------------------------
/postgres_copy/__init__.py:
--------------------------------------------------------------------------------
 1 | from .copy_from import CopyMapping
 2 | from .copy_to import CopyToQuery, SQLCopyToCompiler
 3 | from .managers import CopyManager, CopyQuerySet
 4 | 
 5 | __all__ = (
 6 |     "CopyManager",
 7 |     "CopyMapping",
 8 |     "CopyQuerySet",
 9 |     "CopyToQuery",
10 |     "SQLCopyToCompiler",
11 | )
12 | 


--------------------------------------------------------------------------------
/tests/router.py:
--------------------------------------------------------------------------------
 1 | class CustomRouter:
 2 |     def db_for_read(self, model, **hints):
 3 |         if model.__name__ == "SecondaryMockObject":
 4 |             return "secondary"
 5 |         return None
 6 | 
 7 |     def db_for_write(self, model, **hints):
 8 |         if model.__name__ == "SecondaryMockObject":
 9 |             return "secondary"
10 |         return None
11 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ### Links
 2 | 
 3 | -  Documentation: [palewi.re/docs/django-postgres-copy/](https://palewi.re/docs/django-postgres-copy/)
 4 | -  Issues: [github.com/palewire/django-postgres-copy/issues](https://github.com/palewire/django-postgres-copy/issues)
 5 | -  Packaging: [pypi.python.org/pypi/django-postgres-copy](https://pypi.python.org/pypi/django-postgres-copy)
 6 | -  Testing: [github.com/palewire/django-postgres-copy/actions](https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml)
 7 | 
 8 | ### Contributing
 9 | 
10 | To set up a development environment, run `uv sync --all-extras` after forking and cloning the repository.
11 | To run tests, use `uv run pytest tests`
12 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
 1 | """Configure Sphinx configuration."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import os
 6 | import sys
 7 | from datetime import datetime
 8 | from typing import Any
 9 | 
10 | sys.path.insert(0, os.path.abspath(".."))
11 | 
12 | source_suffix = ".rst"
13 | master_doc = "index"
14 | 
15 | project = "django-postgres-copy"
16 | year = datetime.now().year
17 | copyright = f"{year} palewire"
18 | 
19 | exclude_patterns = ["_build"]
20 | 
21 | html_theme = "palewire"
22 | html_sidebars: dict[Any, Any] = {}
23 | html_theme_options: dict[Any, Any] = {
24 |     "canonical_url": f"https://palewi.re/docs/{project}/",
25 |     "nosidebar": True,
26 | }
27 | 
28 | pygments_style = "sphinx"
29 | 
30 | extensions = [
31 |     "sphinx.ext.autodoc",
32 |     "sphinx.ext.napoleon",
33 | ]
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | build/
 8 | develop-eggs/
 9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | MANIFEST
23 | 
24 | # Unit test / coverage reports
25 | htmlcov/
26 | .tox/
27 | .coverage
28 | .coverage.*
29 | .cache
30 | nosetests.xml
31 | coverage.xml
32 | *.cover
33 | .hypothesis/
34 | .pytest_cache/
35 | 
36 | # Environments
37 | .env
38 | .venv
39 | env/
40 | venv/
41 | ENV/
42 | env.bak/
43 | venv.bak/
44 | 
45 | # uv specific
46 | .uv/
47 | .venv/
48 | 
49 | # Pipenv
50 | Pipfile.lock
51 | 
52 | # Django
53 | *.log
54 | local_settings.py
55 | db.sqlite3
56 | db.sqlite3-journal
57 | 
58 | # Sphinx documentation
59 | docs/_build/
60 | 
61 | # VS Code
62 | .vscode/
63 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2017 California Civic Data Coalition
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: 'https://github.com/pre-commit/pre-commit-hooks'
 5 |     rev: v5.0.0
 6 |     hooks:
 7 |       - id: trailing-whitespace
 8 |       - id: end-of-file-fixer
 9 |       - id: check-yaml
10 |       - id: check-added-large-files
11 |         args:
12 |           - '--maxkb=100000'
13 |       - id: fix-byte-order-marker
14 |       - id: check-case-conflict
15 |       - id: check-json
16 |       - id: mixed-line-ending
17 |       - id: check-ast
18 |       - id: check-merge-conflict
19 | 
20 |   - repo: 'https://github.com/astral-sh/ruff-pre-commit'
21 |     rev: v0.12.0
22 |     hooks:
23 |       - id: ruff
24 |         args:
25 |           - '--fix'
26 |       - id: ruff-format
27 | 
28 |   - repo: 'https://github.com/asottile/blacken-docs'
29 |     rev: 1.19.1
30 |     hooks:
31 |       - id: blacken-docs
32 |         additional_dependencies:
33 |           - black
34 | 
35 |   - repo: 'https://github.com/asottile/pyupgrade'
36 |     rev: v3.20.0
37 |     hooks:
38 |       - id: pyupgrade
39 |         args:
40 |           - '--py37-plus'
41 | 
42 |   - repo: 'https://github.com/pre-commit/mirrors-mypy'
43 |     rev: v1.16.1
44 |     hooks:
45 |       - id: mypy
46 |         exclude: tests/.*
47 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: "Build documentation"
 2 | 
 3 | on:
 4 |   push:
 5 |   workflow_dispatch:
 6 | 
 7 | jobs:
 8 |   build:
 9 |     name: Build
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - id: checkout
13 |         name: Checkout
14 |         uses: actions/checkout@v4
15 | 
16 |       - id: install-uv
17 |         name: Install uv
18 |         uses: astral-sh/setup-uv@v6
19 |         with:
20 |           version: "latest"
21 |           enable-cache: true
22 |           cache-dependency-glob: '**/pyproject.toml'
23 | 
24 |       - id: install-python
25 |         name: Install Python
26 |         run: uv python install 3.13
27 | 
28 |       - id: install-python-dependencies
29 |         name: Install Python dependencies
30 |         run: uv sync --extra docs
31 | 
32 |       - id: build-sphinx-documentation
33 |         name: Build Sphinx documentation
34 |         run: uv run sphinx-build -M html ./docs ./_build/
35 | 
36 |       - id: upload-release-candidate
37 |         name: Upload release candidate
38 |         uses: actions/upload-artifact@v4
39 |         with:
40 |           name: release-candidate
41 |           path: ./_build/html/
42 | 
43 |   deploy:
44 |     name: Deploy
45 |     runs-on: ubuntu-latest
46 |     needs: build
47 |     if: ${{ github.ref_name == 'main' }}
48 |     steps:
49 |       - name: Download release candidate
50 |         uses: actions/download-artifact@v4
51 |         with:
52 |           name: release-candidate
53 |           path: ./docs/
54 | 
55 |       - id: configure-aws
56 |         name: Configure AWS Credentials
57 |         uses: aws-actions/configure-aws-credentials@v4
58 |         with:
59 |           aws-access-key-id: ${{ secrets.PALEWIRE_DOCS_AWS_ACCESS_KEY_ID }}
60 |           aws-secret-access-key: ${{ secrets.PALEWIRE_DOCS_AWS_SECRET_ACCESS_KEY }}
61 |           aws-region: us-east-1
62 | 
63 |       - id: upload-to-s3
64 |         name: Upload documentation to Amazon S3
65 |         uses: datadesk/delivery-deploy-action@v1
66 |         with:
67 |           bucket: ${{ secrets.PALEWIRE_DOCS_AWS_BUCKET }}
68 |           base-path: django-postgres-copy/
69 |           dir: ./docs/
70 |           should-cache: false
71 |           use-accelerate-endpoint: false
72 |           public: true
73 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | from django.conf import settings
 4 | 
 5 | ROOT_DIR = Path(__file__).parent.parent
 6 | PG_USER = os.environ.get("PG_USER", "postgres")
 7 | 
 8 | 
 9 | def pytest_configure():
10 |     settings.configure(
11 |         DATABASES={
12 |             "default": {
13 |                 "HOST": "localhost",
14 |                 "PORT": 5432,
15 |                 "NAME": "test",
16 |                 "USER": PG_USER,
17 |                 "ENGINE": "django.db.backends.postgresql_psycopg2",
18 |             },
19 |             "other": {
20 |                 "HOST": "localhost",
21 |                 "PORT": 5432,
22 |                 "NAME": "test_alternative",
23 |                 "USER": PG_USER,
24 |                 "ENGINE": "django.db.backends.postgresql_psycopg2",
25 |             },
26 |             "sqlite": {"NAME": "sqlite", "ENGINE": "django.db.backends.sqlite3"},
27 |             "secondary": {
28 |                 "HOST": "localhost",
29 |                 "PORT": 5432,
30 |                 "NAME": "test_secondary",
31 |                 "USER": PG_USER,
32 |                 "ENGINE": "django.db.backends.postgresql_psycopg2",
33 |             },
34 |         },
35 |         INSTALLED_APPS=("tests",),
36 |         DATABASE_ROUTERS=["tests.router.CustomRouter"],
37 |         DEFAULT_AUTO_FIELD="django.db.models.BigAutoField",
38 |         LOGGING={
39 |             "version": 1,
40 |             "disable_existing_loggers": False,
41 |             "handlers": {
42 |                 "file": {
43 |                     "level": "DEBUG",
44 |                     "class": "logging.FileHandler",
45 |                     "filename": ROOT_DIR / "tests.log",
46 |                 },
47 |             },
48 |             "formatters": {
49 |                 "verbose": {
50 |                     "format": "%(levelname)s|%(asctime)s|%(module)s|%(message)s",
51 |                     "datefmt": "%d/%b/%Y %H:%M:%S",
52 |                 }
53 |             },
54 |             "loggers": {
55 |                 "postgres_copy": {
56 |                     "handlers": ["file"],
57 |                     "level": "DEBUG",
58 |                     "propagate": True,
59 |                 },
60 |             },
61 |         },
62 |     )
63 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"]
  3 | build-backend = "setuptools.build_meta"
  4 | 
  5 | [project]
  6 | name = "django-postgres-copy"
  7 | description = "Quickly import and export delimited data with Django support for PostgreSQL's COPY command"
  8 | readme = "README.md"
  9 | authors = [
 10 |     {name = "Ben Welsh", email = "b@palewi.re"},
 11 | ]
 12 | license = "MIT"
 13 | classifiers = [
 14 |     "Development Status :: 5 - Production/Stable",
 15 |     "Operating System :: OS Independent",
 16 |     "Intended Audience :: Developers",
 17 |     "Programming Language :: Python",
 18 |     "Programming Language :: Python :: 3",
 19 |     "Programming Language :: Python :: 3.9",
 20 |     "Programming Language :: Python :: 3.10",
 21 |     "Programming Language :: Python :: 3.11",
 22 |     "Programming Language :: Python :: 3.12",
 23 |     "Programming Language :: Python :: 3.13",
 24 |     "Framework :: Django",
 25 |     "Framework :: Django :: 4.2",
 26 |     "Framework :: Django :: 5.1",
 27 |     "Framework :: Django :: 5.2",
 28 | ]
 29 | requires-python = ">=3.9"
 30 | dependencies = []
 31 | dynamic = ["version"]
 32 | 
 33 | [project.urls]
 34 | Documentation = "https://palewi.re/docs/django-postgres-copy/"
 35 | Source = "https://github.com/palewire/django-postgres-copy"
 36 | Tracker = "https://github.com/palewire/django-postgres-copy/issues"
 37 | Tests = "https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml"
 38 | 
 39 | [tool.setuptools]
 40 | packages = ["postgres_copy"]
 41 | 
 42 | [tool.setuptools.package-data]
 43 | postgres_copy = ["py.typed"]
 44 | 
 45 | [tool.setuptools_scm]
 46 | 
 47 | [tool.pytest]
 48 | python_files = ["test*.py", "test_*.py", "*_test.py"]
 49 | 
 50 | [tool.flake8]
 51 | max-line-length = 119
 52 | ignore = ["D100", "D101", "D102", "D103", "D104", "D106", "D107", "D200", "D205", "D400", "D401", "SIM115", "B006"]
 53 | 
 54 | [tool.mypy]
 55 | python_version = "3.13"
 56 | warn_return_any = true
 57 | warn_unused_configs = true
 58 | disallow_untyped_defs = true
 59 | disallow_incomplete_defs = true
 60 | check_untyped_defs = true
 61 | disallow_untyped_decorators = true
 62 | no_implicit_optional = true
 63 | strict_optional = true
 64 | warn_redundant_casts = true
 65 | warn_unused_ignores = true
 66 | warn_no_return = true
 67 | warn_unreachable = true
 68 | exclude = "^(tests|docs)/.*"
 69 | 
 70 | [[tool.mypy.overrides]]
 71 | module = "django.*"
 72 | ignore_missing_imports = true
 73 | 
 74 | [[tool.mypy.overrides]]
 75 | module = "psycopg.*"
 76 | ignore_missing_imports = true
 77 | 
 78 | [project.optional-dependencies]
 79 | dev = [
 80 |     "coverage",
 81 |     "mock",
 82 |     "pre-commit",
 83 |     "pytest-runner",
 84 |     "pytest-cov",
 85 |     "pytest-env",
 86 |     "pytest-django",
 87 |     "setuptools-scm",
 88 |     "twine",
 89 |     "wheel",
 90 | ]
 91 | mypy = [
 92 |     "mypy",
 93 |     "types-psycopg2",
 94 |     "django-stubs",
 95 | ]
 96 | docs = [
 97 |     "sphinx",
 98 |     "sphinx-autobuild",
 99 |     "sphinx-palewire-theme",
100 |     "myst-parser",
101 | ]
102 | 
103 | 
104 | [tool.pytest.ini_options]
105 | addopts = "-p no:warnings --cov=postgres_copy --cov-branch -cov-report=term-missing:skip-covered --cov-context=test --reuse-db --nomigrations"
106 | testpaths = ["tests"]
107 | python_files = "test_*.py"
108 | 
109 | [tool.coverage.run]
110 | source = ["postgres_copy"]
111 | relative_files = true
112 | omit = [
113 |     "*/__pycache__/*",
114 |     "tests",
115 | ]
116 | 


--------------------------------------------------------------------------------
/.github/workflows/continuous-deployment.yaml:
--------------------------------------------------------------------------------
  1 | name: Testing and distribution
  2 | on:
  3 |   push:
  4 |   pull_request:
  5 |   workflow_dispatch:
  6 | 
  7 | jobs:
  8 |   lint-python:
  9 |     name: Lint Python code
 10 |     runs-on: ubuntu-latest
 11 |     steps:
 12 |       - name: Checkout
 13 |         uses: actions/checkout@v4
 14 | 
 15 |       - name: Check with Ruff
 16 |         uses: astral-sh/ruff-action@v3
 17 |         with:
 18 |           args: 'check --exit-zero --verbose'
 19 | 
 20 |       - name: Format with Ruff
 21 |         uses: astral-sh/ruff-action@v3
 22 |         with:
 23 |           args: 'format --check --verbose'
 24 | 
 25 |   test-python:
 26 |     name: "Test Python"
 27 |     runs-on: ubuntu-latest
 28 |     services:
 29 |       postgres:
 30 |         image: postgres:latest
 31 |         env:
 32 |           POSTGRES_USER: postgres
 33 |           POSTGRES_PASSWORD: postgres
 34 |           POSTGRES_DB: postgres
 35 |         ports:
 36 |           - 5432:5432
 37 |         # needed because the postgres container does not provide a healthcheck
 38 |         options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
 39 |     strategy:
 40 |       matrix:
 41 |         python: ['3.9', '3.10', '3.11', '3.12', '3.13']
 42 |         django: ['4.2', '5.1', '5.2']
 43 |         psycopg: ['psycopg2', 'psycopg']
 44 |         exclude:
 45 |         - python: '3.9'
 46 |           django: '5.1'
 47 |         - python: '3.9'
 48 |           django: '5.2'
 49 |     steps:
 50 |       - name: Checkout
 51 |         uses: actions/checkout@v4
 52 | 
 53 |       - id: install-uv
 54 |         name: Install uv
 55 |         uses: astral-sh/setup-uv@v6
 56 |         with:
 57 |           version: "latest"
 58 |           enable-cache: true
 59 |           cache-dependency-glob: '**/pyproject.toml'
 60 | 
 61 |       - id: install-python
 62 |         name: Install Python
 63 |         run: uv python install ${{ matrix.python }}
 64 | 
 65 |       - name: Install Python dependencies
 66 |         run: |
 67 |           uv sync --extra dev --python ${{ matrix.python }}
 68 |           uv pip install ${{ matrix.psycopg }} django==${{ matrix.django }}
 69 | 
 70 |       - name: Test
 71 |         run: uv run pytest tests --reuse-db --nomigrations
 72 |         env:
 73 |           PGPASSWORD: postgres
 74 | 
 75 |   test-build:
 76 |     name: Build Python package
 77 |     runs-on: ubuntu-latest
 78 |     needs: [test-python,lint-python]
 79 |     steps:
 80 |       - name: Checkout
 81 |         uses: actions/checkout@v4
 82 | 
 83 |       - id: install-uv
 84 |         name: Install uv
 85 |         uses: astral-sh/setup-uv@v6
 86 |         with:
 87 |           version: "latest"
 88 |           enable-cache: true
 89 |           cache-dependency-glob: '**/pyproject.toml'
 90 | 
 91 |       - id: install-python
 92 |         name: Install Python
 93 |         run: uv python install 3.13
 94 | 
 95 |       - id: build
 96 |         name: Build releases
 97 |         run: uv build --sdist --wheel
 98 |         shell: bash
 99 | 
100 |       - id: save
101 |         name: Save artifact
102 |         uses: actions/upload-artifact@v4
103 |         with:
104 |           name: release-candidate
105 |           path: ./dist
106 |           if-no-files-found: error
107 | 
108 |   tag-release:
109 |     name: Tagged PyPI release
110 |     runs-on: ubuntu-latest
111 |     needs: [test-build]
112 |     if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
113 |     steps:
114 |       - name: Fetch artifact
115 |         uses: actions/download-artifact@v4
116 |         with:
117 |           name: release-candidate
118 |           path: ./dist
119 | 
120 |       - name: Publish release
121 |         uses: pypa/gh-action-pypi-publish@release/v1
122 |         with:
123 |           user: __token__
124 |           password: ${{ secrets.PYPI_API_TOKEN }}
125 |           verbose: true
126 | 


--------------------------------------------------------------------------------
/postgres_copy/copy_to.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Handlers for working with PostgreSQL's COPY TO command.
  4 | """
  5 | 
  6 | import logging
  7 | import typing
  8 | from io import BytesIO
  9 | 
 10 | from django.db import connections
 11 | from django.db.models.sql.compiler import SQLCompiler
 12 | from django.db.models.sql.query import Query
 13 | 
 14 | from .psycopg_compat import copy_to
 15 | 
 16 | logger = logging.getLogger(__name__)
 17 | 
 18 | 
 19 | class SQLCopyToCompiler(SQLCompiler):
 20 |     """
 21 |     Custom SQL compiler for creating a COPY TO query (postgres backend only).
 22 |     """
 23 | 
 24 |     def setup_query(self, **kwargs: typing.Any) -> None:
 25 |         """
 26 |         Extend the default SQLCompiler.setup_query to add re-ordering of items in select.
 27 |         """
 28 |         super().setup_query(**kwargs)
 29 |         if self.query.copy_to_fields:
 30 |             self.select = []
 31 |             for field in self.query.copy_to_fields:
 32 |                 # raises error if field is not available
 33 |                 expression = self.query.resolve_ref(field)
 34 |                 selection = (
 35 |                     expression,
 36 |                     self.compile(expression),
 37 |                     field if field in self.query.annotations else None,
 38 |                 )
 39 |                 self.select.append(selection)
 40 | 
 41 |     def execute_sql(
 42 |         self,
 43 |         csv_path_or_obj: typing.Optional[typing.Union[str, typing.BinaryIO]] = None,
 44 |     ) -> typing.Optional[bytes]:
 45 |         """
 46 |         Run the COPY TO query.
 47 |         """
 48 |         logger.debug(f"Copying data to {csv_path_or_obj}")
 49 | 
 50 |         params = self.as_sql()[1]
 51 | 
 52 |         # use stdout to avoid file permission issues
 53 |         with connections[self.using].cursor() as c:
 54 |             # grab the SELECT query
 55 |             select_sql = self.as_sql()[0]
 56 |             # then the COPY TO query
 57 |             copy_to_sql = "COPY ({}) TO STDOUT {} CSV"
 58 |             copy_to_sql = copy_to_sql.format(select_sql, self.query.copy_to_delimiter)
 59 |             # Optional extras
 60 |             options_list = [
 61 |                 self.query.copy_to_header,
 62 |                 self.query.copy_to_null_string,
 63 |                 self.query.copy_to_quote_char,
 64 |                 self.query.copy_to_force_quote,
 65 |                 self.query.copy_to_encoding,
 66 |                 self.query.copy_to_escape,
 67 |             ]
 68 |             options_sql = " ".join([o for o in options_list if o]).strip()
 69 |             if options_sql:
 70 |                 copy_to_sql = copy_to_sql + " " + options_sql
 71 |             # then execute
 72 |             logger.debug(copy_to_sql)
 73 | 
 74 |             # If a file-like object was provided, write it out there.
 75 |             if hasattr(csv_path_or_obj, "write"):
 76 |                 copy_to(
 77 |                     c.cursor,
 78 |                     copy_to_sql,
 79 |                     params,
 80 |                     typing.cast(typing.BinaryIO, csv_path_or_obj),
 81 |                 )
 82 |                 return None
 83 |             # If a file path was provided, write it out there.
 84 |             elif csv_path_or_obj and isinstance(csv_path_or_obj, str):
 85 |                 with open(csv_path_or_obj, "wb") as stdout:
 86 |                     copy_to(c.cursor, copy_to_sql, params, stdout)
 87 |                     return None
 88 |             # If there's no csv_path, return the output as a string.
 89 |             else:
 90 |                 stdout_buffer = BytesIO()
 91 |                 copy_to(c.cursor, copy_to_sql, params, stdout_buffer)
 92 |                 return stdout_buffer.getvalue()
 93 | 
 94 | 
 95 | class CopyToQuery(Query):
 96 |     """
 97 |     Represents a "copy to" SQL query.
 98 |     """
 99 | 
100 |     def get_compiler(
101 |         self,
102 |         using: typing.Optional[str] = None,
103 |         connection: typing.Optional[typing.Any] = None,
104 |     ) -> SQLCopyToCompiler:
105 |         """
106 |         Return a SQLCopyToCompiler object.
107 |         """
108 |         return SQLCopyToCompiler(self, connection, using)
109 | 


--------------------------------------------------------------------------------
/tests/models.py:
--------------------------------------------------------------------------------
  1 | import django
  2 | from django.db import models
  3 | 
  4 | from postgres_copy import CopyManager, CopyMapping
  5 | 
  6 | from .fields import MyIntegerField
  7 | 
  8 | 
  9 | class MockObject(models.Model):
 10 |     name = models.CharField(max_length=500)
 11 |     number = MyIntegerField(null=True, db_column="num")
 12 |     dt = models.DateField(null=True)
 13 |     parent = models.ForeignKey(
 14 |         "MockObject", on_delete=models.CASCADE, null=True, default=None
 15 |     )
 16 |     objects = CopyManager()
 17 | 
 18 |     class Meta:
 19 |         app_label = "tests"
 20 |         unique_together = ("name", "number")
 21 | 
 22 |     def __init__(self, *args, **kwargs):
 23 |         super().__init__(*args, **kwargs)
 24 |         if django.get_version() <= "5.1":
 25 |             self._meta.index_together = ("name", "number")
 26 |         else:
 27 |             self._meta.indexes = [models.Index(fields=["name", "number"])]
 28 | 
 29 |     def copy_name_template(self):
 30 |         return 'upper("%(name)s")'
 31 | 
 32 | 
 33 | class MockFKObject(models.Model):
 34 |     id = models.IntegerField(primary_key=True)
 35 |     name = models.CharField(max_length=500)
 36 |     number = MyIntegerField(null=True, db_column="num")
 37 |     dt = models.DateField(null=True)
 38 |     parent = models.ForeignKey(
 39 |         "MockFKObject", on_delete=models.CASCADE, null=True, default=None
 40 |     )
 41 |     objects = CopyManager()
 42 | 
 43 |     class Meta:
 44 |         app_label = "tests"
 45 | 
 46 |     def copy_name_template(self):
 47 |         return 'upper("%(name)s")'
 48 | 
 49 | 
 50 | class MockBlankObject(models.Model):
 51 |     name = models.CharField(max_length=500)
 52 |     number = MyIntegerField(null=True, db_column="num")
 53 |     dt = models.DateField(null=True)
 54 |     color = models.CharField(max_length=50, blank=True)
 55 |     parent = models.ForeignKey(
 56 |         "MockObject", on_delete=models.CASCADE, null=True, default=None
 57 |     )
 58 |     objects = CopyManager()
 59 | 
 60 |     class Meta:
 61 |         app_label = "tests"
 62 | 
 63 |     def copy_name_template(self):
 64 |         return 'upper("%(name)s")'
 65 | 
 66 | 
 67 | class ExtendedMockObject(models.Model):
 68 |     static_val = models.IntegerField()
 69 |     name = models.CharField(max_length=500)
 70 |     number = MyIntegerField(null=True, db_column="num")
 71 |     dt = models.DateField(null=True)
 72 |     static_string = models.CharField(max_length=5)
 73 |     objects = CopyManager()
 74 | 
 75 |     class Meta:
 76 |         app_label = "tests"
 77 | 
 78 |     def copy_name_template(self):
 79 |         return 'upper("%(name)s")'
 80 | 
 81 | 
 82 | class LimitedMockObject(models.Model):
 83 |     name = models.CharField(max_length=500)
 84 |     dt = models.DateField(null=True)
 85 |     objects = CopyManager()
 86 | 
 87 |     class Meta:
 88 |         app_label = "tests"
 89 | 
 90 |     def copy_name_template(self):
 91 |         return 'upper("%(name)s")'
 92 | 
 93 | 
 94 | class OverloadMockObject(models.Model):
 95 |     name = models.CharField(max_length=500)
 96 |     upper_name = models.CharField(max_length=500)
 97 |     lower_name = models.CharField(max_length=500)
 98 |     number = MyIntegerField(null=True, db_column="num")
 99 |     dt = models.DateField(null=True)
100 |     objects = CopyManager()
101 | 
102 |     class Meta:
103 |         app_label = "tests"
104 | 
105 |     def copy_upper_name_template(self):
106 |         return 'upper("%(name)s")'
107 | 
108 |     def copy_lower_name_template(self):
109 |         return 'lower("%(name)s")'
110 | 
111 | 
112 | class HookedCopyMapping(CopyMapping):
113 |     def pre_copy(self, cursor):
114 |         self.ran_pre_copy = True
115 | 
116 |     def post_copy(self, cursor):
117 |         self.ran_post_copy = True
118 | 
119 |     def pre_insert(self, cursor):
120 |         self.ran_pre_insert = True
121 | 
122 |     def post_insert(self, cursor):
123 |         self.ran_post_insert = True
124 | 
125 | 
126 | class SecondaryMockObject(models.Model):
127 |     text = models.CharField(max_length=500)
128 |     objects = CopyManager()
129 | 
130 | 
131 | class UniqueMockObject(models.Model):
132 |     name = models.CharField(max_length=500, unique=True)
133 |     objects = CopyManager()
134 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity
 10 | and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the
 26 |   overall community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 |   advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 |   address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at
 63 | b@palewi.re.
 64 | All complaints will be reviewed and investigated promptly and fairly.
 65 | 
 66 | All community leaders are obligated to respect the privacy and security of the
 67 | reporter of any incident.
 68 | 
 69 | ## Enforcement Guidelines
 70 | 
 71 | Community leaders will follow these Community Impact Guidelines in determining
 72 | the consequences for any action they deem in violation of this Code of Conduct:
 73 | 
 74 | ### 1. Correction
 75 | 
 76 | **Community Impact**: Use of inappropriate language or other behavior deemed
 77 | unprofessional or unwelcome in the community.
 78 | 
 79 | **Consequence**: A private, written warning from community leaders, providing
 80 | clarity around the nature of the violation and an explanation of why the
 81 | behavior was inappropriate. A public apology may be requested.
 82 | 
 83 | ### 2. Warning
 84 | 
 85 | **Community Impact**: A violation through a single incident or series
 86 | of actions.
 87 | 
 88 | **Consequence**: A warning with consequences for continued behavior. No
 89 | interaction with the people involved, including unsolicited interaction with
 90 | those enforcing the Code of Conduct, for a specified period of time. This
 91 | includes avoiding interactions in community spaces as well as external channels
 92 | like social media. Violating these terms may lead to a temporary or
 93 | permanent ban.
 94 | 
 95 | ### 3. Temporary Ban
 96 | 
 97 | **Community Impact**: A serious violation of community standards, including
 98 | sustained inappropriate behavior.
 99 | 
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 | 
106 | ### 4. Permanent Ban
107 | 
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior,  harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 | 
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 | 
115 | ## Attribution
116 | 
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 | 
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 | 
124 | [homepage]: https://www.contributor-covenant.org
125 | 
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 | 


--------------------------------------------------------------------------------
/postgres_copy/psycopg_compat.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Compatibility layer between psycopg2 and psycopg3 (psycopg) for COPY operations.
  3 | 
  4 | This module provides a unified interface for PostgreSQL COPY operations that works with
  5 | both psycopg2 and psycopg3 database drivers. It automatically detects which driver is
  6 | available and provides appropriate implementations of copy_to and copy_from functions.
  7 | 
  8 | The main differences between psycopg2 and psycopg3 COPY operations:
  9 | 1. psycopg2 uses copy_expert method which takes an SQL string with parameters already inlined
 10 | 2. psycopg3 uses a copy method that returns a context manager and accepts parameters separately
 11 | 3. psycopg3 handles encoding differently, requiring explicit decoding for text destinations
 12 | 
 13 | This module abstracts away these differences, allowing code to work with either driver
 14 | without modification.
 15 | """
 16 | 
 17 | from __future__ import annotations
 18 | 
 19 | import typing
 20 | 
 21 | 
 22 | # Define a protocol for cursor objects that have the methods we need
 23 | class CursorProtocol(typing.Protocol):
 24 |     """Protocol for database cursor objects."""
 25 | 
 26 |     def copy_expert(self, sql: str, file: typing.TextIO | typing.BinaryIO) -> None: ...
 27 |     def copy(
 28 |         self, sql: str, params: typing.Sequence[typing.Any] | None = None
 29 |     ) -> typing.Any: ...
 30 | 
 31 | 
 32 | # Define a protocol for file-like objects
 33 | class FilelikeProtocol(typing.Protocol):
 34 |     """Protocol for file-like objects."""
 35 | 
 36 |     def read(self, size: int = -1) -> str | bytes: ...
 37 |     def write(self, data: str | bytes) -> int: ...
 38 | 
 39 | 
 40 | try:
 41 |     # Try to import psycopg (version 3)
 42 |     import psycopg  # noqa: F401  just detect the presence of psycopg(3)
 43 |     from io import TextIOBase
 44 | 
 45 |     # Buffer size for reading data in chunks
 46 |     BUFFER_SIZE = 128 * 1024
 47 | 
 48 |     # Type alias for text or binary file-like objects
 49 |     FileObj = typing.Union[typing.TextIO, typing.BinaryIO]
 50 | 
 51 |     def copy_to(
 52 |         cursor: CursorProtocol,
 53 |         sql: str,
 54 |         params: typing.Sequence[typing.Any],
 55 |         destination: FileObj,
 56 |     ) -> None:
 57 |         """
 58 |         Copy data from the database to a file-like object using psycopg3.
 59 | 
 60 |         Args:
 61 |             cursor: A psycopg3 cursor object
 62 |             sql: SQL query string with placeholders
 63 |             params: Parameters for the SQL query
 64 |             destination: A file-like object to write the data to
 65 | 
 66 |         The function handles both text and binary destinations appropriately:
 67 |         - For text destinations (TextIOBase), it decodes the binary data from PostgreSQL
 68 |         - For binary destinations, it passes the data through unchanged
 69 |         """
 70 |         # psycopg3 returns binary data that needs to be decoded for text destinations
 71 |         is_text = isinstance(destination, TextIOBase)
 72 | 
 73 |         # Use the psycopg3 copy context manager
 74 |         with cursor.copy(sql, params) as copy:
 75 |             # Read data in chunks until there's no more
 76 |             while True:
 77 |                 data = copy.read()
 78 |                 if not data:
 79 |                     break
 80 | 
 81 |                 # Decode the data if necessary and write to the destination
 82 |                 if is_text:
 83 |                     # For text destinations, we need to decode to str
 84 |                     text_dest = typing.cast(typing.TextIO, destination)
 85 |                     # Handle both bytes and memoryview objects
 86 |                     if isinstance(data, memoryview):
 87 |                         data = data.tobytes()
 88 |                     text_dest.write(data.decode("utf-8"))
 89 |                 else:
 90 |                     # For binary destinations, we keep as bytes
 91 |                     binary_dest = typing.cast(typing.BinaryIO, destination)
 92 |                     # Handle both bytes and memoryview objects
 93 |                     if isinstance(data, memoryview):
 94 |                         data = data.tobytes()
 95 |                     binary_dest.write(data)
 96 | 
 97 |     def copy_from(cursor: CursorProtocol, sql: str, source: FileObj) -> None:
 98 |         """
 99 |         Copy data from a file-like object to the database using psycopg3.
100 | 
101 |         Args:
102 |             cursor: A psycopg3 cursor object
103 |             sql: SQL COPY statement string
104 |             source: A file-like object to read the data from
105 | 
106 |         The function reads data from the source in chunks and writes it to
107 |         the database using the psycopg3 copy protocol.
108 |         """
109 |         # Use the psycopg3 copy context manager
110 |         with cursor.copy(sql) as copy:
111 |             # Read data in chunks and write to the database
112 |             while True:
113 |                 data = source.read(BUFFER_SIZE)
114 |                 if not data:
115 |                     break
116 |                 copy.write(data)
117 | 
118 | except ImportError:
119 |     # Fall back to psycopg2 if psycopg3 is not available
120 |     from psycopg2.extensions import adapt
121 | 
122 |     def copy_to(
123 |         cursor: CursorProtocol,
124 |         sql: str,
125 |         params: typing.Sequence[typing.Any],
126 |         destination: typing.TextIO | typing.BinaryIO,
127 |     ) -> None:
128 |         """
129 |         Copy data from the database to a file-like object using psycopg2.
130 | 
131 |         Args:
132 |             cursor: A psycopg2 cursor object
133 |             sql: SQL query string with placeholders
134 |             params: Parameters for the SQL query
135 |             destination: A file-like object to write the data to
136 | 
137 |         The function adapts the parameters to SQL syntax and inlines them into the query,
138 |         then uses psycopg2's copy_expert method to execute the COPY operation.
139 |         """
140 |         # psycopg2 requires parameters to be adapted and inlined into the SQL
141 |         adapted_params = tuple(adapt(p) for p in params)
142 |         inlined_sql = sql % adapted_params
143 | 
144 |         # Use psycopg2's copy_expert method
145 |         cursor.copy_expert(inlined_sql, destination)
146 | 
147 |     def copy_from(
148 |         cursor: CursorProtocol,
149 |         sql: str,
150 |         source: typing.TextIO | typing.BinaryIO,
151 |     ) -> None:
152 |         """
153 |         Copy data from a file-like object to the database using psycopg2.
154 | 
155 |         Args:
156 |             cursor: A psycopg2 cursor object
157 |             sql: SQL COPY statement string
158 |             source: A file-like object to read the data from
159 | 
160 |         The function uses psycopg2's copy_expert method to execute the COPY operation.
161 |         """
162 |         # Use psycopg2's copy_expert method
163 |         cursor.copy_expert(sql, source)
164 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  applehelp  to make an Apple Help Book"
 34 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 35 | 	@echo "  epub       to make an epub"
 36 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 37 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 38 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 39 | 	@echo "  text       to make text files"
 40 | 	@echo "  man        to make manual pages"
 41 | 	@echo "  texinfo    to make Texinfo files"
 42 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 43 | 	@echo "  gettext    to make PO message catalogs"
 44 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 45 | 	@echo "  xml        to make Docutils-native XML files"
 46 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 47 | 	@echo "  linkcheck  to check all external links for integrity"
 48 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 49 | 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 50 | 
 51 | clean:
 52 | 	rm -rf $(BUILDDIR)/*
 53 | 
 54 | html:
 55 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 58 | 
 59 | livehtml:
 60 | 	sphinx-autobuild -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 61 | 
 62 | dirhtml:
 63 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 66 | 
 67 | singlehtml:
 68 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 69 | 	@echo
 70 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 71 | 
 72 | pickle:
 73 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the pickle files."
 76 | 
 77 | json:
 78 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 79 | 	@echo
 80 | 	@echo "Build finished; now you can process the JSON files."
 81 | 
 82 | htmlhelp:
 83 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 84 | 	@echo
 85 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 86 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 87 | 
 88 | qthelp:
 89 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 90 | 	@echo
 91 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 92 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 93 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/django-postgres-copy.qhcp"
 94 | 	@echo "To view the help file:"
 95 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/django-postgres-copy.qhc"
 96 | 
 97 | applehelp:
 98 | 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
 99 | 	@echo
100 | 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
101 | 	@echo "N.B. You won't be able to view it unless you put it in" \
102 | 	      "~/Library/Documentation/Help or install it in your application" \
103 | 	      "bundle."
104 | 
105 | devhelp:
106 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
107 | 	@echo
108 | 	@echo "Build finished."
109 | 	@echo "To view the help file:"
110 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/django-postgres-copy"
111 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/django-postgres-copy"
112 | 	@echo "# devhelp"
113 | 
114 | epub:
115 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
116 | 	@echo
117 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
118 | 
119 | latex:
120 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | 	@echo
122 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
123 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
124 | 	      "(use \`make latexpdf' here to do that automatically)."
125 | 
126 | latexpdf:
127 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
128 | 	@echo "Running LaTeX files through pdflatex..."
129 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
130 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
131 | 
132 | latexpdfja:
133 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
134 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
135 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
136 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
137 | 
138 | text:
139 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
140 | 	@echo
141 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
142 | 
143 | man:
144 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
145 | 	@echo
146 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
147 | 
148 | texinfo:
149 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
150 | 	@echo
151 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
152 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
153 | 	      "(use \`make info' here to do that automatically)."
154 | 
155 | info:
156 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
157 | 	@echo "Running Texinfo files through makeinfo..."
158 | 	make -C $(BUILDDIR)/texinfo info
159 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
160 | 
161 | gettext:
162 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
163 | 	@echo
164 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
165 | 
166 | changes:
167 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
168 | 	@echo
169 | 	@echo "The overview file is in $(BUILDDIR)/changes."
170 | 
171 | linkcheck:
172 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
173 | 	@echo
174 | 	@echo "Link check complete; look for any errors in the above output " \
175 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
176 | 
177 | doctest:
178 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
179 | 	@echo "Testing of doctests in the sources finished, look at the " \
180 | 	      "results in $(BUILDDIR)/doctest/output.txt."
181 | 
182 | coverage:
183 | 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
184 | 	@echo "Testing of coverage in the sources finished, look at the " \
185 | 	      "results in $(BUILDDIR)/coverage/python.txt."
186 | 
187 | xml:
188 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
189 | 	@echo
190 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
191 | 
192 | pseudoxml:
193 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
194 | 	@echo
195 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
196 | 


--------------------------------------------------------------------------------
/postgres_copy/managers.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import logging
  4 | import typing
  5 | 
  6 | from django.db import connection, models
  7 | from django.db.models.fields import Field
  8 | from django.db.transaction import TransactionManagementError
  9 | from django.db.backends.base.schema import BaseDatabaseSchemaEditor
 10 | 
 11 | from .copy_from import CopyMapping
 12 | from .copy_to import CopyToQuery
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | class ConstraintQuerySet(models.QuerySet):
 18 |     """
 19 |     Utilities for temporarily dropping and restoring constraints and indexes.
 20 |     """
 21 | 
 22 |     @property
 23 |     def constrained_fields(self) -> typing.List[Field]:
 24 |         """
 25 |         Returns list of model's fields with db_constraint set to True.
 26 |         """
 27 |         return [
 28 |             f
 29 |             for f in self.model._meta.fields
 30 |             if hasattr(f, "db_constraint") and f.db_constraint
 31 |         ]
 32 | 
 33 |     @property
 34 |     def indexed_fields(self) -> typing.List[Field]:
 35 |         """
 36 |         Returns list of model's fields with db_index set to True.
 37 |         """
 38 |         return [f for f in self.model._meta.fields if f.db_index]
 39 | 
 40 |     def edit_schema(
 41 |         self,
 42 |         schema_editor: BaseDatabaseSchemaEditor,
 43 |         method_name: str,
 44 |         args: typing.Tuple,
 45 |     ) -> None:
 46 |         """
 47 |         Edits the schema without throwing errors.
 48 | 
 49 |         This allows for the add and drop methods to be run frequently and without fear.
 50 |         """
 51 |         try:
 52 |             getattr(schema_editor, method_name)(*args)
 53 |         except Exception:
 54 |             logger.debug(f"Edit of {schema_editor}.{method_name} failed. Skipped")
 55 |             pass
 56 | 
 57 |     def drop_constraints(self) -> None:
 58 |         """
 59 |         Drop constraints on the model and its fields.
 60 |         """
 61 |         logger.debug(f"Dropping constraints from {self.model.__name__}")
 62 |         with connection.schema_editor() as schema_editor:
 63 |             # Remove any "unique_together" constraints
 64 |             # NOTE: "unique_together" may be deprecated in the future
 65 |             if getattr(self.model._meta, "unique_together", False):
 66 |                 logger.debug(
 67 |                     "Dropping unique_together of {}".format(
 68 |                         self.model._meta.unique_together
 69 |                     )
 70 |                 )
 71 |                 args = (self.model, self.model._meta.unique_together, ())
 72 |                 self.edit_schema(schema_editor, "alter_unique_together", args)
 73 | 
 74 |             # Remove any field constraints
 75 |             for field in self.constrained_fields:
 76 |                 logger.debug(f"Dropping constraints from {field}")
 77 |                 field_copy = field.__copy__()
 78 |                 field_copy.db_constraint = False
 79 |                 args = (self.model, field, field_copy)
 80 |                 self.edit_schema(schema_editor, "alter_field", args)
 81 | 
 82 |     def drop_indexes(self) -> None:
 83 |         """
 84 |         Drop indexes on the model and its fields.
 85 |         """
 86 |         logger.debug(f"Dropping indexes from {self.model.__name__}")
 87 |         with connection.schema_editor() as schema_editor:
 88 |             if getattr(self.model._meta, "index_together", False):
 89 |                 # Remove any "index_together" constraints
 90 |                 # NOTE: "index_together has been removed from Django 5.1
 91 |                 logger.debug(
 92 |                     f"Dropping index_together of {self.model._meta.index_together}"
 93 |                 )
 94 |                 args = (self.model, self.model._meta.index_together, ())
 95 |                 self.edit_schema(schema_editor, "alter_index_together", args)
 96 | 
 97 |             # Remove any field indexes
 98 |             for field in self.indexed_fields:
 99 |                 logger.debug(f"Dropping index from {field}")
100 |                 field_copy = field.__copy__()
101 |                 field_copy.db_index = False
102 |                 args = (self.model, field, field_copy)
103 |                 self.edit_schema(schema_editor, "alter_field", args)
104 | 
105 |     def restore_constraints(self) -> None:
106 |         """
107 |         Restore constraints on the model and its fields.
108 |         """
109 |         logger.debug(f"Adding constraints to {self.model.__name__}")
110 |         with connection.schema_editor() as schema_editor:
111 |             # Add any "unique_together" contraints from the database
112 |             # NOTE: "unique_together" may be deprecated in the future
113 |             if getattr(self.model._meta, "unique_together", False):
114 |                 logger.debug(
115 |                     "Adding unique_together of {}".format(
116 |                         self.model._meta.unique_together
117 |                     )
118 |                 )
119 |                 args = (self.model, (), self.model._meta.unique_together)
120 |                 self.edit_schema(schema_editor, "alter_unique_together", args)
121 | 
122 |             # Add any constraints to the fields
123 |             for field in self.constrained_fields:
124 |                 logger.debug(f"Adding constraints to {field}")
125 |                 field_copy = field.__copy__()
126 |                 field_copy.db_constraint = False
127 |                 args = (self.model, field_copy, field)
128 |                 self.edit_schema(schema_editor, "alter_field", args)
129 | 
130 |     def restore_indexes(self) -> None:
131 |         """
132 |         Restore indexes on the model and its fields.
133 |         """
134 |         logger.debug(f"Adding indexes to {self.model.__name__}")
135 |         with connection.schema_editor() as schema_editor:
136 |             if getattr(self.model._meta, "index_together", False):
137 |                 # Add any "index_together" contraints to the database.
138 |                 # NOTE: "index_together has been removed from Django 5.1
139 |                 logger.debug(
140 |                     "Restoring index_together of {}".format(
141 |                         self.model._meta.index_together
142 |                     )
143 |                 )
144 |                 args = (self.model, (), self.model._meta.index_together)
145 |                 self.edit_schema(schema_editor, "alter_index_together", args)
146 | 
147 |             # Add any indexes to the fields
148 |             for field in self.indexed_fields:
149 |                 logger.debug(f"Restoring index to {field}")
150 |                 field_copy = field.__copy__()
151 |                 field_copy.db_index = False
152 |                 args = (self.model, field_copy, field)
153 |                 self.edit_schema(schema_editor, "alter_field", args)
154 | 
155 | 
156 | class CopyQuerySet(ConstraintQuerySet):
157 |     """
158 |     Subclass of QuerySet that adds from_csv and to_csv methods.
159 |     """
160 | 
161 |     def from_csv(
162 |         self,
163 |         csv_path: typing.Union[str, typing.BinaryIO, typing.TextIO],
164 |         mapping: typing.Optional[typing.Dict[str, str]] = None,
165 |         drop_constraints: bool = True,
166 |         drop_indexes: bool = True,
167 |         silent: bool = True,
168 |         **kwargs: typing.Any,
169 |     ) -> int:
170 |         """
171 |         Copy CSV file from the provided path to the current model using the provided mapping.
172 |         """
173 |         # Dropping constraints or indices will fail with an opaque error for all but
174 |         # very trivial databases which wouldn't benefit from this optimization anyway.
175 |         # So, we prevent the user from even trying to avoid confusion.
176 |         if drop_constraints or drop_indexes:
177 |             try:
178 |                 connection.validate_no_atomic_block()
179 |             except TransactionManagementError:
180 |                 raise TransactionManagementError(
181 |                     "You are attempting to drop constraints or "
182 |                     "indexes inside a transaction block, which is "
183 |                     "very likely to fail.  If it doesn't fail, you "
184 |                     "wouldn't gain any significant benefit from it "
185 |                     "anyway.  Either remove the transaction block, or set "
186 |                     "drop_constraints=False and drop_indexes=False."
187 |                 )
188 | 
189 |         # Create a mapping dictionary if none was provided
190 |         mapping_dict = mapping if mapping is not None else {}
191 | 
192 |         # Create the CopyMapping object
193 |         copy_mapping = CopyMapping(self.model, csv_path, mapping_dict, **kwargs)
194 | 
195 |         if drop_constraints:
196 |             self.drop_constraints()
197 |         if drop_indexes:
198 |             self.drop_indexes()
199 | 
200 |         insert_count = copy_mapping.save(silent=silent)
201 | 
202 |         if drop_constraints:
203 |             self.restore_constraints()
204 |         if drop_indexes:
205 |             self.restore_indexes()
206 | 
207 |         return insert_count
208 | 
209 |     def to_csv(
210 |         self,
211 |         csv_path: typing.Optional[typing.Union[str, typing.BinaryIO]] = None,
212 |         *fields: str,
213 |         **kwargs: typing.Any,
214 |     ) -> typing.Optional[bytes]:
215 |         """
216 |         Copy current QuerySet to CSV at provided path or file-like object.
217 |         """
218 |         try:
219 |             # For Django 2.0 forward
220 |             query = self.query.chain(CopyToQuery)
221 |         except AttributeError:
222 |             # For Django 1.11 backward
223 |             query = self.query.clone(CopyToQuery)
224 | 
225 |         # Get fields
226 |         query.copy_to_fields = fields
227 | 
228 |         # Delimiter
229 |         query.copy_to_delimiter = "DELIMITER '{}'".format(kwargs.get("delimiter", ","))
230 | 
231 |         # Header
232 |         with_header = kwargs.get("header", True)
233 |         query.copy_to_header = "HEADER" if with_header else ""
234 | 
235 |         # Null string
236 |         null_string = kwargs.get("null")
237 |         query.copy_to_null_string = f"NULL '{null_string}'" if null_string else ""
238 | 
239 |         # Quote character
240 |         quote_char = kwargs.get("quote")
241 |         query.copy_to_quote_char = f"QUOTE '{quote_char}'" if quote_char else ""
242 | 
243 |         # Force quote on columns
244 |         force_quote = kwargs.get("force_quote")
245 |         if force_quote:
246 |             # If it's a list of fields, pass them in with commas
247 |             if isinstance(force_quote, list):
248 |                 query.copy_to_force_quote = "FORCE QUOTE {}".format(
249 |                     ", ".join(column for column in force_quote)
250 |                 )
251 |             # If it's True or a * force quote everything
252 |             elif force_quote is True or force_quote == "*":
253 |                 query.copy_to_force_quote = "FORCE QUOTE *"
254 |             # Otherwise, assume it's a string and pass it through
255 |             else:
256 |                 query.copy_to_force_quote = f"FORCE QUOTE {force_quote}"
257 |         else:
258 |             query.copy_to_force_quote = ""
259 | 
260 |         # Encoding
261 |         set_encoding = kwargs.get("encoding")
262 |         query.copy_to_encoding = f"ENCODING '{set_encoding}'" if set_encoding else ""
263 | 
264 |         # Escape character
265 |         escape_char = kwargs.get("escape")
266 |         query.copy_to_escape = f"ESCAPE '{escape_char}'" if escape_char else ""
267 | 
268 |         # Run the query
269 |         compiler = query.get_compiler(self.db, connection=connection)
270 |         data = compiler.execute_sql(csv_path)
271 | 
272 |         # If no csv_path is provided, then the query will come back as bytes.
273 |         if csv_path is None and isinstance(data, bytes):
274 |             # So return that.
275 |             return data
276 | 
277 |         # Otherwise return None
278 |         return None
279 | 
280 | 
281 | CopyManager = models.Manager.from_queryset(CopyQuerySet)
282 | 


--------------------------------------------------------------------------------
/CLAUDE.md:
--------------------------------------------------------------------------------
  1 | # Django Postgres Copy
  2 | 
  3 | This document provides a comprehensive overview of the `django-postgres-copy` repository, explaining its purpose, architecture, and how to use it effectively.
  4 | 
  5 | ## Repository Overview
  6 | 
  7 | `django-postgres-copy` is a Django package that provides a simple interface for using PostgreSQL's `COPY` command to efficiently import and export data between CSV files and Django models. The `COPY` command is significantly faster than using Django's ORM for bulk operations, especially for large datasets.
  8 | 
  9 | ## Motivation
 10 | 
 11 | The creators of this library are data journalists who frequently download, clean, and analyze new data. This involves writing many data loaders. Traditionally, this was done by looping through each row and saving it to the database using Django's ORM `create` method:
 12 | 
 13 | ```python
 14 | import csv
 15 | from myapp.models import MyModel
 16 | 
 17 | data = csv.DictReader(open("./data.csv"))
 18 | for row in data:
 19 |     MyModel.objects.create(name=row["NAME"], number=row["NUMBER"])
 20 | ```
 21 | 
 22 | This approach works but is inefficient for large files because Django executes a database query for each row, which can take a long time to complete.
 23 | 
 24 | PostgreSQL's built-in `COPY` command can import and export data with a single query, making it much faster. This package makes using `COPY` as easy as any other database operation in Django.
 25 | 
 26 | ## Installation
 27 | 
 28 | The package can be installed from the Python Package Index with `pip`:
 29 | 
 30 | ```bash
 31 | pip install django-postgres-copy
 32 | ```
 33 | 
 34 | You will need to have Django, PostgreSQL, and a database adapter (like `psycopg2` or `psycopg3`) already installed.
 35 | 
 36 | ## Key Components
 37 | 
 38 | ### 1. Core Functionality
 39 | 
 40 | The package provides two main operations:
 41 | - **Import from CSV**: Load data from CSV files into Django models
 42 | - **Export to CSV**: Export data from Django models to CSV files
 43 | 
 44 | ### 2. Main Modules
 45 | 
 46 | - **`managers.py`**: Contains the `CopyManager` and `CopyQuerySet` classes that extend Django's standard manager and queryset with CSV import/export capabilities.
 47 | - **`copy_from.py`**: Handles importing data from CSV files to database tables using the `CopyMapping` class.
 48 | - **`copy_to.py`**: Handles exporting data from database tables to CSV files using custom SQL compilers.
 49 | - **`psycopg_compat.py`**: Provides compatibility between psycopg2 and psycopg3 database drivers for COPY operations.
 50 | 
 51 | ### 3. Database Driver Compatibility
 52 | 
 53 | The package supports both psycopg2 and psycopg3 database drivers through a compatibility layer in `psycopg_compat.py`. This allows users to migrate to the newer driver at their own pace while maintaining the same API.
 54 | 
 55 | ## Architecture
 56 | 
 57 | ### CopyManager and CopyQuerySet
 58 | 
 59 | The `CopyManager` is a custom Django model manager that extends the standard manager with CSV import/export capabilities. It uses the `CopyQuerySet` class, which adds the `from_csv` and `to_csv` methods to Django's standard queryset.
 60 | 
 61 | ```python
 62 | # Usage example
 63 | from postgres_copy import CopyManager
 64 | 
 65 | 
 66 | class MyModel(models.Model):
 67 |     name = models.CharField(max_length=100)
 68 |     objects = CopyManager()  # Use the custom manager
 69 | ```
 70 | 
 71 | ### CopyMapping
 72 | 
 73 | The `CopyMapping` class handles the process of mapping CSV columns to Django model fields and loading the data into the database. It uses a four-step process:
 74 | 
 75 | 1. **Create**: Create a temporary table with the same structure as the CSV file
 76 | 2. **Copy**: Copy data from the CSV file into the temporary table
 77 | 3. **Insert**: Insert data from the temporary table into the Django model's table
 78 | 4. **Drop**: Drop the temporary table
 79 | 
 80 | This approach allows for efficient data loading and validation before committing to the actual database table.
 81 | 
 82 | ### Database Driver Compatibility
 83 | 
 84 | The `psycopg_compat.py` module provides a compatibility layer between psycopg2 and psycopg3 database drivers. It automatically detects which driver is available and provides appropriate implementations of `copy_to` and `copy_from` functions.
 85 | 
 86 | The main differences between the drivers that this module handles:
 87 | 1. psycopg2 uses `copy_expert` method which takes an SQL string with parameters already inlined
 88 | 2. psycopg3 uses a `copy` method that returns a context manager and accepts parameters separately
 89 | 3. psycopg3 handles encoding differently, requiring explicit decoding for text destinations
 90 | 
 91 | ## Usage Examples
 92 | 
 93 | ### Importing Data from CSV
 94 | 
 95 | ```python
 96 | # Basic import
 97 | MyModel.objects.from_csv(
 98 |     "path/to/file.csv",
 99 |     mapping={"name": "NAME_COLUMN", "number": "NUMBER_COLUMN", "date": "DATE_COLUMN"},
100 | )
101 | 
102 | # With custom options
103 | MyModel.objects.from_csv(
104 |     "path/to/file.csv",
105 |     mapping={"name": "NAME", "number": "NUMBER"},
106 |     delimiter=";",
107 |     null="NULL",
108 |     encoding="utf-8",
109 | )
110 | 
111 | # If CSV headers match model fields, mapping is optional
112 | MyModel.objects.from_csv("path/to/file.csv")
113 | ```
114 | 
115 | #### Import Method Parameters
116 | 
117 | The `from_csv` method accepts the following parameters:
118 | 
119 | - `csv_path_or_obj`: The path to the CSV file or a Python file object
120 | - `mapping`: (Optional) Dictionary mapping model fields to CSV headers
121 | - `drop_constraints`: (Default: True) Whether to drop constraints during import
122 | - `drop_indexes`: (Default: True) Whether to drop indexes during import
123 | - `using`: Database to use for import
124 | - `delimiter`: (Default: ',') Character separating values in the CSV
125 | - `quote_character`: Character used for quoting
126 | - `null`: String representing NULL values
127 | - `force_not_null`: List of columns that should ignore NULL string matches
128 | - `force_null`: List of columns that should convert empty quoted strings to NULL
129 | - `encoding`: Character encoding of the CSV
130 | - `ignore_conflicts`: (Default: False) Whether to ignore constraint violations
131 | - `static_mapping`: Dictionary of static values to set for each row
132 | - `temp_table_name`: Name for the temporary table used during import
133 | 
134 | ### Exporting Data to CSV
135 | 
136 | ```python
137 | # Basic export
138 | MyModel.objects.to_csv("path/to/output.csv")
139 | 
140 | # With filtering and custom options
141 | MyModel.objects.filter(active=True).to_csv(
142 |     "path/to/output.csv",
143 |     "name",
144 |     "number",  # Only export these fields
145 |     delimiter=";",
146 |     header=True,
147 |     quote='"',
148 | )
149 | 
150 | # Export to string (no file path provided)
151 | csv_data = MyModel.objects.to_csv()
152 | 
153 | # Export with annotations
154 | MyModel.objects.annotate(name_count=Count("name")).to_csv("path/to/output.csv")
155 | ```
156 | 
157 | #### Export Method Parameters
158 | 
159 | The `to_csv` method accepts the following parameters:
160 | 
161 | - `csv_path`: Path to output file or file-like object (optional - returns string if not provided)
162 | - `*fields`: Field names to include in the export (all fields by default)
163 | - `delimiter`: (Default: ',') Character to use as delimiter
164 | - `header`: (Default: True) Whether to include header row
165 | - `null`: String to use for NULL values
166 | - `encoding`: Character encoding for the output file
167 | - `escape`: Escape character to use
168 | - `quote`: Quote character to use
169 | - `force_quote`: Fields to force quote (field name, list of fields, True, or "*")
170 | 
171 | ### Advanced Features
172 | 
173 | #### Static Mapping
174 | 
175 | You can provide static values for fields that don't exist in the CSV:
176 | 
177 | ```python
178 | MyModel.objects.from_csv(
179 |     "path/to/file.csv",
180 |     mapping={"name": "NAME", "number": "NUMBER"},
181 |     static_mapping={"created_by": "import_script"},
182 | )
183 | ```
184 | 
185 | #### Custom Field Processing
186 | 
187 | You can customize how fields are processed during import by defining a `copy_template` attribute on your model fields:
188 | 
189 | ```python
190 | class MyIntegerField(models.IntegerField):
191 |     copy_template = """
192 |         CASE
193 |             WHEN "%(name)s" = 'x' THEN null
194 |             ELSE "%(name)s"::int
195 |         END
196 |     """
197 | ```
198 | 
199 | Or by defining a method on your model:
200 | 
201 | ```python
202 | class MyModel(models.Model):
203 |     name = models.CharField(max_length=100)
204 | 
205 |     def copy_name_template(self):
206 |         return 'upper("%(name)s")'
207 | ```
208 | 
209 | A common use case is transforming date formats:
210 | 
211 | ```python
212 | def copy_mydatefield_template(self):
213 |     return """
214 |         CASE
215 |             WHEN "%(name)s" = '' THEN NULL
216 |             ELSE to_date("%(name)s", 'MM/DD/YYYY') /* The source CSV's date pattern */
217 |         END
218 |     """
219 | ```
220 | 
221 | It's important to handle empty strings by converting them to NULL in date fields to avoid "year out of range" errors.
222 | 
223 | #### Hooks
224 | 
225 | You can extend the `CopyMapping` class to add custom behavior at different stages of the import process:
226 | 
227 | ```python
228 | class CustomCopyMapping(CopyMapping):
229 |     def pre_copy(self, cursor):
230 |         # Run before copying data
231 |         pass
232 | 
233 |     def post_copy(self, cursor):
234 |         # Run after copying data
235 |         pass
236 | 
237 |     def pre_insert(self, cursor):
238 |         # Run before inserting data
239 |         pass
240 | 
241 |     def post_insert(self, cursor):
242 |         # Run after inserting data
243 |         pass
244 | ```
245 | 
246 | ### Working with Related Models
247 | 
248 | When exporting data, you can include fields from related models using Django's double underscore notation:
249 | 
250 | ```python
251 | # Models
252 | class Hometown(models.Model):
253 |     name = models.CharField(max_length=500)
254 |     objects = CopyManager()
255 | 
256 | 
257 | class Person(models.Model):
258 |     name = models.CharField(max_length=500)
259 |     number = models.IntegerField()
260 |     hometown = models.ForeignKey(Hometown, on_delete=models.CASCADE)
261 |     objects = CopyManager()
262 | 
263 | 
264 | # Export with related fields
265 | Person.objects.to_csv("path/to/export.csv", "name", "number", "hometown__name")
266 | ```
267 | 
268 | ## Performance Considerations
269 | 
270 | - The package temporarily drops constraints and indexes during import to improve performance
271 | - For large imports, it's recommended to run the import outside of a transaction block
272 | - The package uses PostgreSQL's `COPY` command which is much faster than Django's ORM for bulk operations
273 | - Importing data happens in a four-step process (create temp table, copy data, insert into model table, drop temp table)
274 | 
275 | ## Testing
276 | 
277 | The package includes comprehensive tests for all functionality, including:
278 | - Basic import/export operations
279 | - Custom field processing
280 | - Error handling
281 | - Multi-database support
282 | - psycopg2 and psycopg3 compatibility
283 | 
284 | ## Limitations
285 | 
286 | - Only works with PostgreSQL databases
287 | - Requires direct file access (for file-based imports)
288 | - May not handle very complex data transformations without custom field processing
289 | 
290 | ## Contributing
291 | 
292 | To set up a development environment:
293 | 1. Fork and clone the repository
294 | 2. Run `pipenv install` to install dependencies
295 | 3. Run `pipenv run pytest tests` to run tests
296 | 
297 | ## License
298 | 
299 | The package is released under the MIT License.
300 | 
301 | ## Resources
302 | 
303 | - Documentation: [palewi.re/docs/django-postgres-copy/](https://palewi.re/docs/django-postgres-copy/)
304 | - Issues: [github.com/palewire/django-postgres-copy/issues](https://github.com/palewire/django-postgres-copy/issues)
305 | - Packaging: [pypi.python.org/pypi/django-postgres-copy](https://pypi.python.org/pypi/django-postgres-copy)
306 | - Testing: [github.com/palewire/django-postgres-copy/actions](https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml)
307 | 


--------------------------------------------------------------------------------
/postgres_copy/copy_from.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | Handlers for working with PostgreSQL's COPY command.
  4 | """
  5 | 
  6 | import csv
  7 | import logging
  8 | import os
  9 | import sys
 10 | import typing
 11 | from collections import OrderedDict
 12 | from io import TextIOWrapper
 13 | 
 14 | from django.contrib.humanize.templatetags.humanize import intcomma
 15 | from django.core.exceptions import FieldDoesNotExist
 16 | from django.db import NotSupportedError, connections, router
 17 | from django.db.models import Field, Model
 18 | from django.db.backends.utils import CursorWrapper
 19 | 
 20 | from .psycopg_compat import copy_from
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | class CopyMapping:
 26 |     """
 27 |     Maps comma-delimited file to Django model and loads it into PostgreSQL database using COPY command.
 28 |     """
 29 | 
 30 |     def __init__(
 31 |         self,
 32 |         model: typing.Type[Model],
 33 |         csv_path_or_obj: typing.Union[str, typing.BinaryIO, typing.TextIO],
 34 |         mapping: typing.Dict[str, str],
 35 |         using: typing.Optional[str] = None,
 36 |         delimiter: str = ",",
 37 |         quote_character: typing.Optional[str] = None,
 38 |         null: typing.Optional[str] = None,
 39 |         force_not_null: typing.Optional[typing.List[str]] = None,
 40 |         force_null: typing.Optional[typing.List[str]] = None,
 41 |         encoding: typing.Optional[str] = None,
 42 |         ignore_conflicts: bool = False,
 43 |         static_mapping: typing.Optional[typing.Dict[str, str]] = None,
 44 |         temp_table_name: typing.Optional[str] = None,
 45 |     ) -> None:
 46 |         # Set the required arguments
 47 |         self.model = model
 48 |         self.csv_path_or_obj = csv_path_or_obj
 49 | 
 50 |         # If the CSV is not a file object already ...
 51 |         if hasattr(csv_path_or_obj, "read"):
 52 |             self.csv_file = csv_path_or_obj
 53 |         else:
 54 |             # We know it's a string path at this point
 55 |             csv_path = csv_path_or_obj
 56 |             # ... verify the path exists ...
 57 |             if not os.path.exists(csv_path):
 58 |                 raise ValueError("CSV path does not exist")
 59 |             # ... then open it up.
 60 |             self.csv_file = open(csv_path)
 61 | 
 62 |         # Hook in the other optional settings
 63 |         self.quote_character = quote_character
 64 |         self.delimiter = delimiter
 65 |         self.null = null
 66 |         self.force_not_null = force_not_null
 67 |         self.force_null = force_null
 68 |         self.encoding = encoding
 69 |         self.supports_ignore_conflicts = True
 70 |         self.ignore_conflicts = ignore_conflicts
 71 |         if static_mapping is not None:
 72 |             self.static_mapping = OrderedDict(static_mapping)
 73 |         else:
 74 |             self.static_mapping = OrderedDict()
 75 | 
 76 |         # Line up the database connection
 77 |         if using is not None:
 78 |             self.using = using
 79 |         else:
 80 |             self.using = router.db_for_write(model)
 81 |         self.conn = connections[self.using]
 82 |         self.backend = self.conn.ops
 83 | 
 84 |         # Verify it is PostgreSQL
 85 |         if self.conn.vendor != "postgresql":
 86 |             raise TypeError("Only PostgreSQL backends supported")
 87 | 
 88 |         # Check if it is PSQL 9.5 or greater, which determines if ignore_conflicts is supported
 89 |         self.supports_ignore_conflicts = self.is_postgresql_9_5()
 90 |         if self.ignore_conflicts and not self.supports_ignore_conflicts:
 91 |             raise NotSupportedError(
 92 |                 "This database backend does not support ignoring conflicts."
 93 |             )
 94 | 
 95 |         # Pull the CSV headers
 96 |         self.headers = self.get_headers()
 97 | 
 98 |         # Map them to the model
 99 |         self.mapping = self.get_mapping(mapping)
100 | 
101 |         # Make sure the everything is legit
102 |         self.validate_mapping()
103 | 
104 |         # Configure the name of our temporary table to COPY into
105 |         self.temp_table_name = temp_table_name or "temp_%s" % self.model._meta.db_table
106 | 
107 |     def save(self, silent: bool = False, stream: typing.TextIO = sys.stdout) -> int:
108 |         """
109 |         Saves the contents of the CSV file to the database.
110 | 
111 |         Override this method and use 'self.create(cursor)`,
112 |         `self.copy(cursor)`, `self.insert(cursor)`, and `self.drop(cursor)`
113 |         if you need functionality other than the default create/copy/insert/drop
114 |         workflow.
115 | 
116 |          silent:
117 |            By default, non-fatal error notifications are printed to stdout,
118 |            but this keyword may be set to disable these notifications.
119 | 
120 |          stream:
121 |            Status information will be written to this file handle. Defaults to
122 |            using `sys.stdout`, but any object with a `write` method is
123 |            supported.
124 |         """
125 |         logger.debug(f"Loading CSV to {self.model.__name__}")
126 |         if not silent:
127 |             stream.write(f"Loading CSV to {self.model.__name__}\n")
128 | 
129 |         # Connect to the database
130 |         with self.conn.cursor() as c:
131 |             self.create(c)
132 |             self.copy(c)
133 |             insert_count = self.insert(c)
134 |             self.drop(c)
135 | 
136 |         if not silent:
137 |             stream.write(f"{intcomma(insert_count)} records loaded\n")
138 | 
139 |         return insert_count
140 | 
141 |     def is_postgresql_9_5(self) -> bool:
142 |         pg_version = getattr(self.conn, "pg_version", 0)
143 |         return pg_version >= 90500
144 | 
145 |     def get_field(self, name: str) -> typing.Optional[Field]:
146 |         """
147 |         Returns any fields on the database model matching the provided name.
148 |         """
149 |         try:
150 |             return self.model._meta.get_field(name)
151 |         except FieldDoesNotExist:
152 |             return None
153 | 
154 |     def get_mapping(self, mapping: typing.Dict[str, str]) -> typing.Dict[str, str]:
155 |         """
156 |         Returns a generated mapping based on the CSV header
157 |         """
158 |         if mapping:
159 |             return OrderedDict(mapping)
160 |         return {name: name for name in self.headers}
161 | 
162 |     def get_headers(self) -> typing.List[str]:
163 |         """
164 |         Returns the column headers from the csv as a list.
165 |         """
166 |         logger.debug(f"Retrieving headers from {self.csv_file}")
167 | 
168 |         # Check if it's a text or binary file
169 |         is_binary = hasattr(self.csv_file, "mode") and "b" in getattr(
170 |             self.csv_file, "mode", ""
171 |         )
172 | 
173 |         if is_binary:
174 |             # For binary files, we need to wrap it in a TextIOWrapper
175 |             encoding = self.encoding or "utf-8"
176 |             text_file = TextIOWrapper(
177 |                 typing.cast(typing.BinaryIO, self.csv_file), encoding=encoding
178 |             )
179 |             csv_reader = csv.reader(text_file, delimiter=self.delimiter)
180 |             headers = next(csv_reader)
181 |             # Detach the wrapper so the file stays open
182 |             text_file.detach()
183 |         else:
184 |             # For text files or file-like objects without a mode attribute
185 |             try:
186 |                 # Try to read directly
187 |                 csv_reader = csv.reader(
188 |                     typing.cast(typing.Iterable[str], self.csv_file),
189 |                     delimiter=self.delimiter,
190 |                 )
191 |                 headers = next(csv_reader)
192 |             except (csv.Error, TypeError, AttributeError):
193 |                 # If that fails, try the binary approach as a fallback
194 |                 if hasattr(self.csv_file, "seek"):
195 |                     self.csv_file.seek(0)
196 |                 encoding = self.encoding or "utf-8"
197 |                 text_file = TextIOWrapper(
198 |                     typing.cast(typing.BinaryIO, self.csv_file), encoding=encoding
199 |                 )
200 |                 csv_reader = csv.reader(text_file, delimiter=self.delimiter)
201 |                 headers = next(csv_reader)
202 |                 text_file.detach()
203 | 
204 |         # Move back to the top of the file if possible
205 |         if hasattr(self.csv_file, "seek"):
206 |             self.csv_file.seek(0)
207 | 
208 |         return headers
209 | 
210 |     def validate_mapping(self) -> None:
211 |         """
212 |         Verify that the mapping provided by the user is acceptable.
213 | 
214 |         Raises errors if something goes wrong. Returns nothing if everything is kosher.
215 |         """
216 |         # Make sure all of the CSV headers in the mapping actually exist
217 |         for map_header in self.mapping.values():
218 |             if map_header not in self.headers:
219 |                 raise ValueError(f"Header '{map_header}' not found in CSV file")
220 | 
221 |         # Make sure all the model fields in the mapping actually exist
222 |         for map_field in self.mapping.keys():
223 |             if not self.get_field(map_field):
224 |                 raise FieldDoesNotExist(f"Model does not include {map_field} field")
225 | 
226 |         # Make sure any static mapping columns exist
227 |         for static_field in self.static_mapping.keys():
228 |             if not self.get_field(static_field):
229 |                 raise ValueError(f"Model does not include {static_field} field")
230 | 
231 |     #
232 |     # CREATE commands
233 |     #
234 | 
235 |     def prep_create(self) -> str:
236 |         """
237 |         Creates a CREATE statement that makes a new temporary table.
238 | 
239 |         Returns SQL that can be run.
240 |         """
241 |         sql = """CREATE TEMPORARY TABLE "%(table_name)s" (%(field_list)s);"""
242 |         options = dict(table_name=self.temp_table_name)
243 |         field_list = []
244 | 
245 |         # Loop through all the fields and CSV headers together
246 |         for header in self.headers:
247 |             # Format the SQL create statement
248 |             string = '"%s" text' % header
249 | 
250 |             # Add the string to the list
251 |             field_list.append(string)
252 | 
253 |         # Join all the field strings together
254 |         options["field_list"] = ", ".join(field_list)
255 | 
256 |         # Mash together the SQL and pass it out
257 |         return sql % options
258 | 
259 |     def create(self, cursor: CursorWrapper) -> None:
260 |         """
261 |         Generate and run create sql for the temp table.
262 |         Runs a DROP on same prior to CREATE to avoid collisions.
263 | 
264 |         cursor:
265 |           A cursor object on the db
266 |         """
267 |         logger.debug("Running CREATE command")
268 |         self.drop(cursor)
269 |         create_sql = self.prep_create()
270 |         logger.debug(create_sql)
271 |         cursor.execute(create_sql)
272 | 
273 |     #
274 |     # COPY commands
275 |     #
276 | 
277 |     def prep_copy(self) -> str:
278 |         """
279 |         Creates a COPY statement that loads the CSV into a temporary table.
280 | 
281 |         Returns SQL that can be run.
282 |         """
283 |         sql = """
284 |             COPY "%(db_table)s" (%(header_list)s)
285 |             FROM STDIN
286 |             WITH CSV HEADER %(extra_options)s;
287 |         """
288 |         options = {
289 |             "db_table": self.temp_table_name,
290 |             "extra_options": "",
291 |             "header_list": ", ".join([f'"{h}"' for h in self.headers]),
292 |         }
293 |         if self.quote_character:
294 |             options["extra_options"] += f" QUOTE '{self.quote_character}'"
295 |         if self.delimiter:
296 |             options["extra_options"] += f" DELIMITER '{self.delimiter}'"
297 |         if self.null is not None:
298 |             options["extra_options"] += f" NULL '{self.null}'"
299 |         if self.force_not_null is not None:
300 |             options["extra_options"] += " FORCE NOT NULL {}".format(
301 |                 ",".join(f'"{s}"' for s in self.force_not_null)
302 |             )
303 |         if self.force_null is not None:
304 |             options["extra_options"] += " FORCE NULL {}".format(
305 |                 ",".join('"%s"' % s for s in self.force_null)
306 |             )
307 |         if self.encoding:
308 |             options["extra_options"] += f" ENCODING '{self.encoding}'"
309 |         return sql % options
310 | 
311 |     def pre_copy(self, cursor: CursorWrapper) -> None:
312 |         pass
313 | 
314 |     def copy(self, cursor: CursorWrapper) -> None:
315 |         """
316 |         Generate and run the COPY command to copy data from csv to temp table.
317 | 
318 |         Calls `self.pre_copy(cursor)` and `self.post_copy(cursor)` respectively
319 |         before and after running copy
320 | 
321 |         cursor:
322 |           A cursor object on the db
323 |         """
324 |         # Run pre-copy hook
325 |         self.pre_copy(cursor)
326 | 
327 |         logger.debug("Running COPY command")
328 |         copy_sql = self.prep_copy()
329 |         logger.debug(copy_sql)
330 |         copy_from(
331 |             cursor,
332 |             copy_sql,
333 |             typing.cast(typing.Union[typing.TextIO, typing.BinaryIO], self.csv_file),
334 |         )
335 | 
336 |         # At this point all data has been loaded to the temp table
337 |         if hasattr(self.csv_file, "close"):
338 |             self.csv_file.close()
339 | 
340 |         # Run post-copy hook
341 |         self.post_copy(cursor)
342 | 
343 |     def post_copy(self, cursor: CursorWrapper) -> None:
344 |         pass
345 | 
346 |     #
347 |     # INSERT commands
348 |     #
349 | 
350 |     def insert_suffix(self) -> str:
351 |         """
352 |         Preps the suffix to the insert query.
353 |         """
354 |         if self.ignore_conflicts:
355 |             return """
356 |                 ON CONFLICT DO NOTHING;
357 |             """
358 |         else:
359 |             return ";"
360 | 
361 |     def prep_insert(self) -> str:
362 |         """
363 |         Creates a INSERT statement that reorders and cleans up
364 |         the fields from the temporary table for insertion into the
365 |         Django model.
366 | 
367 |         Returns SQL that can be run.
368 |         """
369 |         sql = """
370 |             INSERT INTO "%(model_table)s" (%(model_fields)s) (
371 |             SELECT %(temp_fields)s
372 |             FROM "%(temp_table)s")%(insert_suffix)s
373 |         """
374 |         options = dict(
375 |             model_table=self.model._meta.db_table,
376 |             temp_table=self.temp_table_name,
377 |             insert_suffix=self.insert_suffix(),
378 |         )
379 | 
380 |         #
381 |         # The model fields to be inserted into
382 |         #
383 | 
384 |         model_fields = []
385 |         for field_name in self.mapping.keys():
386 |             field = self.get_field(field_name)
387 |             if field is not None:
388 |                 model_fields.append('"%s"' % field.get_attname_column()[1])
389 | 
390 |         for k in self.static_mapping.keys():
391 |             model_fields.append('"%s"' % k)
392 | 
393 |         options["model_fields"] = ", ".join(model_fields)
394 | 
395 |         #
396 |         # The temp fields to SELECT from
397 |         #
398 | 
399 |         temp_fields = []
400 |         for field_name, header in self.mapping.items():
401 |             # Pull the field object from the model
402 |             field = self.get_field(field_name)
403 |             if field is not None:
404 |                 field_type = field.db_type(self.conn)
405 |                 if field_type in ["serial", "bigserial"]:
406 |                     field_type = "integer"
407 | 
408 |                 # Format the SQL
409 |                 string = f'cast("{header}" as {field_type})'
410 | 
411 |                 # Apply a datatype template override, if it exists
412 |                 if hasattr(field, "copy_template"):
413 |                     string = field.copy_template % dict(name=header)
414 | 
415 |                 # Apply a field specific template override, if it exists
416 |                 template_method = "copy_%s_template" % field.name
417 |                 if hasattr(self.model, template_method):
418 |                     template = getattr(self.model(), template_method)()
419 |                     string = template % dict(name=header)
420 | 
421 |             # Add field to list
422 |             temp_fields.append(string)
423 | 
424 |         # Tack on static fields
425 |         for v in self.static_mapping.values():
426 |             temp_fields.append("'%s'" % v)
427 | 
428 |         # Join it all together
429 |         options["temp_fields"] = ", ".join(temp_fields)
430 | 
431 |         # Pass it out
432 |         return sql % options
433 | 
434 |     def pre_insert(self, cursor: CursorWrapper) -> None:
435 |         pass
436 | 
437 |     def insert(self, cursor: CursorWrapper) -> int:
438 |         """
439 |         Generate and run the INSERT command to move data from the temp table
440 |         to the concrete table.
441 | 
442 |         Calls `self.pre_copy(cursor)` and `self.post_copy(cursor)` respectively
443 |         before and after running copy
444 | 
445 |         returns: the count of rows inserted
446 | 
447 |         cursor:
448 |           A cursor object on the db
449 |         """
450 |         # Pre-insert hook
451 |         self.pre_insert(cursor)
452 | 
453 |         logger.debug("Running INSERT command")
454 |         insert_sql = self.prep_insert()
455 |         logger.debug(insert_sql)
456 |         cursor.execute(insert_sql)
457 |         insert_count = cursor.rowcount
458 |         logger.debug(f"{insert_count} rows inserted")
459 | 
460 |         # Post-insert hook
461 |         self.post_insert(cursor)
462 | 
463 |         # Return the row count
464 |         return insert_count if isinstance(insert_count, int) else 0
465 | 
466 |     def post_insert(self, cursor: CursorWrapper) -> None:
467 |         pass
468 | 
469 |     #
470 |     # DROP commands
471 |     #
472 | 
473 |     def prep_drop(self) -> str:
474 |         """
475 |         Creates a DROP statement that gets rid of the temporary table.
476 | 
477 |         Return SQL that can be run.
478 |         """
479 |         return 'DROP TABLE IF EXISTS "%s";' % self.temp_table_name
480 | 
481 |     def drop(self, cursor: CursorWrapper) -> None:
482 |         """
483 |         Generate and run the DROP command for the temp table.
484 | 
485 |         cursor:
486 |           A cursor object on the db
487 |         """
488 |         logger.debug("Running DROP command")
489 |         drop_sql = self.prep_drop()
490 |         logger.debug(drop_sql)
491 |         cursor.execute(drop_sql)
492 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | ====================
  2 | django-postgres-copy
  3 | ====================
  4 | 
  5 | Quickly import and export delimited data with Django support for PostgreSQL's COPY command
  6 | 
  7 | .. contents:: Table of contents
  8 |     :depth: 1
  9 |     :local:
 10 | 
 11 | Why and what for?
 12 | =================
 13 | 
 14 | `The people <http://www.californiacivicdata.org/about/>`_ who made this library are data journalists. We are often downloading, cleaning and analyzing new data.
 15 | 
 16 | That means we write a load of loaders. In the past we did this by looping through each row and saving it to the database using the Django's ORM `create method <https://docs.djangoproject.com/en/dev/ref/models/querysets/#django.db.models.query.QuerySet.create>`_.
 17 | 
 18 | .. code-block:: python
 19 | 
 20 |     import csv
 21 |     from myapp.models import MyModel
 22 | 
 23 | 
 24 |     data = csv.DictReader(open("./data.csv"))
 25 |     for row in data:
 26 |         MyModel.objects.create(name=row["NAME"], number=row["NUMBER"])
 27 | 
 28 | That works, but if you have a big file Django will rack up a database query for each row. That can take a long time to finish.
 29 | 
 30 | Lucky for us, PostgreSQL has a built-in tool called `COPY <http://www.postgresql.org/docs/9.4/static/sql-copy.html>`_ that hammers data in and out the database with one quick query.
 31 | 
 32 | This package tries to make using COPY as easy as any other database routine supported by Django. It is implemented by a custom `model manager <https://docs.djangoproject.com/en/dev/topics/db/managers/>`_.
 33 | 
 34 | Here's how it imports a CSV to a database table.
 35 | 
 36 | .. code-block:: python
 37 | 
 38 |     from myapp.models import MyModel
 39 | 
 40 | 
 41 |     MyModel.objects.from_csv(
 42 |         "./data.csv",  # The path to a source file (a Python file object is also acceptable)
 43 |         dict(name="NAME", number="NUMBER"),  # A crosswalk of model fields to CSV headers.
 44 |     )
 45 | 
 46 | And here's how it exports a database table to a CSV.
 47 | 
 48 | .. code-block:: python
 49 | 
 50 |     from myapp.models import MyModel
 51 | 
 52 | 
 53 |     MyModel.objects.to_csv("./data.csv")
 54 | 
 55 | 
 56 | Installation
 57 | ============
 58 | 
 59 | The package can be installed from the Python Package Index with `pip`.
 60 | 
 61 | .. code-block:: bash
 62 | 
 63 |     $ pip install django-postgres-copy
 64 | 
 65 | You will have to have Django, PostgreSQL and an adapter between the two, such as `psycopg2 <http://initd.org/psycopg/docs/>`_ or `psycopg 3 <https://www.psycopg.org/psycopg3/>`_), installed to put this library to use.
 66 | 
 67 | 
 68 | An example
 69 | ==========
 70 | 
 71 | It all starts with a CSV file you'd like to load into your database. This library is intended to be used with large files but here's something simple as an example.
 72 | 
 73 | .. code-block:: text
 74 | 
 75 |     name,number,date
 76 |     ben,1,2012-01-01
 77 |     joe,2,2012-01-02
 78 |     jane,3,2012-01-03
 79 | 
 80 | A Django model that corresponds to the data might look something like this. It should have our custom manager attached.
 81 | 
 82 | .. code-block:: python
 83 |     :emphasize-lines: 2,9
 84 | 
 85 |     from django.db import models
 86 |     from postgres_copy import CopyManager
 87 | 
 88 | 
 89 |     class Person(models.Model):
 90 |         name = models.CharField(max_length=500)
 91 |         number = models.IntegerField(null=True)
 92 |         date = models.DateField(null=True)
 93 |         objects = CopyManager()
 94 | 
 95 | If the model hasn't been created in your database, that needs to happen.
 96 | 
 97 | .. code-block:: bash
 98 | 
 99 |     $ python manage.py migrate
100 | 
101 | 
102 | How to import data
103 | ------------------
104 | 
105 | Here's how to create a script to import CSV data into the model. Our favorite way to do this is to write a `custom Django management command <https://docs.djangoproject.com/en/2.2/howto/custom-management-commands/>`_.
106 | 
107 | .. code-block:: python
108 |     :emphasize-lines: 1,8-11
109 | 
110 |     from myapp.models import Person
111 |     from django.core.management.base import BaseCommand
112 | 
113 | 
114 |     class Command(BaseCommand):
115 | 
116 |         def handle(self, *args, **kwargs):
117 |             # Since the CSV headers match the model fields,
118 |             # you only need to provide the file's path (or a Python file object)
119 |             insert_count = Person.objects.from_csv("/path/to/my/import.csv")
120 |             print(f"{insert_count} records inserted")
121 | 
122 | Run your loader.
123 | 
124 | .. code-block:: bash
125 | 
126 |     $ python manage.py myimportcommand
127 | 
128 | 
129 | How to export data
130 | ------------------
131 | 
132 | .. code-block:: python
133 |     :emphasize-lines: 1,8-10
134 | 
135 |     from myapp.models import Person
136 |     from django.core.management.base import BaseCommand
137 | 
138 | 
139 |     class Command(BaseCommand):
140 | 
141 |         def handle(self, *args, **kwargs):
142 |             # All this method needs is the path to your CSV.
143 |             # (If you don't provide one, the method will return the CSV as a string.)
144 |             Person.objects.to_csv("/path/to/my/export.csv")
145 | 
146 | Run your exporter and that's it.
147 | 
148 | .. code-block:: bash
149 | 
150 |     $ python manage.py myexportcommand
151 | 
152 | That's it. You can even export your queryset after any filters or other tricks. This will work:
153 | 
154 | .. code-block:: python
155 | 
156 |     Person.objects.exclude(name="BEN").to_csv("/path/to/my/export.csv")
157 | 
158 | And so will something like this:
159 | 
160 | .. code-block:: python
161 | 
162 |     Person.objects.annotate(name_count=Count("name")).to_csv("/path/to/my/export.csv")
163 | 
164 | 
165 | Import options
166 | ==============
167 | 
168 | The ``from_csv`` manager method has the following arguments and keywords options. Returns the number of records added.
169 | 
170 | .. method:: from_csv(csv_path_or_obj[, mapping=None, drop_constraints=True, drop_indexes=True, using=None, delimiter=',', null=None, force_not_null=None, force_null=None, encoding=None, static_mapping=None, temp_table_name=None])
171 | 
172 | 
173 | ===================  =========================================================
174 | Argument             Description
175 | ===================  =========================================================
176 | ``csv_path_or_obj``  The path to the delimited data file, or a Python file
177 |                      object containing delimited data
178 | ===================  =========================================================
179 | 
180 | 
181 | =====================  =======================================================
182 | Keyword Argument       Description
183 | =====================  =======================================================
184 | ``mapping``            A (optional) dictionary: keys are strings corresponding
185 |                        to the model field, and values correspond to string
186 |                        field names for the CSV header. If not informed, the
187 |                        mapping is generated based on the CSV file header.
188 | 
189 | ``drop_constraints``   A boolean that indicates whether or not constraints
190 |                        on the table and fields and should be dropped prior to
191 |                        loading, then restored afterward. Default is True.
192 |                        This is done to boost speed.
193 | 
194 | ``drop_indexes``       A boolean that indicates whether or not indexes
195 |                        on the table and fields and should be dropped prior to
196 |                        loading, then restored afterward. Default is True.
197 |                        This is done to boost speed.
198 | 
199 | ``delimiter``          The character that separates values in the data file.
200 |                        By default  it is ",". This must be a single one-byte
201 |                        character.
202 | 
203 | ``quote_character``    Specifies the quoting character to be used when a
204 |                        data value is quoted. The default is double-quote.
205 |                        This must be a single one-byte character.
206 | 
207 | ``null``               Specifies the string that represents a null value.
208 |                        The default is an unquoted empty string. This must
209 |                        be a single one-byte character.
210 | 
211 | ``force_not_null``     Specifies which columns should ignore matches
212 |                        against the null string. Empty values in these columns
213 |                        will remain zero-length strings rather than becoming
214 |                        nulls. The default is None. If passed, this must be
215 |                        list of column names.
216 | 
217 | ``force_null``         Specifies which columns should register matches
218 |                        against the null string, even if it has been quoted.
219 |                        In the default case where the null string is empty,
220 |                        this converts a quoted empty string into NULL. The
221 |                        default is None. If passed, this must be list of
222 |                        column names.
223 | 
224 | ``encoding``           Specifies the character set encoding of the strings
225 |                        in the CSV data source.  For example, ``'latin-1'``,
226 |                        ``'utf-8'``, and ``'cp437'`` are all valid encoding
227 |                        parameters.
228 | 
229 | ``ignore_conflicts``   Specify True to ignore unique constraint or exclusion
230 |                        constraint violation errors. The default is False.
231 | 
232 | ``using``              Sets the database to use when importing data.
233 |                        Default is None, which will use the ``'default'``
234 |                        database.
235 | 
236 | ``static_mapping``     Set model attributes not in the CSV the same
237 |                        for every row in the database by providing a dictionary
238 |                        with the name of the columns as keys and the static
239 |                        inputs as values.
240 | 
241 | ``temp_table_name``    Set the name of the temporary database table name used
242 |                        to stage data during import. If not provided, a name
243 |                        will be generated on the fly. The generated name is
244 |                        not guaranteed to be unique, which could negatively
245 |                        impact parallel import operations.
246 | =====================  =======================================================
247 | 
248 | 
249 | Transforming data
250 | -----------------
251 | 
252 | By default, the COPY command cannot transform data on-the-fly as it is loaded into the database.
253 | 
254 | This library first loads the data into a temporary table before inserting all records into the model table. So it is possible to use PostgreSQL's built-in SQL methods to modify values during the insert.
255 | 
256 | As an example, imagine a CSV that includes a column of yes and no values that you wanted to store in the database as 1 or 0 in an integer field.
257 | 
258 | .. code-block:: text
259 | 
260 |     NAME,VALUE
261 |     ben,yes
262 |     joe,no
263 | 
264 | A model to store the data as you'd prefer to might look like this.
265 | 
266 | .. code-block:: python
267 | 
268 |     from django.db import models
269 |     from postgres_copy import CopyManager
270 | 
271 | 
272 |     class Person(models.Model):
273 |         name = models.CharField(max_length=500)
274 |         value = models.IntegerField()
275 |         objects = CopyManager()
276 | 
277 | But if the CSV file was loaded directly into the database, you would receive a data type error when the 'yes' and 'no' strings were inserted into the integer field.
278 | 
279 | This library offers two ways you can transform that data during the insert.
280 | 
281 | 
282 | Custom-field transformations
283 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
284 | 
285 | One approach is to create a custom Django field.
286 | 
287 | You can provide a SQL statement for how to transform the data during the insert into the model table. The transformation must include a string interpolation keyed to "name", where the title of the database column will be slotted.
288 | 
289 | This example uses a `CASE statement <http://www.postgresql.org/docs/9.4/static/plpgsql-control-structures.html>`_ to transforms the data.
290 | 
291 | .. code-block:: python
292 | 
293 |   from django.db.models.fields import IntegerField
294 | 
295 | 
296 |   class MyIntegerField(IntegerField):
297 |       copy_template = """
298 |           CASE
299 |               WHEN "%(name)s" = 'yes' THEN 1
300 |               WHEN "%(name)s" = 'no' THEN 0
301 |           END
302 |       """
303 | 
304 | Back in the models file the custom field can be substituted for the default.
305 | 
306 | .. code-block:: python
307 |     :emphasize-lines: 3,8
308 | 
309 |     from django.db import models
310 |     from postgres_copy import CopyManager
311 |     from myapp.fields import MyIntegerField
312 | 
313 | 
314 |     class Person(models.Model):
315 |         name = models.CharField(max_length=500)
316 |         value = MyIntegerField()
317 |         objects = CopyManager()
318 | 
319 | 
320 | Run your loader and it should finish fine.
321 | 
322 | 
323 | Model-method transformations
324 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
325 | 
326 | A second approach is to provide a SQL string for how to transform a field during the insert on the model itself. This lets you specify different transformations for different fields of the same type.
327 | 
328 | You must name the method so that the field name is sandwiched between ``copy_`` and ``_template``. It must return a SQL statement with a string interpolation keyed to "name", where the name of the database column will be slotted.
329 | 
330 | For the example above, the model might be modified to look like this.
331 | 
332 | .. code-block:: python
333 |     :emphasize-lines: 10-16
334 | 
335 |     from django.db import models
336 |     from postgres_copy import CopyManager
337 | 
338 | 
339 |     class Person(models.Model):
340 |         name = models.CharField(max_length=500)
341 |         value = models.IntegerField()
342 |         objects = CopyManager()
343 | 
344 |         def copy_value_template(self):
345 |             return """
346 |               CASE
347 |                   WHEN "%(name)s" = 'yes' THEN 1
348 |                   WHEN "%(name)s" = 'no' THEN 0
349 |               END
350 |               """
351 | 
352 | And that's it.
353 | 
354 | Here's another example of a common issue, transforming the CSV's date format to one PostgreSQL and Django will understand.
355 | 
356 | .. code-block:: python
357 | 
358 |         def copy_mydatefield_template(self):
359 |             return """
360 |                 CASE
361 |                     WHEN "%(name)s" = '' THEN NULL
362 |                     ELSE to_date("%(name)s", 'MM/DD/YYYY') /* The source CSV's date pattern can be set here. */
363 |                 END
364 |             """
365 | 
366 | It's important to handle empty strings (by converting them to NULL) in this example. PostgreSQL will accept empty strings, but Django won't be able to ingest the field and you'll get a strange "year out of range" error when you call something like ``MyModel.objects.all()``.
367 | 
368 | 
369 | Inserting static values
370 | -----------------------
371 | 
372 | If your model has columns that are not in the CSV, you can set static values for what is inserted using the ``static_mapping`` keyword argument. It will insert the provided values into every row in the database.
373 | 
374 | An example could be if you want to include the name of the source CSV file along with each row.
375 | 
376 | Your model might look like this:
377 | 
378 | .. code-block:: python
379 |     :emphasize-lines: 8
380 | 
381 |     from django.db import models
382 |     from postgres_copy import CopyManager
383 | 
384 | 
385 |     class Person(models.Model):
386 |         name = models.CharField(max_length=500)
387 |         number = models.IntegerField()
388 |         source_csv = models.CharField(max_length=500)
389 |         objects = CopyManager()
390 | 
391 | 
392 | And your loader would look like this:
393 | 
394 | .. code-block:: python
395 |     :emphasize-lines: 11-13
396 | 
397 |     from myapp.models import Person
398 |     from django.core.management.base import BaseCommand
399 | 
400 | 
401 |     class Command(BaseCommand):
402 | 
403 |         def handle(self, *args, **kwargs):
404 |             Person.objects.from_csv(
405 |                 "/path/to/my/data.csv",
406 |                 dict(name="NAME", number="NUMBER"),
407 |                 static_mapping={"source_csv": "data.csv"},
408 |             )
409 | 
410 | 
411 | Extending with hooks
412 | --------------------
413 | 
414 | The ``from_csv`` method connects with a lower level ``CopyMapping`` class with optional hooks that run before and after the COPY statement. They run first when the CSV is into a temporary table and then again before and after the INSERT statement that then slots data into your model's table.
415 | 
416 | If you have extra steps or more complicated logic you'd like to work into a loading routine, ``CopyMapping`` and its hooks provide an opportunity to extend the base library.
417 | 
418 | To try them out, subclass ``CopyMapping`` and fill in as many of the optional hook methods below as you need.
419 | 
420 | .. code-block:: python
421 | 
422 |     from postgres_copy import CopyMapping
423 | 
424 | 
425 |     class HookedCopyMapping(CopyMapping):
426 |         def pre_copy(self, cursor):
427 |             print("pre_copy!")
428 |             # Doing whatever you'd like here
429 | 
430 |         def post_copy(self, cursor):
431 |             print("post_copy!")
432 |             # And here
433 | 
434 |         def pre_insert(self, cursor):
435 |             print("pre_insert!")
436 |             # And here
437 | 
438 |         def post_insert(self, cursor):
439 |             print("post_insert!")
440 |             # And finally here
441 | 
442 | 
443 | Now you can run that subclass directly rather than via a manager. The only differences are that model is the first argument ``CopyMapping``, which creates an object that is executed with a call to its ``save`` method.
444 | 
445 | .. code-block:: python
446 |     :emphasize-lines: 2,9-16
447 | 
448 | 
449 |     from myapp.models import Person
450 |     from myapp.loaders import HookedCopyMapping
451 |     from django.core.management.base import BaseCommand
452 | 
453 | 
454 |     class Command(BaseCommand):
455 | 
456 |         def handle(self, *args, **kwargs):
457 |             # Note that we're using HookedCopyMapping here
458 |             c = HookedCopyMapping(
459 |                 Person,
460 |                 "/path/to/my/data.csv",
461 |                 dict(name="NAME", number="NUMBER"),
462 |             )
463 |             # Then save it.
464 |             c.save()
465 | 
466 | 
467 | Export options
468 | ==============
469 | 
470 | The ``to_csv`` manager method only requires one argument, the path to where the CSV should be exported. It also allows users to optionally limit or expand the fields written out by providing them as additional parameters. Other options allow for configuration of the output file.
471 | 
472 | .. method:: to_csv(csv_path [, *fields, delimiter=',', header=True, null=None, encoding=None, escape=None, quote=None, force_quote=None])
473 | 
474 | 
475 | =================  =========================================================
476 | Argument           Description
477 | =================  =========================================================
478 | ``csv_path``       The path to a file to write out the CSV. Also accepts
479 |                    file-like objects. Optional. If you don't provide one,
480 |                    the comma-delimited data is returned as a string.
481 | 
482 | ``fields``         Strings corresponding to the model fields to be exported.
483 |                    All fields on the model are exported by default. Fields
484 |                    on related models can be included with Django's double
485 |                    underscore notation. Optional.
486 | 
487 | ``delimiter``      String that will be used as a delimiter for the CSV
488 |                    file. Optional.
489 | 
490 | ``header``         Boolean determines if the header should be exported.
491 |                    Optional.
492 | 
493 | ``null``           String to populate exported null values with. Default
494 |                    is an empty string. Optional.
495 | 
496 | ``encoding``       The character encoding that should be used for the file
497 |                    being written. Optional.
498 | 
499 | ``escape``         The escape character to be used. Optional.
500 | 
501 | ``quote``          The quote character to be used. Optional.
502 | 
503 | ``force_quote``    Force fields to be quoted in the CSV. Default is None.
504 |                    A field name or list of field names can be submitted.
505 |                    Pass in True or "*" to quote all fields. Optional.
506 | =================  =========================================================
507 | 
508 | 
509 | Reducing the exported fields
510 | ----------------------------
511 | 
512 | You can reduce the number of fields exported by providing the ones you want as a list to the ``to_csv`` method.
513 | 
514 | Your model might look like this:
515 | 
516 | .. code-block:: python
517 | 
518 |     from django.db import models
519 |     from postgres_copy import CopyManager
520 | 
521 | 
522 |     class Person(models.Model):
523 |         name = models.CharField(max_length=500)
524 |         number = models.IntegerField()
525 |         objects = CopyManager()
526 | 
527 | You could export only the name field by providing it as an extra parameter.
528 | 
529 | .. code-block:: python
530 |     :emphasize-lines: 10
531 | 
532 |     from myapp.models import Person
533 |     from django.core.management.base import BaseCommand
534 | 
535 | 
536 |     class Command(BaseCommand):
537 | 
538 |         def handle(self, *args, **kwargs):
539 |             Person.objects.to_csv("/path/to/my/export.csv", "name")
540 | 
541 | 
542 | Increasing the exported fields
543 | ------------------------------
544 | 
545 | In cases where your model is connected to other tables with a foreign key, you can increase the number of fields exported to included related tables using Django's double underscore notation.
546 | 
547 | Your models might look like this:
548 | 
549 | .. code-block:: python
550 | 
551 |     from django.db import models
552 |     from postgres_copy import CopyManager
553 | 
554 | 
555 |     class Hometown(models.Model):
556 |         name = models.CharField(max_length=500)
557 |         objects = CopyManager()
558 | 
559 | 
560 |     class Person(models.Model):
561 |         name = models.CharField(max_length=500)
562 |         number = models.IntegerField()
563 |         hometown = models.ForeignKey(Hometown)
564 |         objects = CopyManager()
565 | 
566 | You can reach across to related tables during an export by adding their fields to the export method.
567 | 
568 | 
569 | .. code-block:: python
570 |     :emphasize-lines: 12
571 | 
572 |     from myapp.models import Person
573 |     from django.core.management.base import BaseCommand
574 | 
575 | 
576 |     class Command(BaseCommand):
577 | 
578 |         def handle(self, *args, **kwargs):
579 |             Person.objects.to_csv(
580 |                 "/path/to/my/export.csv", "name", "number", "hometown__name"
581 |             )
582 | 
583 | 
584 | Open-source resources
585 | =====================
586 | 
587 | * Code: `github.com/palewire/django-postgres-copy <https://github.com/palewire/django-postgres-copy>`_
588 | * Issues: `github.com/palewire/django-postgres-copy/issues <https://github.com/palewire/django-postgres-copy/issues>`_
589 | * Packaging: `pypi.python.org/pypi/django-postgres-copy <https://pypi.python.org/pypi/django-postgres-copy>`_
590 | * Testing: `github.com/palewire/django-postgres-copy/actions <https://github.com/palewire/django-postgres-copy/actions/workflows/test.yaml>`_
591 | 


--------------------------------------------------------------------------------
/tests/test_queries.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import io
  3 | import os
  4 | from datetime import date
  5 | from unittest import mock
  6 | 
  7 | import pytest
  8 | from django.core.exceptions import FieldDoesNotExist
  9 | from django.db import transaction
 10 | from django.db.models import Count
 11 | from django.db.transaction import TransactionManagementError
 12 | from django.test import TestCase
 13 | 
 14 | from postgres_copy import CopyMapping
 15 | 
 16 | from .models import (
 17 |     ExtendedMockObject,
 18 |     HookedCopyMapping,
 19 |     LimitedMockObject,
 20 |     MockBlankObject,
 21 |     MockFKObject,
 22 |     MockObject,
 23 |     OverloadMockObject,
 24 |     SecondaryMockObject,
 25 |     UniqueMockObject,
 26 | )
 27 | 
 28 | try:
 29 |     from psycopg.errors import Error
 30 | except ImportError:
 31 |     from psycopg2.errors import Error
 32 | 
 33 | 
 34 | class BaseTest(TestCase):
 35 |     databases = ["default", "sqlite", "other", "secondary"]
 36 | 
 37 |     def setUp(self):
 38 |         self.data_dir = os.path.join(os.path.dirname(__file__), "data")
 39 |         self.name_path = os.path.join(self.data_dir, "names.csv")
 40 |         self.foreign_path = os.path.join(self.data_dir, "foreignkeys.csv")
 41 |         self.pipe_path = os.path.join(self.data_dir, "pipes.csv")
 42 |         self.quote_path = os.path.join(self.data_dir, "quote.csv")
 43 |         self.blank_null_path = os.path.join(self.data_dir, "blanknulls.csv")
 44 |         self.null_path = os.path.join(self.data_dir, "nulls.csv")
 45 |         self.backwards_path = os.path.join(self.data_dir, "backwards.csv")
 46 |         self.matching_headers_path = os.path.join(self.data_dir, "matching_headers.csv")
 47 |         self.secondarydb_path = os.path.join(self.data_dir, "secondary_db.csv")
 48 | 
 49 |     def tearDown(self):
 50 |         MockObject.objects.all().delete()
 51 |         MockFKObject.objects.all().delete()
 52 |         ExtendedMockObject.objects.all().delete()
 53 |         LimitedMockObject.objects.all().delete()
 54 |         OverloadMockObject.objects.all().delete()
 55 |         SecondaryMockObject.objects.all().delete()
 56 | 
 57 | 
 58 | class PostgresCopyToTest(BaseTest):
 59 |     def setUp(self):
 60 |         super().setUp()
 61 |         self.export_path = os.path.join(os.path.dirname(__file__), "export.csv")
 62 |         self.export_files = [io.StringIO(), io.BytesIO()]
 63 | 
 64 |     def tearDown(self):
 65 |         super().tearDown()
 66 |         if os.path.exists(self.export_path):
 67 |             os.remove(self.export_path)
 68 | 
 69 |     def _load_objects(
 70 |         self, file_path, mapping=dict(name="NAME", number="NUMBER", dt="DATE")
 71 |     ):
 72 |         MockObject.objects.from_csv(file_path, mapping)
 73 | 
 74 |     def _load_secondary_objects(self, file_path, mapping=dict(text="TEXT")):
 75 |         SecondaryMockObject.objects.from_csv(file_path, mapping)
 76 | 
 77 |     # These tests are using simple enough databases that they can safely proceed
 78 |     # with uploading objects from CSV despite being within a transaction block.
 79 |     # In particular, Django wraps all tests in a transaction so that database
 80 |     # changes can be rolled back.  Therefore, we bypass validate_no_atomic_block
 81 |     # here and elsewhere.
 82 |     @mock.patch("django.db.connection.validate_no_atomic_block")
 83 |     def test_export(self, _):
 84 |         self._load_objects(self.name_path)
 85 |         MockObject.objects.to_csv(self.export_path)
 86 |         self.assertTrue(os.path.exists(self.export_path))
 87 |         reader = csv.DictReader(open(self.export_path))
 88 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
 89 | 
 90 |     @mock.patch("django.db.connection.validate_no_atomic_block")
 91 |     def test_export_to_file(self, _):
 92 |         self._load_objects(self.name_path)
 93 |         for f in self.export_files:
 94 |             MockObject.objects.to_csv(f)
 95 |             reader = csv.DictReader(f)
 96 |             self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
 97 | 
 98 |     @mock.patch("django.db.connection.validate_no_atomic_block")
 99 |     def test_export_to_str(self, _):
100 |         self._load_objects(self.name_path)
101 |         first_id = MockObject.objects.order_by("id").first().id
102 |         export = MockObject.objects.to_csv()
103 |         self.assertEqual(
104 |             export,
105 |             f"""id,name,num,dt,parent_id
106 | {first_id},BEN,1,2012-01-01,
107 | {first_id + 1},JOE,2,2012-01-02,
108 | {first_id + 2},JANE,3,2012-01-03,
109 | """.encode(),
110 |         )
111 | 
112 |     @mock.patch("django.db.connection.validate_no_atomic_block")
113 |     def test_export_header_setting(self, _):
114 |         self._load_objects(self.name_path)
115 |         MockObject.objects.to_csv(self.export_path)
116 |         reader = csv.DictReader(open(self.export_path))
117 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
118 |         MockObject.objects.to_csv(self.export_path, header=True)
119 |         reader = csv.DictReader(open(self.export_path))
120 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
121 |         MockObject.objects.to_csv(self.export_path, header=False)
122 |         reader = csv.DictReader(open(self.export_path))
123 |         with self.assertRaises(KeyError):
124 |             [i["name"] for i in reader]
125 |         self.assertTrue(["JOE", "JANE"], [i["BEN"] for i in reader])
126 | 
127 |     @mock.patch("django.db.connection.validate_no_atomic_block")
128 |     def test_export_delimiter(self, _):
129 |         self._load_objects(self.name_path)
130 |         MockObject.objects.to_csv(self.export_path, delimiter=";")
131 |         self.assertTrue(os.path.exists(self.export_path))
132 |         reader = csv.DictReader(open(self.export_path), delimiter=";")
133 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
134 | 
135 |     @mock.patch("django.db.connection.validate_no_atomic_block")
136 |     def test_export_null_string(self, _):
137 |         self._load_objects(self.blank_null_path)
138 |         MockObject.objects.to_csv(self.export_path)
139 |         self.assertTrue(os.path.exists(self.export_path))
140 |         reader = csv.DictReader(open(self.export_path))
141 |         self.assertTrue(["1", "2", "3", "", ""], [i["num"] for i in reader])
142 | 
143 |         MockObject.objects.to_csv(self.export_path, null="NULL")
144 |         self.assertTrue(os.path.exists(self.export_path))
145 |         reader = csv.DictReader(open(self.export_path))
146 |         self.assertTrue(["1", "2", "3", "NULL", ""], [i["num"] for i in reader])
147 | 
148 |     @mock.patch("django.db.connection.validate_no_atomic_block")
149 |     def test_export_quote_character_and_force_quoting(self, _):
150 |         self._load_objects(self.name_path)
151 | 
152 |         # Single column being force_quoted with pipes
153 |         MockObject.objects.to_csv(self.export_path, quote="|", force_quote="NAME")
154 |         self.assertTrue(os.path.exists(self.export_path))
155 |         reader = csv.DictReader(open(self.export_path))
156 |         self.assertTrue(["|BEN|", "|JOE|", "|JANE|"], [i["name"] for i in reader])
157 | 
158 |         # Multiple columns passed as a list and force_quoted with pipes
159 |         MockObject.objects.to_csv(
160 |             self.export_path, quote="|", force_quote=["NAME", "DT"]
161 |         )
162 |         self.assertTrue(os.path.exists(self.export_path))
163 |         reader = csv.DictReader(open(self.export_path))
164 |         self.assertTrue(
165 |             [
166 |                 ("|BEN|", "|2012-01-01|"),
167 |                 ("|JOE|", "|2012-01-02|"),
168 |                 ("|JANE|", "|2012-01-03|"),
169 |             ],
170 |             [(i["name"], i["dt"]) for i in reader],
171 |         )
172 | 
173 |         # All columns force_quoted with pipes
174 |         MockObject.objects.to_csv(self.export_path, quote="|", force_quote=True)
175 |         self.assertTrue(os.path.exists(self.export_path))
176 |         reader = csv.DictReader(open(self.export_path))
177 |         reader = next(reader)
178 |         self.assertTrue(["|BEN|", "|1|", "|2012-01-01|"], list(reader.values())[1:])
179 | 
180 |     @mock.patch("django.db.connection.validate_no_atomic_block")
181 |     def test_export_encoding(self, _):
182 |         self._load_objects(self.name_path)
183 | 
184 |         # Function should pass on valid inputs ('utf-8', 'Unicode', 'LATIN2')
185 |         # If these don't raise an error, then they passed nicely
186 |         MockObject.objects.to_csv(self.export_path, encoding="utf-8")
187 |         MockObject.objects.to_csv(self.export_path, encoding="Unicode")
188 |         MockObject.objects.to_csv(self.export_path, encoding="LATIN2")
189 | 
190 |         # Function should fail on known invalid inputs ('ASCII', 'utf-16')
191 |         with pytest.raises(Error) as exc_info:
192 |             # since `to_csv` causes a db error we need an atomic block to make
193 |             # sure the db connection is restored, so that e.g. the next
194 |             # assertion and our teardown can run
195 |             with transaction.atomic():
196 |                 MockObject.objects.to_csv(self.export_path, encoding="utf-16")
197 |         assert "must be a valid encoding" in str(exc_info.value)
198 | 
199 |         with pytest.raises(Error) as exc_info2:
200 |             # since `to_csv` causes a db error we need an atomic block to make
201 |             # sure the db connection is restored, so that e.g. our teardown
202 |             # can run
203 |             with transaction.atomic():
204 |                 MockObject.objects.to_csv(self.export_path, encoding="ASCII")
205 |         assert "must be a valid encoding" in str(exc_info2.value)
206 | 
207 |     @mock.patch("django.db.connection.validate_no_atomic_block")
208 |     def test_export_escape_character(self, _):
209 |         self._load_objects(self.name_path)
210 | 
211 |         # Function should not fail on known valid inputs
212 |         MockObject.objects.to_csv(self.export_path, escape="-")
213 | 
214 |         # Function should fail on known invalid inputs
215 |         with pytest.raises(Error) as exc_info:
216 |             # since `to_csv` causes a db error we need an atomic block to make
217 |             # sure the db connection is restored, so that e.g. our teardown
218 |             # can run
219 |             with transaction.atomic():
220 |                 MockObject.objects.to_csv(self.export_path, escape="--")
221 |         assert "escape must be a single" in str(exc_info.value)
222 | 
223 |     @mock.patch("django.db.connection.validate_no_atomic_block")
224 |     def test_filter(self, _):
225 |         self._load_objects(self.name_path)
226 |         MockObject.objects.filter(name="BEN").to_csv(self.export_path)
227 |         reader = csv.DictReader(open(self.export_path))
228 |         self.assertTrue(["BEN"], [i["name"] for i in reader])
229 | 
230 |     @mock.patch("django.db.connection.validate_no_atomic_block")
231 |     def test_fewer_fields(self, _):
232 |         self._load_objects(self.name_path)
233 |         MockObject.objects.to_csv(self.export_path, "name")
234 |         reader = csv.DictReader(open(self.export_path))
235 |         for row in reader:
236 |             self.assertTrue(row["name"] in ["BEN", "JOE", "JANE"])
237 |             self.assertTrue(len(row.keys()), 1)
238 | 
239 |     @mock.patch("django.db.connection.validate_no_atomic_block")
240 |     def test_related_fields(self, _):
241 |         MockFKObject.objects.from_csv(
242 |             self.foreign_path,
243 |             mapping=dict(
244 |                 id="NUMBER", name="NAME", number="NUMBER", dt="DATE", parent="PARENT"
245 |             ),
246 |         )
247 |         MockFKObject.objects.to_csv(
248 |             self.export_path, "name", "parent__id", "parent__name"
249 |         )
250 |         reader = csv.DictReader(open(self.export_path))
251 |         for row in reader:
252 |             self.assertTrue(row["parent_id"] in ["1", "2", "3"])
253 |             self.assertTrue(len(row.keys()), 3)
254 | 
255 |     @mock.patch("django.db.connection.validate_no_atomic_block")
256 |     def test_annotate(self, _):
257 |         self._load_objects(self.name_path)
258 |         MockObject.objects.annotate(name_count=Count("name")).to_csv(self.export_path)
259 |         reader = csv.DictReader(open(self.export_path))
260 |         for row in reader:
261 |             self.assertTrue("name_count" in row)
262 |             self.assertTrue(row["name_count"] == "1")
263 | 
264 |     @mock.patch("django.db.connection.validate_no_atomic_block")
265 |     def test_extra(self, _):
266 |         self._load_objects(self.name_path)
267 |         MockObject.objects.extra(select={"lower": 'LOWER("name")'}).to_csv(
268 |             self.export_path
269 |         )
270 |         reader = csv.DictReader(open(self.export_path))
271 |         for row in reader:
272 |             self.assertTrue("lower" in row)
273 | 
274 |     @mock.patch("django.db.connection.validate_no_atomic_block")
275 |     def test_export_multi_db(self, _):
276 |         self._load_objects(self.name_path)
277 |         self._load_secondary_objects(self.secondarydb_path)
278 | 
279 |         MockObject.objects.to_csv(self.export_path)
280 |         self.assertTrue(os.path.exists(self.export_path))
281 |         reader = csv.DictReader(open(self.export_path))
282 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
283 | 
284 |         SecondaryMockObject.objects.to_csv(self.export_path)
285 |         self.assertTrue(os.path.exists(self.export_path))
286 |         reader = csv.DictReader(open(self.export_path))
287 |         items = [i["text"] for i in reader]
288 |         self.assertEqual(len(items), 3)
289 |         self.assertEqual(
290 |             ["SECONDARY TEXT 1", "SECONDARY TEXT 2", "SECONDARY TEXT 3"], items
291 |         )
292 | 
293 | 
294 | class PostgresCopyFromTest(BaseTest):
295 |     def test_bad_call(self):
296 |         with self.assertRaises(TypeError):
297 |             CopyMapping()
298 | 
299 |     def test_bad_csv(self):
300 |         with self.assertRaises(ValueError):
301 |             CopyMapping(
302 |                 MockObject,
303 |                 "/foobar.csv",
304 |                 dict(name="NAME", number="NUMBER", dt="DATE"),
305 |                 using="sqlite",
306 |             )
307 | 
308 |     def test_bad_backend(self):
309 |         with self.assertRaises(TypeError):
310 |             CopyMapping(
311 |                 MockObject,
312 |                 self.name_path,
313 |                 dict(name="NAME", number="NUMBER", dt="DATE"),
314 |                 using="sqlite",
315 |             )
316 | 
317 |     def test_bad_header(self):
318 |         with self.assertRaises(ValueError):
319 |             CopyMapping(
320 |                 MockObject,
321 |                 self.name_path,
322 |                 dict(name="NAME1", number="NUMBER", dt="DATE"),
323 |             )
324 | 
325 |     def test_bad_field(self):
326 |         with self.assertRaises(FieldDoesNotExist):
327 |             CopyMapping(
328 |                 MockObject,
329 |                 self.name_path,
330 |                 dict(name1="NAME", number="NUMBER", dt="DATE"),
331 |             )
332 | 
333 |     def test_limited_fields(self):
334 |         CopyMapping(
335 |             LimitedMockObject,
336 |             self.name_path,
337 |             dict(name="NAME", dt="DATE"),
338 |         )
339 | 
340 |     @mock.patch("django.db.connection.validate_no_atomic_block")
341 |     def test_simple_save_with_fileobject(self, _):
342 |         f = open(self.name_path)
343 |         MockObject.objects.from_csv(f, dict(name="NAME", number="NUMBER", dt="DATE"))
344 |         self.assertEqual(MockObject.objects.count(), 3)
345 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
346 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
347 | 
348 |     @mock.patch("django.db.connection.validate_no_atomic_block")
349 |     def test_save_with_binary_fileobject(self, _):
350 |         f = open(self.name_path, "rb")
351 |         MockObject.objects.from_csv(f, dict(name="NAME", number="NUMBER", dt="DATE"))
352 |         self.assertEqual(MockObject.objects.count(), 3)
353 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
354 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
355 | 
356 |     def test_atomic_block(self):
357 |         with transaction.atomic():
358 |             try:
359 |                 f = open(self.name_path)
360 |                 MockObject.objects.from_csv(
361 |                     f, dict(name="NAME", number="NUMBER", dt="DATE")
362 |                 )
363 |                 self.fail("Expected TransactionManagementError.")
364 |             except TransactionManagementError:
365 |                 # Expected
366 |                 pass
367 | 
368 |     @mock.patch("django.db.connection.validate_no_atomic_block")
369 |     def test_simple_save(self, _):
370 |         insert_count = MockObject.objects.from_csv(
371 |             self.name_path, dict(name="NAME", number="NUMBER", dt="DATE")
372 |         )
373 |         self.assertEqual(MockObject.objects.count(), 3)
374 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
375 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
376 |         self.assertEqual(insert_count, 3)
377 | 
378 |     @mock.patch("django.db.connection.validate_no_atomic_block")
379 |     def test_loud_save(self, _):
380 |         MockObject.objects.from_csv(
381 |             self.name_path,
382 |             mapping=dict(name="NAME", number="NUMBER", dt="DATE"),
383 |             silent=False,
384 |         )
385 | 
386 |     @mock.patch("django.db.connection.validate_no_atomic_block")
387 |     def test_match_heading(self, _):
388 |         MockObject.objects.from_csv(self.matching_headers_path)
389 |         self.assertEqual(MockObject.objects.count(), 3)
390 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
391 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
392 | 
393 |     @mock.patch("django.db.connection.validate_no_atomic_block")
394 |     def test_bad_match_heading(self, _):
395 |         with self.assertRaises(FieldDoesNotExist):
396 |             MockObject.objects.from_csv(self.name_path)
397 | 
398 |     @mock.patch("django.db.connection.validate_no_atomic_block")
399 |     def test_limited_save(self, _):
400 |         LimitedMockObject.objects.from_csv(self.name_path, dict(name="NAME", dt="DATE"))
401 |         self.assertEqual(LimitedMockObject.objects.count(), 3)
402 |         self.assertEqual(LimitedMockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
403 | 
404 |     @mock.patch("django.db.connection.validate_no_atomic_block")
405 |     def test_save_foreign_key(self, _):
406 |         MockFKObject.objects.from_csv(
407 |             self.foreign_path,
408 |             dict(id="NUMBER", name="NAME", number="NUMBER", dt="DATE", parent="PARENT"),
409 |         )
410 |         self.assertEqual(MockFKObject.objects.count(), 3)
411 |         self.assertEqual(MockFKObject.objects.get(name="BEN").parent_id, 3)
412 |         self.assertEqual(MockFKObject.objects.get(name="BEN").dt, date(2012, 1, 1))
413 | 
414 |     @mock.patch("django.db.connection.validate_no_atomic_block")
415 |     def test_save_foreign_key_by_id(self, _):
416 |         MockFKObject.objects.from_csv(
417 |             self.foreign_path,
418 |             dict(
419 |                 id="NUMBER", name="NAME", number="NUMBER", dt="DATE", parent_id="PARENT"
420 |             ),
421 |         )
422 |         self.assertEqual(MockFKObject.objects.count(), 3)
423 |         self.assertEqual(MockFKObject.objects.get(name="BEN").parent_id, 3)
424 |         self.assertEqual(MockFKObject.objects.get(name="BEN").dt, date(2012, 1, 1))
425 | 
426 |     @mock.patch("django.db.connection.validate_no_atomic_block")
427 |     def test_save_pk_field_type(self, _):
428 |         # Django casts PK fields to "serial"
429 |         MockObject.objects.from_csv(
430 |             self.name_path,
431 |             dict(id="NUMBER", name="NAME", dt="DATE"),
432 |         )
433 |         self.assertEqual(MockObject.objects.count(), 3)
434 | 
435 |     def test_silent_save(self):
436 |         c = CopyMapping(
437 |             MockObject,
438 |             self.name_path,
439 |             dict(name="NAME", number="NUMBER", dt="DATE"),
440 |         )
441 |         c.save(silent=True)
442 |         self.assertEqual(MockObject.objects.count(), 3)
443 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
444 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
445 | 
446 |     @mock.patch("django.db.connection.validate_no_atomic_block")
447 |     def test_pipe_save(self, _):
448 |         MockObject.objects.from_csv(
449 |             self.pipe_path,
450 |             dict(name="NAME", number="NUMBER", dt="DATE"),
451 |             delimiter="|",
452 |         )
453 |         self.assertEqual(MockObject.objects.count(), 3)
454 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
455 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
456 | 
457 |     @mock.patch("django.db.connection.validate_no_atomic_block")
458 |     def test_quote_save(self, _):
459 |         MockObject.objects.from_csv(
460 |             self.quote_path,
461 |             dict(name="NAME", number="NUMBER", dt="DATE"),
462 |             delimiter="\t",
463 |             quote_character="`",
464 |         )
465 |         self.assertEqual(MockObject.objects.count(), 3)
466 |         self.assertEqual(MockObject.objects.get(number=1).name, "B`EN")
467 |         self.assertEqual(MockObject.objects.get(number=2).name, "JO\tE")
468 |         self.assertEqual(MockObject.objects.get(number=3).name, 'JAN"E')
469 | 
470 |     @mock.patch("django.db.connection.validate_no_atomic_block")
471 |     def test_null_save(self, _):
472 |         MockObject.objects.from_csv(
473 |             self.null_path,
474 |             dict(name="NAME", number="NUMBER", dt="DATE"),
475 |             null="",
476 |         )
477 |         self.assertEqual(MockObject.objects.count(), 5)
478 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
479 |         self.assertEqual(MockObject.objects.get(name="NULLBOY").number, None)
480 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
481 | 
482 |     @mock.patch("django.db.connection.validate_no_atomic_block")
483 |     def test_force_not_null_save(self, _):
484 |         MockBlankObject.objects.from_csv(
485 |             self.blank_null_path,
486 |             dict(name="NAME", number="NUMBER", dt="DATE", color="COLOR"),
487 |             force_not_null=("COLOR",),
488 |         )
489 |         self.assertEqual(MockBlankObject.objects.count(), 5)
490 |         self.assertEqual(MockBlankObject.objects.get(name="BEN").color, "red")
491 |         self.assertEqual(MockBlankObject.objects.get(name="NULLBOY").color, "")
492 |         self.assertEqual(MockBlankObject.objects.get(name="BEN").dt, date(2012, 1, 1))
493 | 
494 |     @mock.patch("django.db.connection.validate_no_atomic_block")
495 |     def test_force_null_save(self, _):
496 |         MockObject.objects.from_csv(
497 |             self.null_path,
498 |             dict(name="NAME", number="NUMBER", dt="DATE"),
499 |             force_null=("NUMBER",),
500 |         )
501 |         self.assertEqual(MockObject.objects.count(), 5)
502 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
503 |         self.assertEqual(MockObject.objects.get(name="NULLBOY").number, None)
504 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
505 | 
506 |     @mock.patch("django.db.connection.validate_no_atomic_block")
507 |     def test_backwards_save(self, _):
508 |         MockObject.objects.from_csv(
509 |             self.backwards_path,
510 |             dict(name="NAME", number="NUMBER", dt="DATE"),
511 |         )
512 |         self.assertEqual(MockObject.objects.count(), 3)
513 |         self.assertEqual(MockObject.objects.get(name="BEN").number, 1)
514 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
515 | 
516 |     @mock.patch("django.db.connection.validate_no_atomic_block")
517 |     def test_field_override_save(self, _):
518 |         MockObject.objects.from_csv(
519 |             self.null_path,
520 |             dict(name="NAME", number="NUMBER", dt="DATE"),
521 |         )
522 |         self.assertEqual(MockObject.objects.count(), 5)
523 |         self.assertEqual(MockObject.objects.get(name="BADBOY").number, None)
524 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
525 | 
526 |     @mock.patch("django.db.connection.validate_no_atomic_block")
527 |     def test_encoding_save(self, _):
528 |         MockObject.objects.from_csv(
529 |             self.null_path,
530 |             dict(name="NAME", number="NUMBER", dt="DATE"),
531 |             encoding="UTF-8",
532 |         )
533 |         self.assertEqual(MockObject.objects.count(), 5)
534 |         self.assertEqual(MockObject.objects.get(name="BADBOY").number, None)
535 |         self.assertEqual(MockObject.objects.get(name="BEN").dt, date(2012, 1, 1))
536 | 
537 |     @mock.patch("django.db.connection.validate_no_atomic_block")
538 |     def test_ignore_conflicts(self, _):
539 |         UniqueMockObject.objects.from_csv(
540 |             self.name_path, dict(name="NAME"), ignore_conflicts=True
541 |         )
542 |         UniqueMockObject.objects.from_csv(
543 |             self.name_path, dict(name="NAME"), ignore_conflicts=True
544 |         )
545 | 
546 |     @mock.patch("django.db.connection.validate_no_atomic_block")
547 |     def test_static_values(self, _):
548 |         ExtendedMockObject.objects.from_csv(
549 |             self.name_path,
550 |             dict(name="NAME", number="NUMBER", dt="DATE"),
551 |             static_mapping=dict(static_val=1, static_string="test"),
552 |         )
553 |         self.assertEqual(ExtendedMockObject.objects.filter(static_val=1).count(), 3)
554 |         self.assertEqual(
555 |             ExtendedMockObject.objects.filter(static_string="test").count(), 3
556 |         )
557 | 
558 |     @mock.patch("django.db.connection.validate_no_atomic_block")
559 |     def test_bad_static_values(self, _):
560 |         with self.assertRaises(ValueError):
561 |             ExtendedMockObject.objects.from_csv(
562 |                 self.name_path,
563 |                 dict(name="NAME", number="NUMBER", dt="DATE"),
564 |                 encoding="UTF-8",
565 |                 static_mapping=dict(static_bad=1),
566 |             )
567 | 
568 |     @mock.patch("django.db.connection.validate_no_atomic_block")
569 |     def test_overload_save(self, _):
570 |         OverloadMockObject.objects.from_csv(
571 |             self.name_path,
572 |             dict(
573 |                 name="NAME",
574 |                 lower_name="NAME",
575 |                 upper_name="NAME",
576 |                 number="NUMBER",
577 |                 dt="DATE",
578 |             ),
579 |         )
580 |         self.assertEqual(OverloadMockObject.objects.count(), 3)
581 |         self.assertEqual(OverloadMockObject.objects.get(name="ben").number, 1)
582 |         self.assertEqual(OverloadMockObject.objects.get(lower_name="ben").number, 1)
583 |         self.assertEqual(OverloadMockObject.objects.get(upper_name="BEN").number, 1)
584 |         self.assertEqual(
585 |             OverloadMockObject.objects.get(name="ben").dt, date(2012, 1, 1)
586 |         )
587 |         omo = OverloadMockObject.objects.first()
588 |         self.assertEqual(omo.name.lower(), omo.lower_name)
589 | 
590 |     def test_missing_overload_field(self):
591 |         with self.assertRaises(FieldDoesNotExist):
592 |             CopyMapping(
593 |                 OverloadMockObject,
594 |                 self.name_path,
595 |                 dict(name="NAME", number="NUMBER", dt="DATE", missing="NAME"),
596 |             )
597 | 
598 |     def test_save_steps(self):
599 |         c = CopyMapping(
600 |             MockObject,
601 |             self.name_path,
602 |             dict(name="NAME", number="NUMBER", dt="DATE"),
603 |         )
604 |         cursor = c.conn.cursor()
605 | 
606 |         c.create(cursor)
607 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
608 |         self.assertEqual(cursor.fetchone()[0], 0)
609 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
610 |         self.assertEqual(cursor.fetchone()[0], 0)
611 | 
612 |         c.copy(cursor)
613 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
614 |         self.assertEqual(cursor.fetchone()[0], 3)
615 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
616 |         self.assertEqual(cursor.fetchone()[0], 0)
617 | 
618 |         c.insert(cursor)
619 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
620 |         self.assertEqual(cursor.fetchone()[0], 3)
621 | 
622 |         c.drop(cursor)
623 |         self.assertEqual(cursor.statusmessage, "DROP TABLE")
624 |         cursor.close()
625 | 
626 |     def test_save_steps_with_temp_table_name_override(self):
627 |         c = CopyMapping(
628 |             MockObject,
629 |             self.name_path,
630 |             dict(name="NAME", number="NUMBER", dt="DATE"),
631 |             temp_table_name="overridden_temp_table_name",
632 |         )
633 |         cursor = c.conn.cursor()
634 | 
635 |         c.create(cursor)
636 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
637 |         self.assertEqual(cursor.fetchone()[0], 0)
638 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
639 |         self.assertEqual(cursor.fetchone()[0], 0)
640 | 
641 |         c.copy(cursor)
642 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.temp_table_name)
643 |         self.assertEqual(cursor.fetchone()[0], 3)
644 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
645 |         self.assertEqual(cursor.fetchone()[0], 0)
646 | 
647 |         c.insert(cursor)
648 |         cursor.execute("""SELECT count(*) FROM %s;""" % c.model._meta.db_table)
649 |         self.assertEqual(cursor.fetchone()[0], 3)
650 | 
651 |         c.drop(cursor)
652 |         self.assertEqual(cursor.statusmessage, "DROP TABLE")
653 |         cursor.close()
654 | 
655 |     def test_hooks(self):
656 |         c = HookedCopyMapping(
657 |             MockObject,
658 |             self.name_path,
659 |             dict(name="NAME", number="NUMBER", dt="DATE"),
660 |         )
661 |         cursor = c.conn.cursor()
662 | 
663 |         c.create(cursor)
664 |         self.assertRaises(AttributeError, lambda: c.ran_pre_copy)
665 |         self.assertRaises(AttributeError, lambda: c.ran_post_copy)
666 |         self.assertRaises(AttributeError, lambda: c.ran_pre_insert)
667 |         self.assertRaises(AttributeError, lambda: c.ran_post_insert)
668 | 
669 |         c.copy(cursor)
670 |         self.assertTrue(c.ran_pre_copy)
671 |         self.assertTrue(c.ran_post_copy)
672 |         self.assertRaises(AttributeError, lambda: c.ran_pre_insert)
673 |         self.assertRaises(AttributeError, lambda: c.ran_post_insert)
674 | 
675 |         c.insert(cursor)
676 |         self.assertTrue(c.ran_pre_copy)
677 |         self.assertTrue(c.ran_post_copy)
678 |         self.assertTrue(c.ran_pre_insert)
679 |         self.assertTrue(c.ran_post_insert)
680 | 
681 |         c.drop(cursor)
682 |         cursor.close()
683 | 
684 | 
685 | class MultiDbTest(BaseTest):
686 |     @mock.patch("django.db.connection.validate_no_atomic_block")
687 |     def test_from_csv(self, _):
688 |         MockObject.objects.from_csv(
689 |             self.name_path, dict(name="NAME", number="NUMBER", dt="DATE"), using="other"
690 |         )
691 |         self.assertEqual(MockObject.objects.count(), 0)
692 |         self.assertEqual(MockObject.objects.using("other").count(), 3)
693 |         self.assertEqual(MockObject.objects.using("other").get(name="BEN").number, 1)
694 |         self.assertEqual(
695 |             MockObject.objects.using("other").get(name="BEN").dt, date(2012, 1, 1)
696 |         )
697 |         MockObject.objects.using("other").all().delete()
698 | 
699 |     @mock.patch("django.db.connection.validate_no_atomic_block")
700 |     def test_to_csv(self, _):
701 |         # First with the default database
702 |         mapping = dict(name="NAME", number="NUMBER", dt="DATE")
703 |         MockObject.objects.from_csv(self.name_path, mapping)
704 |         export_path = os.path.join(os.path.dirname(__file__), "default.csv")
705 |         MockObject.objects.to_csv(export_path)
706 |         self.assertTrue(os.path.exists(export_path))
707 |         reader = csv.DictReader(open(export_path))
708 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
709 |         os.remove(export_path)
710 | 
711 |     @mock.patch("django.db.connection.validate_no_atomic_block")
712 |     def test_to_csv_from_alt_db(self, _):
713 |         # Next with the other database
714 |         mapping = dict(name="NAME", number="NUMBER", dt="DATE")
715 |         MockObject.objects.from_csv(self.name_path, mapping, using="other")
716 |         export_path = os.path.join(os.path.dirname(__file__), "other.csv")
717 |         MockObject.objects.using("other").to_csv(export_path)
718 |         self.assertTrue(os.path.exists(export_path))
719 |         reader = csv.DictReader(open(export_path))
720 |         self.assertTrue(["BEN", "JOE", "JANE"], [i["name"] for i in reader])
721 |         MockObject.objects.using("other").all().delete()
722 |         os.remove(export_path)
723 | 


--------------------------------------------------------------------------------