├── .gitattributes ├── .github └── workflows │ └── build-and-test.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── cibuildwheel_test.bash ├── pgbuild └── Makefile ├── pgserver-example.ipynb ├── pgserver.png ├── pgserver_square_small.png ├── pyproject.toml ├── setup.py ├── src └── pgserver │ ├── __init__.py │ ├── _build.py │ ├── _commands.py │ ├── postgres_server.py │ ├── py.typed │ └── utils.py └── tests ├── __init__.py └── test_pgserver.py /.gitattributes: -------------------------------------------------------------------------------- 1 | pgserver/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/build-and-test.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | on: 3 | push: 4 | branches: 5 | - testing 6 | - main 7 | release: 8 | types: 9 | - created 10 | workflow_dispatch: 11 | jobs: 12 | build_wheels: 13 | name: Build wheels on ${{ matrix.os }} 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | include: 19 | - os: macos-12 20 | arch: x86_64 21 | deployment-target: '10.9' 22 | - os: macos-latest 23 | arch: arm64 24 | deployment-target: '11.0' 25 | - os: ubuntu-latest 26 | arch: x86_64 27 | deployment-target: '' 28 | - os: windows-2022 29 | arch: AMD64 30 | deployment-target: '' 31 | steps: 32 | - uses: actions/checkout@v4 33 | - uses: actions/setup-python@v4 34 | if: matrix.os != 'ubuntu-latest' 35 | with: 36 | python-version: '3.10' 37 | - uses: actions/setup-python@v4 38 | if: matrix.os == 'ubuntu-latest' 39 | # for testing due to docker env issues 40 | with: 41 | python-version: '3.9' 42 | - name: Install cibuildwheel 43 | run: | 44 | python -m pip install --upgrade pip 45 | python -m pip install --upgrade cibuildwheel 46 | - name: Restore postgres build from cache 47 | if: ${{ matrix.os != 'ubuntu-latest' }} 48 | id: restore-postgres 49 | uses: actions/cache/restore@v3 50 | env: 51 | cache-name: cache-postgres 52 | with: 53 | path: | 54 | pgbuild 55 | src/pgserver/pginstall 56 | key: ${{ runner.os }}-${{ runner.arch }}-build-${{ env.cache-name }}-${{ 57 | hashFiles('Makefile', 'pgbuild/Makefile', '.github/workflows/build-and-test.yml') }} 58 | - name: Build postgres and pgvector 59 | if: ${{ matrix.os != 'ubuntu-latest' && ! steps.restore-postgres.outputs.cache-hit }} 60 | env: 61 | MACOSX_DEPLOYMENT_TARGET: ${{ matrix.deployment-target }} 62 | # this step is implied by Build wheels, but we do it here for caching before python tests run 63 | # on ubuntu, cibuildwheel will run this step within a docker container, so it cannot use the cache this way 64 | run: make 65 | - name: Save postgres build 66 | if: ${{ matrix.os != 'ubuntu-latest' && ! steps.restore-postgres.outputs.cache-hit }} 67 | id: cache-postgres 68 | uses: actions/cache/save@v3 69 | env: 70 | cache-name: cache-postgres 71 | with: 72 | path: | 73 | pgbuild 74 | src/pgserver/pginstall 75 | key: ${{ runner.os }}-${{ runner.arch }}-build-${{ env.cache-name }}-${{ 76 | hashFiles('Makefile', 'pgbuild/Makefile', '.github/workflows/build-and-test.yml') }} 77 | - name: Build wheels 78 | env: 79 | CIBW_ARCHS: ${{ matrix.arch }} 80 | CIBW_SKIP: pp* cp38-* *-musllinux* 81 | MACOSX_DEPLOYMENT_TARGET: ${{ matrix.deployment-target }} 82 | run: python -m cibuildwheel --output-dir wheelhouse 83 | - name: Save postgres build 84 | if: ${{ matrix.os == 'ubuntu-latest' && ! 
steps.restore-postgres.outputs.cache-hit }} 85 | id: cache-postgres2 86 | uses: actions/cache/save@v3 87 | env: 88 | cache-name: cache-postgres 89 | with: 90 | path: | 91 | pgbuild 92 | src/pgserver/pginstall 93 | key: ${{ runner.os }}-${{ runner.arch }}-build-${{ env.cache-name }}-${{ 94 | hashFiles('Makefile', 'pgbuild/Makefile', '.github/workflows/build-and-test.yml') }} 95 | - uses: actions/upload-artifact@v3 96 | with: 97 | path: wheelhouse/*.whl 98 | name: python-package-distributions 99 | publish-to-pypi: 100 | if: ${{ startsWith(github.ref, 'refs/tags/') }} 101 | name: Publish Python dist to PyPI 102 | needs: 103 | - build_wheels 104 | runs-on: ubuntu-latest 105 | environment: 106 | name: pypi 107 | url: https://pypi.org/p/pgserver # Replace with your PyPI project name 108 | permissions: 109 | id-token: write # IMPORTANT: mandatory for trusted publishing 110 | steps: 111 | - name: Download all the dists 112 | uses: actions/download-artifact@v3 113 | with: 114 | name: python-package-distributions 115 | path: dist/ 116 | - name: Publish distribution 📦 to PyPI 117 | uses: pypa/gh-action-pypi-publish@release/v1 118 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | wheelhouse/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Pycharm 133 | .idea 134 | 135 | build/ 136 | pgbuild/ 137 | src/pgserver/pginstall 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft src/pgserver/pginstall -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := build 2 | .PHONY: build wheel install-wheel install-dev clean test 3 | 4 | build: 5 | $(MAKE) -d -C pgbuild all 6 | 7 | wheel: build 8 | python setup.py bdist_wheel 9 | 10 | install-wheel: wheel 11 | python -m pip install --force-reinstall dist/*.whl 12 | 13 | install-dev: build 14 | python -m pip install --force-reinstall -e . 15 | 16 | clean: 17 | rm -rf build/ wheelhouse/ dist/ .eggs/ 18 | $(MAKE) -C pgbuild clean 19 | 20 | test: 21 | python -m pytest tests/ 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Python Version](https://img.shields.io/badge/python-3.9%2C%203.10%2C%203.11%2C%203.12-blue) 2 | ![Postgres Version](https://img.shields.io/badge/PostgreSQL-16.2-blue) 3 | 4 | ![Linux Support](https://img.shields.io/badge/Linux%20Support-manylinux-green) 5 | ![macOS Apple Silicon Support >=11](https://img.shields.io/badge/macOS%20Apple%20Silicon%20Support-%E2%89%A511(BigSur)-green) 6 | ![macOS Intel Support => 10.0](https://img.shields.io/badge/macOS%20Intel%20Support-%E2%89%A510.9-green) 7 | ![Windows Support >= 2022](https://img.shields.io/badge/Windows%20AMD64%20Support-%E2%89%A52022-green) 8 | 9 | [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0) 10 | [![PyPI Package](https://img.shields.io/pypi/v/pgserver?color=darkorange)](https://pypi.org/project/pgserver) 11 | ![PyPI - Downloads](https://img.shields.io/pypi/dm/pgserver) 12 | 13 | 14 |

15 | 16 |

17 | 18 | # pgserver: pip-installable, embedded postgres server + pgvector extension for your python app 19 | 20 | `pgserver` lets you build Postgres-backed python apps with the same convenience afforded by an embedded database (i.e., alternatives such as sqlite). 21 | If you build your app with pgserver, your app remains wholly pip-installable, saving your users from needing to understand how to set up a postgres server (they simply pip install your app, and postgres is brought in through dependencies), and letting you get started developing quickly: just `pip install pgserver` and call `pgserver.get_server(...)`, as shown in this notebook: Open In Colab 22 | 23 | To achieve this, you need two things, which `pgserver` provides: 24 | * python binary wheels for multiple platforms that include the postgres binaries 25 | * convenience python methods that handle db initialization and server process management, dealing with things that would normally prevent your python app from running seamlessly in environments like docker containers, machines where you have no root access, machines with other running postgres servers, google colab, etc. One main goal of the project is robustness around this. 26 | 27 | Additionally, this package includes the [pgvector](https://github.com/pgvector/pgvector) postgres extension, useful for storing associated vector data and for vector similarity queries. 28 | 29 | ## Basic summary: 30 | * _Pip installable binaries_: built and tested on Manylinux, MacOS and Windows. 31 | * _No sudo or admin rights needed_: Does not require `root` privileges or `sudo`. 32 | * ...but _can handle root_: in some environments your python app runs as root, e.g. docker or google colab; `pgserver` handles this case. 33 | * _Simpler initialization_: a single `pgserver.get_server(MY_DATA_DIR)` call initializes the data directory and starts the server if needed, so you don't need to deal with `initdb`, `pg_ctl`, or port conflicts. 34 | * _Convenient cleanup_: server process cleanup is done for you: when the process using pgserver ends, the server is shut down, including when multiple independent processes call 35 | `pgserver.get_server(MY_DATA_DIR)` on the same dir (shutdown waits for the last one). You can blow away your PGDATA dir and start again. 36 | * For lower-level control, wrappers around all the binaries, such as `initdb`, `pg_ctl`, `psql`, `pg_config` (see the example at the end of this README). Header files are included in case you wish to build some other extension and use it against these binaries. 37 | 38 | ```py 39 | # Example 1: postgres-backed application 40 | import pgserver 41 | 42 | db = pgserver.get_server(MYPGDATA) 43 | # server ready for connection. 44 | 45 | print(db.psql('create extension vector')) 46 | db_uri = db.get_uri() 47 | # use uri with sqlalchemy / psycopg, etc, see colab. 48 | 49 | # if no other process is using this server, it will be shut down at exit; 50 | # if other processes use the same pgdata, the server process will be shut down when they all stop. 51 | ``` 52 | 53 | ```py 54 | # Example 2: Testing 55 | import tempfile 56 | import pytest 57 | @pytest.fixture 58 | def tmp_postgres(): 59 | tmp_pg_data = tempfile.mkdtemp() 60 | pg = pgserver.get_server(tmp_pg_data, cleanup_mode='stop') 61 | yield pg 62 | pg.cleanup() 63 | ``` 64 | 65 | The postgres binaries in the package can be found in the directory pointed 66 | to by `pgserver.POSTGRES_BIN_PATH`, and can be used directly. 67 | 68 | This project was originally based on [postgresql-wheel](https://github.com/michelp/postgresql-wheel), which provides a linux wheel, 69 | but adds the following differences: 70 | 1. binary wheels for multiple platforms (ubuntu x86, MacOS apple silicon, MacOS x86, Windows) 71 | 2. postgres python management: cross-platform startup and cleanup including many edge cases, runs on colab etc. 72 | 3. includes the `pgvector` extension but currently excludes `PostGIS`
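
As mentioned in the summary above, every binary shipped under `pgserver.POSTGRES_BIN_PATH` is also exposed as a module-level wrapper of the same name (see `src/pgserver/_commands.py`): each wrapper takes a list of command-line arguments and returns the command's stdout as a string. A minimal sketch of this lower-level interface (output will vary by platform and install location):

```py
# Example 3: lower-level control via the bundled binaries
import pgserver

print(pgserver.POSTGRES_BIN_PATH)         # directory containing initdb, pg_ctl, psql, pg_config, ...
print(pgserver.pg_config(['--version']))  # runs `pg_config --version` and returns its stdout
print(pgserver.psql(['--version']))       # runs `psql --version`
```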
73 | -------------------------------------------------------------------------------- /cibuildwheel_test.bash: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | PROJECT=$1 3 | 4 | echo "Running on OSTYPE=$OSTYPE with UID=$UID" 5 | 6 | case "$OSTYPE" in 7 | # linux *) 8 | # echo "Tests disabled on the manylinux docker container for now" 9 | # ;; 10 | *) 11 | pytest -s -v --log-cli-level=INFO $PROJECT/tests 12 | ;; 13 | esac 14 | -------------------------------------------------------------------------------- /pgbuild/Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | INSTALL_PREFIX := $(shell pwd)/../src/pgserver/pginstall/ 3 | BUILD := $(shell pwd)/pgbuild/ 4 | 5 | .PHONY: all 6 | all: pgvector postgres 7 | 8 | ### postgres 9 | POSTGRES_VERSION := 16.2 10 | POSTGRES_URL := https://ftp.postgresql.org/pub/source/v$(POSTGRES_VERSION)/postgresql-$(POSTGRES_VERSION).tar.gz 11 | POSTGRES_SRC := postgresql-$(POSTGRES_VERSION) 12 | POSTGRES_BLD := $(POSTGRES_SRC) 13 | 14 | $(POSTGRES_SRC).tar.gz: 15 | curl -L -O $(POSTGRES_URL) 16 | 17 | ## extract 18 | $(POSTGRES_SRC)/configure: $(POSTGRES_SRC).tar.gz 19 | tar xzf $(POSTGRES_SRC).tar.gz 20 | touch $(POSTGRES_SRC)/configure 21 | 22 | ## configure 23 | $(POSTGRES_BLD)/config.status: $(POSTGRES_SRC)/configure 24 | mkdir -p $(POSTGRES_BLD) 25 | cd $(POSTGRES_BLD) && ../$(POSTGRES_SRC)/configure --prefix=$(INSTALL_PREFIX) --without-readline --without-icu 26 | 27 | ## build 28 | # https://stackoverflow.com/questions/68379786/ 29 | # for explanation of unsetting make env variables prior to calling postgres' own make 30 | $(POSTGRES_BLD)/src/bin/initdb/initdb: $(POSTGRES_BLD)/config.status 31 | unset MAKELEVEL && unset MAKEFLAGS && unset MFLAGS && $(MAKE) -C $(POSTGRES_BLD) -j 32 | 33 | ## install to INSTALL_PREFIX 34 | $(INSTALL_PREFIX)/bin/postgres: $(POSTGRES_BLD)/config.status 35 | mkdir -p $(INSTALL_PREFIX) 36 | unset MAKELEVEL && unset MAKEFLAGS && unset MFLAGS && $(MAKE) -C $(POSTGRES_BLD) install 37 | 38 | .PHONY: postgres 39 | postgres: $(INSTALL_PREFIX)/bin/postgres 40 | 41 | ### pgvector 42 | PGVECTOR_TAG := v0.6.2 43 | PGVECTOR_URL := https://github.com/pgvector/pgvector/archive/refs/tags/$(PGVECTOR_TAG).tar.gz 44 | PGVECTOR_DIR := pgvector-$(PGVECTOR_TAG) 45 | 46 | $(PGVECTOR_DIR).tar.gz: 47 | curl -L -o $(PGVECTOR_DIR).tar.gz $(PGVECTOR_URL) 48 | 49 | $(PGVECTOR_DIR)/Makefile: $(PGVECTOR_DIR).tar.gz 50 | # tar extract into pgvector-$(PGVECTOR_TAG) 51 | mkdir -p $(PGVECTOR_DIR) 52 | tar xzf $(PGVECTOR_DIR).tar.gz -C $(PGVECTOR_DIR) --strip-components=1 53 | touch $(PGVECTOR_DIR)/Makefile 54 | 55 | $(INSTALL_PREFIX)/lib/vector.so: $(PGVECTOR_DIR)/Makefile $(INSTALL_PREFIX)/bin/postgres 56 | unset MAKELEVEL && unset MAKEFLAGS && unset MFLAGS \ 57 | && export PG_CONFIG=$(INSTALL_PREFIX)/bin/pg_config \ 58 | && $(MAKE) -C $(PGVECTOR_DIR) -j \ 59 | && $(MAKE) -C $(PGVECTOR_DIR) install 60 | 61 | .PHONY: pgvector 62 | pgvector: postgres $(INSTALL_PREFIX)/lib/vector.so 63 | 64 | ### other 65 | .PHONY: clean clean-all 66 | clean: 67 | rm -rf $(INSTALL_PREFIX) 68 | rm -rf $(POSTGRES_SRC) 69 | rm -rf $(POSTGRES_BLD) 70 | rm -rf $(PGVECTOR_DIR) 71 | 72 | 
clean-all: clean 73 | rm -rf *.tar.gz -------------------------------------------------------------------------------- /pgserver-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip install pgserver sqlalchemy psycopg2-binary sqlalchemy_utils" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pgserver\n", 19 | "srv = pgserver.get_server('./mypgdata')" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 3, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "name": "stdout", 29 | "output_type": "stream", 30 | "text": [ 31 | " res \n", 32 | "-----\n", 33 | " 2\n", 34 | "(1 row)\n", 35 | "\n", 36 | "\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "print(srv.psql('SELECT 1+1 as res;'))" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "from sqlalchemy_utils import create_database, database_exists\n", 51 | "from sqlalchemy import create_engine\n", 52 | "import sqlalchemy as sql" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "data": { 62 | "text/plain": [ 63 | "'postgresql://postgres:@/mydb?host=/Users/orm/repos/pgserver/mypgdata'" 64 | ] 65 | }, 66 | "metadata": {}, 67 | "output_type": "display_data" 68 | } 69 | ], 70 | "source": [ 71 | "dburi = srv.get_uri(database='mydb')\n", 72 | "display(dburi)\n", 73 | "if not database_exists(dburi):\n", 74 | " create_database(dburi)\n", 75 | "engine = create_engine(dburi)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 6, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "table_name = 'mytable'\n", 85 | "with engine.connect() as conn:\n", 86 | " conn.execute(sql.text(f\"create table {table_name} (id int);\"))\n", 87 | " conn.execute(sql.text(f\"insert into {table_name} values (1);\"))\n", 88 | " cur = conn.execute(sql.text(f\"select * from {table_name};\"))\n", 89 | " result = cur.fetchone()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 7, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "(1,)" 101 | ] 102 | }, 103 | "execution_count": 7, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "result" 110 | ] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "pixeltable_39", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.9.19" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 2 134 | } 135 | -------------------------------------------------------------------------------- /pgserver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orm011/pgserver/3b227607a6c94590ba7c16ae827cef9fb7e3920b/pgserver.png -------------------------------------------------------------------------------- /pgserver_square_small.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/orm011/pgserver/3b227607a6c94590ba7c16ae827cef9fb7e3920b/pgserver_square_small.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pgserver" # Required 3 | version = "0.1.4" # Required 4 | description = "Self-contained postgres server for your python applications" # Required 5 | readme = "README.md" # Optional 6 | requires-python = ">=3.9" 7 | license = {file = "LICENSE.txt"} 8 | urls={repository="https://github.com/orm011/pgserver"} 9 | authors=[{ name="Oscar Moll", email="orm@csail.mit.edu" }] 10 | keywords=["postgresql", "pgvector", "pgserver", "rag"] 11 | dependencies = [ 12 | "fasteners>=0.19", 13 | "platformdirs>=4.0.0", 14 | "psutil>=5.9.0", 15 | ] 16 | 17 | [project.optional-dependencies] 18 | dev = [ 19 | "sysv_ipc", 20 | ] 21 | test = [ 22 | "pytest", 23 | "psycopg2-binary", 24 | "sqlalchemy>=2", 25 | "sqlalchemy-utils" 26 | ] 27 | 28 | [tool.setuptools.packages.find] 29 | where = ["src"] # list of folders that contain the packages (["."] by default) 30 | include = ["pgserver*"] # package names should match these glob patterns (["*"] by default) 31 | 32 | [tool.pytest.ini_options] 33 | testpaths = ["tests"] 34 | 35 | [tool.cibuildwheel] 36 | before-all = "make" 37 | test-extras = "test" 38 | test-command = "bash -x {project}/cibuildwheel_test.bash {project}" 39 | 40 | [build-system] 41 | # These are the assumed default build requirements from pip: 42 | # https://pip.pypa.io/en/stable/reference/pip/#pep-517-and-518-support 43 | requires = ["setuptools>=58.0.0", "wheel"] 44 | build-backend = "setuptools.build_meta" 45 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | setup_requires=["cffi"], 5 | # dummy but needed for the binaries to work 6 | cffi_modules=["src/pgserver/_build.py:ffibuilder"], 7 | ) 8 | -------------------------------------------------------------------------------- /src/pgserver/__init__.py: -------------------------------------------------------------------------------- 1 | from ._commands import * 2 | from .postgres_server import PostgresServer, get_server 3 | -------------------------------------------------------------------------------- /src/pgserver/_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | dummy module used in setup() 3 | seems needed to cause the binaries to be well formed 4 | The build is done by the Makefile 5 | """ 6 | from cffi import FFI 7 | 8 | ffibuilder = FFI() 9 | ffibuilder.set_source("_postgresql", "") 10 | ffibuilder.cdef("") 11 | 12 | if __name__ == "__main__": 13 | ffibuilder.compile(verbose=True) 14 | -------------------------------------------------------------------------------- /src/pgserver/_commands.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import subprocess 4 | from typing import Optional, List, Callable 5 | import logging 6 | import tempfile 7 | 8 | POSTGRES_BIN_PATH = Path(__file__).parent / "pginstall" / "bin" 9 | 10 | _logger = logging.getLogger('pgserver') 11 | 12 | def create_command_function(pg_exe_name : str) -> Callable: 13 | def 
command(args : List[str], pgdata : Optional[Path] = None, **kwargs) -> str: 14 | """ 15 | Run a command with the given command line arguments. 16 | Args: 17 | args: The command line arguments to pass to the command as a string, 18 | a list of options as would be passed to `subprocess.run` 19 | pgdata: The path to the data directory to use for the command. 20 | If the command does not need a data directory, this should be None. 21 | kwargs: Additional keyword arguments to pass to `subprocess.run`, eg user, timeout. 22 | 23 | Returns: 24 | The stdout of the command as a string. 25 | """ 26 | if pg_exe_name.strip('.exe') in ['initdb', 'pg_ctl', 'pg_dump']: 27 | assert pgdata is not None, "pgdata must be provided for initdb, pg_ctl, and pg_dump" 28 | 29 | if pgdata is not None: 30 | args = ["-D", str(pgdata)] + args 31 | 32 | full_command_line = [str(POSTGRES_BIN_PATH / pg_exe_name)] + args 33 | 34 | with tempfile.TemporaryFile('w+') as stdout, tempfile.TemporaryFile('w+') as stderr: 35 | try: 36 | _logger.info("Running commandline:\n%s\nwith kwargs: `%s`", full_command_line, kwargs) 37 | # NB: capture_output=True, as well as using stdout=subprocess.PIPE and stderr=subprocess.PIPE 38 | # can cause this call to hang, even with a time-out depending on the command, (pg_ctl) 39 | # so we use two temporary files instead 40 | result = subprocess.run(full_command_line, check=True, stdout=stdout, stderr=stderr, text=True, 41 | **kwargs) 42 | stdout.seek(0) 43 | stderr.seek(0) 44 | output = stdout.read() 45 | error = stderr.read() 46 | _logger.info("Successful postgres command %s with kwargs: `%s`\nstdout:\n%s\n---\nstderr:\n%s\n---\n", 47 | result.args, kwargs, output, error) 48 | except subprocess.CalledProcessError as err: 49 | stdout.seek(0) 50 | stderr.seek(0) 51 | output = stdout.read() 52 | error = stderr.read() 53 | _logger.error("Failed postgres command %s with kwargs: `%s`:\nerror:\n%s\nstdout:\n%s\n---\nstderr:\n%s\n---\n", 54 | err.args, kwargs, str(err), output, error) 55 | raise err 56 | 57 | return output 58 | 59 | return command 60 | 61 | __all__ = [] 62 | def _init(): 63 | for path in POSTGRES_BIN_PATH.iterdir(): 64 | exe_name = path.name 65 | prog = create_command_function(exe_name) 66 | # Strip .exe suffix for Windows compatibility 67 | function_name = exe_name.strip('.exe') 68 | setattr(sys.modules[__name__], function_name, prog) 69 | __all__.append(function_name) 70 | 71 | _init() -------------------------------------------------------------------------------- /src/pgserver/postgres_server.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Optional, Dict, Union 3 | import shutil 4 | import atexit 5 | import subprocess 6 | import os 7 | import logging 8 | import platform 9 | import psutil 10 | import time 11 | 12 | from ._commands import POSTGRES_BIN_PATH, initdb, pg_ctl 13 | from .utils import find_suitable_port, find_suitable_socket_dir, DiskList, PostmasterInfo, process_is_running 14 | 15 | if platform.system() != 'Windows': 16 | from .utils import ensure_user_exists, ensure_prefix_permissions, ensure_folder_permissions 17 | 18 | _logger = logging.getLogger('pgserver') 19 | 20 | class PostgresServer: 21 | """ Provides a common interface for interacting with a server. 
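Example (a sketch based on the README and tests; MY_DATA_DIR is a placeholder path):

    with get_server(MY_DATA_DIR) as pg:
        print(pg.psql('show data_directory;'))
        uri = pg.get_uri()  # connection string for sqlalchemy / psycopg, etc.
    # when the last remaining handle exits, the server is stopped (default cleanup_mode='stop')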
22 | """ 23 | import platformdirs 24 | import fasteners 25 | 26 | _instances : Dict[Path, 'PostgresServer'] = {} 27 | 28 | # NB home does not always support locking, eg NFS or LUSTRE (eg some clusters) 29 | # so, use user_runtime_path instead, which seems to be in a local filesystem 30 | runtime_path : Path = platformdirs.user_runtime_path('python_PostgresServer') 31 | lock_path = platformdirs.user_runtime_path('python_PostgresServer') / '.lockfile' 32 | _lock = fasteners.InterProcessLock(lock_path) 33 | 34 | def __init__(self, pgdata : Path, *, cleanup_mode : Optional[str] = 'stop'): 35 | """ Initializes the postgresql server instance. 36 | Constructor is intended to be called directly, use get_server() instead. 37 | """ 38 | assert cleanup_mode in [None, 'stop', 'delete'] 39 | 40 | self.pgdata = pgdata 41 | self.log = self.pgdata / 'log' 42 | 43 | # postgres user name, NB not the same as system user name 44 | self.system_user = None 45 | 46 | # note os.geteuid() is not available on windows, so must go after 47 | if platform.system() != 'Windows' and os.geteuid() == 0: 48 | # running as root 49 | # need a different system user to run as 50 | self.system_user = 'pgserver' 51 | ensure_user_exists(self.system_user) 52 | 53 | self.postgres_user = "postgres" 54 | list_path = self.pgdata / '.handle_pids.json' 55 | self.global_process_id_list = DiskList(list_path) 56 | self.cleanup_mode = cleanup_mode 57 | self._postmaster_info : Optional[PostmasterInfo] = None 58 | self._count = 0 59 | 60 | atexit.register(self._cleanup) 61 | with self._lock: 62 | self._instances[self.pgdata] = self 63 | self.ensure_pgdata_inited() 64 | self.ensure_postgres_running() 65 | self.global_process_id_list.get_and_add(os.getpid()) 66 | 67 | def get_postmaster_info(self) -> PostmasterInfo: 68 | assert self._postmaster_info is not None 69 | return self._postmaster_info 70 | 71 | def get_pid(self) -> Optional[int]: 72 | """ Returns the pid of the postgresql server process. 73 | (First line of postmaster.pid file). 74 | If the server is not running, returns None. 75 | """ 76 | return self.get_postmaster_info().pid 77 | 78 | def get_uri(self, database : Optional[str] = None) -> str: 79 | """ Returns a connection string for the postgresql server. 80 | """ 81 | return self.get_postmaster_info().get_uri(database=database) 82 | 83 | def ensure_pgdata_inited(self) -> None: 84 | """ Initializes the pgdata directory if it is not already initialized. 
85 | """ 86 | if platform.system() != 'Windows' and os.geteuid() == 0: 87 | import pwd 88 | import stat 89 | assert self.system_user is not None 90 | ensure_prefix_permissions(self.pgdata) 91 | ensure_prefix_permissions(POSTGRES_BIN_PATH) 92 | 93 | read_perm = stat.S_IRGRP | stat.S_IROTH 94 | execute_perm = stat.S_IXGRP | stat.S_IXOTH 95 | # for envs like cibuildwheel docker, where the user is has no permission otherwise 96 | ensure_folder_permissions(POSTGRES_BIN_PATH, execute_perm | read_perm) 97 | ensure_folder_permissions(POSTGRES_BIN_PATH.parent / 'lib', read_perm) 98 | 99 | 100 | os.chown(self.pgdata, pwd.getpwnam(self.system_user).pw_uid, 101 | pwd.getpwnam(self.system_user).pw_gid) 102 | 103 | if not (self.pgdata / 'PG_VERSION').exists(): # making a new PGDATA 104 | # First ensure there are no left-over servers on a previous version of the same pgdata path, 105 | # which does happen on Mac/Linux if the previous pgdata was deleted without stopping the server process 106 | # (the old server continues running for some time, sometimes indefinitely) 107 | # 108 | # It is likely the old server could also corrupt the data beyond the socket file, so it is best to kill it. 109 | # This must be done before initdb to ensure no race conditions with the old server. 110 | # 111 | # Since we do not know PID information of the old server, we stop all servers with the same pgdata path. 112 | # way to test this: python -c 'import pixeltable as pxt; pxt.Client()'; rm -rf ~/.pixeltable/; python -c 'import pixeltable as pxt; pxt.Client()' 113 | _logger.info(f'no PG_VERSION file found within {self.pgdata}. Initializing pgdata') 114 | for proc in psutil.process_iter(attrs=['name', 'cmdline']): 115 | if proc.info['name'] == 'postgres': 116 | if proc.info['cmdline'] is not None and str(self.pgdata) in proc.info['cmdline']: 117 | _logger.info(f"Found a running postgres server with same pgdata: {proc.as_dict(attrs=['name', 'pid', 'cmdline'])=}.\ 118 | Assuming it is a leftover from a previous run on a different version of the same pgdata path, killing it.") 119 | proc.terminate() 120 | try: 121 | proc.wait(2) # wait at most a second 122 | except psutil.TimeoutExpired: 123 | pass 124 | if proc.is_running(): 125 | proc.kill() 126 | assert not proc.is_running() 127 | 128 | initdb(['--auth=trust', '--auth-local=trust', '--encoding=utf8', '-U', self.postgres_user], pgdata=self.pgdata, 129 | user=self.system_user) 130 | else: 131 | _logger.info('PG_VERSION file found, skipping initdb') 132 | 133 | def ensure_postgres_running(self) -> None: 134 | """ pre condition: pgdata is initialized, being run with lock. 135 | post condition: self._postmaster_info is set. 
136 | """ 137 | 138 | postmaster_info = PostmasterInfo.read_from_pgdata(self.pgdata) 139 | if postmaster_info is not None and postmaster_info.is_running(): 140 | _logger.info(f"a postgres server is already running: {postmaster_info=} {postmaster_info.process=}") 141 | self._postmaster_info = postmaster_info 142 | else: 143 | if postmaster_info is not None and not postmaster_info.is_running(): 144 | _logger.info(f"found a postmaster.pid file, but the server is not running: {postmaster_info=}") 145 | if postmaster_info is None: 146 | _logger.info(f"no postmaster.pid file found in {self.pgdata}") 147 | 148 | if platform.system() != 'Windows': 149 | # use sockets to avoid any future conflict with port numbers 150 | socket_dir = find_suitable_socket_dir(self.pgdata, self.runtime_path) 151 | 152 | if self.system_user is not None and socket_dir != self.pgdata: 153 | ensure_prefix_permissions(socket_dir) 154 | socket_dir.chmod(0o777) 155 | 156 | pg_ctl_args = ['-w', # wait for server to start 157 | '-o', '-h ""', # no listening on any IP addresses (forwarded to postgres exec) see man postgres for -hj 158 | '-o', f'-k {socket_dir}', # socket option (forwarded to postgres exec) see man postgres for -k 159 | '-l', str(self.log), # log location: set to pgdata dir also 160 | 'start' # action 161 | ] 162 | else: # Windows, 163 | socket_dir = None 164 | # socket.AF_UNIX is undefined when running on Windows, so default to a port 165 | host = "127.0.0.1" 166 | port = find_suitable_port(host) 167 | pg_ctl_args = ['-w', # wait for server to start 168 | '-o', f'-h "{host}"', 169 | '-o', f'-p {port}', 170 | '-l', str(self.log), # log location: set to pgdata dir also 171 | 'start' # action 172 | ] 173 | 174 | try: 175 | _logger.info(f"running pg_ctl... {pg_ctl_args=}") 176 | pg_ctl(pg_ctl_args,pgdata=self.pgdata, user=self.system_user, timeout=10) 177 | except subprocess.CalledProcessError as err: 178 | _logger.error(f"Failed to start server.\nShowing contents of postgres server log ({self.log.absolute()}) below:\n{self.log.read_text()}") 179 | raise err 180 | except subprocess.TimeoutExpired as err: 181 | _logger.error(f"Timeout starting server.\nShowing contents of postgres server log ({self.log.absolute()}) below:\n{self.log.read_text()}") 182 | raise err 183 | 184 | while True: 185 | # in Windows, when there is a postmaster.pid, init_ctl seems to return 186 | # but the file is not immediately updated, here we wait until the file shows 187 | # a new running server. see test_stale_postmaster 188 | _logger.info(f'waiting for postmaster info to show a running process') 189 | pinfo = PostmasterInfo.read_from_pgdata(self.pgdata) 190 | _logger.info(f'running... checking if ready {pinfo=}') 191 | if pinfo is not None and pinfo.is_running() and pinfo.status == 'ready': 192 | self._postmaster_info = pinfo 193 | break 194 | 195 | _logger.info(f'not ready yet... waiting a bit more...') 196 | time.sleep(1.) 
197 | 198 | _logger.info(f"Now asserting server is running {self._postmaster_info=}") 199 | assert self._postmaster_info is not None 200 | assert self._postmaster_info.is_running() 201 | assert self._postmaster_info.status == 'ready' 202 | 203 | def _cleanup(self) -> None: 204 | with self._lock: 205 | pids = self.global_process_id_list.get_and_remove(os.getpid()) 206 | _logger.info(f"exiting {os.getpid()} remaining {pids=}") 207 | if pids != [os.getpid()]: # includes case where already cleaned up 208 | return 209 | 210 | _logger.info(f"cleaning last handle for server: {self.pgdata}") 211 | # last handle is being removed 212 | del self._instances[self.pgdata] 213 | if self.cleanup_mode is None: # done 214 | return 215 | 216 | assert self.cleanup_mode in ['stop', 'delete'] 217 | if self._postmaster_info is not None: 218 | if self._postmaster_info.process.is_running(): 219 | try: 220 | pg_ctl(['-w', 'stop'], pgdata=self.pgdata, user=self.system_user) 221 | stopped = True 222 | except subprocess.CalledProcessError: 223 | stopped = False 224 | pass # somehow the server is already stopped. 225 | 226 | if not stopped: 227 | _logger.warning(f"Failed to stop server, killing it instead.") 228 | self._postmaster_info.process.terminate() 229 | try: 230 | self._postmaster_info.process.wait(2) 231 | except psutil.TimeoutExpired: 232 | pass 233 | if self._postmaster_info.process.is_running(): 234 | self._postmaster_info.process.kill() 235 | 236 | if self.cleanup_mode == 'stop': 237 | return 238 | 239 | assert self.cleanup_mode == 'delete' 240 | shutil.rmtree(str(self.pgdata)) 241 | atexit.unregister(self._cleanup) 242 | 243 | def psql(self, command : str) -> str: 244 | """ Runs a psql command on this server. The command is passed to psql via stdin. 245 | """ 246 | executable = POSTGRES_BIN_PATH / 'psql' 247 | stdout = subprocess.check_output(f'{executable} {self.get_uri()}', 248 | input=command.encode(), shell=True) 249 | return stdout.decode("utf-8") 250 | 251 | def __enter__(self): 252 | self._count += 1 253 | return self 254 | 255 | def __exit__(self, exc_type, exc_val, exc_tb): 256 | self._count -= 1 257 | if self._count <= 0: 258 | self._cleanup() 259 | 260 | def cleanup(self) -> None: 261 | """ Stops the postgresql server and removes the pgdata directory. 262 | """ 263 | self._cleanup() 264 | 265 | 266 | def get_server(pgdata : Union[Path,str] , cleanup_mode : Optional[str] = 'stop' ) -> PostgresServer: 267 | """ Returns handle to postgresql server instance for the given pgdata directory. 268 | Args: 269 | pgdata: pddata directory. If the pgdata directory does not exist, it will be created, but its 270 | parent must exists and be a valid directory. 271 | cleanup_mode: If 'stop', the server will be stopped when the last handle is closed (default) 272 | If 'delete', the server will be stopped and the pgdata directory will be deleted. 273 | If None, the server will not be stopped or deleted. 274 | 275 | To create a temporary server, use mkdtemp() to create a temporary directory and pass it as pg_data, 276 | and set cleanup_mode to 'delete'. 
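For example, a minimal sketch of the temporary-server pattern described above:

    import tempfile
    pg = get_server(tempfile.mkdtemp(), cleanup_mode='delete')
    print(pg.psql('select version();'))
    pg.cleanup()  # stops the server and deletes the temporary pgdata directory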
277 | """ 278 | if isinstance(pgdata, str): 279 | pgdata = Path(pgdata) 280 | pgdata = pgdata.expanduser().resolve() 281 | 282 | if not pgdata.parent.exists(): 283 | raise FileNotFoundError(f"Parent directory of pgdata does not exist: {pgdata.parent}") 284 | 285 | if not pgdata.exists(): 286 | pgdata.mkdir(parents=False, exist_ok=False) 287 | 288 | if pgdata in PostgresServer._instances: 289 | return PostgresServer._instances[pgdata] 290 | 291 | return PostgresServer(pgdata, cleanup_mode=cleanup_mode) 292 | -------------------------------------------------------------------------------- /src/pgserver/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orm011/pgserver/3b227607a6c94590ba7c16ae827cef9fb7e3920b/src/pgserver/py.typed -------------------------------------------------------------------------------- /src/pgserver/utils.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import typing 3 | from typing import Optional, List, Dict 4 | import subprocess 5 | import json 6 | import logging 7 | import hashlib 8 | import socket 9 | import platform 10 | import stat 11 | import psutil 12 | import datetime 13 | import shutil 14 | 15 | _logger = logging.getLogger('pgserver') 16 | 17 | class PostmasterInfo: 18 | """Struct with contents of the PGDATA/postmaster.pid file, contains information about the running server. 19 | Example of file contents: (comments added for clarity) 20 | cat /Users/orm/Library/Application Support/Postgres/var-15/postmaster.pid 21 | ``` 22 | 3072 # pid 23 | /Users/orm/Library/Application Support/Postgres/var-15 # pgdata 24 | 1712346200 # start_time 25 | 5432 # port 26 | /tmp # socker_dir, where .s.PGSQL.5432 is located 27 | localhost # listening on this hostname 28 | 8826964 65536 # shared mem size?, shmget id (can deallocate with sysv_ipc.remove_shared_memory(shmget_id)) 29 | ready # server status 30 | ``` 31 | """ 32 | 33 | def __init__(self, lines : List[str]): 34 | _lines = ['pid', 'pgdata', 'start_time', 'port', 'socket_dir', 'hostname', 'shared_memory_info', 'status'] 35 | assert len(lines) == len(_lines), f"_lines: {_lines=} lines: {lines=}" 36 | clean_lines = [ line.strip() for line in lines ] 37 | 38 | raw : Dict[str,str] = dict(zip(_lines, clean_lines)) 39 | 40 | self.pid = int(raw['pid']) 41 | self.pgdata = Path(raw['pgdata']) 42 | self.start_time = datetime.datetime.fromtimestamp(int(raw['start_time'])) 43 | 44 | if raw['socket_dir']: 45 | self.socket_dir = Path(raw['socket_dir']) 46 | else: 47 | self.socket_dir = None 48 | 49 | if raw['hostname']: 50 | self.hostname = raw['hostname'] 51 | else: 52 | self.hostname = None 53 | 54 | if raw['port']: 55 | self.port = int(raw['port']) 56 | else: 57 | self.port = None 58 | 59 | # not sure what this is in windows 60 | self.shmem_info = raw['shared_memory_info'] 61 | self.status = raw['status'] 62 | 63 | self.process = None # will be not None if process is running 64 | self._init_process_meta() 65 | 66 | def _init_process_meta(self) -> Optional[psutil.Process]: 67 | if self.pid is None: 68 | return 69 | try: 70 | process = psutil.Process(self.pid) 71 | except psutil.NoSuchProcess: 72 | return 73 | 74 | self.process = process 75 | # exact_create_time = datetime.datetime.fromtimestamp(process.create_time()) 76 | # if abs(self.start_time - exact_create_time) <= datetime.timedelta(seconds=1): 77 | 78 | def is_running(self) -> bool: 79 | return self.process is not None and 
self.process.is_running() 80 | 81 | @classmethod 82 | def read_from_pgdata(cls, pgdata : Path) -> Optional['PostmasterInfo']: 83 | postmaster_file = pgdata / 'postmaster.pid' 84 | if not postmaster_file.exists(): 85 | return None 86 | 87 | lines = postmaster_file.read_text().splitlines() 88 | return cls(lines) 89 | 90 | def get_uri(self, user : str = 'postgres', database : Optional[str] = None) -> str: 91 | """ Returns a connection uri string for the postgresql server using the information in postmaster.pid""" 92 | if database is None: 93 | database = user 94 | 95 | if self.socket_dir is not None: 96 | return f"postgresql://{user}:@/{database}?host={self.socket_dir}" 97 | elif self.port is not None: 98 | assert self.hostname is not None 99 | return f"postgresql://{user}:@{self.hostname}:{self.port}/{database}" 100 | else: 101 | raise RuntimeError("postmaster.pid does not contain port or socket information") 102 | 103 | @property 104 | def shmget_id(self) -> Optional[int]: 105 | if platform.system() == 'Windows': 106 | return None 107 | 108 | if not self.shmem_info: 109 | return None 110 | raw_id = self.shmem_info.split()[-1] 111 | return int(raw_id) 112 | 113 | @property 114 | def socket_path(self) -> Optional[Path]: 115 | if self.socket_dir is not None: 116 | # TODO: is the port always 5432 for the socket? or does it depend on the port in postmaster.pid? 117 | return self.socket_dir / f'.s.PGSQL.{self.port}' 118 | return None 119 | 120 | def __repr__(self) -> str: 121 | return f"PostmasterInfo(pid={self.pid}, pgdata={self.pgdata}, start_time={self.start_time}, hostname={self.hostname} port={self.port}, socket_dir={self.socket_dir} status={self.status}, process={self.process})" 122 | 123 | def __str__(self) -> str: 124 | return self.__repr__() 125 | 126 | def process_is_running(pid : int) -> bool: 127 | assert pid is not None 128 | return psutil.pid_exists(pid) 129 | 130 | if platform.system() != 'Windows': 131 | def ensure_user_exists(username : str) -> Optional['pwd.struct_passwd']: 132 | """ Ensure system user `username` exists. 133 | Returns their pwentry if user exists, otherwise it creates a user through `useradd`. 134 | Assume permissions to add users, eg run as root. 135 | """ 136 | import pwd 137 | 138 | try: 139 | entry = pwd.getpwnam(username) 140 | except KeyError: 141 | entry = None 142 | 143 | if entry is None: 144 | subprocess.run(["useradd", "-s", "/bin/bash", username], check=True, capture_output=True, text=True) 145 | entry = pwd.getpwnam(username) 146 | 147 | return entry 148 | 149 | def ensure_prefix_permissions(path: Path): 150 | """ Ensure target user can traverse prefix to path 151 | Permissions for everyone will be increased to ensure traversal. 152 | """ 153 | # ensure path exists and user exists 154 | assert path.exists() 155 | prefix = path.parent 156 | # chmod g+rx,o+rx: enable other users to traverse prefix folders 157 | g_rx_o_rx = stat.S_IRGRP | stat.S_IROTH | stat.S_IXGRP | stat.S_IXOTH 158 | while True: 159 | curr_permissions = prefix.stat().st_mode 160 | ensure_permissions = curr_permissions | g_rx_o_rx 161 | # TODO: are symlinks handled ok here? 162 | prefix.chmod(ensure_permissions) 163 | if prefix == prefix.parent: # reached file system root 164 | break 165 | prefix = prefix.parent 166 | 167 | def ensure_folder_permissions(path: Path, flag : int): 168 | """ Ensure target user can read, and execute the folder. 169 | Permissions for everyone will be increased to ensure traversal. 
170 | """ 171 | # read and traverse folder 172 | g_rx_o_rx = stat.S_IRGRP | stat.S_IROTH | stat.S_IXGRP | stat.S_IXOTH 173 | 174 | def _helper(path: Path): 175 | if path.is_dir(): 176 | path.chmod(path.stat().st_mode | g_rx_o_rx ) 177 | for child in path.iterdir(): 178 | _helper(child) 179 | else: 180 | path.chmod(path.stat().st_mode | flag) 181 | 182 | _helper(path) 183 | 184 | class DiskList: 185 | """ A list of integers stored in a file on disk. 186 | """ 187 | def __init__(self, path : Path): 188 | self.path = path 189 | 190 | def get_and_add(self, value : int) -> List[int]: 191 | old_values = self.get() 192 | values = old_values.copy() 193 | if value not in values: 194 | values.append(value) 195 | self.put(values) 196 | return old_values 197 | 198 | def get_and_remove(self, value : int) -> List[int]: 199 | old_values = self.get() 200 | values = old_values.copy() 201 | if value in values: 202 | values.remove(value) 203 | self.put(values) 204 | return old_values 205 | 206 | def get(self) -> List[int]: 207 | if not self.path.exists(): 208 | return [] 209 | return json.loads(self.path.read_text()) 210 | 211 | def put(self, values : List[int]) -> None: 212 | self.path.write_text(json.dumps(values)) 213 | 214 | 215 | def socket_name_length_ok(socket_name : Path): 216 | ''' checks whether a socket path is too long for domain sockets 217 | on this system. Returns True if the socket path is ok, False if it is too long. 218 | ''' 219 | if socket_name.exists(): 220 | return socket_name.is_socket() 221 | 222 | sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) 223 | try: 224 | sock.bind(str(socket_name)) 225 | return True 226 | except OSError as err: 227 | if 'AF_UNIX path too long' in str(err): 228 | return False 229 | raise err 230 | finally: 231 | sock.close() 232 | socket_name.unlink(missing_ok=True) 233 | 234 | def find_suitable_socket_dir(pgdata, runtime_path) -> Path: 235 | """ Assumes server is not running. Returns a suitable directory for used as pg_ctl -o '-k ' option. 236 | Usually, this is the same directory as the pgdata directory. 237 | However, if the pgdata directory exceeds the maximum length for domain sockets on this system, 238 | a different directory will be used. 239 | """ 240 | # find a suitable directory for the domain socket 241 | # 1. pgdata. simplest approach, but can be too long for unix socket depending on the path 242 | # 2. runtime_path. This is a directory that is intended for storing runtime data. 243 | 244 | # for shared folders, use a hash of the path to avoid collisions of different folders 245 | # use a hash of the pgdata path combined with inode number to avoid collisions 246 | string_identifier = f'{pgdata}-{pgdata.stat().st_ino}' 247 | path_hash = hashlib.sha256(string_identifier.encode()).hexdigest()[:10] 248 | 249 | candidate_socket_dir = [ 250 | pgdata, 251 | runtime_path / path_hash, 252 | ] 253 | 254 | ok_path = None 255 | for path in candidate_socket_dir: 256 | path.mkdir(parents=True, exist_ok=True) 257 | # name used by postgresql for domain socket is .s.PGSQL.5432 258 | if socket_name_length_ok(path / '.s.PGSQL.5432'): 259 | ok_path = path 260 | _logger.info(f"Using socket path: {path}") 261 | break 262 | else: 263 | _logger.info(f"Socket path too long: {path}. 
270 | def find_suitable_port(address : Optional[str] = None) -> int:
271 | """Find an available TCP port."""
272 | if address is None:
273 | address = '127.0.0.1'
274 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
275 | sock.bind((address, 0))
276 | port = sock.getsockname()[1]
277 | sock.close()
278 | return port
279 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orm011/pgserver/3b227607a6c94590ba7c16ae827cef9fb7e3920b/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_pgserver.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pgserver
3 | import subprocess
4 | import tempfile
5 | from typing import Optional, Union
6 | import multiprocessing as mp
7 | import shutil
8 | from pathlib import Path
9 | import pgserver.utils
10 | import socket
11 | from pgserver.utils import find_suitable_port, process_is_running
12 | import psutil
13 | import platform
14 | import sqlalchemy as sa
15 | import datetime
16 | from sqlalchemy_utils import database_exists, create_database
17 | import logging
18 | import os
19 |
20 | def _check_sqlalchemy_works(srv : pgserver.PostgresServer):
21 | database_name = 'testdb'
22 | uri = srv.get_uri(database_name)
23 |
24 | if not database_exists(uri):
25 | create_database(uri)
26 |
27 | engine = sa.create_engine(uri)
28 | conn = engine.connect()
29 |
30 | table_name = 'table_foo'
31 | with conn.begin():
32 | # if table exists already, drop it
33 | if engine.dialect.has_table(conn, table_name):
34 | conn.execute(sa.text(f"drop table {table_name};"))
35 | conn.execute(sa.text(f"create table {table_name} (id int);"))
36 | conn.execute(sa.text(f"insert into {table_name} values (1);"))
37 | cur = conn.execute(sa.text(f"select * from {table_name};"))
38 | result = cur.fetchone()
39 | assert result
40 | assert result[0] == 1
41 |
42 | def _check_postmaster_info(pgdata : Path, postmaster_info : pgserver.utils.PostmasterInfo):
43 | assert postmaster_info is not None
44 | assert postmaster_info.pgdata is not None
45 | assert postmaster_info.pgdata == pgdata
46 |
47 | assert postmaster_info.is_running()
48 |
49 | if postmaster_info.socket_dir is not None:
50 | assert postmaster_info.socket_dir.exists()
51 | assert postmaster_info.socket_path is not None
52 | assert postmaster_info.socket_path.exists()
53 | assert postmaster_info.socket_path.is_socket()
54 |
55 |
56 | def _check_server(pg : pgserver.PostgresServer) -> int:
57 | assert pg.pgdata.exists()
58 | postmaster_info = pgserver.utils.PostmasterInfo.read_from_pgdata(pg.pgdata)
59 | assert postmaster_info is not None
60 | assert postmaster_info.pid is not None
61 | _check_postmaster_info(pg.pgdata, postmaster_info)
62 |
63 | ret = pg.psql("show data_directory;")
64 | # parse the third output line (the first two lines are the column header and separator)
65 | ret_path = Path(ret.splitlines()[2].strip())
66 | assert pg.pgdata == ret_path
67 | _check_sqlalchemy_works(pg)
68 | return postmaster_info.pid
69 |
70 | def _kill_server(pid : Union[int,psutil.Process,None]) -> None:
71 | if pid is None:
72 | return
73 | elif isinstance(pid, psutil.Process):
74 | proc = pid
75 | else:
76 | try:
77 | proc = psutil.Process(pid)
78 | except psutil.NoSuchProcess:
79 | return
80 |
81 | if proc.is_running():
82 | proc.terminate() # attempt cleaner shutdown
83 | try:
84 | proc.wait(3) # wait at most a few seconds
85 | except psutil.TimeoutExpired:
86 | pass
87 |
88 | if proc.is_running():
89 | proc.kill()
90 |
91 | def test_get_port():
92 | address = '127.0.0.1'
93 | port = find_suitable_port(address)
94 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
95 |
96 | try:
97 | sock.bind((address, port))
98 | except OSError as err:
99 | if 'Address already in use' in str(err):
100 | raise RuntimeError(f"Port {port} is already in use.")
101 | raise err
102 | finally:
103 | sock.close()
104 |
105 | def test_get_server():
106 | with tempfile.TemporaryDirectory() as tmpdir:
107 | pid = None
108 | try:
109 | # check case when initializing the pgdata dir
110 | with pgserver.get_server(tmpdir) as pg:
111 | pid = _check_server(pg)
112 |
113 | assert not process_is_running(pid)
114 | assert pg.pgdata.exists()
115 |
116 | # check case when pgdata dir is already initialized
117 | with pgserver.get_server(tmpdir) as pg:
118 | pid = _check_server(pg)
119 |
120 | assert not process_is_running(pid)
121 | assert pg.pgdata.exists()
122 | finally:
123 | _kill_server(pid)
124 |
125 | def test_reentrant():
126 | with tempfile.TemporaryDirectory() as tmpdir:
127 | pid = None
128 | try:
129 | with pgserver.get_server(tmpdir) as pg:
130 | pid = _check_server(pg)
131 | with pgserver.get_server(tmpdir) as pg2:
132 | assert pg2 is pg
133 | _check_server(pg)
134 |
135 | _check_server(pg)
136 |
137 | assert not process_is_running(pid)
138 | assert pg.pgdata.exists()
139 | finally:
140 | _kill_server(pid)
141 |
142 | def _start_server_in_separate_process(pgdata, queue_in : Optional[mp.Queue], queue_out : mp.Queue, cleanup_mode : Optional[str]):
143 | with pgserver.get_server(pgdata, cleanup_mode=cleanup_mode) as pg:
144 | pid = _check_server(pg)
145 | queue_out.put(pid)
146 |
147 | if queue_in is not None:
148 | _ = queue_in.get() # wait for signal
149 | return
150 |
151 | def test_unix_domain_socket():
152 | if platform.system() == 'Windows':
153 | pytest.skip("This test is for unix domain sockets, which are not available on Windows.")
154 |
155 | long_prefix = '_'.join(['long'] + ['1234567890']*12)
156 | assert len(long_prefix) > 120
157 | prefixes = ['short', long_prefix]
158 |
159 | for prefix in prefixes:
160 | with tempfile.TemporaryDirectory(dir='/tmp/', prefix=prefix) as tmpdir:
161 | pid = None
162 | try:
163 | with pgserver.get_server(tmpdir) as pg:
164 | pid = _check_server(pg)
165 |
166 | assert not process_is_running(pid)
167 | assert pg.pgdata.exists()
168 | if len(prefix) > 120:
169 | assert str(tmpdir) not in pg.get_uri()
170 | else:
171 | assert str(tmpdir) in pg.get_uri()
172 | finally:
173 | _kill_server(pid)
174 |
175 | def test_pg_ctl():
176 | if platform.system() != 'Windows' and os.geteuid() == 0:
177 | # on Linux root, this test would fail.
178 | # we would need to create a separate user to run the command, which is not worth it here
179 | # (pgserver does this internally, but replicating that just for this test is overkill)
180 | pytest.skip("This test is skipped when running as root on Linux.")
181 |
182 | with tempfile.TemporaryDirectory() as tmpdir:
183 | pid = None
184 | try:
185 | with pgserver.get_server(tmpdir) as pg:
186 | output = pgserver.pg_ctl(['status'], str(pg.pgdata))
187 | assert 'server is running' in output.splitlines()[0]
188 |
189 | finally:
190 | _kill_server(pid)
191 |
192 | def test_stale_postmaster():
193 | """ To simulate a stale postmaster.pid file, we create a postmaster.pid file by starting a server,
194 | back the file up, then restore the backup to the original location after killing the server.
195 | (Our method of killing the server is graceful, to avoid running out of shmem, but graceful shutdown also
196 | removes the postmaster.pid file, so we need to go to these lengths to simulate a stale postmaster.pid file.)
197 | """
198 | if platform.system() != 'Windows' and os.geteuid() == 0:
199 | # on Linux as root, this test fails because of permissions on the postmaster.pid file
200 | # we simply skip it in this case, as in practice the permissions issue would not occur
201 | pytest.skip("This test is skipped when running as root on Linux.")
202 |
203 | with tempfile.TemporaryDirectory() as tmpdir:
204 | pid = None
205 | pid2 = None
206 |
207 | try:
208 | with pgserver.get_server(tmpdir, cleanup_mode='stop') as pg:
209 | pid = _check_server(pg)
210 | pgdata = pg.pgdata
211 | postmaster_pid = pgdata / 'postmaster.pid'
212 |
213 | ## make a backup of the postmaster.pid file
214 | shutil.copy2(str(postmaster_pid), str(postmaster_pid) + '.bak')
215 |
216 | # restore the backup to guarantee a stale postmaster.pid file
217 | shutil.copy2(str(postmaster_pid) + '.bak', str(postmaster_pid))
218 | with pgserver.get_server(tmpdir) as pg:
219 | pid2 = _check_server(pg)
220 | finally:
221 | _kill_server(pid)
222 | _kill_server(pid2)
223 |
224 |
225 | def test_cleanup_delete():
226 | with tempfile.TemporaryDirectory() as tmpdir:
227 | pid = None
228 | try:
229 | with pgserver.get_server(tmpdir, cleanup_mode='delete') as pg:
230 | pid = _check_server(pg)
231 |
232 | assert not process_is_running(pid)
233 | assert not pg.pgdata.exists()
234 | finally:
235 | _kill_server(pid)
236 |
237 | def test_cleanup_none():
238 | with tempfile.TemporaryDirectory() as tmpdir:
239 | pid = None
240 | try:
241 | with pgserver.get_server(tmpdir, cleanup_mode=None) as pg:
242 | pid = _check_server(pg)
243 |
244 | assert process_is_running(pid)
245 | assert pg.pgdata.exists()
246 | finally:
247 | _kill_server(pid)
248 |
249 | @pytest.fixture
250 | def tmp_postgres():
251 | tmp_pg_data = tempfile.mkdtemp()
252 | with pgserver.get_server(tmp_pg_data, cleanup_mode='delete') as pg:
253 | yield pg
254 |
255 | def test_pgvector(tmp_postgres):
256 | ret = tmp_postgres.psql("CREATE EXTENSION vector;")
257 | assert ret.strip() == "CREATE EXTENSION"
258 |
259 | def test_start_failure_log(caplog):
260 | """ Test that the server log contents are surfaced in the Python log when startup fails.
261 | """
262 | with tempfile.TemporaryDirectory() as tmpdir:
263 | with pgserver.get_server(tmpdir) as _:
264 | pass
265 |
266 | ## now delete some files to make it fail
267 | for f in Path(tmpdir).glob('**/postgresql.conf'):
268 | f.unlink()
269 |
270 | with pytest.raises(subprocess.CalledProcessError):
271 | with pgserver.get_server(tmpdir) as _:
272 | pass
273 |
274 | assert 'postgres: could not access the server configuration file' in caplog.text
275 |
276 |
277 | def test_no_conflict():
278 | """ test we can start pgservers on two different datadirs with no conflict (e.g. a port conflict)
279 | """
280 | pid1 = None
281 | pid2 = None
282 | try:
283 | with tempfile.TemporaryDirectory() as tmpdir1, tempfile.TemporaryDirectory() as tmpdir2:
284 | with pgserver.get_server(tmpdir1) as pg1, pgserver.get_server(tmpdir2) as pg2:
285 | pid1 = _check_server(pg1)
286 | pid2 = _check_server(pg2)
287 | finally:
288 | _kill_server(pid1)
289 | _kill_server(pid2)
290 |
291 |
292 | def _reuse_deleted_datadir(prefix: str):
293 | """ test the common scenario where we repeatedly delete the datadir and start a new server on it.
294 | NB: currently this test does not reproduce the problem. """
295 | # one can reproduce the problem by running the following in a loop:
296 | # python -c 'import pixeltable as pxt; pxt.Client()'; rm -rf ~/.pixeltable/; python -c 'import pixeltable as pxt; pxt.Client()'
297 | # which creates a database with more contents etc
298 | tmpdir = tempfile.mkdtemp(prefix=prefix)
299 | pgdata = Path(tmpdir) / 'pgdata'
300 | server_processes = []
301 | shmem_ids = []
302 |
303 | num_tries = 3
304 | try:
305 | for _ in range(num_tries):
306 | assert not pgdata.exists()
307 |
308 | queue_from_child = mp.Queue()
309 | child = mp.Process(target=_start_server_in_separate_process, args=(pgdata, None, queue_from_child, None))
310 | child.start()
311 | # wait for child to start server
312 | curr_pid = queue_from_child.get()
313 | child.join()
314 | server_proc = psutil.Process(curr_pid)
315 | assert server_proc.is_running()
316 | server_processes.append(server_proc)
317 | postmaster = pgserver.utils.PostmasterInfo.read_from_pgdata(pgdata)
318 |
319 | if postmaster.shmget_id is not None:
320 | shmem_ids.append(postmaster.shmget_id)
321 |
322 | if platform.system() == 'Windows':
323 | # windows will not allow deletion of the directory while the server is running
324 | _kill_server(server_proc)
325 |
326 | shutil.rmtree(pgdata)
327 | finally:
328 | if platform.system() != 'Windows':
329 | # if sysv_ipc is installed (e.g. locally), remove the shared memory segment
330 | # done this way because of CI/CD issues with sysv_ipc
331 | # this avoids having to restart the machine to clear the shared memory
332 | try:
333 | import sysv_ipc
334 | do_shmem_cleanup = True
335 | except ImportError:
336 | do_shmem_cleanup = False
337 | logging.warning("sysv_ipc not installed, skipping shared memory cleanup...")
338 |
339 | if do_shmem_cleanup:
340 | for shmid in shmem_ids:
341 | try:
342 | sysv_ipc.remove_shared_memory(shmid)
343 | except sysv_ipc.ExistentialError as e:
344 | logging.info(f"shared memory already removed: {e}")
345 |
346 | for proc in server_processes:
347 | _kill_server(proc)
348 |
349 | shutil.rmtree(tmpdir)
350 |
351 | def test_reuse_deleted_datadir_short():
352 | """ test that a new server starts normally on the same datadir after the datadir is deleted
353 | """
354 | _reuse_deleted_datadir('short_prefix')
355 |
356 | def test_reuse_deleted_datadir_long():
357 | """ test that a new server starts normally on the same datadir after the datadir is deleted
358 | """
359 | long_prefix = '_'.join(['long_prefix'] + ['1234567890']*12)
360 | assert len(long_prefix) > 120
361 | _reuse_deleted_datadir(long_prefix)
362 |
363 | def test_multiprocess_shared():
364 | """ Test that multiple processes can share the same server.
365 |
366 | 1. get server in a child process,
367 | 2. then, get server in the parent process
368 | 3. then, exiting the child process
369 | 4. checking the parent can still use the server.
370 | """
371 | pid = None
372 | try:
373 | with tempfile.TemporaryDirectory() as tmpdir:
374 | queue_to_child = mp.Queue()
375 | queue_from_child = mp.Queue()
376 | child = mp.Process(target=_start_server_in_separate_process, args=(tmpdir,queue_to_child,queue_from_child, 'stop'))
377 | child.start()
378 | # wait for child to start server
379 | server_pid_child = queue_from_child.get()
380 |
381 | with pgserver.get_server(tmpdir) as pg:
382 | server_pid_parent = _check_server(pg)
383 | assert server_pid_child == server_pid_parent
384 |
385 | # tell child to continue
386 | queue_to_child.put(None)
387 | child.join()
388 |
389 | # check server still works
390 | _check_server(pg)
391 |
392 | assert not process_is_running(server_pid_parent)
393 | finally:
394 | _kill_server(pid)
--------------------------------------------------------------------------------
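A minimal usage sketch, assuming only the API calls that the test suite above exercises (get_server, cleanup_mode='delete', get_uri, psql); it is not a definitive reference for the library:

import tempfile
import pgserver

# start (or reuse) a server rooted at a scratch pgdata directory;
# cleanup_mode='delete' stops the server and removes the directory when the context exits
with pgserver.get_server(tempfile.mkdtemp(), cleanup_mode='delete') as pg:
    print(pg.get_uri())                      # postgresql:// URI (socket- or TCP-based)
    print(pg.psql("show data_directory;"))   # run a statement through psql against this server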