├── neo4j └── .keep ├── testkit ├── .dockerignore ├── testkit.json ├── stress.py ├── integration.py ├── backend.py ├── unittests.py ├── build.py ├── _common.py └── Dockerfile ├── tests ├── requirements.txt ├── __init__.py ├── codec │ ├── __init__.py │ └── packstream │ │ ├── __init__.py │ │ ├── v1 │ │ ├── __init__.py │ │ ├── from_driver │ │ │ ├── __init__.py │ │ │ └── test_packstream.py │ │ └── test_injection.py │ │ ├── from_driver │ │ ├── __init__.py │ │ └── test_structure.py │ │ └── test_structure.py ├── benchmarks │ ├── __init__.py │ ├── test_vector_benchmarks.py │ └── test_macro_benchmarks.py ├── vector │ ├── __init__.py │ ├── from_driver │ │ ├── __init__.py │ │ ├── test_import_vector.py │ │ └── test_vector.py │ └── test_injection.py └── conftest.py ├── changelog.d ├── 75.feature.md ├── README.md └── template.md ├── .semgrepignore ├── .gitmodules ├── .editorconfig ├── Cargo.toml ├── tox.ini ├── LICENSE.txt ├── .gitignore ├── bin ├── check_version.sh └── target_driver.sh ├── src ├── codec.rs ├── lib.rs ├── codec │ ├── packstream │ │ ├── v1.rs │ │ └── v1 │ │ │ ├── unpack.rs │ │ │ └── pack.rs │ └── packstream.rs └── vector.rs ├── LICENSE.MIT.txt ├── .pre-commit-config.yaml ├── README.md ├── Cargo.lock ├── CONTRIBUTING.md ├── pyproject.toml ├── CHANGELOG.md └── LICENSE.APACHE2.txt /neo4j/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /testkit/.dockerignore: -------------------------------------------------------------------------------- 1 | *.py 2 | *.json 3 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | numpy 3 | pandas 4 | pyarrow 5 | -------------------------------------------------------------------------------- /changelog.d/75.feature.md: -------------------------------------------------------------------------------- 1 | Add support for Python 3.14 . 
2 | -------------------------------------------------------------------------------- /.semgrepignore: -------------------------------------------------------------------------------- 1 | :include .gitignore 2 | 3 | /driver/ 4 | /testkit/ 5 | /tests/ 6 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "driver"] 2 | path = driver 3 | url = https://github.com/neo4j/neo4j-python-driver 4 | -------------------------------------------------------------------------------- /testkit/testkit.json: -------------------------------------------------------------------------------- 1 | { 2 | "testkit": { 3 | "uri": "https://github.com/neo4j-drivers/testkit.git", 4 | "ref": "6.x" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | # Unix-style newlines with a newline ending every file 5 | [*] 6 | end_of_line = lf 7 | insert_final_newline = true 8 | charset = utf-8 9 | 10 | [*.sh] 11 | indent_style = space 12 | indent_size = 4 13 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "neo4j-rust-ext" 3 | version = "0.1.0" 4 | edition = "2021" 5 | rust-version = "1.77" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | [lib] 9 | name = "neo4j_rust_ext" 10 | crate-type = ["cdylib"] 11 | 12 | [dependencies] 13 | pyo3 = "0.27.1" 14 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{310,311,312,313,314}-{test}-{releasedriver,devdriver} 3 | 4 | [testenv] 5 | dependency_groups = 6 | test 7 | extras = 8 | numpy 9 | pandas 10 | pyarrow 11 | commands_pre = 12 | devdriver: python -m pip install ./driver --no-deps 13 | commands = 14 | test: python -m pytest -v --benchmark-skip {posargs} tests 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Unless stated otherwise, this software is distributed under the terms of the Apache License 2.0. 2 | See the LICENSE.APACHE2.txt file for the full license text. 3 | 4 | Parts of this software are distributed under the terms of the MIT License. 5 | See the LICENSE.MIT.txt file for the full license text. 6 | The pieces of code covered by the MIT License are marked as such.
7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | *~ 4 | *.py[cod] 5 | __pycache__/ 6 | .pytest_cache/ 7 | *.lprof 8 | *.class 9 | 10 | *.so 11 | 12 | .DS_Store 13 | 14 | .Python 15 | .venv/ 16 | .tox/ 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | include/ 28 | man/ 29 | venv/ 30 | /.venv*/ 31 | /venv*/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | .benchmarks 36 | 37 | # PyCharm 38 | .idea/ 39 | 40 | # VSCode 41 | .vscode/ 42 | 43 | testkit/CAs 44 | testkit/CustomCAs 45 | -------------------------------------------------------------------------------- /testkit/stress.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /bin/check_version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | version="$1"; shift 5 | version_matches=$(grep -o --perl-regexp '(?m)(?. 19 | " "+.feature.md" 20 | echo "=== Please rename the changelog file to match the PR number. ===" 21 | -------------------------------------------------------------------------------- /testkit/backend.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from pathlib import Path 18 | 19 | from _common import run_python 20 | 21 | 22 | if __name__ == "__main__": 23 | driver_path = (Path(__file__).parents[1] / "driver").absolute() 24 | run_python(["-m", "testkitbackend"], cwd=driver_path) 25 | -------------------------------------------------------------------------------- /testkit/unittests.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from _common import ( 18 | run_python, 19 | TEST_LOCAL_DRIVER, 20 | ) 21 | 22 | 23 | if __name__ == "__main__": 24 | driver_env = "devdriver" if TEST_LOCAL_DRIVER else "releasedriver" 25 | run_python(["-m", "tox", "-vv", "-f", driver_env, "test"]) 26 | -------------------------------------------------------------------------------- /changelog.d/README.md: -------------------------------------------------------------------------------- 1 | To create a new entry, you can run: 2 | ```bash 3 | towncrier create "." 4 | ``` 5 | This will create a new file in the `changelog.d/` directory for you to fill in. 6 | If there is no issue or PR number, you can use `+.` instead. 7 | The `` determines how the entry will be grouped in the changelog. 8 | For available types, see the `[[tool.towncrier.type]]` entries in the `pyproject.toml` file. 9 | 10 | You can include `` in the entry to determine where the list of PR/issue links goes. 11 | Usually, the entries will look like this: 12 | 13 | ```markdown 14 | Some summary of the change. 15 | ``` 16 | 17 | or if there's more to say (note the trailing spaces after the first line): 18 | ```markdown 19 | Some summary of the change. 20 | Some more details. Feel free to use markdown features like 21 | * lists 22 | * more lists 23 | * nested lists 24 | * [links](https://example.com) 25 | * etc. 26 | ``` 27 | -------------------------------------------------------------------------------- /src/codec.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | mod packstream; 17 | 18 | use pyo3::prelude::*; 19 | 20 | use crate::register_package; 21 | 22 | pub(super) fn init_module(m: &Bound<PyModule>, name: &str) -> PyResult<()> { 23 | let py = m.py(); 24 | 25 | m.gil_used(false)?; 26 | register_package(m, name)?; 27 | 28 | let mod_packstream = PyModule::new(py, "packstream")?; 29 | m.add_submodule(&mod_packstream)?; 30 | packstream::init_module(&mod_packstream, format!("{name}.packstream").as_str())?; 31 | 32 | Ok(()) 33 | } 34 | -------------------------------------------------------------------------------- /tests/vector/test_injection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import neo4j.vector 18 | 19 | 20 | def test_endian_swap_was_imported(): 21 | swap = neo4j.vector._swap_endian_unchecked_rust 22 | assert swap is not None 23 | assert swap is neo4j._rust.vector.swap_endian 24 | assert neo4j.vector._swap_endian_unchecked is swap 25 | 26 | 27 | def test_endian_swap_was_injected(mocker): 28 | mock = mocker.patch("neo4j.vector._swap_endian_unchecked") 29 | neo4j.vector._swap_endian(2, b"\x01\x02\x03\x04") 30 | mock.assert_called_once_with(2, b"\x01\x02\x03\x04") 31 | -------------------------------------------------------------------------------- /LICENSE.MIT.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025, Neo4j Sweden AB [https://neo4j.com] 2 | Copyright (c) 2015, Amber Brown and the towncrier contributors 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /testkit/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from _common import ( 18 | run_python, 19 | TEST_LOCAL_DRIVER, 20 | ) 21 | 22 | 23 | if __name__ == "__main__": 24 | run_python(["-m", "pip", "install", "-U", "pip"]) 25 | run_python(["-m", "pip", "install", "-U", "--group", "packaging"]) 26 | run_python(["-m", "build", "."]) 27 | run_python( 28 | [ 29 | "-m", 30 | "pip", 31 | "install", 32 | "-U", 33 | "--group", 34 | "driver/pyproject.toml:testkit", 35 | "-e", 36 | ".", 37 | ] 38 | ) 39 | if TEST_LOCAL_DRIVER: 40 | run_python(["-m", "pip", "install", "./driver", "--no-deps"]) 41 | -------------------------------------------------------------------------------- /tests/benchmarks/test_vector_benchmarks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from __future__ import annotations 18 | 19 | import pytest 20 | 21 | from ..vector.from_driver.test_vector import ( 22 | _mock_mask_extensions, 23 | _swap_endian, 24 | ) 25 | 26 | 27 | @pytest.mark.parametrize("ext", ("numpy", "rust", "python")) 28 | @pytest.mark.parametrize("type_size", (2, 4, 8)) 29 | @pytest.mark.parametrize("length", (1, 100_000)) 30 | def test_bench_swap_endian(benchmark, mocker, ext, type_size, length): 31 | data = bytes(i % 256 for i in range(8 * length)) 32 | _mock_mask_extensions(mocker, ext) 33 | rounds = max(min(1_000_000 // length, 100_000), 100) 34 | 35 | benchmark.pedantic(lambda: _swap_endian(type_size, data), rounds=rounds) 36 | -------------------------------------------------------------------------------- /testkit/_common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import os 18 | import subprocess 19 | import sys 20 | 21 | 22 | _TRUE_ENV_VALS = {"1", "y", "yes", "true", "t", "on"} 23 | 24 | 25 | TEST_BACKEND_VERSION = os.getenv("TEST_BACKEND_VERSION", "python") 26 | TEST_LOCAL_DRIVER = ( 27 | os.environ.get("TEST_LOCAL_DRIVER", "").lower() in _TRUE_ENV_VALS 28 | ) 29 | 30 | 31 | def run(args, env=None, **kwargs): 32 | print(args) 33 | return subprocess.run( 34 | args, 35 | text=True, 36 | stdout=sys.stdout, 37 | stderr=sys.stderr, 38 | check=True, 39 | env=env, 40 | **kwargs, 41 | ) 42 | 43 | 44 | def run_python(args, env=None, **kwargs): 45 | cmd = [TEST_BACKEND_VERSION, "-u", *args] 46 | run(cmd, env=env, **kwargs) 47 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | exclude: ^(driver/.*|tests/(.*/)?from_driver/.*)$ 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.1.0 7 | hooks: 8 | - id: check-byte-order-marker 9 | - id: check-case-conflict 10 | - id: check-executables-have-shebangs 11 | - id: check-shebang-scripts-are-executable 12 | - id: check-merge-conflict 13 | - id: check-symlinks 14 | - id: destroyed-symlinks 15 | - id: end-of-file-fixer 16 | - id: mixed-line-ending 17 | args: [ --fix=lf ] 18 | exclude_types: 19 | - batch 20 | - id: trailing-whitespace 21 | args: [ --markdown-linebreak-ext=md ] 22 | - repo: local 23 | hooks: 24 | - id: cargo-fmt 25 | name: cargo fmt 26 | entry: cargo fmt 27 | language: system 28 | types: [rust] 29 | pass_filenames: false 30 | - repo: local 31 | hooks: 32 | - id: cargo-clippy 33 | name: cargo clippy 34 | entry: cargo clippy --all-features --tests -- -D warnings 35 | language: system 36 | types: [rust] 37 | pass_filenames: false 38 | - repo: local 39 | hooks: 40 | - id: isort 41 | name: isort 42 | entry: 
isort 43 | types_or: [ python, pyi ] 44 | language: system 45 | - repo: https://github.com/astral-sh/ruff-pre-commit 46 | rev: v0.6.4 47 | hooks: 48 | - id: ruff-format 49 | - id: ruff 50 | args: [ --fix ] 51 | - id: ruff-format 52 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | mod codec; 17 | mod vector; 18 | 19 | use pyo3::prelude::*; 20 | 21 | #[pymodule(gil_used = false)] 22 | #[pyo3(name = "_rust")] 23 | fn init_module(m: &Bound<PyModule>) -> PyResult<()> { 24 | let py = m.py(); 25 | 26 | let mod_codec = PyModule::new(py, "codec")?; 27 | m.add_submodule(&mod_codec)?; 28 | codec::init_module(&mod_codec, "codec")?; 29 | 30 | let mod_vector = PyModule::new(py, "vector")?; 31 | m.add_submodule(&mod_vector)?; 32 | vector::init_module(&mod_vector, "vector")?; 33 | 34 | Ok(()) 35 | } 36 | 37 | // hack to make python pick up the submodule as a package 38 | // https://github.com/PyO3/pyo3/issues/1517#issuecomment-808664021 39 | fn register_package(m: &Bound<PyModule>, name: &str) -> PyResult<()> { 40 | let py = m.py(); 41 | let module_name = format!("neo4j._rust.{name}").into_pyobject(py)?; 42 | 43 | py.import("sys")? 44 | .getattr("modules")? 45 | .set_item(&module_name, m)?; 46 | m.setattr("__name__", &module_name)?; 47 | 48 | Ok(()) 49 | } 50 | -------------------------------------------------------------------------------- /tests/codec/packstream/test_structure.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | 17 | from __future__ import annotations 18 | 19 | import gc 20 | from contextlib import contextmanager 21 | 22 | from neo4j._codec.packstream import Structure 23 | 24 | 25 | @contextmanager 26 | def gc_disabled(): 27 | try: 28 | gc.disable() 29 | yield 30 | finally: 31 | gc.enable() 32 | gc.collect() 33 | 34 | 35 | class StructureHolder: 36 | s: Structure | None = None 37 | 38 | 39 | def test_memory_leak() -> None: 40 | iterations = 10_000 41 | 42 | gc.collect() 43 | with gc_disabled(): 44 | for _ in range(iterations): 45 | # create a reference cycle 46 | holder1 = StructureHolder() 47 | structure1 = Structure(b"\x00", [holder1]) 48 | holder2 = StructureHolder() 49 | structure2 = Structure(b"\x01", [holder2]) 50 | holder1.s = structure2 51 | holder2.s = structure1 52 | del structure1, structure2, holder1, holder2 53 | 54 | cleaned = gc.collect() 55 | assert cleaned >= 4 * iterations 56 | -------------------------------------------------------------------------------- /src/codec/packstream/v1.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | mod pack; 17 | mod unpack; 18 | 19 | use pyo3::prelude::*; 20 | use pyo3::wrap_pyfunction; 21 | 22 | use crate::register_package; 23 | 24 | const TINY_STRING: u8 = 0x80; 25 | const TINY_LIST: u8 = 0x90; 26 | const TINY_MAP: u8 = 0xA0; 27 | const TINY_STRUCT: u8 = 0xB0; 28 | const NULL: u8 = 0xC0; 29 | const FALSE: u8 = 0xC2; 30 | const TRUE: u8 = 0xC3; 31 | const INT_8: u8 = 0xC8; 32 | const INT_16: u8 = 0xC9; 33 | const INT_32: u8 = 0xCA; 34 | const INT_64: u8 = 0xCB; 35 | const FLOAT_64: u8 = 0xC1; 36 | const STRING_8: u8 = 0xD0; 37 | const STRING_16: u8 = 0xD1; 38 | const STRING_32: u8 = 0xD2; 39 | const LIST_8: u8 = 0xD4; 40 | const LIST_16: u8 = 0xD5; 41 | const LIST_32: u8 = 0xD6; 42 | const MAP_8: u8 = 0xD8; 43 | const MAP_16: u8 = 0xD9; 44 | const MAP_32: u8 = 0xDA; 45 | const BYTES_8: u8 = 0xCC; 46 | const BYTES_16: u8 = 0xCD; 47 | const BYTES_32: u8 = 0xCE; 48 | 49 | pub(crate) fn init_module(m: &Bound<PyModule>, name: &str) -> PyResult<()> { 50 | m.gil_used(false)?; 51 | register_package(m, name)?; 52 | 53 | m.add_function(wrap_pyfunction!(unpack::unpack, m)?)?; 54 | m.add_function(wrap_pyfunction!(pack::pack, m)?)?; 55 | 56 | Ok(()) 57 | } 58 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | from collections import defaultdict 18 | 19 | import pytest 20 | 21 | 22 | @pytest.hookimpl(wrapper=True) 23 | def pytest_benchmark_group_stats(config, benchmarks, group_by): 24 | outcome = yield 25 | 26 | if group_by != "group": 27 | # not default grouping, so let the user have what they asked for 28 | return outcome 29 | 30 | result = defaultdict(list) 31 | for bench in benchmarks: 32 | param_start = bench["fullname"].rfind("[") 33 | if param_start < 0: 34 | base_name = bench["fullname"] 35 | else: 36 | base_name = bench["fullname"][:param_start] 37 | params = bench.get("params", None) 38 | if params is None: 39 | result[base_name].append(bench) 40 | continue 41 | ext = params.get("ext", None) 42 | if ext is None: 43 | result[base_name].append(bench) 44 | continue 45 | param_keys = sorted(params.keys()) 46 | name_params = "-".join( 47 | str(params[k]) for k in param_keys if k != "ext" 48 | ) 49 | group_name = f"{base_name}[{name_params}]" 50 | result[group_name].append(bench) 51 | 52 | return result.items() 53 | -------------------------------------------------------------------------------- /tests/benchmarks/test_macro_benchmarks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import neo4j 18 | 19 | 20 | URL = "neo4j://localhost:7687" 21 | AUTH = ("neo4j", "pass") 22 | 23 | 24 | def test_little_data(benchmark): 25 | def work(): 26 | driver.execute_query("RETURN 1 AS n") 27 | 28 | with neo4j.GraphDatabase.driver(URL, auth=AUTH) as driver: 29 | driver.verify_connectivity() 30 | benchmark.pedantic(work, rounds=5000) 31 | 32 | 33 | def test_import(benchmark): 34 | def work(): 35 | driver.execute_query("RETURN 1 AS n", param=data) 36 | 37 | data = [ 38 | *range(1000), 39 | *( 40 | { 41 | "name": f"Person {i}", 42 | "age": i, 43 | } 44 | for i in range(1000) 45 | ), 46 | f"L{'o' * 10000}ng string", 47 | ] 48 | 49 | with neo4j.GraphDatabase.driver(URL, auth=AUTH) as driver: 50 | driver.verify_connectivity() 51 | benchmark.pedantic(work, rounds=1000) 52 | 53 | 54 | def test_export_single_record(benchmark): 55 | def work(): 56 | driver.execute_query("RETURN [x IN range(0, 100000)] AS x") 57 | 58 | with neo4j.GraphDatabase.driver(URL, auth=AUTH) as driver: 59 | driver.verify_connectivity() 60 | benchmark.pedantic(work, rounds=300) 61 | 62 | 63 | def test_export_many_records(benchmark): 64 | def work(): 65 | driver.execute_query("UNWIND range(0, 1000) AS x RETURN x") 66 | 67 | with neo4j.GraphDatabase.driver(URL, auth=AUTH) as driver: 68 | driver.verify_connectivity() 69 | benchmark.pedantic(work, rounds=150) 70 | -------------------------------------------------------------------------------- /tests/vector/from_driver/test_import_vector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | import importlib 18 | 19 | import pytest 20 | 21 | 22 | MODULE_PATH = "neo4j.vector" 23 | VECTOR_ATTRIBUTES = ( 24 | # (name, warning) 25 | ("Vector", None), 26 | ("VectorDType", None), 27 | ("VectorEndian", None), 28 | ) 29 | 30 | 31 | def _get_module(): 32 | module = importlib.__import__(MODULE_PATH) 33 | for submodule in MODULE_PATH.split(".")[1:]: 34 | module = getattr(module, submodule) 35 | return module 36 | 37 | 38 | @pytest.mark.parametrize(("name", "warning"), VECTOR_ATTRIBUTES) 39 | def test_attribute_import(name, warning): 40 | module = _get_module() 41 | if warning: 42 | with pytest.warns(warning): 43 | getattr(module, name) 44 | else: 45 | getattr(module, name) 46 | 47 | 48 | @pytest.mark.parametrize(("name", "warning"), VECTOR_ATTRIBUTES) 49 | def test_attribute_from_import(name, warning): 50 | if warning: 51 | with pytest.warns(warning): 52 | importlib.__import__(MODULE_PATH, fromlist=(name,)) 53 | else: 54 | importlib.__import__(MODULE_PATH, fromlist=(name,)) 55 | 56 | 57 | def test_all(): 58 | module = _get_module() 59 | 60 | assert sorted(module.__all__) == sorted([i[0] for i in VECTOR_ATTRIBUTES]) 61 | 62 | 63 | def test_dir(): 64 | module = _get_module() 65 | 66 | dir_attrs = (attr for attr in dir(module) if not attr.startswith("_")) 67 | assert sorted(dir_attrs) == sorted([i[0] for i in VECTOR_ATTRIBUTES]) 68 | 69 | 70 | def test_import_star(): 71 | # ignore PT029: purposefully capturing all warnings to then apply further 72 | # checks on them 73 | importlib.__import__(MODULE_PATH, fromlist=("*",)) 74 | -------------------------------------------------------------------------------- /testkit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get install -y locales && \ 6 | apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ 7 | localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 \ 8 | && rm -rf /var/lib/apt/lists/* 9 | ENV LANG=en_US.UTF-8 10 | 11 | # Using apt-get update alone in a RUN statement causes caching issues and subsequent apt-get install instructions fail. 12 | RUN apt-get --quiet update && apt-get --quiet install -y \ 13 | software-properties-common \ 14 | bash \ 15 | python3 \ 16 | python3-pip \ 17 | git \ 18 | curl \ 19 | tar \ 20 | wget \ 21 | && rm -rf /var/lib/apt/lists/* 22 | 23 | # Install Build Tools 24 | RUN apt-get update && \ 25 | apt-get install -y --no-install-recommends \ 26 | make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev \ 27 | libsqlite3-dev wget curl llvm libncurses5-dev xz-utils tk-dev \ 28 | libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev \ 29 | ca-certificates && \ 30 | apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 31 | 32 | # Install our own CAs on the image. 33 | # Assumes Linux Debian based image. 34 | COPY CAs/* /usr/local/share/ca-certificates/ 35 | # Store custom CAs somewhere where the backend can find them later. 
36 | COPY CustomCAs/* /usr/local/share/custom-ca-certificates/ 37 | RUN update-ca-certificates 38 | 39 | # Install pyenv 40 | RUN git clone https://github.com/pyenv/pyenv.git .pyenv 41 | ENV PYENV_ROOT=/.pyenv 42 | ENV PATH="$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH" 43 | 44 | # Setup python version 45 | ENV PYTHON_VERSIONS="3.14 3.13 3.12 3.11 3.10" 46 | 47 | RUN for version in $PYTHON_VERSIONS; do \ 48 | pyenv install $version; \ 49 | done 50 | RUN pyenv rehash 51 | RUN pyenv global $(pyenv versions --bare --skip-aliases | sort --version-sort --reverse) 52 | 53 | # Install Latest pip for each environment 54 | # + tox and tools for starting the tests 55 | # https://pip.pypa.io/en/stable/news/ 56 | RUN for version in $PYTHON_VERSIONS; do \ 57 | python$version -m pip install -U pip && \ 58 | python$version -m pip install -U coverage tox; \ 59 | done 60 | 61 | # Install Rust toolchain 62 | RUN curl https://sh.rustup.rs -sSf | bash -s -- -y 63 | ENV PATH="/root/.cargo/bin:${PATH}" 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rust Extensions for a Faster Neo4j Bolt Driver for Python 2 | 3 | This project contains Rust extensions to speed up the [official Python driver for Neo4j](https://github.com/neo4j/neo4j-python-driver). 4 | 5 | The exact speedup depends on the use-case but has been measured to be up to 10x. 6 | Use-cases that move only a few but large records out of the DBMS tend to benefit the most. 7 | 8 | 9 | ## Installation 10 | Adjust your dependencies (`requirements.txt`, `pyproject.toml` or similar) like so: 11 | ``` 12 | # remove: 13 | # neo4j == X.Y.Z # needs to be at least 5.14.1 for a matching Rust extension to exist 14 | # add: 15 | neo4j-rust-ext == X.Y.Z.* 16 | ``` 17 | 18 | I.e., install the same version of `neo4j-rust-ext` as you would install of `neo4j` (except for the last segment which is used for patches of this library). 19 | That's it! 20 | You don't have to change your code but can use the driver as you normally would. 21 | This package will install the driver as its dependency and then inject itself in a place where the driver can find it and pick it up. 22 | 23 | N.B., since the driver is a simple Python dependency of this package, you can also manually install/specify both packages at the same time without issues. 24 | However, make sure the versions match if you do so or leave the version of one of the two unspecified to let the package manager pick a compatible version for you (resolution might be slow, however). 25 | 26 | If you experience issues with the driver, consider troubleshooting without the Rust extension first. 27 | For that, simply make sure you haven't installed `neo4j-rust-ext` but *only* `neo4j`. 28 | 29 | > [!NOTE] 30 | > For pre-release versions of the driver (alpha, beta, etc.), the version scheme is slightly different: 31 | > `neo4j == X.Y.ZaA` (for alpha - `a` is not a variable number but a literal) needs to be replaced with `neo4j-rust-ext == X.Y.Z.NaA` where `N` needs to be explicitly specified and cannot be `*`. 32 | > This is a limitation of allowed version identifiers in the Python packaging ecosystem. 33 | 34 | 35 | ## Requirements 36 | For many operating systems and architectures, the pre-built wheels will work out of the box. 37 | If they don't, pip (or any other Python packaging front-end) will try to build the extension from source.
38 | Here's what you'll need for this: 39 | * Rust 1.77 or later: 40 | https://www.rust-lang.org/tools/install 41 | * Further build tools (depending on the platform). 42 | E.g., `gcc` on Ubuntu: `sudo apt install gcc` 43 | -------------------------------------------------------------------------------- /src/vector.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use pyo3::exceptions::PyValueError; 17 | use pyo3::prelude::*; 18 | use pyo3::types::{PyBytes, PyInt}; 19 | use pyo3::{pyfunction, Bound, PyErr, PyResult}; 20 | 21 | use crate::register_package; 22 | 23 | #[pyfunction] 24 | fn swap_endian<'py>( 25 | type_size: Bound<'py, PyInt>, 26 | data: Bound<'py, PyBytes>, 27 | ) -> PyResult<Bound<'py, PyBytes>> { 28 | let py = type_size.py(); 29 | 30 | let type_size: usize = match type_size.extract::<usize>() { 31 | Ok(type_size @ 2) | Ok(type_size @ 4) | Ok(type_size @ 8) => type_size, 32 | _ => { 33 | return Err(PyErr::new::<PyValueError, _>(format!( 34 | "Unsupported type size {type_size}", 35 | ))) 36 | } 37 | }; 38 | let bytes = &data.as_bytes(); 39 | let len = bytes.len(); 40 | if len % type_size != 0 { 41 | return Err(PyErr::new::<PyValueError, _>(format!( 42 | "Data length {len} not a multiple of type_size {type_size}", 43 | ))); 44 | } 45 | 46 | PyBytes::new_with(py, bytes.len(), |out| { 47 | match type_size { 48 | 2 => swap_n::<2>(bytes, out), 49 | 4 => swap_n::<4>(bytes, out), 50 | 8 => swap_n::<8>(bytes, out), 51 | _ => unreachable!(), 52 | } 53 | Ok(()) 54 | }) 55 | } 56 | 57 | #[inline(always)] 58 | fn swap_n<const N: usize>(src: &[u8], dst: &mut [u8]) { 59 | // Doesn't technically need to be a function with a const generic, but this 60 | // allows the compiler to optimize the code better. 61 | assert_eq!(src.len(), dst.len()); 62 | assert_eq!(src.len() % N, 0); 63 | for i in (0..src.len()).step_by(N) { 64 | for j in 0..N { 65 | dst[i + j] = src[i + N - j - 1]; 66 | } 67 | } 68 | } 69 | 70 | pub(super) fn init_module(m: &Bound<PyModule>, name: &str) -> PyResult<()> { 71 | m.gil_used(false)?; 72 | register_package(m, name)?; 73 | 74 | m.add_function(wrap_pyfunction!(swap_endian, m)?)?; 75 | 76 | Ok(()) 77 | } 78 | -------------------------------------------------------------------------------- /src/codec/packstream.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | mod v1; 17 | 18 | use pyo3::basic::CompareOp; 19 | use pyo3::exceptions::{PyIndexError, PyValueError}; 20 | use pyo3::prelude::*; 21 | use pyo3::types::PyBytes; 22 | use pyo3::{IntoPyObjectExt, PyTraverseError, PyVisit}; 23 | 24 | use crate::register_package; 25 | 26 | pub(super) fn init_module(m: &Bound<PyModule>, name: &str) -> PyResult<()> { 27 | let py = m.py(); 28 | 29 | m.gil_used(false)?; 30 | register_package(m, name)?; 31 | 32 | let mod_v1 = PyModule::new(py, "v1")?; 33 | m.add_submodule(&mod_v1)?; 34 | v1::init_module(&mod_v1, format!("{name}.v1").as_str())?; 35 | 36 | m.add_class::<Structure>()?; 37 | 38 | Ok(()) 39 | } 40 | 41 | #[pyclass] 42 | #[derive(Debug)] 43 | pub struct Structure { 44 | tag: u8, 45 | #[pyo3(get)] 46 | fields: Vec<Py<PyAny>>, 47 | } 48 | 49 | impl Structure { 50 | fn eq(&self, other: &Self, py: Python<'_>) -> PyResult<bool> { 51 | if self.tag != other.tag || self.fields.len() != other.fields.len() { 52 | return Ok(false); 53 | } 54 | for (a, b) in self 55 | .fields 56 | .iter() 57 | .map(|e| e.bind(py)) 58 | .zip(other.fields.iter().map(|e| e.bind(py))) 59 | { 60 | if !a.eq(b)? { 61 | return Ok(false); 62 | } 63 | } 64 | Ok(true) 65 | } 66 | 67 | fn compute_index(&self, index: isize) -> PyResult<usize> { 68 | Ok(if index < 0 { 69 | self.fields 70 | .len() 71 | .checked_sub(-index as usize) 72 | .ok_or_else(|| PyErr::new::<PyIndexError, _>("field index out of range"))? 73 | } else { 74 | let index = index as usize; 75 | if index >= self.fields.len() { 76 | return Err(PyErr::new::<PyIndexError, _>("field index out of range")); 77 | } 78 | index 79 | }) 80 | } 81 | } 82 | 83 | #[pymethods] 84 | impl Structure { 85 | #[new] 86 | #[pyo3(signature = (tag, *fields))] 87 | #[pyo3(text_signature = "(tag, *fields)")] 88 | fn new(tag: &[u8], fields: Vec<Py<PyAny>>) -> PyResult<Self> { 89 | if tag.len() != 1 { 90 | return Err(PyErr::new::<PyValueError, _>("tag must be a single byte")); 91 | } 92 | let tag = tag[0]; 93 | Ok(Self { tag, fields }) 94 | } 95 | 96 | #[getter(tag)] 97 | fn read_tag<'py>(&self, py: Python<'py>) -> Bound<'py, PyBytes> { 98 | PyBytes::new(py, &[self.tag]) 99 | } 100 | 101 | fn __repr__(&self, py: Python<'_>) -> PyResult<String> { 102 | let mut args = format!(r"b'{}'", self.tag as char); 103 | self.fields.iter().try_for_each(|field| { 104 | let repr = field.bind(py).repr()?; 105 | args.push_str(", "); 106 | args.push_str(&repr.to_cow()?); 107 | Ok::<_, PyErr>(()) 108 | })?; 109 | Ok(format!("Structure({args})")) 110 | } 111 | 112 | fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> PyResult<Py<PyAny>> { 113 | Ok(match op { 114 | CompareOp::Eq => self.eq(other, py)?.into_py_any(py)?, 115 | CompareOp::Ne => (!self.eq(other, py)?).into_py_any(py)?, 116 | _ => py.NotImplemented(), 117 | }) 118 | } 119 | 120 | fn __len__(&self) -> usize { 121 | self.fields.len() 122 | } 123 | 124 | fn __getitem__(&self, index: isize, py: Python<'_>) -> PyResult<Py<PyAny>> { 125 | Ok(self.fields[self.compute_index(index)?].clone_ref(py)) 126 | } 127 | 128 | fn __setitem__(&mut self, index: isize, value: Py<PyAny>) -> PyResult<()> { 129 | let index = self.compute_index(index)?; 130 | self.fields[index] = value; 131 | Ok(()) 132 | } 133 | 134 | fn __traverse__(&self, visit: PyVisit<'_>) -> Result<(), PyTraverseError> { 135 | for field in &self.fields { 136 | visit.call(field)?; 137 | } 138 | Ok(()) 139 | } 140 | 141 | fn __clear__(&mut self) { 142 | self.fields.clear(); 143 | } 144 | } 145 |
-------------------------------------------------------------------------------- /tests/codec/packstream/v1/test_injection.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import importlib 18 | import sys 19 | import traceback 20 | 21 | import pytest 22 | 23 | from neo4j._codec.hydration import DehydrationHooks 24 | from neo4j._codec.packstream import Structure 25 | from neo4j._codec.packstream.v1 import ( 26 | Packer, 27 | Unpacker, 28 | ) 29 | 30 | 31 | @pytest.fixture 32 | def packer_with_buffer(): 33 | packable_buffer = Packer.new_packable_buffer() 34 | return Packer(packable_buffer), packable_buffer 35 | 36 | 37 | @pytest.fixture 38 | def unpacker_with_buffer(): 39 | unpackable_buffer = Unpacker.new_unpackable_buffer() 40 | return Unpacker(unpackable_buffer), unpackable_buffer 41 | 42 | 43 | def test_pack_injection_works(packer_with_buffer): 44 | class TestClass: 45 | pass 46 | 47 | class TestError(Exception): 48 | pass 49 | 50 | def raise_test_exception(*args, **kwargs): 51 | raise TestError 52 | 53 | dehydration_hooks = DehydrationHooks( 54 | exact_types={TestClass: raise_test_exception}, 55 | subtypes={}, 56 | ) 57 | test_object = TestClass() 58 | packer, _ = packer_with_buffer 59 | 60 | with pytest.raises(TestError) as exc: 61 | packer.pack(test_object, dehydration_hooks=dehydration_hooks) 62 | 63 | # printing the traceback to stdout to make it easier to debug 64 | traceback.print_exception(exc.type, exc.value, exc.tb, file=sys.stdout) 65 | 66 | assert any("_rust_pack" in str(entry.statement) for entry in exc.traceback) 67 | assert not any( 68 | "_py_pack" in str(entry.statement) for entry in exc.traceback 69 | ) 70 | 71 | 72 | def test_unpack_injection_works(unpacker_with_buffer): 73 | class TestError(Exception): 74 | pass 75 | 76 | def raise_test_exception(*args, **kwargs): 77 | raise TestError 78 | 79 | hydration_hooks = {Structure: raise_test_exception} 80 | unpacker, buffer = unpacker_with_buffer 81 | 82 | buffer.reset() 83 | buffer.data = bytearray(b"\xb0\xff") 84 | 85 | with pytest.raises(TestError) as exc: 86 | unpacker.unpack(hydration_hooks) 87 | 88 | # printing the traceback to stdout to make it easier to debug 89 | traceback.print_exception(exc.type, exc.value, exc.tb, file=sys.stdout) 90 | 91 | assert any( 92 | "_rust_unpack" in str(entry.statement) for entry in exc.traceback 93 | ) 94 | assert not any( 95 | "_py_unpack" in str(entry.statement) for entry in exc.traceback 96 | ) 97 | 98 | 99 | @pytest.mark.parametrize( 100 | ("name", "submodule_names"), 101 | ( 102 | # packstream v1 103 | ("neo4j._rust.codec.packstream.v1", ()), 104 | ("neo4j._rust.codec.packstream", ("v1",)), 105 | ("neo4j._rust.codec", ("packstream",)), 106 | ("neo4j._rust", ("codec",)), 107 | ("neo4j", ("_rust",)), 108 | ), 109 | ) 110 | def test_import_module(name, submodule_names): 111 | module 
= importlib.import_module(name) 112 | 113 | assert module.__name__ == name 114 | 115 | for submodule_name in submodule_names: 116 | package = getattr(module, submodule_name) 117 | assert package.__name__ == f"{name}.{submodule_name}" 118 | 119 | 120 | def test_rust_struct_access(): 121 | tag = b"F" 122 | fields = ["foo", False, 42, 3.14, b"bar"] 123 | struct = Structure(tag, *fields) 124 | 125 | assert struct.tag == tag 126 | assert isinstance(struct.tag, bytes) 127 | assert struct.fields == fields 128 | 129 | 130 | def test_rust_struct_equal(): 131 | struct1 = Structure(b"F", "foo", False, 42, 3.14, b"bar") 132 | struct2 = Structure(b"F", "foo", False, 42, 3.14, b"bar") 133 | assert struct1 == struct2 134 | # [noqa] for testing correctness of equality 135 | assert not struct1 != struct2 # noqa: SIM202 136 | 137 | 138 | @pytest.mark.parametrize( 139 | "args", 140 | ( 141 | (b"F", "foo", True, 42, 3.14, b"bar"), 142 | (b"f", "foo", False, 42, 3.14, b"baz"), 143 | ), 144 | ) 145 | def test_rust_struct_not_equal(args): 146 | struct1 = Structure(b"F", "foo", False, 42, 3.14, b"bar") 147 | struct2 = Structure(*args) 148 | assert struct1 != struct2 149 | # [noqa] for testing correctness of equality 150 | assert not struct1 == struct2 # noqa: SIM201 151 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.3.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 10 | 11 | [[package]] 12 | name = "heck" 13 | version = "0.5.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 16 | 17 | [[package]] 18 | name = "indoc" 19 | version = "2.0.5" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" 22 | 23 | [[package]] 24 | name = "libc" 25 | version = "0.2.155" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 28 | 29 | [[package]] 30 | name = "memoffset" 31 | version = "0.9.1" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" 34 | dependencies = [ 35 | "autocfg", 36 | ] 37 | 38 | [[package]] 39 | name = "neo4j-rust-ext" 40 | version = "0.1.0" 41 | dependencies = [ 42 | "pyo3", 43 | ] 44 | 45 | [[package]] 46 | name = "once_cell" 47 | version = "1.21.3" 48 | source = "registry+https://github.com/rust-lang/crates.io-index" 49 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 50 | 51 | [[package]] 52 | name = "portable-atomic" 53 | version = "1.6.0" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" 56 | 57 | [[package]] 58 | name = "proc-macro2" 59 | version = "1.0.86" 60 | source = "registry+https://github.com/rust-lang/crates.io-index" 61 | checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" 62 | dependencies = [ 63 | "unicode-ident", 64 | ] 65 | 66 | 
[[package]] 67 | name = "pyo3" 68 | version = "0.27.1" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf" 71 | dependencies = [ 72 | "indoc", 73 | "libc", 74 | "memoffset", 75 | "once_cell", 76 | "portable-atomic", 77 | "pyo3-build-config", 78 | "pyo3-ffi", 79 | "pyo3-macros", 80 | "unindent", 81 | ] 82 | 83 | [[package]] 84 | name = "pyo3-build-config" 85 | version = "0.27.1" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb" 88 | dependencies = [ 89 | "target-lexicon", 90 | ] 91 | 92 | [[package]] 93 | name = "pyo3-ffi" 94 | version = "0.27.1" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be" 97 | dependencies = [ 98 | "libc", 99 | "pyo3-build-config", 100 | ] 101 | 102 | [[package]] 103 | name = "pyo3-macros" 104 | version = "0.27.1" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71" 107 | dependencies = [ 108 | "proc-macro2", 109 | "pyo3-macros-backend", 110 | "quote", 111 | "syn", 112 | ] 113 | 114 | [[package]] 115 | name = "pyo3-macros-backend" 116 | version = "0.27.1" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b" 119 | dependencies = [ 120 | "heck", 121 | "proc-macro2", 122 | "pyo3-build-config", 123 | "quote", 124 | "syn", 125 | ] 126 | 127 | [[package]] 128 | name = "quote" 129 | version = "1.0.36" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 132 | dependencies = [ 133 | "proc-macro2", 134 | ] 135 | 136 | [[package]] 137 | name = "syn" 138 | version = "2.0.68" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" 141 | dependencies = [ 142 | "proc-macro2", 143 | "quote", 144 | "unicode-ident", 145 | ] 146 | 147 | [[package]] 148 | name = "target-lexicon" 149 | version = "0.13.2" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" 152 | 153 | [[package]] 154 | name = "unicode-ident" 155 | version = "1.0.12" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 158 | 159 | [[package]] 160 | name = "unindent" 161 | version = "0.2.3" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" 164 | -------------------------------------------------------------------------------- /tests/codec/packstream/from_driver/test_structure.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import pytest 18 | 19 | from neo4j._codec.packstream import Structure 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "args", 24 | ( 25 | (b"T", 1, 2, 3, "abc", 1.2, None, False), 26 | (b"F",), 27 | ), 28 | ) 29 | def test_structure_accessors(args): 30 | tag = args[0] 31 | fields = list(args[1:]) 32 | s1 = Structure(*args) 33 | assert s1.tag == tag 34 | assert s1.fields == fields 35 | 36 | 37 | @pytest.mark.parametrize( 38 | ("other", "expected"), 39 | ( 40 | (Structure(b"T", 1, 2, 3, "abc", 1.2, [{"a": "b"}, None]), True), 41 | (Structure(b"T", 1, 2, 3, "abc", 1.2, [{"a": "b"}, 0]), False), 42 | (Structure(b"T", 1, 2, 3, "abc", 1.2, [{"a": "B"}, None]), False), 43 | (Structure(b"T", 1, 2, 3, "abc", 1.2, [{"A": "b"}, None]), False), 44 | (Structure(b"T", 1, 2, 3, "abc", 1.3, [{"a": "b"}, None]), False), 45 | ( 46 | Structure(b"T", 1, 2, 3, "aBc", float("Nan"), [{"a": "b"}, None]), 47 | False, 48 | ), 49 | (Structure(b"T", 2, 2, 3, "abc", 1.2, [{"a": "b"}, None]), False), 50 | (Structure(b"T", 2, 3, "abc", 1.2, [{"a": "b"}, None]), False), 51 | (Structure(b"T", [1, 2, 3, "abc", 1.2, [{"a": "b"}, None]]), False), 52 | (object(), NotImplemented), 53 | ), 54 | ) 55 | def test_structure_equality(other, expected): 56 | s1 = Structure(b"T", 1, 2, 3, "abc", 1.2, [{"a": "b"}, None]) 57 | assert s1.__eq__(other) is expected # noqa: PLC2801 58 | if expected is NotImplemented: 59 | assert s1.__ne__(other) is NotImplemented # noqa: PLC2801 60 | else: 61 | assert s1.__ne__(other) is not expected # noqa: PLC2801 62 | 63 | 64 | @pytest.mark.parametrize( 65 | ("args", "expected"), 66 | ( 67 | ((b"F", 1, 2), "Structure(b'F', 1, 2)"), 68 | ((b"f", [1, 2]), "Structure(b'f', [1, 2])"), 69 | ( 70 | (b"T", 1.3, None, {"a": "b"}), 71 | "Structure(b'T', 1.3, None, {'a': 'b'})", 72 | ), 73 | ), 74 | ) 75 | def test_structure_repr(args, expected): 76 | s1 = Structure(*args) 77 | assert repr(s1) == expected 78 | assert str(s1) == expected 79 | 80 | # Ensure that the repr is consistent with the constructor 81 | assert eval(repr(s1)) == s1 82 | assert eval(str(s1)) == s1 83 | 84 | 85 | @pytest.mark.parametrize( 86 | ("fields", "expected"), 87 | ( 88 | ((), 0), 89 | (([],), 1), 90 | ((1, 2), 2), 91 | ((1, 2, []), 3), 92 | (([1, 2], {"a": "foo", "b": "bar"}), 2), 93 | ), 94 | ) 95 | def test_structure_len(fields, expected): 96 | structure = Structure(b"F", *fields) 97 | assert len(structure) == expected 98 | 99 | 100 | def test_structure_getitem(): 101 | fields = [1, 2, 3, "abc", 1.2, None, False, {"a": "b"}] 102 | structure = Structure(b"F", *fields) 103 | for i, field in enumerate(fields): 104 | assert structure[i] == field 105 | assert structure[-len(fields) + i] == field 106 | with pytest.raises(IndexError): 107 | _ = structure[len(fields)] 108 | with pytest.raises(IndexError): 109 | _ = structure[-len(fields) - 1] 110 | 111 | 112 | def test_structure_setitem(): 113 | test_value = object() 114 | fields = [1, 2, 3, "abc", 1.2, None, False, {"a": "b"}] 115 | structure = Structure(b"F", *fields) 116 | for i, original_value in enumerate(fields): 117 | structure[i] = 
test_value 118 | assert structure[i] == test_value 119 | assert structure[-len(fields) + i] == test_value 120 | assert structure[i] != original_value 121 | assert structure[-len(fields) + i] != original_value 122 | 123 | structure[i] = original_value 124 | assert structure[i] == original_value 125 | assert structure[-len(fields) + i] == original_value 126 | 127 | structure[-len(fields) + i] = test_value 128 | assert structure[i] == test_value 129 | assert structure[-len(fields) + i] == test_value 130 | assert structure[i] != original_value 131 | assert structure[-len(fields) + i] != original_value 132 | 133 | structure[-len(fields) + i] = original_value 134 | assert structure[i] == original_value 135 | assert structure[-len(fields) + i] == original_value 136 | with pytest.raises(IndexError): 137 | structure[len(fields)] = test_value 138 | with pytest.raises(IndexError): 139 | structure[-len(fields) - 1] = test_value 140 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the Neo4j Ecosystem 2 | 3 | At [Neo4j](https://neo4j.com/), we develop our software in the open at GitHub. 4 | This provides transparency for you, our users, and allows you to fork the software to make your own additions and enhancements. 5 | We also provide areas specifically for community contributions, in particular the [neo4j-contrib](https://github.com/neo4j-contrib) space. 6 | 7 | There's an active [Neo4j Online Community](https://community.neo4j.com/) where we work directly with the community. 8 | If you're not already a member, sign up! 9 | 10 | We love our community and wouldn't be where we are without you. 11 | 12 | 13 | ## Need to raise an issue? 14 | 15 | Where you raise an issue depends largely on the nature of the problem. 16 | 17 | Firstly, if you are an Enterprise customer, you might want to head over to our [Customer Support Portal](https://support.neo4j.com/). 18 | 19 | There are plenty of public channels available too, though. 20 | If you simply want to get started or have a question on how to use a particular feature, ask a question in [Neo4j Online Community](https://community.neo4j.com/). 21 | If you think you might have hit a bug in our software (it happens occasionally!) or you have specific feature request then use the issue feature on the relevant GitHub repository. 22 | Check first though as someone else may have already raised something similar. 23 | 24 | [StackOverflow](https://stackoverflow.com/questions/tagged/neo4j) also hosts a ton of questions and might already have a discussion around your problem. 25 | Make sure you have a look there too. 26 | 27 | Include as much information as you can in any request you make: 28 | 29 | - Which versions of our products are you using? 30 | - Which language (and which version of that language) are you developing with? 31 | - What operating system are you on? 32 | - Are you working with a cluster or on a single machine? 33 | - What code are you running? 34 | - What errors are you seeing? 35 | - What solutions have you tried already? 36 | 37 | 38 | ## Want to contribute? 
39 | 40 | If you want to contribute a pull request, we have a little bit of process you'll need to follow: 41 | 42 | - Do all your work in a personal fork of the original repository 43 | - [Rebase](https://github.com/edx/edx-platform/wiki/How-to-Rebase-a-Pull-Request), don't merge (we prefer to keep our history clean) 44 | - Create a branch (with a useful name) for your contribution 45 | - Make sure you're familiar with the appropriate coding style (this varies by language so ask if you're in doubt) 46 | - Include unit tests if appropriate (obviously not necessary for documentation changes) 47 | - Take a moment to read and sign our [CLA](https://neo4j.com/developer/cla) 48 | 49 | We can't guarantee that we'll accept pull requests and may ask you to make some changes before they go in. 50 | Occasionally, we might also have logistical, commercial, or legal reasons why we can't accept your work but we'll try to find an alternative way for you to contribute in that case. 51 | Remember that many community members have become regular contributors and some are now even Neo4j employees! 52 | 53 | 54 | ## Specifically for this project: 55 | 56 | ### Setting up the Development Environment 57 | * Install Python 3.10+ 58 | * Install the requirements (needs pip 25.1+) 59 | ```bash 60 | # recommended to use a virtual environment 61 | $ python3 -m venv .venv 62 | $ source .venv/bin/activate 63 | # make sure pip is up to date 64 | $ pip install -U pip 65 | # install all development dependencies and driver 66 | $ pip install -U --group dev -e . 67 | ``` 68 | * Install pre-commit hooks to notice mistakes before the CI does it for you ;) 69 | ```bash 70 | $ pre-commit install 71 | ``` 72 | 73 | ### Working with Pre-commit 74 | If you want to run the pre-commit checks manually, you can do so: 75 | ```bash 76 | $ pre-commit run --all-files 77 | # or 78 | $ pre-commit run --file path/to/a/file 79 | ``` 80 | 81 | To commit skipping the pre-commit checks, you can do so: 82 | ```bash 83 | git commit --no-verify ... 84 | ``` 85 | 86 | ### Running Tests 87 | ```bash 88 | # in the project root 89 | pip install . -r tests/requirements.txt 90 | python -m pytest tests 91 | ``` 92 | 93 | ### Running Benchmarks 94 | Go into the benchmark modules in `tests/benchmarks/` (e.g., `test_macro_benchmarks.py`) and adjust the connection details to the database you want to benchmark against. 95 | This assumes you have a running database. 96 | Then run the benchmarks with: 97 | ```bash 98 | python -m tox -e py312-test -- --benchmark-only --benchmark-autosave 99 | # or to compare the results with the previous run 100 | python -m tox -e py312-test -- --benchmark-only --benchmark-autosave --benchmark-compare 101 | ``` 102 | 103 | ### Changelog Entry 104 | This project uses [towncrier](https://towncrier.readthedocs.io/en/stable) to manage the changelog. 105 | See [changelog.d/README.md](changelog.d/README.md) for more information on how to add entries to the changelog. A minimal example of such a fragment is sketched below, just before the *Further reading* section. 106 | 107 | 108 | ## Got an idea for a new project? 109 | 110 | If you have an idea for a new tool or library, start by talking to other people in the community. 111 | Chances are that someone has a similar idea or may have already started working on it. 112 | The best software comes from getting like minds together to solve a problem. 113 | And we'll do our best to help you promote and co-ordinate your Neo4j ecosystem projects.
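For orientation, here is a minimal sketch of what creating such a changelog fragment could look like. The pull request number (`123`) and the description are placeholders; the available categories (`feature`, `fix`, `improve`, ...) are defined under `[tool.towncrier]` in `pyproject.toml`, and `changelog.d/75.feature.md` is a real fragment to compare against.

```bash
# hypothetical example: a fix that was merged as pull request #123
# file name pattern: changelog.d/<pull request number>.<category>.md
echo "Fix <short description of the change>." > changelog.d/123.fix.md
```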
114 | 115 | 116 | ## Further reading 117 | 118 | If you want to find out more about how you can contribute, head over to our website for [more information](https://neo4j.com/developer/contributing-code/). 119 | -------------------------------------------------------------------------------- /changelog.d/template.md: -------------------------------------------------------------------------------- 1 | {#- 2 | TEMPLATE FOR CHANGELOG GENERATION 3 | 4 | This template is derived from towncrier's default markdown template **licensed under the MIT license**. 5 | https://github.com/twisted/towncrier/blob/7b447ecabd3dc18e6ae368e05a7c87595cf1337b/src/towncrier/templates/default.md 6 | 7 | Copyright (c) 2015, Amber Brown and the towncrier contributors 8 | Copyright (c) 2025, Neo4j Sweden AB [https://neo4j.com] 9 | -#} 10 | 11 | {#- 12 | ══════════════════════════════════════════════════════════════════════════════ 13 | TOWNCRIER MARKDOWN TEMPLATE 14 | ══════════════════════════════════════════════════════════════════════════════ 15 | 16 | ─── Macro: heading ───────────────────────────────────────────────────────── 17 | Purpose: 18 | Generates Markdown headings with the appropriate number of # characters. 19 | Based on header_prefix (default: "#") and the level argument. 20 | 21 | Arguments: 22 | level The relative heading level (1=#, 2=##, 3=###, etc.) 23 | -#} 24 | {#- temporary fix until a new towncrier version is released -#} 25 | {%- set header_prefix = "##" %} 26 | {%- macro heading(level) -%} 27 | {{- "#" * ( header_prefix | length + level -1 ) }} 28 | {%- endmacro -%} 29 | 30 | {%- set newline = "\n" -%} 31 | 32 | {#- ════════════════════════ TEMPLATE GENERATION ════════════════════════ -#} 33 | {#- ─── TITLE HEADING ─── #} 34 | {#- render_title is false when title_format is specified in the config #} 35 | {%- if render_title %} 36 | {%- if versiondata.name %} 37 | {{- heading(1) ~ " " ~ versiondata.name ~ " " ~ versiondata.version ~ " (" ~ versiondata.date ~ ")" ~ newline }} 38 | {%- else %} 39 | {{- heading(1) ~ " " ~ versiondata.version ~ " (" ~ versiondata.date ~ ")" ~ newline }} 40 | {%- endif %} 41 | {%- endif %} 42 | {%- for section, _ in sections.items() %} 43 | {#- ─── SECTION HEADING ─── #} 44 | {%- if section %} 45 | {{- newline }} 46 | {{- heading(2) ~ " " ~ section ~ newline }} 47 | {{- newline }} 48 | {%- endif %} 49 | 50 | {%- if sections[section] %} 51 | 52 | {%- for category, val in definitions.items() if category in sections[section] %} 53 | {%- set issue_pks = [] %} 54 | {#- ─── CATEGORY HEADING ─── #} 55 | {#- Increase heading level if section is not present #} 56 | {{- heading(3 if section else 2) ~" **" ~ definitions[category]['name'] ~ "**" ~ newline }} 57 | {#- ─── RENDER ENTRIES ─── #} 58 | {%- for text, values in sections[section][category].items() %} 59 | {#- Prepare the string of issue numbers (e.g., "#1, #9, #142") #} 60 | {%- set issue_pks = [] %} 61 | {%- for v_issue in values %} 62 | {%- set _ = issue_pks.append(v_issue.split(": ", 1)[0]) %} 63 | {%- endfor %} 64 | {%- set issues_list = issue_pks | join(", ") %} 65 | 66 | {#- Check if text contains a sublist #} 67 | {%- set text_has_sublist = (("\n - " in text) or ("\n * " in text)) %} 68 | {%- set has_issues_placeholder = "" in text %} 69 | {%- if has_issues_placeholder %} 70 | {%- if issues_list %} 71 | {%- set text = text | replace("", " (" ~ issues_list ~ ")") %} 72 | {%- set issues_list = [] %} 73 | {%- else %} 74 | {%- set text = text | replace("", "") %} 75 | {%- endif %} 76 | {%- endif %} 77 | 78 | {#- 
CASE 1: No text, only issues #} 79 | {#- Output: - #1, #9, #142 #} 80 | {%- if not text and issues_list %} 81 | {{- "* " ~ issues_list ~ newline }} 82 | 83 | {#- Cases where both text and issues exist #} 84 | {%- elif text and issues_list %} 85 | {%- if text_has_sublist %} 86 | {#- CASE 3: Text with sublist #} 87 | {#- Output: - TEXT\n\n (#1, #9, #142) #} 88 | {{- "* " ~ text ~ newline ~ newline ~ " (" ~ issues_list ~ ")" ~ newline }} 89 | {%- else %} 90 | {#- CASE 2: Text, no sublist #} 91 | {#- Output: - TEXT (#1, #9, #142) #} 92 | {{- "* " ~ text ~ " (" ~ issues_list ~ ")" ~ newline }} 93 | {%- endif %} 94 | 95 | {%- elif text %} 96 | {#- Implicit Case: Text, but no issues #} 97 | {#- Output: - TEXT #} 98 | {{- "* " ~ text ~ newline }} 99 | {%- endif %} 100 | {%- endfor %} 101 | 102 | {#- New line between list and link references #} 103 | {{- newline }} 104 | 105 | {#- Link references #} 106 | {%- if issues_by_category[section][category] and "]: " in issues_by_category[section][category][0] %} 107 | {%- for issue in issues_by_category[section][category] %} 108 | {{- issue ~ newline }} 109 | {%- endfor %} 110 | {{- newline }} 111 | {%- endif %} 112 | 113 | {#- No changes in this category #} 114 | {%- if sections[section][category]|length == 0 %} 115 | {{- newline }} 116 | {{- "No significant changes." ~ newline * 2 }} 117 | {%- endif %} 118 | {%- endfor %} 119 | {%- else %} 120 | {#- No changes in this section #} 121 | {{- "No significant changes." ~ newline * 2 }} 122 | {%- endif %} 123 | {%- endfor %} 124 | {#- 125 | Newline at the end of the rendered newsfile content. 126 | In this way the there are 2 newlines between the latest release and the previous release content. 127 | -#} 128 | {{- newline -}} 129 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | [project] 17 | name = "neo4j-rust-ext" 18 | description = "Rust Extensions for a Faster Neo4j Bolt Driver for Python" 19 | license = "Apache-2.0 AND MIT" 20 | license-files = [ 21 | "LICENSE*.txt", 22 | "NOTICE*.txt", 23 | ] 24 | readme = "README.md" 25 | authors = [ 26 | {name = "Neo4j, Inc.", email = "drivers@neo4j.com"}, 27 | ] 28 | dependencies = [ 29 | "neo4j == 6.0.3" 30 | ] 31 | requires-python = ">=3.10" 32 | keywords = ["neo4j", "graph", "database"] 33 | classifiers = [ 34 | "Development Status :: 5 - Production/Stable", 35 | "Intended Audience :: Developers", 36 | "Operating System :: OS Independent", 37 | "Programming Language :: Python :: 3.10", 38 | "Programming Language :: Python :: 3.11", 39 | "Programming Language :: Python :: 3.12", 40 | "Programming Language :: Python :: 3.13", 41 | "Programming Language :: Python :: 3.14", 42 | "Programming Language :: Rust", 43 | "Topic :: Database", 44 | "Topic :: Software Development", 45 | ] 46 | version = "6.0.3.0" 47 | 48 | [project.urls] 49 | Homepage = "https://neo4j.com/" 50 | Repository = "https://github.com/neo4j/neo4j-python-driver-rust-ext" 51 | "Issue Tracker" = "https://github.com/neo4j/neo4j-python-driver-rust-ext/issues" 52 | Changelog = "https://github.com/neo4j/neo4j-python-driver-rust-ext/blob/HEAD/CHANGELOG.md" 53 | Forum = "https://community.neo4j.com/c/drivers-stacks/python/" 54 | Discord = "https://discord.com/invite/neo4j" 55 | 56 | [project.optional-dependencies] 57 | numpy = ["neo4j[numpy]"] 58 | pandas = ["neo4j[pandas]"] 59 | pyarrow = ["neo4j[pyarrow]"] 60 | 61 | [build-system] 62 | requires = ["maturin ~= 1.9.6"] 63 | build-backend = "maturin" 64 | 65 | [dependency-groups] 66 | # To install all development dependencies as well as the driver with all optional dependencies, 67 | # run `pip install --group dev -e .` inside repository root folder. 
68 | dev = [ 69 | # dev tools 70 | {include-group = "dep-project-dependencies"}, 71 | {include-group = "tox"}, 72 | {include-group = "precommit"}, 73 | {include-group = "test"}, 74 | {include-group = "packaging"}, 75 | ] 76 | tox = [ 77 | "tox >= 4.25.0", 78 | ] 79 | precommit = [ 80 | "pre-commit >= 4.2.0", 81 | "isort >= 6.0.1", 82 | ] 83 | test = [ 84 | "pytest >= 8.3.5", 85 | "pytest-benchmark >= 5.1.0", 86 | "pytest-mock >= 3.14.1", 87 | ] 88 | packaging = [ 89 | "build", 90 | "maturin ~= 1.9.6", 91 | "towncrier >= 24.8.0", 92 | ] 93 | 94 | # single dependencies and other include-groups (not really meant to be installed as a group, but to avoid duplication) 95 | dep-project-dependencies = [ 96 | "neo4j[numpy,pandas,pyarrow] == 6.0.3", 97 | ] 98 | 99 | [tool.maturin] 100 | features = ["pyo3/extension-module", "pyo3/generate-import-lib"] 101 | module-name = "neo4j._rust" 102 | exclude = [ 103 | "/.editorconfig", 104 | ".gitignore", 105 | ".gitmodules", 106 | ".pre-commit-config.yaml", 107 | "bin/**/*", 108 | "driver/**/*", 109 | "test*/**/*", 110 | "CONTRIBUTING.md", 111 | "requirements*.txt", 112 | "tox.ini", 113 | { path = "neo4j/**/.keep", format = "wheel" } 114 | ] 115 | 116 | [tool.towncrier] 117 | directory = "changelog.d" 118 | version = "6.0.3.0" 119 | filename = "CHANGELOG.md" 120 | title_format = "## [{version}](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/{version}) ({project_date})\n***" 121 | issue_format = "[#{issue}]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/{issue}" 122 | template = "changelog.d/template.md" 123 | 124 | [[tool.towncrier.type]] 125 | directory = "feature" 126 | name = "⭐️ New Features" 127 | showcontent = true 128 | [[tool.towncrier.type]] 129 | directory = "preview" 130 | name = "🔮️ New Preview Features" 131 | showcontent = true 132 | [[tool.towncrier.type]] 133 | directory = "fix" 134 | name = "🔧️ Fixes" 135 | showcontent = true 136 | [[tool.towncrier.type]] 137 | directory = "improve" 138 | name = "👏️ Improvements" 139 | showcontent = true 140 | [[tool.towncrier.type]] 141 | directory = "doc" 142 | name = "📚️ Docs" 143 | showcontent = true 144 | [[tool.towncrier.type]] 145 | directory = "mature" 146 | name = "🌳️ Maturing" 147 | showcontent = true 148 | [[tool.towncrier.type]] 149 | directory = "clean" 150 | name = "🧹️ Clean-up" 151 | showcontent = true 152 | [[tool.towncrier.type]] 153 | directory = "pkg" 154 | name = "📦️ Packaging" 155 | showcontent = true 156 | [[tool.towncrier.type]] 157 | directory = "dev" 158 | name = "🧑️‍💻️ Development" 159 | showcontent = true 160 | 161 | [tool.isort] 162 | combine_as_imports = true 163 | ensure_newline_before_comments = true 164 | force_grid_wrap = 2 165 | # breaks order of relative imports 166 | # https://github.com/PyCQA/isort/issues/1944 167 | #force_sort_within_sections = true 168 | include_trailing_comma = true 169 | # currently broken 170 | # https://github.com/PyCQA/isort/issues/1855 171 | #lines_before_imports = 2 172 | lines_after_imports = 2 173 | lines_between_sections = 1 174 | multi_line_output = 3 175 | order_by_type = false 176 | remove_redundant_aliases = true 177 | use_parentheses = true 178 | known_first_party = ["neo4j"] 179 | 180 | [tool.ruff] 181 | line-length = 79 182 | extend-exclude = [ 183 | "driver", 184 | ] 185 | 186 | [tool.ruff.lint] 187 | preview = true # to get CPY lints 188 | extend-ignore = [ 189 | "RUF002", # allow ’ (RIGHT SINGLE QUOTATION MARK) to be used as an apostrophe (e.g. 
"it’s") 190 | 191 | # pydocstyle 192 | "D1", # disable check for undocumented items (way too noisy) 193 | "D203", # `one-blank-line-before-class` 194 | "D212", # `multi-line-summary-first-line` 195 | 196 | # comprehensions 197 | "C417", # map is ok, no need to rewrite to list comprehension 198 | 199 | # too noisy and opinionated pytest lints 200 | "PT007", 201 | "PT011", 202 | "PT012", 203 | "PT018", 204 | 205 | # too noisy and opinionated pylint lints 206 | "PLC0415", 207 | "PLC1901", 208 | "PLC2401", 209 | "PLC2701", 210 | "PLR09", 211 | "PLR1702", 212 | "PLR1704", 213 | "PLR2004", 214 | "PLR6301", 215 | "PLW2901", 216 | "PLW1641", 217 | 218 | # too noisy and opinionated tryceratops lints 219 | "TRY003", 220 | "TRY300", 221 | "TRY301", 222 | "TRY400", 223 | 224 | # too noisy and opinionated return statement lints 225 | "RET505", 226 | "RET506", 227 | "RET507", 228 | "RET508", 229 | 230 | "PERF203", # try-except within loop is fine. Especially in a retry scenario 231 | 232 | # too noisy and opinionated FURB lints 233 | "FURB113", 234 | "FURB118", 235 | "FURB140", 236 | "FURB154", 237 | # needs fixing in ruff to work with typing.Protocol 238 | # https://github.com/astral-sh/ruff/issues/13307 239 | "FURB180", 240 | ] 241 | select = [ 242 | # ruff 243 | "RUF", 244 | # pycodestyle 245 | "E", 246 | "W", 247 | # Pyflakes 248 | "F", 249 | # pyupgrade 250 | "UP", 251 | # flake8-bugbear 252 | "B", 253 | # flake8-simplify 254 | "SIM", 255 | # pep8-naming 256 | "N", 257 | # pydocstyle 258 | "D", 259 | # pydocstyle: explicit rules not selected by the chosen convention 260 | "D404", 261 | # Does not yet fully support sphinx style docstrings 262 | # https://github.com/astral-sh/ruff/pull/13286 263 | # # pydoclint 264 | # "DOC", 265 | # pylint 266 | "PL", 267 | # tryceratops 268 | "TRY", 269 | # flynt 270 | "FLY", 271 | # Perflint 272 | "PERF", 273 | # refurb 274 | "FURB", 275 | # async checks 276 | "ASYNC", 277 | # check comprehensions 278 | "C4", 279 | # check for left-over debugger calls 280 | "T100", 281 | # check for left-over print calls 282 | "T20", 283 | # qoute styles 284 | "Q", 285 | # check for unnecessary parantheses in raise statements 286 | "RSE", 287 | # check return statements 288 | "RET", 289 | # check type-checking usage 290 | "TCH", 291 | # copyright notice 292 | "CPY", 293 | # check shebangs 294 | "EXE", 295 | # logging calls + formats 296 | "LOG", 297 | "G", 298 | # flake8-pie 299 | "PIE", 300 | # pytest lints 301 | "PT", 302 | ] 303 | 304 | [tool.ruff.lint.per-file-ignores] 305 | "{testkit,tests}/**" = [ 306 | "T20", # print statements are ok in the testing infrastructure 307 | ] 308 | "tests/**" = [ 309 | "PLW1641", # no need for production grade test code 310 | "FURB152", # don't tell us to use math.pi, when all we need is just some random float 311 | # allow async functions without await to enable type checking, pretending to be async, matching type signatures 312 | "RUF029", 313 | ] 314 | "bin/**" = [ 315 | "T20", # print statements are ok in our helper scripts 316 | ] 317 | 318 | [tool.ruff.lint.pycodestyle] 319 | max-doc-length = 83 # 79 (max line length) + 4 indentation of code blocks 320 | 321 | [tool.ruff.lint.pep8-naming] 322 | extend-ignore-names = ["mcs"] 323 | 324 | [tool.ruff.lint.flake8-copyright] 325 | notice-rgx = "# Copyright \\(c\\) \"Neo4j\"" 326 | 327 | [tool.ruff.lint.pydocstyle] 328 | convention = "pep257" 329 | -------------------------------------------------------------------------------- /CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | ⚠️ marks breaking changes, pending breaking changes (deprecations), or other critical changes. 5 | 6 | 7 | 8 | ## [6.0.3.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/6.0.3.0) (2025-11-07) 9 | *** 10 | ### **⭐️ New Features** 11 | * Target driver version 6.0.3 ([#72]). 12 | 13 | [#72]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/72 14 | 15 | ### **👏️ Improvements** 16 | * Update dependencies ([#71]): 17 | * Bump dependency PyO3 (Rust binding for Python) from `0.26.0` to `0.27.1`. 18 | * Update `maturin` (Python package builder) from `~= 1.9.1` to `~= 1.9.6`. 19 | 20 | [#71]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/71 21 | 22 | ### **🧹️ Clean-up** 23 | * Remove now unused helper functions for converting `Vector` values to/from native Python `lists` ([#70]). 24 | For more details, see [neo4j-python-driver#1263](https://github.com/neo4j/neo4j-python-driver/pull/1263). 25 | 26 | [#70]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/70 27 | 28 | ### **🧑️‍💻️ Development** 29 | * Improve details of internal error message ([#74]). 30 | 31 | [#74]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/74 32 | 33 | 34 | ## [6.0.2.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/6.0.2.0) (2025-10-02) 35 | *** 36 | ### **⭐️ New Features** 37 | * Target driver version 6.0.2 ([#69]). 38 | 39 | [#69]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/69 40 | 41 | 42 | ## [6.0.1.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/6.0.1.0) (2025-10-01) 43 | *** 44 | ### **⭐️ New Features** 45 | * Target driver version 6.0.1 ([#68]). 46 | 47 | [#68]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/68 48 | 49 | 50 | ## [6.0.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/6.0.0.0) (2025-09-30) 51 | *** 52 | ### **⭐️ New Features** 53 | * Target driver version 6.0.0 ([#67]). 54 | 55 | [#67]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/67 56 | 57 | ### **🔧️ Fixes** 58 | * Fix decoding of map keys of certain sizes ([#59]). 59 | 60 | [#59]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/59 61 | 62 | ### **👏️ Improvements** 63 | * Bump dependency PyO3 (Rust binding for Python) from `0.25.1` to `0.26.0` ([#66]). 64 | 65 | [#66]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/66 66 | 67 | ### **🧹️ Clean-up** 68 | * Improve packstream `Structure` class ([#63]). 69 | * Implement `repr` to match Python implementation. 70 | * Remove `__hash__` implementation to match Python implementation. 71 | * Implement `__getitem__` and `__setitem__` to be on par with Python implementation. 72 | * Copy tests for `Structure` from the driver project. 73 | 74 | [#63]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/63 75 | 76 | ### **🧑️‍💻️ Development** 77 | * Fix broken `cp` command in `bin/target_driver.sh` ([#67]). 78 | 79 | [#67]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/67 80 | 81 | 82 | ## [6.0.0.0a1](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/6.0.0.0a1) (2025-07-29) 83 | *** 84 | ### **⭐️ New Features** 85 | * Add extension for the `Vector` type ([#45]). 86 | * Speed up endian conversion (byte flipping). 87 | * Speed up conversion from and to native python types. 88 | * Target driver version 6.0.0a1 ([#47]). 
89 | 90 | [#45]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/45 91 | [#47]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/47 92 | 93 | ### **👏️ Improvements** 94 | * Update dependencies ([#32]): 95 | * ⚠️ Bump minimum supported Rust version (MSRV) from `1.67` to `1.77`. 96 | * Security fix by bumping PyO3 (Rust binding for Python) from `0.22.4` to `0.24.2`. 97 | * Update `maturin` (Python package builder) from `~= 1.6.0` to `~= 1.8.3`. 98 | * Harden `Structure` class against memory leak ([#50]). 99 | The extensions' implementation of packstream `Structure` could leak memory when being part of a reference cycle. 100 | In reality this doesn't matter because the driver never constructs cyclic `Structure`s. 101 | Every packstream value is a tree in terms of references (both directions: packing and unpacking). 102 | This change is meant to harden the extensions against introducing effective memory leaks in the driver should the driver's usage of `Structure` change in the future. 103 | * Optimize packing of `bytearray` ([#51]). 104 | By special-casing `bytearray`, we can avoid an allocation and complete extra copy of the data when packing it. 105 | This speeds up packing of `bytearray`s by roughly 1/3. 106 | 107 | [#32]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/32 108 | [#50]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/50 109 | [#51]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/51 110 | 111 | ### **🧹️ Clean-up** 112 | * ⚠️ Drop support for Python 3.7, 3.8, and 3.9 ([#37]). 113 | 114 | [#37]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/37 115 | 116 | ### **📦️ Packaging** 117 | * Update licensing meta data to be PEP 639 compliant ([#38]). 118 | * Update `maturin` (Python package builder) from `~= 1.8.3` to `~= 1.9.0`. 119 | * ⚠️ Change licensing from "Apache-2.0" to "Apache-2.0 AND MIT" ([#40]). 120 | * Update dependencies ([#46]). 121 | * `PyO3`: `0.24.2` -> `0.25.1` 122 | * `maturin`: `1.9.0` -> `1.9.1` 123 | 124 | [#38]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/38 125 | [#40]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/40 126 | [#46]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/46 127 | 128 | ### **🧑️‍💻️ Development** 129 | * Introduce [towncrier](https://towncrier.readthedocs.io/) for managing changelog entries ([#40]). 130 | * Use dependency groups in `pyproject.toml` for development dependencies ([#44]). 131 | 132 | [#40]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/40 133 | [#44]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/44 134 | 135 | 136 | ## [5.28.2.1](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.28.2.1) (2025-08-15) 137 | *** 138 | ### **🔧️ Fixes** 139 | * Fix decoding of map keys of certain sizes ([#60]). 140 | 141 | [#60]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/60 142 | 143 | 144 | ## [5.28.2.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.28.2.0) (2025-07-30) 145 | *** 146 | ### **⭐️ New Features** 147 | * Target driver version 5.28.2 ([#54]). 148 | 149 | [#54]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/54 150 | 151 | ### **👏️ Improvements** 152 | * Update dependencies ([#32]): 153 | * ⚠️ Bump minimum supported Rust version (MSRV) from `1.67` to `1.77`. 154 | * Security fix by bumping PyO3 (Rust binding for Python) from `0.22.4` to `0.24.2`. 155 | * Update `maturin` (Python package builder) from `~= 1.6.0` to `~= 1.8.3`. 
156 | * Harden `Structure` class against memory leak ([#53]). 157 | The extensions' implementation of packstream `Structure` could leak memory when being part of a reference cycle. 158 | In reality this doesn't matter because the driver never constructs cyclic `Structure`s. 159 | Every packstream value is a tree in terms of references (both directions: packing and unpacking). 160 | This change is meant to harden the extensions against introducing effective memory leaks in the driver should the driver's usage of `Structure` change in the future. 161 | 162 | [#32]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/32 163 | [#53]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/53 164 | 165 | ### **📦️ Packaging** 166 | * ⚠️ Change licensing from "Apache-2.0" to "Apache-2.0 AND MIT" ([#43]). 167 | * Update licensing meta data to be PEP 639 compliant ([#41]). 168 | * Update `maturin` (Python package builder) from `~= 1.8.3` to `~= 1.9.0`. 169 | * Update `maturin` (Python package builder) from `~= 1.9.0` to `~= 1.9.1` ([#54]). 170 | 171 | [#41]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/41 172 | [#43]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/43 173 | [#54]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/54 174 | 175 | ### **🧑️‍💻️ Development** 176 | * Introduce [towncrier](https://towncrier.readthedocs.io/) for managing changelog entries ([#43]). 177 | 178 | [#43]: https://github.com/neo4j/neo4j-python-driver-rust-ext/pull/43 179 | 180 | 181 | ## [5.28.1.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.28.1.0) (2025-02-10) 182 | *** 183 | * Target driver version 5.28.1 184 | 185 | 186 | ## [5.28.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.28.0.0) (2025-02-05) 187 | *** 188 | * Target driver version 5.28.0 189 | 190 | 191 | ## [5.27.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.27.0.0) (2024-11-28) 192 | *** 193 | * Target driver version 5.27.0 194 | 195 | 196 | ## [5.26.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.26.0.0) (2024-11-01) 197 | *** 198 | * Target driver version 5.26.0 199 | * Add support for Python 3.13 200 | * Bump PyO3 to 0.22.4 201 | * Introduce `isort` and `ruff` 202 | 203 | 204 | ## [5.25.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.25.0.0) (2024-09-26) 205 | *** 206 | * Target driver version 5.25.0 207 | 208 | 209 | ## [5.24.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.24.0.0) (2024-08-29) 210 | *** 211 | * Target driver version 5.24.0 212 | 213 | 214 | ## [5.23.1.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.23.1.0) (2024-08-05) 215 | *** 216 | * Target driver version 5.23.1 217 | * Moved repository: 218 | from https://github.com/neo4j-drivers/neo4j-python-driver-rust-ext 219 | to https://github.com/neo4j/neo4j-python-driver-rust-ext 220 | * Metadata: removed `Beta` tag, added `Production/Stable`. 221 | * Bump MSRV (minimum supported Rust version) to 1.67.0. 222 | * Clarify installation documentation: `neo4j` and `neo4j-rust-ext` can both be installed at the same time. 223 | ℹ️ Make sure to specify matching versions if you do so. 
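For illustration, installing both packages with matching versions could look like this (the version numbers are placeholders; the first three segments of `neo4j-rust-ext`'s version mirror the driver version it targets):

```bash
# placeholder versions; pick the matching release pair you actually need
pip install "neo4j==5.23.1" "neo4j-rust-ext==5.23.1.0"
```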
224 | 225 | 226 | ## [5.23.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.23.0.0) (2024-07-29) 227 | *** 228 | * Target driver version 5.23.0 229 | 230 | 231 | ## [5.22.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.22.0.0) (2024-06-27) 232 | *** 233 | * Target driver version 5.22.0 234 | 235 | 236 | ## [5.21.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.21.0.0) (2024-06-11) 237 | *** 238 | * Target driver version 5.21.0 239 | 240 | 241 | ## [5.20.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.20.0.0) (2024-04-26) 242 | *** 243 | * Target driver version 5.20.0 244 | 245 | 246 | ## [5.19.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.19.0.0) (2024-05-02) 247 | *** 248 | * Target driver version 5.19.0 249 | 250 | 251 | ## [5.18.0.0](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.18.0.0) (2024-02-29) 252 | *** 253 | * Target driver version 5.18.0 254 | 255 | 256 | ## [5.17.0.0b1](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.17.0.0b1) (2024-01-29) 257 | *** 258 | * Target driver version 5.17.0 259 | 260 | 261 | ## [5.16.0.0b1](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.16.0.0b1) (2023-12-28) 262 | *** 263 | * Target driver version 5.16.0 264 | 265 | 266 | ## [5.15.0.0b1](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.15.0.0b1) (2023-11-28) 267 | *** 268 | * Target driver version 5.15.0 269 | 270 | 271 | ## [5.14.1.0a1](https://github.com/neo4j/neo4j-python-driver-rust-ext/tree/5.14.1.0a1) (2023-11-03) 272 | *** 273 | * Target driver version 5.14.1 274 | * Initial release. 275 | -------------------------------------------------------------------------------- /LICENSE.APACHE2.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | https://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | https://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /src/codec/packstream/v1/unpack.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use pyo3::exceptions::PyValueError; 17 | use pyo3::prelude::*; 18 | use pyo3::sync::with_critical_section; 19 | use pyo3::types::{IntoPyDict, PyByteArray, PyBytes, PyDict, PyList, PyTuple}; 20 | use pyo3::{intern, IntoPyObjectExt}; 21 | 22 | use super::super::Structure; 23 | use super::{ 24 | BYTES_16, BYTES_32, BYTES_8, FALSE, FLOAT_64, INT_16, INT_32, INT_64, INT_8, LIST_16, LIST_32, 25 | LIST_8, MAP_16, MAP_32, MAP_8, NULL, STRING_16, STRING_32, STRING_8, TINY_LIST, TINY_MAP, 26 | TINY_STRING, TINY_STRUCT, TRUE, 27 | }; 28 | 29 | #[pyfunction] 30 | #[pyo3(signature = (bytes, idx, hydration_hooks=None))] 31 | pub(super) fn unpack( 32 | bytes: Bound<'_, PyByteArray>, 33 | idx: usize, 34 | hydration_hooks: Option<Bound<'_, PyDict>>, 35 | ) -> PyResult<(Py<PyAny>, usize)> { 36 | let py = bytes.py(); 37 | let mut decoder = PackStreamDecoder::new(py, bytes, idx, hydration_hooks); 38 | let result = decoder.read()?; 39 | Ok((result, decoder.index)) 40 | } 41 | 42 | struct PackStreamDecoder<'a> { 43 | py: Python<'a>, 44 | bytes: Bound<'a, PyByteArray>, 45 | index: usize, 46 | hydration_hooks: Option<Bound<'a, PyDict>>, 47 | } 48 | 49 | impl<'a> PackStreamDecoder<'a> { 50 | fn new( 51 | py: Python<'a>, 52 | bytes: Bound<'a, PyByteArray>, 53 | idx: usize, 54 | hydration_hooks: Option<Bound<'a, PyDict>>, 55 | ) -> Self { 56 | Self { 57 | py, 58 | bytes, 59 | index: idx, 60 | hydration_hooks, 61 | } 62 | } 63 | 64 | fn read(&mut self) -> PyResult<Py<PyAny>> { 65 | let marker = self.read_byte()?; 66 | self.read_value(marker) 67 | } 68 | 69 | fn read_value(&mut self, marker: u8) -> PyResult<Py<PyAny>> { 70 | let high_nibble = marker & 0xF0; 71 | 72 | Ok(match marker { 73 | // tiny int 74 | _ if marker as i8 >= -16 => (marker as i8).into_py_any(self.py)?, 75 | NULL => self.py.None(), 76 | FLOAT_64 => self.read_f64()?.into_py_any(self.py)?, 77 | FALSE => false.into_py_any(self.py)?, 78 | TRUE => true.into_py_any(self.py)?, 79 | INT_8 => self.read_i8()?.into_py_any(self.py)?, 80 | INT_16 => self.read_i16()?.into_py_any(self.py)?, 81 | INT_32 => self.read_i32()?.into_py_any(self.py)?, 82 | INT_64 => self.read_i64()?.into_py_any(self.py)?, 83 | BYTES_8 => { 84 | let len = self.read_u8()?; 85 | self.read_bytes(len)? 86 | } 87 | BYTES_16 => { 88 | let len = self.read_u16()?; 89 | self.read_bytes(len)? 90 | } 91 | BYTES_32 => { 92 | let len = self.read_u32()?; 93 | self.read_bytes(len)? 94 | } 95 | _ if high_nibble == TINY_STRING => self.read_string((marker & 0x0F).into())?, 96 | STRING_8 => { 97 | let len = self.read_u8()?; 98 | self.read_string(len)? 99 | } 100 | STRING_16 => { 101 | let len = self.read_u16()?; 102 | self.read_string(len)? 103 | } 104 | STRING_32 => { 105 | let len = self.read_u32()?; 106 | self.read_string(len)? 107 | } 108 | _ if high_nibble == TINY_LIST => self.read_list((marker & 0x0F).into())?, 109 | LIST_8 => { 110 | let len = self.read_u8()?; 111 | self.read_list(len)? 112 | } 113 | LIST_16 => { 114 | let len = self.read_u16()?; 115 | self.read_list(len)? 116 | } 117 | LIST_32 => { 118 | let len = self.read_u32()?; 119 | self.read_list(len)?
120 | } 121 | _ if high_nibble == TINY_MAP => self.read_map((marker & 0x0F).into())?, 122 | MAP_8 => { 123 | let len = self.read_u8()?; 124 | self.read_map(len)? 125 | } 126 | MAP_16 => { 127 | let len = self.read_u16()?; 128 | self.read_map(len)? 129 | } 130 | MAP_32 => { 131 | let len = self.read_u32()?; 132 | self.read_map(len)? 133 | } 134 | _ if high_nibble == TINY_STRUCT => self.read_struct((marker & 0x0F).into())?, 135 | _ => { 136 | // raise ValueError("Unknown PackStream marker %02X" % marker) 137 | return Err(PyErr::new::(format!( 138 | "Unknown PackStream marker {marker:02X}", 139 | ))); 140 | } 141 | }) 142 | } 143 | 144 | fn read_list(&mut self, length: usize) -> PyResult> { 145 | if length == 0 { 146 | return Ok(PyList::empty(self.py).into_any().unbind()); 147 | } 148 | let mut items = Vec::with_capacity(length); 149 | for _ in 0..length { 150 | items.push(self.read()?); 151 | } 152 | items.into_py_any(self.py) 153 | } 154 | 155 | fn read_string(&mut self, length: usize) -> PyResult> { 156 | if length == 0 { 157 | return "".into_py_any(self.py); 158 | } 159 | let data = with_critical_section(&self.bytes, || { 160 | // Safety: 161 | // * We're using a critical section to avoid other threads mutating the bytes while 162 | // we're reading them. 163 | // * We're not mutating the bytes ourselves. 164 | // * We're not interacting with Python while using the bytes as that might indirectly 165 | // cause the bytes to be mutated. 166 | unsafe { 167 | let data = &self.bytes.as_bytes()[self.index..self.index + length]; 168 | // We have to copy the data to uphold the safety invariant. 169 | String::from_utf8(Vec::from(data)) 170 | } 171 | }); 172 | let data = data.map_err(|e| PyErr::new::(e.to_string()))?; 173 | self.index += length; 174 | data.into_py_any(self.py) 175 | } 176 | 177 | fn read_map(&mut self, length: usize) -> PyResult> { 178 | if length == 0 { 179 | return Ok(PyDict::new(self.py).into_any().unbind()); 180 | } 181 | let mut key_value_pairs: Vec<(Py, Py)> = Vec::with_capacity(length); 182 | for _ in 0..length { 183 | let len = self.read_string_length()?; 184 | let key = self.read_string(len)?; 185 | let value = self.read()?; 186 | key_value_pairs.push((key, value)); 187 | } 188 | Ok(key_value_pairs.into_py_dict(self.py)?.into()) 189 | } 190 | 191 | fn read_bytes(&mut self, length: usize) -> PyResult> { 192 | if length == 0 { 193 | return Ok(PyBytes::new(self.py, &[]).into_any().unbind()); 194 | } 195 | let data = with_critical_section(&self.bytes, || { 196 | // Safety: 197 | // * We're using a critical section to avoid other threads mutating the bytes while 198 | // we're reading them. 199 | // * We're not mutating the bytes ourselves. 200 | // * We're not interacting with Python while using the bytes as that might indirectly 201 | // cause the bytes to be mutated. 202 | unsafe { 203 | // We have to copy the data to uphold the safety invariant. 204 | self.bytes.as_bytes()[self.index..self.index + length].to_vec() 205 | } 206 | }); 207 | self.index += length; 208 | Ok(PyBytes::new(self.py, &data).into_any().unbind()) 209 | } 210 | 211 | fn read_struct(&mut self, length: usize) -> PyResult> { 212 | let tag = self.read_byte()?; 213 | let mut fields = Vec::with_capacity(length); 214 | for _ in 0..length { 215 | fields.push(self.read()?) 216 | } 217 | let mut bolt_struct = Structure { tag, fields } 218 | .into_pyobject(self.py)? 
219 | .into_any() 220 | .unbind(); 221 | let Some(hooks) = &self.hydration_hooks else { 222 | return Ok(bolt_struct); 223 | }; 224 | 225 | let attr = bolt_struct.getattr(self.py, intern!(self.py, "__class__"))?; 226 | if let Some(res) = hooks.get_item(attr)? { 227 | bolt_struct = res 228 | .call(PyTuple::new(self.py, [bolt_struct])?, None)? 229 | .into_any() 230 | .unbind(); 231 | } 232 | 233 | Ok(bolt_struct) 234 | } 235 | 236 | fn read_string_length(&mut self) -> PyResult<usize> { 237 | let marker = self.read_byte()?; 238 | let high_nibble = marker & 0xF0; 239 | match marker { 240 | _ if high_nibble == TINY_STRING => Ok((marker & 0x0F).into()), 241 | STRING_8 => self.read_u8(), 242 | STRING_16 => self.read_u16(), 243 | STRING_32 => self.read_u32(), 244 | _ => Err(PyErr::new::<PyValueError, _>(format!( 245 | "Invalid string length marker: {marker}", 246 | ))), 247 | } 248 | } 249 | 250 | fn read_byte(&mut self) -> PyResult<u8> { 251 | let byte = with_critical_section(&self.bytes, || { 252 | // Safety: 253 | // * We're using a critical section to avoid other threads mutating the bytes while 254 | // we're reading them. 255 | // * We're not mutating the bytes ourselves. 256 | // * We're not interacting with Python while using the bytes as that might indirectly 257 | // cause the bytes to be mutated. 258 | unsafe { self.bytes.as_bytes().get(self.index).copied() } 259 | }) 260 | .ok_or_else(|| PyErr::new::<PyValueError, _>("Nothing to unpack"))?; 261 | self.index += 1; 262 | Ok(byte) 263 | } 264 | 265 | fn read_n_bytes<const N: usize>(&mut self) -> PyResult<[u8; N]> { 266 | let to = self.index + N; 267 | with_critical_section(&self.bytes, || { 268 | // Safety: 269 | // * We're using a critical section to avoid other threads mutating the bytes while 270 | // we're reading them. 271 | // * We're not mutating the bytes ourselves. 272 | // * We're not interacting with Python while using the bytes as that might indirectly 273 | // cause the bytes to be mutated. 274 | unsafe { 275 | match self.bytes.as_bytes().get(self.index..to) { 276 | Some(b) => { 277 | self.index = to; 278 | Ok(<[u8; N]>::try_from(b).expect("we know the slice has exactly N values")) 279 | } 280 | None => Err(PyErr::new::<PyValueError, _>("Nothing to unpack")), 281 | } 282 | } 283 | }) 284 | } 285 | 286 | fn read_u8(&mut self) -> PyResult<usize> { 287 | self.read_byte().map(Into::into) 288 | } 289 | 290 | fn read_u16(&mut self) -> PyResult<usize> { 291 | let data = self.read_n_bytes()?; 292 | Ok(u16::from_be_bytes(data).into()) 293 | } 294 | 295 | fn read_u32(&mut self) -> PyResult<usize> { 296 | let data = self.read_n_bytes()?; 297 | u32::from_be_bytes(data).try_into().map_err(|_| { 298 | PyErr::new::<PyValueError, _>( 299 | "Server announced 32 bit sized data. 
Not supported by this architecture.", 300 | ) 301 | }) 302 | } 303 | 304 | fn read_i8(&mut self) -> PyResult<i8> { 305 | self.read_byte().map(|b| i8::from_be_bytes([b])) 306 | } 307 | 308 | fn read_i16(&mut self) -> PyResult<i16> { 309 | self.read_n_bytes().map(i16::from_be_bytes) 310 | } 311 | 312 | fn read_i32(&mut self) -> PyResult<i32> { 313 | self.read_n_bytes().map(i32::from_be_bytes) 314 | } 315 | 316 | fn read_i64(&mut self) -> PyResult<i64> { 317 | self.read_n_bytes().map(i64::from_be_bytes) 318 | } 319 | 320 | fn read_f64(&mut self) -> PyResult<f64> { 321 | self.read_n_bytes().map(f64::from_be_bytes) 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /src/codec/packstream/v1/pack.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) "Neo4j" 2 | // Neo4j Sweden AB [https://neo4j.com] 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | use std::borrow::Cow; 17 | use std::sync::OnceLock; 18 | 19 | use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError}; 20 | use pyo3::prelude::*; 21 | use pyo3::sync::with_critical_section; 22 | use pyo3::sync::OnceLockExt; 23 | use pyo3::types::{PyByteArray, PyBytes, PyDict, PyString, PyTuple, PyType}; 24 | use pyo3::{intern, IntoPyObjectExt}; 25 | 26 | use super::super::Structure; 27 | use super::{ 28 | BYTES_16, BYTES_32, BYTES_8, FALSE, FLOAT_64, INT_16, INT_32, INT_64, INT_8, LIST_16, LIST_32, 29 | LIST_8, MAP_16, MAP_32, MAP_8, NULL, STRING_16, STRING_32, STRING_8, TINY_LIST, TINY_MAP, 30 | TINY_STRING, TINY_STRUCT, TRUE, 31 | }; 32 | 33 | #[derive(Debug)] 34 | struct TypeMappings { 35 | none_values: Vec<Py<PyAny>>, 36 | true_values: Vec<Py<PyAny>>, 37 | false_values: Vec<Py<PyAny>>, 38 | int_types: Py<PyAny>, 39 | float_types: Py<PyAny>, 40 | sequence_types: Py<PyAny>, 41 | mapping_types: Py<PyAny>, 42 | bytes_types: Py<PyAny>, 43 | } 44 | 45 | impl TypeMappings { 46 | fn new(locals: &Bound<PyDict>) -> PyResult<Self> { 47 | /// Remove some byte types from an iterable of types. 48 | /// Types removed are `bytes`, `bytearray`, as those are handled specially in `pack`. 49 | /// If the filtering fails for any reason, it returns the original input. 50 | fn filter_bytes_types(types: Bound<PyAny>) -> Bound<PyAny> { 51 | fn inner<'py>(types: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyAny>> { 52 | fn is_of_known_bytes_types(typ: &Bound<PyType>) -> PyResult<bool> { 53 | Ok(typ.is_subclass_of::<PyBytes>()? || typ.is_subclass_of::<PyByteArray>()?) 54 | } 55 | 56 | let py = types.py(); 57 | let types = types 58 | .try_iter()? 59 | .filter(|typ| { 60 | let Ok(typ) = typ else { 61 | return true; 62 | }; 63 | let Ok(typ) = typ.cast::<PyType>() else { 64 | return true; 65 | }; 66 | is_of_known_bytes_types(typ).map(|b| !b).unwrap_or(true) 67 | }) 68 | .collect::<Result<Vec<_>, _>>()?; 69 | 70 | Ok(PyTuple::new(py, types)?.into_any()) 71 | } 72 | 73 | inner(&types).unwrap_or(types) 74 | } 75 | 76 | let py = locals.py(); 77 | Ok(Self { 78 | none_values: locals 79 | .get_item("NONE_VALUES")? 80 | .ok_or_else(|| { 81 | PyErr::new::<PyValueError, _>("Type mappings are missing NONE_VALUES.") 82 |                 })? 
83 | .extract()?, 84 | true_values: locals 85 | .get_item("TRUE_VALUES")? 86 | .ok_or_else(|| { 87 | PyErr::new::<PyValueError, _>("Type mappings are missing TRUE_VALUES.") 88 | })? 89 | .extract()?, 90 | false_values: locals 91 | .get_item("FALSE_VALUES")? 92 | .ok_or_else(|| { 93 | PyErr::new::<PyValueError, _>("Type mappings are missing FALSE_VALUES.") 94 | })? 95 | .extract()?, 96 | int_types: locals 97 | .get_item("INT_TYPES")? 98 | .ok_or_else(|| { 99 | PyErr::new::<PyValueError, _>("Type mappings are missing INT_TYPES.") 100 | })? 101 | .into_py_any(py)?, 102 | float_types: locals 103 | .get_item("FLOAT_TYPES")? 104 | .ok_or_else(|| { 105 | PyErr::new::<PyValueError, _>("Type mappings are missing FLOAT_TYPES.") 106 | })? 107 | .into_py_any(py)?, 108 | sequence_types: locals 109 | .get_item("SEQUENCE_TYPES")? 110 | .ok_or_else(|| { 111 | PyErr::new::<PyValueError, _>("Type mappings are missing SEQUENCE_TYPES.") 112 | })? 113 | .into_py_any(py)?, 114 | mapping_types: locals 115 | .get_item("MAPPING_TYPES")? 116 | .ok_or_else(|| { 117 | PyErr::new::<PyValueError, _>("Type mappings are missing MAPPING_TYPES.") 118 | })? 119 | .into_py_any(py)?, 120 | bytes_types: filter_bytes_types( 121 | locals 122 | .get_item("BYTES_TYPES")? 123 | .ok_or_else(|| { 124 | PyErr::new::<PyValueError, _>("Type mappings are missing BYTES_TYPES.") 125 | })? 126 | .into_bound_py_any(py)?, 127 | ) 128 | .unbind(), 129 | }) 130 | } 131 | } 132 | 133 | static TYPE_MAPPINGS: OnceLock<PyResult<TypeMappings>> = OnceLock::new(); 134 | 135 | fn get_type_mappings(py: Python<'_>) -> PyResult<&'static TypeMappings> { 136 | let mappings = TYPE_MAPPINGS.get_or_init_py_attached(py, || { 137 | let locals = PyDict::new(py); 138 | py.run( 139 | c"from neo4j._codec.packstream.v1.types import *", 140 | None, 141 | Some(&locals), 142 | )?; 143 | TypeMappings::new(&locals) 144 | }); 145 | mappings.as_ref().map_err(|e| e.clone_ref(py)) 146 | } 147 | 148 | #[pyfunction] 149 | #[pyo3(signature = (value, dehydration_hooks=None))] 150 | pub(super) fn pack<'py>( 151 | value: &Bound<'py, PyAny>, 152 | dehydration_hooks: Option<&Bound<'py, PyAny>>, 153 | ) -> PyResult<Bound<'py, PyBytes>> { 154 | let py = value.py(); 155 | let type_mappings = get_type_mappings(py)?; 156 | let mut encoder = PackStreamEncoder::new(dehydration_hooks, type_mappings); 157 | encoder.write(value)?; 158 | Ok(PyBytes::new(py, &encoder.buffer)) 159 | } 160 | 161 | struct PackStreamEncoder<'a> { 162 | dehydration_hooks: Option<&'a Bound<'a, PyAny>>, 163 | type_mappings: &'a TypeMappings, 164 | buffer: Vec<u8>, 165 | } 166 | 167 | impl<'a> PackStreamEncoder<'a> { 168 | fn new( 169 | dehydration_hooks: Option<&'a Bound<'a, PyAny>>, 170 | type_mappings: &'a TypeMappings, 171 | ) -> Self { 172 | Self { 173 | dehydration_hooks, 174 | type_mappings, 175 | buffer: Default::default(), 176 | } 177 | } 178 | 179 | fn write(&mut self, value: &Bound<PyAny>) -> PyResult<()> { 180 | let py = value.py(); 181 | 182 | if self.write_exact_value(value, &self.type_mappings.none_values, &[NULL])? { 183 | return Ok(()); 184 | } 185 | if self.write_exact_value(value, &self.type_mappings.true_values, &[TRUE])? { 186 | return Ok(()); 187 | } 188 | if self.write_exact_value(value, &self.type_mappings.false_values, &[FALSE])? { 189 | return Ok(()); 190 | } 191 | 192 | if value.is_instance(self.type_mappings.float_types.bind(py))? { 193 | let value = value.extract::<f64>()?; 194 | return self.write_float(value); 195 | } 196 | 197 | if value.is_instance(self.type_mappings.int_types.bind(py))? { 198 | let value = value.extract::<i64>()?; 199 | return self.write_int(value); 200 | } 201 | 202 | if value.is_instance(&PyType::new::<PyString>(py))? 
{ 203 | return self.write_string(value.extract::<&str>()?); 204 | } 205 | 206 | if let Ok(value) = value.cast::<PyBytes>() { 207 | return self.write_bytes(value.as_bytes()); 208 | } else if let Ok(value) = value.cast::<PyByteArray>() { 209 | return with_critical_section(value, || { 210 | // SAFETY: 211 | // * we're holding the GIL/are attached to the Python interpreter 212 | // * we're using a critical section to ensure exclusive access to the byte array 213 | // * we don't interact with the interpreter/PyO3 APIs while reading the bytes 214 | unsafe { self.write_bytes(value.as_bytes()) } 215 | }); 216 | } else if value.is_instance(self.type_mappings.bytes_types.bind(py))? { 217 | return self.write_bytes(&value.extract::<Cow<[u8]>>()?); 218 | } 219 | 220 | if value.is_instance(self.type_mappings.sequence_types.bind(py))? { 221 | let size = Self::usize_to_u64(value.len()?)?; 222 | self.write_list_header(size)?; 223 | return value.try_iter()?.try_for_each(|item| self.write(&item?)); 224 | } 225 | 226 | if value.is_instance(self.type_mappings.mapping_types.bind(py))? { 227 | let size = Self::usize_to_u64(value.getattr(intern!(py, "keys"))?.call0()?.len()?)?; 228 | self.write_dict_header(size)?; 229 | let items = value.getattr(intern!(py, "items"))?.call0()?; 230 | return items.try_iter()?.try_for_each(|item| { 231 | let (key, value) = item?.extract::<(Bound<PyAny>, Bound<PyAny>)>()?; 232 | let key = match key.extract::<&str>() { 233 | Ok(key) => key, 234 | Err(_) => { 235 | return Err(PyErr::new::<PyTypeError, _>(format!( 236 | "Map keys must be strings, not {}", 237 | key.get_type().str()? 238 | ))) 239 | } 240 | }; 241 | self.write_string(key)?; 242 | self.write(&value) 243 | }); 244 | } 245 | 246 | if let Ok(value) = value.extract::<Bound<Structure>>() { 247 | let value_ref = value.borrow(); 248 | let size = value_ref.fields.len().try_into().map_err(|_| { 249 | PyErr::new::<PyOverflowError, _>("Structure header size out of range") 250 | })?; 251 | self.write_struct_header(value_ref.tag, size)?; 252 | return value_ref 253 | .fields 254 | .iter() 255 | .try_for_each(|item| self.write(item.bind(py))); 256 | } 257 | 258 | if let Some(dehydration_hooks) = self.dehydration_hooks { 259 | let transformer = 260 | dehydration_hooks.call_method1(intern!(py, "get_transformer"), (value,))?; 261 | if !transformer.is_none() { 262 | let value = transformer.call1((value,))?; 263 | return self.write(&value); 264 | } 265 | } 266 | 267 | // raise ValueError("Values of type %s are not supported" % type(value)) 268 | Err(PyErr::new::<PyValueError, _>(format!( 269 | "Values of type {} are not supported", 270 | value.get_type().str()? 
271 | ))) 272 | } 273 | 274 | fn write_exact_value( 275 | &mut self, 276 | value: &Bound<PyAny>, 277 | values: &[Py<PyAny>], 278 | bytes: &[u8], 279 | ) -> PyResult<bool> { 280 | for v in values { 281 | if value.is(v) { 282 | self.buffer.extend(bytes); 283 | return Ok(true); 284 | } 285 | } 286 | Ok(false) 287 | } 288 | 289 | fn write_int(&mut self, i: i64) -> PyResult<()> { 290 | if (-16..=127).contains(&i) { 291 | self.buffer.extend(&i8::to_be_bytes(i as i8)); 292 | } else if (-128..=127).contains(&i) { 293 | self.buffer.extend(&[INT_8]); 294 | self.buffer.extend(&i8::to_be_bytes(i as i8)); 295 | } else if (-32_768..=32_767).contains(&i) { 296 | self.buffer.extend(&[INT_16]); 297 | self.buffer.extend(&i16::to_be_bytes(i as i16)); 298 | } else if (-2_147_483_648..=2_147_483_647).contains(&i) { 299 | self.buffer.extend(&[INT_32]); 300 | self.buffer.extend(&i32::to_be_bytes(i as i32)); 301 | } else { 302 | self.buffer.extend(&[INT_64]); 303 | self.buffer.extend(&i64::to_be_bytes(i)); 304 | } 305 | Ok(()) 306 | } 307 | 308 | fn write_float(&mut self, f: f64) -> PyResult<()> { 309 | self.buffer.extend(&[FLOAT_64]); 310 | self.buffer.extend(&f64::to_be_bytes(f)); 311 | Ok(()) 312 | } 313 | 314 | fn write_bytes(&mut self, b: &[u8]) -> PyResult<()> { 315 | let size = Self::usize_to_u64(b.len())?; 316 | if size <= 255 { 317 | self.buffer.extend(&[BYTES_8]); 318 | self.buffer.extend(&u8::to_be_bytes(size as u8)); 319 | } else if size <= 65_535 { 320 | self.buffer.extend(&[BYTES_16]); 321 | self.buffer.extend(&u16::to_be_bytes(size as u16)); 322 | } else if size <= 2_147_483_647 { 323 | self.buffer.extend(&[BYTES_32]); 324 | self.buffer.extend(&u32::to_be_bytes(size as u32)); 325 | } else { 326 | return Err(PyErr::new::<PyOverflowError, _>( 327 | "Bytes header size out of range", 328 | )); 329 | } 330 | self.buffer.extend(b.iter()); 331 | Ok(()) 332 | } 333 | 334 | fn usize_to_u64(size: usize) -> PyResult<u64> { 335 | u64::try_from(size).map_err(|e| PyErr::new::<PyOverflowError, _>(e.to_string())) 336 | } 337 | 338 | fn write_string(&mut self, s: &str) -> PyResult<()> { 339 | let bytes = s.as_bytes(); 340 | let size = Self::usize_to_u64(bytes.len())?; 341 | if size <= 15 { 342 | self.buffer.extend(&[TINY_STRING + size as u8]); 343 | } else if size <= 255 { 344 | self.buffer.extend(&[STRING_8]); 345 | self.buffer.extend(&u8::to_be_bytes(size as u8)); 346 | } else if size <= 65_535 { 347 | self.buffer.extend(&[STRING_16]); 348 | self.buffer.extend(&u16::to_be_bytes(size as u16)); 349 | } else if size <= 2_147_483_647 { 350 | self.buffer.extend(&[STRING_32]); 351 | self.buffer.extend(&u32::to_be_bytes(size as u32)); 352 | } else { 353 | return Err(PyErr::new::<PyOverflowError, _>( 354 | "String header size out of range", 355 | )); 356 | } 357 | self.buffer.extend(bytes); 358 | Ok(()) 359 | } 360 | 361 | fn write_list_header(&mut self, size: u64) -> PyResult<()> { 362 | if size <= 15 { 363 | self.buffer.extend(&[TINY_LIST + size as u8]); 364 | } else if size <= 255 { 365 | self.buffer.extend(&[LIST_8]); 366 | self.buffer.extend(&u8::to_be_bytes(size as u8)); 367 | } else if size <= 65_535 { 368 | self.buffer.extend(&[LIST_16]); 369 | self.buffer.extend(&u16::to_be_bytes(size as u16)); 370 | } else if size <= 2_147_483_647 { 371 | self.buffer.extend(&[LIST_32]); 372 | self.buffer.extend(&u32::to_be_bytes(size as u32)); 373 | } else { 374 | return Err(PyErr::new::<PyOverflowError, _>( 375 | "List header size out of range", 376 | )); 377 | } 378 | Ok(()) 379 | } 380 | 381 | fn write_dict_header(&mut self, size: u64) -> PyResult<()> { 382 | if size <= 15 { 383 | self.buffer.extend(&[TINY_MAP + size as u8]); 
384 | } else if size <= 255 { 385 | self.buffer.extend(&[MAP_8]); 386 | self.buffer.extend(&u8::to_be_bytes(size as u8)); 387 | } else if size <= 65_535 { 388 | self.buffer.extend(&[MAP_16]); 389 | self.buffer.extend(&u16::to_be_bytes(size as u16)); 390 | } else if size <= 2_147_483_647 { 391 | self.buffer.extend(&[MAP_32]); 392 | self.buffer.extend(&u32::to_be_bytes(size as u32)); 393 | } else { 394 | return Err(PyErr::new::<PyOverflowError, _>( 395 | "Map header size out of range", 396 | )); 397 | } 398 | Ok(()) 399 | } 400 | 401 | fn write_struct_header(&mut self, tag: u8, size: u8) -> PyResult<()> { 402 | if size > 15 { 403 | return Err(PyErr::new::<PyOverflowError, _>( 404 | "Structure size out of range", 405 | )); 406 | } 407 | self.buffer.extend(&[TINY_STRUCT + size, tag]); 408 | Ok(()) 409 | } 410 | } 411 | -------------------------------------------------------------------------------- /tests/codec/packstream/v1/from_driver/test_packstream.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | import struct 18 | from io import BytesIO 19 | from math import ( 20 | isnan, 21 | pi, 22 | ) 23 | from uuid import uuid4 24 | 25 | import numpy as np 26 | import pandas as pd 27 | import pytest 28 | 29 | from neo4j._codec.packstream import Structure 30 | from neo4j._codec.packstream.v1 import ( 31 | PackableBuffer, 32 | Packer, 33 | UnpackableBuffer, 34 | Unpacker, 35 | ) 36 | 37 | 38 | standard_ascii = [chr(i) for i in range(128)] 39 | not_ascii = "♥O◘♦♥O◘♦" 40 | 41 | 42 | @pytest.fixture 43 | def packer_with_buffer(): 44 | packable_buffer = Packer.new_packable_buffer() 45 | return Packer(packable_buffer), packable_buffer 46 | 47 | 48 | @pytest.fixture 49 | def unpacker_with_buffer(): 50 | unpackable_buffer = Unpacker.new_unpackable_buffer() 51 | return Unpacker(unpackable_buffer), unpackable_buffer 52 | 53 | 54 | def test_packable_buffer(packer_with_buffer): 55 | packer, packable_buffer = packer_with_buffer 56 | assert isinstance(packable_buffer, PackableBuffer) 57 | assert packable_buffer is packer.stream 58 | 59 | 60 | def test_unpackable_buffer(unpacker_with_buffer): 61 | unpacker, unpackable_buffer = unpacker_with_buffer 62 | assert isinstance(unpackable_buffer, UnpackableBuffer) 63 | assert unpackable_buffer is unpacker.unpackable 64 | 65 | 66 | @pytest.fixture 67 | def pack(packer_with_buffer): 68 | packer, packable_buffer = packer_with_buffer 69 | 70 | def _pack(*values, dehydration_hooks=None): 71 | for value in values: 72 | packer.pack(value, dehydration_hooks=dehydration_hooks) 73 | data = bytearray(packable_buffer.data) 74 | packable_buffer.clear() 75 | return data 76 | 77 | return _pack 78 | 79 | 80 | _default_out_value = object() 81 | 82 | 83 | @pytest.fixture 84 | def assert_packable(packer_with_buffer, unpacker_with_buffer): 85 | def _recursive_nan_equal(a, b): 86 | if isinstance(a, (list, tuple)) and isinstance(b, 
(list, tuple)): 87 | return len(a) == len(b) and all( 88 | _recursive_nan_equal(x, y) for x, y in zip(a, b, strict=True) 89 | ) 90 | elif isinstance(a, dict) and isinstance(b, dict): 91 | return len(a) == len(b) and all( 92 | _recursive_nan_equal(a[k], b[k]) for k in a 93 | ) 94 | else: 95 | return a == b or (isnan(a) and isnan(b)) 96 | 97 | def _assert(in_value, packed_value, out_value=_default_out_value): 98 | if out_value is _default_out_value: 99 | out_value = in_value 100 | nonlocal packer_with_buffer, unpacker_with_buffer 101 | packer, packable_buffer = packer_with_buffer 102 | unpacker, unpackable_buffer = unpacker_with_buffer 103 | packable_buffer.clear() 104 | unpackable_buffer.reset() 105 | 106 | packer.pack(in_value) 107 | packed_data = packable_buffer.data 108 | assert packed_data == packed_value 109 | 110 | unpackable_buffer.data = bytearray(packed_data) 111 | unpackable_buffer.used = len(packed_data) 112 | unpacked_data = unpacker.unpack() 113 | assert _recursive_nan_equal(unpacked_data, out_value) 114 | 115 | return _assert 116 | 117 | 118 | @pytest.fixture(params=(True, False)) 119 | def np_float_overflow_as_error(request): 120 | should_raise = request.param 121 | if should_raise: 122 | old_err = np.seterr(over="raise") 123 | else: 124 | old_err = np.seterr(over="ignore") 125 | yield 126 | np.seterr(**old_err) 127 | 128 | 129 | @pytest.fixture( 130 | params=( 131 | int, 132 | np.int8, 133 | np.int16, 134 | np.int32, 135 | np.int64, 136 | np.longlong, 137 | np.uint8, 138 | np.uint16, 139 | np.uint32, 140 | np.uint64, 141 | np.ulonglong, 142 | ) 143 | ) 144 | def int_type(request): 145 | if issubclass(request.param, np.number): 146 | 147 | def _int_type(value): 148 | # this avoids deprecation warning from NEP50 and forces 149 | # c-style wrapping of the value 150 | return np.array(value).astype(request.param).item() 151 | 152 | return _int_type 153 | else: 154 | return request.param 155 | 156 | 157 | @pytest.fixture( 158 | params=(float, np.float16, np.float32, np.float64, np.longdouble) 159 | ) 160 | def float_type(request, np_float_overflow_as_error): 161 | return request.param 162 | 163 | 164 | @pytest.fixture(params=(bool, np.bool_)) 165 | def bool_type(request): 166 | return request.param 167 | 168 | 169 | @pytest.fixture(params=(bytes, bytearray, np.bytes_)) 170 | def bytes_type(request): 171 | return request.param 172 | 173 | 174 | @pytest.fixture(params=(str, np.str_)) 175 | def str_type(request): 176 | return request.param 177 | 178 | 179 | @pytest.fixture( 180 | params=(list, tuple, np.array, pd.Series, pd.array, pd.arrays.SparseArray) 181 | ) 182 | def sequence_type(request): 183 | if request.param is pd.Series: 184 | 185 | def constructor(value): 186 | if not value: 187 | return pd.Series(dtype=object) 188 | return pd.Series(value) 189 | 190 | return constructor 191 | return request.param 192 | 193 | 194 | class TestPackStream: 195 | @pytest.mark.parametrize("value", (None, pd.NA)) 196 | def test_none(self, value, assert_packable): 197 | assert_packable(value, b"\xc0", None) 198 | 199 | def test_boolean(self, bool_type, assert_packable): 200 | assert_packable(bool_type(True), b"\xc3") 201 | assert_packable(bool_type(False), b"\xc2") 202 | 203 | @pytest.mark.parametrize("dtype", (bool, pd.BooleanDtype())) 204 | def test_boolean_pandas_series(self, dtype, assert_packable): 205 | value = [True, False] 206 | value_series = pd.Series(value, dtype=dtype) 207 | assert_packable(value_series, b"\x92\xc3\xc2", value) 208 | 209 | def test_negative_tiny_int(self, int_type, 
assert_packable): 210 | for z in range(-16, 0): 211 | z_typed = int_type(z) 212 | if z != int(z_typed): 213 | continue # not representable 214 | assert_packable(z_typed, bytes(bytearray([z + 0x100]))) 215 | 216 | @pytest.mark.parametrize( 217 | "dtype", 218 | ( 219 | int, 220 | pd.Int8Dtype(), 221 | pd.Int16Dtype(), 222 | pd.Int32Dtype(), 223 | pd.Int64Dtype(), 224 | np.int8, 225 | np.int16, 226 | np.int32, 227 | np.int64, 228 | np.longlong, 229 | ), 230 | ) 231 | def test_negative_tiny_int_pandas_series(self, dtype, assert_packable): 232 | for z in range(-16, 0): 233 | z_typed = pd.Series(z, dtype=dtype) 234 | assert_packable(z_typed, bytes(bytearray([0x91, z + 0x100])), [z]) 235 | 236 | def test_positive_tiny_int(self, int_type, assert_packable): 237 | for z in range(128): 238 | z_typed = int_type(z) 239 | if z != int(z_typed): 240 | continue # not representable 241 | assert_packable(z_typed, bytes(bytearray([z]))) 242 | 243 | def test_negative_int8(self, int_type, assert_packable): 244 | for z in range(-128, -16): 245 | z_typed = int_type(z) 246 | if z != int(z_typed): 247 | continue # not representable 248 | assert_packable(z_typed, bytes(bytearray([0xC8, z + 0x100]))) 249 | 250 | def test_positive_int16(self, int_type, assert_packable): 251 | for z in range(128, 32768): 252 | z_typed = int_type(z) 253 | if z != int(z_typed): 254 | continue # not representable 255 | expected = b"\xc9" + struct.pack(">h", z) 256 | assert_packable(z_typed, expected) 257 | 258 | def test_negative_int16(self, int_type, assert_packable): 259 | for z in range(-32768, -128): 260 | z_typed = int_type(z) 261 | if z != int(z_typed): 262 | continue # not representable 263 | expected = b"\xc9" + struct.pack(">h", z) 264 | assert_packable(z_typed, expected) 265 | 266 | def test_positive_int32(self, int_type, assert_packable): 267 | for e in range(15, 31): 268 | z = 2**e 269 | z_typed = int_type(z) 270 | if z != int(z_typed): 271 | continue # not representable 272 | expected = b"\xca" + struct.pack(">i", z) 273 | assert_packable(z_typed, expected) 274 | 275 | def test_negative_int32(self, int_type, assert_packable): 276 | for e in range(15, 31): 277 | z = -(2**e + 1) 278 | z_typed = int_type(z) 279 | if z != int(z_typed): 280 | continue # not representable 281 | expected = b"\xca" + struct.pack(">i", z) 282 | assert_packable(z_typed, expected) 283 | 284 | def test_positive_int64(self, int_type, assert_packable): 285 | for e in range(31, 63): 286 | z = 2**e 287 | z_typed = int_type(z) 288 | if z != int(z_typed): 289 | continue # not representable 290 | expected = b"\xcb" + struct.pack(">q", z) 291 | assert_packable(z_typed, expected) 292 | 293 | @pytest.mark.parametrize( 294 | "dtype", 295 | ( 296 | int, 297 | pd.Int64Dtype(), 298 | pd.UInt64Dtype(), 299 | np.int64, 300 | np.longlong, 301 | np.uint64, 302 | np.ulonglong, 303 | ), 304 | ) 305 | def test_positive_int64_pandas_series(self, dtype, assert_packable): 306 | for e in range(31, 63): 307 | z = 2**e 308 | z_typed = pd.Series(z, dtype=dtype) 309 | expected = b"\x91\xcb" + struct.pack(">q", z) 310 | assert_packable(z_typed, expected, [z]) 311 | 312 | def test_negative_int64(self, int_type, assert_packable): 313 | for e in range(31, 63): 314 | z = -(2**e + 1) 315 | z_typed = int_type(z) 316 | if z != int(z_typed): 317 | continue # not representable 318 | expected = b"\xcb" + struct.pack(">q", z) 319 | assert_packable(z_typed, expected) 320 | 321 | @pytest.mark.parametrize( 322 | "dtype", 323 | ( 324 | int, 325 | pd.Int64Dtype(), 326 | np.int64, 327 | np.longlong, 
328 | ), 329 | ) 330 | def test_negative_int64_pandas_series(self, dtype, assert_packable): 331 | for e in range(31, 63): 332 | z = -(2**e + 1) 333 | z_typed = pd.Series(z, dtype=dtype) 334 | expected = b"\x91\xcb" + struct.pack(">q", z) 335 | assert_packable(z_typed, expected, [z]) 336 | 337 | def test_integer_positive_overflow(self, int_type, pack, assert_packable): 338 | with pytest.raises(OverflowError): 339 | z = 2**63 + 1 340 | z_typed = int_type(z) 341 | if z != int(z_typed): 342 | pytest.skip("not representable") 343 | pack(z_typed) 344 | 345 | def test_integer_negative_overflow(self, int_type, pack, assert_packable): 346 | with pytest.raises(OverflowError): 347 | z = -(2**63) - 1 348 | z_typed = int_type(z) 349 | if z != int(z_typed): 350 | pytest.skip("not representable") 351 | pack(z_typed) 352 | 353 | def test_float(self, float_type, assert_packable): 354 | for z in ( 355 | 0.0, 356 | -0.0, 357 | pi, 358 | 2 * pi, 359 | float("inf"), 360 | float("-inf"), 361 | float("nan"), 362 | *(float(2**e) + 0.5 for e in range(100)), 363 | *(-float(2**e) + 0.5 for e in range(100)), 364 | ): 365 | try: 366 | z_typed = float_type(z) 367 | except FloatingPointError: 368 | continue # not representable 369 | expected = b"\xc1" + struct.pack(">d", float(z_typed)) 370 | assert_packable(z_typed, expected) 371 | 372 | @pytest.mark.parametrize( 373 | "dtype", 374 | ( 375 | float, 376 | pd.Float32Dtype(), 377 | pd.Float64Dtype(), 378 | np.float16, 379 | np.float32, 380 | np.float64, 381 | np.longdouble, 382 | ), 383 | ) 384 | def test_float_pandas_series( 385 | self, dtype, np_float_overflow_as_error, assert_packable 386 | ): 387 | for z in ( 388 | 0.0, 389 | -0.0, 390 | pi, 391 | 2 * pi, 392 | float("inf"), 393 | float("-inf"), 394 | float("nan"), 395 | *(float(2**e) + 0.5 for e in range(100)), 396 | *(-float(2**e) + 0.5 for e in range(100)), 397 | ): 398 | try: 399 | z_typed = pd.Series(z, dtype=dtype) 400 | except FloatingPointError: 401 | continue # not representable 402 | if z_typed[0] is pd.NA: 403 | expected_bytes = b"\x91\xc0" # encoded as NULL 404 | expected_value = [None] 405 | else: 406 | expected_bytes = b"\x91\xc1" + struct.pack( 407 | ">d", float(z_typed[0]) 408 | ) 409 | expected_value = [float(z_typed[0])] 410 | assert_packable(z_typed, expected_bytes, expected_value) 411 | 412 | def test_empty_bytes(self, bytes_type, assert_packable): 413 | b = bytes_type(b"") 414 | assert_packable(b, b"\xcc\x00") 415 | 416 | def test_bytes_8(self, bytes_type, assert_packable): 417 | b = bytes_type(b"hello") 418 | assert_packable(b, b"\xcc\x05hello") 419 | 420 | def test_bytes_16(self, bytes_type, assert_packable): 421 | b = bytearray(40000) 422 | b_typed = bytes_type(b) 423 | assert_packable(b_typed, b"\xcd\x9c\x40" + b) 424 | 425 | def test_bytes_32(self, bytes_type, assert_packable): 426 | b = bytearray(80000) 427 | b_typed = bytes_type(b) 428 | assert_packable(b_typed, b"\xce\x00\x01\x38\x80" + b) 429 | 430 | def test_bytes_pandas_series(self, assert_packable): 431 | for b, header in ( 432 | (b"", b"\xcc\x00"), 433 | (b"hello", b"\xcc\x05"), 434 | (bytearray(40000), b"\xcd\x9c\x40"), 435 | (bytearray(80000), b"\xce\x00\x01\x38\x80"), 436 | ): 437 | b_typed = pd.Series([b]) 438 | assert_packable(b_typed, b"\x91" + header + b, [b]) 439 | 440 | def test_bytearray_size_overflow(self, bytes_type, assert_packable): 441 | stream_out = BytesIO() 442 | packer = Packer(stream_out) 443 | with pytest.raises(OverflowError): 444 | packer._pack_bytes_header(2**32) 445 | 446 | def test_empty_string(self, 
str_type, assert_packable): 447 | assert_packable(str_type(""), b"\x80") 448 | 449 | def test_tiny_strings(self, str_type, assert_packable): 450 | for size in range(0x10): 451 | s = str_type("A" * size) 452 | assert_packable(s, bytes(bytearray([0x80 + size]) + (b"A" * size))) 453 | 454 | def test_string_8(self, str_type, assert_packable): 455 | t = "A" * 40 456 | b = t.encode("utf-8") 457 | t_typed = str_type(t) 458 | assert_packable(t_typed, b"\xd0\x28" + b) 459 | 460 | def test_string_16(self, str_type, assert_packable): 461 | t = "A" * 40000 462 | b = t.encode("utf-8") 463 | t_typed = str_type(t) 464 | assert_packable(t_typed, b"\xd1\x9c\x40" + b) 465 | 466 | def test_string_32(self, str_type, assert_packable): 467 | t = "A" * 80000 468 | b = t.encode("utf-8") 469 | t_typed = str_type(t) 470 | assert_packable(t_typed, b"\xd2\x00\x01\x38\x80" + b) 471 | 472 | def test_unicode_string(self, str_type, assert_packable): 473 | t = "héllö" 474 | b = t.encode("utf-8") 475 | t_typed = str_type(t) 476 | assert_packable(t_typed, bytes(bytearray([0x80 + len(b)])) + b) 477 | 478 | @pytest.mark.parametrize( 479 | "dtype", 480 | ( 481 | str, 482 | np.str_, 483 | pd.StringDtype("python"), 484 | pd.StringDtype("pyarrow"), 485 | ), 486 | ) 487 | def test_string_pandas_series(self, dtype, assert_packable): 488 | values = ( 489 | ("", b"\x80"), 490 | ("A" * 40, b"\xd0\x28"), 491 | ("A" * 40000, b"\xd1\x9c\x40"), 492 | ("A" * 80000, b"\xd2\x00\x01\x38\x80"), 493 | ) 494 | for t, header in values: 495 | t_typed = pd.Series([t], dtype=dtype) 496 | assert_packable(t_typed, b"\x91" + header + t.encode("utf-8"), [t]) 497 | 498 | t_typed = pd.Series([t for t, _ in values], dtype=dtype) 499 | expected = bytes([0x90 + len(values)]) + b"".join( 500 | header + t.encode("utf-8") for t, header in values 501 | ) 502 | assert_packable(t_typed, expected, [t for t, _ in values]) 503 | 504 | def test_string_size_overflow(self): 505 | stream_out = BytesIO() 506 | packer = Packer(stream_out) 507 | with pytest.raises(OverflowError): 508 | packer._pack_string_header(2**32) 509 | 510 | def test_empty_list(self, sequence_type, assert_packable): 511 | list_ = [] 512 | list_typed = sequence_type(list_) 513 | assert_packable(list_typed, b"\x90", list_) 514 | 515 | def test_tiny_lists(self, sequence_type, assert_packable): 516 | for size in range(0x10): 517 | nums = [1] * size 518 | nums_typed = sequence_type(nums) 519 | data_out = bytearray([0x90 + size]) + bytearray([1] * size) 520 | assert_packable(nums_typed, bytes(data_out), nums) 521 | 522 | def test_list_8(self, sequence_type, assert_packable): 523 | nums = [1] * 40 524 | nums_typed = sequence_type(nums) 525 | assert_packable(nums_typed, b"\xd4\x28" + (b"\x01" * 40), nums) 526 | 527 | def test_list_16(self, sequence_type, assert_packable): 528 | nums = [1] * 40000 529 | nums_typed = sequence_type(nums) 530 | assert_packable(nums_typed, b"\xd5\x9c\x40" + (b"\x01" * 40000), nums) 531 | 532 | def test_list_32(self, sequence_type, assert_packable): 533 | nums = [1] * 80000 534 | nums_typed = sequence_type(nums) 535 | assert_packable( 536 | nums_typed, b"\xd6\x00\x01\x38\x80" + (b"\x01" * 80000), nums 537 | ) 538 | 539 | def test_nested_lists(self, sequence_type, assert_packable): 540 | list_ = [[[]]] 541 | l_typed = sequence_type([sequence_type([sequence_type([])])]) 542 | assert_packable(l_typed, b"\x91\x91\x90", list_) 543 | 544 | @pytest.mark.parametrize("as_series", (True, False)) 545 | def test_list_pandas_categorical(self, as_series, pack, assert_packable): 546 | animals = 
["cat", "dog", "cat", "cat", "dog", "horse"] 547 | animals_typed = pd.Categorical(animals) 548 | if as_series: 549 | animals_typed = pd.Series(animals_typed) 550 | b = b"".join([b"\x96", *(pack(e) for e in animals)]) 551 | assert_packable(animals_typed, b, animals) 552 | 553 | def test_list_size_overflow(self): 554 | stream_out = BytesIO() 555 | packer = Packer(stream_out) 556 | with pytest.raises(OverflowError): 557 | packer._pack_list_header(2**32) 558 | 559 | def test_empty_map(self, assert_packable): 560 | assert_packable({}, b"\xa0") 561 | 562 | @pytest.mark.parametrize("size", range(0x10)) 563 | def test_tiny_maps(self, assert_packable, size): 564 | data_in = {} 565 | data_out = bytearray([0xA0 + size]) 566 | for el in range(1, size + 1): 567 | data_in[chr(64 + el)] = el 568 | data_out += bytearray([0x81, 64 + el, el]) 569 | assert_packable(data_in, bytes(data_out)) 570 | 571 | @pytest.mark.parametrize("size", range(0x10)) 572 | def test_tiny_maps_padded_key(self, assert_packable, size): 573 | data_in = {} 574 | data_out = bytearray([0xA0 + size]) 575 | padding = b"1234567890abcdefghijklmnopqrstuvwxyz" 576 | for el in range(1, size + 1): 577 | data_in[padding.decode("ascii") + chr(64 + el)] = el 578 | data_out += bytearray([0xD0, 37, *padding, 64 + el, el]) 579 | assert_packable(data_in, bytes(data_out)) 580 | 581 | def test_map_8(self, pack, assert_packable): 582 | d = {f"A{i}": 1 for i in range(40)} 583 | b = b"".join(pack(f"A{i}", 1) for i in range(40)) 584 | assert_packable(d, b"\xd8\x28" + b) 585 | 586 | def test_map_8_padded_key(self, pack, assert_packable): 587 | padding = "1234567890abcdefghijklmnopqrstuvwxyz" 588 | d = {f"{padding}-{i}": 1 for i in range(40)} 589 | b = b"".join(pack(f"{padding}-{i}", 1) for i in range(40)) 590 | assert_packable(d, b"\xd8\x28" + b) 591 | 592 | def test_map_16(self, pack, assert_packable): 593 | d = {f"A{i}": 1 for i in range(40000)} 594 | b = b"".join(pack(f"A{i}", 1) for i in range(40000)) 595 | assert_packable(d, b"\xd9\x9c\x40" + b) 596 | 597 | def test_map_32(self, pack, assert_packable): 598 | d = {f"A{i}": 1 for i in range(80000)} 599 | b = b"".join(pack(f"A{i}", 1) for i in range(80000)) 600 | assert_packable(d, b"\xda\x00\x01\x38\x80" + b) 601 | 602 | def test_map_key_tiny_string(self, assert_packable): 603 | key = "A" 604 | d = {key: 1} 605 | data_out = b"\xa1\x81" + key.encode("utf-8") + b"\x01" 606 | assert_packable(d, bytes(data_out)) 607 | 608 | def test_map_key_string_8(self, assert_packable): 609 | key = "A" * 40 610 | d = {key: 1} 611 | data_out = b"\xa1\xd0\x28" + key.encode("utf-8") + b"\x01" 612 | assert_packable(d, data_out) 613 | 614 | def test_map_key_string_16(self, assert_packable): 615 | key = "A" * 40000 616 | d = {key: 1} 617 | data_out = b"\xa1\xd1\x9c\x40" + key.encode("utf-8") + b"\x01" 618 | assert_packable(d, data_out) 619 | 620 | def test_map_key_string_32(self, assert_packable): 621 | key = "A" * 80000 622 | d = {key: 1} 623 | data_out = b"\xa1\xd2\x00\x01\x38\x80" + key.encode("utf-8") + b"\x01" 624 | assert_packable(d, data_out) 625 | 626 | def test_empty_dataframe_maps(self, assert_packable): 627 | df = pd.DataFrame() 628 | assert_packable(df, b"\xa0", {}) 629 | 630 | @pytest.mark.parametrize("size", range(0x10)) 631 | def test_tiny_dataframes_maps(self, assert_packable, size): 632 | data_in = {} 633 | data_out = bytearray([0xA0 + size]) 634 | for el in range(1, size + 1): 635 | data_in[chr(64 + el)] = [el] 636 | data_out += bytearray([0x81, 64 + el, 0x91, el]) 637 | data_in_typed = pd.DataFrame(data_in) 
638 | assert_packable(data_in_typed, bytes(data_out), data_in) 639 | 640 | def test_map_size_overflow(self): 641 | stream_out = BytesIO() 642 | packer = Packer(stream_out) 643 | with pytest.raises(OverflowError): 644 | packer._pack_map_header(2**32) 645 | 646 | @pytest.mark.parametrize( 647 | ("map_", "exc_type"), 648 | ( 649 | ({1: "1"}, TypeError), 650 | (pd.DataFrame({1: ["1"]}), TypeError), 651 | (pd.DataFrame({(1, 2): ["1"]}), TypeError), 652 | ({"x": {1: "eins", 2: "zwei", 3: "drei"}}, TypeError), 653 | ({"x": {(1, 2): "1+2i", (2, 0): "2"}}, TypeError), 654 | ), 655 | ) 656 | def test_map_key_type(self, packer_with_buffer, map_, exc_type): 657 | # maps must have string keys 658 | packer, _packable_buffer = packer_with_buffer 659 | with pytest.raises(exc_type, match="strings"): 660 | packer._pack(map_) 661 | 662 | def test_illegal_signature(self, assert_packable): 663 | with pytest.raises(ValueError): 664 | assert_packable(Structure(b"XXX"), b"\xb0XXX") 665 | 666 | def test_empty_struct(self, assert_packable): 667 | assert_packable(Structure(b"X"), b"\xb0X") 668 | 669 | def test_tiny_structs(self, assert_packable): 670 | for size in range(0x10): 671 | fields = [1] * size 672 | data_in = Structure(b"A", *fields) 673 | data_out = bytearray((0xB0 + size, 0x41, *fields)) 674 | assert_packable(data_in, bytes(data_out)) 675 | 676 | def test_struct_size_overflow(self, pack): 677 | with pytest.raises(OverflowError): 678 | fields = [1] * 16 679 | pack(Structure(b"X", *fields)) 680 | 681 | def test_illegal_uuid(self, assert_packable): 682 | with pytest.raises(ValueError): 683 | assert_packable(uuid4(), b"\xb0XXX") 684 | -------------------------------------------------------------------------------- /tests/vector/from_driver/test_vector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) "Neo4j" 2 | # Neo4j Sweden AB [https://neo4j.com] 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # https://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from __future__ import annotations 18 | 19 | import abc 20 | import math 21 | import random 22 | import struct 23 | import sys 24 | import typing as t 25 | 26 | import pytest 27 | 28 | from neo4j._optional_deps import ( 29 | np, 30 | pa, 31 | ) 32 | from neo4j.vector import ( 33 | _swap_endian, 34 | Vector, 35 | VectorDType, 36 | VectorEndian, 37 | ) 38 | 39 | 40 | if t.TYPE_CHECKING: 41 | import numpy 42 | import pyarrow 43 | from pytest_mock import MockFixture 44 | 45 | T_ENDIAN_LITERAL: t.TypeAlias = t.Literal["big", "little"] | VectorEndian 46 | T_DTYPE_LITERAL: t.TypeAlias = ( 47 | t.Literal["i8", "i16", "i32", "i64", "f32", "f64"] | VectorDType 48 | ) 49 | T_DTYPE_INT_LITERAL: t.TypeAlias = t.Literal[ 50 | "i8", 51 | "i16", 52 | "i32", 53 | "i64", 54 | VectorDType.I8, 55 | VectorDType.I16, 56 | VectorDType.I32, 57 | VectorDType.I64, 58 | ] 59 | T_DTYPE_FLOAT_LITERAL: t.TypeAlias = t.Literal[ 60 | "f32", "f64", VectorDType.F32, VectorDType.F64 61 | ] 62 | T_EXT_LITERAL: t.TypeAlias = t.Literal["numpy", "rust", "python"] 63 | 64 | 65 | ENDIAN_LITERALS: tuple[T_ENDIAN_LITERAL, ...] = ( 66 | "big", 67 | "little", 68 | *VectorEndian, 69 | ) 70 | DTYPE_LITERALS: tuple[T_DTYPE_LITERAL, ...] = ( 71 | "i8", 72 | "i16", 73 | "i32", 74 | "i64", 75 | "f32", 76 | "f64", 77 | *VectorDType, 78 | ) 79 | DTYPE_INT_LITERALS: tuple[T_DTYPE_INT_LITERAL, ...] = ( 80 | "i8", 81 | "i16", 82 | "i32", 83 | "i64", 84 | VectorDType.I8, 85 | VectorDType.I16, 86 | VectorDType.I32, 87 | VectorDType.I64, 88 | ) 89 | DTYPE_FLOAT_LITERALS: tuple[T_DTYPE_FLOAT_LITERAL, ...] = ( 90 | "f32", 91 | "f64", 92 | VectorDType.F32, 93 | VectorDType.F64, 94 | ) 95 | 96 | 97 | def _max_value_be_bytes(size: t.Literal[1, 2, 4, 8], count: int = 1) -> bytes: 98 | def generator(count_: int) -> t.Iterable[int]: 99 | pack_format = { 100 | 1: ">b", 101 | 2: ">h", 102 | 4: ">i", 103 | 8: ">q", 104 | }[size] 105 | if count_ <= 0: 106 | return 107 | yield from struct.pack(pack_format, 0) 108 | count_ -= 1 109 | i = 0 110 | min_value = -(2 ** (size * 8 - 1)) 111 | max_value = 2 ** (size * 8 - 1) - 1 112 | while True: 113 | if count_ <= 0: 114 | return 115 | yield from struct.pack(pack_format, min_value + i) 116 | count_ -= 1 117 | if count_ == 0: 118 | return 119 | yield from struct.pack(pack_format, max_value - i) 120 | count_ -= 1 121 | i += 1 122 | i %= 2 ** (size * 8) 123 | 124 | return bytes(generator(count)) 125 | 126 | 127 | def _random_value_be_bytes( 128 | size: t.Literal[1, 2, 4, 8], count: int = 1 129 | ) -> bytes: 130 | def generator(count_: int) -> t.Iterable[int]: 131 | pack_format = { 132 | 1: ">B", 133 | 2: ">H", 134 | 4: ">I", 135 | 8: ">Q", 136 | }[size] 137 | while count_ > 0: 138 | yield from struct.pack( 139 | pack_format, random.randint(0, 2 ** (size * 8) - 1) 140 | ) 141 | count_ -= 1 142 | 143 | return bytes(generator(count)) 144 | 145 | 146 | def _get_type_size(dtype: str) -> t.Literal[1, 2, 4, 8]: 147 | lookup: dict[str, t.Literal[1, 2, 4, 8]] = { 148 | "i8": 1, 149 | "i16": 2, 150 | "i32": 4, 151 | "i64": 8, 152 | "f32": 4, 153 | "f64": 8, 154 | } 155 | return lookup[dtype] 156 | 157 | 158 | class NormalizableBytes(abc.ABC): 159 | @abc.abstractmethod 160 | def normalized_bytes(self) -> bytes: ... 161 | 162 | @abc.abstractmethod 163 | def raw_bytes(self) -> bytes: ... 
164 | 165 | 166 | class Bytes(NormalizableBytes): 167 | _data: bytes 168 | 169 | def __init__(self, data: bytes) -> None: 170 | self._data = data 171 | 172 | def normalized_bytes(self) -> bytes: 173 | return self._data 174 | 175 | def raw_bytes(self) -> bytes: 176 | return self._data 177 | 178 | 179 | class Float32NanPayloadBytes(NormalizableBytes): 180 | _data: bytes 181 | 182 | def __init__(self, data: bytes) -> None: 183 | self._data = data 184 | 185 | def normalized_bytes(self) -> bytes: 186 | type_size = _get_type_size("f32") 187 | pack_format = _dtype_to_pack_format("f32") 188 | 189 | # Python <3.14 does not preserve NaN payloads on struct pack/unpack 190 | # for float32: 191 | # https://github.com/python/cpython/issues/130317 192 | if sys.version_info >= (3, 14): 193 | return self._data 194 | chunks = ( 195 | self._data[i : i + type_size] 196 | for i in range(0, len(self._data), type_size) 197 | ) 198 | return bytes( 199 | b 200 | for chunk in chunks 201 | for b in struct.pack( 202 | pack_format, struct.unpack(pack_format, chunk)[0] 203 | ) 204 | ) 205 | 206 | def raw_bytes(self) -> bytes: 207 | return self._data 208 | 209 | 210 | def _dtype_to_pack_format(dtype: str) -> str: 211 | return { 212 | "i8": ">b", 213 | "i16": ">h", 214 | "i32": ">i", 215 | "i64": ">q", 216 | "f32": ">f", 217 | "f64": ">d", 218 | }[dtype] 219 | 220 | 221 | def _mock_mask_extensions( 222 | used_ext: T_EXT_LITERAL, mocker: MockFixture 223 | ) -> None: 224 | from neo4j.vector import ( 225 | _swap_endian_unchecked_np, 226 | _swap_endian_unchecked_py, 227 | _swap_endian_unchecked_rust, 228 | ) 229 | 230 | match used_ext: 231 | case "numpy": 232 | if _swap_endian_unchecked_np is None: 233 | pytest.skip("numpy not installed") 234 | mocker.patch( 235 | "neo4j.vector._swap_endian_unchecked", 236 | new=_swap_endian_unchecked_np, 237 | ) 238 | case "rust": 239 | if _swap_endian_unchecked_rust is None: 240 | pytest.skip("rust extensions are not installed") 241 | mocker.patch( 242 | "neo4j.vector._swap_endian_unchecked", 243 | new=_swap_endian_unchecked_rust, 244 | ) 245 | case "python": 246 | mocker.patch( 247 | "neo4j.vector._swap_endian_unchecked", 248 | new=_swap_endian_unchecked_py, 249 | ) 250 | case _: 251 | raise ValueError(f"Invalid ext value {used_ext}") 252 | 253 | 254 | @pytest.mark.parametrize("ext", ("numpy", "rust", "python")) 255 | def test_swap_endian(mocker: MockFixture, ext: T_EXT_LITERAL) -> None: 256 | data = bytes(range(1, 17)) 257 | _mock_mask_extensions(ext, mocker) 258 | res = _swap_endian(2, data) 259 | assert isinstance(res, bytes) 260 | assert res == bytes( 261 | (2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15) 262 | ) 263 | res = _swap_endian(4, data) 264 | assert isinstance(res, bytes) 265 | assert res == bytes( 266 | (4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13) 267 | ) 268 | res = _swap_endian(8, data) 269 | assert isinstance(res, bytes) 270 | assert res == bytes( 271 | (8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9) 272 | ) 273 | 274 | 275 | @pytest.mark.parametrize("ext", ("numpy", "rust", "python")) 276 | @pytest.mark.parametrize("type_size", (-1, 0, 3, 5, 7, 9, 16, 32)) 277 | def test_swap_endian_unhandled_size( 278 | ext: T_EXT_LITERAL, type_size: int, mocker: MockFixture 279 | ) -> None: 280 | data = bytes(i % 256 for i in range(1, abs(type_size) * 4)) 281 | _mock_mask_extensions(ext, mocker) 282 | 283 | with pytest.raises(ValueError, match=str(type_size)): 284 | _swap_endian(type_size, data) 285 | 286 | 287 | @pytest.mark.parametrize( 288 | ("dtype", 
"data"), 289 | ( 290 | pytest.param( 291 | "i8", 292 | b"", 293 | id="i8-empty", 294 | ), 295 | pytest.param( 296 | "i8", 297 | bytes.fromhex("01"), 298 | id="i8-single", 299 | ), 300 | pytest.param( 301 | "i8", 302 | bytes.fromhex("01020304"), 303 | id="i8-some", 304 | ), 305 | pytest.param( 306 | "i8", 307 | _max_value_be_bytes(1, 4096), 308 | id="i8-limit", 309 | ), 310 | pytest.param( 311 | "i16", 312 | b"", 313 | id="i16-empty", 314 | ), 315 | pytest.param( 316 | "i16", 317 | bytes.fromhex("0001"), 318 | id="i16-single", 319 | ), 320 | pytest.param( 321 | "i16", 322 | bytes.fromhex("00010002"), 323 | id="i16-some", 324 | ), 325 | pytest.param( 326 | "i16", 327 | _max_value_be_bytes(2, 4096), 328 | id="i16-limit", 329 | ), 330 | pytest.param( 331 | "i32", 332 | b"", 333 | id="i32-empty", 334 | ), 335 | pytest.param( 336 | "i32", 337 | bytes.fromhex("00000001"), 338 | id="i32-single", 339 | ), 340 | pytest.param( 341 | "i32", 342 | bytes.fromhex("0000000100000002"), 343 | id="i32-some", 344 | ), 345 | pytest.param( 346 | "i32", 347 | _max_value_be_bytes(4, 4096), 348 | id="i32-limit", 349 | ), 350 | pytest.param( 351 | "i64", 352 | b"", 353 | id="i64-empty", 354 | ), 355 | pytest.param( 356 | "i64", 357 | bytes.fromhex("0000000000000001"), 358 | id="i64-single", 359 | ), 360 | pytest.param( 361 | "i64", 362 | bytes.fromhex("0000000000000001 0000000000000002"), 363 | id="i64-some", 364 | ), 365 | pytest.param( 366 | "i64", 367 | _max_value_be_bytes(8, 4096), 368 | id="i64-limit", 369 | ), 370 | pytest.param( 371 | "f32", 372 | b"", 373 | id="f32-empty", 374 | ), 375 | pytest.param( 376 | "f32", 377 | _random_value_be_bytes(4, 4096), 378 | id="f32-limit", 379 | ), 380 | pytest.param( 381 | "f64", 382 | b"", 383 | id="f64-empty", 384 | ), 385 | pytest.param( 386 | "f64", 387 | _random_value_be_bytes(8, 4096), 388 | id="f64-limit", 389 | ), 390 | ), 391 | ) 392 | @pytest.mark.parametrize("input_endian", (None, *ENDIAN_LITERALS)) 393 | @pytest.mark.parametrize("as_bytearray", (False, True)) 394 | def test_raw_data_limits( 395 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 396 | data: bytes, 397 | input_endian: T_ENDIAN_LITERAL | None, 398 | as_bytearray: bool, 399 | ) -> None: 400 | swapped_data = _swap_endian(_get_type_size(dtype), data) 401 | if input_endian is None: 402 | input_data = bytearray(data) if as_bytearray else data 403 | v = Vector(input_data, dtype) 404 | elif input_endian == "big": 405 | input_data = bytearray(data) if as_bytearray else data 406 | v = Vector(input_data, dtype, byteorder=input_endian) 407 | elif input_endian == "little": 408 | input_data = bytearray(swapped_data) if as_bytearray else swapped_data 409 | v = Vector(input_data, dtype, byteorder=input_endian) 410 | else: 411 | raise ValueError(f"Invalid input_endian {input_endian}") 412 | assert v.dtype == dtype 413 | assert v.raw() == data 414 | assert v.raw(byteorder="big") == data 415 | assert v.raw(byteorder=VectorEndian.BIG) == data 416 | assert v.raw(byteorder="little") == swapped_data 417 | assert v.raw(byteorder=VectorEndian.LITTLE) == swapped_data 418 | 419 | 420 | def nan_equals(a: list[object], b: list[object]) -> bool: 421 | if len(a) != len(b): 422 | return False 423 | for i in range(len(a)): 424 | ai = a[i] 425 | bi = b[i] 426 | if ai != bi and not ( 427 | isinstance(ai, float) 428 | and isinstance(bi, float) 429 | and math.isnan(ai) 430 | and math.isnan(bi) 431 | ): 432 | return False 433 | return True 434 | 435 | 436 | @pytest.mark.parametrize("dtype", DTYPE_INT_LITERALS) 437 | 
@pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 438 | @pytest.mark.parametrize("use_init", (False, True)) 439 | def test_from_native_int_random( 440 | dtype: T_DTYPE_INT_LITERAL, 441 | repeat: int, 442 | size: int, 443 | use_init: bool, 444 | ) -> None: 445 | type_size = _get_type_size(dtype) 446 | for _ in range(repeat): 447 | data = _random_value_be_bytes(type_size, size) 448 | values = [ 449 | struct.unpack( 450 | _dtype_to_pack_format(dtype), data[i : i + type_size] 451 | )[0] 452 | for i in range(0, len(data), type_size) 453 | ] 454 | assert all(type(v) is int for v in values) 455 | if use_init: 456 | v = Vector(values, dtype) 457 | else: 458 | v = Vector.from_native(values, dtype) 459 | expected_raw = data 460 | if dtype == "f32": 461 | expected_raw = Float32NanPayloadBytes(data).normalized_bytes() 462 | assert v.raw() == expected_raw 463 | 464 | 465 | @pytest.mark.parametrize("dtype", DTYPE_FLOAT_LITERALS) 466 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 467 | @pytest.mark.parametrize("use_init", (False, True)) 468 | def test_from_native_float_random( 469 | dtype: T_DTYPE_FLOAT_LITERAL, 470 | repeat: int, 471 | size: int, 472 | use_init: bool, 473 | ) -> None: 474 | type_size = _get_type_size(dtype) 475 | for _ in range(repeat): 476 | data = _random_value_be_bytes(type_size, size) 477 | values = [ 478 | struct.unpack( 479 | _dtype_to_pack_format(dtype), data[i : i + type_size] 480 | )[0] 481 | for i in range(0, len(data), type_size) 482 | ] 483 | assert all(type(v) is float for v in values) 484 | if use_init: 485 | v = Vector(values, dtype) 486 | else: 487 | v = Vector.from_native(values, dtype) 488 | expected_raw = data 489 | if dtype == "f32": 490 | expected_raw = Float32NanPayloadBytes(data).normalized_bytes() 491 | assert v.raw() == expected_raw 492 | 493 | 494 | SPECIAL_INT_VALUES: tuple[ 495 | tuple[T_DTYPE_INT_LITERAL, int, NormalizableBytes], ... 496 | ] = ( 497 | # (dtype, value, packed_bytes_be) 498 | # i8 499 | ("i8", -128, Bytes(bytes.fromhex("80"))), 500 | ("i8", 0, Bytes(bytes.fromhex("00"))), 501 | ("i8", 127, Bytes(bytes.fromhex("7f"))), 502 | # i16 503 | ("i16", -32768, Bytes(bytes.fromhex("8000"))), 504 | ("i16", 0, Bytes(bytes.fromhex("0000"))), 505 | ("i16", 32767, Bytes(bytes.fromhex("7fff"))), 506 | # i32 507 | ("i32", -2147483648, Bytes(bytes.fromhex("80000000"))), 508 | ("i32", 0, Bytes(bytes.fromhex("00000000"))), 509 | ("i32", 2147483647, Bytes(bytes.fromhex("7fffffff"))), 510 | # i64 511 | ("i64", -9223372036854775808, Bytes(bytes.fromhex("8000000000000000"))), 512 | ("i64", 0, Bytes(bytes.fromhex("0000000000000000"))), 513 | ("i64", 9223372036854775807, Bytes(bytes.fromhex("7fffffffffffffff"))), 514 | ) 515 | SPECIAL_FLOAT_VALUES: tuple[ 516 | tuple[T_DTYPE_FLOAT_LITERAL, float, NormalizableBytes], ... 
517 | ] = ( 518 | # (dtype, value, packed_bytes_be) 519 | # f32 520 | # NaN 521 | ( 522 | "f32", 523 | float("nan"), 524 | Bytes(bytes.fromhex("7fc00000")), 525 | ), 526 | ( 527 | "f32", 528 | float("-nan"), 529 | Bytes(bytes.fromhex("ffc00000")), 530 | ), 531 | ( 532 | "f32", 533 | struct.unpack(">f", bytes.fromhex("7fc00011"))[0], 534 | Bytes(bytes.fromhex("7fc00011")), 535 | ), 536 | ( 537 | "f32", 538 | struct.unpack(">f", bytes.fromhex("7f800001"))[0], 539 | Float32NanPayloadBytes(bytes.fromhex("7f800001")), 540 | ), 541 | # ±inf 542 | ( 543 | "f32", 544 | float("inf"), 545 | Bytes(bytes.fromhex("7f800000")), 546 | ), 547 | ( 548 | "f32", 549 | float("-inf"), 550 | Bytes(bytes.fromhex("ff800000")), 551 | ), 552 | # ±0.0 553 | ( 554 | "f32", 555 | 0.0, 556 | Bytes(bytes.fromhex("00000000")), 557 | ), 558 | ( 559 | "f32", 560 | -0.0, 561 | Bytes(bytes.fromhex("80000000")), 562 | ), 563 | # smallest normal 564 | ( 565 | "f32", 566 | struct.unpack(">f", bytes.fromhex("00800000"))[0], 567 | Bytes(bytes.fromhex("00800000")), 568 | ), 569 | ( 570 | "f32", 571 | struct.unpack(">f", bytes.fromhex("80800000"))[0], 572 | Bytes(bytes.fromhex("80800000")), 573 | ), 574 | # subnormal 575 | ( 576 | "f32", 577 | struct.unpack(">f", bytes.fromhex("00000001"))[0], 578 | Bytes(bytes.fromhex("00000001")), 579 | ), 580 | ( 581 | "f32", 582 | struct.unpack(">f", bytes.fromhex("80000001"))[0], 583 | Bytes(bytes.fromhex("80000001")), 584 | ), 585 | # largest normal 586 | ( 587 | "f32", 588 | struct.unpack(">f", bytes.fromhex("7f7fffff"))[0], 589 | Bytes(bytes.fromhex("7f7fffff")), 590 | ), 591 | ( 592 | "f32", 593 | struct.unpack(">f", bytes.fromhex("ff7fffff"))[0], 594 | Bytes(bytes.fromhex("ff7fffff")), 595 | ), 596 | # very small f64 being rounded to ±0 in f32 597 | ( 598 | "f32", 599 | struct.unpack(">d", bytes.fromhex("3686d601ad376ab9"))[0], 600 | Bytes(bytes.fromhex("00000000")), 601 | ), 602 | ( 603 | "f32", 604 | struct.unpack(">d", bytes.fromhex("b686d601ad376ab9"))[0], 605 | Bytes(bytes.fromhex("80000000")), 606 | ), 607 | # f64 608 | # NaN 609 | ( 610 | "f64", 611 | float("nan"), 612 | Bytes(bytes.fromhex("7ff8000000000000")), 613 | ), 614 | ( 615 | "f64", 616 | float("-nan"), 617 | Bytes(bytes.fromhex("fff8000000000000")), 618 | ), 619 | ( 620 | "f64", 621 | struct.unpack(">d", bytes.fromhex("7ff8000000000011"))[0], 622 | Bytes(bytes.fromhex("7ff8000000000011")), 623 | ), 624 | ( 625 | "f64", 626 | struct.unpack(">d", bytes.fromhex("7ff0000100000001"))[0], 627 | Bytes(bytes.fromhex("7ff0000100000001")), 628 | ), 629 | # ±inf 630 | ( 631 | "f64", 632 | float("inf"), 633 | Bytes(bytes.fromhex("7ff0000000000000")), 634 | ), 635 | ( 636 | "f64", 637 | float("-inf"), 638 | Bytes(bytes.fromhex("fff0000000000000")), 639 | ), 640 | # ±0.0 641 | ( 642 | "f64", 643 | 0.0, 644 | Bytes(bytes.fromhex("0000000000000000")), 645 | ), 646 | ( 647 | "f64", 648 | -0.0, 649 | Bytes(bytes.fromhex("8000000000000000")), 650 | ), 651 | # smallest normal 652 | ( 653 | "f64", 654 | struct.unpack(">d", bytes.fromhex("0010000000000000"))[0], 655 | Bytes(bytes.fromhex("0010000000000000")), 656 | ), 657 | ( 658 | "f64", 659 | struct.unpack(">d", bytes.fromhex("8010000000000000"))[0], 660 | Bytes(bytes.fromhex("8010000000000000")), 661 | ), 662 | # subnormal 663 | ( 664 | "f64", 665 | struct.unpack(">d", bytes.fromhex("0000000000000001"))[0], 666 | Bytes(bytes.fromhex("0000000000000001")), 667 | ), 668 | ( 669 | "f64", 670 | struct.unpack(">d", bytes.fromhex("8000000000000001"))[0], 671 | 
Bytes(bytes.fromhex("8000000000000001")), 672 | ), 673 | # largest normal 674 | ( 675 | "f64", 676 | struct.unpack(">d", bytes.fromhex("7fefffffffffffff"))[0], 677 | Bytes(bytes.fromhex("7fefffffffffffff")), 678 | ), 679 | ( 680 | "f64", 681 | struct.unpack(">d", bytes.fromhex("ffefffffffffffff"))[0], 682 | Bytes(bytes.fromhex("ffefffffffffffff")), 683 | ), 684 | ) 685 | SPECIAL_VALUES = SPECIAL_INT_VALUES + SPECIAL_FLOAT_VALUES 686 | 687 | 688 | @pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) 689 | def test_from_native_special_values( 690 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 691 | value: object, 692 | data_be_raw: NormalizableBytes, 693 | ) -> None: 694 | data_be = data_be_raw.normalized_bytes() 695 | if dtype in {"f32", "f64"}: 696 | assert isinstance(value, float) 697 | dtype_f = t.cast(t.Literal["f32", "f64"], dtype) 698 | v = Vector.from_native([value], dtype_f) 699 | elif dtype in {"i8", "i16", "i32", "i64"}: 700 | assert isinstance(value, int) 701 | dtype_i = t.cast(t.Literal["i8", "i16", "i32", "i64"], dtype) 702 | v = Vector.from_native([value], dtype_i) 703 | else: 704 | raise ValueError(f"Invalid dtype {dtype}") 705 | assert v.raw() == data_be 706 | 707 | 708 | @pytest.mark.parametrize( 709 | ("dtype", "value"), 710 | ( 711 | ("i8", "1"), 712 | ("i8", None), 713 | ("i8", 1.0), 714 | ("i16", "1"), 715 | ("i16", None), 716 | ("i16", 1.0), 717 | ("i32", "1"), 718 | ("i32", None), 719 | ("i32", 1.0), 720 | ("i64", "1"), 721 | ("i64", None), 722 | ("i64", 1.0), 723 | ("f32", "1.0"), 724 | ("f32", None), 725 | ("f32", 1), 726 | ("f64", "1.0"), 727 | ("f64", None), 728 | ("f64", 1), 729 | ), 730 | ) 731 | def test_from_native_wrong_type( 732 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 733 | value: object, 734 | ) -> None: 735 | with pytest.raises(TypeError) as exc: 736 | Vector.from_native([value], dtype) # type: ignore 737 | 738 | assert dtype in str(exc.value) 739 | assert str(type(value).__name__) in str(exc.value) 740 | 741 | 742 | @pytest.mark.parametrize( 743 | ("dtype", "value"), 744 | ( 745 | ("i8", -129), 746 | ("i8", 128), 747 | ("i16", -32769), 748 | ("i16", 32768), 749 | ("i32", -2147483649), 750 | ("i32", 2147483648), 751 | ("i64", -9223372036854775809), 752 | ("i64", 9223372036854775808), 753 | # positive value, positive exponent overflow 754 | ("f32", struct.unpack(">d", bytes.fromhex("47f0000020000000"))[0]), 755 | # negative value, positive exponent overflow 756 | ("f32", struct.unpack(">d", bytes.fromhex("c7f0000020000000"))[0]), 757 | # no such thing as negative exponent overflow: 758 | # very small values become 0.0 759 | # positive value, positive exponent, mantiassa overflow 760 | ("f32", struct.unpack(">d", bytes.fromhex("47effffff0000000"))[0]), 761 | # negative value, positive exponent, mantiassa overflow 762 | ("f32", struct.unpack(">d", bytes.fromhex("c7effffff0000000"))[0]), 763 | ), 764 | ) 765 | def test_from_native_overflow( 766 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 767 | value: object, 768 | ) -> None: 769 | with pytest.raises(OverflowError) as exc: 770 | Vector.from_native([value], dtype) # type: ignore 771 | 772 | assert dtype in str(exc.value) 773 | 774 | 775 | def _vector_from_data( 776 | data: bytes, 777 | dtype: T_DTYPE_LITERAL, 778 | endian: T_ENDIAN_LITERAL | None, 779 | ) -> Vector: 780 | match endian: 781 | case None: 782 | return Vector(data, dtype) 783 | case "big": 784 | return Vector(data, dtype, byteorder=endian) 785 | case "little": 786 | type_size 
= _get_type_size(dtype) 787 | data_le = _swap_endian(type_size, data) 788 | return Vector(data_le, dtype, byteorder=endian) 789 | case _: 790 | raise ValueError(f"Invalid endian {endian}") 791 | 792 | 793 | @pytest.mark.parametrize("dtype", DTYPE_LITERALS) 794 | @pytest.mark.parametrize( 795 | "endian", 796 | ( 797 | None, 798 | *ENDIAN_LITERALS, 799 | ), 800 | ) 801 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 802 | def test_to_native_random( 803 | dtype: T_DTYPE_LITERAL, 804 | endian: T_ENDIAN_LITERAL | None, 805 | repeat: int, 806 | size: int, 807 | ) -> None: 808 | type_size = _get_type_size(dtype) 809 | for _ in range(repeat): 810 | data = _random_value_be_bytes(type_size, size) 811 | expected = [ 812 | struct.unpack( 813 | _dtype_to_pack_format(dtype), data[i : i + type_size] 814 | )[0] 815 | for i in range(0, len(data), type_size) 816 | ] 817 | v = _vector_from_data(data, dtype, endian) 818 | assert nan_equals(v.to_native(), expected) 819 | 820 | 821 | @pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) 822 | def test_to_native_special_values( 823 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 824 | value: object, 825 | data_be_raw: NormalizableBytes, 826 | ) -> None: 827 | data_be = data_be_raw.raw_bytes() 828 | type_size = _get_type_size(dtype) 829 | pack_format = _dtype_to_pack_format(dtype) 830 | expected = [ 831 | struct.unpack(pack_format, data_be[i : i + type_size])[0] 832 | for i in range(0, len(data_be), type_size) 833 | ] 834 | v = Vector(data_be, dtype) 835 | assert nan_equals(v.to_native(), expected) 836 | 837 | 838 | def _get_numpy_dtype(dtype: str) -> str: 839 | return { 840 | "i8": "i1", 841 | "i16": "i2", 842 | "i32": "i4", 843 | "i64": "i8", 844 | "f32": "f4", 845 | "f64": "f8", 846 | }[dtype] 847 | 848 | 849 | def _get_numpy_array( 850 | data_be: bytes, dtype: str, endian: t.Literal["big", "little", "native"] 851 | ) -> numpy.ndarray: 852 | np_type = _get_numpy_dtype(dtype) 853 | type_size = _get_type_size(dtype) 854 | data_in = data_be 855 | match endian: 856 | case "big": 857 | data_in = data_be 858 | np_type = f">{np_type}" 859 | case "little": 860 | data_in = _swap_endian(type_size, data_be) 861 | np_type = f"<{np_type}" 862 | case "native": 863 | if sys.byteorder == "little": 864 | data_in = _swap_endian(type_size, data_be) 865 | np_type = f"={np_type}" 866 | return np.frombuffer(data_in, dtype=np_type) 867 | 868 | 869 | @pytest.mark.skipif(np is None, reason="numpy not installed") 870 | @pytest.mark.parametrize("dtype", ("i8", "i16", "i32", "i64", "f32", "f64")) 871 | @pytest.mark.parametrize("endian", ("big", "little", "native")) 872 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 873 | @pytest.mark.parametrize("use_init", (False, True)) 874 | def test_from_numpy_random( 875 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 876 | endian: t.Literal["big", "little", "native"], 877 | repeat: int, 878 | size: int, 879 | use_init: bool, 880 | ) -> None: 881 | type_size = _get_type_size(dtype) 882 | for _ in range(repeat): 883 | data_be = _random_value_be_bytes(type_size, size) 884 | array = _get_numpy_array(data_be, dtype, endian) 885 | v = Vector(array) if use_init else Vector.from_numpy(array) 886 | assert v.dtype == dtype 887 | assert v.raw() == data_be 888 | assert nan_equals(array.tolist(), v.to_native()) 889 | 890 | 891 | @pytest.mark.skipif(np is None, reason="numpy not installed") 892 | @pytest.mark.parametrize(("dtype", "value", "data_be_raw"), 
SPECIAL_VALUES) 893 | @pytest.mark.parametrize("endian", ("big", "little", "native")) 894 | def test_from_numpy_special_values( 895 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 896 | endian: t.Literal["big", "little", "native"], 897 | value: object, 898 | data_be_raw: NormalizableBytes, 899 | ) -> None: 900 | data_be = data_be_raw.raw_bytes() 901 | array = _get_numpy_array(data_be, dtype, endian) 902 | v = Vector.from_numpy(array) 903 | assert v.dtype == dtype 904 | assert v.raw() == data_be 905 | assert nan_equals(array.tolist(), v.to_native()) 906 | 907 | 908 | @pytest.mark.skipif(np is None, reason="numpy not installed") 909 | @pytest.mark.parametrize("dtype", ("i8", "i16", "i32", "i64", "f32", "f64")) 910 | @pytest.mark.parametrize( 911 | "endian", 912 | ( 913 | None, 914 | *ENDIAN_LITERALS, 915 | ), 916 | ) 917 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 918 | def test_to_numpy_random( 919 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 920 | endian: T_ENDIAN_LITERAL | None, 921 | repeat: int, 922 | size: int, 923 | ) -> None: 924 | type_size = _get_type_size(dtype) 925 | np_type = _get_numpy_dtype(dtype) 926 | for _ in range(repeat): 927 | data = _random_value_be_bytes(type_size, size) 928 | v = _vector_from_data(data, dtype, endian) 929 | array = v.to_numpy() 930 | assert array.dtype == np.dtype(f">{np_type}") 931 | assert array.size == len(data) // type_size 932 | assert array.tobytes() == data 933 | assert nan_equals(array.tolist(), v.to_native()) 934 | 935 | 936 | @pytest.mark.skipif(np is None, reason="numpy not installed") 937 | @pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) 938 | @pytest.mark.parametrize( 939 | "endian", 940 | ( 941 | None, 942 | *ENDIAN_LITERALS, 943 | ), 944 | ) 945 | def test_to_numpy_special_values( 946 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 947 | endian: T_ENDIAN_LITERAL | None, 948 | value: object, 949 | data_be_raw: NormalizableBytes, 950 | ) -> None: 951 | data_be = data_be_raw.raw_bytes() 952 | np_type = _get_numpy_dtype(dtype) 953 | v = _vector_from_data(data_be, dtype, endian) 954 | array = v.to_numpy() 955 | assert array.dtype == np.dtype(f">{np_type}") 956 | assert array.size == 1 957 | assert array.tobytes() == data_be 958 | assert nan_equals(array.tolist(), v.to_native()) 959 | 960 | 961 | def _get_pyarrow_dtype(dtype: str) -> pyarrow.DataType: 962 | return { 963 | "i8": pa.int8(), 964 | "i16": pa.int16(), 965 | "i32": pa.int32(), 966 | "i64": pa.int64(), 967 | "f32": pa.float32(), 968 | "f64": pa.float64(), 969 | }[dtype] 970 | 971 | 972 | def _get_pyarrow_array(data_be: bytes, dtype: str) -> pyarrow.Array: 973 | type_size = _get_type_size(dtype) 974 | length = len(data_be) // type_size 975 | data_in = data_be 976 | if sys.byteorder == "little": 977 | data_in = _swap_endian(type_size, data_be) 978 | pa_type = _get_pyarrow_dtype(dtype) 979 | buffers = [None, pa.py_buffer(data_in)] 980 | return pa.Array.from_buffers(pa_type, length, buffers, 0) 981 | 982 | 983 | @pytest.mark.skipif(pa is None, reason="pyarrow not installed") 984 | @pytest.mark.parametrize("dtype", ("i8", "i16", "i32", "i64", "f32", "f64")) 985 | @pytest.mark.parametrize("endian", ("big", "little", "native")) 986 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 987 | @pytest.mark.parametrize("use_init", (False, True)) 988 | def test_from_pyarrow_random( 989 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 990 | endian: 
t.Literal["big", "little", "native"], 991 | repeat: int, 992 | size: int, 993 | use_init: bool, 994 | ) -> None: 995 | type_size = _get_type_size(dtype) 996 | for _ in range(repeat): 997 | data_be = _random_value_be_bytes(type_size, size) 998 | array = _get_pyarrow_array(data_be, dtype) 999 | 1000 | v = Vector(array) if use_init else Vector.from_pyarrow(array) 1001 | assert v.dtype == dtype 1002 | assert v.raw() == data_be 1003 | assert nan_equals(array.to_pylist(), v.to_native()) 1004 | 1005 | 1006 | @pytest.mark.skipif(pa is None, reason="pyarrow not installed") 1007 | @pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) 1008 | def test_from_pyarrow_special_values( 1009 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 1010 | value: object, 1011 | data_be_raw: NormalizableBytes, 1012 | ) -> None: 1013 | data_be = data_be_raw.raw_bytes() 1014 | array = _get_pyarrow_array(data_be, dtype) 1015 | v = Vector.from_pyarrow(array) 1016 | assert v.dtype == dtype 1017 | assert v.raw() == data_be 1018 | assert nan_equals(array.to_pylist(), v.to_native()) 1019 | 1020 | 1021 | @pytest.mark.skipif(pa is None, reason="pyarrow not installed") 1022 | @pytest.mark.parametrize("dtype", ("i8", "i16", "i32", "i64", "f32", "f64")) 1023 | @pytest.mark.parametrize( 1024 | "endian", 1025 | ( 1026 | None, 1027 | *ENDIAN_LITERALS, 1028 | ), 1029 | ) 1030 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 1031 | def test_to_pyarrow_random( 1032 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 1033 | endian: T_ENDIAN_LITERAL | None, 1034 | repeat: int, 1035 | size: int, 1036 | ) -> None: 1037 | type_size = _get_type_size(dtype) 1038 | pa_type = _get_pyarrow_dtype(dtype) 1039 | for _ in range(repeat): 1040 | data_be = _random_value_be_bytes(type_size, size) 1041 | data_ne = data_be 1042 | if sys.byteorder == "little": 1043 | data_ne = _swap_endian(type_size, data_be) 1044 | v = _vector_from_data(data_be, dtype, endian) 1045 | array = v.to_pyarrow() 1046 | assert array.type == pa_type 1047 | assert pa.compute.count(array, mode="only_null").as_py() == 0 1048 | buffers = array.buffers() 1049 | assert len(buffers) == 2 1050 | assert buffers[0] is None 1051 | assert buffers[1].to_pybytes() == data_ne 1052 | assert nan_equals(array.tolist(), v.to_native()) 1053 | 1054 | 1055 | @pytest.mark.skipif(pa is None, reason="pyarrow not installed") 1056 | @pytest.mark.parametrize(("dtype", "value", "data_be_raw"), SPECIAL_VALUES) 1057 | @pytest.mark.parametrize( 1058 | "endian", 1059 | ( 1060 | None, 1061 | *ENDIAN_LITERALS, 1062 | ), 1063 | ) 1064 | def test_to_pyarrow_special_values( 1065 | dtype: t.Literal["i8", "i16", "i32", "i64", "f32", "f64"], 1066 | endian: T_ENDIAN_LITERAL | None, 1067 | value: object, 1068 | data_be_raw: NormalizableBytes, 1069 | ) -> None: 1070 | data_be = data_be_raw.raw_bytes() 1071 | type_size = _get_type_size(dtype) 1072 | data_ne = data_be 1073 | if sys.byteorder == "little": 1074 | data_ne = _swap_endian(type_size, data_be) 1075 | pa_type = _get_pyarrow_dtype(dtype) 1076 | v = _vector_from_data(data_be, dtype, endian) 1077 | array = v.to_pyarrow() 1078 | assert array.type == pa_type 1079 | assert pa.compute.count(array, mode="only_null").as_py() == 0 1080 | buffers = array.buffers() 1081 | assert len(buffers) == 2 1082 | assert buffers[0] is None 1083 | assert buffers[1].to_pybytes() == data_ne 1084 | assert nan_equals(array.tolist(), v.to_native()) 1085 | 1086 | 1087 | @pytest.mark.parametrize( 1088 | "vector", 1089 | ( 1090 | 
Vector([], "i8"), 1091 | Vector([], "i16"), 1092 | Vector([], "i32"), 1093 | Vector([], "i64"), 1094 | Vector([], "f32"), 1095 | Vector([], "f64"), 1096 | *( 1097 | Vector([value], dtype) 1098 | for (dtype, value, packed_bytes_be_) in SPECIAL_INT_VALUES 1099 | ), 1100 | *( 1101 | Vector([value], dtype) 1102 | for (dtype, value, packed_bytes_be_) in SPECIAL_FLOAT_VALUES 1103 | ), 1104 | ), 1105 | ) 1106 | def test_vector_repr(vector: Vector) -> None: 1107 | expected = f"Vector({vector.raw()!r}, {vector.dtype.value!r})" 1108 | assert repr(vector) == expected 1109 | 1110 | 1111 | @pytest.mark.parametrize("dtype", DTYPE_LITERALS) 1112 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 1113 | def test_vector_repr_random( 1114 | dtype: T_DTYPE_LITERAL, 1115 | repeat: int, 1116 | size: int, 1117 | ) -> None: 1118 | type_size = _get_type_size(dtype) 1119 | for _ in range(repeat): 1120 | data = _random_value_be_bytes(type_size, size) 1121 | v = Vector(data, dtype) 1122 | if isinstance(dtype, VectorDType): 1123 | expected_dtype = dtype.value 1124 | else: 1125 | expected_dtype = dtype 1126 | expected = f"Vector({data!r}, {expected_dtype!r})" 1127 | assert repr(v) == expected 1128 | 1129 | 1130 | def _dtype_to_cypher_type(dtype: T_DTYPE_LITERAL) -> str: 1131 | return { 1132 | "i8": "INTEGER8 NOT NULL", 1133 | "i16": "INTEGER16 NOT NULL", 1134 | "i32": "INTEGER32 NOT NULL", 1135 | "i64": "INTEGER NOT NULL", 1136 | "f32": "FLOAT32 NOT NULL", 1137 | "f64": "FLOAT NOT NULL", 1138 | }[dtype] 1139 | 1140 | 1141 | def _vec_element_cypher_repr(value: t.Any, dtype: T_DTYPE_LITERAL) -> str: 1142 | if isinstance(value, float) and dtype in {"f32", "f64"}: 1143 | if math.isnan(value): 1144 | return "NaN" 1145 | if math.isinf(value): 1146 | return "Infinity" if value > 0 else "-Infinity" 1147 | if dtype == "f32": 1148 | # account for float32 precision loss 1149 | compressed = struct.unpack(">f", struct.pack(">f", value))[0] 1150 | return repr(compressed) 1151 | return repr(value) 1152 | 1153 | 1154 | @pytest.mark.parametrize( 1155 | ("vector", "expected"), 1156 | ( 1157 | (Vector([], "i8"), "vector([], 0, INTEGER8 NOT NULL)"), 1158 | (Vector([], "i16"), "vector([], 0, INTEGER16 NOT NULL)"), 1159 | (Vector([], "i32"), "vector([], 0, INTEGER32 NOT NULL)"), 1160 | (Vector([], "i64"), "vector([], 0, INTEGER NOT NULL)"), 1161 | (Vector([], "f32"), "vector([], 0, FLOAT32 NOT NULL)"), 1162 | (Vector([], "f64"), "vector([], 0, FLOAT NOT NULL)"), 1163 | *( 1164 | ( 1165 | Vector([value], dtype), 1166 | ( 1167 | f"vector([{_vec_element_cypher_repr(value, dtype)}], 1, " 1168 | f"{_dtype_to_cypher_type(dtype)})" 1169 | ), 1170 | ) 1171 | for (dtype, value, packed_bytes_be) in SPECIAL_INT_VALUES 1172 | ), 1173 | *( 1174 | ( 1175 | Vector([value], dtype), 1176 | ( 1177 | f"vector([{_vec_element_cypher_repr(value, dtype)}], 1, " 1178 | f"{_dtype_to_cypher_type(dtype)})" 1179 | ), 1180 | ) 1181 | for (dtype, value, packed_bytes_be) in SPECIAL_FLOAT_VALUES 1182 | ), 1183 | ), 1184 | ) 1185 | def test_vector_str(vector: Vector, expected: str) -> None: 1186 | assert str(vector) == expected 1187 | 1188 | 1189 | @pytest.mark.parametrize("dtype", DTYPE_LITERALS) 1190 | @pytest.mark.parametrize(("repeat", "size"), ((10_000, 1), (1, 10_000))) 1191 | def test_vector_str_random( 1192 | dtype: T_DTYPE_LITERAL, 1193 | repeat: int, 1194 | size: int, 1195 | ) -> None: 1196 | type_size = _get_type_size(dtype) 1197 | cypher_dtype = _dtype_to_cypher_type(dtype) 1198 | for _ in range(repeat): 1199 | data = 
_random_value_be_bytes(type_size, size) 1200 | v = Vector(data, dtype) 1201 | values_reprs = ( 1202 | _vec_element_cypher_repr(value, dtype) for value in v.to_native() 1203 | ) 1204 | values_repr = f"[{', '.join(values_reprs)}]" 1205 | expected = f"vector({values_repr}, {size}, {cypher_dtype})" 1206 | assert str(v) == expected 1207 | --------------------------------------------------------------------------------
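
Note: the test module above exercises the public Vector API end to end: construction from raw big-endian bytes (optionally with little-endian input via byteorder), from native Python lists via Vector.from_native, and from numpy/pyarrow arrays, plus the reverse conversions (to_native, to_numpy, to_pyarrow, raw). A minimal usage sketch follows; the import path is an assumption (it is not shown in this file), and the values are chosen to be exactly representable as float32 so the round trip is lossless:

import struct

from neo4j.vector import Vector  # assumed import path, not confirmed by this file

# Build a float32 vector from native Python floats; raw() is big-endian bytes.
v = Vector.from_native([1.0, 2.5, -3.25], "f32")
assert v.dtype == "f32"
assert v.raw() == struct.pack(">3f", 1.0, 2.5, -3.25)
assert v.to_native() == [1.0, 2.5, -3.25]

# Raw bytes are accepted directly; little-endian input goes through the
# byteorder keyword, and raw() stays big-endian (as the tests above assert).
raw_le = struct.pack("<3f", 1.0, 2.5, -3.25)
assert Vector(raw_le, "f32", byteorder="little").raw() == v.raw()

# Optional integrations (only when the corresponding package is installed):
#   v.to_numpy()   -> big-endian numpy.ndarray; Vector.from_numpy(arr) accepts it back
#   v.to_pyarrow() -> pyarrow.Array of the matching type; Vector.from_pyarrow(arr) likewise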