├── requirements.test.txt ├── requirements.txt ├── .coveragerc ├── AUTHORS ├── MANIFEST.in ├── requirements.full.txt ├── .gitignore ├── .github └── workflows │ ├── lint.yml │ ├── publish.yml │ ├── lint-autoupdate.yml │ └── ci.yml ├── serialize ├── testsuite │ ├── __init__.py │ └── test_basic.py ├── msgpack.py ├── dill.py ├── serpent.py ├── bson.py ├── json.py ├── __init__.py ├── yaml_legacy.py ├── pickle.py ├── yaml.py ├── phpserialize.py ├── simplejson.py └── all.py ├── .pre-commit-config.yaml ├── CHANGES ├── LICENSE ├── pyproject.toml ├── conftest.py └── README.md /requirements.test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | packaging 3 | typing_extensions 4 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = . 3 | omit = serialize/testsuite/* 4 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Serialize is written and maintained by Hernan E. Grecco . 
2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README AUTHORS CHANGES LICENSE 2 | include *.txt 3 | recursive-include serialize *.py 4 | -------------------------------------------------------------------------------- /requirements.full.txt: -------------------------------------------------------------------------------- 1 | bson 2 | dill 3 | msgpack-python 4 | phpserialize 5 | serpent 6 | simplejson 7 | pyyaml 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | __pycache__ 3 | .eggs 4 | *egg-info* 5 | *.pyc 6 | .DS_Store 7 | docs/_build/ 8 | .idea 9 | build/ 10 | dist/ 11 | MANIFEST 12 | .tox 13 | 14 | # WebDAV file system cache files 15 | .DAV/ 16 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | with: 13 | python-version: 3.x 14 | - name: Lint 15 | uses: pre-commit/action@v2.0.0 16 | with: 17 | extra_args: --all-files --show-diff-on-failure 18 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - uses: actions/setup-python@v4 15 | with: 16 | python-version: '3.x' 17 | 18 | - name: Install dependencies 19 | run: python -m pip install 
build 20 | 21 | - name: Build package 22 | run: python -m build 23 | 24 | - name: Publish to PyPI 25 | uses: pypa/gh-action-pypi-publish@release/v1 26 | with: 27 | password: ${{ secrets.PYPI_API_TOKEN }} 28 | -------------------------------------------------------------------------------- /serialize/testsuite/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import, division, print_function, unicode_literals 4 | 5 | import os 6 | import unittest 7 | 8 | 9 | def testsuite(): 10 | """A testsuite that has all the serialize tests.""" 11 | return unittest.TestLoader().discover(os.path.dirname(__file__)) 12 | 13 | 14 | def main(): 15 | """Run the testsuite as a command-line application.""" 16 | try: 17 | unittest.main() 18 | except Exception as e: 19 | print("Error: %s" % e) 20 | 21 | 22 | def run(): 23 | """Run all tests. 24 | 25 | :return: a :class:`unittest.TestResult` object 26 | """ 27 | test_runner = unittest.TextTestRunner() 28 | return test_runner.run(testsuite()) 29 | -------------------------------------------------------------------------------- /serialize/msgpack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.msgpack 4 | ~~~~~~~~~~~~~~~~~ 5 | 6 | Helpers for Msgpack Serialization. 7 | 8 | See https://pypi.python.org/pypi/msgpack-python for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . 
import all 15 | 16 | try: 17 | import msgpack 18 | except ImportError: 19 | all.register_unavailable("msgpack", pkg="msgpack-python") 20 | raise 21 | 22 | 23 | def dumps(obj): 24 | return msgpack.packb(obj, default=all.encode) 25 | 26 | 27 | def loads(content): 28 | return msgpack.unpackb(content, object_hook=all.decode, raw=False) 29 | 30 | 31 | all.register_format("msgpack", dumps, loads) 32 | -------------------------------------------------------------------------------- /serialize/dill.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.dill 4 | ~~~~~~~~~~~~~~ 5 | 6 | Helpers for Dill Serialization. 7 | 8 | See https://pypi.python.org/pypi/dill for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all, pickle 15 | 16 | try: 17 | import dill 18 | except ImportError: 19 | all.register_unavailable("dill", pkg="dill") 20 | raise 21 | 22 | 23 | class MyPickler(dill.Pickler): 24 | dispatch_table = pickle.DispatchTable() 25 | 26 | 27 | def dump(obj, fp): 28 | MyPickler(fp).dump(obj) 29 | 30 | 31 | def load(fp): 32 | return dill.Unpickler(fp).load() 33 | 34 | 35 | all.register_format("dill", dumper=dump, loader=load) 36 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/charliermarsh/ruff-pre-commit 9 | rev: v0.1.5 10 | hooks: 11 | - id: ruff 12 | args: ["--fix"] 13 | - id: ruff-format 14 | - repo: https://github.com/executablebooks/mdformat 15 | rev: 0.7.16 16 | hooks: 17 | - id: mdformat 18 | additional_dependencies: 19 | - mdformat-gfm # 
GitHub-flavored Markdown 20 | - mdformat-black 21 | - repo: https://github.com/kynan/nbstripout 22 | rev: 0.6.1 23 | hooks: 24 | - id: nbstripout 25 | args: [--extra-keys=metadata.kernelspec metadata.language_info.version] 26 | -------------------------------------------------------------------------------- /serialize/serpent.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.serpent 4 | ~~~~~~~~~~~~~~~~~ 5 | 6 | Helpers for Serpent Serialization. 7 | 8 | See https://pypi.python.org/pypi/serpent for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all 15 | 16 | try: 17 | import serpent 18 | except ImportError: 19 | all.register_unavailable("serpent", pkg="serpent") 20 | raise 21 | 22 | 23 | class MySerializer(serpent.Serializer): 24 | def _serialize(self, obj, out, level): 25 | obj = all.encode(obj) 26 | return super()._serialize(obj, out, level) 27 | 28 | 29 | def dumps(obj): 30 | return MySerializer().serialize(obj) 31 | 32 | 33 | def loads(content): 34 | return all.traverse_and_decode(serpent.loads(content)) 35 | 36 | 37 | all.register_format("serpent", dumps, loads) 38 | -------------------------------------------------------------------------------- /serialize/bson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.bson 4 | ~~~~~~~~~~~~~~ 5 | 6 | Helpers for Bson Serialization. 7 | 8 | See https://pypi.python.org/pypi/bson for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all 15 | 16 | try: 17 | import bson 18 | except ImportError: 19 | all.register_unavailable("bson", pkg="bson") 20 | raise 21 | 22 | 23 | # In the BSON format the top level object must be a dictionary. 
24 | # If necessary, we put the object in a dummy dictionary 25 | # under the key __bson_follow__ 26 | 27 | 28 | def dumps(obj): 29 | if not isinstance(obj, dict): 30 | obj = dict(__bson_follow__=obj) 31 | return bson.dumps(all.traverse_and_encode(obj)) 32 | 33 | 34 | def loads(content): 35 | obj = all.traverse_and_decode(bson.loads(content)) 36 | return obj.get("__bson_follow__", obj) 37 | 38 | 39 | all.register_format("bson", dumps, loads) 40 | -------------------------------------------------------------------------------- /serialize/json.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.json 4 | ~~~~~~~~~~~~~~ 5 | 6 | Helpers for JSON Serialization. 7 | 8 | See https://docs.python.org/3/library/json.html for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all 15 | 16 | try: 17 | import json 18 | except ImportError: # pragma: no cover 19 | all.register_unavailable("json", pkg="json") 20 | raise 21 | 22 | 23 | class Encoder(json.JSONEncoder): 24 | def default(self, obj): 25 | return all.encode(obj, super().default) 26 | 27 | 28 | def dumps(obj): 29 | return json.dumps(obj, cls=Encoder).encode("utf-8") 30 | 31 | 32 | def dumps_pretty(obj): 33 | return json.dumps( 34 | obj, cls=Encoder, sort_keys=True, indent=4, separators=(",", ": ") 35 | ).encode("utf-8") 36 | 37 | 38 | def loads(content): 39 | return json.loads(content.decode("utf-8"), object_hook=all.decode) 40 | 41 | 42 | # We create two different subformats for json. 43 | # The first (default) is compact, the second is pretty. 
44 | 45 | all.register_format("json", dumps, loads) 46 | all.register_format("json:pretty", dumps_pretty, loads) 47 | -------------------------------------------------------------------------------- /serialize/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize 4 | ~~~~~~~~~ 5 | 6 | 7 | 8 | :copyright: (c) 2016 by Hernan E. Grecco. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from importlib import import_module 13 | 14 | try: 15 | from importlib.metadata import version 16 | except ImportError: 17 | # Backport for Python < 3.8 18 | from importlib_metadata import version 19 | 20 | try: # pragma: no cover 21 | __version__ = version("serialize") 22 | except Exception: # pragma: no cover 23 | # we seem to have a local copy not installed without setuptools 24 | # so the reported version will be unknown 25 | __version__ = "unknown" 26 | 27 | 28 | # Modules that help serialize use other packages. 29 | 30 | _MODULES = ( 31 | "bson", 32 | "dill", 33 | "json", 34 | "msgpack", 35 | "phpserialize", 36 | "pickle", 37 | "serpent", 38 | "yaml", 39 | "yaml_legacy", 40 | ) 41 | 42 | for name in _MODULES: 43 | try: 44 | import_module("." + name, "serialize") 45 | except Exception: 46 | pass 47 | 48 | # Others to consider in the future for specialized serialization: 49 | # CSV, pandas.DATAFRAMES, hickle, hdf5 50 | 51 | from .all import dump, dumps, load, loads, register_class # noqa: E402 52 | 53 | __all__ = ["dump", "dumps", "load", "loads", "register_class"] 54 | -------------------------------------------------------------------------------- /serialize/yaml_legacy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.yaml_legacy 4 | ~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Helpers for YAML Serialization. 7 | 8 | See https://pypi.python.org/pypi/pyyaml for more details. 
9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all 15 | 16 | try: 17 | import yaml 18 | from yaml.constructor import MappingNode 19 | except ImportError: 20 | all.register_unavailable("yaml:legacy", pkg="pyyaml") 21 | raise 22 | 23 | 24 | class Dumper(yaml.Dumper): 25 | def represent_data(self, data): 26 | return super().represent_data(all.encode(data)) 27 | 28 | 29 | class Loader(yaml.Loader): 30 | def construct_object(self, node, deep=False): 31 | # It seems that pyyaml is changing the internal structure of the node 32 | tmp = super().construct_object(node, deep) 33 | 34 | if isinstance(node, MappingNode): 35 | dct = super().construct_mapping(node, deep) 36 | decoded = all.decode(dct) 37 | if decoded is not dct: 38 | return decoded 39 | 40 | return tmp 41 | 42 | 43 | def dumps(obj): 44 | return yaml.dump(obj, Dumper=Dumper).encode("utf-8") 45 | 46 | 47 | def loads(content): 48 | return yaml.load(content.decode("utf-8"), Loader=Loader) 49 | 50 | 51 | all.register_format("yaml:legacy", dumps, loads) 52 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | Serialize Changelog 2 | =================== 3 | 4 | 5 | 0.3 (unreleased) 6 | ---------------- 7 | 8 | - Nothing changed yet. 9 | 10 | 11 | 0.2.1 (2022-01-12) 12 | ------------------ 13 | 14 | - Fixed package installation. 15 | 16 | 17 | 0.2 (2021-12-26) 18 | ---------------- 19 | 20 | - Added support for simplejson package https://pypi.org/project/simplejson/ 21 | - Fixed bug in yaml support. 22 | - Added support for pathlib.Path. 23 | (PR, #10, thanks @maurosilber) 24 | - Call register_class again when a new format is dynamically added. 
25 | (PR #8, thanks @jtbraun) 26 | - Change yaml support to support objects using __reduce__ 27 | (PR #8, thanks @jtbraun) 28 | - Renamed the existing 'yaml' format to 'yaml:legacy' 29 | (PR #8, thanks @jtbraun) 30 | 31 | 0.1 (2016-01-28) 32 | ---------------- 33 | - Initial Release. Implement a generic dump/dumps and load/loads. 34 | - Added support for registering new serializer/deserializer. 35 | - Added support for registering custom classes. 36 | - Format inference for file extension. 37 | - Added support for bson using https://pypi.python.org/pypi/bson 38 | - Added support for dill using https://pypi.python.org/pypi/dill 39 | - Added support for json using https://docs.python.org/3/library/json.html 40 | - Added support for msgpack using https://pypi.python.org/pypi/msgpack-python 41 | - Added support for phpserialize using https://pypi.python.org/pypi/phpserialize 42 | - Added support for pickle using https://docs.python.org/3/library/pickle.html 43 | - Added support for serpent using https://pypi.python.org/pypi/serpent 44 | - Added support for yaml using https://pypi.python.org/pypi/pyyaml 45 | -------------------------------------------------------------------------------- /.github/workflows/lint-autoupdate.yml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | schedule: 5 | - cron: "0 0 * * 0" # every Sunday at 00:00 UTC 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | autoupdate: 11 | name: autoupdate 12 | runs-on: ubuntu-latest 13 | if: github.repository == 'hgrecco/serialize' 14 | steps: 15 | - name: checkout 16 | uses: actions/checkout@v2 17 | - name: Cache pip and pre-commit 18 | uses: actions/cache@v2 19 | with: 20 | path: | 21 | ~/.cache/pre-commit 22 | ~/.cache/pip 23 | key: ${{ runner.os }}-pre-commit-autoupdate 24 | - name: setup python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: 3.x 28 | - name: upgrade pip 29 | run: python -m pip install --upgrade pip 30 | 
- name: install dependencies 31 | run: python -m pip install --upgrade pre-commit 32 | - name: version info 33 | run: python -m pip list 34 | - name: autoupdate 35 | uses: technote-space/create-pr-action@bfd4392c80dbeb54e0bacbcf4750540aecae6ed4 36 | with: 37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 38 | EXECUTE_COMMANDS: | 39 | python -m pre_commit autoupdate 40 | python -m pre_commit run --all-files 41 | COMMIT_MESSAGE: 'pre-commit: autoupdate hook versions' 42 | COMMIT_NAME: 'github-actions[bot]' 43 | COMMIT_EMAIL: 'github-actions[bot]@users.noreply.github.com' 44 | PR_TITLE: 'pre-commit: autoupdate hook versions' 45 | PR_BRANCH_PREFIX: 'pre-commit/' 46 | PR_BRANCH_NAME: 'autoupdate-${PR_ID}' 47 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 by Hernan E. Grecco and contributors. See AUTHORS 2 | for more details. 3 | 4 | Some rights reserved. 5 | 6 | Redistribution and use in source and binary forms of the software as well 7 | as documentation, with or without modification, are permitted provided 8 | that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following 15 | disclaimer in the documentation and/or other materials provided 16 | with the distribution. 17 | 18 | * The names of the contributors may not be used to endorse or 19 | promote products derived from this software without specific 20 | prior written permission. 
21 | 22 | THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND 23 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT 24 | NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 26 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32 | SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 33 | DAMAGE. 34 | -------------------------------------------------------------------------------- /serialize/pickle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.pickle 4 | ~~~~~~~~~~~~~~~~ 5 | 6 | Helpers for Pickle Serialization. 7 | 8 | See https://docs.python.org/3/library/pickle.html for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from collections.abc import MutableMapping 15 | 16 | from . 
import all 17 | 18 | try: 19 | import copyreg 20 | import pickle 21 | except ImportError: # pragma: no cover 22 | all.register_unavailable("pickle", pkg="pickle") 23 | raise 24 | 25 | 26 | class DispatchTable(MutableMapping): 27 | def __getitem__(self, item): 28 | if item in all.CLASSES: 29 | return lambda obj: ( 30 | all.CLASSES[item].from_builtin, 31 | (all.CLASSES[item].to_builtin(obj),), 32 | None, 33 | None, 34 | None, 35 | ) 36 | 37 | return copyreg.dispatch_table[item] # pragma: no cover 38 | 39 | def __setitem__(self, key, value): # pragma: no cover 40 | copyreg.dispatch_table[key] = value 41 | 42 | def __delitem__(self, key): # pragma: no cover 43 | del copyreg.dispatch_table[key] 44 | 45 | def __iter__(self): # pragma: no cover 46 | return copyreg.dispatch_table.__iter__() 47 | 48 | def __len__(self): # pragma: no cover 49 | return copyreg.dispatch_table.__len__() 50 | 51 | 52 | class MyPickler(pickle.Pickler): 53 | dispatch_table = DispatchTable() 54 | 55 | 56 | def dump(obj, fp): 57 | MyPickler(fp).dump(obj) 58 | 59 | 60 | def load(fp): 61 | return pickle.Unpickler(fp).load() 62 | 63 | 64 | all.register_format("pickle", dumper=dump, loader=load) 65 | -------------------------------------------------------------------------------- /serialize/yaml.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.yaml 4 | ~~~~~~~~~~~~~~ 5 | 6 | Helpers for YAML Serialization. 7 | 8 | See https://pypi.python.org/pypi/pyyaml for more details. 9 | 10 | :copyright: (c) 2021 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all 15 | 16 | try: 17 | import yaml 18 | from yaml.constructor import MappingNode 19 | except ImportError: 20 | all.register_unavailable("yaml", pkg="pyyaml") 21 | raise 22 | 23 | 24 | # Note: these tags share a global namespace, so we need a unique URL/tag to use here. 
25 | # 26 | # Pyyaml uses "tag:yaml.org,2002:...", some of which are defined by 27 | # yaml.org/spec/..., but the python-specific ones (like python/tuple, 28 | # python/complex, etc) seem to be their own, and probably should have used the 29 | # hostname pyyaml.org. Serialize's github homepage ought to be a pretty stable 30 | # URL we can use. 31 | SERIALIZED_TAG = "tag:github.com/hgrecco/serialize,2019:python/serialize-encode" 32 | 33 | 34 | class Dumper(yaml.Dumper): 35 | def represent_serialized(self, data): 36 | return self.represent_mapping(SERIALIZED_TAG, all.encode(data)) 37 | 38 | 39 | class Loader(yaml.Loader): 40 | def construct_serialized(self, node): 41 | assert node.tag == SERIALIZED_TAG 42 | assert isinstance(node, MappingNode) 43 | dct = self.construct_mapping(node, deep=True) 44 | return all.decode(dct) 45 | 46 | 47 | def dumps(obj): 48 | return yaml.dump(obj, Dumper=Dumper).encode("utf-8") 49 | 50 | 51 | def loads(content): 52 | return yaml.load(content.decode("utf-8"), Loader=Loader) 53 | 54 | 55 | def _register_class(klass): 56 | Dumper.add_representer(klass, Dumper.represent_serialized) 57 | 58 | Loader.add_constructor(SERIALIZED_TAG, Loader.construct_serialized) 59 | 60 | 61 | all.register_format("yaml", dumps, loads, register_class=_register_class) 62 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "Serialize" 3 | authors = [ 4 | {name="Hernan E. Grecco", email="hernan.grecco@gmail.com"} 5 | ] 6 | license = {text = "BSD-3-Clause"} 7 | description = "A common API for multiple serialization formats with support for custom classes" 8 | readme = "README.rst" 9 | maintainers = [ 10 | {name="Hernan E. 
Grecco", email="hernan.grecco@gmail.com"}, 11 | ] 12 | keywords = ["serialization", "deserialization", "packing", "unpacking"] 13 | classifiers = [ 14 | "Development Status :: 4 - Beta", 15 | "Intended Audience :: Developers", 16 | "Intended Audience :: End Users/Desktop", 17 | "License :: OSI Approved :: BSD License", 18 | "Operating System :: MacOS :: MacOS X", 19 | "Operating System :: Microsoft :: Windows", 20 | "Operating System :: POSIX", 21 | "Programming Language :: Python", 22 | "Topic :: Software Development :: Libraries", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | "Programming Language :: Python :: 3.11", 26 | "Programming Language :: Python :: 3.12", 27 | ] 28 | requires-python = ">=3.9" 29 | dynamic = ["dependencies", "optional-dependencies", "version"] 30 | 31 | [tool.setuptools.package-data] 32 | serialize = ["py.typed", ] 33 | 34 | [tool.setuptools] 35 | packages = ["serialize"] 36 | 37 | [build-system] 38 | requires = ["setuptools", "setuptools-scm"] 39 | build-backend = "setuptools.build_meta" 40 | 41 | [tool.setuptools.dynamic] 42 | dependencies = {file = "requirements.txt"} 43 | optional-dependencies.test = {file = "requirements.test.txt"} 44 | optional-dependencies.full = {file = "requirements.full.txt"} 45 | 46 | [project.urls] 47 | "Homepage" = "https://github.com/hgrecco/serialize" 48 | "Bug Tracker" = "https://github.com/hgrecco/serialize/issues" 49 | 50 | [tool.setuptools_scm] 51 | 52 | [tool.pytest.ini_options] 53 | addopts = "--import-mode=importlib --doctest-modules" 54 | pythonpath = "." 
55 | 56 | [tool.ruff] 57 | select = ["E", "F", "I"] 58 | extend-include = ["*.ipynb"] 59 | -------------------------------------------------------------------------------- /serialize/phpserialize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.phpserialize 4 | ~~~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Helpers for PHP Serialization. 7 | 8 | See https://pypi.python.org/pypi/phpserialize for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from collections import ChainMap 15 | 16 | from . import all 17 | 18 | try: 19 | import phpserialize 20 | except ImportError: 21 | all.register_unavailable("phpserialize", pkg="phpserialize") 22 | raise 23 | 24 | # PHP Serialize does not support list and tuples, so we convert them to maps. 25 | 26 | 27 | def _traverse_list_ec(obj, ef, td): 28 | return dict( 29 | __class_name__="builtin_list", 30 | __dumped_obj__={ 31 | ndx: all.traverse_and_encode(val, ef, td) for ndx, val in enumerate(obj) 32 | }, 33 | ) 34 | 35 | 36 | def _traverse_tuple_ec(obj, ef, td): 37 | return dict( 38 | __class_name__="builtin_tuple", 39 | __dumped_obj__={ 40 | ndx: all.traverse_and_encode(val, ef, td) for ndx, val in enumerate(obj) 41 | }, 42 | ) 43 | 44 | 45 | CUSTOM_TRAVERSE = ChainMap( 46 | {list: _traverse_list_ec, tuple: _traverse_tuple_ec}, all.DEFAULT_TRAVERSE_EC 47 | ) 48 | 49 | 50 | def _helper(dct): 51 | return (mytransverse(dct[n]) for n in range(len(dct))) 52 | 53 | 54 | CUSTOM_CLASSES_BY_NAME = ChainMap( 55 | { 56 | "builtin_list": all.ClassHelper(None, lambda obj: list(_helper(obj))), 57 | "builtin_tuple": all.ClassHelper(None, lambda obj: tuple(_helper(obj))), 58 | }, 59 | all.CLASSES_BY_NAME, 60 | ) 61 | 62 | 63 | def mytransverse(obj): 64 | return all.traverse_and_decode(obj, lambda o: all.decode(o, CUSTOM_CLASSES_BY_NAME)) 65 | 66 | 67 | def dumps(obj): 68 | return 
phpserialize.dumps(all.traverse_and_encode(obj, None, CUSTOM_TRAVERSE)) 69 | 70 | 71 | def loads(content): 72 | obj = phpserialize.loads(content, charset="utf-8", decode_strings=True) 73 | return mytransverse(obj) 74 | 75 | 76 | all.register_format("phpserialize", dumps, loads) 77 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test-linux: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.9", "3.10", "3.11", "3.12"] 11 | 12 | env: 13 | TEST_OPTS: "-rfsxEX -s --cov=. --cov-config=.coveragerc" 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: 'pip' 23 | 24 | - name: Upgrade pip 25 | run: python -m pip install --upgrade pip 26 | 27 | - name: Install coverage 28 | run: pip install pytest-cov 29 | 30 | - name: Install package 31 | run: python -m pip install .[test] 32 | 33 | - name: Install package all dependencies 34 | run: python -m pip install .[full] 35 | 36 | - name: Test with pytest 37 | run: pytest $TEST_OPTS 38 | 39 | - name: Coverage report 40 | run: coverage report -m 41 | 42 | - name: Coveralls Parallel 43 | env: 44 | COVERALLS_FLAG_NAME: ${{ matrix.test-number }} 45 | COVERALLS_PARALLEL: true 46 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 47 | COVERALLS_SERVICE_NAME: github 48 | run: | 49 | pip install coveralls 50 | coveralls 51 | 52 | coveralls: 53 | needs: test-linux 54 | runs-on: ubuntu-latest 55 | steps: 56 | - uses: actions/setup-python@v2 57 | with: 58 | python-version: 3.x 59 | - name: Coveralls Finished 60 | continue-on-error: true 61 | env: 62 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 63 | COVERALLS_SERVICE_NAME: github 64 | run: | 65 | pip install coveralls 66 | 
coveralls --finish 67 | 68 | # Dummy task to summarize all. See https://github.com/bors-ng/bors-ng/issues/1300 69 | ci-success: 70 | name: ci 71 | if: ${{ success() }} 72 | needs: test-linux 73 | runs-on: ubuntu-latest 74 | steps: 75 | - name: CI succeeded 76 | run: exit 0 77 | -------------------------------------------------------------------------------- /serialize/simplejson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | serialize.simplejson 4 | ~~~~~~~~~~~~~~~~~~~~ 5 | 6 | Helpers for JSON Serialization. 7 | 8 | See https://github.com/simplejson/simplejson for more details. 9 | 10 | :copyright: (c) 2016 by Hernan E. Grecco, Pieter T. Eendebak 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from . import all 15 | from .all import _traverse_dict_ec, _traverse_list_ec 16 | 17 | try: 18 | import simplejson as json 19 | except ImportError: # pragma: no cover 20 | all.register_unavailable("simplejson", pkg="simplejson") 21 | raise 22 | 23 | 24 | class Encoder(json.JSONEncoder): 25 | def default(self, obj): 26 | return all.encode(obj, super().default) 27 | 28 | 29 | def _traverse_tuple_ec(obj, ef, td): 30 | return { 31 | "__class_name__": "tuple", 32 | "__dumped_obj": list(all.traverse_and_encode(el, ef, td) for el in obj), 33 | } 34 | 35 | 36 | trav_dict = { 37 | dict: _traverse_dict_ec, 38 | list: _traverse_list_ec, 39 | tuple: _traverse_tuple_ec, 40 | } 41 | 42 | 43 | def df(obj): 44 | """Decode function that handles encoded tuples""" 45 | if obj["__class_name__"] == "tuple": 46 | return tuple(obj["__dumped_obj"]) 47 | else: 48 | return obj["__dumped_obj"] 49 | 50 | 51 | def dumps(obj): 52 | return json.dumps( 53 | all.traverse_and_encode(obj, trav_dict=trav_dict), 54 | cls=Encoder, 55 | tuple_as_array=True, 56 | ).encode("utf-8") 57 | 58 | 59 | def dumps_pretty(obj): 60 | return json.dumps( 61 | all.traverse_and_encode(obj, trav_dict=trav_dict), 62 | cls=Encoder, 63 
| sort_keys=True, 64 | tuple_as_array=True, 65 | indent=4, 66 | separators=(",", ": "), 67 | ).encode("utf-8") 68 | 69 | 70 | def loads(content): 71 | obj = json.loads(content.decode("utf-8"), object_hook=all.decode) 72 | return all.traverse_and_decode(obj, decode_func=df) 73 | 74 | 75 | # We create two different subformats for json. 76 | # The first (default) is compact, the second is pretty. 77 | 78 | all.register_format("simplejson", dumps, loads) 79 | all.register_format("simplejson:pretty", dumps_pretty, loads) 80 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | import doctest 2 | import pathlib 3 | from doctest import OutputChecker 4 | 5 | ROOT = pathlib.Path(__file__).parent 6 | PATH = ROOT / "serialize" / "testsuite" 7 | GENERATED_PATH = PATH / "generated" 8 | TEST_README = GENERATED_PATH / "test_readme.py" 9 | README = ROOT / "README.md" 10 | 11 | 12 | def setup_test_readme(): 13 | GENERATED_PATH.mkdir(exist_ok=True) 14 | 15 | INDENT = " " * 4 16 | with TEST_README.open("w") as out, README.open("r") as readme: 17 | mode = None 18 | output = [] 19 | 20 | output.append("def test_all():\n") 21 | for i, line in enumerate(readme.readlines()): 22 | output.append("\n") 23 | if mode is None and line.strip() == "```python": 24 | mode = "first_line" 25 | output[i] = INDENT + "# line %04d" % i 26 | # output[i] = 'def test_line_%04d():\n' % i 27 | continue 28 | elif line.strip() == "```": 29 | continue 30 | elif mode == "first_line": 31 | if line.strip() == "": 32 | mode = None 33 | output[i - 1] = "\n" 34 | continue 35 | if line.strip().startswith(">>>"): 36 | mode = "doctest" 37 | output[i - 2] = ( 38 | output[i - 1][:-1] + " " + output[i - 2] 39 | ) # move the def line one line up 40 | output[i - 1] = ' """\n' 41 | else: 42 | mode = "test" 43 | if mode in ("doctest", "test"): 44 | output[i] = " " + line 45 | else: 46 | pass 47 | # 
def rm_tree(pth):
    """Recursively delete the directory `pth` and everything below it.

    Only real directories are descended into. Every other entry (regular
    files, symbolic links, including links that point at a directory or at
    nothing) is unlinked, so the function never deletes anything that lives
    outside of `pth`.

    Raises `FileNotFoundError` if `pth` does not exist.
    """
    for child in pth.iterdir():
        if child.is_dir() and not child.is_symlink():
            rm_tree(child)
        else:
            child.unlink()
    pth.rmdir()
13 | ``` 14 | 15 | But they all have a different API and switching among them is not as 16 | simple as it should be. Serialize helps you to do it, including dealing 17 | with custom classes. Let's dump a dict using the `pickle` format: 18 | 19 | ```python 20 | >>> from serialize import dumps, loads 21 | >>> dumps(dict(answer=42), fmt='pickle') 22 | b'\x80\x04\x95\x0f\x00\x00\x00\x00\x00\x00\x00}\x94\x8c\x06answer\x94K*s.' 23 | >>> loads(_, fmt='pickle') 24 | {'answer': 42} 25 | ``` 26 | 27 | And here comes the cool thing: you can just change the serialization 28 | format without having to learn a new API. Let's now dump it using 29 | msgpack: 30 | 31 | ```python 32 | >>> dumps(dict(answer=42), fmt='msgpack') 33 | b'\x81\xa6answer*' 34 | >>> loads(_, fmt='msgpack') 35 | {'answer': 42} 36 | ``` 37 | 38 | Serialize currently supports 8 different formats: 39 | 40 | - bson 41 | - dill 42 | - json (builtin or with the simplejson package) 43 | - msgpack 44 | - phpserialize 45 | - pickle 46 | - serpent 47 | - yaml 48 | 49 | Serialize does not implement these formats but rather relies on established, well-tested packages. If they are installed, serialize will use them. 50 | 51 | **Serialize allows you to use them all with the same API!** 52 | 53 | You can also use `dump` and `load` to write directly to a file-like 54 | object: 55 | 56 | ```python 57 | >>> from serialize import dump, load 58 | >>> with open('output.yaml', 'wb') as fp: 59 | ... dump(dict(answer=42), fp, fmt='yaml') 60 | >>> with open('output.yaml', 'rb') as fp: 61 | ... load(fp, fmt='yaml') 62 | {'answer': 42} 63 | ``` 64 | 65 | or pass the filename directly and the format will be inferred: 66 | 67 | ```python 68 | >>> dump(dict(answer=42), 'output.yaml') 69 | >>> load('output.yaml') 70 | {'answer': 42} 71 | ``` 72 | 73 | A very common case is to dump and load objects from custom classes such 74 | as: 75 | 76 | ```python 77 | >>> class User: 78 | ... def __init__(self, name, age): 79 | ...
self.name = name 80 | ... self.age = age 81 | ... 82 | >>> john = User('John Smith', 27) 83 | ``` 84 | 85 | But some serialization packages do not support this important feature 86 | and the rest usually have very different APIs for it. Serialize 87 | provides you with a common, simple interface for this. You just need to 88 | define two functions: one that converts the object to an instance of a 89 | builtin type, and one that does the converse: 90 | 91 | ```python 92 | >>> from serialize import register_class 93 | >>> def user_to_builtin(u): 94 | ... return (u.name, u.age) 95 | ... 96 | >>> def user_from_builtin(c): 97 | ... return User(c[0], c[1]) 98 | ... 99 | 100 | >>> register_class(User, user_to_builtin, user_from_builtin) 101 | ``` 102 | 103 | And that's all. You can then use it directly without any hassle: 104 | 105 | ```python 106 | >>> dumps(john, fmt='bson') 107 | b"x\x00\x00\x00\x03__bson_follow__\x00b\x00\x00\x00\x02__class_name__\x00\x1b\x00\x00\x00\x00\x04__dumped_obj__\x00\x1e\x00\x00\x00\x020\x00\x0b\x00\x00\x00John Smith\x00\x101\x00\x1b\x00\x00\x00\x00\x00\x00" 108 | ain__.Username'>\x00\x00\x00" 109 | >>> v = loads(_, fmt='bson') 110 | >>> v.name 111 | 'John Smith' 112 | >>> v.age 113 | 27 114 | ``` 115 | 116 | Enjoy!
class X:
    """Small two-attribute value object used as the custom class under test.

    Two instances compare equal when they are of exactly the same class and
    both `a` and `b` compare equal. `str()` and `repr()` share one
    implementation.
    """

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __eq__(self, other):
        if other.__class__ is not self.__class__:
            return False
        # `a` is compared first; `b` is only looked at when `a` matches.
        return not (self.a != other.a or self.b != other.b)

    def __str__(self):
        fields = (self.a, self.b)
        return "X(%s, %s)" % fields

    __repr__ = __str__
def _test_round_trip(obj, fmt):
    """Assert that `obj` survives every dump/load combination for `fmt`.

    Checks, in order: `dumps` -> `loads`, that `dump` writes exactly the
    bytes `dumps` returns, and `dump` -> `load` through an in-memory buffer.
    """
    dumped = dumps(obj, fmt)

    # dumps / loads
    assert obj == loads(dumped, fmt)

    buf = io.BytesIO()
    dump(obj, buf, fmt)

    # dump / dumps
    assert dumped == buf.getvalue()

    buf.seek(0)
    # dump / load
    assert obj == load(buf, fmt)


@pytest.mark.parametrize("obj", VALUES)
@pytest.mark.parametrize("fmt", FORMATS)
def test_round_trip(obj, fmt):
    """Round-trip every sample value through every format without warnings.

    The ``_test`` and ``dill`` formats are not exercised here.
    """
    # Imported locally so this function carries its own dependency.
    import warnings

    if fmt == "_test" or fmt == "dill":
        return

    # `pytest.warns(None)` was deprecated in pytest 7 and is rejected by
    # pytest 8. Recording with the standard library keeps the original
    # meaning: collect every warning raised and require that there are none.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        _test_round_trip(obj, fmt)

    assert len(record) == 0
def test_no_dumper_no_loader():
    """A format registered without dumper and loader must refuse all four calls.

    The format is registered here (and always removed again) so that the
    `ValueError` really comes from the missing dumper/loader and not from the
    format name being unknown. ``extension=None`` keeps the throwaway format
    out of the extension lookup table.
    """
    register_format("_test", extension=None)
    try:
        with pytest.raises(ValueError, match="is not defined"):
            dumps("hello", "_test")

        with pytest.raises(ValueError, match="is not defined"):
            loads("hello", "_test")

        buf = io.BytesIO()
        with pytest.raises(ValueError, match="is not defined"):
            dump("hello", buf, "_test")

        buf = io.BytesIO()
        with pytest.raises(ValueError, match="is not defined"):
            load(buf, "_test")
    finally:
        unregister_format("_test")
@pytest.mark.parametrize("fmt", FORMATS)
@pytest.mark.parametrize("klass1", [Reduce_2, Reduce_3, Reduce_4, Reduce_5])
@pytest.mark.parametrize("klass2", [Reduce_2, Reduce_3, Reduce_4, Reduce_5])
@pytest.mark.skipif(sys.version_info < (3, 8), reason="requires python3.8 or higher")
def test_reduce(fmt, klass1, klass2):
    """Round-trip instances of the ``Reduce_*`` classes through format `fmt`.

    Every pairing of the four ``Reduce_*`` classes is tried: an instance of
    `klass1` is round-tripped on its own, then an instance of `klass2` is
    created, stored inside the first one under the key ``"B"`` and
    round-tripped.
    """
    # yaml:legacy exists because it did not handle these cases, so skip these tests
    # (the "_test" and "dill" formats are left out as well).
    if fmt == "yaml:legacy" or fmt == "_test" or fmt == "dill":
        return

    a = klass1(a=1, b=2, c=dict(d=3, e=4))
    _test_round_trip(a, fmt)

    b = klass2(f=8, g=9, h=dict(i=9, j=10))
    a["B"] = b
    # NOTE(review): `b` was just nested into `a`, yet it is `b` (not the
    # container `a`) that is round-tripped here - confirm this is intended.
    _test_round_trip(b, fmt)
def encode_helper(obj, to_builtin):
    """Wrap `obj` in the two-entry dict used to carry custom objects.

    The result maps ``"__class_name__"`` to ``str(obj.__class__)`` and
    ``"__dumped_obj__"`` to whatever `to_builtin(obj)` returns.
    """
    class_name = str(obj.__class__)
    payload = to_builtin(obj)
    return {"__class_name__": class_name, "__dumped_obj__": payload}
def _traverse_dict_ec(obj, ef, td):
    """Encode a dict: every key and every value is traversed and encoded."""
    return {
        traverse_and_encode(k, ef, td): traverse_and_encode(v, ef, td)
        for k, v in obj.items()
    }


def _traverse_list_ec(obj, ef, td):
    """Encode a list element by element, returning a new list."""
    return [traverse_and_encode(el, ef, td) for el in obj]


def _traverse_tuple_ec(obj, ef, td):
    """Encode a tuple element by element, returning a new tuple."""
    return tuple(traverse_and_encode(el, ef, td) for el in obj)


#: Default mapping from container type to the function that encodes it.
DEFAULT_TRAVERSE_EC = {
    dict: _traverse_dict_ec,
    list: _traverse_list_ec,
    tuple: _traverse_tuple_ec,
}


def traverse_and_encode(obj, encode_func=None, trav_dict=None):
    """Traverse a Python data structure encoding each element with encode_func.

    It is used with serialization packages that do not support
    custom types.

    `encode_func` is applied to every object that is not handled by
    `trav_dict`; when it is None, `encode` is used.

    `trav_dict` can be used to provide custom ways of traversing structures.
    It maps a type to a callable ``func(obj, encode_func, trav_dict)``; the
    first entry whose type matches `obj` (via `isinstance`) wins. When it is
    None, `DEFAULT_TRAVERSE_EC` is used. An explicitly passed empty mapping is
    honoured and disables traversal altogether.
    """
    # Compare against None rather than relying on truthiness, so that an
    # explicitly supplied (even empty) mapping is never silently replaced.
    if encode_func is None:
        encode_func = encode
    if trav_dict is None:
        trav_dict = DEFAULT_TRAVERSE_EC

    for t, func in trav_dict.items():
        if isinstance(obj, t):
            return func(obj, encode_func, trav_dict)

    return encode_func(obj)


def decode(dct, classes_by_name=None):
    """Rebuild a custom object from its encoded dict, if possible.

    If `dct` is a dict whose ``__class_name__`` entry names a class found in
    `classes_by_name` and which has a ``__dumped_obj__`` entry, the matching
    `from_builtin` function is called with that entry and its result is
    returned. In every other case `dct` is returned unchanged.

    `classes_by_name` maps the class name to a ``(to_builtin, from_builtin)``
    pair; when it is None, `CLASSES_BY_NAME` is used.
    """
    if not isinstance(dct, dict):
        return dct

    s = dct.get("__class_name__", None)
    if s is None:
        return dct

    if classes_by_name is None:
        classes_by_name = CLASSES_BY_NAME
    try:
        _, from_builtin = classes_by_name[s]
        c = dct["__dumped_obj__"]
    except KeyError:
        # Unknown class name or missing payload: leave the dict as it is.
        return dct

    return from_builtin(c)


def _traverse_dict_dc(obj, df, td):
    """Decode a dict.

    A dict carrying ``__class_name__`` is handed to `df` as a whole; any
    other dict has its keys and values traversed and decoded.
    """
    if "__class_name__" in obj:
        return df(obj)

    return {
        traverse_and_decode(k, df, td): traverse_and_decode(v, df, td)
        for k, v in obj.items()
    }


def _traverse_list_dc(obj, df, td):
    """Decode a list element by element, returning a new list."""
    return [traverse_and_decode(el, df, td) for el in obj]


def _traverse_tuple_dc(obj, df, td):
    """Decode a tuple element by element, returning a new tuple."""
    return tuple(traverse_and_decode(el, df, td) for el in obj)


#: Default mapping from container type to the function that decodes it.
DEFAULT_TRAVERSE_DC = {
    dict: _traverse_dict_dc,
    list: _traverse_list_dc,
    tuple: _traverse_tuple_dc,
}


def traverse_and_decode(obj, decode_func=None, trav_dict=None):
    """Traverse an arbitrary Python object structure
    calling a callback function for every encoded dict in the structure,
    and inserting the return value of the callback as the new value.

    This is used for serialization with libraries that do not have object hooks.

    `decode_func` defaults to `decode` and `trav_dict` to
    `DEFAULT_TRAVERSE_DC` when they are None. `trav_dict` maps a type to a
    callable ``func(obj, decode_func, trav_dict)``. Objects that match no
    entry of `trav_dict` are returned unchanged; an explicitly passed empty
    mapping is honoured.
    """
    if decode_func is None:
        decode_func = decode
    if trav_dict is None:
        trav_dict = DEFAULT_TRAVERSE_DC

    for t, func in trav_dict.items():
        if isinstance(obj, t):
            return func(obj, decode_func, trav_dict)

    return obj
MISSING = object()  # tells "argument not given" apart from an explicit None


def unregister_format(fmt):
    """Remove a previously registered serialization format.

    Extension mappings that point at `fmt` are removed as well, so a file
    extension can no longer resolve to a format that is gone.

    Raises `KeyError` if `fmt` is not registered.
    """
    del FORMATS[fmt]

    # Collect first: the mapping must not change size while iterating it.
    stale = [ext for ext, name in FORMAT_BY_EXTENSION.items() if name == fmt]
    for ext in stale:
        del FORMAT_BY_EXTENSION[ext]


def register_format(
    fmt,
    dumpser=None,
    loadser=None,
    dumper=None,
    loader=None,
    extension=MISSING,
    register_class=None,
):
    """Register an available serialization format.

    `fmt` is a unique string identifying the format, such as `json`. Use a colon (`:`)
    to separate between subformats.

    `dumpser` and `dumper` should be callables with the same purpose and arguments
    as `json.dumps` and `json.dump`. If one of those is missing, it will be
    generated automatically from the other. If both are missing, dumping with
    this format raises `ValueError`.

    `loadser` and `loader` should be callables with the same purpose and arguments
    as `json.loads` and `json.load`. If one of those is missing, it will be
    generated automatically from the other. If both are missing, loading with
    this format raises `ValueError`.

    `extension` is the file extension used to guess the desired serialization format
    when loading from or dumping to a file. If not given, the part before the colon of
    `fmt` will be used. If `None`, the format will not be associated with any extension.
    Extensions are matched case-insensitively, and the first format registered
    for an extension keeps it.

    `register_class` is a callback made when a class is registered with
    `serialize.register_class`. When a new format is registered, it is called
    for every previously registered class. It takes one argument, the
    class to register. See `serialize.yaml.py` for an example.

    Raises `ValueError` if `fmt` is already registered.
    """

    # For simplicity, we do not allow overwriting a format.
    if fmt in FORMATS:
        raise ValueError("%s is already defined." % fmt)

    # Here we generate register_class if it is not present
    if not register_class:

        def register_class(klass):
            pass

    # Here we generate dumper/dumpser if they are not present.
    if dumper and not dumpser:

        def dumpser(obj):
            buf = BytesIO()
            dumper(obj, buf)
            return buf.getvalue()

    elif not dumper and dumpser:

        def dumper(obj, fp):
            fp.write(dumpser(obj))

    elif not dumper and not dumpser:

        def raiser(*args, **kwargs):
            raise ValueError("dump/dumps is not defined for %s" % fmt)

        dumper = dumpser = raiser

    # Here we generate loader/loadser if they are not present.
    if loader and not loadser:

        def loadser(serialized):
            return loader(BytesIO(serialized))

    elif not loader and loadser:

        def loader(fp):
            return loadser(fp.read())

    elif not loader and not loadser:

        def raiser(*args, **kwargs):
            raise ValueError("load/loads is not defined for %s" % fmt)

        loader = loadser = raiser

    if extension is MISSING:
        extension = fmt.split(":", 1)[0]

    FORMATS[fmt] = Format(extension, dumper, dumpser, loader, loadser, register_class)

    if extension:
        # Keys are stored lower-cased, so the "already taken" check has to use
        # the lower-cased key too; otherwise a mixed-case extension would
        # overwrite the mapping of the format registered first.
        FORMAT_BY_EXTENSION.setdefault(extension.lower(), fmt)

    # register previously registered classes with the new format
    for klass in CLASSES:
        FORMATS[fmt].register_class(klass)


def register_unavailable(fmt, msg="", pkg="", extension=MISSING):
    """Register an unavailable serialization format.

    Unavailable formats are those known by Serialize but that cannot be used
    due to a missing requirement (e.g. the package that does the work).

    `msg` is the explanation shown when the format is requested. If `pkg` is
    given, `msg` is replaced by a standard sentence naming that package.

    `extension` follows the same rules as in `register_format`.
    """
    if pkg:
        msg = "This serialization format requires the %s package." % pkg

    if extension is MISSING:
        extension = fmt.split(":", 1)[0]

    UNAVAILABLE_FORMATS[fmt] = UnavailableFormat(extension, msg)

    if extension:
        # Same case-insensitive "first one wins" rule as in register_format.
        FORMAT_BY_EXTENSION.setdefault(extension.lower(), fmt)
def dumps(obj, fmt):
    """Return `obj` serialized to bytes with the format named `fmt`."""
    handler = _get_format(fmt)
    return handler.dumps(obj)


def dump(obj, file, fmt=None):
    """Serialize `obj` into `file` with the format named `fmt`.

    `file` is either a file-like object or a filename (`str` or
    `pathlib.Path`). For a filename, `fmt` may be omitted, in which case it
    is looked up from the file extension; the file is opened in binary write
    mode.
    """
    target = pathlib.Path(file) if isinstance(file, str) else file

    if not isinstance(target, pathlib.Path):
        # Anything that is not a path is treated as an open file-like object.
        _get_format(fmt).dump(obj, target)
        return

    if fmt is None:
        fmt = _get_format_from_ext(target.suffix.lstrip("."))
    with target.open(mode="wb") as fp:
        dump(obj, fp, fmt)


def loads(serialized, fmt):
    """Return the object stored in the bytes `serialized`, read as format `fmt`."""
    handler = _get_format(fmt)
    return handler.loads(serialized)


def load(file, fmt=None):
    """Deserialize and return the object stored in `file`.

    `file` is either a file-like object or a filename (`str` or
    `pathlib.Path`). For a filename, `fmt` may be omitted, in which case it
    is looked up from the file extension; the file is opened in binary read
    mode.
    """
    source = pathlib.Path(file) if isinstance(file, str) else file

    if not isinstance(source, pathlib.Path):
        # Anything that is not a path is treated as an open file-like object.
        return _get_format(fmt).load(source)

    if fmt is None:
        fmt = _get_format_from_ext(source.suffix.lstrip("."))
    with source.open(mode="rb") as fp:
        return load(fp, fmt)


def register_class(klass, to_builtin, from_builtin):
    """Make a custom class known to the serialization machinery.

    `to_builtin` receives an instance of `klass` and must return an object
    made only of Python builtin types.

    `from_builtin` receives what `to_builtin` returned and must return an
    instance of `klass` again.

    The two functions are expected to be inverses of each other:
    >>> obj == from_builtin(to_builtin(obj))    # doctest: +SKIP
    """
    # NOTE(review): register_format documents its `register_class` callback as
    # being made when a class is registered, but nothing here calls it for the
    # formats that already exist - confirm whether that is intentional.
    helper = ClassHelper(to_builtin, from_builtin)
    CLASSES[klass] = helper
    CLASSES_BY_NAME[str(klass)] = helper