├── pandas_msgpack ├── tests │ └── __init__.py ├── msgpack │ ├── _version.py │ ├── exceptions.py │ ├── __init__.py │ ├── _packer.pyx │ └── _unpacker.pyx ├── __init__.py ├── includes │ ├── unpack_define.h │ ├── pack.h │ ├── sysdep.h │ ├── unpack.h │ ├── unpack_template.h │ └── pack_template.h ├── move.c ├── _version.py └── packers.py ├── requirements.txt ├── ci ├── requirements-2.7.pip ├── requirements-3.6.pip ├── install_travis.sh ├── install.ps1 └── run_with_env.cmd ├── test.sh ├── .gitattributes ├── test.bat ├── docs ├── source │ ├── _static │ │ └── style.css │ ├── _templates │ │ └── layout.html │ ├── api.rst │ ├── read_write.rst │ ├── changelog.rst │ ├── install.rst │ ├── index.rst │ ├── tutorial.rst │ ├── compression.rst │ ├── Makefile │ └── conf.py ├── requirements-docs.txt └── README.rst ├── codecov.yml ├── MANIFEST.in ├── setup.cfg ├── release-procedure.md ├── .travis.yml ├── .gitignore ├── README.rst ├── appveyor.yml ├── LICENSE.md └── setup.py /pandas_msgpack/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas>=0.19.2 2 | -------------------------------------------------------------------------------- /ci/requirements-2.7.pip: -------------------------------------------------------------------------------- 1 | blosc 2 | sqlalchemy 3 | -------------------------------------------------------------------------------- /ci/requirements-3.6.pip: -------------------------------------------------------------------------------- 1 | blosc 2 | sqlalchemy 3 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pytest pandas_msgpack "$@" 3 | 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | pandas_gbq/_version.py export-subst 2 | -------------------------------------------------------------------------------- /pandas_msgpack/msgpack/_version.py: -------------------------------------------------------------------------------- 1 | version = (0, 4, 6) 2 | -------------------------------------------------------------------------------- /test.bat: -------------------------------------------------------------------------------- 1 | :: test on windows 2 | 3 | pytest pandas_msgpack %* 4 | -------------------------------------------------------------------------------- /docs/source/_static/style.css: -------------------------------------------------------------------------------- 1 | @import url("theme.css"); 2 | 3 | a.internal em {font-style: normal} 4 | -------------------------------------------------------------------------------- /docs/source/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% set css_files = css_files + ["_static/style.css"] %} 3 | -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | ipython 3 | numpydoc 4 | sphinx 5 | sphinx_rtd_theme 6 | pandas 7 | blosc 8 | cython 9 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: '30' 6 | patch: 7 | default: 8 | target: '50' 9 | branches: null 10 | -------------------------------------------------------------------------------- /docs/source/api.rst: 
-------------------------------------------------------------------------------- 1 | .. currentmodule:: pandas_msgpack 2 | .. _api: 3 | 4 | ************* 5 | API Reference 6 | ************* 7 | 8 | .. autosummary:: 9 | 10 | read_msgpack 11 | to_msgpack 12 | 13 | .. autofunction:: read_msgpack 14 | .. autofunction:: to_msgpack 15 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include README.rst 3 | include LICENSE.md 4 | include setup.py 5 | 6 | graft pandas_msgpack 7 | 8 | global-exclude *.so 9 | global-exclude *.pyd 10 | global-exclude *.pyc 11 | global-exclude *~ 12 | global-exclude \#* 13 | global-exclude .git* 14 | global-exclude .DS_Store 15 | global-exclude *.png 16 | 17 | include versioneer.py 18 | include pandas_msgpack/_version.py 19 | -------------------------------------------------------------------------------- /docs/README.rst: -------------------------------------------------------------------------------- 1 | To build a local copy of the pandas-msgpack docs, install the programs in 2 | requirements-docs.txt and run 'make html'. If you use the conda package manager 3 | these commands suffice:: 4 | 5 | git clone git@github.com:pydata/pandas-msgpack.git 6 | cd dask/docs 7 | conda create -n pandas-msgpack-docs --file requirements-docs.txt 8 | source activate pandas-msgpack-docs 9 | make html 10 | open build/html/index.html 11 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files. 
5 | 6 | [versioneer] 7 | VCS = git 8 | style = pep440 9 | versionfile_source = pandas_msgpack/_version.py 10 | versionfile_build = pandas_msgpack/_version.py 11 | tag_prefix = 12 | parentdir_prefix = pandas_msgpack- 13 | 14 | [flake8] 15 | ignore = E731 16 | -------------------------------------------------------------------------------- /release-procedure.md: -------------------------------------------------------------------------------- 1 | * Tag commit 2 | 3 | git tag -a x.x.x -m 'Version x.x.x' 4 | 5 | * and push to github 6 | 7 | git push origin master --tags 8 | 9 | * Upload to PyPI 10 | 11 | git clean -xfd 12 | python setup.py register sdist --formats=gztar 13 | twine upload dist/* 14 | 15 | * Do a pull-request to the feedstock on `pandas-msgpack-feedstock `__ 16 | 17 | update the version 18 | update the SHA256 (retrieve from PyPI) 19 | -------------------------------------------------------------------------------- /pandas_msgpack/msgpack/exceptions.py: -------------------------------------------------------------------------------- 1 | class UnpackException(Exception): 2 | pass 3 | 4 | 5 | class BufferFull(UnpackException): 6 | pass 7 | 8 | 9 | class OutOfData(UnpackException): 10 | pass 11 | 12 | 13 | class UnpackValueError(UnpackException, ValueError): 14 | pass 15 | 16 | 17 | class ExtraData(ValueError): 18 | 19 | def __init__(self, unpacked, extra): 20 | self.unpacked = unpacked 21 | self.extra = extra 22 | 23 | def __str__(self): 24 | return "unpack(b) received extra data." 
25 | 26 | 27 | class PackException(Exception): 28 | pass 29 | 30 | 31 | class PackValueError(PackException, ValueError): 32 | pass 33 | -------------------------------------------------------------------------------- /pandas_msgpack/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | # pandas versioning 4 | import pandas 5 | 6 | from distutils.version import LooseVersion 7 | pv = LooseVersion(pandas.__version__) 8 | 9 | if pv < '0.19.0': 10 | raise ValueError("pandas_msgpack requires at least pandas 0.19.0") 11 | _is_pandas_legacy_version = pv.version[1] == 19 and len(pv.version) == 3 12 | 13 | from .packers import to_msgpack, read_msgpack 14 | 15 | # versioning 16 | from ._version import get_versions 17 | 18 | versions = get_versions() 19 | __version__ = versions.get('closest-tag', versions['version']) 20 | __git_revision__ = versions['full-revisionid'] 21 | del get_versions, versions, pv, LooseVersion, pandas 22 | -------------------------------------------------------------------------------- /docs/source/read_write.rst: -------------------------------------------------------------------------------- 1 | .. _read_write: 2 | 3 | .. ipython:: python 4 | :suppress: 5 | 6 | import pandas as pd 7 | 8 | 9 | Read/Write API 10 | -------------- 11 | 12 | Msgpacks can also be read from and written to strings. 13 | 14 | .. ipython:: python 15 | 16 | import pandas as pd 17 | from pandas_msgpack import to_msgpack, read_msgpack 18 | 19 | df = pd.DataFrame({'A': np.arange(10), 20 | 'B': np.random.randn(10), 21 | 'C': 'foo'}) 22 | 23 | to_msgpack(None, df) 24 | 25 | Furthermore you can concatenate the strings to produce a list of the original objects. 26 | 27 | .. 
ipython:: python 28 | 29 | read_msgpack(to_msgpack(None, df) + to_msgpack(None, df.A)) 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | 4 | env: 5 | - PYTHON=2.7 PANDAS=0.19.2 6 | - PYTHON=3.4 PANDAS=0.19.2 7 | - PYTHON=3.5 PANDAS=0.19.2 COVERAGE='true' 8 | - PYTHON=3.6 PANDAS='master' LINT='true' 9 | 10 | before_install: 11 | - echo "before_install" 12 | - export PATH="$HOME/miniconda3/bin:$PATH" 13 | - df -h 14 | - pwd 15 | - uname -a 16 | - git --version 17 | - git tag 18 | 19 | install: 20 | - ci/install_travis.sh 21 | 22 | script: 23 | - source activate test-environment && pytest -s -v --cov=pandas_msgpack --cov-report xml:/tmp/pytest-cov.xml pandas_msgpack 24 | - if [ $COVERAGE ]; then source activate test-environment && codecov ; fi 25 | - if [ $LINT ]; then source activate test-environment && flake8 pandas_msgpack ; fi 26 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 0.1.4 / 2017-03-30 5 | ------------------ 6 | 7 | Initial release of transfered code from `pandas `__ 8 | 9 | Includes patches since the 0.19.2 release on pandas with the following: 10 | 11 | - Bug in ``read_msgpack()`` in which ``Series`` categoricals were being improperly processed, see `pandas-GH#14901 `__ 12 | - Bug in ``read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex``, see `pandas-GH#15487 `__ 13 | - Bug in ``read_msgpack()`` when deserializing a ``CategoricalIndex``, see `pandas-GH#15487 `__ 14 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | 
Installation 2 | ============ 3 | 4 | You can install pandas-msgpack with ``conda``, ``pip``, or by installing from source. 5 | 6 | Conda 7 | ----- 8 | 9 | .. code-block:: shell 10 | 11 | $ conda install pandas-msgpack --channel conda-forge 12 | 13 | This installs pandas-msgpack and all common dependencies, including ``pandas``. 14 | 15 | Pip 16 | --- 17 | 18 | To install the latest version of pandas-msgpack: 19 | 20 | .. code-block:: shell 21 | 22 | $ pip install pandas-msgpack -U 23 | 24 | This installs pandas-msgpack and all common dependencies, including ``pandas``. 25 | 26 | 27 | Install from Source 28 | ------------------- 29 | 30 | .. code-block:: shell 31 | 32 | $ pip install git+https://github.com/pydata/pandas-msgpack.git 33 | 34 | 35 | Dependencies 36 | ------------ 37 | 38 | - `pandas `__ ``>=0.19.2`` 39 | - `blosc `__ library can be optionally installed as a compressor. 40 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pandas-msgpack documentation master file, created by 2 | sphinx-quickstart on Wed Feb 8 10:52:12 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to pandas-msgpack's documentation! 7 | ========================================== 8 | 9 | The :mod:`pandas_msgpack` module provides an interface from `pandas https://pandas.pydata.org` 10 | to the `msgpack `__ library. This is a lightweight portable 11 | binary format, similar to binary JSON, that is highly space efficient, and provides good performance 12 | both on the writing (serialization), and reading (deserialization). 13 | 14 | Contents: 15 | 16 | .. 
toctree:: 17 | :maxdepth: 2 18 | 19 | install.rst 20 | tutorial.rst 21 | compression.rst 22 | read_write.rst 23 | api.rst 24 | changelog.rst 25 | 26 | 27 | Indices and tables 28 | ================== 29 | 30 | * :ref:`genindex` 31 | * :ref:`modindex` 32 | * :ref:`search` 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | gi######################################### 2 | # Editor temporary/working/backup files # 3 | .#* 4 | *\#*\# 5 | [#]*# 6 | *~ 7 | *$ 8 | *.bak 9 | *flymake* 10 | *.kdev4 11 | *.log 12 | *.swp 13 | *.pdb 14 | .project 15 | .pydevproject 16 | .settings 17 | .idea 18 | .vagrant 19 | .noseids 20 | .ipynb_checkpoints 21 | .tags 22 | 23 | # Docs # 24 | ######## 25 | docs/source/_build 26 | 27 | # Coverage # 28 | ############ 29 | .coverage 30 | coverage.xml 31 | coverage_html_report 32 | 33 | # Compiled source # 34 | ################### 35 | *.a 36 | *.com 37 | *.class 38 | *.dll 39 | *.exe 40 | *.pxi 41 | *.o 42 | *.py[ocd] 43 | *.so 44 | .build_cache_dir 45 | MANIFEST 46 | 47 | # Python files # 48 | ################ 49 | # setup.py working directory 50 | build 51 | # setup.py dist directory 52 | dist 53 | # Egg metadata 54 | *.egg-info 55 | .eggs 56 | .pypirc 57 | 58 | # tox testing tool 59 | .tox 60 | # rope 61 | .ropeproject 62 | # wheel files 63 | *.whl 64 | **/wheelhouse/* 65 | # coverage 66 | .coverage 67 | 68 | # OS generated files # 69 | ###################### 70 | .directory 71 | .gdb_history 72 | .DS_Store 73 | ehthumbs.db 74 | Icon? 
75 | Thumbs.db 76 | 77 | # caches # 78 | .cache 79 | -------------------------------------------------------------------------------- /ci/install_travis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # install miniconda 4 | MINICONDA_DIR="$HOME/miniconda3" 5 | 6 | if [ -d "$MINICONDA_DIR" ]; then 7 | rm -rf "$MINICONDA_DIR" 8 | fi 9 | 10 | # install miniconda 11 | if [ "${TRAVIS_OS_NAME}" == "osx" ]; then 12 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 13 | else 14 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 15 | fi 16 | bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 17 | 18 | conda config --set ssl_verify false || exit 1 19 | conda config --set always_yes true --set changeps1 false || exit 1 20 | conda update -q conda 21 | 22 | conda info -a || exit 1 23 | 24 | conda create -n test-environment python=$PYTHON cython pytest 25 | source activate test-environment 26 | 27 | pip install coverage pytest-cov flake8 codecov 28 | if [ $PANDAS == 'master' ]; then 29 | 30 | echo "installing deps" 31 | pip install numpy pytz python-dateutil 32 | 33 | echo "installing pandas master wheel" 34 | PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com" 35 | pip install --pre --timeout=60 -f $PRE_WHEELS pandas==0.19.0+699.gecaeea1 36 | 37 | else 38 | conda install pandas=$PANDAS 39 | fi 40 | 41 | REQ="ci/requirements-${PYTHON}.pip" 42 | if [ -e $REQ ]; then 43 | pip install -r $REQ; 44 | fi 45 | 46 | conda list 47 | python setup.py develop 48 | -------------------------------------------------------------------------------- /pandas_msgpack/msgpack/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from collections import namedtuple 4 | 5 | from pandas_msgpack.msgpack.exceptions import * # noqa 6 
| from pandas_msgpack.msgpack._version import version # noqa 7 | 8 | 9 | class ExtType(namedtuple('ExtType', 'code data')): 10 | """ExtType represents ext type in msgpack.""" 11 | def __new__(cls, code, data): 12 | if not isinstance(code, int): 13 | raise TypeError("code must be int") 14 | if not isinstance(data, bytes): 15 | raise TypeError("data must be bytes") 16 | if not 0 <= code <= 127: 17 | raise ValueError("code must be 0~127") 18 | return super(ExtType, cls).__new__(cls, code, data) 19 | 20 | import os # noqa 21 | 22 | from pandas_msgpack.msgpack._packer import Packer # noqa 23 | from pandas_msgpack.msgpack._unpacker import unpack, unpackb, Unpacker # noqa 24 | 25 | 26 | def pack(o, stream, **kwargs): 27 | """ 28 | Pack object `o` and write it to `stream` 29 | 30 | See :class:`Packer` for options. 31 | """ 32 | packer = Packer(**kwargs) 33 | stream.write(packer.pack(o)) 34 | 35 | 36 | def packb(o, **kwargs): 37 | """ 38 | Pack object `o` and return packed bytes 39 | 40 | See :class:`Packer` for options. 41 | """ 42 | return Packer(**kwargs).pack(o) 43 | 44 | 45 | # alias for compatibility to simplejson/marshal/pickle. 46 | load = unpack 47 | loads = unpackb 48 | 49 | dump = pack 50 | dumps = packb 51 | -------------------------------------------------------------------------------- /docs/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial: 2 | 3 | .. ipython:: python 4 | :suppress: 5 | 6 | import pandas as pd 7 | import os 8 | 9 | Tutorial 10 | -------- 11 | 12 | .. ipython:: python 13 | 14 | import pandas as pd 15 | from pandas_msgpack import to_msgpack, read_msgpack 16 | 17 | .. 
ipython:: python 18 | 19 | df = pd.DataFrame(np.random.rand(5,2), columns=list('AB')) 20 | to_msgpack('foo.msg', df) 21 | read_msgpack('foo.msg') 22 | s = pd.Series(np.random.rand(5),index=pd.date_range('20130101',periods=5)) 23 | 24 | You can pass a list of objects and you will receive them back on deserialization. 25 | 26 | .. ipython:: python 27 | 28 | to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s) 29 | read_msgpack('foo.msg') 30 | 31 | You can pass ``iterator=True`` to iterate over the unpacked results 32 | 33 | .. ipython:: python 34 | 35 | for o in read_msgpack('foo.msg',iterator=True): 36 | print(o) 37 | 38 | You can pass ``append=True`` to the writer to append to an existing pack 39 | 40 | .. ipython:: python 41 | 42 | to_msgpack('foo.msg', df, append=True) 43 | read_msgpack('foo.msg') 44 | 45 | Furthermore you can pass in arbitrary python objects. 46 | 47 | .. ipython:: python 48 | 49 | to_msgpack('foo2.msg', { 'dict' : [ { 'df' : df }, { 'string' : 'foo' }, { 'scalar' : 1. }, { 's' : s } ] }) 50 | read_msgpack('foo2.msg') 51 | 52 | .. ipython:: python 53 | :suppress: 54 | :okexcept: 55 | 56 | import os 57 | os.remove('foo.msg') 58 | os.remove('foo2.msg') 59 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pandas-msgpack 2 | ============== 3 | 4 | THIS LIBRARY IS NO LONGER IN DEVELOPMENT OR MAINTAINED 5 | ------------------------------------------------------ 6 | 7 | |Travis Build Status| |Appveyor Build Status| |Version Status| |Coverage Status| 8 | 9 | **pandas-msgpack** is a package providing an interface to msgpack from pandas 10 | 11 | 12 | Installation 13 | ------------ 14 | 15 | 16 | Install latest release version via conda 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | 19 | .. 
code-block:: shell 20 | 21 | $ conda install pandas-msgpack --channel conda-forge 22 | 23 | Install latest release version via pip 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | 26 | .. code-block:: shell 27 | 28 | $ pip install pandas-msgpack 29 | 30 | Install latest development version 31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | .. code-block:: shell 34 | 35 | $ pip install git+https://github.com/pydata/pandas-msgpack.git 36 | 37 | 38 | Usage 39 | ----- 40 | 41 | See the `pandas-msgpack documentation `_ for more details. 42 | 43 | .. |Travis Build Status| image:: https://travis-ci.org/pydata/pandas-msgpack.svg?branch=master 44 | :target: https://travis-ci.org/pydata/pandas-msgpack 45 | .. |Appveyor Build Status| image:: https://ci.appveyor.com/api/projects/status/5716aqchorgwmwxf/branch/master?svg=true 46 | :target: https://ci.appveyor.com/project/jreback/pandas-msgpack 47 | .. |Version Status| image:: https://img.shields.io/pypi/v/pandas-msgpack.svg 48 | :target: https://pypi.python.org/pypi/pandas-msgpack/ 49 | .. |Coverage Status| image:: https://img.shields.io/codecov/c/github/pydata/pandas-msgpack.svg 50 | :target: https://codecov.io/gh/pydata/pandas-msgpack/ 51 | -------------------------------------------------------------------------------- /docs/source/compression.rst: -------------------------------------------------------------------------------- 1 | .. _compression: 2 | 3 | .. ipython:: python 4 | :suppress: 5 | 6 | import pandas as pd 7 | import os 8 | 9 | Compression 10 | ----------- 11 | 12 | Optionally, a ``compression`` argument will compress the resulting bytes. 13 | These can take a bit more time to write. The available compressors are 14 | ``zlib`` and `blosc `__. 15 | 16 | Generally compression will increase the writing time. 17 | 18 | .. 
ipython:: python 19 | 20 | import pandas as pd 21 | from pandas_msgpack import to_msgpack, read_msgpack 22 | 23 | df = pd.DataFrame({'A': np.arange(100000), 24 | 'B': np.random.randn(100000), 25 | 'C': 'foo'}) 26 | 27 | .. ipython:: python 28 | 29 | %timeit -n 1 -r 1 to_msgpack('uncompressed.msg', df) 30 | 31 | .. ipython:: python 32 | 33 | %timeit -n 1 -r 1 to_msgpack('compressed_blosc.msg', df, compress='blosc') 34 | 35 | .. ipython:: python 36 | 37 | %timeit -n 1 -r 1 to_msgpack('compressed_zlib.msg', df, compress='zlib') 38 | 39 | If compressed, it will be be automatically inferred and de-compressed upon reading. 40 | 41 | .. ipython:: python 42 | 43 | %timeit -n 1 -r 1 read_msgpack('uncompressed.msg') 44 | 45 | .. ipython:: python 46 | 47 | %timeit -n 1 -r 1 read_msgpack('compressed_blosc.msg') 48 | 49 | .. ipython:: python 50 | 51 | %timeit -n 1 -r 1 read_msgpack('compressed_zlib.msg') 52 | 53 | These can provide storage space savings. 54 | 55 | .. ipython:: python 56 | 57 | !ls -ltr *.msg 58 | 59 | .. ipython:: python 60 | :suppress: 61 | :okexcept: 62 | 63 | os.remove('uncompressed.msg') 64 | os.remove('compressed_blosc.msg') 65 | os.remove('compressed_zlib.msg') 66 | -------------------------------------------------------------------------------- /pandas_msgpack/includes/unpack_define.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MessagePack unpacking routine template 3 | * 4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | #ifndef MSGPACK_UNPACK_DEFINE_H__ 19 | #define MSGPACK_UNPACK_DEFINE_H__ 20 | 21 | #include "sysdep.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | 32 | #ifndef MSGPACK_EMBED_STACK_SIZE 33 | #define MSGPACK_EMBED_STACK_SIZE 32 34 | #endif 35 | 36 | 37 | // CS is first byte & 0x1f 38 | typedef enum { 39 | CS_HEADER = 0x00, // nil 40 | 41 | //CS_ = 0x01, 42 | //CS_ = 0x02, // false 43 | //CS_ = 0x03, // true 44 | 45 | CS_BIN_8 = 0x04, 46 | CS_BIN_16 = 0x05, 47 | CS_BIN_32 = 0x06, 48 | 49 | CS_EXT_8 = 0x07, 50 | CS_EXT_16 = 0x08, 51 | CS_EXT_32 = 0x09, 52 | 53 | CS_FLOAT = 0x0a, 54 | CS_DOUBLE = 0x0b, 55 | CS_UINT_8 = 0x0c, 56 | CS_UINT_16 = 0x0d, 57 | CS_UINT_32 = 0x0e, 58 | CS_UINT_64 = 0x0f, 59 | CS_INT_8 = 0x10, 60 | CS_INT_16 = 0x11, 61 | CS_INT_32 = 0x12, 62 | CS_INT_64 = 0x13, 63 | 64 | //CS_FIXEXT1 = 0x14, 65 | //CS_FIXEXT2 = 0x15, 66 | //CS_FIXEXT4 = 0x16, 67 | //CS_FIXEXT8 = 0x17, 68 | //CS_FIXEXT16 = 0x18, 69 | 70 | CS_RAW_8 = 0x19, 71 | CS_RAW_16 = 0x1a, 72 | CS_RAW_32 = 0x1b, 73 | CS_ARRAY_16 = 0x1c, 74 | CS_ARRAY_32 = 0x1d, 75 | CS_MAP_16 = 0x1e, 76 | CS_MAP_32 = 0x1f, 77 | 78 | ACS_RAW_VALUE, 79 | ACS_BIN_VALUE, 80 | ACS_EXT_VALUE, 81 | } msgpack_unpack_state; 82 | 83 | 84 | typedef enum { 85 | CT_ARRAY_ITEM, 86 | CT_MAP_KEY, 87 | CT_MAP_VALUE, 88 | } msgpack_container_type; 89 | 90 | 91 | #ifdef __cplusplus 92 | } 93 | #endif 94 | 95 | #endif /* msgpack/unpack_define.h */ 96 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # With infos from 2 | # http://tjelvarolsson.com/blog/how-to-continuously-test-your-python-code-on-windows-using-appveyor/ 3 | # https://packaging.python.org/en/latest/appveyor/ 4 | # 
https://github.com/rmcgibbo/python-appveyor-conda-example 5 | 6 | # Backslashes in quotes need to be escaped: \ -> "\\" 7 | 8 | matrix: 9 | fast_finish: true # immediately finish build once one of the jobs fails. 10 | 11 | environment: 12 | global: 13 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 14 | # /E:ON and /V:ON options are not enabled in the batch script intepreter 15 | # See: http://stackoverflow.com/a/13751649/163740 16 | CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd" 17 | clone_folder: C:\projects\pandas-msgpack 18 | 19 | matrix: 20 | 21 | - CONDA_ROOT: "C:\\Miniconda3_64" 22 | PYTHON_VERSION: "3.6" 23 | PYTHON_ARCH: "64" 24 | CONDA_PY: "36" 25 | CONDA_NPY: "112" 26 | 27 | - CONDA_ROOT: "C:\\Miniconda3_64" 28 | PYTHON_VERSION: "3.5" 29 | PYTHON_ARCH: "64" 30 | CONDA_PY: "35" 31 | CONDA_NPY: "111" 32 | 33 | - CONDA_ROOT: "C:\\Miniconda3_64" 34 | PYTHON_VERSION: "2.7" 35 | PYTHON_ARCH: "64" 36 | CONDA_PY: "27" 37 | CONDA_NPY: "110" 38 | 39 | # We always use a 64-bit machine, but can build x86 distributions 40 | # with the PYTHON_ARCH variable (which is used by CMD_IN_ENV). 41 | platform: 42 | - x64 43 | 44 | # all our python builds have to happen in tests_script... 45 | build: false 46 | 47 | install: 48 | # cancel older builds for the same PR 49 | - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` 50 | https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` 51 | Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` 52 | throw "There are newer queued builds for this pull request, failing early." 
} 53 | 54 | # this installs the appropriate Miniconda (Py2/Py3, 32/64 bit) 55 | # updates conda & installs: conda-build jinja2 anaconda-client 56 | - powershell .\ci\install.ps1 57 | - SET PATH=%CONDA_ROOT%;%CONDA_ROOT%\Scripts;%PATH% 58 | - echo "install" 59 | - cd 60 | - ls -ltr 61 | - git tag --sort v:refname 62 | 63 | # this can conflict with git 64 | - cmd: rmdir C:\cygwin /s /q 65 | 66 | # install our build environment 67 | - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false 68 | - cmd: conda update -q conda 69 | - cmd: conda config --set ssl_verify false 70 | - cmd: conda config --add channels conda-forge 71 | 72 | # this is now the downloaded conda... 73 | - cmd: conda info -a 74 | 75 | # create our env 76 | - cmd: conda create -n pandas-msgpack python=%PYTHON_VERSION% pandas cython pytest 77 | - cmd: activate pandas-msgpack 78 | - cmd: conda list -n pandas-msgpack 79 | 80 | # build wheels 81 | - cmd: '%CMD_IN_ENV% python setup.py bdist_wheel' 82 | 83 | # install 84 | - pip install --no-index --find-links=.\dist\ pandas-msgpack 85 | 86 | test_script: 87 | # tests 88 | - cmd: activate pandas-msgpack 89 | - cmd: cd c:\\projects 90 | - cmd: pytest --pyargs pandas_msgpack -v 91 | 92 | artifacts: 93 | - path: dist\* 94 | -------------------------------------------------------------------------------- /ci/install.ps1: -------------------------------------------------------------------------------- 1 | # Sample script to install Miniconda under Windows 2 | # Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon 3 | # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 4 | 5 | $MINICONDA_URL = "http://repo.continuum.io/miniconda/" 6 | 7 | 8 | function DownloadMiniconda ($python_version, $platform_suffix) { 9 | $webclient = New-Object System.Net.WebClient 10 | $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe" 11 | $url = $MINICONDA_URL + $filename 12 | 13 | 
$basedir = $pwd.Path + "\" 14 | $filepath = $basedir + $filename 15 | if (Test-Path $filename) { 16 | Write-Host "Reusing" $filepath 17 | return $filepath 18 | } 19 | 20 | # Download and retry up to 3 times in case of network transient errors. 21 | Write-Host "Downloading" $filename "from" $url 22 | $retry_attempts = 2 23 | for($i=0; $i -lt $retry_attempts; $i++){ 24 | try { 25 | $webclient.DownloadFile($url, $filepath) 26 | break 27 | } 28 | Catch [Exception]{ 29 | Start-Sleep 1 30 | } 31 | } 32 | if (Test-Path $filepath) { 33 | Write-Host "File saved at" $filepath 34 | } else { 35 | # Retry once to get the error message if any at the last try 36 | $webclient.DownloadFile($url, $filepath) 37 | } 38 | return $filepath 39 | } 40 | 41 | 42 | function InstallMiniconda ($python_version, $architecture, $python_home) { 43 | Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home 44 | if (Test-Path $python_home) { 45 | Write-Host $python_home "already exists, skipping." 
46 | return $false 47 | } 48 | if ($architecture -match "32") { 49 | $platform_suffix = "x86" 50 | } else { 51 | $platform_suffix = "x86_64" 52 | } 53 | 54 | $filepath = DownloadMiniconda $python_version $platform_suffix 55 | Write-Host "Installing" $filepath "to" $python_home 56 | $install_log = $python_home + ".log" 57 | $args = "/S /D=$python_home" 58 | Write-Host $filepath $args 59 | Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru 60 | if (Test-Path $python_home) { 61 | Write-Host "Python $python_version ($architecture) installation complete" 62 | } else { 63 | Write-Host "Failed to install Python in $python_home" 64 | Get-Content -Path $install_log 65 | Exit 1 66 | } 67 | } 68 | 69 | 70 | function InstallCondaPackages ($python_home, $spec) { 71 | $conda_path = $python_home + "\Scripts\conda.exe" 72 | $args = "install --yes " + $spec 73 | Write-Host ("conda " + $args) 74 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru 75 | } 76 | 77 | function UpdateConda ($python_home) { 78 | $conda_path = $python_home + "\Scripts\conda.exe" 79 | Write-Host "Updating conda..." 80 | $args = "update --yes conda" 81 | Write-Host $conda_path $args 82 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru 83 | } 84 | 85 | 86 | function main () { 87 | InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT 88 | UpdateConda $env:CONDA_ROOT 89 | InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client" 90 | } 91 | 92 | main 93 | -------------------------------------------------------------------------------- /ci/run_with_env.cmd: -------------------------------------------------------------------------------- 1 | :: EXPECTED ENV VARS: PYTHON_ARCH (either x86 or x64) 2 | :: CONDA_PY (either 27, 33, 35 etc. 
- only major version is extracted) 3 | :: 4 | :: 5 | :: To build extensions for 64 bit Python 3, we need to configure environment 6 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 8 | :: 9 | :: To build extensions for 64 bit Python 2, we need to configure environment 10 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 11 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 12 | :: 13 | :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific 14 | :: environment configurations. 15 | :: 16 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 17 | :: cmd interpreter, at least for (SDK v7.0) 18 | :: 19 | :: More details at: 20 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 21 | :: http://stackoverflow.com/a/13751649/163740 22 | :: 23 | :: Author: Phil Elson 24 | :: Original Author: Olivier Grisel (https://github.com/ogrisel/python-appveyor-demo) 25 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 26 | :: 27 | :: Notes about batch files for Python people: 28 | :: 29 | :: Quotes in values are literally part of the values: 30 | :: SET FOO="bar" 31 | :: FOO is now five characters long: " b a r " 32 | :: If you don't want quotes, don't include them on the right-hand side. 33 | :: 34 | :: The CALL lines at the end of this file look redundant, but if you move them 35 | :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y 36 | :: case, I don't know why. 37 | :: originally from https://github.com/pelson/Obvious-CI/blob/master/scripts/obvci_appveyor_python_build_env.cmd 38 | @ECHO OFF 39 | 40 | SET COMMAND_TO_RUN=%* 41 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 42 | 43 | :: Extract the major and minor versions, and allow for the minor version to be 44 | :: more than 9. 
This requires the version number to have two dots in it. 45 | SET MAJOR_PYTHON_VERSION=%CONDA_PY:~0,1% 46 | 47 | IF "%CONDA_PY:~2,1%" == "" ( 48 | :: CONDA_PY style, such as 27, 34 etc. 49 | SET MINOR_PYTHON_VERSION=%CONDA_PY:~1,1% 50 | ) ELSE ( 51 | IF "%CONDA_PY:~3,1%" == "." ( 52 | SET MINOR_PYTHON_VERSION=%CONDA_PY:~2,1% 53 | ) ELSE ( 54 | SET MINOR_PYTHON_VERSION=%CONDA_PY:~2,2% 55 | ) 56 | ) 57 | 58 | :: Based on the Python version, determine what SDK version to use, and whether 59 | :: to set the SDK for 64-bit. 60 | IF %MAJOR_PYTHON_VERSION% == 2 ( 61 | SET WINDOWS_SDK_VERSION="v7.0" 62 | SET SET_SDK_64=Y 63 | ) ELSE ( 64 | IF %MAJOR_PYTHON_VERSION% == 3 ( 65 | SET WINDOWS_SDK_VERSION="v7.1" 66 | IF %MINOR_PYTHON_VERSION% LEQ 4 ( 67 | SET SET_SDK_64=Y 68 | ) ELSE ( 69 | SET SET_SDK_64=N 70 | ) 71 | ) ELSE ( 72 | ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" 73 | EXIT /B 1 74 | ) 75 | ) 76 | 77 | IF "%PYTHON_ARCH%"=="64" ( 78 | IF %SET_SDK_64% == Y ( 79 | ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture 80 | SET DISTUTILS_USE_SDK=1 81 | SET MSSdk=1 82 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 83 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 84 | ECHO Executing: %COMMAND_TO_RUN% 85 | call %COMMAND_TO_RUN% || EXIT /B 1 86 | ) ELSE ( 87 | ECHO Using default MSVC build environment for 64 bit architecture 88 | ECHO Executing: %COMMAND_TO_RUN% 89 | call %COMMAND_TO_RUN% || EXIT /B 1 90 | ) 91 | ) ELSE ( 92 | ECHO Using default MSVC build environment for 32 bit architecture 93 | ECHO Executing: %COMMAND_TO_RUN% 94 | call %COMMAND_TO_RUN% || EXIT /B 1 95 | ) 96 | -------------------------------------------------------------------------------- /pandas_msgpack/includes/pack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MessagePack for Python packing routine 3 | * 4 | 
* Copyright (C) 2009 Naoki INADA 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #include 20 | #include 21 | #include "sysdep.h" 22 | #include 23 | #include 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | #if defined(_MSC_VER) && (_MSC_VER < 1900) 30 | #define inline __inline 31 | #endif 32 | 33 | typedef struct msgpack_packer { 34 | char *buf; 35 | size_t length; 36 | size_t buf_size; 37 | bool use_bin_type; 38 | } msgpack_packer; 39 | 40 | typedef struct Packer Packer; 41 | 42 | static inline int msgpack_pack_int(msgpack_packer* pk, int d); 43 | static inline int msgpack_pack_long(msgpack_packer* pk, long d); 44 | static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); 45 | static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); 46 | static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); 47 | static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); 48 | //static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); 49 | 50 | static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); 51 | static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); 52 | static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d); 53 | static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d); 54 | static inline int 
msgpack_pack_int8(msgpack_packer* pk, int8_t d); 55 | static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d); 56 | static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d); 57 | static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d); 58 | 59 | static inline int msgpack_pack_float(msgpack_packer* pk, float d); 60 | static inline int msgpack_pack_double(msgpack_packer* pk, double d); 61 | 62 | static inline int msgpack_pack_nil(msgpack_packer* pk); 63 | static inline int msgpack_pack_true(msgpack_packer* pk); 64 | static inline int msgpack_pack_false(msgpack_packer* pk); 65 | 66 | static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); 67 | 68 | static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); 69 | 70 | static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); 71 | static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); 72 | static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); 73 | 74 | static inline int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l); 75 | 76 | static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) 77 | { 78 | char* buf = pk->buf; 79 | size_t bs = pk->buf_size; 80 | size_t len = pk->length; 81 | 82 | if (len + l > bs) { 83 | bs = (len + l) * 2; 84 | buf = (char*)realloc(buf, bs); 85 | if (!buf) return -1; 86 | } 87 | memcpy(buf + len, data, l); 88 | len += l; 89 | 90 | pk->buf = buf; 91 | pk->buf_size = bs; 92 | pk->length = len; 93 | return 0; 94 | } 95 | 96 | #define msgpack_pack_append_buffer(user, buf, len) \ 97 | return msgpack_pack_write(user, (const char*)buf, len) 98 | 99 | #include "pack_template.h" 100 | 101 | #ifdef __cplusplus 102 | } 103 | #endif 104 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | ======= 2 | License 3 | ======= 4 | 5 | 
pandas-msgpack is distributed under a 3-clause ("Simplified" or "New") BSD 6 | license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have 7 | BSD-compatible licenses, are included. Their licenses follow the pandas 8 | license. 9 | 10 | pandas license 11 | ============== 12 | 13 | Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team 14 | All rights reserved. 15 | 16 | Copyright (c) 2008-2011 AQR Capital Management, LLC 17 | All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without 20 | modification, are permitted provided that the following conditions are 21 | met: 22 | 23 | * Redistributions of source code must retain the above copyright 24 | notice, this list of conditions and the following disclaimer. 25 | 26 | * Redistributions in binary form must reproduce the above 27 | copyright notice, this list of conditions and the following 28 | disclaimer in the documentation and/or other materials provided 29 | with the distribution. 30 | 31 | * Neither the name of the copyright holder nor the names of any 32 | contributors may be used to endorse or promote products derived 33 | from this software without specific prior written permission. 34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 36 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 37 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 38 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 39 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 42 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 43 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 44 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 45 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 46 | 47 | About the Copyright Holders 48 | =========================== 49 | 50 | AQR Capital Management began pandas development in 2008. Development was 51 | led by Wes McKinney. AQR released the source under this license in 2009. 52 | Wes is now an employee of Lambda Foundry, and remains the pandas project 53 | lead. 54 | 55 | The PyData Development Team is the collection of developers of the PyData 56 | project. This includes all of the PyData sub-projects, including pandas. The 57 | core team that coordinates development on GitHub can be found here: 58 | http://github.com/pydata. 59 | 60 | Full credits for pandas contributors can be found in the documentation. 61 | 62 | Our Copyright Policy 63 | ==================== 64 | 65 | PyData uses a shared copyright model. Each contributor maintains copyright 66 | over their contributions to PyData. However, it is important to note that 67 | these contributions are typically only changes to the repositories. Thus, 68 | the PyData source code, in its entirety, is not the copyright of any single 69 | person or institution. Instead, it is the collective copyright of the 70 | entire PyData Development Team. If individual contributors want to maintain 71 | a record of what changes/contributions they have specific copyright on, 72 | they should indicate their copyright in the commit message of the change 73 | when they commit the change to one of the PyData repositories. 
74 | 75 | With this in mind, the following banner should be used in any source code 76 | file to indicate the copyright and license terms: 77 | 78 | #----------------------------------------------------------------------------- 79 | # Copyright (c) 2012, PyData Development Team 80 | # All rights reserved. 81 | # 82 | # Distributed under the terms of the BSD Simplified License. 83 | # 84 | # The full license is in the LICENSE file, distributed with this software. 85 | #----------------------------------------------------------------------------- 86 | 87 | Other licenses can be found in the LICENSES directory. 88 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import sys 5 | from setuptools import setup 6 | import pkg_resources 7 | from distutils.extension import Extension 8 | from distutils.command.build_ext import build_ext as build_ext 9 | 10 | NAME = 'pandas-msgpack' 11 | 12 | def is_platform_windows(): 13 | return sys.platform == 'win32' or sys.platform == 'cygwin' 14 | 15 | def is_platform_linux(): 16 | return sys.platform == 'linux2' 17 | 18 | def is_platform_mac(): 19 | return sys.platform == 'darwin' 20 | 21 | # versioning 22 | import versioneer 23 | cmdclass = versioneer.get_cmdclass() 24 | 25 | try: 26 | import Cython 27 | from Cython.Build import cythonize 28 | except ImportError: 29 | raise ImportError("cython is required for building") 30 | 31 | # args to ignore warnings 32 | if is_platform_windows(): 33 | extra_compile_args=[] 34 | else: 35 | extra_compile_args=['-Wno-unused-function'] 36 | 37 | 38 | if sys.byteorder == 'big': 39 | macros = [('__BIG_ENDIAN__', '1')] 40 | else: 41 | macros = [('__LITTLE_ENDIAN__', '1')] 42 | 43 | extensions = [] 44 | packer_ext = Extension('pandas_msgpack.msgpack._packer', 45 | depends=['pandas_msgpack/includes/pack.h', 46 | 
'pandas_msgpack/includes/pack_template.h'], 47 | sources = ['pandas_msgpack/msgpack/_packer.pyx'], 48 | language='c++', 49 | include_dirs=['pandas_msgack/includes'], 50 | define_macros=macros, 51 | extra_compile_args=extra_compile_args) 52 | unpacker_ext = Extension('pandas_msgpack.msgpack._unpacker', 53 | depends=['pandas_msgpack/includes/unpack.h', 54 | 'pandas_msgpack/includes/unpack_define.h', 55 | 'pandas_msgpack/includes/unpack_template.h'], 56 | sources = ['pandas_msgpack/msgpack/_unpacker.pyx'], 57 | language='c++', 58 | include_dirs=['pandas_msgpack/includes'], 59 | define_macros=macros, 60 | extra_compile_args=extra_compile_args) 61 | extensions.append(packer_ext) 62 | extensions.append(unpacker_ext) 63 | 64 | #---------------------------------------------------------------------- 65 | # util 66 | # extension for pseudo-safely moving bytes into mutable buffers 67 | _move_ext = Extension('pandas_msgpack._move', 68 | depends=[], 69 | sources=['pandas_msgpack/move.c']) 70 | extensions.append(_move_ext) 71 | 72 | 73 | def readme(): 74 | with open('README.rst') as f: 75 | return f.read() 76 | 77 | INSTALL_REQUIRES = ( 78 | ['pandas'] 79 | ) 80 | 81 | setup( 82 | name=NAME, 83 | version=versioneer.get_version(), 84 | cmdclass=cmdclass, 85 | description="Pandas interface to msgpack", 86 | long_description=readme(), 87 | license='BSD License', 88 | author='The PyData Development Team', 89 | author_email='pydata@googlegroups.com', 90 | url='https://github.com/pydata/pandas-msgpack', 91 | classifiers=[ 92 | 'Development Status :: 4 - Beta', 93 | 'Environment :: Console', 94 | 'Intended Audience :: Science/Research', 95 | 'Operating System :: OS Independent', 96 | 'Programming Language :: Python', 97 | 'Programming Language :: Python :: 2', 98 | 'Programming Language :: Python :: 2.7', 99 | 'Programming Language :: Python :: 3', 100 | 'Programming Language :: Python :: 3.4', 101 | 'Programming Language :: Python :: 3.5', 102 | 'Programming Language :: Python :: 
3.6', 103 | 'Topic :: Scientific/Engineering', 104 | ], 105 | ext_modules=cythonize(extensions), 106 | keywords='data', 107 | install_requires=INSTALL_REQUIRES, 108 | packages=['pandas_msgpack', 109 | 'pandas_msgpack.includes', 110 | 'pandas_msgpack.msgpack', 111 | 'pandas_msgpack.tests'], 112 | test_suite='tests', 113 | ) 114 | -------------------------------------------------------------------------------- /pandas_msgpack/includes/sysdep.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MessagePack system dependencies 3 | * 4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | #ifndef MSGPACK_SYSDEP_H__ 19 | #define MSGPACK_SYSDEP_H__ 20 | 21 | #include 22 | #include 23 | #if defined(_MSC_VER) && _MSC_VER < 1600 24 | typedef __int8 int8_t; 25 | typedef unsigned __int8 uint8_t; 26 | typedef __int16 int16_t; 27 | typedef unsigned __int16 uint16_t; 28 | typedef __int32 int32_t; 29 | typedef unsigned __int32 uint32_t; 30 | typedef __int64 int64_t; 31 | typedef unsigned __int64 uint64_t; 32 | #elif defined(_MSC_VER) // && _MSC_VER >= 1600 33 | #include 34 | #else 35 | #include 36 | #include 37 | #endif 38 | 39 | #ifdef _WIN32 40 | #define _msgpack_atomic_counter_header 41 | typedef long _msgpack_atomic_counter_t; 42 | #define _msgpack_sync_decr_and_fetch(ptr) InterlockedDecrement(ptr) 43 | #define _msgpack_sync_incr_and_fetch(ptr) InterlockedIncrement(ptr) 44 | #elif defined(__GNUC__) && ((__GNUC__*10 + __GNUC_MINOR__) < 41) 45 | #define _msgpack_atomic_counter_header "gcc_atomic.h" 46 | #else 47 | typedef unsigned int _msgpack_atomic_counter_t; 48 | #define _msgpack_sync_decr_and_fetch(ptr) __sync_sub_and_fetch(ptr, 1) 49 | #define _msgpack_sync_incr_and_fetch(ptr) __sync_add_and_fetch(ptr, 1) 50 | #endif 51 | 52 | #ifdef _WIN32 53 | 54 | #ifdef __cplusplus 55 | /* numeric_limits::min,max */ 56 | #ifdef max 57 | #undef max 58 | #endif 59 | #ifdef min 60 | #undef min 61 | #endif 62 | #endif 63 | 64 | #else 65 | #include /* __BYTE_ORDER */ 66 | #endif 67 | 68 | #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) 69 | #if __BYTE_ORDER == __LITTLE_ENDIAN 70 | #define __LITTLE_ENDIAN__ 71 | #elif __BYTE_ORDER == __BIG_ENDIAN 72 | #define __BIG_ENDIAN__ 73 | #elif _WIN32 74 | #define __LITTLE_ENDIAN__ 75 | #endif 76 | #endif 77 | 78 | 79 | #ifdef __LITTLE_ENDIAN__ 80 | 81 | #ifdef _WIN32 82 | # if defined(ntohs) 83 | # define _msgpack_be16(x) ntohs(x) 84 | # elif defined(_byteswap_ushort) || (defined(_MSC_VER) && _MSC_VER >= 1400) 85 | # define _msgpack_be16(x) ((uint16_t)_byteswap_ushort((unsigned short)x)) 86 | # else 87 | # 
define _msgpack_be16(x) ( \ 88 | ((((uint16_t)x) << 8) ) | \ 89 | ((((uint16_t)x) >> 8) ) ) 90 | # endif 91 | #else 92 | # define _msgpack_be16(x) ntohs(x) 93 | #endif 94 | 95 | #ifdef _WIN32 96 | # if defined(ntohl) 97 | # define _msgpack_be32(x) ntohl(x) 98 | # elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) 99 | # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) 100 | # else 101 | # define _msgpack_be32(x) \ 102 | ( ((((uint32_t)x) << 24) ) | \ 103 | ((((uint32_t)x) << 8) & 0x00ff0000U ) | \ 104 | ((((uint32_t)x) >> 8) & 0x0000ff00U ) | \ 105 | ((((uint32_t)x) >> 24) ) ) 106 | # endif 107 | #else 108 | # define _msgpack_be32(x) ntohl(x) 109 | #endif 110 | 111 | #if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) 112 | # define _msgpack_be64(x) (_byteswap_uint64(x)) 113 | #elif defined(bswap_64) 114 | # define _msgpack_be64(x) bswap_64(x) 115 | #elif defined(__DARWIN_OSSwapInt64) 116 | # define _msgpack_be64(x) __DARWIN_OSSwapInt64(x) 117 | #else 118 | #define _msgpack_be64(x) \ 119 | ( ((((uint64_t)x) << 56) ) | \ 120 | ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \ 121 | ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \ 122 | ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \ 123 | ((((uint64_t)x) >> 8) & 0x00000000ff000000ULL ) | \ 124 | ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \ 125 | ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \ 126 | ((((uint64_t)x) >> 56) ) ) 127 | #endif 128 | 129 | #define _msgpack_load16(cast, from) ((cast)( \ 130 | (((uint16_t)((uint8_t*)(from))[0]) << 8) | \ 131 | (((uint16_t)((uint8_t*)(from))[1]) ) )) 132 | 133 | #define _msgpack_load32(cast, from) ((cast)( \ 134 | (((uint32_t)((uint8_t*)(from))[0]) << 24) | \ 135 | (((uint32_t)((uint8_t*)(from))[1]) << 16) | \ 136 | (((uint32_t)((uint8_t*)(from))[2]) << 8) | \ 137 | (((uint32_t)((uint8_t*)(from))[3]) ) )) 138 | 139 | #define _msgpack_load64(cast, from) ((cast)( \ 140 | 
(((uint64_t)((uint8_t*)(from))[0]) << 56) | \ 141 | (((uint64_t)((uint8_t*)(from))[1]) << 48) | \ 142 | (((uint64_t)((uint8_t*)(from))[2]) << 40) | \ 143 | (((uint64_t)((uint8_t*)(from))[3]) << 32) | \ 144 | (((uint64_t)((uint8_t*)(from))[4]) << 24) | \ 145 | (((uint64_t)((uint8_t*)(from))[5]) << 16) | \ 146 | (((uint64_t)((uint8_t*)(from))[6]) << 8) | \ 147 | (((uint64_t)((uint8_t*)(from))[7]) ) )) 148 | 149 | #else 150 | 151 | #define _msgpack_be16(x) (x) 152 | #define _msgpack_be32(x) (x) 153 | #define _msgpack_be64(x) (x) 154 | 155 | #define _msgpack_load16(cast, from) ((cast)( \ 156 | (((uint16_t)((uint8_t*)from)[0]) << 8) | \ 157 | (((uint16_t)((uint8_t*)from)[1]) ) )) 158 | 159 | #define _msgpack_load32(cast, from) ((cast)( \ 160 | (((uint32_t)((uint8_t*)from)[0]) << 24) | \ 161 | (((uint32_t)((uint8_t*)from)[1]) << 16) | \ 162 | (((uint32_t)((uint8_t*)from)[2]) << 8) | \ 163 | (((uint32_t)((uint8_t*)from)[3]) ) )) 164 | 165 | #define _msgpack_load64(cast, from) ((cast)( \ 166 | (((uint64_t)((uint8_t*)from)[0]) << 56) | \ 167 | (((uint64_t)((uint8_t*)from)[1]) << 48) | \ 168 | (((uint64_t)((uint8_t*)from)[2]) << 40) | \ 169 | (((uint64_t)((uint8_t*)from)[3]) << 32) | \ 170 | (((uint64_t)((uint8_t*)from)[4]) << 24) | \ 171 | (((uint64_t)((uint8_t*)from)[5]) << 16) | \ 172 | (((uint64_t)((uint8_t*)from)[6]) << 8) | \ 173 | (((uint64_t)((uint8_t*)from)[7]) ) )) 174 | #endif 175 | 176 | 177 | #define _msgpack_store16(to, num) \ 178 | do { uint16_t val = _msgpack_be16(num); memcpy(to, &val, 2); } while(0) 179 | #define _msgpack_store32(to, num) \ 180 | do { uint32_t val = _msgpack_be32(num); memcpy(to, &val, 4); } while(0) 181 | #define _msgpack_store64(to, num) \ 182 | do { uint64_t val = _msgpack_be64(num); memcpy(to, &val, 8); } while(0) 183 | 184 | /* 185 | #define _msgpack_load16(cast, from) \ 186 | ({ cast val; memcpy(&val, (char*)from, 2); _msgpack_be16(val); }) 187 | #define _msgpack_load32(cast, from) \ 188 | ({ cast val; memcpy(&val, (char*)from, 4); 
_msgpack_be32(val); }) 189 | #define _msgpack_load64(cast, from) \ 190 | ({ cast val; memcpy(&val, (char*)from, 8); _msgpack_be64(val); }) 191 | */ 192 | 193 | 194 | #endif /* sysdep.h */ 195 | -------------------------------------------------------------------------------- /docs/source/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | 
@echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pandas-gbq.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pandas-gbq.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pandas-gbq" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pandas-gbq" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 
136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 
189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /pandas_msgpack/includes/unpack.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MessagePack for Python unpacking routine 3 | * 4 | * Copyright (C) 2009 Naoki INADA 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #define MSGPACK_EMBED_STACK_SIZE (1024) 20 | #include "unpack_define.h" 21 | 22 | typedef struct unpack_user { 23 | int use_list; 24 | PyObject *object_hook; 25 | bool has_pairs_hook; 26 | PyObject *list_hook; 27 | PyObject *ext_hook; 28 | const char *encoding; 29 | const char *unicode_errors; 30 | Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; 31 | } unpack_user; 32 | 33 | typedef PyObject* msgpack_unpack_object; 34 | struct unpack_context; 35 | typedef struct unpack_context unpack_context; 36 | typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); 37 | 38 | static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) 39 | { 40 | return NULL; 41 | } 42 | 43 | static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) 44 | { 45 | PyObject *p = PyInt_FromLong((long)d); 46 | if (!p) 47 | return -1; 48 | *o = p; 49 | return 0; 50 | } 51 | static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) 52 | { 53 | return unpack_callback_uint16(u, d, o); 54 | } 55 | 56 | 57 | static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) 58 | { 59 | PyObject *p = PyInt_FromSize_t((size_t)d); 60 | if (!p) 61 | return -1; 62 | *o = p; 63 | return 0; 64 | } 65 | 66 | static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) 67 | { 68 | PyObject *p; 69 | if (d > LONG_MAX) { 70 | p = 
PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); 71 | } else { 72 | p = PyInt_FromSize_t((size_t)d); 73 | } 74 | if (!p) 75 | return -1; 76 | *o = p; 77 | return 0; 78 | } 79 | 80 | static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) 81 | { 82 | PyObject *p = PyInt_FromLong(d); 83 | if (!p) 84 | return -1; 85 | *o = p; 86 | return 0; 87 | } 88 | 89 | static inline int unpack_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) 90 | { 91 | return unpack_callback_int32(u, d, o); 92 | } 93 | 94 | static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) 95 | { 96 | return unpack_callback_int32(u, d, o); 97 | } 98 | 99 | static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) 100 | { 101 | PyObject *p; 102 | if (d > LONG_MAX || d < LONG_MIN) { 103 | p = PyLong_FromLongLong((unsigned PY_LONG_LONG)d); 104 | } else { 105 | p = PyInt_FromLong((long)d); 106 | } 107 | *o = p; 108 | return 0; 109 | } 110 | 111 | static inline int unpack_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) 112 | { 113 | PyObject *p = PyFloat_FromDouble(d); 114 | if (!p) 115 | return -1; 116 | *o = p; 117 | return 0; 118 | } 119 | 120 | static inline int unpack_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) 121 | { 122 | return unpack_callback_double(u, d, o); 123 | } 124 | 125 | static inline int unpack_callback_nil(unpack_user* u, msgpack_unpack_object* o) 126 | { Py_INCREF(Py_None); *o = Py_None; return 0; } 127 | 128 | static inline int unpack_callback_true(unpack_user* u, msgpack_unpack_object* o) 129 | { Py_INCREF(Py_True); *o = Py_True; return 0; } 130 | 131 | static inline int unpack_callback_false(unpack_user* u, msgpack_unpack_object* o) 132 | { Py_INCREF(Py_False); *o = Py_False; return 0; } 133 | 134 | static inline int unpack_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) 135 | { 136 | if (n > 
u->max_array_len) { 137 | PyErr_Format(PyExc_ValueError, "%u exceeds max_array_len(%zd)", n, u->max_array_len); 138 | return -1; 139 | } 140 | PyObject *p = u->use_list ? PyList_New(n) : PyTuple_New(n); 141 | 142 | if (!p) 143 | return -1; 144 | *o = p; 145 | return 0; 146 | } 147 | 148 | static inline int unpack_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) 149 | { 150 | if (u->use_list) 151 | PyList_SET_ITEM(*c, current, o); 152 | else 153 | PyTuple_SET_ITEM(*c, current, o); 154 | return 0; 155 | } 156 | 157 | static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) 158 | { 159 | if (u->list_hook) { 160 | PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL); 161 | if (!new_c) 162 | return -1; 163 | Py_DECREF(*c); 164 | *c = new_c; 165 | } 166 | return 0; 167 | } 168 | 169 | static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) 170 | { 171 | if (n > u->max_map_len) { 172 | PyErr_Format(PyExc_ValueError, "%u exceeds max_map_len(%zd)", n, u->max_map_len); 173 | return -1; 174 | } 175 | PyObject *p; 176 | if (u->has_pairs_hook) { 177 | p = PyList_New(n); // Or use tuple? 
178 | } 179 | else { 180 | p = PyDict_New(); 181 | } 182 | if (!p) 183 | return -1; 184 | *o = p; 185 | return 0; 186 | } 187 | 188 | static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) 189 | { 190 | if (u->has_pairs_hook) { 191 | msgpack_unpack_object item = PyTuple_Pack(2, k, v); 192 | if (!item) 193 | return -1; 194 | Py_DECREF(k); 195 | Py_DECREF(v); 196 | PyList_SET_ITEM(*c, current, item); 197 | return 0; 198 | } 199 | else if (PyDict_SetItem(*c, k, v) == 0) { 200 | Py_DECREF(k); 201 | Py_DECREF(v); 202 | return 0; 203 | } 204 | return -1; 205 | } 206 | 207 | static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) 208 | { 209 | if (u->object_hook) { 210 | PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL); 211 | if (!new_c) 212 | return -1; 213 | 214 | Py_DECREF(*c); 215 | *c = new_c; 216 | } 217 | return 0; 218 | } 219 | 220 | static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) 221 | { 222 | if (l > u->max_str_len) { 223 | PyErr_Format(PyExc_ValueError, "%u exceeds max_str_len(%zd)", l, u->max_str_len); 224 | return -1; 225 | } 226 | 227 | PyObject *py; 228 | if(u->encoding) { 229 | py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); 230 | } else { 231 | py = PyBytes_FromStringAndSize(p, l); 232 | } 233 | if (!py) 234 | return -1; 235 | *o = py; 236 | return 0; 237 | } 238 | 239 | static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) 240 | { 241 | if (l > u->max_bin_len) { 242 | PyErr_Format(PyExc_ValueError, "%u exceeds max_bin_len(%zd)", l, u->max_bin_len); 243 | return -1; 244 | } 245 | 246 | PyObject *py = PyBytes_FromStringAndSize(p, l); 247 | if (!py) 248 | return -1; 249 | *o = py; 250 | return 0; 251 | } 252 | 253 | static inline int 
unpack_callback_ext(unpack_user* u, const char* base, const char* pos, 254 | unsigned int length, msgpack_unpack_object* o) 255 | { 256 | PyObject *py; 257 | int8_t typecode = (int8_t)*pos++; 258 | if (!u->ext_hook) { 259 | PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); 260 | return -1; 261 | } 262 | if (length-1 > u->max_ext_len) { 263 | PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); 264 | return -1; 265 | } 266 | // length also includes the typecode, so the actual data is length-1 267 | #if PY_MAJOR_VERSION == 2 268 | py = PyObject_CallFunction(u->ext_hook, (char*)"(is#)", typecode, pos, (Py_ssize_t)length-1); 269 | #else 270 | py = PyObject_CallFunction(u->ext_hook, (char*)"(iy#)", typecode, pos, (Py_ssize_t)length-1); 271 | #endif 272 | if (!py) 273 | return -1; 274 | *o = py; 275 | return 0; 276 | } 277 | 278 | #include "unpack_template.h" 279 | -------------------------------------------------------------------------------- /pandas_msgpack/move.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define COMPILING_IN_PY2 (PY_VERSION_HEX <= 0x03000000) 4 | 5 | #if !COMPILING_IN_PY2 6 | /* alias this because it is not aliased in Python 3 */ 7 | #define PyString_CheckExact PyBytes_CheckExact 8 | #define PyString_AS_STRING PyBytes_AS_STRING 9 | #define PyString_GET_SIZE PyBytes_GET_SIZE 10 | 11 | /* in python 3, we cannot intern bytes objects so this is always false */ 12 | #define PyString_CHECK_INTERNED(cs) 0 13 | #endif /* !COMPILING_IN_PY2 */ 14 | 15 | #ifndef Py_TPFLAGS_HAVE_GETCHARBUFFER 16 | #define Py_TPFLAGS_HAVE_GETCHARBUFFER 0 17 | #endif 18 | 19 | #ifndef Py_TPFLAGS_HAVE_NEWBUFFER 20 | #define Py_TPFLAGS_HAVE_NEWBUFFER 0 21 | #endif 22 | 23 | PyObject *badmove; /* bad move exception class */ 24 | 25 | typedef struct { 26 | PyObject_HEAD 27 | /* the bytes that own the buffer we are mutating */ 28 | PyObject *invalid_bytes; 29 | } 
stolenbufobject; 30 | 31 | PyTypeObject stolenbuf_type; /* forward declare type */ 32 | 33 | static void 34 | stolenbuf_dealloc(stolenbufobject *self) 35 | { 36 | Py_DECREF(self->invalid_bytes); 37 | PyObject_Del(self); 38 | } 39 | 40 | static int 41 | stolenbuf_getbuffer(stolenbufobject *self, Py_buffer *view, int flags) 42 | { 43 | return PyBuffer_FillInfo(view, 44 | (PyObject*) self, 45 | (void*) PyString_AS_STRING(self->invalid_bytes), 46 | PyString_GET_SIZE(self->invalid_bytes), 47 | 0, /* not readonly */ 48 | flags); 49 | } 50 | 51 | #if COMPILING_IN_PY2 52 | 53 | static Py_ssize_t 54 | stolenbuf_getreadwritebuf(stolenbufobject *self, Py_ssize_t segment, void **out) 55 | { 56 | if (segment != 0) { 57 | PyErr_SetString(PyExc_SystemError, 58 | "accessing non-existent string segment"); 59 | return -1; 60 | } 61 | *out = PyString_AS_STRING(self->invalid_bytes); 62 | return PyString_GET_SIZE(self->invalid_bytes); 63 | } 64 | 65 | static Py_ssize_t 66 | stolenbuf_getsegcount(stolenbufobject *self, Py_ssize_t *len) 67 | { 68 | if (len) { 69 | *len = PyString_GET_SIZE(self->invalid_bytes); 70 | } 71 | return 1; 72 | } 73 | 74 | PyBufferProcs stolenbuf_as_buffer = { 75 | (readbufferproc) stolenbuf_getreadwritebuf, 76 | (writebufferproc) stolenbuf_getreadwritebuf, 77 | (segcountproc) stolenbuf_getsegcount, 78 | (charbufferproc) stolenbuf_getreadwritebuf, 79 | (getbufferproc) stolenbuf_getbuffer, 80 | }; 81 | 82 | #else /* Python 3 */ 83 | 84 | PyBufferProcs stolenbuf_as_buffer = { 85 | (getbufferproc) stolenbuf_getbuffer, 86 | NULL, 87 | }; 88 | 89 | #endif /* COMPILING_IN_PY2 */ 90 | 91 | PyDoc_STRVAR(stolenbuf_doc, 92 | "A buffer that is wrapping a stolen bytes object's buffer."); 93 | 94 | PyTypeObject stolenbuf_type = { 95 | PyVarObject_HEAD_INIT(NULL, 0) 96 | "pandas.util._move.stolenbuf", /* tp_name */ 97 | sizeof(stolenbufobject), /* tp_basicsize */ 98 | 0, /* tp_itemsize */ 99 | (destructor) stolenbuf_dealloc, /* tp_dealloc */ 100 | 0, /* tp_print */ 101 | 0, 
/* tp_getattr */ 102 | 0, /* tp_setattr */ 103 | 0, /* tp_reserved */ 104 | 0, /* tp_repr */ 105 | 0, /* tp_as_number */ 106 | 0, /* tp_as_sequence */ 107 | 0, /* tp_as_mapping */ 108 | 0, /* tp_hash */ 109 | 0, /* tp_call */ 110 | 0, /* tp_str */ 111 | 0, /* tp_getattro */ 112 | 0, /* tp_setattro */ 113 | &stolenbuf_as_buffer, /* tp_as_buffer */ 114 | Py_TPFLAGS_DEFAULT | 115 | Py_TPFLAGS_HAVE_NEWBUFFER | 116 | Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */ 117 | stolenbuf_doc, /* tp_doc */ 118 | }; 119 | 120 | PyDoc_STRVAR( 121 | move_into_mutable_buffer_doc, 122 | "Moves a bytes object that is about to be destroyed into a mutable buffer\n" 123 | "without copying the data.\n" 124 | "\n" 125 | "Parameters\n" 126 | "----------\n" 127 | "bytes_rvalue : bytes with 1 refcount.\n" 128 | " The bytes object that you want to move into a mutable buffer. This\n" 129 | " cannot be a named object. It must only have a single reference.\n" 130 | "\n" 131 | "Returns\n" 132 | "-------\n" 133 | "buf : stolenbuf\n" 134 | " An object that supports the buffer protocol which can give a mutable\n" 135 | " view of the data that was previously owned by ``bytes_rvalue``.\n" 136 | "\n" 137 | "Raises\n" 138 | "------\n" 139 | "BadMove\n" 140 | " Raised when a move is attempted on an object with more than one\n" 141 | " reference.\n" 142 | "\n" 143 | "Notes\n" 144 | "-----\n" 145 | "If you want to use this function you are probably wrong.\n" 146 | "\n" 147 | "Warning: Do not call this function through *unpacking. This can\n" 148 | "potentially trick the reference checks which may allow you to get a\n" 149 | "mutable reference to a shared string!\n" 150 | "\n"); 151 | 152 | /* This is implemented as a standalone function instead of the ``tp_new`` of 153 | ``stolenbuf`` because we need to create a function using the METH_O flag 154 | to support Python 3.6. In python 3.6, PyCFunction calls from python code now 155 | count the reference owned by the argument tuple. 
This would cause the object 156 | to have 2 references if used with a direct call like: ``stolenbuf(a)``; 157 | however, if called through *unpacking like ``stolenbuf(*(a,))`` it would 158 | only have the one reference (the tuple). */ 159 | static PyObject* 160 | move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue) 161 | { 162 | stolenbufobject *ret; 163 | 164 | if (!PyString_CheckExact(bytes_rvalue)) { 165 | PyErr_SetString(PyExc_TypeError, 166 | "stolenbuf can only steal from bytes objects"); 167 | return NULL; 168 | } 169 | 170 | if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) { 171 | /* there is a reference other than the caller's stack or the string is 172 | interned */ 173 | PyErr_SetObject(badmove, bytes_rvalue); 174 | return NULL; 175 | } 176 | 177 | if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) { 178 | return NULL; 179 | } 180 | 181 | /* store the original bytes object in a field that is not 182 | exposed to python */ 183 | Py_INCREF(bytes_rvalue); 184 | ret->invalid_bytes = bytes_rvalue; 185 | return (PyObject*) ret; 186 | } 187 | 188 | PyMethodDef methods[] = { 189 | {"move_into_mutable_buffer", 190 | (PyCFunction) move_into_mutable_buffer, 191 | METH_O, 192 | move_into_mutable_buffer_doc}, 193 | {NULL}, 194 | }; 195 | 196 | #define MODULE_NAME "pandas_msgpack._move" 197 | 198 | #if !COMPILING_IN_PY2 199 | PyModuleDef _move_module = { 200 | PyModuleDef_HEAD_INIT, 201 | MODULE_NAME, 202 | NULL, 203 | -1, 204 | methods, 205 | }; 206 | #endif /* !COMPILING_IN_PY2 */ 207 | 208 | PyDoc_STRVAR( 209 | badmove_doc, 210 | "Exception used to indicate that a move was attempted on a value with\n" 211 | "more than a single reference.\n" 212 | "\n" 213 | "Parameters\n" 214 | "----------\n" 215 | "data : any\n" 216 | " The data which was passed to ``move_into_mutable_buffer``.\n" 217 | "\n" 218 | "See Also\n" 219 | "--------\n" 220 | "pandas.util._move.move_into_mutable_buffer\n"); 221 | 222 | PyMODINIT_FUNC 223 | 
#if !COMPILING_IN_PY2 224 | #define ERROR_RETURN NULL 225 | PyInit__move(void) 226 | #else 227 | #define ERROR_RETURN 228 | init_move(void) 229 | #endif /* !COMPILING_IN_PY2 */ 230 | { 231 | PyObject *m; 232 | 233 | if (!(badmove = PyErr_NewExceptionWithDoc("pandas.util._move.BadMove", 234 | badmove_doc, 235 | NULL, 236 | NULL))) { 237 | return ERROR_RETURN; 238 | } 239 | 240 | if (PyType_Ready(&stolenbuf_type)) { 241 | return ERROR_RETURN; 242 | } 243 | 244 | #if !COMPILING_IN_PY2 245 | if (!(m = PyModule_Create(&_move_module))) 246 | #else 247 | if (!(m = Py_InitModule(MODULE_NAME, methods))) 248 | #endif /* !COMPILING_IN_PY2 */ 249 | { 250 | return ERROR_RETURN; 251 | } 252 | 253 | if (PyModule_AddObject(m, 254 | "stolenbuf", 255 | (PyObject*) &stolenbuf_type)) { 256 | Py_DECREF(m); 257 | return ERROR_RETURN; 258 | } 259 | 260 | if (PyModule_AddObject(m, "BadMove", badmove)) { 261 | Py_DECREF(m); 262 | return ERROR_RETURN; 263 | } 264 | 265 | #if !COMPILING_IN_PY2 266 | return m; 267 | #endif /* !COMPILING_IN_PY2 */ 268 | } 269 | -------------------------------------------------------------------------------- /pandas_msgpack/msgpack/_packer.pyx: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | #cython: embedsignature=True 3 | 4 | from cpython cimport * 5 | from libc.stdlib cimport * 6 | from libc.string cimport * 7 | from libc.limits cimport * 8 | 9 | from .exceptions import PackValueError 10 | from . 
import ExtType 11 | 12 | 13 | cdef extern from "../includes/pack.h": 14 | struct msgpack_packer: 15 | char* buf 16 | size_t length 17 | size_t buf_size 18 | bint use_bin_type 19 | 20 | int msgpack_pack_int(msgpack_packer* pk, int d) 21 | int msgpack_pack_nil(msgpack_packer* pk) 22 | int msgpack_pack_true(msgpack_packer* pk) 23 | int msgpack_pack_false(msgpack_packer* pk) 24 | int msgpack_pack_long(msgpack_packer* pk, long d) 25 | int msgpack_pack_long_long(msgpack_packer* pk, long long d) 26 | int msgpack_pack_unsigned_long_long(msgpack_packer* pk, 27 | unsigned long long d) 28 | int msgpack_pack_float(msgpack_packer* pk, float d) 29 | int msgpack_pack_double(msgpack_packer* pk, double d) 30 | int msgpack_pack_array(msgpack_packer* pk, size_t l) 31 | int msgpack_pack_map(msgpack_packer* pk, size_t l) 32 | int msgpack_pack_raw(msgpack_packer* pk, size_t l) 33 | int msgpack_pack_bin(msgpack_packer* pk, size_t l) 34 | int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) 35 | int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) 36 | 37 | cdef int DEFAULT_RECURSE_LIMIT=511 38 | 39 | 40 | cdef class Packer(object): 41 | """ 42 | MessagePack Packer 43 | 44 | usage:: 45 | 46 | packer = Packer() 47 | astream.write(packer.pack(a)) 48 | astream.write(packer.pack(b)) 49 | 50 | Packer's constructor has some keyword arguments: 51 | 52 | :param callable default: 53 | Convert user type to builtin type that Packer supports. 54 | See also simplejson's document. 55 | :param str encoding: 56 | Convert unicode to bytes with this encoding. (default: 'utf-8') 57 | :param str unicode_errors: 58 | Error handler for encoding unicode. (default: 'strict') 59 | :param bool use_single_float: 60 | Use single precision float type for float. (default: False) 61 | :param bool autoreset: 62 | Reset buffer after each pack and return it's 63 | content as `bytes`. (default: True). 64 | If set this to false, use `bytes()` to get 65 | content and `.reset()` to clear buffer. 
66 | :param bool use_bin_type: 67 | Use bin type introduced in msgpack spec 2.0 for bytes. 68 | It also enable str8 type for unicode. 69 | """ 70 | cdef msgpack_packer pk 71 | cdef object _default 72 | cdef object _bencoding 73 | cdef object _berrors 74 | cdef char *encoding 75 | cdef char *unicode_errors 76 | cdef bool use_float 77 | cdef bint autoreset 78 | 79 | def __cinit__(self): 80 | cdef int buf_size = 1024 * 1024 81 | self.pk.buf = malloc(buf_size) 82 | if self.pk.buf == NULL: 83 | raise MemoryError("Unable to allocate internal buffer.") 84 | self.pk.buf_size = buf_size 85 | self.pk.length = 0 86 | 87 | def __init__(self, default=None, encoding='utf-8', 88 | unicode_errors='strict', use_single_float=False, 89 | bint autoreset=1, bint use_bin_type=0): 90 | """ 91 | """ 92 | self.use_float = use_single_float 93 | self.autoreset = autoreset 94 | self.pk.use_bin_type = use_bin_type 95 | if default is not None: 96 | if not PyCallable_Check(default): 97 | raise TypeError("default must be a callable.") 98 | self._default = default 99 | if encoding is None: 100 | self.encoding = NULL 101 | self.unicode_errors = NULL 102 | else: 103 | if isinstance(encoding, unicode): 104 | self._bencoding = encoding.encode('ascii') 105 | else: 106 | self._bencoding = encoding 107 | self.encoding = PyBytes_AsString(self._bencoding) 108 | if isinstance(unicode_errors, unicode): 109 | self._berrors = unicode_errors.encode('ascii') 110 | else: 111 | self._berrors = unicode_errors 112 | self.unicode_errors = PyBytes_AsString(self._berrors) 113 | 114 | def __dealloc__(self): 115 | free(self.pk.buf); 116 | 117 | cdef int _pack(self, object o, 118 | int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: 119 | cdef long long llval 120 | cdef unsigned long long ullval 121 | cdef long longval 122 | cdef float fval 123 | cdef double dval 124 | cdef char* rawval 125 | cdef int ret 126 | cdef dict d 127 | cdef size_t L 128 | cdef int default_used = 0 129 | 130 | if nest_limit < 0: 131 | raise 
PackValueError("recursion limit exceeded.") 132 | 133 | while True: 134 | if o is None: 135 | ret = msgpack_pack_nil(&self.pk) 136 | elif isinstance(o, bool): 137 | if o: 138 | ret = msgpack_pack_true(&self.pk) 139 | else: 140 | ret = msgpack_pack_false(&self.pk) 141 | elif PyLong_Check(o): 142 | # PyInt_Check(long) is True for Python 3. 143 | # Sow we should test long before int. 144 | if o > 0: 145 | ullval = o 146 | ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) 147 | else: 148 | llval = o 149 | ret = msgpack_pack_long_long(&self.pk, llval) 150 | elif PyInt_Check(o): 151 | longval = o 152 | ret = msgpack_pack_long(&self.pk, longval) 153 | elif PyFloat_Check(o): 154 | if self.use_float: 155 | fval = o 156 | ret = msgpack_pack_float(&self.pk, fval) 157 | else: 158 | dval = o 159 | ret = msgpack_pack_double(&self.pk, dval) 160 | elif PyBytes_Check(o): 161 | L = len(o) 162 | if L > (2**32) - 1: 163 | raise ValueError("bytes is too large") 164 | rawval = o 165 | ret = msgpack_pack_bin(&self.pk, L) 166 | if ret == 0: 167 | ret = msgpack_pack_raw_body(&self.pk, rawval, L) 168 | elif PyUnicode_Check(o): 169 | if not self.encoding: 170 | raise TypeError("Can't encode unicode string: " 171 | "no encoding is specified") 172 | o = PyUnicode_AsEncodedString(o, self.encoding, 173 | self.unicode_errors) 174 | L = len(o) 175 | if L > (2**32) - 1: 176 | raise ValueError("dict is too large") 177 | rawval = o 178 | ret = msgpack_pack_raw(&self.pk, len(o)) 179 | if ret == 0: 180 | ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) 181 | elif PyDict_CheckExact(o): 182 | d = o 183 | L = len(d) 184 | if L > (2**32) - 1: 185 | raise ValueError("dict is too large") 186 | ret = msgpack_pack_map(&self.pk, L) 187 | if ret == 0: 188 | for k, v in d.iteritems(): 189 | ret = self._pack(k, nest_limit - 1) 190 | if ret != 0: break 191 | ret = self._pack(v, nest_limit - 1) 192 | if ret != 0: break 193 | elif PyDict_Check(o): 194 | L = len(o) 195 | if L > (2**32) - 1: 196 | raise 
ValueError("dict is too large") 197 | ret = msgpack_pack_map(&self.pk, L) 198 | if ret == 0: 199 | for k, v in o.items(): 200 | ret = self._pack(k, nest_limit - 1) 201 | if ret != 0: break 202 | ret = self._pack(v, nest_limit - 1) 203 | if ret != 0: break 204 | elif isinstance(o, ExtType): 205 | # This should be before Tuple because ExtType is namedtuple. 206 | longval = o.code 207 | rawval = o.data 208 | L = len(o.data) 209 | if L > (2**32) - 1: 210 | raise ValueError("EXT data is too large") 211 | ret = msgpack_pack_ext(&self.pk, longval, L) 212 | ret = msgpack_pack_raw_body(&self.pk, rawval, L) 213 | elif PyTuple_Check(o) or PyList_Check(o): 214 | L = len(o) 215 | if L > (2**32) - 1: 216 | raise ValueError("list is too large") 217 | ret = msgpack_pack_array(&self.pk, L) 218 | if ret == 0: 219 | for v in o: 220 | ret = self._pack(v, nest_limit - 1) 221 | if ret != 0: break 222 | elif not default_used and self._default: 223 | o = self._default(o) 224 | default_used = 1 225 | continue 226 | else: 227 | raise TypeError("can't serialize %r" % (o,)) 228 | return ret 229 | 230 | cpdef pack(self, object obj): 231 | cdef int ret 232 | ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) 233 | if ret == -1: 234 | raise MemoryError 235 | elif ret: # should not happen. 
236 | raise TypeError 237 | if self.autoreset: 238 | buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) 239 | self.pk.length = 0 240 | return buf 241 | 242 | def pack_ext_type(self, typecode, data): 243 | msgpack_pack_ext(&self.pk, typecode, len(data)) 244 | msgpack_pack_raw_body(&self.pk, data, len(data)) 245 | 246 | def pack_array_header(self, size_t size): 247 | if size > (2**32) - 1: 248 | raise ValueError 249 | cdef int ret = msgpack_pack_array(&self.pk, size) 250 | if ret == -1: 251 | raise MemoryError 252 | elif ret: # should not happen 253 | raise TypeError 254 | if self.autoreset: 255 | buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) 256 | self.pk.length = 0 257 | return buf 258 | 259 | def pack_map_header(self, size_t size): 260 | if size > (2**32) - 1: 261 | raise ValueError 262 | cdef int ret = msgpack_pack_map(&self.pk, size) 263 | if ret == -1: 264 | raise MemoryError 265 | elif ret: # should not happen 266 | raise TypeError 267 | if self.autoreset: 268 | buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) 269 | self.pk.length = 0 270 | return buf 271 | 272 | def pack_map_pairs(self, object pairs): 273 | """ 274 | Pack *pairs* as msgpack map type. 275 | 276 | *pairs* should sequence of pair. 277 | (`len(pairs)` and `for k, v in pairs:` should be supported.) 
278 | """ 279 | cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) 280 | if ret == 0: 281 | for k, v in pairs: 282 | ret = self._pack(k) 283 | if ret != 0: break 284 | ret = self._pack(v) 285 | if ret != 0: break 286 | if ret == -1: 287 | raise MemoryError 288 | elif ret: # should not happen 289 | raise TypeError 290 | if self.autoreset: 291 | buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) 292 | self.pk.length = 0 293 | return buf 294 | 295 | def reset(self): 296 | """Clear internal buffer.""" 297 | self.pk.length = 0 298 | 299 | def bytes(self): 300 | """Return buffer content.""" 301 | return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) 302 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pandas-msgpack documentation build configuration file, created by 4 | # sphinx-quickstart on Wed Feb 8 10:52:12 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | # 27 | # needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. 
They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = ['sphinx.ext.autodoc', 33 | 'sphinx.ext.autosummary', 34 | 'sphinx.ext.doctest', 35 | 'sphinx.ext.extlinks', 36 | 'sphinx.ext.todo', 37 | 'numpydoc', # used to parse numpy-style docstrings for autodoc 38 | 'IPython.sphinxext.ipython_console_highlighting', 39 | 'IPython.sphinxext.ipython_directive', 40 | 'sphinx.ext.intersphinx', 41 | 'sphinx.ext.coverage', 42 | 'sphinx.ext.ifconfig', 43 | ] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix(es) of source filenames. 49 | # You can specify multiple suffix as a list of string: 50 | # 51 | # source_suffix = ['.rst', '.md'] 52 | source_suffix = '.rst' 53 | 54 | # The encoding of source files. 55 | # 56 | # source_encoding = 'utf-8-sig' 57 | 58 | # The master toctree document. 59 | master_doc = 'index' 60 | 61 | # General information about the project. 62 | project = u'pandas-msgpack' 63 | copyright = u'2017, PyData Development Team' 64 | author = u'PyData Development Team' 65 | 66 | # The version info for the project you're documenting, acts as replacement for 67 | # |version| and |release|, also used in various other places throughout the 68 | # built documents. 69 | # 70 | # The short X.Y version. 71 | version = u'0.1.0' 72 | # The full version, including alpha/beta/rc tags. 73 | release = u'0.1.0' 74 | 75 | # The language for content autogenerated by Sphinx. Refer to documentation 76 | # for a list of supported languages. 77 | # 78 | # This is also used if you do content translation via gettext catalogs. 79 | # Usually you set "language" from the command line for these cases. 
80 | language = None 81 | 82 | # There are two options for replacing |today|: either, you set today to some 83 | # non-false value, then it is used: 84 | # 85 | # today = '' 86 | # 87 | # Else, today_fmt is used as the format for a strftime call. 88 | # 89 | # today_fmt = '%B %d, %Y' 90 | 91 | # List of patterns, relative to source directory, that match files and 92 | # directories to ignore when looking for source files. 93 | # This patterns also effect to html_static_path and html_extra_path 94 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 95 | 96 | # The reST default role (used for this markup: `text`) to use for all 97 | # documents. 98 | # 99 | # default_role = None 100 | 101 | # If true, '()' will be appended to :func: etc. cross-reference text. 102 | # 103 | # add_function_parentheses = True 104 | 105 | # If true, the current module name will be prepended to all description 106 | # unit titles (such as .. function::). 107 | # 108 | # add_module_names = True 109 | 110 | # If true, sectionauthor and moduleauthor directives will be shown in the 111 | # output. They are ignored by default. 112 | # 113 | # show_authors = False 114 | 115 | # The name of the Pygments (syntax highlighting) style to use. 116 | pygments_style = 'sphinx' 117 | 118 | # A list of ignored prefixes for module index sorting. 119 | # modindex_common_prefix = [] 120 | 121 | # If true, keep warnings as "system message" paragraphs in the built documents. 122 | # keep_warnings = False 123 | 124 | # If true, `todo` and `todoList` produce output, else they produce nothing. 
125 | todo_include_todos = False 126 | 127 | 128 | # -- Options for HTML output ---------------------------------------------- 129 | 130 | # Taken from docs.readthedocs.io: 131 | # on_rtd is whether we are on readthedocs.io 132 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 133 | 134 | if not on_rtd: # only import and set the theme if we're building docs locally 135 | import sphinx_rtd_theme 136 | html_theme = 'sphinx_rtd_theme' 137 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 138 | 139 | # The theme to use for HTML and HTML Help pages. See the documentation for 140 | # a list of builtin themes. 141 | # 142 | # html_theme = 'alabaster' 143 | 144 | # Theme options are theme-specific and customize the look and feel of a theme 145 | # further. For a list of options available for each theme, see the 146 | # documentation. 147 | # 148 | # html_theme_options = {} 149 | 150 | # Add any paths that contain custom themes here, relative to this directory. 151 | # html_theme_path = [] 152 | 153 | # The name for this set of Sphinx documents. 154 | # " v documentation" by default. 155 | # 156 | # html_title = u'pandas-msgpack v0.1.0' 157 | 158 | # A shorter title for the navigation bar. Default is the same as html_title. 159 | # 160 | # html_short_title = None 161 | 162 | # The name of an image file (relative to this directory) to place at the top 163 | # of the sidebar. 164 | # 165 | # html_logo = None 166 | 167 | # The name of an image file (relative to this directory) to use as a favicon of 168 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 169 | # pixels large. 170 | # 171 | # html_favicon = None 172 | 173 | # Add any paths that contain custom static files (such as style sheets) here, 174 | # relative to this directory. They are copied after the builtin static files, 175 | # so a file named "default.css" will overwrite the builtin "default.css". 
176 | html_static_path = ['_static'] 177 | 178 | # Add any extra paths that contain custom files (such as robots.txt or 179 | # .htaccess) here, relative to this directory. These files are copied 180 | # directly to the root of the documentation. 181 | # 182 | # html_extra_path = [] 183 | 184 | # If not None, a 'Last updated on:' timestamp is inserted at every page 185 | # bottom, using the given strftime format. 186 | # The empty string is equivalent to '%b %d, %Y'. 187 | # 188 | # html_last_updated_fmt = None 189 | 190 | # If true, SmartyPants will be used to convert quotes and dashes to 191 | # typographically correct entities. 192 | # 193 | # html_use_smartypants = True 194 | 195 | # Custom sidebar templates, maps document names to template names. 196 | # 197 | # html_sidebars = {} 198 | 199 | # Additional templates that should be rendered to pages, maps page names to 200 | # template names. 201 | # 202 | # html_additional_pages = {} 203 | 204 | # If false, no module index is generated. 205 | # 206 | # html_domain_indices = True 207 | 208 | # If false, no index is generated. 209 | # 210 | # html_use_index = True 211 | 212 | # If true, the index is split into individual pages for each letter. 213 | # 214 | # html_split_index = False 215 | 216 | # If true, links to the reST sources are added to the pages. 217 | # 218 | # html_show_sourcelink = True 219 | 220 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 221 | # 222 | # html_show_sphinx = True 223 | 224 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 225 | # 226 | # html_show_copyright = True 227 | 228 | # If true, an OpenSearch description file will be output, and all pages will 229 | # contain a tag referring to it. The value of this option must be the 230 | # base URL from which the finished HTML is served. 231 | # 232 | # html_use_opensearch = '' 233 | 234 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 
235 | # html_file_suffix = None 236 | 237 | # Language to be used for generating the HTML full-text search index. 238 | # Sphinx supports the following languages: 239 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 240 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' 241 | # 242 | # html_search_language = 'en' 243 | 244 | # A dictionary with options for the search language support, empty by default. 245 | # 'ja' uses this config value. 246 | # 'zh' user can custom change `jieba` dictionary path. 247 | # 248 | # html_search_options = {'type': 'default'} 249 | 250 | # The name of a javascript file (relative to the configuration directory) that 251 | # implements a search results scorer. If empty, the default will be used. 252 | # 253 | # html_search_scorer = 'scorer.js' 254 | 255 | # Output file base name for HTML help builder. 256 | htmlhelp_basename = 'pandas-msgpackdoc' 257 | 258 | # -- Options for LaTeX output --------------------------------------------- 259 | 260 | latex_elements = { 261 | # The paper size ('letterpaper' or 'a4paper'). 262 | # 263 | # 'papersize': 'letterpaper', 264 | 265 | # The font size ('10pt', '11pt' or '12pt'). 266 | # 267 | # 'pointsize': '10pt', 268 | 269 | # Additional stuff for the LaTeX preamble. 270 | # 271 | # 'preamble': '', 272 | 273 | # Latex figure (float) alignment 274 | # 275 | # 'figure_align': 'htbp', 276 | } 277 | 278 | # Grouping the document tree into LaTeX files. List of tuples 279 | # (source start file, target name, title, 280 | # author, documentclass [howto, manual, or own class]). 281 | latex_documents = [ 282 | (master_doc, 'pandas-msgpack.tex', u'pandas-msgpack Documentation', 283 | u'PyData Development Team', 'manual'), 284 | ] 285 | 286 | # The name of an image file (relative to this directory) to place at the top of 287 | # the title page. 288 | # 289 | # latex_logo = None 290 | 291 | # For "manual" documents, if this is true, then toplevel headings are parts, 292 | # not chapters. 
293 | # 294 | # latex_use_parts = False 295 | 296 | # If true, show page references after internal links. 297 | # 298 | # latex_show_pagerefs = False 299 | 300 | # If true, show URL addresses after external links. 301 | # 302 | # latex_show_urls = False 303 | 304 | # Documents to append as an appendix to all manuals. 305 | # 306 | # latex_appendices = [] 307 | 308 | # It false, will not define \strong, \code, itleref, \crossref ... but only 309 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 310 | # packages. 311 | # 312 | # latex_keep_old_macro_names = True 313 | 314 | # If false, no module index is generated. 315 | # 316 | # latex_domain_indices = True 317 | 318 | 319 | # -- Options for manual page output --------------------------------------- 320 | 321 | # One entry per manual page. List of tuples 322 | # (source start file, name, description, authors, manual section). 323 | man_pages = [ 324 | (master_doc, 'pandas-msgpack', u'pandas-msgpack Documentation', 325 | [author], 1) 326 | ] 327 | 328 | # If true, show URL addresses after external links. 329 | # 330 | # man_show_urls = False 331 | 332 | 333 | # -- Options for Texinfo output ------------------------------------------- 334 | 335 | # Grouping the document tree into Texinfo files. List of tuples 336 | # (source start file, target name, title, author, 337 | # dir menu entry, description, category) 338 | texinfo_documents = [ 339 | (master_doc, 'pandas-msgpack', u'pandas-msgpack Documentation', 340 | author, 'pandas-msgpack', 'One line description of project.', 341 | 'Miscellaneous'), 342 | ] 343 | 344 | # Documents to append as an appendix to all manuals. 345 | # 346 | # texinfo_appendices = [] 347 | 348 | # If false, no module index is generated. 349 | # 350 | # texinfo_domain_indices = True 351 | 352 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
353 | # 354 | # texinfo_show_urls = 'footnote' 355 | 356 | # If true, do not generate a @detailmenu in the "Top" node's menu. 357 | # 358 | # texinfo_no_detailmenu = False 359 | 360 | 361 | # Example configuration for intersphinx: refer to the Python standard library. 362 | intersphinx_mapping = {'https://docs.python.org/': None} 363 | 364 | extlinks = {'issue': ('https://github.com/pydata/pandas-msgpack/issues/%s', 365 | 'GH#'), 366 | 'pr': ('https://github.com/pydata/pandas-msgpack/pull/%s', 'GH#')} 367 | -------------------------------------------------------------------------------- /pandas_msgpack/includes/unpack_template.h: -------------------------------------------------------------------------------- 1 | /* 2 | * MessagePack unpacking routine template 3 | * 4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | #ifndef USE_CASE_RANGE 20 | #ifdef __GNUC__ 21 | #define USE_CASE_RANGE 22 | #endif 23 | #endif 24 | 25 | typedef struct unpack_stack { 26 | PyObject* obj; 27 | size_t size; 28 | size_t count; 29 | unsigned int ct; 30 | PyObject* map_key; 31 | } unpack_stack; 32 | 33 | struct unpack_context { 34 | unpack_user user; 35 | unsigned int cs; 36 | unsigned int trail; 37 | unsigned int top; 38 | /* 39 | unpack_stack* stack; 40 | unsigned int stack_size; 41 | unpack_stack embed_stack[MSGPACK_EMBED_STACK_SIZE]; 42 | */ 43 | unpack_stack stack[MSGPACK_EMBED_STACK_SIZE]; 44 | }; 45 | 46 | 47 | static inline void unpack_init(unpack_context* ctx) 48 | { 49 | ctx->cs = CS_HEADER; 50 | ctx->trail = 0; 51 | ctx->top = 0; 52 | /* 53 | ctx->stack = ctx->embed_stack; 54 | ctx->stack_size = MSGPACK_EMBED_STACK_SIZE; 55 | */ 56 | ctx->stack[0].obj = unpack_callback_root(&ctx->user); 57 | } 58 | 59 | /* 60 | static inline void unpack_destroy(unpack_context* ctx) 61 | { 62 | if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) { 63 | free(ctx->stack); 64 | } 65 | } 66 | */ 67 | 68 | static inline PyObject* unpack_data(unpack_context* ctx) 69 | { 70 | return (ctx)->stack[0].obj; 71 | } 72 | 73 | 74 | template 75 | static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off) 76 | { 77 | assert(len >= *off); 78 | 79 | const unsigned char* p = (unsigned char*)data + *off; 80 | const unsigned char* const pe = (unsigned char*)data + len; 81 | const void* n = NULL; 82 | 83 | unsigned int trail = ctx->trail; 84 | unsigned int cs = ctx->cs; 85 | unsigned int top = ctx->top; 86 | unpack_stack* stack = ctx->stack; 87 | /* 88 | unsigned int stack_size = ctx->stack_size; 89 | */ 90 | unpack_user* user = &ctx->user; 91 | 92 | PyObject* obj = NULL; 93 | unpack_stack* c = NULL; 94 | 95 | int ret; 96 | 97 | #define construct_cb(name) \ 98 | construct && unpack_callback ## name 99 | 100 | #define push_simple_value(func) \ 101 | if(construct_cb(func)(user, 
&obj) < 0) { goto _failed; } \ 102 | goto _push 103 | #define push_fixed_value(func, arg) \ 104 | if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ 105 | goto _push 106 | #define push_variable_value(func, base, pos, len) \ 107 | if(construct_cb(func)(user, \ 108 | (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ 109 | goto _push 110 | 111 | #define again_fixed_trail(_cs, trail_len) \ 112 | trail = trail_len; \ 113 | cs = _cs; \ 114 | goto _fixed_trail_again 115 | #define again_fixed_trail_if_zero(_cs, trail_len, ifzero) \ 116 | trail = trail_len; \ 117 | if(trail == 0) { goto ifzero; } \ 118 | cs = _cs; \ 119 | goto _fixed_trail_again 120 | 121 | #define start_container(func, count_, ct_) \ 122 | if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ 123 | if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ 124 | if((count_) == 0) { obj = stack[top].obj; \ 125 | if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ 126 | goto _push; } \ 127 | stack[top].ct = ct_; \ 128 | stack[top].size = count_; \ 129 | stack[top].count = 0; \ 130 | ++top; \ 131 | /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ 132 | /*printf("stack push %d\n", top);*/ \ 133 | /* FIXME \ 134 | if(top >= stack_size) { \ 135 | if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ 136 | size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \ 137 | size_t nsize = csize * 2; \ 138 | unpack_stack* tmp = (unpack_stack*)malloc(nsize); \ 139 | if(tmp == NULL) { goto _failed; } \ 140 | memcpy(tmp, ctx->stack, csize); \ 141 | ctx->stack = stack = tmp; \ 142 | ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ 143 | } else { \ 144 | size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \ 145 | unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \ 146 | if(tmp == NULL) { goto _failed; } \ 147 | ctx->stack = stack = tmp; \ 148 | ctx->stack_size = stack_size = 
stack_size * 2; \ 149 | } \ 150 | } \ 151 | */ \ 152 | goto _header_again 153 | 154 | #define NEXT_CS(p) ((unsigned int)*p & 0x1f) 155 | 156 | #ifdef USE_CASE_RANGE 157 | #define SWITCH_RANGE_BEGIN switch(*p) { 158 | #define SWITCH_RANGE(FROM, TO) case FROM ... TO: 159 | #define SWITCH_RANGE_DEFAULT default: 160 | #define SWITCH_RANGE_END } 161 | #else 162 | #define SWITCH_RANGE_BEGIN { if(0) { 163 | #define SWITCH_RANGE(FROM, TO) } else if(FROM <= *p && *p <= TO) { 164 | #define SWITCH_RANGE_DEFAULT } else { 165 | #define SWITCH_RANGE_END } } 166 | #endif 167 | 168 | if(p == pe) { goto _out; } 169 | do { 170 | switch(cs) { 171 | case CS_HEADER: 172 | SWITCH_RANGE_BEGIN 173 | SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum 174 | push_fixed_value(_uint8, *(uint8_t*)p); 175 | SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum 176 | push_fixed_value(_int8, *(int8_t*)p); 177 | SWITCH_RANGE(0xc0, 0xdf) // Variable 178 | switch(*p) { 179 | case 0xc0: // nil 180 | push_simple_value(_nil); 181 | //case 0xc1: // never used 182 | case 0xc2: // false 183 | push_simple_value(_false); 184 | case 0xc3: // true 185 | push_simple_value(_true); 186 | case 0xc4: // bin 8 187 | again_fixed_trail(NEXT_CS(p), 1); 188 | case 0xc5: // bin 16 189 | again_fixed_trail(NEXT_CS(p), 2); 190 | case 0xc6: // bin 32 191 | again_fixed_trail(NEXT_CS(p), 4); 192 | case 0xc7: // ext 8 193 | again_fixed_trail(NEXT_CS(p), 1); 194 | case 0xc8: // ext 16 195 | again_fixed_trail(NEXT_CS(p), 2); 196 | case 0xc9: // ext 32 197 | again_fixed_trail(NEXT_CS(p), 4); 198 | case 0xca: // float 199 | case 0xcb: // double 200 | case 0xcc: // unsigned int 8 201 | case 0xcd: // unsigned int 16 202 | case 0xce: // unsigned int 32 203 | case 0xcf: // unsigned int 64 204 | case 0xd0: // signed int 8 205 | case 0xd1: // signed int 16 206 | case 0xd2: // signed int 32 207 | case 0xd3: // signed int 64 208 | again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); 209 | case 0xd4: // fixext 1 210 | case 0xd5: // fixext 2 211 
| case 0xd6: // fixext 4 212 | case 0xd7: // fixext 8 213 | again_fixed_trail_if_zero(ACS_EXT_VALUE, 214 | (1 << (((unsigned int)*p) & 0x03))+1, 215 | _ext_zero); 216 | case 0xd8: // fixext 16 217 | again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); 218 | case 0xd9: // str 8 219 | again_fixed_trail(NEXT_CS(p), 1); 220 | case 0xda: // raw 16 221 | case 0xdb: // raw 32 222 | case 0xdc: // array 16 223 | case 0xdd: // array 32 224 | case 0xde: // map 16 225 | case 0xdf: // map 32 226 | again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); 227 | default: 228 | goto _failed; 229 | } 230 | SWITCH_RANGE(0xa0, 0xbf) // FixRaw 231 | again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); 232 | SWITCH_RANGE(0x90, 0x9f) // FixArray 233 | start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); 234 | SWITCH_RANGE(0x80, 0x8f) // FixMap 235 | start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); 236 | 237 | SWITCH_RANGE_DEFAULT 238 | goto _failed; 239 | SWITCH_RANGE_END 240 | // end CS_HEADER 241 | 242 | 243 | _fixed_trail_again: 244 | ++p; 245 | 246 | default: 247 | if((size_t)(pe - p) < trail) { goto _out; } 248 | n = p; p += trail - 1; 249 | switch(cs) { 250 | case CS_EXT_8: 251 | again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); 252 | case CS_EXT_16: 253 | again_fixed_trail_if_zero(ACS_EXT_VALUE, 254 | _msgpack_load16(uint16_t,n)+1, 255 | _ext_zero); 256 | case CS_EXT_32: 257 | again_fixed_trail_if_zero(ACS_EXT_VALUE, 258 | _msgpack_load32(uint32_t,n)+1, 259 | _ext_zero); 260 | case CS_FLOAT: { 261 | union { uint32_t i; float f; } mem; 262 | mem.i = _msgpack_load32(uint32_t,n); 263 | push_fixed_value(_float, mem.f); } 264 | case CS_DOUBLE: { 265 | union { uint64_t i; double f; } mem; 266 | mem.i = _msgpack_load64(uint64_t,n); 267 | #if defined(__arm__) && !(__ARM_EABI__) // arm-oabi 268 | // https://github.com/msgpack/msgpack-perl/pull/1 269 | mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 
32UL); 270 | #endif 271 | push_fixed_value(_double, mem.f); } 272 | case CS_UINT_8: 273 | push_fixed_value(_uint8, *(uint8_t*)n); 274 | case CS_UINT_16: 275 | push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); 276 | case CS_UINT_32: 277 | push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); 278 | case CS_UINT_64: 279 | push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); 280 | 281 | case CS_INT_8: 282 | push_fixed_value(_int8, *(int8_t*)n); 283 | case CS_INT_16: 284 | push_fixed_value(_int16, _msgpack_load16(int16_t,n)); 285 | case CS_INT_32: 286 | push_fixed_value(_int32, _msgpack_load32(int32_t,n)); 287 | case CS_INT_64: 288 | push_fixed_value(_int64, _msgpack_load64(int64_t,n)); 289 | 290 | case CS_BIN_8: 291 | again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); 292 | case CS_BIN_16: 293 | again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero); 294 | case CS_BIN_32: 295 | again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load32(uint32_t,n), _bin_zero); 296 | case ACS_BIN_VALUE: 297 | _bin_zero: 298 | push_variable_value(_bin, data, n, trail); 299 | 300 | case CS_RAW_8: 301 | again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero); 302 | case CS_RAW_16: 303 | again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); 304 | case CS_RAW_32: 305 | again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); 306 | case ACS_RAW_VALUE: 307 | _raw_zero: 308 | push_variable_value(_raw, data, n, trail); 309 | 310 | case ACS_EXT_VALUE: 311 | _ext_zero: 312 | push_variable_value(_ext, data, n, trail); 313 | 314 | case CS_ARRAY_16: 315 | start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); 316 | case CS_ARRAY_32: 317 | /* FIXME security guard */ 318 | start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); 319 | 320 | case CS_MAP_16: 321 | start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); 322 | case CS_MAP_32: 323 | /* FIXME 
security guard */ 324 | start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); 325 | 326 | default: 327 | goto _failed; 328 | } 329 | } 330 | 331 | _push: 332 | if(top == 0) { goto _finish; } 333 | c = &stack[top-1]; 334 | switch(c->ct) { 335 | case CT_ARRAY_ITEM: 336 | if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } 337 | if(++c->count == c->size) { 338 | obj = c->obj; 339 | if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; } 340 | --top; 341 | /*printf("stack pop %d\n", top);*/ 342 | goto _push; 343 | } 344 | goto _header_again; 345 | case CT_MAP_KEY: 346 | c->map_key = obj; 347 | c->ct = CT_MAP_VALUE; 348 | goto _header_again; 349 | case CT_MAP_VALUE: 350 | if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } 351 | if(++c->count == c->size) { 352 | obj = c->obj; 353 | if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } 354 | --top; 355 | /*printf("stack pop %d\n", top);*/ 356 | goto _push; 357 | } 358 | c->ct = CT_MAP_KEY; 359 | goto _header_again; 360 | 361 | default: 362 | goto _failed; 363 | } 364 | 365 | _header_again: 366 | cs = CS_HEADER; 367 | ++p; 368 | } while(p != pe); 369 | goto _out; 370 | 371 | 372 | _finish: 373 | if (!construct) 374 | unpack_callback_nil(user, &obj); 375 | stack[0].obj = obj; 376 | ++p; 377 | ret = 1; 378 | /*printf("-- finish --\n"); */ 379 | goto _end; 380 | 381 | _failed: 382 | /*printf("** FAILED **\n"); */ 383 | ret = -1; 384 | goto _end; 385 | 386 | _out: 387 | ret = 0; 388 | goto _end; 389 | 390 | _end: 391 | ctx->cs = cs; 392 | ctx->trail = trail; 393 | ctx->top = top; 394 | *off = p - (const unsigned char*)data; 395 | 396 | return ret; 397 | #undef construct_cb 398 | } 399 | 400 | #undef SWITCH_RANGE_BEGIN 401 | #undef SWITCH_RANGE 402 | #undef SWITCH_RANGE_DEFAULT 403 | #undef SWITCH_RANGE_END 404 | #undef push_simple_value 405 | #undef push_fixed_value 406 | #undef push_variable_value 407 | #undef again_fixed_trail 
408 | #undef again_fixed_trail_if_zero 409 | #undef start_container 410 | 411 | template 412 | static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off) 413 | { 414 | assert(len >= *off); 415 | uint32_t size; 416 | const unsigned char *const p = (unsigned char*)data + *off; 417 | 418 | #define inc_offset(inc) \ 419 | if (len - *off < inc) \ 420 | return 0; \ 421 | *off += inc; 422 | 423 | switch (*p) { 424 | case var_offset: 425 | inc_offset(3); 426 | size = _msgpack_load16(uint16_t, p + 1); 427 | break; 428 | case var_offset + 1: 429 | inc_offset(5); 430 | size = _msgpack_load32(uint32_t, p + 1); 431 | break; 432 | #ifdef USE_CASE_RANGE 433 | case fixed_offset + 0x0 ... fixed_offset + 0xf: 434 | #else 435 | case fixed_offset + 0x0: 436 | case fixed_offset + 0x1: 437 | case fixed_offset + 0x2: 438 | case fixed_offset + 0x3: 439 | case fixed_offset + 0x4: 440 | case fixed_offset + 0x5: 441 | case fixed_offset + 0x6: 442 | case fixed_offset + 0x7: 443 | case fixed_offset + 0x8: 444 | case fixed_offset + 0x9: 445 | case fixed_offset + 0xa: 446 | case fixed_offset + 0xb: 447 | case fixed_offset + 0xc: 448 | case fixed_offset + 0xd: 449 | case fixed_offset + 0xe: 450 | case fixed_offset + 0xf: 451 | #endif 452 | ++*off; 453 | size = ((unsigned int)*p) & 0x0f; 454 | break; 455 | default: 456 | PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); 457 | return -1; 458 | } 459 | unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); 460 | return 1; 461 | } 462 | 463 | #undef SWITCH_RANGE_BEGIN 464 | #undef SWITCH_RANGE 465 | #undef SWITCH_RANGE_DEFAULT 466 | #undef SWITCH_RANGE_END 467 | 468 | static const execute_fn unpack_construct = &unpack_execute; 469 | static const execute_fn unpack_skip = &unpack_execute; 470 | static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; 471 | static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; 472 | 473 | #undef 
NEXT_CS 474 | 475 | /* vim: set ts=4 sw=4 sts=4 expandtab */ 476 | -------------------------------------------------------------------------------- /pandas_msgpack/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.15 (https://github.com/warner/python-versioneer) 10 | 11 | # flake8: noqa 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | # these strings will be replaced by git during git-archive. 22 | # setup.py/versioneer.py will grep for the variable names, so they must 23 | # each be defined on a line of their own. _version.py will just call 24 | # get_keywords(). 
25 | git_refnames = "$Format:%d$" 26 | git_full = "$Format:%H$" 27 | keywords = {"refnames": git_refnames, "full": git_full} 28 | return keywords 29 | 30 | 31 | class VersioneerConfig: 32 | pass 33 | 34 | 35 | def get_config(): 36 | # these strings are filled in when 'setup.py versioneer' creates 37 | # _version.py 38 | cfg = VersioneerConfig() 39 | cfg.VCS = "git" 40 | cfg.style = "pep440" 41 | cfg.tag_prefix = "" 42 | cfg.parentdir_prefix = "pandas-msgpack" 43 | cfg.versionfile_source = "pandas_msgpack/_version.py" 44 | cfg.verbose = False 45 | return cfg 46 | 47 | 48 | class NotThisMethod(Exception): 49 | pass 50 | 51 | 52 | LONG_VERSION_PY = {} 53 | HANDLERS = {} 54 | 55 | 56 | def register_vcs_handler(vcs, method): # decorator 57 | def decorate(f): 58 | if vcs not in HANDLERS: 59 | HANDLERS[vcs] = {} 60 | HANDLERS[vcs][method] = f 61 | return f 62 | return decorate 63 | 64 | 65 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 66 | assert isinstance(commands, list) 67 | p = None 68 | for c in commands: 69 | try: 70 | dispcmd = str([c] + args) 71 | # remember shell=False, so use git.cmd on windows, not just git 72 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 73 | stderr=(subprocess.PIPE if hide_stderr 74 | else None)) 75 | break 76 | except EnvironmentError: 77 | e = sys.exc_info()[1] 78 | if e.errno == errno.ENOENT: 79 | continue 80 | if verbose: 81 | print("unable to run %s" % dispcmd) 82 | print(e) 83 | return None 84 | else: 85 | if verbose: 86 | print("unable to find command, tried %s" % (commands,)) 87 | return None 88 | stdout = p.communicate()[0].strip() 89 | if sys.version_info[0] >= 3: 90 | stdout = stdout.decode() 91 | if p.returncode != 0: 92 | if verbose: 93 | print("unable to run %s (error)" % dispcmd) 94 | return None 95 | return stdout 96 | 97 | 98 | def versions_from_parentdir(parentdir_prefix, root, verbose): 99 | # Source tarballs conventionally unpack into a directory that includes 100 | # 
both the project name and a version string. 101 | dirname = os.path.basename(root) 102 | if not dirname.startswith(parentdir_prefix): 103 | if verbose: 104 | print("guessing rootdir is '%s', but '%s' doesn't start with " 105 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 106 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 107 | return {"version": dirname[len(parentdir_prefix):], 108 | "full-revisionid": None, 109 | "dirty": False, "error": None} 110 | 111 | 112 | @register_vcs_handler("git", "get_keywords") 113 | def git_get_keywords(versionfile_abs): 114 | # the code embedded in _version.py can just fetch the value of these 115 | # keywords. When used from setup.py, we don't want to import _version.py, 116 | # so we do it with a regexp instead. This function is not used from 117 | # _version.py. 118 | keywords = {} 119 | try: 120 | f = open(versionfile_abs, "r") 121 | for line in f.readlines(): 122 | if line.strip().startswith("git_refnames ="): 123 | mo = re.search(r'=\s*"(.*)"', line) 124 | if mo: 125 | keywords["refnames"] = mo.group(1) 126 | if line.strip().startswith("git_full ="): 127 | mo = re.search(r'=\s*"(.*)"', line) 128 | if mo: 129 | keywords["full"] = mo.group(1) 130 | f.close() 131 | except EnvironmentError: 132 | pass 133 | return keywords 134 | 135 | 136 | @register_vcs_handler("git", "keywords") 137 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 138 | if not keywords: 139 | raise NotThisMethod("no keywords at all, weird") 140 | refnames = keywords["refnames"].strip() 141 | if refnames.startswith("$Format"): 142 | if verbose: 143 | print("keywords are unexpanded, not using") 144 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 145 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 146 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 147 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
148 | TAG = "tag: " 149 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 150 | if not tags: 151 | # Either we're using git < 1.8.3, or there really are no tags. We use 152 | # a heuristic: assume all version tags have a digit. The old git %d 153 | # expansion behaves like git log --decorate=short and strips out the 154 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 155 | # between branches and tags. By ignoring refnames without digits, we 156 | # filter out many common branch names like "release" and 157 | # "stabilization", as well as "HEAD" and "master". 158 | tags = set([r for r in refs if re.search(r'\d', r)]) 159 | if verbose: 160 | print("discarding '%s', no digits" % ",".join(refs - tags)) 161 | if verbose: 162 | print("likely tags: %s" % ",".join(sorted(tags))) 163 | for ref in sorted(tags): 164 | # sorting will prefer e.g. "2.0" over "2.0rc1" 165 | if ref.startswith(tag_prefix): 166 | r = ref[len(tag_prefix):] 167 | if verbose: 168 | print("picking %s" % r) 169 | return {"version": r, 170 | "full-revisionid": keywords["full"].strip(), 171 | "dirty": False, "error": None 172 | } 173 | # no suitable tags, so version is "0+unknown", but full hex is still there 174 | if verbose: 175 | print("no suitable tags, using unknown + full revision id") 176 | return {"version": "0+unknown", 177 | "full-revisionid": keywords["full"].strip(), 178 | "dirty": False, "error": "no suitable tags"} 179 | 180 | 181 | @register_vcs_handler("git", "pieces_from_vcs") 182 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 183 | # this runs 'git' from the root of the source tree. This only gets called 184 | # if the git-archive 'subst' keywords were *not* expanded, and 185 | # _version.py hasn't already been rewritten with a short version string, 186 | # meaning we're inside a checked out source tree. 
187 | 188 | if not os.path.exists(os.path.join(root, ".git")): 189 | if verbose: 190 | print("no .git in %s" % root) 191 | raise NotThisMethod("no .git directory") 192 | 193 | GITS = ["git"] 194 | if sys.platform == "win32": 195 | GITS = ["git.cmd", "git.exe"] 196 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 197 | # if there are no tags, this yields HEX[-dirty] (no NUM) 198 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 199 | "--always", "--long"], 200 | cwd=root) 201 | # --long was added in git-1.5.5 202 | if describe_out is None: 203 | raise NotThisMethod("'git describe' failed") 204 | describe_out = describe_out.strip() 205 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 206 | if full_out is None: 207 | raise NotThisMethod("'git rev-parse' failed") 208 | full_out = full_out.strip() 209 | 210 | pieces = {} 211 | pieces["long"] = full_out 212 | pieces["short"] = full_out[:7] # maybe improved later 213 | pieces["error"] = None 214 | 215 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 216 | # TAG might have hyphens. 217 | git_describe = describe_out 218 | 219 | # look for -dirty suffix 220 | dirty = git_describe.endswith("-dirty") 221 | pieces["dirty"] = dirty 222 | if dirty: 223 | git_describe = git_describe[:git_describe.rindex("-dirty")] 224 | 225 | # now we have TAG-NUM-gHEX or HEX 226 | 227 | if "-" in git_describe: 228 | # TAG-NUM-gHEX 229 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 230 | if not mo: 231 | # unparseable. Maybe git-describe is misbehaving? 
232 | pieces["error"] = ("unable to parse git-describe output: '%s'" 233 | % describe_out) 234 | return pieces 235 | 236 | # tag 237 | full_tag = mo.group(1) 238 | if not full_tag.startswith(tag_prefix): 239 | if verbose: 240 | fmt = "tag '%s' doesn't start with prefix '%s'" 241 | print(fmt % (full_tag, tag_prefix)) 242 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 243 | % (full_tag, tag_prefix)) 244 | return pieces 245 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 246 | 247 | # distance: number of commits since tag 248 | pieces["distance"] = int(mo.group(2)) 249 | 250 | # commit: short hex revision ID 251 | pieces["short"] = mo.group(3) 252 | 253 | else: 254 | # HEX: no tags 255 | pieces["closest-tag"] = None 256 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 257 | cwd=root) 258 | pieces["distance"] = int(count_out) # total number of commits 259 | 260 | return pieces 261 | 262 | 263 | def plus_or_dot(pieces): 264 | if "+" in pieces.get("closest-tag", ""): 265 | return "." 266 | return "+" 267 | 268 | 269 | def render_pep440(pieces): 270 | # now build up version string, with post-release "local version 271 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 272 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 273 | 274 | # exceptions: 275 | # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 276 | 277 | if pieces["closest-tag"]: 278 | rendered = pieces["closest-tag"] 279 | if pieces["distance"] or pieces["dirty"]: 280 | rendered += plus_or_dot(pieces) 281 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 282 | if pieces["dirty"]: 283 | rendered += ".dirty" 284 | else: 285 | # exception #1 286 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 287 | pieces["short"]) 288 | if pieces["dirty"]: 289 | rendered += ".dirty" 290 | return rendered 291 | 292 | 293 | def render_pep440_pre(pieces): 294 | # TAG[.post.devDISTANCE] . 
# ---------------------------------------------------------------------------
# pandas_msgpack/_version.py (versioneer boilerplate) -- recovered from a
# line-wrapped repository dump and re-flowed into valid Python. Token
# content is unchanged; only formatting and documentation were restored.
#
# NOTE(review): this span opens mid-function. The `def render_pep440_pre`
# line sits just before the visible span and was reconstructed from its
# visible body and trailing comment ("No -dirty") -- confirm against the
# original _version.py.
# ---------------------------------------------------------------------------


def render_pep440_pre(pieces):
    """Render TAG.post.devDISTANCE (no -dirty marker in this style)."""
    # exceptions:
    # 1: no tags. 0.post.devDISTANCE

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """Render TAG[.postDISTANCE[.dev0]+gHEX]; ".dev0" marks a dirty tree."""
    # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that
    # .dev0 sorts backwards (a dirty tree will appear "older" than the
    # corresponding clean one), but you shouldn't be releasing software with
    # -dirty anyways.

    # exceptions:
    # 1: no tags. 0.postDISTANCE[.dev0]

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            # plus_or_dot() is defined earlier in _version.py (outside
            # this span); it yields "+" or "." for the local-version part.
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """Render TAG[.postDISTANCE[.dev0]]; ".dev0" marks a dirty tree."""
    # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty.

    # exceptions:
    # 1: no tags. 0.postDISTANCE[.dev0]

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """Render like 'git describe --tags --dirty --always'."""
    # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty
    # --always'

    # exceptions:
    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """Render like 'git describe --tags --dirty --always -long'.

    The distance/hash suffix is emitted unconditionally.
    """
    # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty
    # --always -long'. The distance/hash is unconditional.

    # exceptions:
    # 1: no tags. HEX[-dirty]  (note: no 'g' prefix)

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Dispatch *pieces* to the renderer selected by *style*.

    Returns a dict with "version", "full-revisionid", "dirty" and "error"
    keys. Raises ValueError for an unknown style.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"]}

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None}


def get_versions():
    """Compute version information, trying each strategy in turn.

    Order: expanded git keywords, then `git describe` on the source tree,
    then the parent-directory name; falls back to "0+unknown".
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for i in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree"}

    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version"}


# ---------------------------------------------------------------------------
# Dump continues with pandas_msgpack/msgpack/_unpacker.pyx (Cython). Its
# header is preserved below as comments because Cython syntax is not valid
# Python; the statement it opens completes on the following dump line.
#   # coding: utf-8
#   #cython: embedsignature=True
#   from cpython cimport *
#   cdef extern from "Python.h":
#       ctypedef struct PyObject
#       cdef int PyObject_AsReadBuffer(object o, const void** buff,
#                                      Py_ssize_t* buf_len) except -1
#   from libc.stdlib cimport *
#   from libc.string cimport *
#   from libc.limits cimport *
#   from .exceptions import (BufferFull, OutOfData,
#                            UnpackValueError,
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# pandas_msgpack/msgpack/_unpacker.pyx (continued) -- recovered from a
# line-wrapped repository dump and re-flowed into valid Cython.
# NOTE(review): the dump's sanitizer stripped angle-bracket Cython casts
# (<char*>, <PyObject*>, <const void**>); they are restored below and
# marked -- verify against upstream msgpack-python 0.4.x _unpacker.pyx.
# The import below completes a statement whose opening tokens sit on the
# previous dump line.
# ---------------------------------------------------------------------------
from .exceptions import (BufferFull, OutOfData,
                         UnpackValueError, ExtraData)
from . import ExtType


cdef extern from "../includes/unpack.h":
    ctypedef struct msgpack_user:
        bint use_list
        PyObject* object_hook
        bint has_pairs_hook      # call object_hook with k-v pairs
        PyObject* list_hook
        PyObject* ext_hook
        char *encoding
        char *unicode_errors
        Py_ssize_t max_str_len
        Py_ssize_t max_bin_len
        Py_ssize_t max_array_len
        Py_ssize_t max_map_len
        Py_ssize_t max_ext_len

    ctypedef struct unpack_context:
        msgpack_user user
        PyObject* obj
        size_t count

    ctypedef int (*execute_fn)(unpack_context* ctx, const char* data,
                               size_t len, size_t* off) except? -1
    execute_fn unpack_construct
    execute_fn unpack_skip
    execute_fn read_array_header
    execute_fn read_map_header
    void unpack_init(unpack_context* ctx)
    object unpack_data(unpack_context* ctx)


cdef inline init_ctx(unpack_context *ctx,
                     object object_hook, object object_pairs_hook,
                     object list_hook, object ext_hook,
                     bint use_list, char* encoding, char* unicode_errors,
                     Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
                     Py_ssize_t max_array_len, Py_ssize_t max_map_len,
                     Py_ssize_t max_ext_len):
    # Initialise the C unpack context and copy all user options into it.
    # The various hooks are validated to be callable before being stored.
    unpack_init(ctx)
    ctx.user.use_list = use_list
    ctx.user.object_hook = ctx.user.list_hook = NULL
    ctx.user.max_str_len = max_str_len
    ctx.user.max_bin_len = max_bin_len
    ctx.user.max_array_len = max_array_len
    ctx.user.max_map_len = max_map_len
    ctx.user.max_ext_len = max_ext_len

    if object_hook is not None and object_pairs_hook is not None:
        raise TypeError("object_pairs_hook and object_hook "
                        "are mutually exclusive.")

    if object_hook is not None:
        if not PyCallable_Check(object_hook):
            raise TypeError("object_hook must be a callable.")
        # NOTE(review): cast reconstructed (stripped by dump)
        ctx.user.object_hook = <PyObject*>object_hook

    if object_pairs_hook is None:
        ctx.user.has_pairs_hook = False
    else:
        if not PyCallable_Check(object_pairs_hook):
            raise TypeError("object_pairs_hook must be a callable.")
        # object_pairs_hook reuses the object_hook slot; has_pairs_hook
        # tells the C layer to call it with k-v pairs instead of a dict.
        ctx.user.object_hook = <PyObject*>object_pairs_hook
        ctx.user.has_pairs_hook = True

    if list_hook is not None:
        if not PyCallable_Check(list_hook):
            raise TypeError("list_hook must be a callable.")
        ctx.user.list_hook = <PyObject*>list_hook

    if ext_hook is not None:
        if not PyCallable_Check(ext_hook):
            raise TypeError("ext_hook must be a callable.")
        ctx.user.ext_hook = <PyObject*>ext_hook

    ctx.user.encoding = encoding
    ctx.user.unicode_errors = unicode_errors


def default_read_extended_type(typecode, data):
    """Default ext handler: extended types are not supported."""
    raise NotImplementedError("Cannot decode extended type "
                              "with typecode=%d" % typecode)


def unpackb(object packed, object object_hook=None, object list_hook=None,
            bint use_list=1, encoding=None, unicode_errors="strict",
            object_pairs_hook=None, ext_hook=ExtType,
            Py_ssize_t max_str_len=2147483647,  # 2**31-1
            Py_ssize_t max_bin_len=2147483647,
            Py_ssize_t max_array_len=2147483647,
            Py_ssize_t max_map_len=2147483647,
            Py_ssize_t max_ext_len=2147483647):
    """
    Unpack packed_bytes to object. Returns an unpacked object.

    Raises `ValueError` when `packed` contains extra bytes.

    See :class:`Unpacker` for options.
    """
    cdef unpack_context ctx
    cdef size_t off = 0
    cdef int ret

    cdef char* buf
    cdef Py_ssize_t buf_len
    cdef char* cenc = NULL
    cdef char* cerr = NULL

    # NOTE(review): cast reconstructed (stripped by dump)
    PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len)

    if encoding is not None:
        if isinstance(encoding, unicode):
            encoding = encoding.encode('ascii')
        cenc = PyBytes_AsString(encoding)

    if unicode_errors is not None:
        if isinstance(unicode_errors, unicode):
            unicode_errors = unicode_errors.encode('ascii')
        cerr = PyBytes_AsString(unicode_errors)

    init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
             use_list, cenc, cerr,
             max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
    ret = unpack_construct(&ctx, buf, buf_len, &off)
    if ret == 1:
        obj = unpack_data(&ctx)
        if off < buf_len:
            # Complete object decoded but bytes remain: surface both.
            raise ExtraData(obj, PyBytes_FromStringAndSize(
                buf + off, buf_len - off))
        return obj
    else:
        raise UnpackValueError("Unpack failed: error = %d" % (ret,))


def unpack(object stream, object object_hook=None, object list_hook=None,
           bint use_list=1, encoding=None, unicode_errors="strict",
           object_pairs_hook=None,
           ):
    """
    Unpack an object from `stream`.

    Raises `ValueError` when `stream` has extra bytes.

    See :class:`Unpacker` for options.
    """
    return unpackb(stream.read(), use_list=use_list,
                   object_hook=object_hook,
                   object_pairs_hook=object_pairs_hook, list_hook=list_hook,
                   encoding=encoding, unicode_errors=unicode_errors)


cdef class Unpacker(object):
    """Streaming unpacker.

    arguments:

    :param file_like:
        File-like object having `.read(n)` method.
        If specified, unpacker reads serialized data from it and
        :meth:`feed()` is not usable.

    :param int read_size:
        Used as `file_like.read(read_size)`. (default:
        `min(1024**2, max_buffer_size)`)

    :param bool use_list:
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)

    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
        (See also simplejson)

    :param callable object_pairs_hook:
        When specified, it should be callable. Unpacker calls it with a list
        of key-value pairs after unpacking msgpack map. (See also simplejson)

    :param str encoding:
        Encoding used for decoding msgpack raw.
        If it is None (default), msgpack raw is deserialized to Python bytes.

    :param str unicode_errors:
        Used for decoding msgpack raw with *encoding*.
        (default: `'strict'`)

    :param int max_buffer_size:
        Limits size of data waiting unpacked. 0 means system's
        INT_MAX (default). Raises `BufferFull` exception when it
        is insufficient. You should set this parameter when unpacking
        data from an untrusted source.

    :param int max_str_len:
        Limits max length of str. (default: 2**31-1)

    :param int max_bin_len:
        Limits max length of bin. (default: 2**31-1)

    :param int max_array_len:
        Limits max length of array. (default: 2**31-1)

    :param int max_map_len:
        Limits max length of map. (default: 2**31-1)


    example of streaming deserialize from file-like object::

        unpacker = Unpacker(file_like)
        for o in unpacker:
            process(o)

    example of streaming deserialize from socket::

        unpacker = Unpacker()
        while True:
            buf = sock.recv(1024**2)
            if not buf:
                break
            unpacker.feed(buf)
            for o in unpacker:
                process(o)
    """
    cdef unpack_context ctx
    cdef char* buf
    cdef size_t buf_size, buf_head, buf_tail
    cdef object file_like
    cdef object file_like_read
    cdef Py_ssize_t read_size
    # To maintain refcnt.
    cdef object object_hook, object_pairs_hook, list_hook, ext_hook
    cdef object encoding, unicode_errors
    cdef size_t max_buffer_size

    def __cinit__(self):
        self.buf = NULL

    def __dealloc__(self):
        free(self.buf)
        self.buf = NULL

    def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
                 object object_hook=None, object object_pairs_hook=None,
                 object list_hook=None, encoding=None, unicode_errors='strict',
                 int max_buffer_size=0, object ext_hook=ExtType,
                 Py_ssize_t max_str_len=2147483647,  # 2**31-1
                 Py_ssize_t max_bin_len=2147483647,
                 Py_ssize_t max_array_len=2147483647,
                 Py_ssize_t max_map_len=2147483647,
                 Py_ssize_t max_ext_len=2147483647):
        cdef char *cenc=NULL,
        cdef char *cerr=NULL

        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.list_hook = list_hook
        self.ext_hook = ext_hook

        self.file_like = file_like
        if file_like:
            self.file_like_read = file_like.read
            if not PyCallable_Check(self.file_like_read):
                raise TypeError("`file_like.read` must be a callable.")
        if not max_buffer_size:
            max_buffer_size = INT_MAX
        if read_size > max_buffer_size:
            raise ValueError("read_size should be less or "
                             "equal to max_buffer_size")
        if not read_size:
            read_size = min(max_buffer_size, 1024**2)
        self.max_buffer_size = max_buffer_size
        self.read_size = read_size
        # NOTE(review): cast reconstructed (stripped by dump)
        self.buf = <char*>malloc(read_size)
        if self.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
        self.buf_size = read_size
        self.buf_head = 0
        self.buf_tail = 0

        if encoding is not None:
            if isinstance(encoding, unicode):
                self.encoding = encoding.encode('ascii')
            elif isinstance(encoding, bytes):
                self.encoding = encoding
            else:
                raise TypeError("encoding should be bytes or unicode")
            cenc = PyBytes_AsString(self.encoding)

        if unicode_errors is not None:
            if isinstance(unicode_errors, unicode):
                self.unicode_errors = unicode_errors.encode('ascii')
            elif isinstance(unicode_errors, bytes):
                self.unicode_errors = unicode_errors
            else:
                raise TypeError("unicode_errors should be bytes or unicode")
            cerr = PyBytes_AsString(self.unicode_errors)

        init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
                 ext_hook, use_list, cenc, cerr,
                 max_str_len, max_bin_len, max_array_len,
                 max_map_len, max_ext_len)

    def feed(self, object next_bytes):
        """Append `next_bytes` to internal buffer."""
        cdef Py_buffer pybuff
        if self.file_like is not None:
            raise AssertionError("unpacker.feed() is not be able "
                                 "to use with `file_like`.")
        PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE)
        try:
            self.append_buffer(pybuff.buf, pybuff.len)
        finally:
            PyBuffer_Release(&pybuff)

    cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
        # Grow or compact the internal ring-like buffer so that _buf_len
        # more bytes fit, then copy them in after the current tail.
        cdef:
            char* buf = self.buf
            char* new_buf
            size_t head = self.buf_head
            size_t tail = self.buf_tail
            size_t buf_size = self.buf_size
            size_t new_size

        if tail + _buf_len > buf_size:
            if ((tail - head) + _buf_len) <= buf_size:
                # move to front.
                memmove(buf, buf + head, tail - head)
                tail -= head
                head = 0
            else:
                # expand buffer.
                new_size = (tail - head) + _buf_len
                if new_size > self.max_buffer_size:
                    raise BufferFull
                new_size = min(new_size * 2, self.max_buffer_size)
                # NOTE(review): cast reconstructed (stripped by dump)
                new_buf = <char*>malloc(new_size)
                if new_buf == NULL:
                    # self.buf still holds old buffer and will be freed during
                    # obj destruction
                    raise MemoryError("Unable to enlarge internal buffer.")
                memcpy(new_buf, buf + head, tail - head)
                free(buf)

                buf = new_buf
                buf_size = new_size
                tail -= head
                head = 0

        # NOTE(review): cast reconstructed (stripped by dump)
        memcpy(buf + tail, <char*>(_buf), _buf_len)
        self.buf = buf
        self.buf_head = head
        self.buf_size = buf_size
        self.buf_tail = tail + _buf_len

    cdef read_from_file(self):
        # Pull at most read_size bytes (bounded by remaining buffer room)
        # from the file-like source; a falsy read marks end-of-stream.
        next_bytes = self.file_like_read(
            min(self.read_size,
                self.max_buffer_size - (self.buf_tail - self.buf_head)))
        if next_bytes:
            self.append_buffer(PyBytes_AsString(next_bytes),
                               PyBytes_Size(next_bytes))
        else:
            self.file_like = None

    cdef object _unpack(self, execute_fn execute,
                        object write_bytes, bint iter=0):
        # Core driver shared by unpack/skip/read_*_header/__next__:
        # feed the C execute function until it reports a complete object.
        cdef int ret
        cdef object obj
        cdef size_t prev_head

        if self.buf_head >= self.buf_tail and self.file_like is not None:
            self.read_from_file()

        while 1:
            prev_head = self.buf_head
            if prev_head >= self.buf_tail:
                if iter:
                    raise StopIteration("No more data to unpack.")
                else:
                    raise OutOfData("No more data to unpack.")

            ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
            if write_bytes is not None:
                write_bytes(PyBytes_FromStringAndSize(
                    self.buf + prev_head, self.buf_head - prev_head))

            if ret == 1:
                obj = unpack_data(&self.ctx)
                unpack_init(&self.ctx)
                return obj
            elif ret == 0:
                # Incomplete object: try to refill from the file source.
                if self.file_like is not None:
                    self.read_from_file()
                    continue
                if iter:
                    raise StopIteration("No more data to unpack.")
                else:
                    raise OutOfData("No more data to unpack.")
            else:
                raise ValueError("Unpack failed: error = %d" % (ret,))

    def read_bytes(self, Py_ssize_t nbytes):
        """Read a specified number of raw bytes from the stream"""
        cdef size_t nread
        nread = min(self.buf_tail - self.buf_head, nbytes)
        ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread)
        self.buf_head += nread
        if len(ret) < nbytes and self.file_like is not None:
            ret += self.file_like.read(nbytes - len(ret))
        return ret

    def unpack(self, object write_bytes=None):
        """Unpack one object

        If write_bytes is not None, it will be called with parts of the raw
        message as it is unpacked.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(unpack_construct, write_bytes)

    def skip(self, object write_bytes=None):
        """Read and ignore one object, returning None

        If write_bytes is not None, it will be called with parts of the raw
        message as it is unpacked.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(unpack_skip, write_bytes)

    def read_array_header(self, object write_bytes=None):
        """assuming the next object is an array, return its size n, such that
        the next n unpack() calls will iterate over its contents.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(read_array_header, write_bytes)

    def read_map_header(self, object write_bytes=None):
        """assuming the next object is a map, return its size n, such that the
        next n * 2 unpack() calls will iterate over its key-value pairs.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(read_map_header, write_bytes)

    def __iter__(self):
        return self

    def __next__(self):
        return self._unpack(unpack_construct, None, 1)

    # for debug.
    #def _buf(self):
    #    return PyString_FromStringAndSize(self.buf, self.buf_tail)

    #def _off(self):
    #    return self.buf_head

# ---------------------------------------------------------------------------
# Dump continues with pandas_msgpack/includes/pack_template.h (C). Its
# Apache-2.0 license header is preserved below as comments; the comment it
# opens closes on the following dump line.
#   /*
#    * MessagePack packing routine template
#    *
#    * Copyright (C) 2008-2010 FURUHASHI Sadayuki
#    *
#    * Licensed under the Apache License, Version 2.0 (the "License");
#    * you may not use this file except in compliance with the License.
#    * You may obtain a copy of the License at
#    *
#    *     http://www.apache.org/licenses/LICENSE-2.0
#    *
#    * Unless required by applicable law or agreed to in writing, software
#    * distributed under the License is distributed on an "AS IS" BASIS,
#    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    * See the License for the specific language governing permissions and
#    * limitations under the License.
# ---------------------------------------------------------------------------
17 | */ 18 | 19 | #if defined(__LITTLE_ENDIAN__) 20 | #define TAKE8_8(d) ((uint8_t*)&d)[0] 21 | #define TAKE8_16(d) ((uint8_t*)&d)[0] 22 | #define TAKE8_32(d) ((uint8_t*)&d)[0] 23 | #define TAKE8_64(d) ((uint8_t*)&d)[0] 24 | #elif defined(__BIG_ENDIAN__) 25 | #define TAKE8_8(d) ((uint8_t*)&d)[0] 26 | #define TAKE8_16(d) ((uint8_t*)&d)[1] 27 | #define TAKE8_32(d) ((uint8_t*)&d)[3] 28 | #define TAKE8_64(d) ((uint8_t*)&d)[7] 29 | #endif 30 | 31 | #ifndef msgpack_pack_append_buffer 32 | #error msgpack_pack_append_buffer callback is not defined 33 | #endif 34 | 35 | 36 | /* 37 | * Integer 38 | */ 39 | 40 | #define msgpack_pack_real_uint8(x, d) \ 41 | do { \ 42 | if(d < (1<<7)) { \ 43 | /* fixnum */ \ 44 | msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ 45 | } else { \ 46 | /* unsigned 8 */ \ 47 | unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ 48 | msgpack_pack_append_buffer(x, buf, 2); \ 49 | } \ 50 | } while(0) 51 | 52 | #define msgpack_pack_real_uint16(x, d) \ 53 | do { \ 54 | if(d < (1<<7)) { \ 55 | /* fixnum */ \ 56 | msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ 57 | } else if(d < (1<<8)) { \ 58 | /* unsigned 8 */ \ 59 | unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ 60 | msgpack_pack_append_buffer(x, buf, 2); \ 61 | } else { \ 62 | /* unsigned 16 */ \ 63 | unsigned char buf[3]; \ 64 | buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ 65 | msgpack_pack_append_buffer(x, buf, 3); \ 66 | } \ 67 | } while(0) 68 | 69 | #define msgpack_pack_real_uint32(x, d) \ 70 | do { \ 71 | if(d < (1<<8)) { \ 72 | if(d < (1<<7)) { \ 73 | /* fixnum */ \ 74 | msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ 75 | } else { \ 76 | /* unsigned 8 */ \ 77 | unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ 78 | msgpack_pack_append_buffer(x, buf, 2); \ 79 | } \ 80 | } else { \ 81 | if(d < (1<<16)) { \ 82 | /* unsigned 16 */ \ 83 | unsigned char buf[3]; \ 84 | buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ 85 | msgpack_pack_append_buffer(x, buf, 3); \ 86 | } else { \ 87 | /* 
unsigned 32 */ \ 88 | unsigned char buf[5]; \ 89 | buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ 90 | msgpack_pack_append_buffer(x, buf, 5); \ 91 | } \ 92 | } \ 93 | } while(0) 94 | 95 | #define msgpack_pack_real_uint64(x, d) \ 96 | do { \ 97 | if(d < (1ULL<<8)) { \ 98 | if(d < (1ULL<<7)) { \ 99 | /* fixnum */ \ 100 | msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ 101 | } else { \ 102 | /* unsigned 8 */ \ 103 | unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ 104 | msgpack_pack_append_buffer(x, buf, 2); \ 105 | } \ 106 | } else { \ 107 | if(d < (1ULL<<16)) { \ 108 | /* unsigned 16 */ \ 109 | unsigned char buf[3]; \ 110 | buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ 111 | msgpack_pack_append_buffer(x, buf, 3); \ 112 | } else if(d < (1ULL<<32)) { \ 113 | /* unsigned 32 */ \ 114 | unsigned char buf[5]; \ 115 | buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ 116 | msgpack_pack_append_buffer(x, buf, 5); \ 117 | } else { \ 118 | /* unsigned 64 */ \ 119 | unsigned char buf[9]; \ 120 | buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ 121 | msgpack_pack_append_buffer(x, buf, 9); \ 122 | } \ 123 | } \ 124 | } while(0) 125 | 126 | #define msgpack_pack_real_int8(x, d) \ 127 | do { \ 128 | if(d < -(1<<5)) { \ 129 | /* signed 8 */ \ 130 | unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ 131 | msgpack_pack_append_buffer(x, buf, 2); \ 132 | } else { \ 133 | /* fixnum */ \ 134 | msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ 135 | } \ 136 | } while(0) 137 | 138 | #define msgpack_pack_real_int16(x, d) \ 139 | do { \ 140 | if(d < -(1<<5)) { \ 141 | if(d < -(1<<7)) { \ 142 | /* signed 16 */ \ 143 | unsigned char buf[3]; \ 144 | buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ 145 | msgpack_pack_append_buffer(x, buf, 3); \ 146 | } else { \ 147 | /* signed 8 */ \ 148 | unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \ 149 | msgpack_pack_append_buffer(x, buf, 2); \ 150 | } \ 151 | } else if(d < (1<<7)) { \ 152 | /* fixnum */ \ 153 | 
msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ 154 | } else { \ 155 | if(d < (1<<8)) { \ 156 | /* unsigned 8 */ \ 157 | unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ 158 | msgpack_pack_append_buffer(x, buf, 2); \ 159 | } else { \ 160 | /* unsigned 16 */ \ 161 | unsigned char buf[3]; \ 162 | buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ 163 | msgpack_pack_append_buffer(x, buf, 3); \ 164 | } \ 165 | } \ 166 | } while(0) 167 | 168 | #define msgpack_pack_real_int32(x, d) \ 169 | do { \ 170 | if(d < -(1<<5)) { \ 171 | if(d < -(1<<15)) { \ 172 | /* signed 32 */ \ 173 | unsigned char buf[5]; \ 174 | buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ 175 | msgpack_pack_append_buffer(x, buf, 5); \ 176 | } else if(d < -(1<<7)) { \ 177 | /* signed 16 */ \ 178 | unsigned char buf[3]; \ 179 | buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ 180 | msgpack_pack_append_buffer(x, buf, 3); \ 181 | } else { \ 182 | /* signed 8 */ \ 183 | unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ 184 | msgpack_pack_append_buffer(x, buf, 2); \ 185 | } \ 186 | } else if(d < (1<<7)) { \ 187 | /* fixnum */ \ 188 | msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ 189 | } else { \ 190 | if(d < (1<<8)) { \ 191 | /* unsigned 8 */ \ 192 | unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ 193 | msgpack_pack_append_buffer(x, buf, 2); \ 194 | } else if(d < (1<<16)) { \ 195 | /* unsigned 16 */ \ 196 | unsigned char buf[3]; \ 197 | buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ 198 | msgpack_pack_append_buffer(x, buf, 3); \ 199 | } else { \ 200 | /* unsigned 32 */ \ 201 | unsigned char buf[5]; \ 202 | buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ 203 | msgpack_pack_append_buffer(x, buf, 5); \ 204 | } \ 205 | } \ 206 | } while(0) 207 | 208 | #define msgpack_pack_real_int64(x, d) \ 209 | do { \ 210 | if(d < -(1LL<<5)) { \ 211 | if(d < -(1LL<<15)) { \ 212 | if(d < -(1LL<<31)) { \ 213 | /* signed 64 */ \ 214 | unsigned char buf[9]; \ 215 | buf[0] = 0xd3; 
_msgpack_store64(&buf[1], d); \ 216 | msgpack_pack_append_buffer(x, buf, 9); \ 217 | } else { \ 218 | /* signed 32 */ \ 219 | unsigned char buf[5]; \ 220 | buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ 221 | msgpack_pack_append_buffer(x, buf, 5); \ 222 | } \ 223 | } else { \ 224 | if(d < -(1<<7)) { \ 225 | /* signed 16 */ \ 226 | unsigned char buf[3]; \ 227 | buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ 228 | msgpack_pack_append_buffer(x, buf, 3); \ 229 | } else { \ 230 | /* signed 8 */ \ 231 | unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ 232 | msgpack_pack_append_buffer(x, buf, 2); \ 233 | } \ 234 | } \ 235 | } else if(d < (1<<7)) { \ 236 | /* fixnum */ \ 237 | msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ 238 | } else { \ 239 | if(d < (1LL<<16)) { \ 240 | if(d < (1<<8)) { \ 241 | /* unsigned 8 */ \ 242 | unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ 243 | msgpack_pack_append_buffer(x, buf, 2); \ 244 | } else { \ 245 | /* unsigned 16 */ \ 246 | unsigned char buf[3]; \ 247 | buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ 248 | msgpack_pack_append_buffer(x, buf, 3); \ 249 | } \ 250 | } else { \ 251 | if(d < (1LL<<32)) { \ 252 | /* unsigned 32 */ \ 253 | unsigned char buf[5]; \ 254 | buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ 255 | msgpack_pack_append_buffer(x, buf, 5); \ 256 | } else { \ 257 | /* unsigned 64 */ \ 258 | unsigned char buf[9]; \ 259 | buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ 260 | msgpack_pack_append_buffer(x, buf, 9); \ 261 | } \ 262 | } \ 263 | } \ 264 | } while(0) 265 | 266 | 267 | static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) 268 | { 269 | msgpack_pack_real_uint8(x, d); 270 | } 271 | 272 | static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) 273 | { 274 | msgpack_pack_real_uint16(x, d); 275 | } 276 | 277 | static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) 278 | { 279 | msgpack_pack_real_uint32(x, d); 280 | } 281 | 282 | static inline int 
msgpack_pack_uint64(msgpack_packer* x, uint64_t d) 283 | { 284 | msgpack_pack_real_uint64(x, d); 285 | } 286 | 287 | static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) 288 | { 289 | msgpack_pack_real_int8(x, d); 290 | } 291 | 292 | static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) 293 | { 294 | msgpack_pack_real_int16(x, d); 295 | } 296 | 297 | static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) 298 | { 299 | msgpack_pack_real_int32(x, d); 300 | } 301 | 302 | static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) 303 | { 304 | msgpack_pack_real_int64(x, d); 305 | } 306 | 307 | 308 | //#ifdef msgpack_pack_inline_func_cint 309 | 310 | static inline int msgpack_pack_short(msgpack_packer* x, short d) 311 | { 312 | #if defined(SIZEOF_SHORT) 313 | #if SIZEOF_SHORT == 2 314 | msgpack_pack_real_int16(x, d); 315 | #elif SIZEOF_SHORT == 4 316 | msgpack_pack_real_int32(x, d); 317 | #else 318 | msgpack_pack_real_int64(x, d); 319 | #endif 320 | 321 | #elif defined(SHRT_MAX) 322 | #if SHRT_MAX == 0x7fff 323 | msgpack_pack_real_int16(x, d); 324 | #elif SHRT_MAX == 0x7fffffff 325 | msgpack_pack_real_int32(x, d); 326 | #else 327 | msgpack_pack_real_int64(x, d); 328 | #endif 329 | 330 | #else 331 | if(sizeof(short) == 2) { 332 | msgpack_pack_real_int16(x, d); 333 | } else if(sizeof(short) == 4) { 334 | msgpack_pack_real_int32(x, d); 335 | } else { 336 | msgpack_pack_real_int64(x, d); 337 | } 338 | #endif 339 | } 340 | 341 | static inline int msgpack_pack_int(msgpack_packer* x, int d) 342 | { 343 | #if defined(SIZEOF_INT) 344 | #if SIZEOF_INT == 2 345 | msgpack_pack_real_int16(x, d); 346 | #elif SIZEOF_INT == 4 347 | msgpack_pack_real_int32(x, d); 348 | #else 349 | msgpack_pack_real_int64(x, d); 350 | #endif 351 | 352 | #elif defined(INT_MAX) 353 | #if INT_MAX == 0x7fff 354 | msgpack_pack_real_int16(x, d); 355 | #elif INT_MAX == 0x7fffffff 356 | msgpack_pack_real_int32(x, d); 357 | #else 358 | msgpack_pack_real_int64(x, d); 
359 | #endif 360 | 361 | #else 362 | if(sizeof(int) == 2) { 363 | msgpack_pack_real_int16(x, d); 364 | } else if(sizeof(int) == 4) { 365 | msgpack_pack_real_int32(x, d); 366 | } else { 367 | msgpack_pack_real_int64(x, d); 368 | } 369 | #endif 370 | } 371 | 372 | static inline int msgpack_pack_long(msgpack_packer* x, long d) 373 | { 374 | #if defined(SIZEOF_LONG) 375 | #if SIZEOF_LONG == 2 376 | msgpack_pack_real_int16(x, d); 377 | #elif SIZEOF_LONG == 4 378 | msgpack_pack_real_int32(x, d); 379 | #else 380 | msgpack_pack_real_int64(x, d); 381 | #endif 382 | 383 | #elif defined(LONG_MAX) 384 | #if LONG_MAX == 0x7fffL 385 | msgpack_pack_real_int16(x, d); 386 | #elif LONG_MAX == 0x7fffffffL 387 | msgpack_pack_real_int32(x, d); 388 | #else 389 | msgpack_pack_real_int64(x, d); 390 | #endif 391 | 392 | #else 393 | if(sizeof(long) == 2) { 394 | msgpack_pack_real_int16(x, d); 395 | } else if(sizeof(long) == 4) { 396 | msgpack_pack_real_int32(x, d); 397 | } else { 398 | msgpack_pack_real_int64(x, d); 399 | } 400 | #endif 401 | } 402 | 403 | static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) 404 | { 405 | #if defined(SIZEOF_LONG_LONG) 406 | #if SIZEOF_LONG_LONG == 2 407 | msgpack_pack_real_int16(x, d); 408 | #elif SIZEOF_LONG_LONG == 4 409 | msgpack_pack_real_int32(x, d); 410 | #else 411 | msgpack_pack_real_int64(x, d); 412 | #endif 413 | 414 | #elif defined(LLONG_MAX) 415 | #if LLONG_MAX == 0x7fffL 416 | msgpack_pack_real_int16(x, d); 417 | #elif LLONG_MAX == 0x7fffffffL 418 | msgpack_pack_real_int32(x, d); 419 | #else 420 | msgpack_pack_real_int64(x, d); 421 | #endif 422 | 423 | #else 424 | if(sizeof(long long) == 2) { 425 | msgpack_pack_real_int16(x, d); 426 | } else if(sizeof(long long) == 4) { 427 | msgpack_pack_real_int32(x, d); 428 | } else { 429 | msgpack_pack_real_int64(x, d); 430 | } 431 | #endif 432 | } 433 | 434 | static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) 435 | { 436 | #if defined(SIZEOF_SHORT) 437 | 
/* Continuation of msgpack_pack_unsigned_short, then the remaining
 * unsigned-width packers and the float/double packers.  Same three-tier
 * width resolution as the signed variants above. */
#if SIZEOF_SHORT == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_SHORT == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(USHRT_MAX)
#if USHRT_MAX == 0xffffU
    msgpack_pack_real_uint16(x, d);
#elif USHRT_MAX == 0xffffffffU
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned short) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned short) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}

static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d)
{
#if defined(SIZEOF_INT)
#if SIZEOF_INT == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_INT == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(UINT_MAX)
#if UINT_MAX == 0xffffU
    msgpack_pack_real_uint16(x, d);
#elif UINT_MAX == 0xffffffffU
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned int) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned int) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}

static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d)
{
#if defined(SIZEOF_LONG)
#if SIZEOF_LONG == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_LONG == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(ULONG_MAX)
#if ULONG_MAX == 0xffffUL
    msgpack_pack_real_uint16(x, d);
#elif ULONG_MAX == 0xffffffffUL
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned long) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned long) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}

static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d)
{
#if defined(SIZEOF_LONG_LONG)
#if SIZEOF_LONG_LONG == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_LONG_LONG == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(ULLONG_MAX)
#if ULLONG_MAX == 0xffffUL
    msgpack_pack_real_uint16(x, d);
#elif ULLONG_MAX == 0xffffffffUL
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned long long) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned long long) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}

//#undef msgpack_pack_inline_func_cint
//#endif



/*
 * Float
 */

static inline int msgpack_pack_float(msgpack_packer* x, float d)
{
    /* Type-pun through a union, then emit 0xca (float32) + big-endian bits. */
    union { float f; uint32_t i; } mem;
    mem.f = d;
    unsigned char buf[5];
    buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i);
    msgpack_pack_append_buffer(x, buf, 5);
}

static inline int msgpack_pack_double(msgpack_packer* x, double d)
{
    /* 0xcb (float64) + big-endian bits. */
    union { double f; uint64_t i; } mem;
    mem.f = d;
    unsigned char buf[9];
    buf[0] = 0xcb;
#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi
    // old ARM OABI stores doubles with the two 32-bit words swapped;
    // https://github.com/msgpack/msgpack-perl/pull/1
    mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL);
#endif
    _msgpack_store64(&buf[1], mem.i);
    msgpack_pack_append_buffer(x, buf, 9);
}


/*
 * Nil
 */

static inline int msgpack_pack_nil(msgpack_packer* x)
{
    static const unsigned char d = 0xc0;
    msgpack_pack_append_buffer(x, &d, 1);
}


/*
 * Boolean
 */

static inline int msgpack_pack_true(msgpack_packer* x)
{
    static const unsigned char d = 0xc3;
    msgpack_pack_append_buffer(x, &d, 1);
}

static inline int msgpack_pack_false(msgpack_packer* x)
{
    static const unsigned char d = 0xc2;
    msgpack_pack_append_buffer(x, &d, 1);
}


/*
 * Array
 */

static inline int msgpack_pack_array(msgpack_packer* x, unsigned int n)
{
    /* fixarray (0x90|n), array16 (0xdc), or array32 (0xdd) by length. */
    if(n < 16) {
        unsigned char d = 0x90 | n;
        msgpack_pack_append_buffer(x, &d, 1);
    } else if(n < 65536) {
        unsigned char buf[3];
        buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5];
        buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}


/*
 * Map
 */

static inline int msgpack_pack_map(msgpack_packer* x, unsigned int n)
{
    /* fixmap (0x80|n), map16 (0xde), or map32 (0xdf) by pair count. */
    if(n < 16) {
        unsigned char d = 0x80 | n;
        msgpack_pack_append_buffer(x, &TAKE8_8(d), 1);
    } else if(n < 65536) {
        unsigned char buf[3];
        buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5];
        buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}


/*
 * Raw
 */

static inline int msgpack_pack_raw(msgpack_packer* x, size_t l)
{
    /* String header: fixstr, str8 (only when bin types are enabled),
     * str16, or str32 depending on length. */
    if (l < 32) {
        unsigned char d = 0xa0 | (uint8_t)l;
        msgpack_pack_append_buffer(x, &TAKE8_8(d), 1);
    } else if (x->use_bin_type && l < 256) { // str8 is new format introduced with bin.
        unsigned char buf[2] = {0xd9, (uint8_t)l};
        msgpack_pack_append_buffer(x, buf, 2);
    } else if (l < 65536) {
        unsigned char buf[3];
        buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5];
        buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}

/*
 * bin
 */
static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
{
    /* bin8/bin16/bin32 header; falls back to raw (str) when the packer was
     * created without bin-type support. */
    if (!x->use_bin_type) {
        return msgpack_pack_raw(x, l);
    }
    if (l < 256) {
        unsigned char buf[2] = {0xc4, (unsigned char)l};
        msgpack_pack_append_buffer(x, buf, 2);
    } else if (l < 65536) {
        unsigned char buf[3] = {0xc5};
        _msgpack_store16(&buf[1], (uint16_t)l);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5] = {0xc6};
        _msgpack_store32(&buf[1], (uint32_t)l);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}

static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l)
{
    if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
    return 0;
}

/*
 * Ext
 */
static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l)
{
    /* fixext1/2/4/8/16 for the exact power-of-two sizes, otherwise
     * ext8/ext16/ext32 with an explicit length field. */
    if (l == 1) {
        unsigned char buf[2];
        buf[0] = 0xd4;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l == 2) {
        unsigned char buf[2];
        buf[0] = 0xd5;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l == 4) {
        unsigned char buf[2];
        buf[0] = 0xd6;
        buf[1] = (unsigned char)typecode;
msgpack_pack_append_buffer(x, buf, 2); 734 | } 735 | else if(l == 8) { 736 | unsigned char buf[2]; 737 | buf[0] = 0xd7; 738 | buf[1] = (unsigned char)typecode; 739 | msgpack_pack_append_buffer(x, buf, 2); 740 | } 741 | else if(l == 16) { 742 | unsigned char buf[2]; 743 | buf[0] = 0xd8; 744 | buf[1] = (unsigned char)typecode; 745 | msgpack_pack_append_buffer(x, buf, 2); 746 | } 747 | else if(l < 256) { 748 | unsigned char buf[3]; 749 | buf[0] = 0xc7; 750 | buf[1] = l; 751 | buf[2] = (unsigned char)typecode; 752 | msgpack_pack_append_buffer(x, buf, 3); 753 | } else if(l < 65536) { 754 | unsigned char buf[4]; 755 | buf[0] = 0xc8; 756 | _msgpack_store16(&buf[1], (uint16_t)l); 757 | buf[3] = (unsigned char)typecode; 758 | msgpack_pack_append_buffer(x, buf, 4); 759 | } else { 760 | unsigned char buf[6]; 761 | buf[0] = 0xc9; 762 | _msgpack_store32(&buf[1], (uint32_t)l); 763 | buf[5] = (unsigned char)typecode; 764 | msgpack_pack_append_buffer(x, buf, 6); 765 | } 766 | 767 | } 768 | 769 | 770 | 771 | #undef msgpack_pack_append_buffer 772 | 773 | #undef TAKE8_8 774 | #undef TAKE8_16 775 | #undef TAKE8_32 776 | #undef TAKE8_64 777 | 778 | #undef msgpack_pack_real_uint8 779 | #undef msgpack_pack_real_uint16 780 | #undef msgpack_pack_real_uint32 781 | #undef msgpack_pack_real_uint64 782 | #undef msgpack_pack_real_int8 783 | #undef msgpack_pack_real_int16 784 | #undef msgpack_pack_real_int32 785 | #undef msgpack_pack_real_int64 786 | -------------------------------------------------------------------------------- /pandas_msgpack/packers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Msgpack serializer support for reading and writing pandas data structures 3 | to disk 4 | 5 | portions of msgpack_numpy package, by Lev Givon were incorporated 6 | into this module (and tests_packers.py) 7 | 8 | License 9 | ======= 10 | 11 | Copyright (c) 2013, Lev Givon. 12 | All rights reserved. 

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
  copyright notice, this list of conditions and the following
  disclaimer in the documentation and/or other materials provided
  with the distribution.
* Neither the name of Lev Givon nor the names of any
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

from datetime import datetime, date, timedelta
from dateutil.parser import parse
import os
from textwrap import dedent
import warnings

import numpy as np
from pandas import compat
from pandas.compat import u, u_safe

from pandas.types.common import (is_categorical_dtype, is_object_dtype,
                                 needs_i8_conversion, pandas_dtype)

from pandas import (Timestamp, Period, Series, DataFrame,  # noqa
                    Index, MultiIndex, Float64Index, Int64Index,
                    Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT,
                    Categorical, CategoricalIndex)
from pandas.sparse.api import SparseSeries, SparseDataFrame
from pandas.sparse.array import BlockIndex, IntIndex
from pandas.core.generic import NDFrame
from pandas.core.common import PerformanceWarning
from pandas.io.common import get_filepath_or_buffer
from pandas.core.internals import BlockManager, make_block, _safe_reshape
import pandas.core.internals as internals

from pandas_msgpack import _is_pandas_legacy_version
from pandas_msgpack.msgpack import (Unpacker as _Unpacker,
                                    Packer as _Packer,
                                    ExtType)
from pandas_msgpack._move import (
    BadMove as _BadMove,
    move_into_mutable_buffer as _move_into_mutable_buffer,
)

# NaT is a singleton; its concrete type is used for isinstance checks below.
NaTType = type(NaT)

# check which compression libs we have installed
try:
    import zlib

    def _check_zlib():
        pass
except ImportError:
    def _check_zlib():
        raise ImportError('zlib is not installed')

# Docstring attached after the fact so both the success and failure stubs
# share it.
_check_zlib.__doc__ = dedent(
    """\
    Check if zlib is installed.

    Raises
    ------
    ImportError
        Raised when zlib is not installed.
    """,
)

try:
    import blosc

    def _check_blosc():
        pass
except ImportError:
    def _check_blosc():
        raise ImportError('blosc is not installed')

# Docstring attached after the fact so both stubs share it.
_check_blosc.__doc__ = dedent(
    """\
    Check if blosc is installed.

    Raises
    ------
    ImportError
        Raised when blosc is not installed.
    """,
)

# until we can pass this into our conversion functions,
# this is pretty hacky
compressor = None


def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
        if None, return generated string
    args : an object or objects to serialize
    encoding: encoding for unicode objects
    append : boolean whether to append to an existing msgpack
        (default is False)
    compress : type of compressor (zlib or blosc), default to None (no
        compression)
    """
    # The module-level `compressor` is consumed by convert() during encode.
    global compressor
    compressor = kwargs.pop('compress', None)
    if compressor:
        compressor = u(compressor)
    append = kwargs.pop('append', None)
    if append:
        mode = 'a+b'
    else:
        mode = 'wb'

    def writer(fh):
        # pack each object independently so they can be streamed back out
        for a in args:
            fh.write(pack(a, **kwargs))

    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)
    elif path_or_buf is None:
        buf = compat.BytesIO()
        writer(buf)
        return buf.getvalue()
    else:
        writer(path_or_buf)


def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
    """
    Load msgpack pandas object from the specified
    file path

    Parameters
    ----------
    path_or_buf : string File path, BytesIO like or string
    encoding: Encoding for decoding msgpack str type
    iterator : boolean,
        if True, return an iterator to the unpacker
        (default is False)

    Returns
    -------
    obj : type of object stored in file

    """
    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def read(fh):
        # a single packed object is returned bare; multiple as a list
        l = list(unpack(fh, encoding=encoding, **kwargs))
        if len(l) == 1:
            return l[0]
        return l

    # see if we have an actual file
    if isinstance(path_or_buf, compat.string_types):

        try:
            exists = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            exists = False

        if exists:
            with open(path_or_buf, 'rb') as fh:
                return read(fh)

    # treat as a binary-like
    if isinstance(path_or_buf, compat.binary_type):
        fh = None
        try:
            fh = compat.BytesIO(path_or_buf)
            return read(fh)
        finally:
            if fh is not None:
                fh.close()

    # a buffer like
    if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        return read(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')


# Map legacy numeric dtype codes and dtype-name strings to numpy dtypes.
dtype_dict = {21: np.dtype('M8[ns]'),
              u('datetime64[ns]'): np.dtype('M8[ns]'),
              u('datetime64[us]'): np.dtype('M8[us]'),
              22: np.dtype('m8[ns]'),
              u('timedelta64[ns]'): np.dtype('m8[ns]'),
              u('timedelta64[us]'): np.dtype('m8[us]'),

              # this is platform int, which we need to remap to np.int64
              # for compat on windows platforms
              7: np.dtype('int64'),
              'category': 'category'
              }


def dtype_for(t):
    """ return my dtype mapping, whether number or name """
    if t in dtype_dict:
        return dtype_dict[t]
    return np.typeDict.get(t, t)


# complex dtype name -> float type of each component
c2f_dict = {'complex': np.float64,
            'complex128': np.float64,
            'complex64': np.float32}

# numpy 1.6.1 compat
if hasattr(np, 'float128'):
    c2f_dict['complex256'] = np.float128


def c2f(r, i, ctype_name):
    """
    Convert strings to complex number instance with specified numpy type.
    """

    ftype = c2f_dict[ctype_name]
    return np.typeDict[ctype_name](ftype(r) + 1j * ftype(i))


def convert(values):
    """ convert the numpy values to a list """
    # Encodes an ndarray for the wire: object arrays become plain lists,
    # everything else becomes an ExtType(0, raw-bytes) payload, optionally
    # compressed with the module-level `compressor`.

    dtype = values.dtype

    if is_categorical_dtype(values):
        return values

    elif is_object_dtype(dtype):
        return values.ravel().tolist()

    if needs_i8_conversion(dtype):
        # datetimelike data travels as its int64 representation
        values = values.view('i8')
    v = values.ravel()

    if compressor == 'zlib':
        _check_zlib()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        v = v.tostring()
        return ExtType(0, zlib.compress(v))

    elif compressor == 'blosc':
        _check_blosc()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        v = v.tostring()
        return ExtType(0, blosc.compress(v, typesize=dtype.itemsize))

    # ndarray (on original dtype)
    return ExtType(0, v.tostring())


def unconvert(values, dtype, compress=None):
    # Inverse of convert(): rebuild an ndarray of `dtype` from the wire
    # payload, decompressing with zlib/blosc when requested.

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        # legacy payloads arrive as latin-1 text rather than ExtType bytes
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            # move_into_mutable_buffer avoids copying the decompressed bytes
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
            # fall through to copying `np.fromstring`

    # Copy the string into a numpy array.
    return np.fromstring(values, dtype=dtype)


def encode(obj):
    """
    Data encoder
    """
    # Dispatches on the pandas/numpy/datetime type of `obj` and returns a
    # plain dict tagged with u'typ' that decode() can reverse.
    tobj = type(obj)
    if isinstance(obj, Index):
        if isinstance(obj, RangeIndex):
            return {u'typ': u'range_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'start': getattr(obj, '_start', None),
                    u'stop': getattr(obj, '_stop', None),
                    u'step': getattr(obj, '_step', None)}
        elif isinstance(obj, PeriodIndex):
            return {u'typ': u'period_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.asi8),
                    u'compress': compressor}
        elif isinstance(obj, DatetimeIndex):
            tz = getattr(obj, 'tz', None)

            # store tz info and data as UTC
            if tz is not None:
                tz = u(tz.zone)
                obj = obj.tz_convert('UTC')
            return {u'typ':
                    u'datetime_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.asi8),
                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
                    u'tz': tz,
                    u'compress': compressor}
        elif isinstance(obj, MultiIndex):
            return {u'typ': u'multi_index',
                    u'klass': u(obj.__class__.__name__),
                    u'names': getattr(obj, 'names', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}
        else:
            # any other Index subclass serializes generically
            return {u'typ': u'index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}

    elif isinstance(obj, Categorical):
        return {u'typ': u'category',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'codes': obj.codes,
                u'categories': obj.categories,
                u'ordered': obj.ordered,
                u'compress': compressor}

    elif isinstance(obj, Series):
        if isinstance(obj, SparseSeries):
            raise NotImplementedError(
                'msgpack sparse series is not implemented'
            )
            # d = {'typ': 'sparse_series',
            #      'klass': obj.__class__.__name__,
            #      'dtype': obj.dtype.name,
            #      'index': obj.index,
            #      'sp_index': obj.sp_index,
            #      'sp_values': convert(obj.sp_values),
            #      'compress': compressor}
            # for f in ['name', 'fill_value', 'kind']:
            #     d[f] = getattr(obj, f, None)
            # return d
        else:
            return {u'typ': u'series',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'index': obj.index,
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}
    elif issubclass(tobj, NDFrame):
        if isinstance(obj, SparseDataFrame):
            raise NotImplementedError(
                'msgpack sparse frame is not implemented'
            )
            # d = {'typ': 'sparse_dataframe',
            #      'klass': obj.__class__.__name__,
            #      'columns': obj.columns}
            # for f in ['default_fill_value', 'default_kind']:
            #     d[f] = getattr(obj, f, None)
            # d['data'] = dict([(name, ss)
            #                   for name, ss in compat.iteritems(obj)])
            # return d
        else:

            data = obj._data
            if not data.is_consolidated():
                data = data.consolidate()

            # the block manager
            return {u'typ': u'block_manager',
                    u'klass': u(obj.__class__.__name__),
                    u'axes': data.axes,
                    u'blocks': [{u'locs': b.mgr_locs.as_array,
                                 u'values': convert(b.values),
                                 u'shape': b.values.shape,
                                 u'dtype': u(b.dtype.name),
                                 u'klass': u(b.__class__.__name__),
                                 u'compress': compressor} for b in data.blocks]
                    }

    elif isinstance(obj, (datetime, date, np.datetime64, timedelta,
                          np.timedelta64, NaTType)):
        # most-specific type first: Timestamp before plain datetime
        if isinstance(obj, Timestamp):
            tz = obj.tzinfo
            if tz is not None:
                tz = u(tz.zone)
            freq = obj.freq
            if freq is not None:
                freq = u(freq.freqstr)
            return {u'typ': u'timestamp',
                    u'value': obj.value,
                    u'freq': freq,
                    u'tz': tz}
        if isinstance(obj, NaTType):
            return {u'typ': u'nat'}
        elif isinstance(obj, np.timedelta64):
            return {u'typ': u'timedelta64',
                    u'data': obj.view('i8')}
        elif isinstance(obj, timedelta):
            return {u'typ': u'timedelta',
                    u'data': (obj.days, obj.seconds, obj.microseconds)}
        elif isinstance(obj, np.datetime64):
            return {u'typ': u'datetime64',
                    u'data': u(str(obj))}
        elif isinstance(obj, datetime):
            return {u'typ': u'datetime',
                    u'data': u(obj.isoformat())}
        elif isinstance(obj, date):
            return {u'typ': u'date',
                    u'data': u(obj.isoformat())}
        raise Exception("cannot encode this datetimelike object: %s" % obj)
    elif isinstance(obj, Period):
        return {u'typ': u'period',
                u'ordinal': obj.ordinal,
                u'freq': u(obj.freq)}
    elif isinstance(obj, BlockIndex):
        return {u'typ': u'block_index',
                u'klass': u(obj.__class__.__name__),
                u'blocs': obj.blocs,
                u'blengths': obj.blengths,
                u'length': obj.length}
    elif isinstance(obj, IntIndex):
        return {u'typ': u'int_index',
                u'klass': u(obj.__class__.__name__),
                u'indices': obj.indices,
                u'length': obj.length}
    elif isinstance(obj, np.ndarray):
        return {u'typ': u'ndarray',
                u'shape': obj.shape,
                u'ndim': obj.ndim,
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj),
                u'compress': compressor}
    elif isinstance(obj, np.number):
        if np.iscomplexobj(obj):
            return {u'typ': u'np_scalar',
                    u'sub_typ': u'np_complex',
                    u'dtype': u(obj.dtype.name),
                    u'real': u(obj.real.__repr__()),
                    u'imag': u(obj.imag.__repr__())}
        else:
            return {u'typ': u'np_scalar',
                    u'dtype': u(obj.dtype.name),
                    u'data': u(obj.__repr__())}
    elif isinstance(obj, complex):
        return {u'typ': u'np_complex',
                u'real': u(obj.real.__repr__()),
                u'imag': u(obj.imag.__repr__())}

    # anything unrecognized is passed through for msgpack's default handling
    return obj


def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """
    # Inverse of encode(): dispatch on the u'typ' tag written by encode()
    # and rebuild the corresponding pandas/numpy object.

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        # older payloads stored the frequency under 'offset'
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'],
                                        obj[u'stop'],
                                        obj[u'step'],
                                        name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        if _is_pandas_legacy_version:
            # legacy
            return globals()[obj[u'klass']](data, **d)
        else:
            return globals()[obj[u'klass']]._from_ordinals(data, **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],
                          categories=obj[u'categories'],
                          ordered=obj[u'ordered'])

    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)

        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=pd_dtype,
                                          name=obj[u'name'])
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = _safe_reshape(unconvert(
                b[u'values'], dtype_for(b[u'dtype']),
                b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except:
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj


def pack(o, default=encode,
         encoding='utf-8', unicode_errors='strict', use_single_float=False,
         autoreset=1, use_bin_type=1):
    """
    Pack an object and return the packed bytes.
686 | """ 687 | 688 | return Packer(default=default, encoding=encoding, 689 | unicode_errors=unicode_errors, 690 | use_single_float=use_single_float, 691 | autoreset=autoreset, 692 | use_bin_type=use_bin_type).pack(o) 693 | 694 | 695 | def unpack(packed, object_hook=decode, 696 | list_hook=None, use_list=False, encoding='utf-8', 697 | unicode_errors='strict', object_pairs_hook=None, 698 | max_buffer_size=0, ext_hook=ExtType): 699 | """ 700 | Unpack a packed object, return an iterator 701 | Note: packed lists will be returned as tuples 702 | """ 703 | 704 | return Unpacker(packed, object_hook=object_hook, 705 | list_hook=list_hook, 706 | use_list=use_list, encoding=encoding, 707 | unicode_errors=unicode_errors, 708 | object_pairs_hook=object_pairs_hook, 709 | max_buffer_size=max_buffer_size, 710 | ext_hook=ext_hook) 711 | 712 | 713 | class Packer(_Packer): 714 | 715 | def __init__(self, default=encode, 716 | encoding='utf-8', 717 | unicode_errors='strict', 718 | use_single_float=False, 719 | autoreset=1, 720 | use_bin_type=1): 721 | super(Packer, self).__init__(default=default, 722 | encoding=encoding, 723 | unicode_errors=unicode_errors, 724 | use_single_float=use_single_float, 725 | autoreset=autoreset, 726 | use_bin_type=use_bin_type) 727 | 728 | 729 | class Unpacker(_Unpacker): 730 | 731 | def __init__(self, file_like=None, read_size=0, use_list=False, 732 | object_hook=decode, 733 | object_pairs_hook=None, list_hook=None, encoding='utf-8', 734 | unicode_errors='strict', max_buffer_size=0, ext_hook=ExtType): 735 | super(Unpacker, self).__init__(file_like=file_like, 736 | read_size=read_size, 737 | use_list=use_list, 738 | object_hook=object_hook, 739 | object_pairs_hook=object_pairs_hook, 740 | list_hook=list_hook, 741 | encoding=encoding, 742 | unicode_errors=unicode_errors, 743 | max_buffer_size=max_buffer_size, 744 | ext_hook=ext_hook) 745 | 746 | 747 | class Iterator(object): 748 | 749 | """ manage the unpacking iteration, 750 | close the file on completion 
""" 751 | 752 | def __init__(self, path, **kwargs): 753 | self.path = path 754 | self.kwargs = kwargs 755 | 756 | def __iter__(self): 757 | 758 | needs_closing = True 759 | try: 760 | 761 | # see if we have an actual file 762 | if isinstance(self.path, compat.string_types): 763 | 764 | try: 765 | path_exists = os.path.exists(self.path) 766 | except TypeError: 767 | path_exists = False 768 | 769 | if path_exists: 770 | fh = open(self.path, 'rb') 771 | else: 772 | fh = compat.BytesIO(self.path) 773 | 774 | else: 775 | 776 | if not hasattr(self.path, 'read'): 777 | fh = compat.BytesIO(self.path) 778 | 779 | else: 780 | 781 | # a file-like 782 | needs_closing = False 783 | fh = self.path 784 | 785 | unpacker = unpack(fh) 786 | for o in unpacker: 787 | yield o 788 | finally: 789 | if needs_closing: 790 | fh.close() 791 | --------------------------------------------------------------------------------