├── pandas_msgpack
├── tests
│ └── __init__.py
├── msgpack
│ ├── _version.py
│ ├── exceptions.py
│ ├── __init__.py
│ ├── _packer.pyx
│ └── _unpacker.pyx
├── __init__.py
├── includes
│ ├── unpack_define.h
│ ├── pack.h
│ ├── sysdep.h
│ ├── unpack.h
│ ├── unpack_template.h
│ └── pack_template.h
├── move.c
├── _version.py
└── packers.py
├── requirements.txt
├── ci
├── requirements-2.7.pip
├── requirements-3.6.pip
├── install_travis.sh
├── install.ps1
└── run_with_env.cmd
├── test.sh
├── .gitattributes
├── test.bat
├── docs
├── source
│ ├── _static
│ │ └── style.css
│ ├── _templates
│ │ └── layout.html
│ ├── api.rst
│ ├── read_write.rst
│ ├── changelog.rst
│ ├── install.rst
│ ├── index.rst
│ ├── tutorial.rst
│ ├── compression.rst
│ ├── Makefile
│ └── conf.py
├── requirements-docs.txt
└── README.rst
├── codecov.yml
├── MANIFEST.in
├── setup.cfg
├── release-procedure.md
├── .travis.yml
├── .gitignore
├── README.rst
├── appveyor.yml
├── LICENSE.md
└── setup.py
/pandas_msgpack/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=0.19.2
2 |
--------------------------------------------------------------------------------
/ci/requirements-2.7.pip:
--------------------------------------------------------------------------------
1 | blosc
2 | sqlalchemy
3 |
--------------------------------------------------------------------------------
/ci/requirements-3.6.pip:
--------------------------------------------------------------------------------
1 | blosc
2 | sqlalchemy
3 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | pytest pandas_msgpack "$@"
3 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | pandas_msgpack/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/pandas_msgpack/msgpack/_version.py:
--------------------------------------------------------------------------------
1 | version = (0, 4, 6)
2 |
--------------------------------------------------------------------------------
/test.bat:
--------------------------------------------------------------------------------
1 | :: test on windows
2 |
3 | pytest pandas_msgpack %*
4 |
--------------------------------------------------------------------------------
/docs/source/_static/style.css:
--------------------------------------------------------------------------------
1 | @import url("theme.css");
2 |
3 | a.internal em {font-style: normal}
4 |
--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 | {% set css_files = css_files + ["_static/style.css"] %}
3 |
--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | ipython
3 | numpydoc
4 | sphinx
5 | sphinx_rtd_theme
6 | pandas
7 | blosc
8 | cython
9 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | coverage:
2 | status:
3 | project:
4 | default:
5 | target: '30'
6 | patch:
7 | default:
8 | target: '50'
9 | branches: null
10 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | .. currentmodule:: pandas_msgpack
2 | .. _api:
3 |
4 | *************
5 | API Reference
6 | *************
7 |
8 | .. autosummary::
9 |
10 | read_msgpack
11 | to_msgpack
12 |
13 | .. autofunction:: read_msgpack
14 | .. autofunction:: to_msgpack
15 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include MANIFEST.in
2 | include README.rst
3 | include LICENSE.md
4 | include setup.py
5 |
6 | graft pandas_msgpack
7 |
8 | global-exclude *.so
9 | global-exclude *.pyd
10 | global-exclude *.pyc
11 | global-exclude *~
12 | global-exclude \#*
13 | global-exclude .git*
14 | global-exclude .DS_Store
15 | global-exclude *.png
16 |
17 | include versioneer.py
18 | include pandas_msgpack/_version.py
19 |
--------------------------------------------------------------------------------
/docs/README.rst:
--------------------------------------------------------------------------------
1 | To build a local copy of the pandas-msgpack docs, install the programs in
2 | requirements-docs.txt and run 'make html'. If you use the conda package manager
3 | these commands suffice::
4 |
5 | git clone git@github.com:pydata/pandas-msgpack.git
6 | cd pandas-msgpack/docs
7 | conda create -n pandas-msgpack-docs --file requirements-docs.txt
8 | source activate pandas-msgpack-docs
9 | make html
10 | open build/html/index.html
11 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 |
2 | # See the docstring in versioneer.py for instructions. Note that you must
3 | # re-run 'versioneer.py setup' after changing this section, and commit the
4 | # resulting files.
5 |
6 | [versioneer]
7 | VCS = git
8 | style = pep440
9 | versionfile_source = pandas_msgpack/_version.py
10 | versionfile_build = pandas_msgpack/_version.py
11 | tag_prefix =
12 | parentdir_prefix = pandas_msgpack-
13 |
14 | [flake8]
15 | ignore = E731
16 |
--------------------------------------------------------------------------------
/release-procedure.md:
--------------------------------------------------------------------------------
1 | * Tag commit
2 |
3 | git tag -a x.x.x -m 'Version x.x.x'
4 |
5 | * and push to github
6 |
7 | git push origin master --tags
8 |
9 | * Upload to PyPI
10 |
11 | git clean -xfd
12 | python setup.py register sdist --formats=gztar
13 | twine upload dist/*
14 |
15 | * Do a pull-request to the feedstock on `pandas-msgpack-feedstock <https://github.com/conda-forge/pandas-msgpack-feedstock>`__
16 |
17 | update the version
18 | update the SHA256 (retrieve from PyPI)
19 |
--------------------------------------------------------------------------------
/pandas_msgpack/msgpack/exceptions.py:
--------------------------------------------------------------------------------
1 | class UnpackException(Exception):
2 | pass
3 |
4 |
5 | class BufferFull(UnpackException):
6 | pass
7 |
8 |
9 | class OutOfData(UnpackException):
10 | pass
11 |
12 |
13 | class UnpackValueError(UnpackException, ValueError):
14 | pass
15 |
16 |
17 | class ExtraData(ValueError):
18 |
19 | def __init__(self, unpacked, extra):
20 | self.unpacked = unpacked
21 | self.extra = extra
22 |
23 | def __str__(self):
24 | return "unpack(b) received extra data."
25 |
26 |
27 | class PackException(Exception):
28 | pass
29 |
30 |
31 | class PackValueError(PackException, ValueError):
32 | pass
33 |
--------------------------------------------------------------------------------
/pandas_msgpack/__init__.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 |
3 | # pandas versioning
4 | import pandas
5 |
6 | from distutils.version import LooseVersion
7 | pv = LooseVersion(pandas.__version__)
8 |
9 | if pv < '0.19.0':
10 | raise ValueError("pandas_msgpack requires at least pandas 0.19.0")
11 | _is_pandas_legacy_version = pv.version[1] == 19 and len(pv.version) == 3
12 |
13 | from .packers import to_msgpack, read_msgpack
14 |
15 | # versioning
16 | from ._version import get_versions
17 |
18 | versions = get_versions()
19 | __version__ = versions.get('closest-tag', versions['version'])
20 | __git_revision__ = versions['full-revisionid']
21 | del get_versions, versions, pv, LooseVersion, pandas
22 |
--------------------------------------------------------------------------------
/docs/source/read_write.rst:
--------------------------------------------------------------------------------
1 | .. _read_write:
2 |
3 | .. ipython:: python
4 | :suppress:
5 |
6 | import pandas as pd
7 |
8 |
9 | Read/Write API
10 | --------------
11 |
12 | Msgpacks can also be read from and written to strings.
13 |
14 | .. ipython:: python
15 |
16 | import pandas as pd
17 | from pandas_msgpack import to_msgpack, read_msgpack
18 |
19 | df = pd.DataFrame({'A': np.arange(10),
20 | 'B': np.random.randn(10),
21 | 'C': 'foo'})
22 |
23 | to_msgpack(None, df)
24 |
25 | Furthermore you can concatenate the strings to produce a list of the original objects.
26 |
27 | .. ipython:: python
28 |
29 | read_msgpack(to_msgpack(None, df) + to_msgpack(None, df.A))
30 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: python
3 |
4 | env:
5 | - PYTHON=2.7 PANDAS=0.19.2
6 | - PYTHON=3.4 PANDAS=0.19.2
7 | - PYTHON=3.5 PANDAS=0.19.2 COVERAGE='true'
8 | - PYTHON=3.6 PANDAS='master' LINT='true'
9 |
10 | before_install:
11 | - echo "before_install"
12 | - export PATH="$HOME/miniconda3/bin:$PATH"
13 | - df -h
14 | - pwd
15 | - uname -a
16 | - git --version
17 | - git tag
18 |
19 | install:
20 | - ci/install_travis.sh
21 |
22 | script:
23 | - source activate test-environment && pytest -s -v --cov=pandas_msgpack --cov-report xml:/tmp/pytest-cov.xml pandas_msgpack
24 | - if [ $COVERAGE ]; then source activate test-environment && codecov ; fi
25 | - if [ $LINT ]; then source activate test-environment && flake8 pandas_msgpack ; fi
26 |
--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
1 | Changelog
2 | =========
3 |
4 | 0.1.4 / 2017-03-30
5 | ------------------
6 |
7 | Initial release of transferred code from `pandas <https://github.com/pandas-dev/pandas>`__
8 |
9 | Includes patches since the 0.19.2 release on pandas with the following:
10 |
11 | - Bug in ``read_msgpack()`` in which ``Series`` categoricals were being improperly processed, see `pandas-GH#14901 <https://github.com/pandas-dev/pandas/issues/14901>`__
12 | - Bug in ``read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex``, see `pandas-GH#15487 <https://github.com/pandas-dev/pandas/issues/15487>`__
13 | - Bug in ``read_msgpack()`` when deserializing a ``CategoricalIndex``, see `pandas-GH#15487 <https://github.com/pandas-dev/pandas/issues/15487>`__
14 |
--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | You can install pandas-msgpack with ``conda``, ``pip``, or by installing from source.
5 |
6 | Conda
7 | -----
8 |
9 | .. code-block:: shell
10 |
11 | $ conda install pandas-msgpack --channel conda-forge
12 |
13 | This installs pandas-msgpack and all common dependencies, including ``pandas``.
14 |
15 | Pip
16 | ---
17 |
18 | To install the latest version of pandas-msgpack:
19 |
20 | .. code-block:: shell
21 |
22 | $ pip install pandas-msgpack -U
23 |
24 | This installs pandas-msgpack and all common dependencies, including ``pandas``.
25 |
26 |
27 | Install from Source
28 | -------------------
29 |
30 | .. code-block:: shell
31 |
32 | $ pip install git+https://github.com/pydata/pandas-msgpack.git
33 |
34 |
35 | Dependencies
36 | ------------
37 |
38 | - `pandas <https://pandas.pydata.org>`__ ``>=0.19.2``
39 | - `blosc <https://www.blosc.org>`__ library can be optionally installed as a compressor.
40 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. pandas-msgpack documentation master file, created by
2 | sphinx-quickstart on Wed Feb 8 10:52:12 2017.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to pandas-msgpack's documentation!
7 | ==========================================
8 |
9 | The :mod:`pandas_msgpack` module provides an interface from `pandas <https://pandas.pydata.org>`__
10 | to the `msgpack <https://msgpack.org>`__ library. This is a lightweight portable
11 | binary format, similar to binary JSON, that is highly space efficient, and provides good performance
12 | both on the writing (serialization), and reading (deserialization).
13 |
14 | Contents:
15 |
16 | .. toctree::
17 | :maxdepth: 2
18 |
19 | install.rst
20 | tutorial.rst
21 | compression.rst
22 | read_write.rst
23 | api.rst
24 | changelog.rst
25 |
26 |
27 | Indices and tables
28 | ==================
29 |
30 | * :ref:`genindex`
31 | * :ref:`modindex`
32 | * :ref:`search`
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #########################################
2 | # Editor temporary/working/backup files #
3 | .#*
4 | *\#*\#
5 | [#]*#
6 | *~
7 | *$
8 | *.bak
9 | *flymake*
10 | *.kdev4
11 | *.log
12 | *.swp
13 | *.pdb
14 | .project
15 | .pydevproject
16 | .settings
17 | .idea
18 | .vagrant
19 | .noseids
20 | .ipynb_checkpoints
21 | .tags
22 |
23 | # Docs #
24 | ########
25 | docs/source/_build
26 |
27 | # Coverage #
28 | ############
29 | .coverage
30 | coverage.xml
31 | coverage_html_report
32 |
33 | # Compiled source #
34 | ###################
35 | *.a
36 | *.com
37 | *.class
38 | *.dll
39 | *.exe
40 | *.pxi
41 | *.o
42 | *.py[ocd]
43 | *.so
44 | .build_cache_dir
45 | MANIFEST
46 |
47 | # Python files #
48 | ################
49 | # setup.py working directory
50 | build
51 | # setup.py dist directory
52 | dist
53 | # Egg metadata
54 | *.egg-info
55 | .eggs
56 | .pypirc
57 |
58 | # tox testing tool
59 | .tox
60 | # rope
61 | .ropeproject
62 | # wheel files
63 | *.whl
64 | **/wheelhouse/*
65 | # coverage
66 | .coverage
67 |
68 | # OS generated files #
69 | ######################
70 | .directory
71 | .gdb_history
72 | .DS_Store
73 | ehthumbs.db
74 | Icon?
75 | Thumbs.db
76 |
77 | # caches #
78 | .cache
79 |
--------------------------------------------------------------------------------
/ci/install_travis.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # install miniconda
4 | MINICONDA_DIR="$HOME/miniconda3"
5 |
6 | if [ -d "$MINICONDA_DIR" ]; then
7 | rm -rf "$MINICONDA_DIR"
8 | fi
9 |
10 | # install miniconda
11 | if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
12 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1
13 | else
14 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
15 | fi
16 | bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1
17 |
18 | conda config --set ssl_verify false || exit 1
19 | conda config --set always_yes true --set changeps1 false || exit 1
20 | conda update -q conda
21 |
22 | conda info -a || exit 1
23 |
24 | conda create -n test-environment python=$PYTHON cython pytest
25 | source activate test-environment
26 |
27 | pip install coverage pytest-cov flake8 codecov
28 | if [ $PANDAS == 'master' ]; then
29 |
30 | echo "installing deps"
31 | pip install numpy pytz python-dateutil
32 |
33 | echo "installing pandas master wheel"
34 | PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
35 | pip install --pre --timeout=60 -f $PRE_WHEELS pandas==0.19.0+699.gecaeea1
36 |
37 | else
38 | conda install pandas=$PANDAS
39 | fi
40 |
41 | REQ="ci/requirements-${PYTHON}.pip"
42 | if [ -e $REQ ]; then
43 | pip install -r $REQ;
44 | fi
45 |
46 | conda list
47 | python setup.py develop
48 |
--------------------------------------------------------------------------------
/pandas_msgpack/msgpack/__init__.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from collections import namedtuple
4 |
5 | from pandas_msgpack.msgpack.exceptions import * # noqa
6 | from pandas_msgpack.msgpack._version import version # noqa
7 |
8 |
9 | class ExtType(namedtuple('ExtType', 'code data')):
10 | """ExtType represents ext type in msgpack."""
11 | def __new__(cls, code, data):
12 | if not isinstance(code, int):
13 | raise TypeError("code must be int")
14 | if not isinstance(data, bytes):
15 | raise TypeError("data must be bytes")
16 | if not 0 <= code <= 127:
17 | raise ValueError("code must be 0~127")
18 | return super(ExtType, cls).__new__(cls, code, data)
19 |
20 | import os # noqa
21 |
22 | from pandas_msgpack.msgpack._packer import Packer # noqa
23 | from pandas_msgpack.msgpack._unpacker import unpack, unpackb, Unpacker # noqa
24 |
25 |
26 | def pack(o, stream, **kwargs):
27 | """
28 | Pack object `o` and write it to `stream`
29 |
30 | See :class:`Packer` for options.
31 | """
32 | packer = Packer(**kwargs)
33 | stream.write(packer.pack(o))
34 |
35 |
36 | def packb(o, **kwargs):
37 | """
38 | Pack object `o` and return packed bytes
39 |
40 | See :class:`Packer` for options.
41 | """
42 | return Packer(**kwargs).pack(o)
43 |
44 |
45 | # alias for compatibility to simplejson/marshal/pickle.
46 | load = unpack
47 | loads = unpackb
48 |
49 | dump = pack
50 | dumps = packb
51 |
--------------------------------------------------------------------------------
/docs/source/tutorial.rst:
--------------------------------------------------------------------------------
1 | .. _tutorial:
2 |
3 | .. ipython:: python
4 | :suppress:
5 |
6 | import pandas as pd
7 | import os
8 |
9 | Tutorial
10 | --------
11 |
12 | .. ipython:: python
13 |
14 | import pandas as pd
15 | from pandas_msgpack import to_msgpack, read_msgpack
16 |
17 | .. ipython:: python
18 |
19 | df = pd.DataFrame(np.random.rand(5,2), columns=list('AB'))
20 | to_msgpack('foo.msg', df)
21 | read_msgpack('foo.msg')
22 | s = pd.Series(np.random.rand(5),index=pd.date_range('20130101',periods=5))
23 |
24 | You can pass a list of objects and you will receive them back on deserialization.
25 |
26 | .. ipython:: python
27 |
28 | to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s)
29 | read_msgpack('foo.msg')
30 |
31 | You can pass ``iterator=True`` to iterate over the unpacked results
32 |
33 | .. ipython:: python
34 |
35 | for o in read_msgpack('foo.msg',iterator=True):
36 | print(o)
37 |
38 | You can pass ``append=True`` to the writer to append to an existing pack
39 |
40 | .. ipython:: python
41 |
42 | to_msgpack('foo.msg', df, append=True)
43 | read_msgpack('foo.msg')
44 |
45 | Furthermore you can pass in arbitrary python objects.
46 |
47 | .. ipython:: python
48 |
49 | to_msgpack('foo2.msg', { 'dict' : [ { 'df' : df }, { 'string' : 'foo' }, { 'scalar' : 1. }, { 's' : s } ] })
50 | read_msgpack('foo2.msg')
51 |
52 | .. ipython:: python
53 | :suppress:
54 | :okexcept:
55 |
56 | import os
57 | os.remove('foo.msg')
58 | os.remove('foo2.msg')
59 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | pandas-msgpack
2 | ==============
3 |
4 | THIS LIBRARY IS NO LONGER IN DEVELOPMENT OR MAINTAINED
5 | ------------------------------------------------------
6 |
7 | |Travis Build Status| |Appveyor Build Status| |Version Status| |Coverage Status|
8 |
9 | **pandas-msgpack** is a package providing an interface to msgpack from pandas
10 |
11 |
12 | Installation
13 | ------------
14 |
15 |
16 | Install latest release version via conda
17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18 |
19 | .. code-block:: shell
20 |
21 | $ conda install pandas-msgpack --channel conda-forge
22 |
23 | Install latest release version via pip
24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
25 |
26 | .. code-block:: shell
27 |
28 | $ pip install pandas-msgpack
29 |
30 | Install latest development version
31 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
32 |
33 | .. code-block:: shell
34 |
35 | $ pip install git+https://github.com/pydata/pandas-msgpack.git
36 |
37 |
38 | Usage
39 | -----
40 |
41 | See the `pandas-msgpack documentation <https://pandas-msgpack.readthedocs.io/>`_ for more details.
42 |
43 | .. |Travis Build Status| image:: https://travis-ci.org/pydata/pandas-msgpack.svg?branch=master
44 | :target: https://travis-ci.org/pydata/pandas-msgpack
45 | .. |Appveyor Build Status| image:: https://ci.appveyor.com/api/projects/status/5716aqchorgwmwxf/branch/master?svg=true
46 | :target: https://ci.appveyor.com/project/jreback/pandas-msgpack
47 | .. |Version Status| image:: https://img.shields.io/pypi/v/pandas-msgpack.svg
48 | :target: https://pypi.python.org/pypi/pandas-msgpack/
49 | .. |Coverage Status| image:: https://img.shields.io/codecov/c/github/pydata/pandas-msgpack.svg
50 | :target: https://codecov.io/gh/pydata/pandas-msgpack/
51 |
--------------------------------------------------------------------------------
/docs/source/compression.rst:
--------------------------------------------------------------------------------
1 | .. _compression:
2 |
3 | .. ipython:: python
4 | :suppress:
5 |
6 | import pandas as pd
7 | import os
8 |
9 | Compression
10 | -----------
11 |
12 | Optionally, a ``compression`` argument will compress the resulting bytes.
13 | These can take a bit more time to write. The available compressors are
14 | ``zlib`` and `blosc <https://www.blosc.org>`__.
15 |
16 | Generally compression will increase the writing time.
17 |
18 | .. ipython:: python
19 |
20 | import pandas as pd
21 | from pandas_msgpack import to_msgpack, read_msgpack
22 |
23 | df = pd.DataFrame({'A': np.arange(100000),
24 | 'B': np.random.randn(100000),
25 | 'C': 'foo'})
26 |
27 | .. ipython:: python
28 |
29 | %timeit -n 1 -r 1 to_msgpack('uncompressed.msg', df)
30 |
31 | .. ipython:: python
32 |
33 | %timeit -n 1 -r 1 to_msgpack('compressed_blosc.msg', df, compress='blosc')
34 |
35 | .. ipython:: python
36 |
37 | %timeit -n 1 -r 1 to_msgpack('compressed_zlib.msg', df, compress='zlib')
38 |
39 | If compressed, it will be automatically inferred and de-compressed upon reading.
40 |
41 | .. ipython:: python
42 |
43 | %timeit -n 1 -r 1 read_msgpack('uncompressed.msg')
44 |
45 | .. ipython:: python
46 |
47 | %timeit -n 1 -r 1 read_msgpack('compressed_blosc.msg')
48 |
49 | .. ipython:: python
50 |
51 | %timeit -n 1 -r 1 read_msgpack('compressed_zlib.msg')
52 |
53 | These can provide storage space savings.
54 |
55 | .. ipython:: python
56 |
57 | !ls -ltr *.msg
58 |
59 | .. ipython:: python
60 | :suppress:
61 | :okexcept:
62 |
63 | os.remove('uncompressed.msg')
64 | os.remove('compressed_blosc.msg')
65 | os.remove('compressed_zlib.msg')
66 |
--------------------------------------------------------------------------------
/pandas_msgpack/includes/unpack_define.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack unpacking routine template
3 | *
4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | #ifndef MSGPACK_UNPACK_DEFINE_H__
19 | #define MSGPACK_UNPACK_DEFINE_H__
20 |
21 | #include "sysdep.h"
22 | #include <limits.h>
23 | #include <string.h>
24 | #include <stdio.h>
25 | #include <assert.h>
26 |
27 | #ifdef __cplusplus
28 | extern "C" {
29 | #endif
30 |
31 |
32 | #ifndef MSGPACK_EMBED_STACK_SIZE
33 | #define MSGPACK_EMBED_STACK_SIZE 32
34 | #endif
35 |
36 |
37 | // CS is first byte & 0x1f
38 | typedef enum {
39 | CS_HEADER = 0x00, // nil
40 |
41 | //CS_ = 0x01,
42 | //CS_ = 0x02, // false
43 | //CS_ = 0x03, // true
44 |
45 | CS_BIN_8 = 0x04,
46 | CS_BIN_16 = 0x05,
47 | CS_BIN_32 = 0x06,
48 |
49 | CS_EXT_8 = 0x07,
50 | CS_EXT_16 = 0x08,
51 | CS_EXT_32 = 0x09,
52 |
53 | CS_FLOAT = 0x0a,
54 | CS_DOUBLE = 0x0b,
55 | CS_UINT_8 = 0x0c,
56 | CS_UINT_16 = 0x0d,
57 | CS_UINT_32 = 0x0e,
58 | CS_UINT_64 = 0x0f,
59 | CS_INT_8 = 0x10,
60 | CS_INT_16 = 0x11,
61 | CS_INT_32 = 0x12,
62 | CS_INT_64 = 0x13,
63 |
64 | //CS_FIXEXT1 = 0x14,
65 | //CS_FIXEXT2 = 0x15,
66 | //CS_FIXEXT4 = 0x16,
67 | //CS_FIXEXT8 = 0x17,
68 | //CS_FIXEXT16 = 0x18,
69 |
70 | CS_RAW_8 = 0x19,
71 | CS_RAW_16 = 0x1a,
72 | CS_RAW_32 = 0x1b,
73 | CS_ARRAY_16 = 0x1c,
74 | CS_ARRAY_32 = 0x1d,
75 | CS_MAP_16 = 0x1e,
76 | CS_MAP_32 = 0x1f,
77 |
78 | ACS_RAW_VALUE,
79 | ACS_BIN_VALUE,
80 | ACS_EXT_VALUE,
81 | } msgpack_unpack_state;
82 |
83 |
84 | typedef enum {
85 | CT_ARRAY_ITEM,
86 | CT_MAP_KEY,
87 | CT_MAP_VALUE,
88 | } msgpack_container_type;
89 |
90 |
91 | #ifdef __cplusplus
92 | }
93 | #endif
94 |
95 | #endif /* msgpack/unpack_define.h */
96 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | # With infos from
2 | # http://tjelvarolsson.com/blog/how-to-continuously-test-your-python-code-on-windows-using-appveyor/
3 | # https://packaging.python.org/en/latest/appveyor/
4 | # https://github.com/rmcgibbo/python-appveyor-conda-example
5 |
6 | # Backslashes in quotes need to be escaped: \ -> "\\"
7 |
8 | matrix:
9 | fast_finish: true # immediately finish build once one of the jobs fails.
10 |
11 | environment:
12 | global:
13 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
14 | # /E:ON and /V:ON options are not enabled in the batch script interpreter
15 | # See: http://stackoverflow.com/a/13751649/163740
16 | CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd"
17 | clone_folder: C:\projects\pandas-msgpack
18 |
19 | matrix:
20 |
21 | - CONDA_ROOT: "C:\\Miniconda3_64"
22 | PYTHON_VERSION: "3.6"
23 | PYTHON_ARCH: "64"
24 | CONDA_PY: "36"
25 | CONDA_NPY: "112"
26 |
27 | - CONDA_ROOT: "C:\\Miniconda3_64"
28 | PYTHON_VERSION: "3.5"
29 | PYTHON_ARCH: "64"
30 | CONDA_PY: "35"
31 | CONDA_NPY: "111"
32 |
33 | - CONDA_ROOT: "C:\\Miniconda3_64"
34 | PYTHON_VERSION: "2.7"
35 | PYTHON_ARCH: "64"
36 | CONDA_PY: "27"
37 | CONDA_NPY: "110"
38 |
39 | # We always use a 64-bit machine, but can build x86 distributions
40 | # with the PYTHON_ARCH variable (which is used by CMD_IN_ENV).
41 | platform:
42 | - x64
43 |
44 | # all our python builds have to happen in tests_script...
45 | build: false
46 |
47 | install:
48 | # cancel older builds for the same PR
49 | - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
50 | https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
51 | Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
52 | throw "There are newer queued builds for this pull request, failing early." }
53 |
54 | # this installs the appropriate Miniconda (Py2/Py3, 32/64 bit)
55 | # updates conda & installs: conda-build jinja2 anaconda-client
56 | - powershell .\ci\install.ps1
57 | - SET PATH=%CONDA_ROOT%;%CONDA_ROOT%\Scripts;%PATH%
58 | - echo "install"
59 | - cd
60 | - ls -ltr
61 | - git tag --sort v:refname
62 |
63 | # this can conflict with git
64 | - cmd: rmdir C:\cygwin /s /q
65 |
66 | # install our build environment
67 | - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false
68 | - cmd: conda update -q conda
69 | - cmd: conda config --set ssl_verify false
70 | - cmd: conda config --add channels conda-forge
71 |
72 | # this is now the downloaded conda...
73 | - cmd: conda info -a
74 |
75 | # create our env
76 | - cmd: conda create -n pandas-msgpack python=%PYTHON_VERSION% pandas cython pytest
77 | - cmd: activate pandas-msgpack
78 | - cmd: conda list -n pandas-msgpack
79 |
80 | # build wheels
81 | - cmd: '%CMD_IN_ENV% python setup.py bdist_wheel'
82 |
83 | # install
84 | - pip install --no-index --find-links=.\dist\ pandas-msgpack
85 |
86 | test_script:
87 | # tests
88 | - cmd: activate pandas-msgpack
89 | - cmd: cd c:\\projects
90 | - cmd: pytest --pyargs pandas_msgpack -v
91 |
92 | artifacts:
93 | - path: dist\*
94 |
--------------------------------------------------------------------------------
/ci/install.ps1:
--------------------------------------------------------------------------------
1 | # Sample script to install Miniconda under Windows
2 | # Authors: Olivier Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon
3 | # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
4 |
5 | $MINICONDA_URL = "http://repo.continuum.io/miniconda/"
6 |
7 |
8 | function DownloadMiniconda ($python_version, $platform_suffix) {
9 | $webclient = New-Object System.Net.WebClient
10 | $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe"
11 | $url = $MINICONDA_URL + $filename
12 |
13 | $basedir = $pwd.Path + "\"
14 | $filepath = $basedir + $filename
15 | if (Test-Path $filename) {
16 | Write-Host "Reusing" $filepath
17 | return $filepath
18 | }
19 |
20 | # Download and retry up to 3 times in case of network transient errors.
21 | Write-Host "Downloading" $filename "from" $url
22 | $retry_attempts = 2
23 | for($i=0; $i -lt $retry_attempts; $i++){
24 | try {
25 | $webclient.DownloadFile($url, $filepath)
26 | break
27 | }
28 | Catch [Exception]{
29 | Start-Sleep 1
30 | }
31 | }
32 | if (Test-Path $filepath) {
33 | Write-Host "File saved at" $filepath
34 | } else {
35 | # Retry once to get the error message if any at the last try
36 | $webclient.DownloadFile($url, $filepath)
37 | }
38 | return $filepath
39 | }
40 |
41 |
42 | function InstallMiniconda ($python_version, $architecture, $python_home) {
43 | Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home
44 | if (Test-Path $python_home) {
45 | Write-Host $python_home "already exists, skipping."
46 | return $false
47 | }
48 | if ($architecture -match "32") {
49 | $platform_suffix = "x86"
50 | } else {
51 | $platform_suffix = "x86_64"
52 | }
53 |
54 | $filepath = DownloadMiniconda $python_version $platform_suffix
55 | Write-Host "Installing" $filepath "to" $python_home
56 | $install_log = $python_home + ".log"
57 | $args = "/S /D=$python_home"
58 | Write-Host $filepath $args
59 | Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru
60 | if (Test-Path $python_home) {
61 | Write-Host "Python $python_version ($architecture) installation complete"
62 | } else {
63 | Write-Host "Failed to install Python in $python_home"
64 | Get-Content -Path $install_log
65 | Exit 1
66 | }
67 | }
68 |
69 |
70 | function InstallCondaPackages ($python_home, $spec) {
71 | $conda_path = $python_home + "\Scripts\conda.exe"
72 | $args = "install --yes " + $spec
73 | Write-Host ("conda " + $args)
74 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
75 | }
76 |
77 | function UpdateConda ($python_home) {
78 | $conda_path = $python_home + "\Scripts\conda.exe"
79 | Write-Host "Updating conda..."
80 | $args = "update --yes conda"
81 | Write-Host $conda_path $args
82 | Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru
83 | }
84 |
85 |
function main () {
    # Entry point: provision a Miniconda-based build environment.
    # PYTHON_ARCH and CONDA_ROOT are supplied by the CI environment
    # (AppVeyor, per appveyor.yml at the repository root).
    InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT
    UpdateConda $env:CONDA_ROOT
    InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client"
}
91 |
92 | main
93 |
--------------------------------------------------------------------------------
/ci/run_with_env.cmd:
--------------------------------------------------------------------------------
1 | :: EXPECTED ENV VARS: PYTHON_ARCH (either x86 or x64)
2 | :: CONDA_PY (either 27, 33, 35 etc. - only major version is extracted)
3 | ::
4 | ::
5 | :: To build extensions for 64 bit Python 3, we need to configure environment
6 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of:
7 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1)
8 | ::
9 | :: To build extensions for 64 bit Python 2, we need to configure environment
10 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of:
11 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0)
12 | ::
13 | :: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific
14 | :: environment configurations.
15 | ::
16 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the
17 | :: cmd interpreter, at least for (SDK v7.0)
18 | ::
19 | :: More details at:
20 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows
21 | :: http://stackoverflow.com/a/13751649/163740
22 | ::
23 | :: Author: Phil Elson
24 | :: Original Author: Olivier Grisel (https://github.com/ogrisel/python-appveyor-demo)
25 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/
26 | ::
27 | :: Notes about batch files for Python people:
28 | ::
29 | :: Quotes in values are literally part of the values:
30 | :: SET FOO="bar"
31 | :: FOO is now five characters long: " b a r "
32 | :: If you don't want quotes, don't include them on the right-hand side.
33 | ::
34 | :: The CALL lines at the end of this file look redundant, but if you move them
35 | :: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y
36 | :: case, I don't know why.
37 | :: originally from https://github.com/pelson/Obvious-CI/blob/master/scripts/obvci_appveyor_python_build_env.cmd
@ECHO OFF

:: Capture the full command line to run under the configured environment.
SET COMMAND_TO_RUN=%*
SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows

:: Extract the major and minor versions, and allow for the minor version to be
:: more than 9. This requires the version number to have two dots in it.
SET MAJOR_PYTHON_VERSION=%CONDA_PY:~0,1%

IF "%CONDA_PY:~2,1%" == "" (
    :: CONDA_PY style, such as 27, 34 etc.
    SET MINOR_PYTHON_VERSION=%CONDA_PY:~1,1%
) ELSE (
    IF "%CONDA_PY:~3,1%" == "." (
        SET MINOR_PYTHON_VERSION=%CONDA_PY:~2,1%
    ) ELSE (
        SET MINOR_PYTHON_VERSION=%CONDA_PY:~2,2%
    )
)

:: Based on the Python version, determine what SDK version to use, and whether
:: to set the SDK for 64-bit.
IF %MAJOR_PYTHON_VERSION% == 2 (
    SET WINDOWS_SDK_VERSION="v7.0"
    SET SET_SDK_64=Y
) ELSE (
    IF %MAJOR_PYTHON_VERSION% == 3 (
        SET WINDOWS_SDK_VERSION="v7.1"
        IF %MINOR_PYTHON_VERSION% LEQ 4 (
            SET SET_SDK_64=Y
        ) ELSE (
            SET SET_SDK_64=N
        )
    ) ELSE (
        ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%"
        EXIT /B 1
    )
)

:: Configure the SDK environment only for 64-bit builds that need it; in every
:: branch the CALL is kept inline (see the note in the header comment about
:: why the CALL lines cannot be hoisted out of the IF clauses).
IF "%PYTHON_ARCH%"=="64" (
    IF %SET_SDK_64% == Y (
        ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture
        SET DISTUTILS_USE_SDK=1
        SET MSSdk=1
        "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
        "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
        ECHO Executing: %COMMAND_TO_RUN%
        call %COMMAND_TO_RUN% || EXIT /B 1
    ) ELSE (
        ECHO Using default MSVC build environment for 64 bit architecture
        ECHO Executing: %COMMAND_TO_RUN%
        call %COMMAND_TO_RUN% || EXIT /B 1
    )
) ELSE (
    ECHO Using default MSVC build environment for 32 bit architecture
    ECHO Executing: %COMMAND_TO_RUN%
    call %COMMAND_TO_RUN% || EXIT /B 1
)

--------------------------------------------------------------------------------
/pandas_msgpack/includes/pack.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack for Python packing routine
3 | *
4 | * Copyright (C) 2009 Naoki INADA
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | #include
20 | #include
21 | #include "sysdep.h"
22 | #include
23 | #include
24 |
25 | #ifdef __cplusplus
26 | extern "C" {
27 | #endif
28 |
29 | #if defined(_MSC_VER) && (_MSC_VER < 1900)
30 | #define inline __inline
31 | #endif
32 |
/* Growable output buffer state shared with the Cython Packer. */
typedef struct msgpack_packer {
    char *buf;          /* realloc-managed buffer holding packed bytes */
    size_t length;      /* bytes written so far */
    size_t buf_size;    /* allocated capacity of buf */
    bool use_bin_type;  /* presumably selects msgpack 2.0 bin/str framing --
                         * set from the Packer constructor; confirm there */
} msgpack_packer;

typedef struct Packer Packer;

static inline int msgpack_pack_int(msgpack_packer* pk, int d);
static inline int msgpack_pack_long(msgpack_packer* pk, long d);
static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d);
static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d);
static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d);
static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d);
//static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d);

static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d);
static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d);
static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d);
static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d);
static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d);
static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d);
static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d);
static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d);

static inline int msgpack_pack_float(msgpack_packer* pk, float d);
static inline int msgpack_pack_double(msgpack_packer* pk, double d);

static inline int msgpack_pack_nil(msgpack_packer* pk);
static inline int msgpack_pack_true(msgpack_packer* pk);
static inline int msgpack_pack_false(msgpack_packer* pk);

static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n);

static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n);

static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);

static inline int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l);

/*
 * Append l bytes from data to the packer's buffer, growing it via realloc
 * when needed (doubling, for amortized O(1) appends).
 *
 * Returns 0 on success, -1 on allocation failure or size_t overflow; on
 * failure the packer state is left unchanged.
 */
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{
    char* buf = pk->buf;
    size_t bs = pk->buf_size;
    size_t len = pk->length;

    /* Reject requests whose total size would overflow size_t. */
    if (l > (size_t)-1 - len) return -1;
    if (len + l > bs) {
        bs = (len + l) * 2;
        /* If doubling wrapped around, fall back to the exact size. */
        if (bs < len + l) bs = len + l;
        buf = (char*)realloc(buf, bs);
        if (!buf) return -1;  /* old pk->buf is still owned by the packer */
    }
    memcpy(buf + len, data, l);
    len += l;

    pk->buf = buf;
    pk->buf_size = bs;
    pk->length = len;
    return 0;
}
95 |
96 | #define msgpack_pack_append_buffer(user, buf, len) \
97 | return msgpack_pack_write(user, (const char*)buf, len)
98 |
99 | #include "pack_template.h"
100 |
101 | #ifdef __cplusplus
102 | }
103 | #endif
104 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | =======
2 | License
3 | =======
4 |
5 | pandas-msgpack is distributed under a 3-clause ("Simplified" or "New") BSD
6 | license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
7 | BSD-compatible licenses, are included. Their licenses follow the pandas
8 | license.
9 |
10 | pandas license
11 | ==============
12 |
13 | Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
14 | All rights reserved.
15 |
16 | Copyright (c) 2008-2011 AQR Capital Management, LLC
17 | All rights reserved.
18 |
19 | Redistribution and use in source and binary forms, with or without
20 | modification, are permitted provided that the following conditions are
21 | met:
22 |
23 | * Redistributions of source code must retain the above copyright
24 | notice, this list of conditions and the following disclaimer.
25 |
26 | * Redistributions in binary form must reproduce the above
27 | copyright notice, this list of conditions and the following
28 | disclaimer in the documentation and/or other materials provided
29 | with the distribution.
30 |
31 | * Neither the name of the copyright holder nor the names of any
32 | contributors may be used to endorse or promote products derived
33 | from this software without specific prior written permission.
34 |
35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
36 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
38 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
39 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
42 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
43 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
44 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
45 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46 |
47 | About the Copyright Holders
48 | ===========================
49 |
50 | AQR Capital Management began pandas development in 2008. Development was
51 | led by Wes McKinney. AQR released the source under this license in 2009.
52 | Wes is now an employee of Lambda Foundry, and remains the pandas project
53 | lead.
54 |
55 | The PyData Development Team is the collection of developers of the PyData
56 | project. This includes all of the PyData sub-projects, including pandas. The
57 | core team that coordinates development on GitHub can be found here:
58 | http://github.com/pydata.
59 |
60 | Full credits for pandas contributors can be found in the documentation.
61 |
62 | Our Copyright Policy
63 | ====================
64 |
65 | PyData uses a shared copyright model. Each contributor maintains copyright
66 | over their contributions to PyData. However, it is important to note that
67 | these contributions are typically only changes to the repositories. Thus,
68 | the PyData source code, in its entirety, is not the copyright of any single
69 | person or institution. Instead, it is the collective copyright of the
70 | entire PyData Development Team. If individual contributors want to maintain
71 | a record of what changes/contributions they have specific copyright on,
72 | they should indicate their copyright in the commit message of the change
73 | when they commit the change to one of the PyData repositories.
74 |
75 | With this in mind, the following banner should be used in any source code
76 | file to indicate the copyright and license terms:
77 |
78 | #-----------------------------------------------------------------------------
79 | # Copyright (c) 2012, PyData Development Team
80 | # All rights reserved.
81 | #
82 | # Distributed under the terms of the BSD Simplified License.
83 | #
84 | # The full license is in the LICENSE file, distributed with this software.
85 | #-----------------------------------------------------------------------------
86 |
87 | Other licenses can be found in the LICENSES directory.
88 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import sys
5 | from setuptools import setup
6 | import pkg_resources
7 | from distutils.extension import Extension
8 | from distutils.command.build_ext import build_ext as build_ext
9 |
10 | NAME = 'pandas-msgpack'
11 |
def is_platform_windows():
    """Return True when building on Windows (native win32 or Cygwin)."""
    return sys.platform in ('win32', 'cygwin')
14 |
def is_platform_linux():
    """Return True when building on Linux.

    Uses a prefix check because ``sys.platform`` is ``'linux2'`` on
    Python 2 but ``'linux'`` on Python 3.3+; the old equality test
    against ``'linux2'`` was always False on Python 3.
    """
    return sys.platform.startswith('linux')
17 |
def is_platform_mac():
    """Return True when building on macOS (Darwin)."""
    return sys.platform == 'darwin'
20 |
# versioning
# versioneer derives the package version and build commands from VCS tags
# (requires versioneer.py at the repository root).
import versioneer
cmdclass = versioneer.get_cmdclass()

try:
    import Cython  # noqa: F401 -- imported only to verify availability
    from Cython.Build import cythonize
except ImportError:
    raise ImportError("cython is required for building")

# args to ignore warnings
if is_platform_windows():
    extra_compile_args=[]
else:
    extra_compile_args=['-Wno-unused-function']


# Endianness macro consumed by pandas_msgpack/includes/sysdep.h, which
# falls back to __BYTE_ORDER detection only when neither is defined.
if sys.byteorder == 'big':
    macros = [('__BIG_ENDIAN__', '1')]
else:
    macros = [('__LITTLE_ENDIAN__', '1')]
42 |
extensions = []
# Cython extension implementing the msgpack packer.
packer_ext = Extension('pandas_msgpack.msgpack._packer',
                       depends=['pandas_msgpack/includes/pack.h',
                                'pandas_msgpack/includes/pack_template.h'],
                       sources=['pandas_msgpack/msgpack/_packer.pyx'],
                       language='c++',
                       # BUG FIX: was 'pandas_msgack/includes' (typo), so the
                       # packer headers were never on the include path.
                       include_dirs=['pandas_msgpack/includes'],
                       define_macros=macros,
                       extra_compile_args=extra_compile_args)
# Cython extension implementing the msgpack unpacker.
unpacker_ext = Extension('pandas_msgpack.msgpack._unpacker',
                         depends=['pandas_msgpack/includes/unpack.h',
                                  'pandas_msgpack/includes/unpack_define.h',
                                  'pandas_msgpack/includes/unpack_template.h'],
                         sources=['pandas_msgpack/msgpack/_unpacker.pyx'],
                         language='c++',
                         include_dirs=['pandas_msgpack/includes'],
                         define_macros=macros,
                         extra_compile_args=extra_compile_args)
extensions.append(packer_ext)
extensions.append(unpacker_ext)

#----------------------------------------------------------------------
# util
# extension for pseudo-safely moving bytes into mutable buffers
_move_ext = Extension('pandas_msgpack._move',
                      depends=[],
                      sources=['pandas_msgpack/move.c'])
extensions.append(_move_ext)
71 |
72 |
def readme():
    """Return the contents of README.rst for use as the long description."""
    with open('README.rst') as handle:
        return handle.read()
76 |
# Runtime dependency; version pin lives in requirements.txt (pandas>=0.19.2).
INSTALL_REQUIRES = (
    ['pandas']
)

setup(
    name=NAME,
    version=versioneer.get_version(),  # derived from VCS tags by versioneer
    cmdclass=cmdclass,
    description="Pandas interface to msgpack",
    long_description=readme(),
    license='BSD License',
    author='The PyData Development Team',
    author_email='pydata@googlegroups.com',
    url='https://github.com/pydata/pandas-msgpack',
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Science/Research',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Topic :: Scientific/Engineering',
    ],
    ext_modules=cythonize(extensions),
    keywords='data',
    install_requires=INSTALL_REQUIRES,
    packages=['pandas_msgpack',
              'pandas_msgpack.includes',
              'pandas_msgpack.msgpack',
              'pandas_msgpack.tests'],
    # NOTE(review): tests live in the pandas_msgpack.tests package; a bare
    # 'tests' target may not be importable -- confirm against the test runner.
    test_suite='tests',
)
114 |
--------------------------------------------------------------------------------
/pandas_msgpack/includes/sysdep.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack system dependencies
3 | *
4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | #ifndef MSGPACK_SYSDEP_H__
19 | #define MSGPACK_SYSDEP_H__
20 |
/* NOTE(review): the header targets of these #include lines were stripped
 * (likely by HTML escaping); restored from upstream msgpack-python sysdep.h. */
#include <stddef.h>
#include <stdio.h>
#if defined(_MSC_VER) && _MSC_VER < 1600
/* MSVC before 2010 ships no <stdint.h>; define the fixed-width types. */
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#elif defined(_MSC_VER) // && _MSC_VER >= 1600
#include <stdint.h>
#else
#include <stdint.h>
#include <stdbool.h>
#endif
38 |
39 | #ifdef _WIN32
40 | #define _msgpack_atomic_counter_header
41 | typedef long _msgpack_atomic_counter_t;
42 | #define _msgpack_sync_decr_and_fetch(ptr) InterlockedDecrement(ptr)
43 | #define _msgpack_sync_incr_and_fetch(ptr) InterlockedIncrement(ptr)
44 | #elif defined(__GNUC__) && ((__GNUC__*10 + __GNUC_MINOR__) < 41)
45 | #define _msgpack_atomic_counter_header "gcc_atomic.h"
46 | #else
47 | typedef unsigned int _msgpack_atomic_counter_t;
48 | #define _msgpack_sync_decr_and_fetch(ptr) __sync_sub_and_fetch(ptr, 1)
49 | #define _msgpack_sync_incr_and_fetch(ptr) __sync_add_and_fetch(ptr, 1)
50 | #endif
51 |
52 | #ifdef _WIN32
53 |
54 | #ifdef __cplusplus
55 | /* numeric_limits::min,max */
56 | #ifdef max
57 | #undef max
58 | #endif
59 | #ifdef min
60 | #undef min
61 | #endif
62 | #endif
63 |
64 | #else
65 | #include /* __BYTE_ORDER */
66 | #endif
67 |
/* Fallback endianness detection: setup.py normally defines exactly one of
 * __LITTLE_ENDIAN__/__BIG_ENDIAN__ via define_macros; derive it from
 * __BYTE_ORDER only when neither was provided. Windows is assumed
 * little-endian. */
#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define __LITTLE_ENDIAN__
#elif __BYTE_ORDER == __BIG_ENDIAN
#define __BIG_ENDIAN__
#elif _WIN32
#define __LITTLE_ENDIAN__
#endif
#endif
77 |
78 |
79 | #ifdef __LITTLE_ENDIAN__
80 |
81 | #ifdef _WIN32
82 | # if defined(ntohs)
83 | # define _msgpack_be16(x) ntohs(x)
84 | # elif defined(_byteswap_ushort) || (defined(_MSC_VER) && _MSC_VER >= 1400)
85 | # define _msgpack_be16(x) ((uint16_t)_byteswap_ushort((unsigned short)x))
86 | # else
87 | # define _msgpack_be16(x) ( \
88 | ((((uint16_t)x) << 8) ) | \
89 | ((((uint16_t)x) >> 8) ) )
90 | # endif
91 | #else
92 | # define _msgpack_be16(x) ntohs(x)
93 | #endif
94 |
95 | #ifdef _WIN32
96 | # if defined(ntohl)
97 | # define _msgpack_be32(x) ntohl(x)
98 | # elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400)
99 | # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x))
100 | # else
101 | # define _msgpack_be32(x) \
102 | ( ((((uint32_t)x) << 24) ) | \
103 | ((((uint32_t)x) << 8) & 0x00ff0000U ) | \
104 | ((((uint32_t)x) >> 8) & 0x0000ff00U ) | \
105 | ((((uint32_t)x) >> 24) ) )
106 | # endif
107 | #else
108 | # define _msgpack_be32(x) ntohl(x)
109 | #endif
110 |
111 | #if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400)
112 | # define _msgpack_be64(x) (_byteswap_uint64(x))
113 | #elif defined(bswap_64)
114 | # define _msgpack_be64(x) bswap_64(x)
115 | #elif defined(__DARWIN_OSSwapInt64)
116 | # define _msgpack_be64(x) __DARWIN_OSSwapInt64(x)
117 | #else
118 | #define _msgpack_be64(x) \
119 | ( ((((uint64_t)x) << 56) ) | \
120 | ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \
121 | ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \
122 | ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \
123 | ((((uint64_t)x) >> 8) & 0x00000000ff000000ULL ) | \
124 | ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \
125 | ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \
126 | ((((uint64_t)x) >> 56) ) )
127 | #endif
128 |
129 | #define _msgpack_load16(cast, from) ((cast)( \
130 | (((uint16_t)((uint8_t*)(from))[0]) << 8) | \
131 | (((uint16_t)((uint8_t*)(from))[1]) ) ))
132 |
133 | #define _msgpack_load32(cast, from) ((cast)( \
134 | (((uint32_t)((uint8_t*)(from))[0]) << 24) | \
135 | (((uint32_t)((uint8_t*)(from))[1]) << 16) | \
136 | (((uint32_t)((uint8_t*)(from))[2]) << 8) | \
137 | (((uint32_t)((uint8_t*)(from))[3]) ) ))
138 |
139 | #define _msgpack_load64(cast, from) ((cast)( \
140 | (((uint64_t)((uint8_t*)(from))[0]) << 56) | \
141 | (((uint64_t)((uint8_t*)(from))[1]) << 48) | \
142 | (((uint64_t)((uint8_t*)(from))[2]) << 40) | \
143 | (((uint64_t)((uint8_t*)(from))[3]) << 32) | \
144 | (((uint64_t)((uint8_t*)(from))[4]) << 24) | \
145 | (((uint64_t)((uint8_t*)(from))[5]) << 16) | \
146 | (((uint64_t)((uint8_t*)(from))[6]) << 8) | \
147 | (((uint64_t)((uint8_t*)(from))[7]) ) ))
148 |
149 | #else
150 |
151 | #define _msgpack_be16(x) (x)
152 | #define _msgpack_be32(x) (x)
153 | #define _msgpack_be64(x) (x)
154 |
155 | #define _msgpack_load16(cast, from) ((cast)( \
156 | (((uint16_t)((uint8_t*)from)[0]) << 8) | \
157 | (((uint16_t)((uint8_t*)from)[1]) ) ))
158 |
159 | #define _msgpack_load32(cast, from) ((cast)( \
160 | (((uint32_t)((uint8_t*)from)[0]) << 24) | \
161 | (((uint32_t)((uint8_t*)from)[1]) << 16) | \
162 | (((uint32_t)((uint8_t*)from)[2]) << 8) | \
163 | (((uint32_t)((uint8_t*)from)[3]) ) ))
164 |
165 | #define _msgpack_load64(cast, from) ((cast)( \
166 | (((uint64_t)((uint8_t*)from)[0]) << 56) | \
167 | (((uint64_t)((uint8_t*)from)[1]) << 48) | \
168 | (((uint64_t)((uint8_t*)from)[2]) << 40) | \
169 | (((uint64_t)((uint8_t*)from)[3]) << 32) | \
170 | (((uint64_t)((uint8_t*)from)[4]) << 24) | \
171 | (((uint64_t)((uint8_t*)from)[5]) << 16) | \
172 | (((uint64_t)((uint8_t*)from)[6]) << 8) | \
173 | (((uint64_t)((uint8_t*)from)[7]) ) ))
174 | #endif
175 |
176 |
177 | #define _msgpack_store16(to, num) \
178 | do { uint16_t val = _msgpack_be16(num); memcpy(to, &val, 2); } while(0)
179 | #define _msgpack_store32(to, num) \
180 | do { uint32_t val = _msgpack_be32(num); memcpy(to, &val, 4); } while(0)
181 | #define _msgpack_store64(to, num) \
182 | do { uint64_t val = _msgpack_be64(num); memcpy(to, &val, 8); } while(0)
183 |
184 | /*
185 | #define _msgpack_load16(cast, from) \
186 | ({ cast val; memcpy(&val, (char*)from, 2); _msgpack_be16(val); })
187 | #define _msgpack_load32(cast, from) \
188 | ({ cast val; memcpy(&val, (char*)from, 4); _msgpack_be32(val); })
189 | #define _msgpack_load64(cast, from) \
190 | ({ cast val; memcpy(&val, (char*)from, 8); _msgpack_be64(val); })
191 | */
192 |
193 |
194 | #endif /* sysdep.h */
195 |
--------------------------------------------------------------------------------
/docs/source/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # Internal variables.
11 | PAPEROPT_a4 = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16 |
.PHONY: help
# NOTE(review): the "<target>" placeholders were stripped from the original
# echo line (HTML escaping); restored from the standard Sphinx Makefile.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  applehelp  to make an Apple Help Book"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  epub3      to make an epub3"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
	@echo "  coverage   to run coverage check of the documentation (if enabled)"
	@echo "  dummy      to check syntax errors of document sources"
46 |
47 | .PHONY: clean
48 | clean:
49 | rm -rf $(BUILDDIR)/*
50 |
51 | .PHONY: html
52 | html:
53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
54 | @echo
55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
56 |
57 | .PHONY: dirhtml
58 | dirhtml:
59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
60 | @echo
61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
62 |
63 | .PHONY: singlehtml
64 | singlehtml:
65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
66 | @echo
67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
68 |
69 | .PHONY: pickle
70 | pickle:
71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
72 | @echo
73 | @echo "Build finished; now you can process the pickle files."
74 |
75 | .PHONY: json
76 | json:
77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
78 | @echo
79 | @echo "Build finished; now you can process the JSON files."
80 |
81 | .PHONY: htmlhelp
82 | htmlhelp:
83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
84 | @echo
85 | @echo "Build finished; now you can run HTML Help Workshop with the" \
86 | ".hhp project file in $(BUILDDIR)/htmlhelp."
87 |
.PHONY: qthelp
# NOTE(review): project file name said pandas-gbq (copy-paste from that
# repo); this is the pandas-msgpack documentation.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pandas-msgpack.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pandas-msgpack.qhc"
97 |
98 | .PHONY: applehelp
99 | applehelp:
100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
101 | @echo
102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
103 | @echo "N.B. You won't be able to view it unless you put it in" \
104 | "~/Library/Documentation/Help or install it in your application" \
105 | "bundle."
106 |
.PHONY: devhelp
# NOTE(review): devhelp directory said pandas-gbq (copy-paste from that
# repo); this is the pandas-msgpack documentation.
devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/pandas-msgpack"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pandas-msgpack"
	@echo "# devhelp"
116 |
117 | .PHONY: epub
118 | epub:
119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
120 | @echo
121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
122 |
123 | .PHONY: epub3
124 | epub3:
125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
126 | @echo
127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
128 |
129 | .PHONY: latex
130 | latex:
131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
132 | @echo
133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
135 | "(use \`make latexpdf' here to do that automatically)."
136 |
137 | .PHONY: latexpdf
138 | latexpdf:
139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
140 | @echo "Running LaTeX files through pdflatex..."
141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
143 |
144 | .PHONY: latexpdfja
145 | latexpdfja:
146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
147 | @echo "Running LaTeX files through platex and dvipdfmx..."
148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
150 |
151 | .PHONY: text
152 | text:
153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
154 | @echo
155 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
156 |
157 | .PHONY: man
158 | man:
159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
160 | @echo
161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
162 |
163 | .PHONY: texinfo
164 | texinfo:
165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
166 | @echo
167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
168 | @echo "Run \`make' in that directory to run these through makeinfo" \
169 | "(use \`make info' here to do that automatically)."
170 |
171 | .PHONY: info
172 | info:
173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
174 | @echo "Running Texinfo files through makeinfo..."
175 | make -C $(BUILDDIR)/texinfo info
176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
177 |
178 | .PHONY: gettext
179 | gettext:
180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
181 | @echo
182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
183 |
184 | .PHONY: changes
185 | changes:
186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
187 | @echo
188 | @echo "The overview file is in $(BUILDDIR)/changes."
189 |
190 | .PHONY: linkcheck
191 | linkcheck:
192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
193 | @echo
194 | @echo "Link check complete; look for any errors in the above output " \
195 | "or in $(BUILDDIR)/linkcheck/output.txt."
196 |
197 | .PHONY: doctest
198 | doctest:
199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
200 | @echo "Testing of doctests in the sources finished, look at the " \
201 | "results in $(BUILDDIR)/doctest/output.txt."
202 |
203 | .PHONY: coverage
204 | coverage:
205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
206 | @echo "Testing of coverage in the sources finished, look at the " \
207 | "results in $(BUILDDIR)/coverage/python.txt."
208 |
209 | .PHONY: xml
210 | xml:
211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
212 | @echo
213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
214 |
215 | .PHONY: pseudoxml
216 | pseudoxml:
217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
218 | @echo
219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
220 |
221 | .PHONY: dummy
222 | dummy:
223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
224 | @echo
225 | @echo "Build finished. Dummy builder generates no files."
226 |
--------------------------------------------------------------------------------
/pandas_msgpack/includes/unpack.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack for Python unpacking routine
3 | *
4 | * Copyright (C) 2009 Naoki INADA
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | #define MSGPACK_EMBED_STACK_SIZE (1024)
20 | #include "unpack_define.h"
21 |
/* Per-unpacker configuration and hook state threaded through every
 * unpack_callback_* function below. */
typedef struct unpack_user {
    int use_list;                /* arrays become lists (true) or tuples */
    PyObject *object_hook;       /* called on each finished map; may be NULL */
    bool has_pairs_hook;         /* object_hook wants a list of (k, v) pairs */
    PyObject *list_hook;         /* called on each finished array; may be NULL */
    PyObject *ext_hook;          /* called with (typecode, data) for ext values */
    const char *encoding;        /* if non-NULL, raw values are decoded to unicode */
    const char *unicode_errors;  /* error handler passed to the decoder */
    /* hard limits rejecting oversized containers/values while unpacking */
    Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
} unpack_user;

/* Unpacked values are plain Python object pointers. */
typedef PyObject* msgpack_unpack_object;
struct unpack_context;
typedef struct unpack_context unpack_context;
typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off);
37 |
/* The root object starts out empty (NULL); it is produced by the
 * callbacks as unpacking proceeds. */
static inline msgpack_unpack_object unpack_callback_root(unpack_user* u)
{
    return NULL;
}
42 |
43 | static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o)
44 | {
45 | PyObject *p = PyInt_FromLong((long)d);
46 | if (!p)
47 | return -1;
48 | *o = p;
49 | return 0;
50 | }
51 | static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o)
52 | {
53 | return unpack_callback_uint16(u, d, o);
54 | }
55 |
56 |
57 | static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o)
58 | {
59 | PyObject *p = PyInt_FromSize_t((size_t)d);
60 | if (!p)
61 | return -1;
62 | *o = p;
63 | return 0;
64 | }
65 |
66 | static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o)
67 | {
68 | PyObject *p;
69 | if (d > LONG_MAX) {
70 | p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d);
71 | } else {
72 | p = PyInt_FromSize_t((size_t)d);
73 | }
74 | if (!p)
75 | return -1;
76 | *o = p;
77 | return 0;
78 | }
79 |
80 | static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o)
81 | {
82 | PyObject *p = PyInt_FromLong(d);
83 | if (!p)
84 | return -1;
85 | *o = p;
86 | return 0;
87 | }
88 |
89 | static inline int unpack_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o)
90 | {
91 | return unpack_callback_int32(u, d, o);
92 | }
93 |
94 | static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o)
95 | {
96 | return unpack_callback_int32(u, d, o);
97 | }
98 |
99 | static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o)
100 | {
101 | PyObject *p;
102 | if (d > LONG_MAX || d < LONG_MIN) {
103 | p = PyLong_FromLongLong((unsigned PY_LONG_LONG)d);
104 | } else {
105 | p = PyInt_FromLong((long)d);
106 | }
107 | *o = p;
108 | return 0;
109 | }
110 |
111 | static inline int unpack_callback_double(unpack_user* u, double d, msgpack_unpack_object* o)
112 | {
113 | PyObject *p = PyFloat_FromDouble(d);
114 | if (!p)
115 | return -1;
116 | *o = p;
117 | return 0;
118 | }
119 |
120 | static inline int unpack_callback_float(unpack_user* u, float d, msgpack_unpack_object* o)
121 | {
122 | return unpack_callback_double(u, d, o);
123 | }
124 |
125 | static inline int unpack_callback_nil(unpack_user* u, msgpack_unpack_object* o)
126 | { Py_INCREF(Py_None); *o = Py_None; return 0; }
127 |
128 | static inline int unpack_callback_true(unpack_user* u, msgpack_unpack_object* o)
129 | { Py_INCREF(Py_True); *o = Py_True; return 0; }
130 |
131 | static inline int unpack_callback_false(unpack_user* u, msgpack_unpack_object* o)
132 | { Py_INCREF(Py_False); *o = Py_False; return 0; }
133 |
134 | static inline int unpack_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o)
135 | {
136 | if (n > u->max_array_len) {
137 | PyErr_Format(PyExc_ValueError, "%u exceeds max_array_len(%zd)", n, u->max_array_len);
138 | return -1;
139 | }
140 | PyObject *p = u->use_list ? PyList_New(n) : PyTuple_New(n);
141 |
142 | if (!p)
143 | return -1;
144 | *o = p;
145 | return 0;
146 | }
147 |
148 | static inline int unpack_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o)
149 | {
150 | if (u->use_list)
151 | PyList_SET_ITEM(*c, current, o);
152 | else
153 | PyTuple_SET_ITEM(*c, current, o);
154 | return 0;
155 | }
156 |
157 | static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c)
158 | {
159 | if (u->list_hook) {
160 | PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL);
161 | if (!new_c)
162 | return -1;
163 | Py_DECREF(*c);
164 | *c = new_c;
165 | }
166 | return 0;
167 | }
168 |
169 | static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o)
170 | {
171 | if (n > u->max_map_len) {
172 | PyErr_Format(PyExc_ValueError, "%u exceeds max_map_len(%zd)", n, u->max_map_len);
173 | return -1;
174 | }
175 | PyObject *p;
176 | if (u->has_pairs_hook) {
177 | p = PyList_New(n); // Or use tuple?
178 | }
179 | else {
180 | p = PyDict_New();
181 | }
182 | if (!p)
183 | return -1;
184 | *o = p;
185 | return 0;
186 | }
187 |
/* Store one key/value pair into the container created by
 * unpack_callback_map.  With a pairs hook the pair is boxed into a
 * 2-tuple placed at list slot `current` (PyList_SET_ITEM steals the
 * tuple reference); otherwise it is inserted into the dict and the
 * callback's own references to k and v are released. */
static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v)
{
    if (u->has_pairs_hook) {
        msgpack_unpack_object item = PyTuple_Pack(2, k, v);
        if (!item)
            return -1;
        Py_DECREF(k);
        Py_DECREF(v);
        PyList_SET_ITEM(*c, current, item);
        return 0;
    }
    else if (PyDict_SetItem(*c, k, v) == 0) {
        Py_DECREF(k);
        Py_DECREF(v);
        return 0;
    }
    /* NOTE(review): on the failure paths k and v are not released here;
     * presumably cleanup is the template's responsibility -- confirm
     * against unpack_template.h before changing. */
    return -1;
}
206 |
207 | static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c)
208 | {
209 | if (u->object_hook) {
210 | PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL);
211 | if (!new_c)
212 | return -1;
213 |
214 | Py_DECREF(*c);
215 | *c = new_c;
216 | }
217 | return 0;
218 | }
219 |
220 | static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o)
221 | {
222 | if (l > u->max_str_len) {
223 | PyErr_Format(PyExc_ValueError, "%u exceeds max_str_len(%zd)", l, u->max_str_len);
224 | return -1;
225 | }
226 |
227 | PyObject *py;
228 | if(u->encoding) {
229 | py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
230 | } else {
231 | py = PyBytes_FromStringAndSize(p, l);
232 | }
233 | if (!py)
234 | return -1;
235 | *o = py;
236 | return 0;
237 | }
238 |
239 | static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o)
240 | {
241 | if (l > u->max_bin_len) {
242 | PyErr_Format(PyExc_ValueError, "%u exceeds max_bin_len(%zd)", l, u->max_bin_len);
243 | return -1;
244 | }
245 |
246 | PyObject *py = PyBytes_FromStringAndSize(p, l);
247 | if (!py)
248 | return -1;
249 | *o = py;
250 | return 0;
251 | }
252 |
/* Build the Python result for a msgpack ext value by invoking the
 * user's ext_hook with (typecode, payload).  `pos` points at the
 * leading typecode byte; `length` includes that byte, so the payload
 * that follows is length-1 bytes long. */
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
                                      unsigned int length, msgpack_unpack_object* o)
{
    PyObject *py;
    /* consume the typecode byte; pos now points at the payload */
    int8_t typecode = (int8_t)*pos++;
    if (!u->ext_hook) {
        PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL");
        return -1;
    }
    if (length-1 > u->max_ext_len) {
        PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len);
        return -1;
    }
    // length also includes the typecode, so the actual data is length-1
    /* "s#" (py2) / "y#" (py3) pass the payload as a length-delimited
     * bytes argument to the hook. */
#if PY_MAJOR_VERSION == 2
    py = PyObject_CallFunction(u->ext_hook, (char*)"(is#)", typecode, pos, (Py_ssize_t)length-1);
#else
    py = PyObject_CallFunction(u->ext_hook, (char*)"(iy#)", typecode, pos, (Py_ssize_t)length-1);
#endif
    if (!py)
        return -1;
    *o = py;
    return 0;
}
277 |
278 | #include "unpack_template.h"
279 |
--------------------------------------------------------------------------------
/pandas_msgpack/move.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define COMPILING_IN_PY2 (PY_VERSION_HEX <= 0x03000000)
4 |
5 | #if !COMPILING_IN_PY2
6 | /* alias this because it is not aliased in Python 3 */
7 | #define PyString_CheckExact PyBytes_CheckExact
8 | #define PyString_AS_STRING PyBytes_AS_STRING
9 | #define PyString_GET_SIZE PyBytes_GET_SIZE
10 |
11 | /* in python 3, we cannot intern bytes objects so this is always false */
12 | #define PyString_CHECK_INTERNED(cs) 0
13 | #endif /* !COMPILING_IN_PY2 */
14 |
15 | #ifndef Py_TPFLAGS_HAVE_GETCHARBUFFER
16 | #define Py_TPFLAGS_HAVE_GETCHARBUFFER 0
17 | #endif
18 |
19 | #ifndef Py_TPFLAGS_HAVE_NEWBUFFER
20 | #define Py_TPFLAGS_HAVE_NEWBUFFER 0
21 | #endif
22 |
PyObject *badmove; /* bad move exception class */

/* Wrapper object exposing a stolen bytes object's storage as a
 * writable buffer. */
typedef struct {
    PyObject_HEAD
    /* the bytes that own the buffer we are mutating */
    PyObject *invalid_bytes;
} stolenbufobject;

PyTypeObject stolenbuf_type; /* forward declare type */
32 |
/* Destructor: release our reference to the stolen bytes object, then
 * free the wrapper itself. */
static void
stolenbuf_dealloc(stolenbufobject *self)
{
    Py_DECREF(self->invalid_bytes);
    PyObject_Del(self);
}
39 |
40 | static int
41 | stolenbuf_getbuffer(stolenbufobject *self, Py_buffer *view, int flags)
42 | {
43 | return PyBuffer_FillInfo(view,
44 | (PyObject*) self,
45 | (void*) PyString_AS_STRING(self->invalid_bytes),
46 | PyString_GET_SIZE(self->invalid_bytes),
47 | 0, /* not readonly */
48 | flags);
49 | }
50 |
#if COMPILING_IN_PY2

/* Old-style (Python 2) buffer protocol: expose the storage as a single
 * read-write segment. */
static Py_ssize_t
stolenbuf_getreadwritebuf(stolenbufobject *self, Py_ssize_t segment, void **out)
{
    if (segment != 0) {
        PyErr_SetString(PyExc_SystemError,
                        "accessing non-existent string segment");
        return -1;
    }
    *out = PyString_AS_STRING(self->invalid_bytes);
    return PyString_GET_SIZE(self->invalid_bytes);
}

/* There is always exactly one segment. */
static Py_ssize_t
stolenbuf_getsegcount(stolenbufobject *self, Py_ssize_t *len)
{
    if (len) {
        *len = PyString_GET_SIZE(self->invalid_bytes);
    }
    return 1;
}

/* Slot order is positional ABI (readbuffer, writebuffer, segcount,
 * charbuffer, then the new-style getbuffer) -- keep it intact. */
PyBufferProcs stolenbuf_as_buffer = {
    (readbufferproc) stolenbuf_getreadwritebuf,
    (writebufferproc) stolenbuf_getreadwritebuf,
    (segcountproc) stolenbuf_getsegcount,
    (charbufferproc) stolenbuf_getreadwritebuf,
    (getbufferproc) stolenbuf_getbuffer,
};

#else /* Python 3 */

/* Python 3 only has the new-style protocol: bf_getbuffer (and a NULL
 * bf_releasebuffer). */
PyBufferProcs stolenbuf_as_buffer = {
    (getbufferproc) stolenbuf_getbuffer,
    NULL,
};

#endif /* COMPILING_IN_PY2 */
90 |
PyDoc_STRVAR(stolenbuf_doc,
             "A buffer that is wrapping a stolen bytes object's buffer.");

/* Minimal type: no methods and no number/sequence protocols -- only the
 * buffer slots above.  Slot order is positional; keep it intact. */
PyTypeObject stolenbuf_type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "pandas.util._move.stolenbuf",              /* tp_name */
    sizeof(stolenbufobject),                    /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor) stolenbuf_dealloc,             /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    &stolenbuf_as_buffer,                       /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT |
    Py_TPFLAGS_HAVE_NEWBUFFER |
    Py_TPFLAGS_HAVE_GETCHARBUFFER,              /* tp_flags */
    stolenbuf_doc,                              /* tp_doc */
};
119 |
/* Python-level docstring for move_into_mutable_buffer below. */
PyDoc_STRVAR(
    move_into_mutable_buffer_doc,
    "Moves a bytes object that is about to be destroyed into a mutable buffer\n"
    "without copying the data.\n"
    "\n"
    "Parameters\n"
    "----------\n"
    "bytes_rvalue : bytes with 1 refcount.\n"
    "    The bytes object that you want to move into a mutable buffer. This\n"
    "    cannot be a named object. It must only have a single reference.\n"
    "\n"
    "Returns\n"
    "-------\n"
    "buf : stolenbuf\n"
    "    An object that supports the buffer protocol which can give a mutable\n"
    "    view of the data that was previously owned by ``bytes_rvalue``.\n"
    "\n"
    "Raises\n"
    "------\n"
    "BadMove\n"
    "    Raised when a move is attempted on an object with more than one\n"
    "    reference.\n"
    "\n"
    "Notes\n"
    "-----\n"
    "If you want to use this function you are probably wrong.\n"
    "\n"
    "Warning: Do not call this function through *unpacking. This can\n"
    "potentially trick the reference checks which may allow you to get a\n"
    "mutable reference to a shared string!\n"
    "\n");
151 |
152 | /* This is implemented as a standalone function instead of the ``tp_new`` of
153 | ``stolenbuf`` because we need to create a function using the METH_O flag
154 | to support Python 3.6. In python 3.6, PyCFunction calls from python code now
155 | count the reference owned by the argument tuple. This would cause the object
156 | to have 2 references if used with a direct call like: ``stolenbuf(a)``;
157 | however, if called through *unpacking like ``stolenbuf(*(a,))`` it would
158 | only have the one reference (the tuple). */
159 | static PyObject*
160 | move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue)
161 | {
162 | stolenbufobject *ret;
163 |
164 | if (!PyString_CheckExact(bytes_rvalue)) {
165 | PyErr_SetString(PyExc_TypeError,
166 | "stolenbuf can only steal from bytes objects");
167 | return NULL;
168 | }
169 |
170 | if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
171 | /* there is a reference other than the caller's stack or the string is
172 | interned */
173 | PyErr_SetObject(badmove, bytes_rvalue);
174 | return NULL;
175 | }
176 |
177 | if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) {
178 | return NULL;
179 | }
180 |
181 | /* store the original bytes object in a field that is not
182 | exposed to python */
183 | Py_INCREF(bytes_rvalue);
184 | ret->invalid_bytes = bytes_rvalue;
185 | return (PyObject*) ret;
186 | }
187 |
/* Module method table: the single METH_O entry point. */
PyMethodDef methods[] = {
    {"move_into_mutable_buffer",
     (PyCFunction) move_into_mutable_buffer,
     METH_O,
     move_into_mutable_buffer_doc},
    {NULL},  /* sentinel */
};

#define MODULE_NAME "pandas_msgpack._move"

#if !COMPILING_IN_PY2
/* Python 3 module definition (Python 2 uses Py_InitModule instead). */
PyModuleDef _move_module = {
    PyModuleDef_HEAD_INIT,
    MODULE_NAME,
    NULL,   /* no module docstring */
    -1,     /* no per-module state */
    methods,
};
#endif  /* !COMPILING_IN_PY2 */
207 |
PyDoc_STRVAR(
    badmove_doc,
    "Exception used to indicate that a move was attempted on a value with\n"
    "more than a single reference.\n"
    "\n"
    "Parameters\n"
    "----------\n"
    "data : any\n"
    "    The data which was passed to ``move_into_mutable_buffer``.\n"
    "\n"
    "See Also\n"
    "--------\n"
    "pandas.util._move.move_into_mutable_buffer\n");

/* Module initialization, shared between Python 2 (init_move, returns
 * void) and Python 3 (PyInit__move, returns the module object). */
PyMODINIT_FUNC
#if !COMPILING_IN_PY2
#define ERROR_RETURN NULL
PyInit__move(void)
#else
#define ERROR_RETURN
init_move(void)
#endif   /* !COMPILING_IN_PY2 */
{
    PyObject *m;

    /* create the BadMove exception class */
    if (!(badmove = PyErr_NewExceptionWithDoc("pandas.util._move.BadMove",
                                              badmove_doc,
                                              NULL,
                                              NULL))) {
        return ERROR_RETURN;
    }

    if (PyType_Ready(&stolenbuf_type)) {
        return ERROR_RETURN;
    }

#if !COMPILING_IN_PY2
    if (!(m = PyModule_Create(&_move_module)))
#else
    if (!(m = Py_InitModule(MODULE_NAME, methods)))
#endif  /* !COMPILING_IN_PY2 */
    {
        return ERROR_RETURN;
    }

    /* NOTE(review): PyModule_AddObject steals a reference on success;
     * the type and exception here are module-lifetime globals, so this
     * appears intentional -- confirm before refactoring. */
    if (PyModule_AddObject(m,
                           "stolenbuf",
                           (PyObject*) &stolenbuf_type)) {
        Py_DECREF(m);
        return ERROR_RETURN;
    }

    if (PyModule_AddObject(m, "BadMove", badmove)) {
        Py_DECREF(m);
        return ERROR_RETURN;
    }

#if !COMPILING_IN_PY2
    return m;
#endif  /* !COMPILING_IN_PY2 */
}
269 |
--------------------------------------------------------------------------------
/pandas_msgpack/msgpack/_packer.pyx:
--------------------------------------------------------------------------------
# coding: utf-8
#cython: embedsignature=True

from cpython cimport *
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *

from .exceptions import PackValueError
from . import ExtType


# C-level packer API implemented in ../includes/pack.h; each function
# returns 0 on success and a negative value on failure.
cdef extern from "../includes/pack.h":
    struct msgpack_packer:
        char* buf
        size_t length
        size_t buf_size
        bint use_bin_type

    int msgpack_pack_int(msgpack_packer* pk, int d)
    int msgpack_pack_nil(msgpack_packer* pk)
    int msgpack_pack_true(msgpack_packer* pk)
    int msgpack_pack_false(msgpack_packer* pk)
    int msgpack_pack_long(msgpack_packer* pk, long d)
    int msgpack_pack_long_long(msgpack_packer* pk, long long d)
    int msgpack_pack_unsigned_long_long(msgpack_packer* pk,
                                        unsigned long long d)
    int msgpack_pack_float(msgpack_packer* pk, float d)
    int msgpack_pack_double(msgpack_packer* pk, double d)
    int msgpack_pack_array(msgpack_packer* pk, size_t l)
    int msgpack_pack_map(msgpack_packer* pk, size_t l)
    int msgpack_pack_raw(msgpack_packer* pk, size_t l)
    int msgpack_pack_bin(msgpack_packer* pk, size_t l)
    int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
    int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)

# Maximum container nesting depth before _pack raises PackValueError.
cdef int DEFAULT_RECURSE_LIMIT=511
39 |
cdef class Packer(object):
    """
    MessagePack Packer

    usage::

        packer = Packer()
        astream.write(packer.pack(a))
        astream.write(packer.pack(b))

    Packer's constructor has some keyword arguments:

    :param callable default:
        Convert user type to builtin type that Packer supports.
        See also simplejson's document.
    :param str encoding:
        Convert unicode to bytes with this encoding. (default: 'utf-8')
    :param str unicode_errors:
        Error handler for encoding unicode. (default: 'strict')
    :param bool use_single_float:
        Use single precision float type for float. (default: False)
    :param bool autoreset:
        Reset buffer after each pack and return it's
        content as `bytes`. (default: True).
        If set this to false, use `bytes()` to get
        content and `.reset()` to clear buffer.
    :param bool use_bin_type:
        Use bin type introduced in msgpack spec 2.0 for bytes.
        It also enable str8 type for unicode.
    """
    cdef msgpack_packer pk
    cdef object _default
    # _bencoding/_berrors keep the bytes objects that back the raw
    # char* pointers below alive for the life of the Packer.
    cdef object _bencoding
    cdef object _berrors
    cdef char *encoding
    cdef char *unicode_errors
    cdef bool use_float
    cdef bint autoreset

    def __cinit__(self):
        # Allocate the output buffer here (not __init__) so that
        # __dealloc__ can always free() it safely.
        cdef int buf_size = 1024 * 1024
        self.pk.buf = malloc(buf_size)
        if self.pk.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
        self.pk.buf_size = buf_size
        self.pk.length = 0

    def __init__(self, default=None, encoding='utf-8',
                 unicode_errors='strict', use_single_float=False,
                 bint autoreset=1, bint use_bin_type=0):
        """
        """
        self.use_float = use_single_float
        self.autoreset = autoreset
        self.pk.use_bin_type = use_bin_type
        if default is not None:
            if not PyCallable_Check(default):
                raise TypeError("default must be a callable.")
            self._default = default
        if encoding is None:
            self.encoding = NULL
            self.unicode_errors = NULL
        else:
            if isinstance(encoding, unicode):
                self._bencoding = encoding.encode('ascii')
            else:
                self._bencoding = encoding
            self.encoding = PyBytes_AsString(self._bencoding)
            if isinstance(unicode_errors, unicode):
                self._berrors = unicode_errors.encode('ascii')
            else:
                self._berrors = unicode_errors
            self.unicode_errors = PyBytes_AsString(self._berrors)

    def __dealloc__(self):
        free(self.pk.buf)

    cdef int _pack(self, object o,
                   int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
        # Serialize ``o`` into the internal buffer.  Returns 0 on
        # success; raises for unserializable objects, oversized values
        # and exceeded recursion depth.
        cdef long long llval
        cdef unsigned long long ullval
        cdef long longval
        cdef float fval
        cdef double dval
        cdef char* rawval
        cdef int ret
        cdef dict d
        cdef size_t L
        cdef int default_used = 0

        if nest_limit < 0:
            raise PackValueError("recursion limit exceeded.")

        while True:
            if o is None:
                ret = msgpack_pack_nil(&self.pk)
            elif isinstance(o, bool):
                if o:
                    ret = msgpack_pack_true(&self.pk)
                else:
                    ret = msgpack_pack_false(&self.pk)
            elif PyLong_Check(o):
                # PyInt_Check(long) is True for Python 3.
                # So we should test long before int.
                if o > 0:
                    ullval = o
                    ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
                else:
                    llval = o
                    ret = msgpack_pack_long_long(&self.pk, llval)
            elif PyInt_Check(o):
                longval = o
                ret = msgpack_pack_long(&self.pk, longval)
            elif PyFloat_Check(o):
                if self.use_float:
                    fval = o
                    ret = msgpack_pack_float(&self.pk, fval)
                else:
                    dval = o
                    ret = msgpack_pack_double(&self.pk, dval)
            elif PyBytes_Check(o):
                L = len(o)
                if L > (2**32) - 1:
                    raise ValueError("bytes is too large")
                rawval = o
                ret = msgpack_pack_bin(&self.pk, L)
                if ret == 0:
                    ret = msgpack_pack_raw_body(&self.pk, rawval, L)
            elif PyUnicode_Check(o):
                if not self.encoding:
                    raise TypeError("Can't encode unicode string: "
                                    "no encoding is specified")
                o = PyUnicode_AsEncodedString(o, self.encoding,
                                              self.unicode_errors)
                L = len(o)
                if L > (2**32) - 1:
                    # BUG FIX: previously reported "dict is too large"
                    # for an oversized string.
                    raise ValueError("unicode string is too large")
                rawval = o
                # use the already-computed length L consistently
                ret = msgpack_pack_raw(&self.pk, L)
                if ret == 0:
                    ret = msgpack_pack_raw_body(&self.pk, rawval, L)
            elif PyDict_CheckExact(o):
                d = o
                L = len(d)
                if L > (2**32) - 1:
                    raise ValueError("dict is too large")
                ret = msgpack_pack_map(&self.pk, L)
                if ret == 0:
                    for k, v in d.iteritems():
                        ret = self._pack(k, nest_limit - 1)
                        if ret != 0: break
                        ret = self._pack(v, nest_limit - 1)
                        if ret != 0: break
            elif PyDict_Check(o):
                L = len(o)
                if L > (2**32) - 1:
                    raise ValueError("dict is too large")
                ret = msgpack_pack_map(&self.pk, L)
                if ret == 0:
                    for k, v in o.items():
                        ret = self._pack(k, nest_limit - 1)
                        if ret != 0: break
                        ret = self._pack(v, nest_limit - 1)
                        if ret != 0: break
            elif isinstance(o, ExtType):
                # This should be before Tuple because ExtType is namedtuple.
                longval = o.code
                rawval = o.data
                L = len(o.data)
                if L > (2**32) - 1:
                    raise ValueError("EXT data is too large")
                ret = msgpack_pack_ext(&self.pk, longval, L)
                # BUG FIX: only write the payload when the ext header
                # succeeded, matching every other branch.
                if ret == 0:
                    ret = msgpack_pack_raw_body(&self.pk, rawval, L)
            elif PyTuple_Check(o) or PyList_Check(o):
                L = len(o)
                if L > (2**32) - 1:
                    raise ValueError("list is too large")
                ret = msgpack_pack_array(&self.pk, L)
                if ret == 0:
                    for v in o:
                        ret = self._pack(v, nest_limit - 1)
                        if ret != 0: break
            elif not default_used and self._default:
                # give the user's default() one chance to convert o,
                # then retry packing the converted value
                o = self._default(o)
                default_used = 1
                continue
            else:
                raise TypeError("can't serialize %r" % (o,))
            return ret

    cpdef pack(self, object obj):
        """Pack ``obj`` and (if autoreset) return the packed bytes."""
        cdef int ret
        ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
        if ret == -1:
            raise MemoryError
        elif ret:  # should not happen.
            raise TypeError
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def pack_ext_type(self, typecode, data):
        """Pack ``data`` as an ext value with the given ``typecode``."""
        # BUG FIX: the C return codes were previously ignored, silently
        # producing truncated output on buffer-growth failure.
        cdef int ret = msgpack_pack_ext(&self.pk, typecode, len(data))
        if ret == 0:
            ret = msgpack_pack_raw_body(&self.pk, data, len(data))
        if ret != 0:
            raise MemoryError

    def pack_array_header(self, size_t size):
        """Write only an array header for ``size`` elements."""
        if size > (2**32) - 1:
            raise ValueError
        cdef int ret = msgpack_pack_array(&self.pk, size)
        if ret == -1:
            raise MemoryError
        elif ret:  # should not happen
            raise TypeError
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def pack_map_header(self, size_t size):
        """Write only a map header for ``size`` pairs."""
        if size > (2**32) - 1:
            raise ValueError
        cdef int ret = msgpack_pack_map(&self.pk, size)
        if ret == -1:
            raise MemoryError
        elif ret:  # should not happen
            raise TypeError
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def pack_map_pairs(self, object pairs):
        """
        Pack *pairs* as msgpack map type.

        *pairs* should sequence of pair.
        (`len(pairs)` and `for k, v in pairs:` should be supported.)
        """
        cdef int ret = msgpack_pack_map(&self.pk, len(pairs))
        if ret == 0:
            for k, v in pairs:
                ret = self._pack(k)
                if ret != 0: break
                ret = self._pack(v)
                if ret != 0: break
        if ret == -1:
            raise MemoryError
        elif ret:  # should not happen
            raise TypeError
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def reset(self):
        """Clear internal buffer."""
        self.pk.length = 0

    def bytes(self):
        """Return buffer content."""
        return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # pandas-msgpack documentation build configuration file, created by
4 | # sphinx-quickstart on Wed Feb 8 10:52:12 2017.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | import os
20 | import sys
21 | # sys.path.insert(0, os.path.abspath('.'))
22 |
23 | # -- General configuration ------------------------------------------------
24 |
25 | # If your documentation needs a minimal Sphinx version, state it here.
26 | #
27 | # needs_sphinx = '1.0'
28 |
29 | # Add any Sphinx extension module names here, as strings. They can be
30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
31 | # ones.
32 | extensions = ['sphinx.ext.autodoc',
33 | 'sphinx.ext.autosummary',
34 | 'sphinx.ext.doctest',
35 | 'sphinx.ext.extlinks',
36 | 'sphinx.ext.todo',
37 | 'numpydoc', # used to parse numpy-style docstrings for autodoc
38 | 'IPython.sphinxext.ipython_console_highlighting',
39 | 'IPython.sphinxext.ipython_directive',
40 | 'sphinx.ext.intersphinx',
41 | 'sphinx.ext.coverage',
42 | 'sphinx.ext.ifconfig',
43 | ]
44 |
45 | # Add any paths that contain templates here, relative to this directory.
46 | templates_path = ['_templates']
47 |
48 | # The suffix(es) of source filenames.
49 | # You can specify multiple suffix as a list of string:
50 | #
51 | # source_suffix = ['.rst', '.md']
52 | source_suffix = '.rst'
53 |
54 | # The encoding of source files.
55 | #
56 | # source_encoding = 'utf-8-sig'
57 |
58 | # The master toctree document.
59 | master_doc = 'index'
60 |
61 | # General information about the project.
62 | project = u'pandas-msgpack'
63 | copyright = u'2017, PyData Development Team'
64 | author = u'PyData Development Team'
65 |
66 | # The version info for the project you're documenting, acts as replacement for
67 | # |version| and |release|, also used in various other places throughout the
68 | # built documents.
69 | #
70 | # The short X.Y version.
71 | version = u'0.1.0'
72 | # The full version, including alpha/beta/rc tags.
73 | release = u'0.1.0'
74 |
75 | # The language for content autogenerated by Sphinx. Refer to documentation
76 | # for a list of supported languages.
77 | #
78 | # This is also used if you do content translation via gettext catalogs.
79 | # Usually you set "language" from the command line for these cases.
80 | language = None
81 |
82 | # There are two options for replacing |today|: either, you set today to some
83 | # non-false value, then it is used:
84 | #
85 | # today = ''
86 | #
87 | # Else, today_fmt is used as the format for a strftime call.
88 | #
89 | # today_fmt = '%B %d, %Y'
90 |
91 | # List of patterns, relative to source directory, that match files and
92 | # directories to ignore when looking for source files.
93 | # This patterns also effect to html_static_path and html_extra_path
94 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
95 |
96 | # The reST default role (used for this markup: `text`) to use for all
97 | # documents.
98 | #
99 | # default_role = None
100 |
101 | # If true, '()' will be appended to :func: etc. cross-reference text.
102 | #
103 | # add_function_parentheses = True
104 |
105 | # If true, the current module name will be prepended to all description
106 | # unit titles (such as .. function::).
107 | #
108 | # add_module_names = True
109 |
110 | # If true, sectionauthor and moduleauthor directives will be shown in the
111 | # output. They are ignored by default.
112 | #
113 | # show_authors = False
114 |
115 | # The name of the Pygments (syntax highlighting) style to use.
116 | pygments_style = 'sphinx'
117 |
118 | # A list of ignored prefixes for module index sorting.
119 | # modindex_common_prefix = []
120 |
121 | # If true, keep warnings as "system message" paragraphs in the built documents.
122 | # keep_warnings = False
123 |
124 | # If true, `todo` and `todoList` produce output, else they produce nothing.
125 | todo_include_todos = False
126 |
127 |
# -- Options for HTML output ----------------------------------------------

# Taken from docs.readthedocs.io:
# on_rtd is whether we are on readthedocs.io (there the RTD theme is
# provided by the build environment and must not be configured here).
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'

if not on_rtd:  # only import and set the theme if we're building docs locally
    try:
        import sphinx_rtd_theme
        html_theme = 'sphinx_rtd_theme'
        html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
    except ImportError:
        # sphinx_rtd_theme is not installed; fall back to a builtin theme
        # rather than aborting the whole docs build with an ImportError.
        html_theme = 'default'

# All other html_* knobs (html_title, html_short_title, html_logo,
# html_favicon, html_extra_path, html_last_updated_fmt, html_sidebars,
# html_additional_pages, html_use_index, html_split_index,
# html_show_sourcelink, html_show_sphinx, html_show_copyright,
# html_use_opensearch, html_file_suffix, html_search_language,
# html_search_options, html_search_scorer) keep their Sphinx defaults.

# Paths with custom static files (such as style sheets), relative to this
# directory.  They are copied after the builtin static files, so a file
# named "default.css" would overwrite the builtin "default.css".
html_static_path = ['_static']

# Output file base name for HTML help builder.
htmlhelp_basename = 'pandas-msgpackdoc'
# -- Options for LaTeX output ---------------------------------------------

# Overrides for the LaTeX build; every key ('papersize', 'pointsize',
# 'preamble', 'figure_align') is left at its Sphinx default.
latex_elements = {
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'pandas-msgpack.tex', u'pandas-msgpack Documentation',
     u'PyData Development Team', 'manual'),
]

# Remaining LaTeX knobs (latex_logo, latex_use_parts, latex_show_pagerefs,
# latex_show_urls, latex_appendices, latex_keep_old_macro_names,
# latex_domain_indices) keep their Sphinx defaults.
317 |
318 |
# -- Options for manual page output ---------------------------------------

# One entry per manual page, as tuples of
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'pandas-msgpack', u'pandas-msgpack Documentation',
     [author], 1)
]

# man_show_urls (append URL addresses after external links) keeps its
# Sphinx default.
331 |
332 |
# -- Options for Texinfo output -------------------------------------------

# One Texinfo file per entry, as tuples of
# (source start file, target name, title, author,
#  dir menu entry, description, category).
texinfo_documents = [
    (master_doc, 'pandas-msgpack', u'pandas-msgpack Documentation',
     author, 'pandas-msgpack', 'One line description of project.',
     'Miscellaneous'),
]

# texinfo_appendices, texinfo_domain_indices, texinfo_show_urls and
# texinfo_no_detailmenu keep their Sphinx defaults.
359 |
360 |
# Cross-reference the Python standard library docs via intersphinx.
# Uses the named form {name: (target, inventory)}; the bare-URL form
# ({'https://...': None}) is deprecated and rejected by current Sphinx.
intersphinx_mapping = {'python': ('https://docs.python.org/', None)}

# Shorthand roles for GitHub references, e.g. :issue:`123` renders as a
# link captioned "GH#123".
# NOTE(review): Sphinx >= 4 requires the caption to contain '%s'
# (e.g. 'GH#%s'); confirm against the Sphinx version pinned for the docs.
extlinks = {'issue': ('https://github.com/pydata/pandas-msgpack/issues/%s',
                      'GH#'),
            'pr': ('https://github.com/pydata/pandas-msgpack/pull/%s', 'GH#')}
367 |
--------------------------------------------------------------------------------
/pandas_msgpack/includes/unpack_template.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack unpacking routine template
3 | *
4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
/* GCC supports case-range labels (case A ... B:); use them for the dense
 * opcode dispatch in unpack_execute when available. */
#ifndef USE_CASE_RANGE
#ifdef __GNUC__
#define USE_CASE_RANGE
#endif
#endif

/* One frame of the container stack: the container object under
 * construction, its expected element count (size), how many elements
 * have been filled so far (count), the container state (ct, one of
 * CT_ARRAY_ITEM / CT_MAP_KEY / CT_MAP_VALUE), and, for maps, the pending
 * key awaiting its value. */
typedef struct unpack_stack {
    PyObject* obj;
    size_t size;
    size_t count;
    unsigned int ct;
    PyObject* map_key;
} unpack_stack;

/* Resumable unpacker state: user callbacks/options (user), the current
 * parser state (cs), the number of fixed trailing bytes still required
 * (trail), and a fixed-depth container stack with its depth (top).  The
 * commented-out fields belonged to a growable-stack variant. */
struct unpack_context {
    unpack_user user;
    unsigned int cs;
    unsigned int trail;
    unsigned int top;
    /*
    unpack_stack* stack;
    unsigned int stack_size;
    unpack_stack embed_stack[MSGPACK_EMBED_STACK_SIZE];
    */
    unpack_stack stack[MSGPACK_EMBED_STACK_SIZE];
};
45 |
46 |
47 | static inline void unpack_init(unpack_context* ctx)
48 | {
49 | ctx->cs = CS_HEADER;
50 | ctx->trail = 0;
51 | ctx->top = 0;
52 | /*
53 | ctx->stack = ctx->embed_stack;
54 | ctx->stack_size = MSGPACK_EMBED_STACK_SIZE;
55 | */
56 | ctx->stack[0].obj = unpack_callback_root(&ctx->user);
57 | }
58 |
59 | /*
60 | static inline void unpack_destroy(unpack_context* ctx)
61 | {
62 | if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) {
63 | free(ctx->stack);
64 | }
65 | }
66 | */
67 |
68 | static inline PyObject* unpack_data(unpack_context* ctx)
69 | {
70 | return (ctx)->stack[0].obj;
71 | }
72 |
73 |
74 | template
75 | static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off)
76 | {
77 | assert(len >= *off);
78 |
79 | const unsigned char* p = (unsigned char*)data + *off;
80 | const unsigned char* const pe = (unsigned char*)data + len;
81 | const void* n = NULL;
82 |
83 | unsigned int trail = ctx->trail;
84 | unsigned int cs = ctx->cs;
85 | unsigned int top = ctx->top;
86 | unpack_stack* stack = ctx->stack;
87 | /*
88 | unsigned int stack_size = ctx->stack_size;
89 | */
90 | unpack_user* user = &ctx->user;
91 |
92 | PyObject* obj = NULL;
93 | unpack_stack* c = NULL;
94 |
95 | int ret;
96 |
97 | #define construct_cb(name) \
98 | construct && unpack_callback ## name
99 |
100 | #define push_simple_value(func) \
101 | if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \
102 | goto _push
103 | #define push_fixed_value(func, arg) \
104 | if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \
105 | goto _push
106 | #define push_variable_value(func, base, pos, len) \
107 | if(construct_cb(func)(user, \
108 | (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \
109 | goto _push
110 |
111 | #define again_fixed_trail(_cs, trail_len) \
112 | trail = trail_len; \
113 | cs = _cs; \
114 | goto _fixed_trail_again
115 | #define again_fixed_trail_if_zero(_cs, trail_len, ifzero) \
116 | trail = trail_len; \
117 | if(trail == 0) { goto ifzero; } \
118 | cs = _cs; \
119 | goto _fixed_trail_again
120 |
121 | #define start_container(func, count_, ct_) \
122 | if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \
123 | if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \
124 | if((count_) == 0) { obj = stack[top].obj; \
125 | if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \
126 | goto _push; } \
127 | stack[top].ct = ct_; \
128 | stack[top].size = count_; \
129 | stack[top].count = 0; \
130 | ++top; \
131 | /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \
132 | /*printf("stack push %d\n", top);*/ \
133 | /* FIXME \
134 | if(top >= stack_size) { \
135 | if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \
136 | size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \
137 | size_t nsize = csize * 2; \
138 | unpack_stack* tmp = (unpack_stack*)malloc(nsize); \
139 | if(tmp == NULL) { goto _failed; } \
140 | memcpy(tmp, ctx->stack, csize); \
141 | ctx->stack = stack = tmp; \
142 | ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \
143 | } else { \
144 | size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \
145 | unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \
146 | if(tmp == NULL) { goto _failed; } \
147 | ctx->stack = stack = tmp; \
148 | ctx->stack_size = stack_size = stack_size * 2; \
149 | } \
150 | } \
151 | */ \
152 | goto _header_again
153 |
154 | #define NEXT_CS(p) ((unsigned int)*p & 0x1f)
155 |
156 | #ifdef USE_CASE_RANGE
157 | #define SWITCH_RANGE_BEGIN switch(*p) {
158 | #define SWITCH_RANGE(FROM, TO) case FROM ... TO:
159 | #define SWITCH_RANGE_DEFAULT default:
160 | #define SWITCH_RANGE_END }
161 | #else
162 | #define SWITCH_RANGE_BEGIN { if(0) {
163 | #define SWITCH_RANGE(FROM, TO) } else if(FROM <= *p && *p <= TO) {
164 | #define SWITCH_RANGE_DEFAULT } else {
165 | #define SWITCH_RANGE_END } }
166 | #endif
167 |
168 | if(p == pe) { goto _out; }
169 | do {
170 | switch(cs) {
171 | case CS_HEADER:
172 | SWITCH_RANGE_BEGIN
173 | SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum
174 | push_fixed_value(_uint8, *(uint8_t*)p);
175 | SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum
176 | push_fixed_value(_int8, *(int8_t*)p);
177 | SWITCH_RANGE(0xc0, 0xdf) // Variable
178 | switch(*p) {
179 | case 0xc0: // nil
180 | push_simple_value(_nil);
181 | //case 0xc1: // never used
182 | case 0xc2: // false
183 | push_simple_value(_false);
184 | case 0xc3: // true
185 | push_simple_value(_true);
186 | case 0xc4: // bin 8
187 | again_fixed_trail(NEXT_CS(p), 1);
188 | case 0xc5: // bin 16
189 | again_fixed_trail(NEXT_CS(p), 2);
190 | case 0xc6: // bin 32
191 | again_fixed_trail(NEXT_CS(p), 4);
192 | case 0xc7: // ext 8
193 | again_fixed_trail(NEXT_CS(p), 1);
194 | case 0xc8: // ext 16
195 | again_fixed_trail(NEXT_CS(p), 2);
196 | case 0xc9: // ext 32
197 | again_fixed_trail(NEXT_CS(p), 4);
198 | case 0xca: // float
199 | case 0xcb: // double
200 | case 0xcc: // unsigned int 8
201 | case 0xcd: // unsigned int 16
202 | case 0xce: // unsigned int 32
203 | case 0xcf: // unsigned int 64
204 | case 0xd0: // signed int 8
205 | case 0xd1: // signed int 16
206 | case 0xd2: // signed int 32
207 | case 0xd3: // signed int 64
208 | again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
209 | case 0xd4: // fixext 1
210 | case 0xd5: // fixext 2
211 | case 0xd6: // fixext 4
212 | case 0xd7: // fixext 8
213 | again_fixed_trail_if_zero(ACS_EXT_VALUE,
214 | (1 << (((unsigned int)*p) & 0x03))+1,
215 | _ext_zero);
216 | case 0xd8: // fixext 16
217 | again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
218 | case 0xd9: // str 8
219 | again_fixed_trail(NEXT_CS(p), 1);
220 | case 0xda: // raw 16
221 | case 0xdb: // raw 32
222 | case 0xdc: // array 16
223 | case 0xdd: // array 32
224 | case 0xde: // map 16
225 | case 0xdf: // map 32
226 | again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01));
227 | default:
228 | goto _failed;
229 | }
230 | SWITCH_RANGE(0xa0, 0xbf) // FixRaw
231 | again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero);
232 | SWITCH_RANGE(0x90, 0x9f) // FixArray
233 | start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM);
234 | SWITCH_RANGE(0x80, 0x8f) // FixMap
235 | start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY);
236 |
237 | SWITCH_RANGE_DEFAULT
238 | goto _failed;
239 | SWITCH_RANGE_END
240 | // end CS_HEADER
241 |
242 |
243 | _fixed_trail_again:
244 | ++p;
245 |
246 | default:
247 | if((size_t)(pe - p) < trail) { goto _out; }
248 | n = p; p += trail - 1;
249 | switch(cs) {
250 | case CS_EXT_8:
251 | again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
252 | case CS_EXT_16:
253 | again_fixed_trail_if_zero(ACS_EXT_VALUE,
254 | _msgpack_load16(uint16_t,n)+1,
255 | _ext_zero);
256 | case CS_EXT_32:
257 | again_fixed_trail_if_zero(ACS_EXT_VALUE,
258 | _msgpack_load32(uint32_t,n)+1,
259 | _ext_zero);
260 | case CS_FLOAT: {
261 | union { uint32_t i; float f; } mem;
262 | mem.i = _msgpack_load32(uint32_t,n);
263 | push_fixed_value(_float, mem.f); }
264 | case CS_DOUBLE: {
265 | union { uint64_t i; double f; } mem;
266 | mem.i = _msgpack_load64(uint64_t,n);
267 | #if defined(__arm__) && !(__ARM_EABI__) // arm-oabi
268 | // https://github.com/msgpack/msgpack-perl/pull/1
269 | mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL);
270 | #endif
271 | push_fixed_value(_double, mem.f); }
272 | case CS_UINT_8:
273 | push_fixed_value(_uint8, *(uint8_t*)n);
274 | case CS_UINT_16:
275 | push_fixed_value(_uint16, _msgpack_load16(uint16_t,n));
276 | case CS_UINT_32:
277 | push_fixed_value(_uint32, _msgpack_load32(uint32_t,n));
278 | case CS_UINT_64:
279 | push_fixed_value(_uint64, _msgpack_load64(uint64_t,n));
280 |
281 | case CS_INT_8:
282 | push_fixed_value(_int8, *(int8_t*)n);
283 | case CS_INT_16:
284 | push_fixed_value(_int16, _msgpack_load16(int16_t,n));
285 | case CS_INT_32:
286 | push_fixed_value(_int32, _msgpack_load32(int32_t,n));
287 | case CS_INT_64:
288 | push_fixed_value(_int64, _msgpack_load64(int64_t,n));
289 |
290 | case CS_BIN_8:
291 | again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
292 | case CS_BIN_16:
293 | again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero);
294 | case CS_BIN_32:
295 | again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load32(uint32_t,n), _bin_zero);
296 | case ACS_BIN_VALUE:
297 | _bin_zero:
298 | push_variable_value(_bin, data, n, trail);
299 |
300 | case CS_RAW_8:
301 | again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero);
302 | case CS_RAW_16:
303 | again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero);
304 | case CS_RAW_32:
305 | again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero);
306 | case ACS_RAW_VALUE:
307 | _raw_zero:
308 | push_variable_value(_raw, data, n, trail);
309 |
310 | case ACS_EXT_VALUE:
311 | _ext_zero:
312 | push_variable_value(_ext, data, n, trail);
313 |
314 | case CS_ARRAY_16:
315 | start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
316 | case CS_ARRAY_32:
317 | /* FIXME security guard */
318 | start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM);
319 |
320 | case CS_MAP_16:
321 | start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY);
322 | case CS_MAP_32:
323 | /* FIXME security guard */
324 | start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY);
325 |
326 | default:
327 | goto _failed;
328 | }
329 | }
330 |
331 | _push:
332 | if(top == 0) { goto _finish; }
333 | c = &stack[top-1];
334 | switch(c->ct) {
335 | case CT_ARRAY_ITEM:
336 | if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; }
337 | if(++c->count == c->size) {
338 | obj = c->obj;
339 | if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; }
340 | --top;
341 | /*printf("stack pop %d\n", top);*/
342 | goto _push;
343 | }
344 | goto _header_again;
345 | case CT_MAP_KEY:
346 | c->map_key = obj;
347 | c->ct = CT_MAP_VALUE;
348 | goto _header_again;
349 | case CT_MAP_VALUE:
350 | if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; }
351 | if(++c->count == c->size) {
352 | obj = c->obj;
353 | if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; }
354 | --top;
355 | /*printf("stack pop %d\n", top);*/
356 | goto _push;
357 | }
358 | c->ct = CT_MAP_KEY;
359 | goto _header_again;
360 |
361 | default:
362 | goto _failed;
363 | }
364 |
365 | _header_again:
366 | cs = CS_HEADER;
367 | ++p;
368 | } while(p != pe);
369 | goto _out;
370 |
371 |
372 | _finish:
373 | if (!construct)
374 | unpack_callback_nil(user, &obj);
375 | stack[0].obj = obj;
376 | ++p;
377 | ret = 1;
378 | /*printf("-- finish --\n"); */
379 | goto _end;
380 |
381 | _failed:
382 | /*printf("** FAILED **\n"); */
383 | ret = -1;
384 | goto _end;
385 |
386 | _out:
387 | ret = 0;
388 | goto _end;
389 |
390 | _end:
391 | ctx->cs = cs;
392 | ctx->trail = trail;
393 | ctx->top = top;
394 | *off = p - (const unsigned char*)data;
395 |
396 | return ret;
397 | #undef construct_cb
398 | }
399 |
400 | #undef SWITCH_RANGE_BEGIN
401 | #undef SWITCH_RANGE
402 | #undef SWITCH_RANGE_DEFAULT
403 | #undef SWITCH_RANGE_END
404 | #undef push_simple_value
405 | #undef push_fixed_value
406 | #undef push_variable_value
407 | #undef again_fixed_trail
408 | #undef again_fixed_trail_if_zero
409 | #undef start_container
410 |
411 | template
412 | static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off)
413 | {
414 | assert(len >= *off);
415 | uint32_t size;
416 | const unsigned char *const p = (unsigned char*)data + *off;
417 |
418 | #define inc_offset(inc) \
419 | if (len - *off < inc) \
420 | return 0; \
421 | *off += inc;
422 |
423 | switch (*p) {
424 | case var_offset:
425 | inc_offset(3);
426 | size = _msgpack_load16(uint16_t, p + 1);
427 | break;
428 | case var_offset + 1:
429 | inc_offset(5);
430 | size = _msgpack_load32(uint32_t, p + 1);
431 | break;
432 | #ifdef USE_CASE_RANGE
433 | case fixed_offset + 0x0 ... fixed_offset + 0xf:
434 | #else
435 | case fixed_offset + 0x0:
436 | case fixed_offset + 0x1:
437 | case fixed_offset + 0x2:
438 | case fixed_offset + 0x3:
439 | case fixed_offset + 0x4:
440 | case fixed_offset + 0x5:
441 | case fixed_offset + 0x6:
442 | case fixed_offset + 0x7:
443 | case fixed_offset + 0x8:
444 | case fixed_offset + 0x9:
445 | case fixed_offset + 0xa:
446 | case fixed_offset + 0xb:
447 | case fixed_offset + 0xc:
448 | case fixed_offset + 0xd:
449 | case fixed_offset + 0xe:
450 | case fixed_offset + 0xf:
451 | #endif
452 | ++*off;
453 | size = ((unsigned int)*p) & 0x0f;
454 | break;
455 | default:
456 | PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
457 | return -1;
458 | }
459 | unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj);
460 | return 1;
461 | }
462 |
463 | #undef SWITCH_RANGE_BEGIN
464 | #undef SWITCH_RANGE
465 | #undef SWITCH_RANGE_DEFAULT
466 | #undef SWITCH_RANGE_END
467 |
468 | static const execute_fn unpack_construct = &unpack_execute;
469 | static const execute_fn unpack_skip = &unpack_execute;
470 | static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>;
471 | static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>;
472 |
473 | #undef NEXT_CS
474 |
475 | /* vim: set ts=4 sw=4 sts=4 expandtab */
476 |
--------------------------------------------------------------------------------
/pandas_msgpack/_version.py:
--------------------------------------------------------------------------------
1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by githubs download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.15 (https://github.com/warner/python-versioneer)
10 |
11 | # flake8: noqa
12 |
13 | import errno
14 | import os
15 | import re
16 | import subprocess
17 | import sys
18 |
19 |
def get_keywords():
    """Return the git-archive substitution keywords for this tree.

    The ``$Format...$`` placeholders are expanded by git during
    ``git archive``.  setup.py/versioneer.py greps for the variable
    names, so each assignment must stay on a line of its own.
    """
    git_refnames = "$Format:%d$"
    git_full = "$Format:%H$"
    return {"refnames": git_refnames, "full": git_full}
29 |
30 |
class VersioneerConfig:
    """Bare attribute container for versioneer settings."""
33 |
34 |
def get_config():
    """Create the static versioneer configuration for this project.

    These values were filled in when 'setup.py versioneer' created
    _version.py.
    """
    settings = {
        "VCS": "git",
        "style": "pep440",
        "tag_prefix": "",
        "parentdir_prefix": "pandas-msgpack",
        "versionfile_source": "pandas_msgpack/_version.py",
        "verbose": False,
    }
    cfg = VersioneerConfig()
    for name, value in settings.items():
        setattr(cfg, name, value)
    return cfg
46 |
47 |
class NotThisMethod(Exception):
    """Raised when a particular version-discovery strategy does not apply."""
50 |
51 |
# HANDLERS maps vcs name -> {method name: handler function}; it is filled
# in by the register_vcs_handler decorator below.
LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Register the decorated function as the *method* handler for the
    version-control system *vcs* in the module-level HANDLERS table."""
    def decorate(f):
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
63 |
64 |
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
    """Try each candidate executable in *commands* until one launches.

    Runs ``[command] + args`` and returns its stripped stdout as text.
    Returns None when no candidate could be launched or the process
    exited with a non-zero status.
    """
    assert isinstance(commands, list)
    proc = None
    for command in commands:
        argv = [command] + args
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            proc = subprocess.Popen(argv, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=(subprocess.PIPE if hide_stderr
                                            else None))
        except EnvironmentError:
            err = sys.exc_info()[1]
            if err.errno == errno.ENOENT:
                # this spelling of the executable is absent; try the next
                continue
            if verbose:
                print("unable to run %s" % str(argv))
                print(err)
            return None
        else:
            break
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None
    stdout = proc.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if proc.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % str(argv))
        return None
    return stdout
96 |
97 |
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of the source-tree directory.

    Source tarballs conventionally unpack into a directory named after
    both the project and its version; everything following
    *parentdir_prefix* in the directory name is taken as the version.
    Raises NotThisMethod when the prefix does not match.
    """
    dirname = os.path.basename(root)
    if dirname.startswith(parentdir_prefix):
        return {"version": dirname[len(parentdir_prefix):],
                "full-revisionid": None,
                "dirty": False,
                "error": None}
    if verbose:
        print("guessing rootdir is '%s', but '%s' doesn't start with "
              "prefix '%s'" % (root, dirname, parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
110 |
111 |
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract the git-archive keyword values from a _version.py file.

    The code embedded in _version.py can just fetch the value of these
    keywords.  When used from setup.py, we don't want to import
    _version.py, so we do it with a regexp instead.  This function is not
    used from _version.py.
    """
    keywords = {}
    try:
        # 'with' guarantees the handle is closed even when a read raises
        # mid-file (the original closed it only on the success path).
        with open(versionfile_abs, "r") as f:
            for line in f.readlines():
                if line.strip().startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if line.strip().startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
    except EnvironmentError:
        pass
    return keywords
134 |
135 |
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Turn expanded git-archive keywords into a version dict.

    Raises NotThisMethod when the keywords are missing or were never
    expanded (i.e. this tree is not a git-archive tarball).
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set(r.strip() for r in refnames.strip("()").split(","))
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set(r[len(TAG):] for r in refs if r.startswith(TAG))
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion strips the refs/heads/ and refs/tags/ prefixes that would
        # let us distinguish branches from tags, so ignoring refnames without
        # digits filters out common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set(r for r in refs if re.search(r'\d', r))
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if not ref.startswith(tag_prefix):
            continue
        version = ref[len(tag_prefix):]
        if verbose:
            print("picking %s" % version)
        return {"version": version,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False, "error": None
                }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags"}
179 |
180 |
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Collect version "pieces" by running git inside a source checkout.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Returns a dict with keys "long", "short", "error", "dirty",
    "closest-tag" and "distance".  Raises NotThisMethod when *root* has
    no .git directory or the git commands cannot be run.
    """

    if not os.path.exists(os.path.join(root, ".git")):
        if verbose:
            print("no .git in %s" % root)
        raise NotThisMethod("no .git directory")

    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]
    # if there is a tag, this yields TAG-NUM-gHEX[-dirty]
    # if there are no tags, this yields HEX[-dirty] (no NUM)
    describe_out = run_command(GITS, ["describe", "--tags", "--dirty",
                                      "--always", "--long"],
                               cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                cwd=root)
        pieces["distance"] = int(count_out)  # total number of commits

    return pieces
261 |
262 |
def plus_or_dot(pieces):
    """Separator for the next local-version segment: '.' once the closest
    tag already contains a '+', otherwise '+'."""
    return "." if "+" in pieces.get("closest-tag", "") else "+"
267 |
268 |
def render_pep440(pieces):
    """Render TAG[+DISTANCE.gHEX[.dirty]] (PEP 440 local-version style).

    A tagged build that is subsequently dirtied renders as
    TAG+0.gHEX.dirty.  With no tags at all (git describe yielded only a
    HEX) the result is 0+untagged.DISTANCE.gHEX[.dirty].
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1: no tags at all
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
        return rendered
    rendered = tag
    if pieces["distance"] or pieces["dirty"]:
        rendered += plus_or_dot(pieces)
        rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered
291 |
292 |
def render_pep440_pre(pieces):
    """Render TAG[.post.devDISTANCE]; never carries a dirty marker.

    With no tags at all, renders 0.post.devDISTANCE.
    """
    tag = pieces["closest-tag"]
    if not tag:
        # No tag in history: base the pre-release on a synthetic "0".
        return "0.post.dev%d" % pieces["distance"]
    if pieces["distance"]:
        return "%s.post.dev%d" % (tag, pieces["distance"])
    return tag
307 |
308 |
def render_pep440_post(pieces):
    """Render TAG[.postDISTANCE[.dev0]+gHEX] (PEP 440 post-release style).

    ".dev0" marks a dirty tree; it sorts before the corresponding clean
    build, which is acceptable since -dirty trees should not be released.
    With no tags at all, renders 0.postDISTANCE[.dev0]+gHEX.
    """
    tag = pieces["closest-tag"]
    if not tag:
        # No tag in history: base everything on a synthetic "0" release.
        version = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            version += ".dev0"
        return version + "+g%s" % pieces["short"]

    version = tag
    if pieces["distance"] or pieces["dirty"]:
        version += ".post%d" % pieces["distance"]
        if pieces["dirty"]:
            version += ".dev0"
        version += "%sg%s" % (plus_or_dot(pieces), pieces["short"])
    return version
333 |
334 |
def render_pep440_old(pieces):
    """Render TAG[.postDISTANCE[.dev0]]; ".dev0" marks a dirty tree.

    With no tags at all, renders 0.postDISTANCE[.dev0].
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            return tag
        version = "%s.post%d" % (tag, pieces["distance"])
    else:
        # No tag in history: synthesize a "0" base release.
        version = "0.post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version
353 |
354 |
def render_git_describe(pieces):
    """Mimic ``git describe --tags --dirty --always``.

    Renders TAG[-DISTANCE-gHEX][-dirty], or bare HEX[-dirty] when the
    history contains no tags (note: no 'g' prefix in that case).
    """
    tag = pieces["closest-tag"]
    if not tag:
        version = pieces["short"]
    elif pieces["distance"]:
        version = "%s-%d-g%s" % (tag, pieces["distance"], pieces["short"])
    else:
        version = tag
    return version + "-dirty" if pieces["dirty"] else version
372 |
373 |
def render_git_describe_long(pieces):
    """Mimic ``git describe --tags --dirty --always --long``.

    Renders TAG-DISTANCE-gHEX[-dirty]; the distance/hash part is always
    present. With no tags, renders bare HEX[-dirty] (no 'g' prefix).
    """
    tag = pieces["closest-tag"]
    if tag:
        version = "%s-%d-g%s" % (tag, pieces["distance"], pieces["short"])
    else:
        version = pieces["short"]
    return version + "-dirty" if pieces["dirty"] else version
390 |
391 |
def render(pieces, style):
    """Render *pieces* into a version dict using the requested *style*.

    Returns a dict with "version", "full-revisionid", "dirty" and
    "error" keys. A pieces-level error short-circuits to an "unknown"
    version; an unrecognized style raises ValueError.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"]}

    if not style or style == "default":
        style = "pep440"  # the default

    # Dispatch table keyed by style name.
    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    if style not in renderers:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderers[style](pieces)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None}
419 |
420 |
def get_versions():
    """Compute the version dict, trying each discovery strategy in turn.

    Order: expanded VCS keywords, then ``git describe`` on the source
    tree (located by walking up from __file__), then the parent-directory
    name. Falls back to a "0+unknown" version dict on total failure.
    """
    cfg = get_config()
    verbose = cfg.verbose

    # 1. Expanded $Format$ keywords work even without __file__ or a
    #    git checkout (py2exe/bbfreeze/non-CPython cases).
    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    # versionfile_source is this file's path relative to the project
    # root (where .git might live); invert it to find the root.
    try:
        root = os.path.realpath(__file__)
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        # Frozen interpreters may not define __file__ at all.
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree"}

    # 2. Ask git directly.
    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose),
                      cfg.style)
    except NotThisMethod:
        pass

    # 3. Fall back to parsing the parent directory's name.
    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version"}
463 |
--------------------------------------------------------------------------------
/pandas_msgpack/msgpack/_unpacker.pyx:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | #cython: embedsignature=True
3 |
4 | from cpython cimport *
5 | cdef extern from "Python.h":
6 | ctypedef struct PyObject
7 | cdef int PyObject_AsReadBuffer(object o, const void** buff,
8 | Py_ssize_t* buf_len) except -1
9 |
10 | from libc.stdlib cimport *
11 | from libc.string cimport *
12 | from libc.limits cimport *
13 |
14 | from .exceptions import (BufferFull, OutOfData,
15 | UnpackValueError, ExtraData)
16 | from . import ExtType
17 |
18 |
19 | cdef extern from "../includes/unpack.h":
20 | ctypedef struct msgpack_user:
21 | bint use_list
22 | PyObject* object_hook
23 | bint has_pairs_hook # call object_hook with k-v pairs
24 | PyObject* list_hook
25 | PyObject* ext_hook
26 | char *encoding
27 | char *unicode_errors
28 | Py_ssize_t max_str_len
29 | Py_ssize_t max_bin_len
30 | Py_ssize_t max_array_len
31 | Py_ssize_t max_map_len
32 | Py_ssize_t max_ext_len
33 |
34 | ctypedef struct unpack_context:
35 | msgpack_user user
36 | PyObject* obj
37 | size_t count
38 |
39 | ctypedef int (*execute_fn)(unpack_context* ctx, const char* data,
40 | size_t len, size_t* off) except? -1
41 | execute_fn unpack_construct
42 | execute_fn unpack_skip
43 | execute_fn read_array_header
44 | execute_fn read_map_header
45 | void unpack_init(unpack_context* ctx)
46 | object unpack_data(unpack_context* ctx)
47 |
cdef inline init_ctx(unpack_context *ctx,
                     object object_hook, object object_pairs_hook,
                     object list_hook, object ext_hook,
                     bint use_list, char* encoding, char* unicode_errors,
                     Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
                     Py_ssize_t max_array_len, Py_ssize_t max_map_len,
                     Py_ssize_t max_ext_len):
    # Initialize *ctx* and populate its msgpack_user options from the
    # Python-level arguments. NOTE(review): the hook objects appear to be
    # stored as borrowed PyObject* pointers (no incref here), so the
    # caller must keep them alive for the context's lifetime — Unpacker
    # does this by holding them as attributes; confirm for other callers.
    unpack_init(ctx)
    ctx.user.use_list = use_list
    ctx.user.object_hook = ctx.user.list_hook = NULL
    ctx.user.max_str_len = max_str_len
    ctx.user.max_bin_len = max_bin_len
    ctx.user.max_array_len = max_array_len
    ctx.user.max_map_len = max_map_len
    ctx.user.max_ext_len = max_ext_len

    if object_hook is not None and object_pairs_hook is not None:
        raise TypeError("object_pairs_hook and object_hook "
                        "are mutually exclusive.")

    if object_hook is not None:
        if not PyCallable_Check(object_hook):
            raise TypeError("object_hook must be a callable.")
        ctx.user.object_hook = object_hook

    # object_pairs_hook reuses the object_hook slot; has_pairs_hook tells
    # the C unpacker to call it with key-value pairs instead of a dict.
    if object_pairs_hook is None:
        ctx.user.has_pairs_hook = False
    else:
        if not PyCallable_Check(object_pairs_hook):
            raise TypeError("object_pairs_hook must be a callable.")
        ctx.user.object_hook = object_pairs_hook
        ctx.user.has_pairs_hook = True

    if list_hook is not None:
        if not PyCallable_Check(list_hook):
            raise TypeError("list_hook must be a callable.")
        ctx.user.list_hook = list_hook

    if ext_hook is not None:
        if not PyCallable_Check(ext_hook):
            raise TypeError("ext_hook must be a callable.")
        ctx.user.ext_hook = ext_hook

    # Raw char* pointers; the caller owns the underlying bytes objects.
    ctx.user.encoding = encoding
    ctx.user.unicode_errors = unicode_errors
93 |
94 |
def default_read_extended_type(typecode, data):
    """Fallback ext-type decoder: always refuses.

    Raises NotImplementedError identifying the unsupported typecode;
    *data* is ignored.
    """
    msg = "Cannot decode extended type with typecode=%d" % typecode
    raise NotImplementedError(msg)
98 |
99 |
def unpackb(object packed, object object_hook=None, object list_hook=None,
            bint use_list=1, encoding=None, unicode_errors="strict",
            object_pairs_hook=None, ext_hook=ExtType,
            Py_ssize_t max_str_len=2147483647, # 2**31-1
            Py_ssize_t max_bin_len=2147483647,
            Py_ssize_t max_array_len=2147483647,
            Py_ssize_t max_map_len=2147483647,
            Py_ssize_t max_ext_len=2147483647):
    """
    Unpack packed_bytes to object. Returns an unpacked object.

    Raises `ValueError` when `packed` contains extra bytes.

    See :class:`Unpacker` for options.
    """
    cdef unpack_context ctx
    cdef size_t off = 0
    cdef int ret

    cdef char* buf
    cdef Py_ssize_t buf_len
    # Borrowed C pointers into the encoding/unicode_errors bytes objects;
    # those Python objects stay alive for the duration of this call.
    cdef char* cenc = NULL
    cdef char* cerr = NULL

    # Expose `packed` as a contiguous read-only (buf, buf_len) pair.
    # NOTE(review): PyObject_AsReadBuffer is the legacy buffer API and
    # `buf` is char* where the declared parameter is const void** —
    # confirm Cython accepts this without an explicit cast.
    PyObject_AsReadBuffer(packed, &buf, &buf_len)

    if encoding is not None:
        if isinstance(encoding, unicode):
            encoding = encoding.encode('ascii')
        cenc = PyBytes_AsString(encoding)

    if unicode_errors is not None:
        if isinstance(unicode_errors, unicode):
            unicode_errors = unicode_errors.encode('ascii')
        cerr = PyBytes_AsString(unicode_errors)

    init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
             use_list, cenc, cerr,
             max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
    ret = unpack_construct(&ctx, buf, buf_len, &off)
    if ret == 1:
        obj = unpack_data(&ctx)
        # Bytes past the first complete object are an error; report both
        # the object and the trailing bytes via ExtraData.
        if off < buf_len:
            raise ExtraData(obj, PyBytes_FromStringAndSize(
                buf + off, buf_len - off))
        return obj
    else:
        raise UnpackValueError("Unpack failed: error = %d" % (ret,))
148 |
149 |
def unpack(object stream, object object_hook=None, object list_hook=None,
           bint use_list=1, encoding=None, unicode_errors="strict",
           object_pairs_hook=None,
           ):
    """Deserialize one object from the file-like *stream*.

    Reads the whole stream eagerly and delegates to :func:`unpackb`, so
    `ValueError` is raised when `stream` has extra bytes.

    See :class:`Unpacker` for options.
    """
    data = stream.read()
    return unpackb(data,
                   object_hook=object_hook,
                   object_pairs_hook=object_pairs_hook,
                   list_hook=list_hook,
                   use_list=use_list,
                   encoding=encoding,
                   unicode_errors=unicode_errors)
165 |
166 |
cdef class Unpacker(object):
    """Streaming unpacker.

    arguments:

    :param file_like:
        File-like object having `.read(n)` method.
        If specified, unpacker reads serialized data from it and
        :meth:`feed()` is not usable.

    :param int read_size:
        Used as `file_like.read(read_size)`. (default:
        `min(1024**2, max_buffer_size)`)

    :param bool use_list:
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)

    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
        (See also simplejson)

    :param callable object_pairs_hook:
        When specified, it should be callable. Unpacker calls it with a list
        of key-value pairs after unpacking msgpack map. (See also simplejson)

    :param str encoding:
        Encoding used for decoding msgpack raw.
        If it is None (default), msgpack raw is deserialized to Python bytes.

    :param str unicode_errors:
        Used for decoding msgpack raw with *encoding*.
        (default: `'strict'`)

    :param int max_buffer_size:
        Limits size of data waiting unpacked. 0 means system's
        INT_MAX (default). Raises `BufferFull` exception when it
        is insufficient. You should set this parameter when unpacking
        data from untrusted source.

    :param int max_str_len:
        Limits max length of str. (default: 2**31-1)

    :param int max_bin_len:
        Limits max length of bin. (default: 2**31-1)

    :param int max_array_len:
        Limits max length of array. (default: 2**31-1)

    :param int max_map_len:
        Limits max length of map. (default: 2**31-1)


    example of streaming deserialize from file-like object::

        unpacker = Unpacker(file_like)
        for o in unpacker:
            process(o)

    example of streaming deserialize from socket::

        unpacker = Unpacker()
        while True:
            buf = sock.recv(1024**2)
            if not buf:
                break
            unpacker.feed(buf)
            for o in unpacker:
                process(o)
    """
    cdef unpack_context ctx
    # malloc'd internal buffer; valid data lives in [buf_head, buf_tail).
    cdef char* buf
    cdef size_t buf_size, buf_head, buf_tail
    cdef object file_like
    cdef object file_like_read
    cdef Py_ssize_t read_size
    # To maintain refcnt.
    cdef object object_hook, object_pairs_hook, list_hook, ext_hook
    cdef object encoding, unicode_errors
    cdef size_t max_buffer_size

    def __cinit__(self):
        # Ensure __dealloc__ never frees an uninitialized pointer.
        self.buf = NULL

    def __dealloc__(self):
        free(self.buf)
        self.buf = NULL

    def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
                 object object_hook=None, object object_pairs_hook=None,
                 object list_hook=None, encoding=None, unicode_errors='strict',
                 int max_buffer_size=0, object ext_hook=ExtType,
                 Py_ssize_t max_str_len=2147483647, # 2**31-1
                 Py_ssize_t max_bin_len=2147483647,
                 Py_ssize_t max_array_len=2147483647,
                 Py_ssize_t max_map_len=2147483647,
                 Py_ssize_t max_ext_len=2147483647):
        cdef char *cenc=NULL,
        cdef char *cerr=NULL

        # Keep Python references so the borrowed pointers stored in ctx
        # stay valid for the Unpacker's lifetime.
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.list_hook = list_hook
        self.ext_hook = ext_hook

        self.file_like = file_like
        if file_like:
            self.file_like_read = file_like.read
            if not PyCallable_Check(self.file_like_read):
                raise TypeError("`file_like.read` must be a callable.")
        if not max_buffer_size:
            max_buffer_size = INT_MAX
        if read_size > max_buffer_size:
            raise ValueError("read_size should be less or "
                             "equal to max_buffer_size")
        if not read_size:
            read_size = min(max_buffer_size, 1024**2)
        self.max_buffer_size = max_buffer_size
        self.read_size = read_size
        self.buf = malloc(read_size)
        if self.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
        self.buf_size = read_size
        self.buf_head = 0
        self.buf_tail = 0

        # Normalize encoding/unicode_errors to bytes and keep references
        # alive; cenc/cerr are borrowed pointers into those bytes objects.
        if encoding is not None:
            if isinstance(encoding, unicode):
                self.encoding = encoding.encode('ascii')
            elif isinstance(encoding, bytes):
                self.encoding = encoding
            else:
                raise TypeError("encoding should be bytes or unicode")
            cenc = PyBytes_AsString(self.encoding)

        if unicode_errors is not None:
            if isinstance(unicode_errors, unicode):
                self.unicode_errors = unicode_errors.encode('ascii')
            elif isinstance(unicode_errors, bytes):
                self.unicode_errors = unicode_errors
            else:
                raise TypeError("unicode_errors should be bytes or unicode")
            cerr = PyBytes_AsString(self.unicode_errors)

        init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
                 ext_hook, use_list, cenc, cerr,
                 max_str_len, max_bin_len, max_array_len,
                 max_map_len, max_ext_len)

    def feed(self, object next_bytes):
        """Append `next_bytes` to internal buffer."""
        cdef Py_buffer pybuff
        if self.file_like is not None:
            raise AssertionError("unpacker.feed() is not be able "
                                 "to use with `file_like`.")
        # Accept any object exporting a simple contiguous buffer.
        PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE)
        try:
            self.append_buffer(pybuff.buf, pybuff.len)
        finally:
            PyBuffer_Release(&pybuff)

    cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
        # Append _buf_len bytes to the internal buffer, first compacting
        # (moving unread data to the front) and only growing when the
        # pending data plus new data exceed the current capacity.
        cdef:
            char* buf = self.buf
            char* new_buf
            size_t head = self.buf_head
            size_t tail = self.buf_tail
            size_t buf_size = self.buf_size
            size_t new_size

        if tail + _buf_len > buf_size:
            if ((tail - head) + _buf_len) <= buf_size:
                # move to front.
                memmove(buf, buf + head, tail - head)
                tail -= head
                head = 0
            else:
                # expand buffer.
                new_size = (tail - head) + _buf_len
                if new_size > self.max_buffer_size:
                    raise BufferFull
                new_size = min(new_size * 2, self.max_buffer_size)
                new_buf = malloc(new_size)
                if new_buf == NULL:
                    # self.buf still holds old buffer and will be freed during
                    # obj destruction
                    raise MemoryError("Unable to enlarge internal buffer.")
                memcpy(new_buf, buf + head, tail - head)
                free(buf)

                buf = new_buf
                buf_size = new_size
                tail -= head
                head = 0

        memcpy(buf + tail, (_buf), _buf_len)
        self.buf = buf
        self.buf_head = head
        self.buf_size = buf_size
        self.buf_tail = tail + _buf_len

    cdef read_from_file(self):
        # Pull at most read_size bytes from file_like without exceeding
        # max_buffer_size; an empty read marks the stream as exhausted.
        next_bytes = self.file_like_read(
            min(self.read_size,
                self.max_buffer_size - (self.buf_tail - self.buf_head)))
        if next_bytes:
            self.append_buffer(PyBytes_AsString(next_bytes),
                               PyBytes_Size(next_bytes))
        else:
            self.file_like = None

    cdef object _unpack(self, execute_fn execute,
                        object write_bytes, bint iter=0):
        # Core driver shared by unpack/skip/read_*_header/__next__:
        # run `execute` over buffered bytes, refilling from file_like on
        # partial parses; `iter` selects StopIteration over OutOfData.
        cdef int ret
        cdef object obj
        cdef size_t prev_head

        if self.buf_head >= self.buf_tail and self.file_like is not None:
            self.read_from_file()

        while 1:
            prev_head = self.buf_head
            if prev_head >= self.buf_tail:
                if iter:
                    raise StopIteration("No more data to unpack.")
                else:
                    raise OutOfData("No more data to unpack.")

            ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
            # Echo the raw bytes consumed by this call, if requested.
            if write_bytes is not None:
                write_bytes(PyBytes_FromStringAndSize(
                    self.buf + prev_head, self.buf_head - prev_head))

            if ret == 1:
                # Complete object parsed; reset ctx for the next one.
                obj = unpack_data(&self.ctx)
                unpack_init(&self.ctx)
                return obj
            elif ret == 0:
                # Parse incomplete: try to refill, else report exhaustion.
                if self.file_like is not None:
                    self.read_from_file()
                    continue
                if iter:
                    raise StopIteration("No more data to unpack.")
                else:
                    raise OutOfData("No more data to unpack.")
            else:
                raise ValueError("Unpack failed: error = %d" % (ret,))

    def read_bytes(self, Py_ssize_t nbytes):
        """Read a specified number of raw bytes from the stream"""
        cdef size_t nread
        # Serve from the internal buffer first, then top up from file_like.
        nread = min(self.buf_tail - self.buf_head, nbytes)
        ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread)
        self.buf_head += nread
        if len(ret) < nbytes and self.file_like is not None:
            ret += self.file_like.read(nbytes - len(ret))
        return ret

    def unpack(self, object write_bytes=None):
        """Unpack one object

        If write_bytes is not None, it will be called with parts of the raw
        message as it is unpacked.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(unpack_construct, write_bytes)

    def skip(self, object write_bytes=None):
        """Read and ignore one object, returning None

        If write_bytes is not None, it will be called with parts of the raw
        message as it is unpacked.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(unpack_skip, write_bytes)

    def read_array_header(self, object write_bytes=None):
        """assuming the next object is an array, return its size n, such that
        the next n unpack() calls will iterate over its contents.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(read_array_header, write_bytes)

    def read_map_header(self, object write_bytes=None):
        """assuming the next object is a map, return its size n, such that the
        next n * 2 unpack() calls will iterate over its key-value pairs.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(read_map_header, write_bytes)

    def __iter__(self):
        return self

    def __next__(self):
        return self._unpack(unpack_construct, None, 1)

    # for debug.
    #def _buf(self):
    #    return PyString_FromStringAndSize(self.buf, self.buf_tail)

    #def _off(self):
    #    return self.buf_head
474 |
--------------------------------------------------------------------------------
/pandas_msgpack/includes/pack_template.h:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack packing routine template
3 | *
4 | * Copyright (C) 2008-2010 FURUHASHI Sadayuki
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
/* TAKE8_N(d): select the least-significant byte of an N-bit integer
 * lvalue, with the byte index chosen by target endianness. Used to emit
 * one-byte fixnums and the payload byte of uint8/int8 encodings. */
#if defined(__LITTLE_ENDIAN__)
#define TAKE8_8(d) ((uint8_t*)&d)[0]
#define TAKE8_16(d) ((uint8_t*)&d)[0]
#define TAKE8_32(d) ((uint8_t*)&d)[0]
#define TAKE8_64(d) ((uint8_t*)&d)[0]
#elif defined(__BIG_ENDIAN__)
#define TAKE8_8(d) ((uint8_t*)&d)[0]
#define TAKE8_16(d) ((uint8_t*)&d)[1]
#define TAKE8_32(d) ((uint8_t*)&d)[3]
#define TAKE8_64(d) ((uint8_t*)&d)[7]
#endif

/* The including translation unit must define the output callback
 * before including this template. */
#ifndef msgpack_pack_append_buffer
#error msgpack_pack_append_buffer callback is not defined
#endif
34 |
35 |
36 | /*
37 | * Integer
38 | */
39 |
/* Pack an unsigned 8-bit value: positive fixnum (< 0x80) or 0xcc uint8. */
#define msgpack_pack_real_uint8(x, d) \
do { \
    if(d < (1<<7)) { \
        /* fixnum */ \
        msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \
    } else { \
        /* unsigned 8 */ \
        unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \
        msgpack_pack_append_buffer(x, buf, 2); \
    } \
} while(0)
51 |
/* Pack an unsigned 16-bit value into the smallest msgpack encoding:
 * positive fixnum, 0xcc uint8, or 0xcd uint16 (big-endian payload). */
#define msgpack_pack_real_uint16(x, d) \
do { \
    if(d < (1<<7)) { \
        /* fixnum */ \
        msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \
    } else if(d < (1<<8)) { \
        /* unsigned 8 */ \
        unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \
        msgpack_pack_append_buffer(x, buf, 2); \
    } else { \
        /* unsigned 16 */ \
        unsigned char buf[3]; \
        buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \
        msgpack_pack_append_buffer(x, buf, 3); \
    } \
} while(0)
68 |
/* Pack an unsigned 32-bit value into the smallest msgpack encoding:
 * positive fixnum, 0xcc uint8, 0xcd uint16, or 0xce uint32. */
#define msgpack_pack_real_uint32(x, d) \
do { \
    if(d < (1<<8)) { \
        if(d < (1<<7)) { \
            /* fixnum */ \
            msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \
        } else { \
            /* unsigned 8 */ \
            unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \
            msgpack_pack_append_buffer(x, buf, 2); \
        } \
    } else { \
        if(d < (1<<16)) { \
            /* unsigned 16 */ \
            unsigned char buf[3]; \
            buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \
            msgpack_pack_append_buffer(x, buf, 3); \
        } else { \
            /* unsigned 32 */ \
            unsigned char buf[5]; \
            buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \
            msgpack_pack_append_buffer(x, buf, 5); \
        } \
    } \
} while(0)
94 |
/* Pack an unsigned 64-bit value into the smallest msgpack encoding:
 * positive fixnum, 0xcc uint8, 0xcd uint16, 0xce uint32, or 0xcf uint64.
 * ULL-suffixed thresholds keep the comparisons in 64-bit arithmetic. */
#define msgpack_pack_real_uint64(x, d) \
do { \
    if(d < (1ULL<<8)) { \
        if(d < (1ULL<<7)) { \
            /* fixnum */ \
            msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \
        } else { \
            /* unsigned 8 */ \
            unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \
            msgpack_pack_append_buffer(x, buf, 2); \
        } \
    } else { \
        if(d < (1ULL<<16)) { \
            /* unsigned 16 */ \
            unsigned char buf[3]; \
            buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \
            msgpack_pack_append_buffer(x, buf, 3); \
        } else if(d < (1ULL<<32)) { \
            /* unsigned 32 */ \
            unsigned char buf[5]; \
            buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \
            msgpack_pack_append_buffer(x, buf, 5); \
        } else { \
            /* unsigned 64 */ \
            unsigned char buf[9]; \
            buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \
            msgpack_pack_append_buffer(x, buf, 9); \
        } \
    } \
} while(0)
125 |
/* Pack a signed 8-bit value: 0xd0 int8 below -32, otherwise a fixnum
 * (negative fixnum for [-32,-1], positive fixnum for [0,127]). */
#define msgpack_pack_real_int8(x, d) \
do { \
    if(d < -(1<<5)) { \
        /* signed 8 */ \
        unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \
        msgpack_pack_append_buffer(x, buf, 2); \
    } else { \
        /* fixnum */ \
        msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \
    } \
} while(0)
137 |
/* Pack a signed 16-bit value into the smallest msgpack encoding;
 * non-negative values reuse the unsigned forms (fixnum/uint8/uint16). */
#define msgpack_pack_real_int16(x, d) \
do { \
    if(d < -(1<<5)) { \
        if(d < -(1<<7)) { \
            /* signed 16 */ \
            unsigned char buf[3]; \
            buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \
            msgpack_pack_append_buffer(x, buf, 3); \
        } else { \
            /* signed 8 */ \
            unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \
            msgpack_pack_append_buffer(x, buf, 2); \
        } \
    } else if(d < (1<<7)) { \
        /* fixnum */ \
        msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \
    } else { \
        if(d < (1<<8)) { \
            /* unsigned 8 */ \
            unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \
            msgpack_pack_append_buffer(x, buf, 2); \
        } else { \
            /* unsigned 16 */ \
            unsigned char buf[3]; \
            buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \
            msgpack_pack_append_buffer(x, buf, 3); \
        } \
    } \
} while(0)
167 |
/* Pack a signed 32-bit value into the smallest msgpack encoding;
 * non-negative values reuse the unsigned forms (fixnum/uint8/16/32). */
#define msgpack_pack_real_int32(x, d) \
do { \
    if(d < -(1<<5)) { \
        if(d < -(1<<15)) { \
            /* signed 32 */ \
            unsigned char buf[5]; \
            buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \
            msgpack_pack_append_buffer(x, buf, 5); \
        } else if(d < -(1<<7)) { \
            /* signed 16 */ \
            unsigned char buf[3]; \
            buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \
            msgpack_pack_append_buffer(x, buf, 3); \
        } else { \
            /* signed 8 */ \
            unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \
            msgpack_pack_append_buffer(x, buf, 2); \
        } \
    } else if(d < (1<<7)) { \
        /* fixnum */ \
        msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \
    } else { \
        if(d < (1<<8)) { \
            /* unsigned 8 */ \
            unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \
            msgpack_pack_append_buffer(x, buf, 2); \
        } else if(d < (1<<16)) { \
            /* unsigned 16 */ \
            unsigned char buf[3]; \
            buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \
            msgpack_pack_append_buffer(x, buf, 3); \
        } else { \
            /* unsigned 32 */ \
            unsigned char buf[5]; \
            buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \
            msgpack_pack_append_buffer(x, buf, 5); \
        } \
    } \
} while(0)
207 |
/* Pack a signed 64-bit value into the smallest msgpack encoding;
 * non-negative values reuse the unsigned forms. LL-suffixed thresholds
 * keep the comparisons in 64-bit arithmetic. */
#define msgpack_pack_real_int64(x, d) \
do { \
    if(d < -(1LL<<5)) { \
        if(d < -(1LL<<15)) { \
            if(d < -(1LL<<31)) { \
                /* signed 64 */ \
                unsigned char buf[9]; \
                buf[0] = 0xd3; _msgpack_store64(&buf[1], d); \
                msgpack_pack_append_buffer(x, buf, 9); \
            } else { \
                /* signed 32 */ \
                unsigned char buf[5]; \
                buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \
                msgpack_pack_append_buffer(x, buf, 5); \
            } \
        } else { \
            if(d < -(1<<7)) { \
                /* signed 16 */ \
                unsigned char buf[3]; \
                buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \
                msgpack_pack_append_buffer(x, buf, 3); \
            } else { \
                /* signed 8 */ \
                unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \
                msgpack_pack_append_buffer(x, buf, 2); \
            } \
        } \
    } else if(d < (1<<7)) { \
        /* fixnum */ \
        msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \
    } else { \
        if(d < (1LL<<16)) { \
            if(d < (1<<8)) { \
                /* unsigned 8 */ \
                unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \
                msgpack_pack_append_buffer(x, buf, 2); \
            } else { \
                /* unsigned 16 */ \
                unsigned char buf[3]; \
                buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \
                msgpack_pack_append_buffer(x, buf, 3); \
            } \
        } else { \
            if(d < (1LL<<32)) { \
                /* unsigned 32 */ \
                unsigned char buf[5]; \
                buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \
                msgpack_pack_append_buffer(x, buf, 5); \
            } else { \
                /* unsigned 64 */ \
                unsigned char buf[9]; \
                buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \
                msgpack_pack_append_buffer(x, buf, 9); \
            } \
        } \
    } \
} while(0)
265 |
266 |
/* Thin typed wrappers over the msgpack_pack_real_* macros.
 * NOTE(review): these are declared `int` but contain no `return`
 * statement; msgpack_pack_append_buffer (supplied by the including
 * translation unit) is presumably expected to expand to a statement
 * that returns from the enclosing function — confirm in the includer. */
static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d)
{
    msgpack_pack_real_uint8(x, d);
}

static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d)
{
    msgpack_pack_real_uint16(x, d);
}

static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d)
{
    msgpack_pack_real_uint32(x, d);
}

static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d)
{
    msgpack_pack_real_uint64(x, d);
}

static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d)
{
    msgpack_pack_real_int8(x, d);
}

static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d)
{
    msgpack_pack_real_int16(x, d);
}

static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d)
{
    msgpack_pack_real_int32(x, d);
}

static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d)
{
    msgpack_pack_real_int64(x, d);
}
306 |
307 |
308 | //#ifdef msgpack_pack_inline_func_cint
309 |
/* Pack a C `short`, selecting the 16/32/64-bit routine by the width of
 * `short`: prefer the build-system SIZEOF_SHORT, then <limits.h>
 * SHRT_MAX, then a runtime sizeof fallback. */
static inline int msgpack_pack_short(msgpack_packer* x, short d)
{
#if defined(SIZEOF_SHORT)
#if SIZEOF_SHORT == 2
    msgpack_pack_real_int16(x, d);
#elif SIZEOF_SHORT == 4
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#elif defined(SHRT_MAX)
#if SHRT_MAX == 0x7fff
    msgpack_pack_real_int16(x, d);
#elif SHRT_MAX == 0x7fffffff
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#else
if(sizeof(short) == 2) {
    msgpack_pack_real_int16(x, d);
} else if(sizeof(short) == 4) {
    msgpack_pack_real_int32(x, d);
} else {
    msgpack_pack_real_int64(x, d);
}
#endif
}
340 |
/* Pack a C `int`, selecting the 16/32/64-bit routine by the width of
 * `int`: prefer SIZEOF_INT, then INT_MAX, then a runtime sizeof check. */
static inline int msgpack_pack_int(msgpack_packer* x, int d)
{
#if defined(SIZEOF_INT)
#if SIZEOF_INT == 2
    msgpack_pack_real_int16(x, d);
#elif SIZEOF_INT == 4
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#elif defined(INT_MAX)
#if INT_MAX == 0x7fff
    msgpack_pack_real_int16(x, d);
#elif INT_MAX == 0x7fffffff
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#else
if(sizeof(int) == 2) {
    msgpack_pack_real_int16(x, d);
} else if(sizeof(int) == 4) {
    msgpack_pack_real_int32(x, d);
} else {
    msgpack_pack_real_int64(x, d);
}
#endif
}
371 |
/* Pack a native `long`, choosing the 16/32/64-bit encoder to match the
 * platform's long width (SIZEOF_LONG, then LONG_MAX, then runtime sizeof). */
static inline int msgpack_pack_long(msgpack_packer* x, long d)
{
#if defined(SIZEOF_LONG)
#if SIZEOF_LONG == 2
    msgpack_pack_real_int16(x, d);
#elif SIZEOF_LONG == 4
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#elif defined(LONG_MAX)
#if LONG_MAX == 0x7fffL
    msgpack_pack_real_int16(x, d);
#elif LONG_MAX == 0x7fffffffL
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#else
    if(sizeof(long) == 2) {
        msgpack_pack_real_int16(x, d);
    } else if(sizeof(long) == 4) {
        msgpack_pack_real_int32(x, d);
    } else {
        msgpack_pack_real_int64(x, d);
    }
#endif
}
402 |
/* Pack a native `long long`, dispatching on its width via SIZEOF_LONG_LONG,
 * then LLONG_MAX, then runtime sizeof.  (The LLONG_MAX comparisons reuse
 * long-suffixed constants; preprocessor arithmetic promotes to intmax_t,
 * so the comparisons are still value-correct.) */
static inline int msgpack_pack_long_long(msgpack_packer* x, long long d)
{
#if defined(SIZEOF_LONG_LONG)
#if SIZEOF_LONG_LONG == 2
    msgpack_pack_real_int16(x, d);
#elif SIZEOF_LONG_LONG == 4
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#elif defined(LLONG_MAX)
#if LLONG_MAX == 0x7fffL
    msgpack_pack_real_int16(x, d);
#elif LLONG_MAX == 0x7fffffffL
    msgpack_pack_real_int32(x, d);
#else
    msgpack_pack_real_int64(x, d);
#endif

#else
    if(sizeof(long long) == 2) {
        msgpack_pack_real_int16(x, d);
    } else if(sizeof(long long) == 4) {
        msgpack_pack_real_int32(x, d);
    } else {
        msgpack_pack_real_int64(x, d);
    }
#endif
}
433 |
/* Pack a native `unsigned short`, dispatching on its width via
 * SIZEOF_SHORT, then USHRT_MAX, then runtime sizeof. */
static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d)
{
#if defined(SIZEOF_SHORT)
#if SIZEOF_SHORT == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_SHORT == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(USHRT_MAX)
#if USHRT_MAX == 0xffffU
    msgpack_pack_real_uint16(x, d);
#elif USHRT_MAX == 0xffffffffU
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned short) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned short) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}
464 |
/* Pack a native `unsigned int`, dispatching on its width via SIZEOF_INT,
 * then UINT_MAX, then runtime sizeof. */
static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d)
{
#if defined(SIZEOF_INT)
#if SIZEOF_INT == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_INT == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(UINT_MAX)
#if UINT_MAX == 0xffffU
    msgpack_pack_real_uint16(x, d);
#elif UINT_MAX == 0xffffffffU
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned int) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned int) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}
495 |
/* Pack a native `unsigned long`, dispatching on its width via SIZEOF_LONG,
 * then ULONG_MAX, then runtime sizeof. */
static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d)
{
#if defined(SIZEOF_LONG)
#if SIZEOF_LONG == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_LONG == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(ULONG_MAX)
#if ULONG_MAX == 0xffffUL
    msgpack_pack_real_uint16(x, d);
#elif ULONG_MAX == 0xffffffffUL
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned long) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned long) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}
526 |
/* Pack a native `unsigned long long`, dispatching on its width via
 * SIZEOF_LONG_LONG, then ULLONG_MAX, then runtime sizeof. */
static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d)
{
#if defined(SIZEOF_LONG_LONG)
#if SIZEOF_LONG_LONG == 2
    msgpack_pack_real_uint16(x, d);
#elif SIZEOF_LONG_LONG == 4
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#elif defined(ULLONG_MAX)
#if ULLONG_MAX == 0xffffUL
    msgpack_pack_real_uint16(x, d);
#elif ULLONG_MAX == 0xffffffffUL
    msgpack_pack_real_uint32(x, d);
#else
    msgpack_pack_real_uint64(x, d);
#endif

#else
    if(sizeof(unsigned long long) == 2) {
        msgpack_pack_real_uint16(x, d);
    } else if(sizeof(unsigned long long) == 4) {
        msgpack_pack_real_uint32(x, d);
    } else {
        msgpack_pack_real_uint64(x, d);
    }
#endif
}
557 |
558 | //#undef msgpack_pack_inline_func_cint
559 | //#endif
560 |
561 |
562 |
563 | /*
564 | * Float
565 | */
566 |
/* Pack a float: 0xca header byte followed by the 4-byte bit image of the
 * value, obtained by type-punning through a union and written with
 * _msgpack_store32. */
static inline int msgpack_pack_float(msgpack_packer* x, float d)
{
    union { float f; uint32_t i; } mem;
    mem.f = d;
    unsigned char buf[5];
    buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i);
    msgpack_pack_append_buffer(x, buf, 5);
}
575 |
/* Pack a double: 0xcb header byte followed by the 8-byte bit image of the
 * value (union type-pun + _msgpack_store64).  Old ARM OABI stores the two
 * 32-bit halves of a double swapped, so they are swapped back first. */
static inline int msgpack_pack_double(msgpack_packer* x, double d)
{
    union { double f; uint64_t i; } mem;
    mem.f = d;
    unsigned char buf[9];
    buf[0] = 0xcb;
#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi
    // https://github.com/msgpack/msgpack-perl/pull/1
    mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL);
#endif
    _msgpack_store64(&buf[1], mem.i);
    msgpack_pack_append_buffer(x, buf, 9);
}
589 |
590 |
591 | /*
592 | * Nil
593 | */
594 |
/* Pack msgpack nil (single byte 0xc0). */
static inline int msgpack_pack_nil(msgpack_packer* x)
{
    static const unsigned char d = 0xc0;
    msgpack_pack_append_buffer(x, &d, 1);
}
600 |
601 |
602 | /*
603 | * Boolean
604 | */
605 |
/* Pack boolean true (single byte 0xc3). */
static inline int msgpack_pack_true(msgpack_packer* x)
{
    static const unsigned char d = 0xc3;
    msgpack_pack_append_buffer(x, &d, 1);
}
611 |
/* Pack boolean false (single byte 0xc2). */
static inline int msgpack_pack_false(msgpack_packer* x)
{
    static const unsigned char d = 0xc2;
    msgpack_pack_append_buffer(x, &d, 1);
}
617 |
618 |
619 | /*
620 | * Array
621 | */
622 |
/* Pack an array header for n elements: fixarray (0x90|n, n < 16),
 * array16 (0xdc, n < 65536), else array32 (0xdd).  The n elements
 * themselves are packed by the caller afterwards. */
static inline int msgpack_pack_array(msgpack_packer* x, unsigned int n)
{
    if(n < 16) {
        unsigned char d = 0x90 | n;
        msgpack_pack_append_buffer(x, &d, 1);
    } else if(n < 65536) {
        unsigned char buf[3];
        buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5];
        buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}
638 |
639 |
640 | /*
641 | * Map
642 | */
643 |
/* Pack a map header for n key/value pairs: fixmap (0x80|n, n < 16),
 * map16 (0xde, n < 65536), else map32 (0xdf).  The 2*n entries are
 * packed by the caller afterwards. */
static inline int msgpack_pack_map(msgpack_packer* x, unsigned int n)
{
    if(n < 16) {
        unsigned char d = 0x80 | n;
        msgpack_pack_append_buffer(x, &TAKE8_8(d), 1);
    } else if(n < 65536) {
        unsigned char buf[3];
        buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5];
        buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}
659 |
660 |
661 | /*
662 | * Raw
663 | */
664 |
/* Pack a raw/str header for l payload bytes: fixstr (0xa0|l, l < 32),
 * str8 (0xd9, l < 256, only when the packer opted into the bin-enabled
 * format revision), str16 (0xda), else str32 (0xdb).  The payload is
 * appended separately via msgpack_pack_raw_body. */
static inline int msgpack_pack_raw(msgpack_packer* x, size_t l)
{
    if (l < 32) {
        unsigned char d = 0xa0 | (uint8_t)l;
        msgpack_pack_append_buffer(x, &TAKE8_8(d), 1);
    } else if (x->use_bin_type && l < 256) { // str8 is new format introduced with bin.
        unsigned char buf[2] = {0xd9, (uint8_t)l};
        msgpack_pack_append_buffer(x, buf, 2);
    } else if (l < 65536) {
        unsigned char buf[3];
        buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5];
        buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}
683 |
684 | /*
685 | * bin
686 | */
/* Pack a bin header for l payload bytes: bin8 (0xc4, l < 256),
 * bin16 (0xc5, l < 65536), else bin32 (0xc6).  When the packer was not
 * configured with use_bin_type, delegate to the legacy raw/str family. */
static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
{
    if (!x->use_bin_type) {
        return msgpack_pack_raw(x, l);
    }
    if (l < 256) {
        unsigned char buf[2] = {0xc4, (unsigned char)l};
        msgpack_pack_append_buffer(x, buf, 2);
    } else if (l < 65536) {
        unsigned char buf[3] = {0xc5};
        _msgpack_store16(&buf[1], (uint16_t)l);
        msgpack_pack_append_buffer(x, buf, 3);
    } else {
        unsigned char buf[5] = {0xc6};
        _msgpack_store32(&buf[1], (uint32_t)l);
        msgpack_pack_append_buffer(x, buf, 5);
    }
}
705 |
/* Append l bytes of payload data to the packer's buffer (no-op for l == 0). */
static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l)
{
    if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
    return 0;
}
711 |
712 | /*
713 | * Ext
714 | */
/* Pack an ext header: fixext1/2/4/8/16 (0xd4..0xd8) for the exact payload
 * sizes 1, 2, 4, 8 and 16, else ext8 (0xc7, l < 256), ext16 (0xc8,
 * l < 65536), or ext32 (0xc9).  The typecode byte follows the length
 * field; the l payload bytes are appended separately by the caller. */
static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l)
{
    if (l == 1) {
        unsigned char buf[2];
        buf[0] = 0xd4;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l == 2) {
        unsigned char buf[2];
        buf[0] = 0xd5;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l == 4) {
        unsigned char buf[2];
        buf[0] = 0xd6;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l == 8) {
        unsigned char buf[2];
        buf[0] = 0xd7;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l == 16) {
        unsigned char buf[2];
        buf[0] = 0xd8;
        buf[1] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 2);
    }
    else if(l < 256) {
        unsigned char buf[3];
        buf[0] = 0xc7;
        buf[1] = l;
        buf[2] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 3);
    } else if(l < 65536) {
        unsigned char buf[4];
        buf[0] = 0xc8;
        _msgpack_store16(&buf[1], (uint16_t)l);
        buf[3] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 4);
    } else {
        unsigned char buf[6];
        buf[0] = 0xc9;
        _msgpack_store32(&buf[1], (uint32_t)l);
        buf[5] = (unsigned char)typecode;
        msgpack_pack_append_buffer(x, buf, 6);
    }

}
768 |
769 |
770 |
771 | #undef msgpack_pack_append_buffer
772 |
773 | #undef TAKE8_8
774 | #undef TAKE8_16
775 | #undef TAKE8_32
776 | #undef TAKE8_64
777 |
778 | #undef msgpack_pack_real_uint8
779 | #undef msgpack_pack_real_uint16
780 | #undef msgpack_pack_real_uint32
781 | #undef msgpack_pack_real_uint64
782 | #undef msgpack_pack_real_int8
783 | #undef msgpack_pack_real_int16
784 | #undef msgpack_pack_real_int32
785 | #undef msgpack_pack_real_int64
786 |
--------------------------------------------------------------------------------
/pandas_msgpack/packers.py:
--------------------------------------------------------------------------------
1 | """
2 | Msgpack serializer support for reading and writing pandas data structures
3 | to disk
4 |
5 | portions of msgpack_numpy package, by Lev Givon were incorporated
6 | into this module (and tests_packers.py)
7 |
8 | License
9 | =======
10 |
11 | Copyright (c) 2013, Lev Givon.
12 | All rights reserved.
13 |
14 | Redistribution and use in source and binary forms, with or without
15 | modification, are permitted provided that the following conditions are
16 | met:
17 |
18 | * Redistributions of source code must retain the above copyright
19 | notice, this list of conditions and the following disclaimer.
20 | * Redistributions in binary form must reproduce the above
21 | copyright notice, this list of conditions and the following
22 | disclaimer in the documentation and/or other materials provided
23 | with the distribution.
24 | * Neither the name of Lev Givon nor the names of any
25 | contributors may be used to endorse or promote products derived
26 | from this software without specific prior written permission.
27 |
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 | """
40 |
41 | from datetime import datetime, date, timedelta
42 | from dateutil.parser import parse
43 | import os
44 | from textwrap import dedent
45 | import warnings
46 |
47 | import numpy as np
48 | from pandas import compat
49 | from pandas.compat import u, u_safe
50 |
51 | from pandas.types.common import (is_categorical_dtype, is_object_dtype,
52 | needs_i8_conversion, pandas_dtype)
53 |
54 | from pandas import (Timestamp, Period, Series, DataFrame, # noqa
55 | Index, MultiIndex, Float64Index, Int64Index,
56 | Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT,
57 | Categorical, CategoricalIndex)
58 | from pandas.sparse.api import SparseSeries, SparseDataFrame
59 | from pandas.sparse.array import BlockIndex, IntIndex
60 | from pandas.core.generic import NDFrame
61 | from pandas.core.common import PerformanceWarning
62 | from pandas.io.common import get_filepath_or_buffer
63 | from pandas.core.internals import BlockManager, make_block, _safe_reshape
64 | import pandas.core.internals as internals
65 |
66 | from pandas_msgpack import _is_pandas_legacy_version
67 | from pandas_msgpack.msgpack import (Unpacker as _Unpacker,
68 | Packer as _Packer,
69 | ExtType)
70 | from pandas_msgpack._move import (
71 | BadMove as _BadMove,
72 | move_into_mutable_buffer as _move_into_mutable_buffer,
73 | )
74 |
75 | NaTType = type(NaT)
76 |
# check which compression libs we have installed
# (define a probe that is a no-op when zlib imported, and raises otherwise,
# so the failure is deferred until a caller actually asks for compression)
try:
    import zlib

    def _check_zlib():
        pass
except ImportError:
    def _check_zlib():
        raise ImportError('zlib is not installed')

_check_zlib.__doc__ = dedent(
    """\
    Check if zlib is installed.

    Raises
    ------
    ImportError
        Raised when zlib is not installed.
    """,
)
97 |
# same deferred-failure probe pattern for the optional blosc compressor
try:
    import blosc

    def _check_blosc():
        pass
except ImportError:
    def _check_blosc():
        raise ImportError('blosc is not installed')

_check_blosc.__doc__ = dedent(
    """\
    Check if blosc is installed.

    Raises
    ------
    ImportError
        Raised when blosc is not installed.
    """,
)
117 |
# until we can pass this into our conversion functions,
# this is pretty hacky
# (module-level state: set by ``to_msgpack`` and read by ``convert``/``encode``)
compressor = None
121 |
122 |
def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
        if None, return generated string
    args : an object or objects to serialize
    encoding : encoding for unicode objects
    append : boolean whether to append to an existing msgpack
        (default is False)
    compress : type of compressor (zlib or blosc), default to None (no
        compression)
    """
    # stash the compression choice in the module-level global that the
    # conversion functions read (see the note above its definition)
    global compressor
    compressor = kwargs.pop('compress', None)
    if compressor:
        compressor = u(compressor)

    mode = 'a+b' if kwargs.pop('append', None) else 'wb'

    def _write_all(handle):
        # serialize each positional argument in turn onto the stream
        for obj in args:
            handle.write(pack(obj, **kwargs))

    if isinstance(path_or_buf, compat.string_types):
        # a file path: open (append or truncate) and write
        with open(path_or_buf, mode) as handle:
            _write_all(handle)
    elif path_or_buf is None:
        # no target: serialize into memory and hand back the bytes
        sink = compat.BytesIO()
        _write_all(sink)
        return sink.getvalue()
    else:
        # assume an already-open buffer-like object
        _write_all(path_or_buf)
161 |
162 |
def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
    """
    Load msgpack pandas object from the specified
    file path

    Parameters
    ----------
    path_or_buf : string File path, BytesIO like or string
    encoding : Encoding for decoding msgpack str type
    iterator : boolean, if True, return an iterator to the unpacker
        (default is False)

    Returns
    -------
    obj : type of object stored in file

    """
    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def _read_all(fh):
        # a file may hold several packed objects; unwrap a singleton
        unpacked = list(unpack(fh, encoding=encoding, **kwargs))
        return unpacked[0] if len(unpacked) == 1 else unpacked

    # first, see if the input is a path to an existing file on disk
    if isinstance(path_or_buf, compat.string_types):
        try:
            exists = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            exists = False

        if exists:
            with open(path_or_buf, 'rb') as fh:
                return _read_all(fh)

    # next, raw bytes are wrapped in an in-memory buffer
    if isinstance(path_or_buf, compat.binary_type):
        fh = None
        try:
            fh = compat.BytesIO(path_or_buf)
            return _read_all(fh)
        finally:
            if fh is not None:
                fh.close()

    # finally, accept anything sufficiently file-like
    if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        return _read_all(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')
217 |
218 |
# Mapping from serialized dtype tokens to the numpy dtype to rebuild with.
# String keys are dtype names; the bare integer keys (21, 22, 7) are
# presumably legacy numeric codes from older writers -- TODO confirm.
dtype_dict = {21: np.dtype('M8[ns]'),
              u('datetime64[ns]'): np.dtype('M8[ns]'),
              u('datetime64[us]'): np.dtype('M8[us]'),
              22: np.dtype('m8[ns]'),
              u('timedelta64[ns]'): np.dtype('m8[ns]'),
              u('timedelta64[us]'): np.dtype('m8[us]'),

              # this is platform int, which we need to remap to np.int64
              # for compat on windows platforms
              7: np.dtype('int64'),
              'category': 'category'
              }
231 |
232 |
def dtype_for(t):
    """
    Return the dtype mapped to a stored token, whether number or name.

    Parameters
    ----------
    t : str or int
        A dtype name (e.g. 'int64') or one of the legacy keys in
        ``dtype_dict``.

    Returns
    -------
    dtype-like, or ``t`` unchanged when no mapping is known
    (e.g. 'category' values pass straight through ``dtype_dict``).
    """
    if t in dtype_dict:
        return dtype_dict[t]
    # np.typeDict was a deprecated alias of np.sctypeDict and was removed
    # in numpy 1.24; sctypeDict has existed across the supported range.
    return np.sctypeDict.get(t, t)
238 |
239 |
# Map complex dtype names to the float type of their real/imag components;
# consumed by ``c2f`` when rebuilding complex scalars from strings.
c2f_dict = {'complex': np.float64,
            'complex128': np.float64,
            'complex64': np.float32}

# numpy 1.6.1 compat: float128 (and hence complex256) is not available
# on every platform/build
if hasattr(np, 'float128'):
    c2f_dict['complex256'] = np.float128
247 |
248 |
def c2f(r, i, ctype_name):
    """
    Convert strings to complex number instance with specified numpy type.

    Parameters
    ----------
    r, i : str
        String representations of the real and imaginary parts.
    ctype_name : str
        numpy complex type name ('complex64', 'complex128', ...).

    Returns
    -------
    numpy complex scalar of the requested type.
    """

    ftype = c2f_dict[ctype_name]
    # np.typeDict was removed in numpy 1.24; np.sctypeDict is the
    # long-standing equivalent name-to-scalar-type mapping.
    return np.sctypeDict[ctype_name](ftype(r) + 1j * ftype(i))
256 |
257 |
def convert(values):
    """
    Convert the numpy values to a serializable payload.

    Categorical data pass through unchanged and object arrays become
    plain lists; everything else is flattened and shipped as an
    ``ExtType(0, bytes)`` payload, optionally compressed according to the
    module-level ``compressor`` setting ('zlib' or 'blosc').

    Parameters
    ----------
    values : np.ndarray or Categorical

    Returns
    -------
    list, Categorical, or ExtType
    """

    dtype = values.dtype

    if is_categorical_dtype(values):
        return values

    elif is_object_dtype(dtype):
        return values.ravel().tolist()

    if needs_i8_conversion(dtype):
        # datetimelike/timedelta data are serialized via their i8 view
        values = values.view('i8')
    v = values.ravel()

    if compressor == 'zlib':
        _check_zlib()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array; tobytes() is the exact, non-deprecated
        # spelling of tostring() (which was removed from numpy)
        v = v.tobytes()
        return ExtType(0, zlib.compress(v))

    elif compressor == 'blosc':
        _check_blosc()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        v = v.tobytes()
        return ExtType(0, blosc.compress(v, typesize=dtype.itemsize))

    # ndarray (on original dtype)
    return ExtType(0, v.tobytes())
297 |
298 |
def unconvert(values, dtype, compress=None):
    """
    Reverse of ``convert``: rebuild numpy values from a serialized payload.

    Parameters
    ----------
    values : ExtType, bytes, str, or Categorical
        The serialized data; ExtType with code 0 marks an as-is byte
        payload produced by ``convert``.
    dtype : dtype-like
        Target dtype for the rebuilt array (ignored for categorical).
    compress : {None, u'zlib', u'blosc'}
        Compression that was applied at write time, if any.

    Returns
    -------
    np.ndarray, or the input unchanged for categorical/object payloads.

    Raises
    ------
    ValueError
        If ``compress`` names an unknown compressor.
    """

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        # legacy text payloads: latin1 round-trips arbitrary byte values
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            # zero-copy path: steal the decompressed bytes object's buffer
            # and reinterpret it as a mutable array
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to copying `np.fromstring`

    # Copy the string into a numpy array.
    # NOTE(review): np.fromstring is deprecated (removed in numpy 2.0);
    # np.frombuffer(values, dtype=dtype).copy() looks equivalent, but
    # confirm callers rely on the copy's writability before changing.
    return np.fromstring(values, dtype=dtype)
352 |
353 |
def encode(obj):
    """
    Data encoder.

    Translate pandas/numpy objects into msgpack-friendly dicts tagged with
    a u'typ' key that ``decode`` can reverse.  Objects that are not
    recognized are returned unchanged so the underlying packer can handle
    the native types itself.
    """
    tobj = type(obj)
    if isinstance(obj, Index):
        # Index subclasses are tested most-specific first
        if isinstance(obj, RangeIndex):
            return {u'typ': u'range_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'start': getattr(obj, '_start', None),
                    u'stop': getattr(obj, '_stop', None),
                    u'step': getattr(obj, '_step', None)}
        elif isinstance(obj, PeriodIndex):
            return {u'typ': u'period_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.asi8),
                    u'compress': compressor}
        elif isinstance(obj, DatetimeIndex):
            tz = getattr(obj, 'tz', None)

            # store tz info and data as UTC
            if tz is not None:
                tz = u(tz.zone)
                obj = obj.tz_convert('UTC')
            return {u'typ': u'datetime_index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.asi8),
                    u'freq': u_safe(getattr(obj, 'freqstr', None)),
                    u'tz': tz,
                    u'compress': compressor}
        elif isinstance(obj, MultiIndex):
            return {u'typ': u'multi_index',
                    u'klass': u(obj.__class__.__name__),
                    u'names': getattr(obj, 'names', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}
        else:
            return {u'typ': u'index',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}

    elif isinstance(obj, Categorical):
        return {u'typ': u'category',
                u'klass': u(obj.__class__.__name__),
                u'name': getattr(obj, 'name', None),
                u'codes': obj.codes,
                u'categories': obj.categories,
                u'ordered': obj.ordered,
                u'compress': compressor}

    elif isinstance(obj, Series):
        if isinstance(obj, SparseSeries):
            raise NotImplementedError(
                'msgpack sparse series is not implemented'
            )
            # d = {'typ': 'sparse_series',
            #      'klass': obj.__class__.__name__,
            #      'dtype': obj.dtype.name,
            #      'index': obj.index,
            #      'sp_index': obj.sp_index,
            #      'sp_values': convert(obj.sp_values),
            #      'compress': compressor}
            # for f in ['name', 'fill_value', 'kind']:
            #     d[f] = getattr(obj, f, None)
            # return d
        else:
            return {u'typ': u'series',
                    u'klass': u(obj.__class__.__name__),
                    u'name': getattr(obj, 'name', None),
                    u'index': obj.index,
                    u'dtype': u(obj.dtype.name),
                    u'data': convert(obj.values),
                    u'compress': compressor}
    elif issubclass(tobj, NDFrame):
        if isinstance(obj, SparseDataFrame):
            raise NotImplementedError(
                'msgpack sparse frame is not implemented'
            )
            # d = {'typ': 'sparse_dataframe',
            #      'klass': obj.__class__.__name__,
            #      'columns': obj.columns}
            # for f in ['default_fill_value', 'default_kind']:
            #     d[f] = getattr(obj, f, None)
            # d['data'] = dict([(name, ss)
            #                   for name, ss in compat.iteritems(obj)])
            # return d
        else:

            # serialize via the consolidated internal block manager
            data = obj._data
            if not data.is_consolidated():
                data = data.consolidate()

            # the block manager
            return {u'typ': u'block_manager',
                    u'klass': u(obj.__class__.__name__),
                    u'axes': data.axes,
                    u'blocks': [{u'locs': b.mgr_locs.as_array,
                                 u'values': convert(b.values),
                                 u'shape': b.values.shape,
                                 u'dtype': u(b.dtype.name),
                                 u'klass': u(b.__class__.__name__),
                                 u'compress': compressor} for b in data.blocks]
                    }

    elif isinstance(obj, (datetime, date, np.datetime64, timedelta,
                          np.timedelta64, NaTType)):
        if isinstance(obj, Timestamp):
            tz = obj.tzinfo
            if tz is not None:
                tz = u(tz.zone)
            freq = obj.freq
            if freq is not None:
                freq = u(freq.freqstr)
            return {u'typ': u'timestamp',
                    u'value': obj.value,
                    u'freq': freq,
                    u'tz': tz}
        if isinstance(obj, NaTType):
            return {u'typ': u'nat'}
        elif isinstance(obj, np.timedelta64):
            return {u'typ': u'timedelta64',
                    u'data': obj.view('i8')}
        elif isinstance(obj, timedelta):
            return {u'typ': u'timedelta',
                    u'data': (obj.days, obj.seconds, obj.microseconds)}
        elif isinstance(obj, np.datetime64):
            return {u'typ': u'datetime64',
                    u'data': u(str(obj))}
        elif isinstance(obj, datetime):
            return {u'typ': u'datetime',
                    u'data': u(obj.isoformat())}
        elif isinstance(obj, date):
            return {u'typ': u'date',
                    u'data': u(obj.isoformat())}
        raise Exception("cannot encode this datetimelike object: %s" % obj)
    elif isinstance(obj, Period):
        return {u'typ': u'period',
                u'ordinal': obj.ordinal,
                u'freq': u(obj.freq)}
    elif isinstance(obj, BlockIndex):
        return {u'typ': u'block_index',
                u'klass': u(obj.__class__.__name__),
                u'blocs': obj.blocs,
                u'blengths': obj.blengths,
                u'length': obj.length}
    elif isinstance(obj, IntIndex):
        return {u'typ': u'int_index',
                u'klass': u(obj.__class__.__name__),
                u'indices': obj.indices,
                u'length': obj.length}
    elif isinstance(obj, np.ndarray):
        return {u'typ': u'ndarray',
                u'shape': obj.shape,
                u'ndim': obj.ndim,
                u'dtype': u(obj.dtype.name),
                u'data': convert(obj),
                u'compress': compressor}
    elif isinstance(obj, np.number):
        # complex scalars ship as repr strings of their parts
        if np.iscomplexobj(obj):
            return {u'typ': u'np_scalar',
                    u'sub_typ': u'np_complex',
                    u'dtype': u(obj.dtype.name),
                    u'real': u(obj.real.__repr__()),
                    u'imag': u(obj.imag.__repr__())}
        else:
            return {u'typ': u'np_scalar',
                    u'dtype': u(obj.dtype.name),
                    u'data': u(obj.__repr__())}
    elif isinstance(obj, complex):
        return {u'typ': u'np_complex',
                u'real': u(obj.real.__repr__()),
                u'imag': u(obj.imag.__repr__())}

    # unrecognized: let the packer deal with the native type
    return obj
538 |
539 |
def decode(obj):
    """
    Decoder for deserializing numpy data types.

    Reverses ``encode``: dicts carrying a u'typ' tag are rebuilt into the
    corresponding pandas/numpy object; anything untagged passes through
    unchanged.
    """

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        # older writers stored the frequency under 'offset'
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'],
                                        obj[u'stop'],
                                        obj[u'step'],
                                        name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        if _is_pandas_legacy_version:
            # legacy pandas accepts ordinals via the constructor
            return globals()[obj[u'klass']](data, **d)
        else:
            return globals()[obj[u'klass']]._from_ordinals(data, **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion (the data were stored as UTC)
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],
                          categories=obj[u'categories'],
                          ordered=obj[u'ordered'])

    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)

        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=pd_dtype,
                                          name=obj[u'name'])
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = _safe_reshape(unconvert(
                b[u'values'], dtype_for(b[u'dtype']),
                b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        # np.typeDict was removed in numpy 1.24; sctypeDict is equivalent
        return unconvert(obj[u'data'], np.sctypeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except Exception:
                # was a bare ``except:`` which also swallowed SystemExit /
                # KeyboardInterrupt; fall back to the scalar type when the
                # mapped dtype object is not directly constructible
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
679 |
680 |
def pack(o, default=encode,
         encoding='utf-8', unicode_errors='strict', use_single_float=False,
         autoreset=True, use_bin_type=True):
    """
    Pack an object and return the packed bytes.

    Parameters
    ----------
    o : object
        The object to serialize.
    default : callable, default ``encode``
        Hook invoked by the packer for objects it cannot serialize
        natively (used here to handle pandas/numpy objects).
    encoding : str, default 'utf-8'
        Encoding used when packing unicode strings.
    unicode_errors : str, default 'strict'
        Error-handling scheme for encoding failures.
    use_single_float : bool, default False
        If True, pack floats as single precision (4 bytes) rather
        than double precision.
    autoreset : bool, default True
        Reset the packer's internal buffer after each pack call.
    use_bin_type : bool, default True
        Use the msgpack bin type so bytes and str round-trip
        distinctly.

    Returns
    -------
    bytes
        The msgpack-serialized representation of ``o``.
    """
    # Booleans (True) instead of bare ints (1) for the flag defaults;
    # values are equivalent, so this is backward-compatible.
    return Packer(default=default, encoding=encoding,
                  unicode_errors=unicode_errors,
                  use_single_float=use_single_float,
                  autoreset=autoreset,
                  use_bin_type=use_bin_type).pack(o)
693 |
694 |
def unpack(packed, object_hook=decode,
           list_hook=None, use_list=False, encoding='utf-8',
           unicode_errors='strict', object_pairs_hook=None,
           max_buffer_size=0, ext_hook=ExtType):
    """
    Unpack a packed object, returning an iterator over the decoded
    objects.

    Note: packed lists will be returned as tuples.
    """
    # Collect the options once, then hand them to the Unpacker
    # wrapper along with the packed payload.
    options = dict(object_hook=object_hook,
                   list_hook=list_hook,
                   use_list=use_list,
                   encoding=encoding,
                   unicode_errors=unicode_errors,
                   object_pairs_hook=object_pairs_hook,
                   max_buffer_size=max_buffer_size,
                   ext_hook=ext_hook)
    return Unpacker(packed, **options)
711 |
712 |
class Packer(_Packer):
    """Thin wrapper around ``_Packer`` with pandas-aware defaults.

    Installs :func:`encode` as the default serialization hook and
    enables the msgpack bin type by default.
    """

    def __init__(self, default=encode,
                 encoding='utf-8',
                 unicode_errors='strict',
                 use_single_float=False,
                 autoreset=1,
                 use_bin_type=1):
        # Forward everything unchanged to the underlying packer.
        options = dict(default=default,
                       encoding=encoding,
                       unicode_errors=unicode_errors,
                       use_single_float=use_single_float,
                       autoreset=autoreset,
                       use_bin_type=use_bin_type)
        super(Packer, self).__init__(**options)
727 |
728 |
class Unpacker(_Unpacker):
    """Thin wrapper around ``_Unpacker`` with pandas-aware defaults.

    Installs :func:`decode` as the default object hook and disables
    ``use_list`` so packed lists come back as tuples.
    """

    def __init__(self, file_like=None, read_size=0, use_list=False,
                 object_hook=decode,
                 object_pairs_hook=None, list_hook=None, encoding='utf-8',
                 unicode_errors='strict', max_buffer_size=0, ext_hook=ExtType):
        # Forward everything unchanged to the underlying unpacker.
        options = dict(file_like=file_like,
                       read_size=read_size,
                       use_list=use_list,
                       object_hook=object_hook,
                       object_pairs_hook=object_pairs_hook,
                       list_hook=list_hook,
                       encoding=encoding,
                       unicode_errors=unicode_errors,
                       max_buffer_size=max_buffer_size,
                       ext_hook=ext_hook)
        super(Unpacker, self).__init__(**options)
745 |
746 |
class Iterator(object):

    """Manage the unpacking iteration over a path, buffer, or
    file-like object, closing any file we opened on completion.

    Parameters
    ----------
    path : str, bytes, or file-like
        A filesystem path to open, a packed byte buffer, or an
        already-open file-like object (which the caller owns and
        which will NOT be closed here).
    **kwargs
        Stored on the instance.
        NOTE(review): kwargs is never forwarded to ``unpack`` below —
        presumably it was intended to be; confirm before changing,
        since forwarding would turn currently-ignored keys into
        errors.
    """

    def __init__(self, path, **kwargs):
        self.path = path
        self.kwargs = kwargs

    def __iter__(self):

        # BUGFIX: initialize fh so the finally block cannot raise
        # UnboundLocalError (masking the original exception) if an
        # error occurs before fh is assigned, e.g. open() failing.
        fh = None
        needs_closing = True
        try:

            # see if we have an actual file
            if isinstance(self.path, compat.string_types):

                try:
                    path_exists = os.path.exists(self.path)
                except TypeError:
                    # os.path.exists can raise TypeError for odd
                    # path-like values; treat those as buffers
                    path_exists = False

                if path_exists:
                    fh = open(self.path, 'rb')
                else:
                    # treat the string as an in-memory packed buffer
                    fh = compat.BytesIO(self.path)

            else:

                if not hasattr(self.path, 'read'):
                    # raw bytes: wrap in an in-memory buffer
                    fh = compat.BytesIO(self.path)

                else:

                    # a file-like; the caller owns it, so don't close
                    needs_closing = False
                    fh = self.path

            unpacker = unpack(fh)
            for o in unpacker:
                yield o
        finally:
            # only close handles we opened ourselves
            if needs_closing and fh is not None:
                fh.close()
791 |
--------------------------------------------------------------------------------