├── tests
├── samples
│ ├── list-depths.avro
│ ├── list-lengths.avro
│ ├── mc10events.root
│ ├── list-depths.parquet
│ ├── list-lengths.parquet
│ ├── nano-2017-08-31.root
│ ├── nullable-depths.avro
│ ├── nullable-levels.avro
│ ├── record-primitives.avro
│ ├── list-depths-records.avro
│ ├── list-depths-simple.avro
│ ├── list-depths-strings.avro
│ ├── nonnullable-depths.avro
│ ├── nullable-depths.parquet
│ ├── nullable-levels.parquet
│ ├── list-depths-records.parquet
│ ├── list-depths-simple.parquet
│ ├── list-depths-strings.parquet
│ ├── nonnullable-depths.parquet
│ ├── nullable-list-depths.avro
│ ├── record-primitives.parquet
│ ├── list-depths-records-list.avro
│ ├── nullable-list-depths.parquet
│ ├── list-depths-records-list.parquet
│ ├── nullable-record-primitives.avro
│ ├── nullable-list-depths-records.avro
│ ├── nullable-list-depths-strings.avro
│ ├── nullable-record-primitives.parquet
│ ├── nullable-list-depths-records.parquet
│ ├── nullable-list-depths-strings.parquet
│ ├── nullable-list-depths-records-list.avro
│ ├── nullable-record-primitives-simple.avro
│ ├── nullable-list-depths-records-list.parquet
│ └── nullable-record-primitives-simple.parquet
├── test_issues.py
├── __init__.py
├── test_backend_numpyfile.py
├── test_backend_root.py
├── test_fill.py
├── test_database.py
└── test_proxy.py
├── .travis-conda.py
├── .travis.yml
├── LICENSE
├── oamap
├── backend
│ ├── __init__.py
│ ├── numpyfile.py
│ ├── arrow.py
│ ├── root
│ │ ├── cmsnano.py
│ │ └── __init__.py
│ └── packing.py
├── extension
│ ├── __init__.py
│ └── common.py
├── version.py
├── __init__.py
├── util.py
├── proxy.py
├── inference.py
├── fill.py
├── fillable.py
└── dataset.py
├── .gitignore
├── setup.py
└── README.rst
/tests/samples/list-depths.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths.avro
--------------------------------------------------------------------------------
/tests/samples/list-lengths.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-lengths.avro
--------------------------------------------------------------------------------
/tests/samples/mc10events.root:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/mc10events.root
--------------------------------------------------------------------------------
/tests/samples/list-depths.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths.parquet
--------------------------------------------------------------------------------
/tests/samples/list-lengths.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-lengths.parquet
--------------------------------------------------------------------------------
/tests/samples/nano-2017-08-31.root:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nano-2017-08-31.root
--------------------------------------------------------------------------------
/tests/samples/nullable-depths.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-depths.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-levels.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-levels.avro
--------------------------------------------------------------------------------
/tests/samples/record-primitives.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/record-primitives.avro
--------------------------------------------------------------------------------
/tests/samples/list-depths-records.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records.avro
--------------------------------------------------------------------------------
/tests/samples/list-depths-simple.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-simple.avro
--------------------------------------------------------------------------------
/tests/samples/list-depths-strings.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-strings.avro
--------------------------------------------------------------------------------
/tests/samples/nonnullable-depths.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nonnullable-depths.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-depths.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-depths.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-levels.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-levels.parquet
--------------------------------------------------------------------------------
/tests/samples/list-depths-records.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records.parquet
--------------------------------------------------------------------------------
/tests/samples/list-depths-simple.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-simple.parquet
--------------------------------------------------------------------------------
/tests/samples/list-depths-strings.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-strings.parquet
--------------------------------------------------------------------------------
/tests/samples/nonnullable-depths.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nonnullable-depths.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths.avro
--------------------------------------------------------------------------------
/tests/samples/record-primitives.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/record-primitives.parquet
--------------------------------------------------------------------------------
/tests/samples/list-depths-records-list.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records-list.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths.parquet
--------------------------------------------------------------------------------
/tests/samples/list-depths-records-list.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records-list.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-record-primitives.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths-records.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths-strings.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-strings.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-record-primitives.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths-records.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths-strings.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-strings.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths-records-list.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records-list.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-record-primitives-simple.avro:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives-simple.avro
--------------------------------------------------------------------------------
/tests/samples/nullable-list-depths-records-list.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records-list.parquet
--------------------------------------------------------------------------------
/tests/samples/nullable-record-primitives-simple.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives-simple.parquet
--------------------------------------------------------------------------------
/tests/test_issues.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | class TestIssues(unittest.TestCase):
5 |
6 | def runTest(self):
7 | pass
8 |
9 | def test_issue7(self):
10 | with open('README.rst') as f:
11 | try:
12 | content = f.read()
13 | except UnicodeDecodeError as e:
14 | self.fail("Cannot read README.rst: " + str(e))
15 |
--------------------------------------------------------------------------------
/.travis-conda.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Travis CI helper: download and set up Miniconda for the build's Python
# version.  Miniconda has no build for Python 2.6, so that case writes an
# empty stub "activate" script instead, letting .travis.yml "source" it
# unconditionally.

import os

version = os.environ["TRAVIS_PYTHON_VERSION"]

if version == "2.6":
    miniconda = False

elif version == "2.7":
    miniconda = True
    os.system("wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh")

else:
    miniconda = True
    os.system("wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh")

if miniconda:
    # Hoist the repeated environment lookup; every command runs under $HOME.
    home = os.environ["HOME"]
    os.system("bash miniconda.sh -b -p {0}/miniconda".format(home))
    os.system("{0}/miniconda/bin/conda config --set always_yes yes --set changeps1 no".format(home))
    os.system("{0}/miniconda/bin/conda update -q conda".format(home))
    os.system("{0}/miniconda/bin/conda info -a".format(home))
    # $TRAVIS_PYTHON_VERSION is expanded by the shell that os.system spawns.
    os.system("{0}/miniconda/bin/conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numba".format(home))
    os.system("source {0}/miniconda/bin/activate test-environment; python setup.py install".format(home))

else:
    os.system("mkdir -p miniconda/bin")
    # Fix: the original open(...).write("") leaked the file handle; use a
    # context manager so the stub file is closed (and flushed) promptly.
    with open("miniconda/bin/activate", "w") as stub:
        stub.write("")
27 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | os:
4 | - linux
5 |
6 | python:
7 | - 2.6
8 | - 2.7
9 | - 3.4
10 | - 3.5
11 | - 3.6
12 |
13 | addons:
14 | apt:
15 | packages:
16 | - python-setuptools
17 | - libsnappy-dev
18 |
19 | install:
20 | - sudo apt-get update
21 | - pip install --upgrade pip
 22 |   - pip install --upgrade setuptools_scm
25 |
26 | script:
27 | python .travis-conda.py ; source $HOME/miniconda/bin/activate test-environment; python setup.py test
28 |
29 | deploy:
30 | provider: pypi
31 | user: pivarski
32 | password:
33 | secure: "irt16TqzfFa1A47AgrSEnZz89Tam7g36wUMFRB2cseipVDzk1pmN8xcxj2xebpRXWHhyKmpPUetQ1gwgYn5brK5xl0iQ/eNT4U3tWLWowtBxINYhhErSSAnMVGX+FJliex5fv/yEuU158BviLPLjhYMDXjtFH6TQmFExSoHTaZL8aX0Xswt8Ku0etJHgf4O8D2b1L5yQ1fOHy2vBhfGXhT8jI/rvwGu9DF2iJYIdnrf1jdy3aCvpiBhTUbxLO0sJVSGVpbC3L7uKwPMt+t3gb8iQL7llZL9DgCj4YEIAhLnIRhuTTXkKQ2cfYMX+b6hFiSV816Z1VR+sckfY915mPF+M/k9+m7xqcDRtYYeRsS68sKFgICdDUONR3nMvCJxYPmfSWOo0qvXPh0tjMfJ1lQOideY9ToR2fYzwzL4MGyzn/FrlXUoMNRfYJ8an1X9Xds2Bm9AVF6W1JviKOboHDDg0TqJXScy2LmMaaSdub2lN/a3iioYdK/0RtKWZ6N/qg8b0E91sVFD4zgZ/1qDm7JQhvoqmvhMQQ091Yl0xOmxmmERhMxEyYlcp+8RcAwAxl5KqwkZv2Ni2ReVBJxqfe5wvC7FP412pG1Zdd2FL2UqbyEIq2GJPE+LQGS5KhjfajWacb9wW+6tp4aCUITjU2Eboqq/y0L/R4QKS6HAWitc="
34 | on:
35 | tags: true
36 | branch: master
37 | condition: "$TRAVIS_PYTHON_VERSION = 2.7"
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2017, DIANA-HEP
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/oamap/backend/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/oamap/extension/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
--------------------------------------------------------------------------------
/oamap/version.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
import re

# Canonical version string for the package.
__version__ = "0.12.4"

# Alias for callers that prefer the unadorned name.
version = __version__

# Version components split on "." or "-" (e.g. "1.0-rc1" -> ("1", "0", "rc1")).
# Entries remain strings; they are not converted to integers.
_parts = re.split(r"[-\.]", __version__)
version_info = tuple(_parts)

# Keep the module namespace limited to the version names themselves.
del _parts
del re
38 |
--------------------------------------------------------------------------------
/oamap/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | from oamap.schema import *
32 | import oamap.compiler
33 |
34 | # convenient access to the version number
35 | from oamap.version import __version__
36 |
37 |
--------------------------------------------------------------------------------
/tests/test_backend_numpyfile.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import math
32 | import tempfile
33 | import shutil
34 |
35 | import unittest
36 |
37 | from oamap.schema import *
38 | from oamap.backend.numpyfile import *
39 |
40 | class TestBackendNumpyfile(unittest.TestCase):
41 | def runTest(self):
42 | pass
43 |
44 | def test_database(self):
45 | tmpdir = tempfile.mkdtemp()
46 | try:
47 | db = NumpyFileDatabase(tmpdir)
48 | db.fromdata("one", List(Record({"x": "int32", "y": "float64"})), [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}], [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}, {"x": 6, "y": 6.6}])
49 |
50 | db.data.two = db.data.one.define("z", lambda obj: obj.x + obj.y)
51 |
52 | self.assertEqual([(obj.x, obj.y, obj.z) for obj in db.data.two], [(1, 1.1, 2.1), (2, 2.2, 4.2), (3, 3.3, 6.3), (4, 4.4, 8.4), (5, 5.5, 10.5), (6, 6.6, 12.6)])
53 |
54 | del db.data.one
55 | del db.data.two
56 |
57 | finally:
58 | shutil.rmtree(tmpdir)
59 |
--------------------------------------------------------------------------------
/tests/test_backend_root.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import math
32 | import tempfile
33 | import shutil
34 |
35 | import unittest
36 |
37 | import oamap.backend.root
38 | import oamap.database
39 |
class TestBackendRoot(unittest.TestCase):
    """Reads tests/samples/mc10events.root through the ROOT backend."""

    def runTest(self):
        pass

    def test_database(self):
        ds = oamap.backend.root.dataset("tests/samples/mc10events.root", "Events")

        expected = "28.555809"
        self.assertEqual(repr(ds[0].Electron[0].pt), expected)

        db = oamap.database.InMemoryDatabase()
        db.data.one = ds

        # the same value must be reachable through the database
        self.assertEqual(repr(db.data.one[0].Electron[0].pt), expected)

    def test_transform(self):
        ds = oamap.backend.root.dataset("tests/samples/mc10events.root", "Events")

        expected = "-17.956890574044056"
        self.assertEqual(repr(ds[0].Electron[0].pt * math.sinh(ds[0].Electron[0].eta)), expected)

        db = oamap.database.InMemoryDatabase.writable(oamap.database.DictBackend())
        db.data.one = ds.define("pz", lambda x: x.pt * math.sinh(x.eta), at="Electron", numba=False)

        # the derived column must agree with the hand computation above
        self.assertEqual(repr(db.data.one[0].Electron[0].pz), expected)
64 |
--------------------------------------------------------------------------------
/oamap/backend/numpyfile.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import numpy
32 |
33 | import oamap.database
34 |
class NumpyFileBackend(oamap.database.FilesystemBackend):
    """Filesystem backend that persists every array as a .npy file."""

    def __init__(self, directory):
        super(NumpyFileBackend, self).__init__(directory, arraysuffix=".npy")

    @property
    def args(self):
        # constructor arguments, used when the backend is reconstructed
        return (self._directory,)

    def tojson(self):
        classname = self.__class__.__module__ + "." + self.__class__.__name__
        return {"class": classname, "directory": self._directory}

    @staticmethod
    def fromjson(obj, namespace):
        return NumpyFileBackend(obj["directory"])

    def instantiate(self, partitionid):
        # closures translating an array name into its on-disk path
        def readpath(name):
            return self.fullname(partitionid, name, create=False)

        def writepath(name):
            return self.fullname(partitionid, name, create=True)

        return NumpyArrays(readpath, writepath)
54 |
class NumpyArrays(object):
    """Dict-like array store backed by numpy.load / numpy.save.

    *loadname* and *storename* are callables mapping an array name to the
    file path used for reading and writing, respectively.
    """

    def __init__(self, loadname, storename):
        self._loadname = loadname
        self._storename = storename

    def __getitem__(self, name):
        path = self._loadname(name)
        return numpy.load(path)

    def __setitem__(self, name, value):
        path = self._storename(name)
        numpy.save(path, value)
65 |
class NumpyFileDatabase(oamap.database.FilesystemDatabase):
    """Filesystem database rooted at *directory*, backed entirely by .npy files."""

    def __init__(self, directory, namespace=""):
        backend = NumpyFileBackend(directory)
        super(NumpyFileDatabase, self).__init__(directory, backends={namespace: backend}, namespace=namespace)
69 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2017, DIANA-HEP
5 | # All rights reserved.
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # * Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # * Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # * Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
32 | import os.path
33 |
34 | from setuptools import find_packages
35 | from setuptools import setup
36 |
def get_version():
    """Read oamap/version.py and return its __version__ string.

    The module is executed in an isolated namespace rather than imported so
    that setup.py works before the package itself is importable.
    """
    g = {}
    # use a context manager so the file handle is closed promptly
    # (the original open(...).read() leaked the handle)
    with open(os.path.join("oamap", "version.py")) as f:
        exec(f.read(), g)
    return g["__version__"]
41 |
# Package metadata for PyPI; the version string is read from oamap/version.py
# so it is defined in exactly one place.
setup(name = "oamap",
      version = get_version(),
      packages = find_packages(exclude = ["tests"]),
      scripts = [],
      # NOTE(review): data_files normally takes (directory, [files]) pairs;
      # confirm this bare-list form installs README.rst as intended
      data_files = ["README.rst"],
      description = "Perform high-speed calculations on columnar data without creating intermediate objects.",
      long_description = open("README.rst").read().strip(),
      author = "Jim Pivarski (DIANA-HEP)",
      author_email = "pivarski@fnal.gov",
      maintainer = "Jim Pivarski (DIANA-HEP)",
      maintainer_email = "pivarski@fnal.gov",
      url = "https://github.com/diana-hep/oamap",
      download_url = "https://github.com/diana-hep/oamap/releases",
      license = "BSD 3-clause",
      test_suite = "tests",
      # numpy is the only hard runtime dependency; the rest are test-only
      install_requires = ["numpy"],
      tests_require = ["uproot", "thriftpy", "python-snappy"],
      classifiers = [
          "Development Status :: 4 - Beta",
          "Intended Audience :: Developers",
          "Intended Audience :: Information Technology",
          "Intended Audience :: Science/Research",
          "License :: OSI Approved :: BSD License",
          "Operating System :: MacOS",
          "Operating System :: POSIX",
          "Operating System :: Unix",
          "Programming Language :: Python",
          "Programming Language :: Python :: 2.7",
          "Programming Language :: Python :: 3.4",
          "Programming Language :: Python :: 3.5",
          "Programming Language :: Python :: 3.6",
          "Programming Language :: Python :: 3.7",
          "Topic :: Scientific/Engineering",
          "Topic :: Scientific/Engineering :: Information Analysis",
          "Topic :: Scientific/Engineering :: Mathematics",
          "Topic :: Scientific/Engineering :: Physics",
          "Topic :: Software Development",
          "Topic :: Utilities",
          ],
      platforms = "Any",
      )
83 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | OAMap: Object-Array Mapping
2 | ===========================
3 |
4 | .. image:: https://travis-ci.org/diana-hep/oamap.svg?branch=master
5 | :target: https://travis-ci.org/diana-hep/oamap
6 |
7 | Introduction
8 | ------------
9 |
10 | Data analysts are often faced with a choice between speed and flexibility. Tabular data, such as SQL tables, can be processed rapidly enough for a truly interactive analysis session, but hierarchically nested formats, such as JSON, are better at representing relationships in complex data models. In some domains (such as particle physics), we want to perform calculations on JSON-like structures at the speed of SQL.
11 |
The key to high throughput on large datasets, particularly ones with more attributes than are accessed in a single pass, is laying out the data in "columns." All values of an attribute should be contiguous on disk or memory because data are paged from one cache to the next in locally contiguous blocks. The `ROOT <https://root.cern>`_ and `Parquet <https://parquet.apache.org>`_ file formats represent JSON-like data in columns on disk, but these data are usually deserialized into objects for processing in memory. Higher performance can be achieved by maintaining the columnar structure through all stages of the calculation (see `this talk `_ and `this paper `_).
13 |
14 | The OAMap toolkit implements an Object Array Mapping in Python. Object Array Mappings, by analogy with Object Relational Mappings (ORMs) are one-to-one relationships between conceptual objects and physical arrays. You can write functions that appear to be operating on ordinary Python objects-- lists, tuples, class instances-- but are actually being performed on low-level, contiguous buffers (Numpy arrays). The result is fast processing of large, complex datasets with a low memory footprint.
15 |
OAMap has two primary modes: (1) pure-Python object proxies, which pretend to be Python objects but actually access array data on demand, and (2) bare-metal bytecode compiled by `Numba <https://numba.pydata.org>`_. The pure-Python form is good for low-latency, exploratory work, while the compiled form is good for high throughput. They are seamlessly interchangeable: a Python proxy converts to the compiled form when it enters a Numba-compiled function and switches back when it leaves. You can, for instance, do a fast search in compiled code and examine the results more fully by hand.
17 |
18 | Any columnar file format or database can be used as a data source: OAMap can get arrays of data from any dict-like object (any Python object implementing ``__getitem__``), even from within a Numba-compiled function. Backends to ROOT, Parquet, and HDF5 are included, as well as a Python ``shelve`` alternative. Storing and accessing a complete dataset, including metadata, requires no more infrastructure than a collection of named arrays. (Data types are encoded in the names, values in the arrays.) OAMap is intended as a middleware layer above file formats and databases but below a fully integrated analysis suite.
19 |
20 | Installation
21 | ------------
22 |
23 | Install OAMap like any other Python package:
24 |
25 | .. code-block:: bash
26 |
27 | pip install oamap --user
28 |
29 | or similar (use ``sudo``, ``virtualenv``, or ``conda`` if you wish).
30 |
31 | **Strict dependencies:**
32 |
- `Python <https://www.python.org>`_ (2.6+, 3.4+)
- `Numpy <https://numpy.org>`_
35 |
36 | **Recommended dependencies:**
37 |
- `Numba and LLVM <https://numba.pydata.org>`_ to JIT-compile functions (requires a particular version of LLVM, follow instructions)
- `thriftpy <https://pypi.org/project/thriftpy/>`_ to read Parquet files (pure Python, pip is fine)
- `uproot <https://github.com/scikit-hep/uproot>`_ to read ROOT files (pure Python, pip is fine)
- `h5py <https://www.h5py.org>`_ to read HDF5 files (requires binary libraries; follow instructions)
42 |
43 | **Optional dependencies:** (all are bindings to binaries that can be package-installed)
44 |
- `lz4 <https://pypi.org/project/lz4/>`_ compression used by some ROOT and Parquet files
- `python-snappy <https://pypi.org/project/python-snappy/>`_ compression used by some Parquet files
- `lzo <https://pypi.org/project/python-lzo/>`_ compression used by some Parquet files
- `brotli <https://pypi.org/project/Brotli/>`_ compression used by some Parquet files
49 |
--------------------------------------------------------------------------------
/oamap/extension/common.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import codecs
32 | import sys
33 |
34 | import numpy
35 |
36 | import oamap.generator
37 |
class _GenerateBytes(object):
    """Mixin for extended generators whose underlying data is a list of uint8.

    Provides byte-string materialization from the list/primitive arrays
    (_generatebytes) and the inverse conversion back to raw byte values
    (degenerate).
    """

    # True when running on Python 3 (selects str/bytes handling below)
    py3 = sys.version_info[0] >= 3

    def _generatebytes(self, arrays, index, cache):
        listgen = self.generic
        primgen = self.generic.content

        # if the list itself is nullable, resolve the mask first; a masked
        # entry yields None, otherwise the mask value is the physical index
        if isinstance(listgen, oamap.generator.MaskedListGenerator):
            mask = cache[listgen.maskidx]
            if mask is None:
                self._getarrays(arrays, cache, listgen._toget(arrays, cache))
                mask = cache[listgen.maskidx]

            value = mask[index]
            if value == listgen.maskedvalue:
                return None
            else:
                index = value

        starts = cache[listgen.startsidx]
        stops = cache[listgen.stopsidx]
        data = cache[primgen.dataidx]
        if starts is None or stops is None or data is None:
            # lazily fetch whichever arrays have not been cached yet
            toget = listgen._toget(arrays, cache)
            toget.update(primgen._toget(arrays, cache))
            self._getarrays(arrays, cache, toget)
            starts = cache[listgen.startsidx]
            stops = cache[listgen.stopsidx]
            data = cache[primgen.dataidx]

        array = data[starts[index]:stops[index]]

        if isinstance(array, bytes):
            return array
        elif isinstance(array, numpy.ndarray):
            # tobytes() replaces tostring(), which was deprecated in
            # numpy 1.19 and removed in numpy 1.23; output is identical
            return array.tobytes()
        elif self.py3:
            return bytes(array)
        else:
            return "".join(map(chr, array))

    def degenerate(self, obj):
        """Convert *obj* (None, bytes, or text) back to raw byte values."""
        if obj is None:
            return obj

        elif self.py3:
            if isinstance(obj, bytes):
                return obj
            else:
                return codecs.utf_8_encode(obj)[0]

        else:
            if isinstance(obj, str):
                return map(ord, obj)
            else:
                return map(ord, codecs.utf_8_encode(obj)[0])
94 |
class ByteStringGenerator(_GenerateBytes, oamap.generator.ExtendedGenerator):
    """Extended generator that materializes ByteString lists as raw bytes."""

    pattern = {
        "name": "ByteString",
        "type": "list",
        "content": {"type": "primitive", "dtype": "uint8", "nullable": False},
        }

    def _generate(self, arrays, index, cache):
        # delegate to the shared byte-extraction logic in _GenerateBytes
        return self._generatebytes(arrays, index, cache)
100 |
class UTF8StringGenerator(_GenerateBytes, oamap.generator.ExtendedGenerator):
    """Extended generator that decodes UTF8String lists into text."""

    pattern = {
        "name": "UTF8String",
        "type": "list",
        "content": {"type": "primitive", "dtype": "uint8", "nullable": False},
        }

    def _generate(self, arrays, index, cache):
        raw = self._generatebytes(arrays, index, cache)
        if raw is None:
            return None
        return codecs.utf_8_decode(raw)[0]
110 |
def ByteString(nullable=False, starts=None, stops=None, data=None, mask=None, packing=None, doc=None, metadata=None):
    """Schema factory: a List of uint8 named "ByteString"."""
    import oamap.schema
    content = oamap.schema.Primitive(numpy.uint8, data=data)
    return oamap.schema.List(content,
                             nullable=nullable,
                             starts=starts,
                             stops=stops,
                             mask=mask,
                             packing=packing,
                             name="ByteString",
                             doc=doc,
                             metadata=metadata)
114 |
def UTF8String(nullable=False, starts=None, stops=None, data=None, mask=None, packing=None, doc=None, metadata=None):
    """Schema factory: a List of uint8 named "UTF8String"."""
    import oamap.schema
    content = oamap.schema.Primitive(numpy.uint8, data=data)
    return oamap.schema.List(content,
                             nullable=nullable,
                             starts=starts,
                             stops=stops,
                             mask=mask,
                             packing=packing,
                             name="UTF8String",
                             doc=doc,
                             metadata=metadata)
118 |
--------------------------------------------------------------------------------
/tests/test_fill.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import unittest
32 |
33 | import oamap.inference
34 | import oamap.fill
35 | import oamap.proxy
36 | from oamap.schema import *
37 |
class TestFill(unittest.TestCase):
    """Round-trip tests: Python data -> schema (inferred when absent) ->
    filled arrays -> columnar proxy -> JSON, which must equal the input."""

    def runTest(self):
        pass

    def check(self, value, schema=None, debug=False):
        # Fill *value* under *schema* (inferred from the data when None) and
        # verify that reading the columnar form back reproduces the value.
        if schema is None:
            schema = oamap.inference.fromdata(value)
        if debug:
            print("schema: {0}".format(schema))
        arrays = oamap.fill.fromdata(value, schema)
        if debug:
            print("arrays:")
            for n in sorted(arrays):
                print(" {0}: {1}".format(repr(n), arrays[n]))
        columnar = schema(arrays)
        if debug:
            print("columnar: {0}".format(columnar))
        value2 = oamap.proxy.tojson(columnar)
        self.assertEqual(value, value2)

    def test_Primitive(self):
        self.check(3)
        self.check(3.14)
        self.check({"real": 3, "imag": 4})
        # NOTE(review): these are the strings "inf"/"-inf"/"nan", not floats —
        # presumably exercising string round-trips; confirm against inference
        self.check("inf")
        self.check("-inf")
        self.check("nan")
        # self.check([[1, 2], [3, 4]], Primitive("i8", (2, 2)))

    def test_List(self):
        # empty and deeply nested empty lists need an explicit schema
        self.check([], schema=List(Primitive("i8")))
        self.check([], schema=List(List(List(List(Primitive("i8"))))))
        self.check([[[[]]]], schema=List(List(List(List(Primitive("i8"))))))
        self.check([1, 2, 3])
        self.check([[1, 2, 3], [], [4, 5]])
        self.check([[1, 2, None], [], [4, 5]])

    def test_Union(self):
        self.check([1, 2, 3, 4.4, 5.5, 6.6], schema=List(Union([Primitive("i8"), Primitive("f8")])))
        self.check([3.14, [], 1.1, 2.2, [1, 2, 3]])
        self.check([3.14, [], 1.1, None, [1, 2, 3]])

    def test_Record(self):
        self.check({"one": 1, "two": 2.2})
        self.check({"one": {"uno": 1, "dos": 2}, "two": 2.2})
        self.check({"one": {"uno": 1, "dos": [2]}, "two": 2.2})
        self.check([{"one": 1, "two": 2.2}, {"one": 1.1, "two": 2.2}])    # two of same Record
        self.check([{"one": 1, "two": 2.2}, {"one": [1, 2, 3], "two": 2.2}])    # Union of attribute
        self.check([{"one": 1, "two": 2.2}, {"two": 2.2}])    # Union of Records
        self.check([{"one": 1, "two": 2.2}, None])    # nullable Record

    def test_Tuple(self):
        self.check([1, [2, 3], [[4, 5], [6]]], schema=Tuple([Primitive("i8"), List(Primitive("i8")), List(List(Primitive("i8")))]))
        self.check([1, [2, 3], None], schema=Tuple([Primitive("i8"), List(Primitive("i8")), List(List(Primitive("i8")), nullable=True)]))

    def test_Pointer(self):
        # a self-referential linked list: Node 2 points back to Node 0,
        # so following .next cycles forever through the three labels
        class Node(object):
            def __init__(self, label, next):
                self.label = label
                self.next = next

        schema = Record({"label": Primitive("i8")}, name="Node")
        schema["next"] = Pointer(schema)
        value = Node(0, Node(1, Node(2, None)))
        value.next.next.next = value

        arrays = oamap.fill.fromdata(value, schema)
        columnar = schema(arrays)

        # walk the cycle several steps to confirm the Pointer resolves
        self.assertEqual(value.label, columnar.label)
        self.assertEqual(value.next.label, columnar.next.label)
        self.assertEqual(value.next.next.label, columnar.next.next.label)
        self.assertEqual(value.next.next.next.label, columnar.next.next.next.label)
        self.assertEqual(value.next.next.next.next.label, columnar.next.next.next.next.label)
        self.assertEqual(value.next.next.next.next.next.label, columnar.next.next.next.next.next.label)
        self.assertEqual(value.next.next.next.next.next.next.label, columnar.next.next.next.next.next.next.label)
114 |
--------------------------------------------------------------------------------
/oamap/backend/arrow.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import numpy
32 |
33 | import oamap.schema
34 | import oamap.generator
35 | from oamap.util import OrderedDict
36 |
def schema(table):
    """Derive an oamap schema from a pyarrow Table.

    Array names are encoded as "<column>/<bufferindex>", following Arrow's
    buffer layout for each column: the validity (mask) buffer at *index*
    and the offsets/data buffer at *index + 1*.
    """
    import pyarrow
    def recurse(node, name, index, nullable):
        if isinstance(node, pyarrow.lib.ListType):
            # Arrow keeps a single offsets buffer per list level; starts and
            # stops deliberately name the same array — the reader derives
            # stops by shifting the shared offsets by one element
            return oamap.schema.List(recurse(node.value_type, name, index + 2, nullable),
                                     nullable=nullable,
                                     starts="{0}/{1}".format(name, index + 1),
                                     stops="{0}/{1}".format(name, index + 1),
                                     mask="{0}/{1}".format(name, index))
        elif isinstance(node, pyarrow.lib.DataType):
            # primitive leaf: mask buffer at index, data buffer at index + 1
            return oamap.schema.Primitive(node.to_pandas_dtype(),
                                          nullable=nullable,
                                          data="{0}/{1}".format(name, index + 1),
                                          mask="{0}/{1}".format(name, index))
        else:
            raise NotImplementedError(type(node))

    fields = []
    for n in table.schema.names:
        field = table.schema.field_by_name(n)
        fields.append((n, recurse(field.type, n, 0, field.nullable)))

    # the outermost list spans the whole table; empty starts/stops names
    # signal that its bounds come from the table length, not a stored array
    return oamap.schema.List(
        oamap.schema.Record(OrderedDict(fields)),
        starts="",
        stops="")
63 |
def proxy(table):
    """Wrap a pyarrow Table in an oamap ListProxy without copying data.

    Arrays are served lazily by an internal _ArrayDict that decodes Arrow
    buffers (validity bitmaps, offsets, data) into the numpy arrays the
    oamap generators expect.
    """
    import pyarrow
    class _ArrayDict(object):
        def __init__(self, table):
            self.table = table

        def chop(self, name):
            # split "column/bufferindex" at the last slash
            slashindex = name.rindex("/")
            return name[:slashindex], int(name[slashindex + 1 :])

        def frombuffer(self, chunk, bufferindex):
            def truncate(array, length, offset=0):
                # Arrow buffers may be over-allocated; cut to logical length
                # (offset=1 keeps the extra trailing element of an offsets buffer)
                return array[:length + offset]

            def mask(index, length):
                # translate an Arrow validity bitmap into oamap's index mask
                buf = chunk.buffers()[index]
                if buf is None:
                    # no bitmap present: every entry is valid
                    return numpy.arange(length, dtype=oamap.generator.Masked.maskdtype)
                else:
                    # NOTE(review): numpy.unpackbits is MSB-first while Arrow
                    # validity bitmaps are LSB-first within each byte — verify
                    # the bit order here against a table with nulls
                    unmasked = truncate(numpy.unpackbits(numpy.frombuffer(buf, dtype=numpy.uint8)).view(numpy.bool_), length)
                    mask = numpy.empty(len(unmasked), dtype=oamap.generator.Masked.maskdtype)
                    mask[unmasked] = numpy.arange(unmasked.sum(), dtype=mask.dtype)
                    mask[~unmasked] = oamap.generator.Masked.maskedvalue
                    return mask

            def recurse(tpe, index, length):
                # walk the type tree until *bufferindex* is reached; each list
                # level consumes two buffer slots (mask, offsets)
                if isinstance(tpe, pyarrow.lib.ListType):
                    if index == bufferindex:
                        # list mask
                        return mask(index, length)
                    elif index + 1 == bufferindex:
                        # list starts
                        return truncate(numpy.frombuffer(chunk.buffers()[index + 1], dtype=numpy.int32), length, 1)
                    else:
                        # descend into list; the last offset is the inner length
                        length = truncate(numpy.frombuffer(chunk.buffers()[index + 1], dtype=numpy.int32), length, 1)[-1]
                        return recurse(tpe.value_type, index + 2, length)

                elif isinstance(tpe, pyarrow.lib.DataType):
                    if index == bufferindex:
                        # data mask
                        return mask(index, length)
                    elif index + 1 == bufferindex:
                        # data
                        return truncate(numpy.frombuffer(chunk.buffers()[index + 1], dtype=tpe.to_pandas_dtype()), length)
                    else:
                        raise AssertionError

                else:
                    raise NotImplementedError

            return recurse(chunk.type, 0, len(chunk))

        def getall(self, names):
            out = {}
            for name in names:
                if len(str(name)) == 0:
                    # empty name: the outermost list over the whole table,
                    # bounded by [0, num_rows)
                    if isinstance(name, oamap.generator.StartsRole):
                        out[name] = numpy.array([0], dtype=oamap.generator.ListGenerator.posdtype)
                    elif isinstance(name, oamap.generator.StopsRole):
                        out[name] = numpy.array([self.table.num_rows], dtype=oamap.generator.ListGenerator.posdtype)
                    else:
                        raise AssertionError

                elif isinstance(name, oamap.generator.StopsRole):
                    # stops share the starts (offsets) array shifted by one;
                    # relies on the starts entry being filled earlier in *names*
                    out[name] = out[name.starts][1:]

                else:
                    columnname, bufferindex = self.chop(str(name))
                    column = self.table[self.table.schema.names.index(columnname)]
                    chunks = column.data.chunks
                    if len(chunks) == 0:
                        raise ValueError("Arrow column {0} has no chunks".format(repr(columnname)))
                    elif len(chunks) == 1:
                        out[name] = self.frombuffer(chunks[0], bufferindex)
                    else:
                        out[name] = numpy.concatenate([self.frombuffer(chunk, bufferindex) for chunk in chunks])

            return out

    return schema(table)(_ArrayDict(table))
145 |
--------------------------------------------------------------------------------
/oamap/backend/root/cmsnano.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import numpy
32 |
33 | import oamap.backend.root
34 | import oamap.schema
35 | import oamap.dataset
36 | import oamap.proxy
37 | from oamap.util import OrderedDict
38 |
def dataset(path, treepath="Events", namespace=None, **kwargs):
    """Open CMS NanoAOD file(s) as an oamap Dataset.

    *path* is anything uproot.tree.numentries accepts (one file or a glob);
    one partition per matched file. The schema is taken from the first file.
    Raises ValueError when no TTree matches.
    """
    import uproot

    if namespace is None:
        namespace = "root.cmsnano({0})".format(repr(path))

    if "localsource" not in kwargs:
        kwargs["localsource"] = lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    kwargs["total"] = False
    kwargs["blocking"] = True

    paths2entries = uproot.tree.numentries(path, treepath, **kwargs)
    if len(paths2entries) == 0:
        raise ValueError("path {0} matched no TTrees".format(repr(path)))

    # cumulative entry offsets delimit the partitions; use a loop variable
    # distinct from the *path* argument so it is not shadowed
    offsets = [0]
    paths = []
    for filepath, numentries in paths2entries.items():
        offsets.append(offsets[-1] + numentries)
        paths.append(filepath)

    sch = schema(paths[0], namespace=namespace)
    doc = sch.doc
    sch.doc = None    # the Dataset object carries the docstring instead

    return oamap.dataset.Dataset(treepath,
                                 sch,
                                 {namespace: oamap.backend.root.ROOTBackend(paths, treepath, namespace)},
                                 oamap.dataset.SingleThreadExecutor(),
                                 offsets,
                                 extension=None,
                                 packing=None,
                                 doc=doc,
                                 metadata={"schemafrom": paths[0]})
73 |
def proxy(path, treepath="Events", namespace=None, extension=oamap.extension.common):
    """Open a single nanoAOD file and return a ListProxy over its events."""
    import uproot

    if namespace is None:
        namespace = "root.cmsnano({0})".format(repr(path))

    # same small-chunk, unlimited-cache source as dataset() uses
    tree = uproot.open(path, localsource=lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None))[treepath]
    return _proxy(tree, namespace=namespace, extension=extension)
84 |
def _proxy(tree, namespace=None, extension=oamap.extension.common):
    """Build a ListProxy over an already-opened uproot TTree.

    The proxy iterates the tree's entries lazily through a ROOTArrays source
    bound to a single-file ROOTBackend.
    """
    if namespace is None:
        # BUG FIX: this previously formatted the undefined name 'path'
        # (NameError); derive the default namespace from the tree's own
        # source path instead, matching the ROOTBackend construction below.
        namespace = "root.cmsnano({0})".format(repr(tree._context.sourcepath))

    schema = _schema(tree, namespace=namespace)
    generator = schema.generator(extension=extension)

    return oamap.proxy.ListProxy(generator, oamap.backend.root.ROOTArrays(tree, oamap.backend.root.ROOTBackend([tree._context.sourcepath], tree._context.treename, namespace)), generator._newcache(), 0, 1, tree.numentries)
93 |
def schema(path, treepath="Events", namespace=None):
    """Derive the nanoAOD-regrouped oamap schema from one ROOT file."""
    import uproot

    if namespace is None:
        namespace = "root.cmsnano({0})".format(repr(path))

    # open with the same small-chunk file source as dataset()/proxy()
    tree = uproot.open(path, localsource=lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None))[treepath]
    return _schema(tree, namespace=namespace)
104 |
def _schema(tree, namespace=None):
    """Build an oamap schema for a CMS nanoAOD TTree, regrouping flat branches.

    Starting from the generic ROOT schema, this gathers:
    * "<Obj>_<field>" list branches sharing a count branch into one
      list-of-records "Obj",
    * flat "MET_*" / "LHE_*" / "Pileup_*" / "PV_*" branches into plain records,
    * "HLT_*" and "Flag_*" branches into "HLT" and "Flag" records.
    """
    if namespace is None:
        # BUG FIX: this previously formatted the undefined name 'path'
        # (NameError); use the tree's source path for the default namespace.
        namespace = "root.cmsnano({0})".format(repr(tree._context.sourcepath))

    schema = oamap.backend.root._schema(tree, namespace=namespace)

    groups = OrderedDict()
    for name in list(schema.content.keys()):
        if isinstance(schema.content[name], oamap.schema.List) and "_" in name:
            try:
                branch = tree[schema.content[name].starts]
            except KeyError:
                pass    # no corresponding branch: leave this field untouched
            else:
                underscore = name.index("_")
                groupname, fieldname = name[:underscore], name[underscore + 1:]
                countbranchname = branch.countbranch.name
                if not isinstance(countbranchname, str):
                    countbranchname = countbranchname.decode("ascii")
                if groupname not in groups:
                    groups[groupname] = schema.content[groupname] = \
                        oamap.schema.List(oamap.schema.Record({}, name=groupname), starts=countbranchname, stops=countbranchname, namespace=namespace)
                # every field of one group must be counted by the same branch
                assert countbranchname == schema.content[groupname].starts
                groups[groupname].content[fieldname] = schema.content[name].content
                del schema.content[name]

        elif "MET_" in name or name.startswith("LHE_") or name.startswith("Pileup_") or name.startswith("PV_"):
            underscore = name.index("_")
            groupname, fieldname = name[:underscore], name[underscore + 1:]
            if groupname not in groups:
                groups[groupname] = schema.content[groupname] = \
                    oamap.schema.Record({}, name=groupname)
            groups[groupname][fieldname] = schema.content[name]
            del schema.content[name]

    hlt = oamap.schema.Record({}, name="HLT")
    flag = oamap.schema.Record({}, name="Flag")
    # BUG FIX: iterate over a snapshot of the keys because entries are deleted
    # inside the loop (mutating while iterating a keys view raises in Python 3)
    for name in list(schema.content.keys()):
        if name.startswith("HLT_"):
            hlt[name[4:]] = schema.content[name]
            del schema.content[name]
        if name.startswith("Flag_"):
            flag[name[5:]] = schema.content[name]
            del schema.content[name]

    schema.content["HLT"] = hlt
    schema.content["Flag"] = flag
    schema.content.name = "Event"
    return schema
154 |
--------------------------------------------------------------------------------
/tests/test_database.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import math
32 |
33 | import unittest
34 |
35 | from oamap.schema import *
36 | from oamap.database import *
37 | from oamap.dataset import *
38 | import oamap.operations
39 |
class TestDatabase(unittest.TestCase):
    """End-to-end tests of InMemoryDatabase: loading data, recasting views,
    filter transformations, map/reduce actions, and refcounted cleanup.

    NOTE(review): asserts are order-dependent — each db.data assignment
    mutates shared database state used by later checks.
    """
    def runTest(self):
        pass

    def test_data(self):
        """Single-partition Record dataset: recast, filter, map, reduce."""
        db = InMemoryDatabase()
        db.fromdata("one", Record({"x": List("int32"), "y": List("float64")}), {"x": [1, 2, 3, 4, 5], "y": [1.1, 2.2, 3.3]})

        # 'one' is a proxy factory; one() materializes the stored record
        one = db.data.one
        self.assertEqual(one().x[0], 1)
        self.assertEqual(one().x[1], 2)
        self.assertEqual(one().x[2], 3)
        self.assertEqual(one().y[0], 1.1)
        self.assertEqual(one().y[1], 2.2)
        self.assertEqual(one().y[2], 3.3)

        # recasting
        db.data.two = one.project("x")
        two = db.data.two
        self.assertEqual(two[0], 1)
        self.assertEqual(two[1], 2)
        self.assertEqual(two[2], 3)
        self.assertEqual(two[3], 4)
        self.assertEqual(two[4], 5)

        # dropping a field keeps the remaining record structure
        db.data.two = one.drop("y")
        two = db.data.two
        self.assertEqual(two().x[0], 1)
        self.assertEqual(two().x[1], 2)
        self.assertEqual(two().x[2], 3)
        self.assertEqual(two().x[3], 4)
        self.assertEqual(two().x[4], 5)

        # drop then keep compose to the same single-field record
        db.data.two = one.drop("y").keep("x")
        two = db.data.two
        self.assertEqual(two().x[0], 1)
        self.assertEqual(two().x[1], 2)
        self.assertEqual(two().x[2], 3)
        self.assertEqual(two().x[3], 4)
        self.assertEqual(two().x[4], 5)

        # transformation
        db.data.three = one.filter(lambda x: x % 2 == 0, at="x")
        three = db.data.three
        self.assertEqual(three().x, [2, 4])

        # chained filters apply in sequence
        db.data.three = one.filter(lambda x: x > 1, at="x").filter(lambda x: x < 5, at="x")
        three = db.data.three
        self.assertEqual(three().x, [2, 3, 4])

        # action
        table = one.map(lambda x: x**2, at="x")
        self.assertEqual(table.result().tolist(), [1, 4, 9, 16, 25])

        summary = one.reduce(0, lambda x, tally: x + tally, at="x")
        self.assertEqual(summary.result(), sum([1, 2, 3, 4, 5]))

    def test_dataset(self):
        """Two-partition List dataset: partition access, recast, filter, map/reduce, deletion."""
        db = InMemoryDatabase()
        db.fromdata("one", List(Record({"x": "int32", "y": "float64"})), [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}], [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}, {"x": 6, "y": 6.6}])
        one = db.data.one
        self.assertEqual(one[0].x, 1)
        self.assertEqual(one[1].x, 2)
        self.assertEqual(one[2].x, 3)
        self.assertEqual(one[3].x, 4)
        self.assertEqual(one[4].x, 5)
        self.assertEqual(one[5].x, 6)
        self.assertEqual([obj.x for obj in one], [1, 2, 3, 4, 5, 6])
        self.assertEqual([obj.y for obj in one], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6])
        self.assertEqual(oamap.operations.project(one.partition(0), "x"), [1, 2, 3])
        self.assertEqual(oamap.operations.project(one.partition(1), "x"), [4, 5, 6])

        # recasting
        db.data.two = one.project("x")
        two = db.data.two
        self.assertEqual(two.partition(0), [1, 2, 3])
        self.assertEqual(two.partition(1), [4, 5, 6])
        self.assertEqual([x for x in two], [1, 2, 3, 4, 5, 6])

        db.data.two = one.drop("y").project("x")
        two = db.data.two
        self.assertEqual([x for x in two], [1, 2, 3, 4, 5, 6])
        self.assertEqual(two.partition(0), [1, 2, 3])
        self.assertEqual(two.partition(1), [4, 5, 6])

        # transformation (partition boundaries are preserved by filters)
        db.data.three = one.filter(lambda obj: obj.x % 2 == 0)
        three = db.data.three
        self.assertEqual([obj.x for obj in three], [2, 4, 6])
        self.assertEqual([obj.y for obj in three], [2.2, 4.4, 6.6])
        self.assertEqual(oamap.operations.project(three.partition(0), "x"), [2])
        self.assertEqual(oamap.operations.project(three.partition(1), "x"), [4, 6])

        db.data.three = one.filter(lambda obj: obj.x > 1).filter(lambda obj: obj.x < 6)
        three = db.data.three

        self.assertEqual([obj.x for obj in three], [2, 3, 4, 5])
        self.assertEqual([obj.y for obj in three], [2.2, 3.3, 4.4, 5.5])
        self.assertEqual(oamap.operations.project(three.partition(0), "x"), [2, 3])
        self.assertEqual(oamap.operations.project(three.partition(1), "x"), [4, 5])

        # action: map may drop entries by returning None
        table = one.map(lambda obj: None if obj.x % 2 == 0 else (obj.x, obj.y, obj.x + obj.y))
        self.assertEqual(table.result().tolist(), [(1, 1.1, 2.1), (3, 3.3, 6.3), (5, 5.5, 10.5)])

        summary = one.reduce(0, lambda obj, tally: obj.x + tally)
        self.assertEqual(summary.result(), sum([1, 2, 3, 4, 5, 6]))

        # print
        # print "one"
        # for n, x in db._backends[db._namespace]._arrays[0].items():
        #     print db._backends[db._namespace]._refcounts[0][n], n, x

        del db.data.one
        # print "two"
        # for n, x in db._backends[db._namespace]._arrays[0].items():
        #     print db._backends[db._namespace]._refcounts[0][n], n, x

        del db.data.two
        # print "three"
        # for n, x in db._backends[db._namespace]._arrays[0].items():
        #     print db._backends[db._namespace]._refcounts[0][n], n, x

        del db.data.three
        # print "done"
        # for n, x in db._backends[db._namespace]._arrays[0].items():
        #     print db._backends[db._namespace]._refcounts[0][n], n, x

        # after all datasets are deleted, every array's refcount entry is gone
        self.assertEqual(len(db._backends[db._namespace]._refcounts.get(0, {})), 0)
        self.assertEqual(len(db._backends[db._namespace]._refcounts.get(1, {})), 0)
170 |
--------------------------------------------------------------------------------
/oamap/util.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
import ast
import math
import numbers
import sys
import types

import numpy
37 |
# Python 2/3 compatibility shims: Python 3 removed basestring/unicode and the
# three-argument form of types.MethodType.
if sys.version_info[0] > 2:
    basestring = str
    unicode = str
    # emulate Python 2's MethodType(function, None, cls): binding to no
    # instance yields the plain function (Python 3 has no unbound methods)
    def MethodType(function, instance, cls):
        if instance is None:
            return function
        else:
            return types.MethodType(function, instance)
else:
    # Python 2's MethodType natively accepts (function, instance, class)
    MethodType = types.MethodType
48 |
try:
    from collections import OrderedDict
except ImportError:
    # simple OrderedDict implementation for Python 2.6
    class OrderedDict(dict):
        """Minimal dict subclass that remembers key insertion order."""
        def __init__(self, items=(), **kwds):
            items = list(items)
            self._order = [k for k, v in items] + [k for k in kwds]
            super(OrderedDict, self).__init__(items)
            # BUG FIX: keyword arguments were recorded in _order but their
            # values were never stored in the underlying dict, so keys()
            # and values() disagreed (values() raised KeyError).
            for k, v in kwds.items():
                super(OrderedDict, self).__setitem__(k, v)
        def keys(self):
            return self._order
        def values(self):
            return [self[k] for k in self._order]
        def items(self):
            return [(k, self[k]) for k in self._order]
        def __setitem__(self, name, value):
            # record first insertion only; reassignment keeps original position
            if name not in self._order:
                self._order.append(name)
            super(OrderedDict, self).__setitem__(name, value)
        def __delitem__(self, name):
            if name in self._order:
                self._order.remove(name)
            super(OrderedDict, self).__delitem__(name)
        def __repr__(self):
            return "OrderedDict([{0}])".format(", ".join("({0}, {1})".format(repr(k), repr(v)) for k, v in self.items()))
74 |
# BUG FIX: `from collections import MutableMapping` stopped working in
# Python 3.10 (the ABC aliases were removed from `collections`), so prefer
# collections.abc, then fall back for Python 2 (UserDict.DictMixin) and
# for very old Python 3 / Python 2.6-2.7 (collections.MutableMapping).
try:
    from collections.abc import MutableMapping
except ImportError:
    try:
        from UserDict import DictMixin as MutableMapping
    except ImportError:
        from collections import MutableMapping
79 |
try:
    from importlib import import_module
except ImportError:
    # pre-importlib fallback (Python 2.6): __import__("a.b") returns the
    # top-level package "a", so walk the dotted name down by hand
    def import_module(modulename):
        mod = __import__(modulename)
        for piece in modulename.split(".")[1:]:
            mod = mod.__dict__[piece]
        return mod
88 |
def slice2sss(index, length):
    """Normalize a slice object against a sequence of the given length.

    Returns (start, stop, step) clamped so that iterating
    range(start, stop, step) visits exactly the selected indices.  For a
    positive step the values lie in [0, length]; for a negative step they lie
    in [-1, length - 1] (stop may be -1).  Raises ValueError on step == 0.
    """
    step = index.step if index.step is not None else 1
    if step == 0:
        raise ValueError("slice step cannot be zero")

    raw_start, raw_stop = index.start, index.stop

    if step > 0:
        # forward traversal: clamp into [0, length]
        if raw_start is None:
            begin = 0
        elif raw_start < 0:
            begin = max(0, raw_start + length)
        else:
            begin = min(raw_start, length)

        # stop never precedes start (empty range instead)
        if raw_stop is None:
            end = length
        elif raw_stop < 0:
            end = max(begin, raw_stop + length)
        else:
            end = max(begin, min(length, raw_stop))

    else:
        # backward traversal: clamp into [-1, length - 1]
        if raw_start is None:
            begin = length - 1
        elif raw_start < 0:
            begin = max(raw_start + length, -1)
        else:
            begin = min(raw_start, length - 1)

        # stop never exceeds start when stepping backward
        if raw_stop is None:
            end = -1
        elif raw_stop < 0:
            end = min(begin, max(-1, raw_stop + length))
        else:
            end = min(begin, raw_stop)

    return begin, end, step
126 |
def json2python(value):
    """Inverse of python2json: rebuild Python values from their JSON encoding.

    {"real": r, "imag": i} dicts (with numeric values) become complex numbers,
    the strings "inf"/"-inf"/"nan" become IEEE special floats, and lists/dicts
    are converted recursively; everything else passes through unchanged.
    """
    def walk(obj):
        if isinstance(obj, dict):
            # a two-key {"real", "imag"} dict with numeric values encodes a complex number
            if len(obj) == 2 and set(obj.keys()) == set(["real", "imag"]) and all(isinstance(v, (int, float)) for v in obj.values()):
                return obj["real"] + obj["imag"]*1j
            return dict((key, walk(item)) for key, item in obj.items())
        if isinstance(obj, list):
            return [walk(item) for item in obj]
        if obj in ("inf", "-inf", "nan"):
            return float(obj)
        return obj
    return walk(value)
144 |
145 | def python2json(value, allowlinks=False):
146 | def recurse(value, memo):
147 | if id(value) in memo:
148 | if allowlinks:
149 | return memo[id(value)]
150 | else:
151 | raise TypeError("cross-linking within an object is not allowed")
152 |
153 | if value is None:
154 | memo[id(value)] = None
155 |
156 | elif isinstance(value, (numbers.Integral, numpy.integer)):
157 | memo[id(value)] = int(value)
158 |
159 | elif isinstance(value, (numbers.Real, numpy.floating)):
160 | if math.isnan(value):
161 | memo[id(value)] = "nan"
162 | elif math.isinf(value) and value > 0:
163 | memo[id(value)] = "inf"
164 | elif math.isinf(value):
165 | memo[id(value)] = "-inf"
166 | else:
167 | memo[id(value)] = float(value)
168 |
169 | elif isinstance(value, (numbers.Complex, numpy.complex)):
170 | memo[id(value)] = {"real": float(value.real), "imag": float(value.imag)}
171 |
172 | elif isinstance(value, basestring):
173 | memo[id(value)] = value
174 |
175 | elif isinstance(value, dict):
176 | memo[id(value)] = {}
177 | for n, x in value.items():
178 | if not isinstance(n, basestring):
179 | raise TypeError("dict keys for JSON must be strings")
180 | memo[id(value)][n] = recurse(x, memo)
181 |
182 | else:
183 | memo[id(value)] = []
184 | for x in value:
185 | memo[id(value)].append(recurse(x, memo))
186 |
187 | return memo[id(value)]
188 |
189 | return recurse(value, {})
190 |
def python2hashable(value):
    """Convert value (via python2json) into a hashable form.

    Dicts become key-sorted tuples of (key, value) pairs and lists become
    tuples, so equal structures hash equally regardless of dict key order.
    """
    def freeze(obj):
        if isinstance(obj, dict):
            return tuple((key, freeze(obj[key])) for key in sorted(obj))
        if isinstance(obj, list):
            return tuple(freeze(item) for item in obj)
        return obj
    return freeze(python2json(value))
200 |
def varname(avoid, trial=None):
    """Return a variable name not in `avoid`, adding it to `avoid`.

    If `trial` is given and free, it is used as-is; otherwise candidates
    "v0", "v1", ... are tried starting from len(avoid).
    """
    # BUG FIX: the candidate was always "v" + str(len(avoid)), which never
    # changes inside the loop — if that exact name was already in `avoid`
    # this looped forever.  Use an increasing counter instead.
    counter = len(avoid)
    while trial is None or trial in avoid:
        trial = "v" + str(counter)
        counter += 1
    avoid.add(trial)
    return trial
206 |
def paramtypes(args):
    """Return a tuple of numba types for args, or None if numba is unavailable."""
    try:
        from numba import typeof
    except ImportError:
        return None
    return tuple(typeof(arg) for arg in args)
214 |
def doexec(module, env):
    # Execute a compiled code object `module` in the namespace dict `env`.
    # NOTE(review): presumably isolated in its own function so that exec's
    # scoping restrictions (a Python 2 concern) never apply to the caller
    # (stringfcn) — confirm before inlining.
    exec(module, env)
217 |
def stringfcn(fcn):
    """If `fcn` is a string, compile it into a Python function; otherwise
    return `fcn` unchanged.

    The string is parsed as a suite of statements; a trailing expression
    becomes the return value.  Names that are read but never assigned (and
    are not provided by math or this module's globals) become the function's
    parameters (in unspecified order, since they are collected in a set).
    """
    if isinstance(fcn, basestring):
        parsed = ast.parse(fcn).body
        # turn a trailing bare expression into an explicit `return`
        if isinstance(parsed[-1], ast.Expr):
            parsed[-1] = ast.Return(parsed[-1].value)
            # keep source positions valid for compile()
            parsed[-1].lineno = parsed[-1].value.lineno
            parsed[-1].col_offset = parsed[-1].value.col_offset

        # execution environment: all of math plus this module's globals
        env = dict(math.__dict__)
        env.update(globals())

        # scan the AST for free names: loaded but not stored and not in env
        free = set()
        defined = set(["None", "False", "True"])
        defined.update(env)
        def recurse(node):
            if isinstance(node, ast.Name):
                if isinstance(node.ctx, ast.Store):
                    defined.add(node.id)
                elif isinstance(node.ctx, ast.Load) and node.id not in defined:
                    free.add(node.id)
            elif isinstance(node, ast.AST):
                for n in node._fields:
                    recurse(getattr(node, n))
            elif isinstance(node, list):
                for x in node:
                    recurse(x)
        recurse(parsed)

        # choose a function name that collides with nothing in scope
        avoid = free.union(defined)
        fcnname = varname(avoid, "fcn")

        # wrap the parsed suite in a function whose parameters are the free names
        module = ast.parse("""
def {fcn}({params}):
    REPLACEME
""".format(fcn=fcnname, params=",".join(free)))
        module.body[0].body = parsed
        module = compile(module, "", "exec")

        doexec(module, env)
        fcn = env[fcnname]

    return fcn
260 |
def trycompile(fcn, paramtypes=None, numba=True):
    """Compile `fcn` with numba if possible.

    `fcn` may be a string (converted to a function via stringfcn) or a
    callable.  `numba` may be False/None (no compilation), True (default
    numba.jit options), or a dict of numba.jit options.  Returns `fcn`
    unchanged when compilation is disabled or numba is not installed.
    """
    fcn = stringfcn(fcn)

    if numba is None or numba is False:
        return fcn

    try:
        import numba as nb
    except ImportError:
        return fcn

    if numba is True:
        numbaopts = {}
    else:
        numbaopts = numba

    if isinstance(fcn, nb.dispatcher.Dispatcher):
        # BUG FIX: the Dispatcher's wrapped Python function is `py_func`;
        # the previous `py_fcn` attribute does not exist (AttributeError)
        fcn = fcn.py_func

    if paramtypes is None:
        return nb.jit(**numbaopts)(fcn)
    else:
        return nb.jit(paramtypes, **numbaopts)(fcn)
284 |
def returntype(fcn, paramtypes):
    """Return the numba-inferred return type of `fcn` for `paramtypes`.

    Returns None when numba is unavailable, when `fcn` is not a numba
    Dispatcher, or when no overload has been compiled for those types.
    """
    try:
        import numba as nb
    except ImportError:
        return None

    if isinstance(fcn, nb.dispatcher.Dispatcher):
        overload = fcn.overloads.get(paramtypes)
        if overload is not None:
            return overload.signature.return_type
        return None
297 |
--------------------------------------------------------------------------------
/oamap/backend/packing.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import json
32 | import sys
33 |
34 | import numpy
35 |
36 | import oamap.generator
37 |
# Python 3 removed basestring; alias it to str so the isinstance checks in
# PackedSource.fromjson work under both Python 2 and 3.
if sys.version_info[0] > 2:
    basestring = str
40 |
class PackedSource(object):
    """Base class for array-source wrappers that transparently pack arrays on
    write and unpack them on read.

    Wrappers form a chain through `source`; `suffix` is appended to the names
    of arrays a subclass stores in packed form.  Subclasses must implement
    `_tojsonargs` (constructor args beyond `source`, for serialization) and
    typically override `getall`/`putall`.
    """
    def __init__(self, source, suffix):
        self.source = source    # wrapped source: another PackedSource or a raw mapping (may be None until anchored)
        self.suffix = suffix    # string appended to the names of packed arrays

    def __repr__(self):
        return "{0}({1}{2})".format(self.__class__.__name__, repr(self.source), "".join(", " + repr(x) for x in self._tojsonargs()))

    def getall(self, roles):
        """Fetch arrays for `roles` from the wrapped source (batch if supported)."""
        if hasattr(self.source, "getall"):
            return self.source.getall(roles)
        else:
            # plain mappings are keyed by the role's string name
            return dict((n, self.source[str(n)]) for n in roles)

    def putall(self, roles2arrays):
        """Store arrays into the wrapped source (batch if supported)."""
        if hasattr(self.source, "putall"):
            self.source.putall(roles2arrays)
        else:
            for n, x in roles2arrays.items():
                self.source[str(n)] = x

    def copy(self):
        return self.__class__(self.source, self.suffix)

    def anchor(self, source):
        """Return a copy of the chain with `source` attached at its innermost end."""
        if self.source is None:
            return self.__class__(source, self.suffix)
        else:
            return self.__class__(self.source.anchor(source), self.suffix)

    def __eq__(self, other):
        # equality by class name and serialized constructor arguments
        return self.__class__.__name__ == other.__class__.__name__ and self._tojsonargs() == other._tojsonargs()

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((PackedSource, self.__class__.__name__, tuple(self._tojsonargs())))

    def tojsonfile(self, file, *args, **kwds):
        json.dump(self.tojson(), file, *args, **kwds)

    def tojsonstring(self, *args, **kwds):
        return json.dumps(self.tojson(), *args, **kwds)

    def tojson(self):
        """Serialize the whole chain, outermost first, as fromjson expects."""
        out = []
        node = self
        while isinstance(node, PackedSource):
            # BUG FIX: serialize each node in the chain — this previously
            # used `self`, repeating the outermost wrapper at every level
            args = node._tojsonargs()
            if len(args) == 0:
                out.append(node.__class__.__name__)
            else:
                out.append({node.__class__.__name__: args})
            node = node.source
        return out

    @staticmethod
    def fromjsonfile(file, *args, **kwds):
        return PackedSource.fromjson(json.load(file, *args, **kwds))

    @staticmethod
    def fromjsonstring(data, *args, **kwds):
        return PackedSource.fromjson(json.loads(data, *args, **kwds))

    @staticmethod
    def fromjson(data):
        """Rebuild a chain from tojson output; innermost source is left None."""
        if isinstance(data, list):
            source = None
            # build from the innermost wrapper outward
            for datum in reversed(data):
                if isinstance(datum, basestring):
                    classname = datum
                    args = ()
                elif isinstance(datum, dict) and len(datum) == 1:
                    classname, = datum.keys()
                    args, = datum.values()
                else:
                    raise ValueError("source packings JSON must be a list of strings or {\"classname\": [args]} dicts")
                try:
                    cls = globals()[classname]
                except KeyError:
                    raise ValueError("source packing class {0} not found".format(repr(classname)))
                source = cls(source, *args)
            return source
        else:
            raise ValueError("source packings JSON must be a list of strings or {\"classname\": [args]} dicts")
127 |
128 | ################################################################ BitPackMasks
129 |
class MaskBitPack(PackedSource):
    """Stores mask arrays in bit-packed form under name + suffix.

    On write, each mask is reduced to one bit per element (masked or not);
    on read, the bits are expanded back into an index-style mask array.
    """

    def __init__(self, source, suffix="-bitpacked"):
        super(MaskBitPack, self).__init__(source, suffix)

    def _tojsonargs(self):
        # only a non-default suffix needs to be serialized
        return [] if self.suffix == "-bitpacked" else [self.suffix]

    def getall(self, roles):
        passthrough = []
        packed2role = {}
        for role in roles:
            if isinstance(role, oamap.generator.MaskRole):
                # request the packed array under its suffixed name
                packed2role[oamap.generator.NoRole(str(role) + self.suffix, role.namespace)] = role
            else:
                passthrough.append(role)
        out = super(MaskBitPack, self).getall(passthrough + list(packed2role))
        for packedname, role in packed2role.items():
            out[role] = self.unpack(out[packedname])
            del out[packedname]
        return out

    def putall(self, roles2arrays):
        packed = {}
        for role, array in roles2arrays.items():
            if isinstance(role, oamap.generator.MaskRole):
                packed[oamap.generator.NoRole(str(role) + self.suffix, role.namespace)] = self.pack(array)
            else:
                packed[role] = array
        super(MaskBitPack, self).putall(packed)

    @staticmethod
    def unpack(array):
        """Expand packed bits into a mask: running index where set, maskedvalue where clear."""
        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array, dtype=numpy.dtype(numpy.uint8))
        flags = numpy.unpackbits(array).view(numpy.bool_)
        mask = numpy.empty(len(flags), dtype=oamap.generator.Masked.maskdtype)
        mask[flags] = numpy.arange(flags.sum(), dtype=mask.dtype)
        mask[~flags] = oamap.generator.Masked.maskedvalue
        return mask

    @staticmethod
    def pack(array):
        """Reduce a mask array to one bit per element (True where unmasked)."""
        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array, dtype=oamap.generator.Masked.maskdtype)
        return numpy.packbits(array != oamap.generator.Masked.maskedvalue)
173 |
174 | ################################################################ RunLengthMasks
175 |
176 | # TODO: run-length encoding for masks
177 |
178 | ################################################################ ListsAsCounts
179 |
class ListCounts(PackedSource):
    """Stores each list's starts/stops pair as a single counts array under
    name + suffix, reconstructing contiguous offsets on read."""

    def __init__(self, source, suffix="-counts"):
        super(ListCounts, self).__init__(source, suffix)

    def _tojsonargs(self):
        # only a non-default suffix needs to be serialized
        if self.suffix == "-counts":
            return []
        else:
            return [self.suffix]

    def getall(self, roles):
        # fetch non-list roles as-is; fetch each starts role under its packed
        # (suffixed) name, then expand the counts back into starts and stops
        others = [n for n in roles if not isinstance(n, (oamap.generator.StartsRole, oamap.generator.StopsRole))]
        renamed = dict((oamap.generator.NoRole(str(n) + self.suffix, n.namespace), n) for n in roles if isinstance(n, oamap.generator.StartsRole))
        out = super(ListCounts, self).getall(others + list(renamed))
        for suffixedname, name in renamed.items():
            out[name], out[name.stops] = self.fromcounts(out[suffixedname])
            del out[suffixedname]
        return out

    def putall(self, roles2arrays):
        out = {}
        for n, x in roles2arrays.items():
            if isinstance(n, oamap.generator.StartsRole):
                out[oamap.generator.NoRole(str(n) + self.suffix, n.namespace)] = self.tocounts(x, roles2arrays[n.stops])
            elif isinstance(n, oamap.generator.StopsRole):
                pass    # stops are folded into the counts written for starts
            else:
                out[n] = x
        super(ListCounts, self).putall(out)

    @staticmethod
    def fromcounts(array):
        """Expand a counts array into contiguous (starts, stops) offset views."""
        offsets = numpy.empty(len(array) + 1, dtype=oamap.generator.ListGenerator.posdtype)
        offsets[0] = 0
        offsets[1:] = numpy.cumsum(array)
        return offsets[:-1], offsets[1:]

    @staticmethod
    def tocounts(starts, stops):
        """Collapse contiguous (starts, stops) into counts; ValueError if not contiguous."""
        if not isinstance(starts, numpy.ndarray):
            starts = numpy.array(starts, dtype=oamap.generator.ListGenerator.posdtype)
        # BUG FIX: this condition previously re-tested `starts`, so a
        # list-like `stops` was never converted to an ndarray
        if not isinstance(stops, numpy.ndarray):
            stops = numpy.array(stops, dtype=oamap.generator.ListGenerator.posdtype)
        if not starts[0] == 0 or not numpy.array_equal(starts[1:], stops[:-1]):
            raise ValueError("starts and stops cannot be converted to a single counts array")
        return stops - starts
226 |
227 | ################################################################ DropUnionOffsets
228 |
class UnionDropOffsets(PackedSource):
    """Drops union offsets arrays on write and reconstructs them from the
    tags arrays on read (offsets are fully derivable from tags)."""

    def __init__(self, source):
        # BUG FIX: the super() call previously named a non-existent class
        # `DropUnionOffsets`, raising NameError whenever this was constructed
        super(UnionDropOffsets, self).__init__(source, "")

    def _tojsonargs(self):
        return []

    def getall(self, roles):
        # request everything except offsets, then rebuild offsets from tags
        nooffsets = [n for n in roles if not isinstance(n, oamap.generator.OffsetsRole)]
        out = super(UnionDropOffsets, self).getall(nooffsets)
        for n in roles:
            if isinstance(n, oamap.generator.TagsRole):
                out[n.offsets] = self.tags2offsets(out[n])
        return out

    def putall(self, roles2arrays):
        # never store offsets; they are recomputed on read
        super(UnionDropOffsets, self).putall(dict((n, x) for n, x in roles2arrays.items() if not isinstance(n, oamap.generator.OffsetsRole)))

    @staticmethod
    def tags2offsets(tags):
        """Recompute union offsets: each element's position within its own tag's sequence."""
        if not isinstance(tags, numpy.ndarray):
            tags = numpy.array(tags, dtype=oamap.generator.UnionGenerator.tagdtype)
        offsets = numpy.empty(len(tags), dtype=oamap.generator.UnionGenerator.offsetdtype)
        for tag in numpy.unique(tags):
            hastag = (tags == tag)
            offsets[hastag] = numpy.arange(hastag.sum(), dtype=offsets.dtype)
        return offsets
256 |
257 | ################################################################ CompressAll
258 |
259 | # TODO: apply a named compression algorithm
260 |
--------------------------------------------------------------------------------
/oamap/backend/root/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import numpy
32 |
33 | import oamap.schema
34 | import oamap.dataset
35 | import oamap.database
36 | import oamap.proxy
37 | import oamap.backend.packing
38 | from oamap.util import OrderedDict
39 |
def dataset(path, treepath, namespace=None, **kwargs):
    """Build an oamap Dataset from the TTree(s) matched by *path*/*treepath*.

    Each matched file becomes one partition; partition boundaries are the
    cumulative entry counts. Extra keyword arguments are forwarded to
    uproot.tree.numentries.
    """
    import uproot

    if namespace is None:
        namespace = "root({0}, {1})".format(repr(path), repr(treepath))

    # default file source: small chunks, unlimited cache
    kwargs.setdefault("localsource", lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None))
    kwargs["total"] = False
    kwargs["blocking"] = True

    entriesmap = uproot.tree.numentries(path, treepath, **kwargs)
    if len(entriesmap) == 0:
        raise ValueError("path {0} matched no TTrees".format(repr(path)))

    # cumulative entry offsets: one partition per matched file
    partitionoffsets = [0]
    filenames = []
    for filename, nentries in entriesmap.items():
        partitionoffsets.append(partitionoffsets[-1] + nentries)
        filenames.append(filename)

    # infer the schema from the first file only; hoist its doc to the Dataset
    sch = schema(filenames[0], treepath, namespace=namespace)
    doc, sch.doc = sch.doc, None

    return oamap.dataset.Dataset(treepath.split("/")[-1].split(";")[0],
                                 sch,
                                 {namespace: ROOTBackend(filenames, treepath, namespace)},
                                 oamap.dataset.SingleThreadExecutor(),
                                 partitionoffsets,
                                 extension=None,
                                 packing=None,
                                 doc=doc,
                                 metadata={"schemafrom": filenames[0]})
74 |
def proxy(path, treepath, namespace="", extension=oamap.extension.common):
    """Open the TTree at *treepath* inside the ROOT file *path* and wrap it
    as a ListProxy over its entries."""
    import uproot
    localsource = lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    tree = uproot.open(path, localsource=localsource)[treepath]
    return _proxy(tree, namespace=namespace, extension=extension)
80 |
def _proxy(tree, namespace="", extension=oamap.extension.common):
    """Wrap an already-opened uproot TTree as a ListProxy over its entries."""
    sch = _schema(tree, namespace=namespace)
    gen = sch.generator(extension=extension)
    backend = ROOTBackend([tree._context.sourcepath], tree._context.treename, namespace)
    arrays = ROOTArrays(tree, backend)
    return oamap.proxy.ListProxy(gen, arrays, gen._newcache(), 0, 1, tree.numentries)
85 |
def schema(path, treepath, namespace=""):
    """Infer an oamap schema for the TTree at *treepath* in the file *path*."""
    import uproot
    localsource = lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    return _schema(uproot.open(path, localsource=localsource)[treepath], namespace=namespace)
91 |
def _schema(tree, namespace=None):
    """Infer an oamap schema from an already-opened uproot TTree.

    Numeric leaves become Primitives, fixed dimensions and jagged branches
    become Lists, string branches become ByteString Lists, and sibling list
    branches sharing one counter branch are merged into a single List of
    Records. Returns a List schema named "Entry", with the tree's title as
    its doc.
    """
    import uproot

    if namespace is None:
        # bugfix: this fallback referenced the undefined names ``path`` and
        # ``treepath`` (NameError when namespace was omitted); derive them
        # from the tree's context, the same way _proxy does.
        namespace = "root({0}, {1})".format(repr(tree._context.sourcepath), repr(tree._context.treename))

    def accumulate(node):
        # build a Record from the branches directly under ``node``
        out = oamap.schema.Record(OrderedDict(), namespace=namespace)
        for branchname, branch in node.iteritems(aliases=False) if isinstance(node, uproot.tree.TTreeMethods) else node.iteritems():
            if not isinstance(branchname, str):
                branchname = branchname.decode("ascii")
            fieldname = branchname.split(".")[-1]

            if len(branch.fBranches) > 0:
                # branch with subbranches: recurse; keep only non-empty records
                subrecord = accumulate(branch)
                if len(subrecord.fields) > 0:
                    out[fieldname] = subrecord

            elif isinstance(branch.interpretation, (uproot.interp.asdtype, uproot.interp.numerical.asdouble32)):
                # flat numeric branch; one List wrapper per fixed dimension,
                # with ":/i" array names decoded by ROOTArrays.getall
                subnode = oamap.schema.Primitive(branch.interpretation.todtype, data=branchname, namespace=namespace)
                for i in range(len(branch.interpretation.todims)):
                    subnode = oamap.schema.List(subnode, starts="{0}:/{1}".format(branchname, i), stops="{0}:/{1}".format(branchname, i), namespace=namespace)
                out[fieldname] = subnode

            elif isinstance(branch.interpretation, uproot.interp.asjagged) and isinstance(branch.interpretation.asdtype, uproot.interp.asdtype):
                # jagged numeric branch: fixed dimensions inside one outer List
                subnode = oamap.schema.Primitive(branch.interpretation.asdtype.todtype, data=branchname, namespace=namespace)
                for i in range(len(branch.interpretation.asdtype.todims)):
                    subnode = oamap.schema.List(subnode, starts="{0}:/{1}".format(branchname, i), stops="{0}:/{1}".format(branchname, i), namespace=namespace)
                out[fieldname] = oamap.schema.List(subnode, starts=branchname, stops=branchname, namespace=namespace)

            elif isinstance(branch.interpretation, uproot.interp.asstrings):
                # bugfix: CHARTYPE lives in uproot.interp.strings; the
                # original referenced nonexistent ``oamap.interp.strings``.
                out[fieldname] = oamap.schema.List(oamap.schema.Primitive(uproot.interp.strings.CHARTYPE, data=branchname, namespace=namespace), starts=branchname, stops=branchname, namespace=namespace, name="ByteString")

        return out

    def combinelists(schema):
        # if every field of a Record is a List and all of their branches are
        # counted by the same counter branch, merge them into one List of
        # Records whose starts/stops come from that counter
        if isinstance(schema, oamap.schema.Record) and all(isinstance(x, oamap.schema.List) for x in schema.fields.values()):
            out = oamap.schema.List(oamap.schema.Record(OrderedDict(), namespace=namespace), namespace=namespace)

            countbranch = None
            for fieldname, field in schema.items():
                try:
                    branch = tree[field.starts]
                except KeyError:
                    return schema

                if branch.countbranch is None:
                    return schema

                if countbranch is None:
                    countbranch = branch.countbranch
                elif countbranch is not branch.countbranch:
                    # fields counted by different branches: leave unmerged
                    return schema

                out.content[fieldname] = field.content

            if countbranch is not None:
                countbranchname = countbranch.name
                if not isinstance(countbranchname, str):
                    countbranchname = countbranchname.decode("ascii")
                out.starts = countbranchname
                out.stops = countbranchname
                return out

        return schema

    entries = accumulate(tree).replace(combinelists)
    entries.name = "Entry"

    doc = tree.title
    if not isinstance(doc, str):
        doc = doc.decode("ascii")

    return oamap.schema.List(entries, namespace=namespace, doc=doc)
166 |
class ROOTBackend(oamap.database.Backend):
    """Backend serving oamap arrays from ROOT files.

    One file path per partition; every partition uses the same tree path
    within its file.
    """

    def __init__(self, paths, treepath, namespace):
        self._paths = tuple(paths)
        self._treepath = treepath
        self._namespace = namespace

    @property
    def args(self):
        # constructor arguments (excluding namespace)
        return (self._paths, self._treepath)

    @property
    def namespace(self):
        return self._namespace

    def tojson(self):
        """Serialize to a JSON-compatible dict; namespace is stored externally."""
        cls = self.__class__
        return {"class": cls.__module__ + "." + cls.__name__,
                "paths": list(self._paths),
                "treepath": self._treepath}

    @staticmethod
    def fromjson(obj, namespace):
        """Inverse of tojson; the namespace is supplied by the caller."""
        return ROOTBackend(obj["paths"], obj["treepath"], namespace)

    def instantiate(self, partitionid):
        # open the file belonging to this partition and wrap its tree
        return ROOTArrays.frompath(self._paths[partitionid], self._treepath, self)
192 |
class ROOTArrays(object):
    """Array-provider that serves oamap roles from a single uproot TTree.

    Array names follow the convention produced by _schema: a plain branch
    name for data/starts/stops, or "branchname:/i" for the synthetic
    regular-dimension lists of a fixed-size leaf.
    """

    @staticmethod
    def frompath(path, treepath, backend):
        """Open *path*, wrap the tree at *treepath*; remember the file source
        so close() can release it."""
        import uproot
        file = uproot.open(path)
        out = ROOTArrays(file[treepath], backend)
        out._source = file._context.source
        return out

    def __init__(self, tree, backend):
        self._tree = tree
        self._backend = backend
        # shared key cache for repeated tree.arrays calls
        self._keycache = {}

    @property
    def tree(self):
        return self._tree

    @property
    def backend(self):
        return self._backend

    def getall(self, roles):
        """Fetch arrays for all *roles* in one tree.arrays call and translate
        them into the starts/stops/data arrays oamap expects."""
        import uproot

        def chop(role):
            # split "branchname:leafname" on the LAST colon; leafname is None
            # for plain branch names
            name = str(role).encode("ascii")
            try:
                colon = name.rindex(b":")
            except ValueError:
                return name, None
            else:
                return name[:colon], name[colon + 1:]

        arrays = self._tree.arrays(set(chop(x)[0] for x in roles), keycache=self._keycache)

        out = {}
        for role in roles:
            branchname, leafname = chop(role)
            array = arrays[branchname]

            if leafname is not None and leafname.startswith(b"/"):
                # ":/depth" role: synthesize offsets for a fixed (regular)
                # dimension of the leaf; strides come from the array's shape
                if isinstance(array, (uproot.interp.jagged.JaggedArray, uproot.interp.strings.Strings)):
                    array = array.content

                length = array.shape[0]
                stride = 1
                for depth in range(int(leafname[1:])):
                    length *= array.shape[depth + 1]
                    stride *= array.shape[depth + 1]

                # starts and stops are filled together, so the "role not in
                # out" guard avoids recomputing when the partner was done first
                if isinstance(role, oamap.generator.StartsRole) and role not in out:
                    offsets = numpy.arange(0, (length + 1)*stride, stride)
                    out[role] = offsets[:-1]
                    out[role.stops] = offsets[1:]

                elif isinstance(role, oamap.generator.StopsRole) and role not in out:
                    offsets = numpy.arange(0, (length + 1)*stride, stride)
                    out[role.starts] = offsets[:-1]
                    out[role] = offsets[1:]

            elif isinstance(array, numpy.ndarray):
                # flat branch: counts double as list lengths; data is flattened
                if isinstance(role, oamap.generator.StartsRole) and role not in out:
                    starts, stops = oamap.backend.packing.ListCounts.fromcounts(array)
                    out[role] = starts
                    out[role.stops] = stops

                elif isinstance(role, oamap.generator.StopsRole) and role not in out:
                    starts, stops = oamap.backend.packing.ListCounts.fromcounts(array)
                    out[role.starts] = starts
                    out[role] = stops

                elif isinstance(role, oamap.generator.DataRole):
                    if leafname is None:
                        out[role] = array.reshape(-1)
                    else:
                        # structured array: pick the named leaf
                        out[role] = array[leafname].reshape(-1)

            elif isinstance(array, (uproot.interp.jagged.JaggedArray, uproot.interp.strings.Strings)):
                # jagged/string branch already carries starts, stops, content
                if isinstance(role, oamap.generator.StartsRole):
                    out[role] = array.starts

                elif isinstance(role, oamap.generator.StopsRole):
                    out[role] = array.stops

                elif isinstance(role, oamap.generator.DataRole):
                    if leafname is None:
                        out[role] = array.content.reshape(-1)
                    else:
                        out[role] = array.content[leafname].reshape(-1)

            if role not in out:
                # every requested role must have been handled by one branch above
                raise AssertionError(role)

        return out

    def close(self):
        # release the file source (if we opened it) and drop the tree
        if hasattr(self, "_source"):
            self._source.close()
        self._tree = None
293 |
--------------------------------------------------------------------------------
/tests/test_proxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import unittest
32 |
33 | import oamap.proxy
34 | from oamap.schema import *
35 |
class TestProxy(unittest.TestCase):
    """Exercises the proxy classes by calling a schema on a dict of arrays.

    Array-name suffix conventions used in the fixtures: -B starts, -E stops,
    -D<dtype> data, -M mask (negative means missing), -T union tags,
    -O union offsets, -U<i> union possibility, -F<name>/-F<i> record/tuple
    field, -L list content, -P pointer positions, -X pointer target.
    """

    def runTest(self):
        pass

    def test_ListProxy_slicing(self):
        """Slices of a ListProxy must agree with slices of a plain list,
        including re-slicing an already-sliced view."""
        range100 = list(range(100))
        proxy100 = List(Primitive("i8"))({"object-B": [0], "object-E": [100], "object-L-Di8": range100})
        self.assertEqual(range100, proxy100)
        for start1 in [None, 0, 5, 95, 110, -1, -5, -95, -110]:
            for stop1 in [None, 0, 5, 95, 110, -1, -5, -95, -110]:
                for step1 in [None, 1, 2, 5, 90, 110, -1, -2, -5, -90, -110]:
                    sliced_range100 = range100[start1:stop1:step1]
                    sliced_proxy100 = proxy100[start1:stop1:step1]
                    self.assertEqual(sliced_range100, sliced_proxy100)
                    if len(sliced_range100) > 0:
                        for start2 in [None, 0, 5, -1, -5]:
                            for stop2 in [None, 0, 5, -1, -5]:
                                for step2 in [None, 1, 3, -1, -3]:
                                    self.assertEqual(sliced_range100[start2:stop2:step2], sliced_proxy100[start2:stop2:step2])

    def test_Primitive(self):
        """Primitives with and without a nullable mask."""
        self.assertEqual(Primitive("f8")({"object-Df8": [3.14]}), 3.14)
        # self.assertEqual(Primitive("f8", dims=(2, 2))({"object-Df8-2-2": [[[1, 2], [3, 4]]]}), [[1, 2], [3, 4]])
        self.assertEqual(Primitive("f8", nullable=True)({"object-Df8": [], "object-M": [-1]}), None)
        self.assertEqual(Primitive("f8", nullable=True)({"object-Df8": [3.14], "object-M": [0]}), 3.14)

    def test_List(self):
        """Flat and nested lists, with nullability at either nesting level."""
        self.assertEqual(List(Primitive("f8"))({"object-B": [0], "object-E": [5], "object-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]}), [1.1, 2.2, 3.3, 4.4, 5.5])
        self.assertEqual(len(List(Primitive("f8"))({"object-B": [0], "object-E": [5], "object-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})), 5)
        self.assertEqual(List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]}), [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(len(List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})), 3)
        self.assertEqual(list(map(len, List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]}))), [2, 0, 3])
        self.assertEqual(List(List(Primitive("f8")), nullable=True)({"object-B": [], "object-E": [], "object-L-B": [], "object-L-E": [], "object-L-L-Df8": [], "object-M": [-1]}), None)
        self.assertEqual(List(List(Primitive("f8")), nullable=True)({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5], "object-M": [0]}), [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(List(List(Primitive("f8"), nullable=True))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2], "object-L-E": [2, 2], "object-L-L-Df8": [1.1, 2.2], "object-L-M": [0, 1, -1]}), [[1.1, 2.2], [], None])
        self.assertEqual(List(List(Primitive("f8"), nullable=True), nullable=True)({"object-B": [0], "object-E": [3], "object-L-B": [0, 2], "object-L-E": [2, 2], "object-L-L-Df8": [1.1, 2.2], "object-M": [0], "object-L-M": [0, 1, -1]}), [[1.1, 2.2], [], None])

    def test_List_slices(self):
        """Indexing and slicing semantics, including negative indexes, out-of-range
        clamping, steps, and None (missing) elements."""
        x = List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})

        self.assertEqual(x[0], [1.1, 2.2])
        self.assertEqual(x[1], [])
        self.assertEqual(x[2], [3.3, 4.4, 5.5])
        self.assertEqual(x[-1], [3.3, 4.4, 5.5])
        self.assertEqual(x[-2], [])
        self.assertEqual(x[-3], [1.1, 2.2])
        self.assertRaises(IndexError, lambda: x[3])
        self.assertRaises(IndexError, lambda: x[-4])

        self.assertEqual(x[0:1], [[1.1, 2.2]])
        self.assertEqual(x[0:2], [[1.1, 2.2], []])
        self.assertEqual(x[0:3], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[:], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[:10], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[1:3], [[], [3.3, 4.4, 5.5]])
        self.assertEqual(x[2:3], [[3.3, 4.4, 5.5]])
        self.assertEqual(x[3:3], [])
        self.assertEqual(x[-3:1], [[1.1, 2.2]])
        self.assertEqual(x[-3:2], [[1.1, 2.2], []])
        self.assertEqual(x[-3:3], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[-2:3], [[], [3.3, 4.4, 5.5]])
        self.assertEqual(x[-1:3], [[3.3, 4.4, 5.5]])
        self.assertEqual(x[-1:-1], [])
        self.assertEqual(x[-10:3], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[::2], [[1.1, 2.2], [3.3, 4.4, 5.5]])
        self.assertEqual(x[1::2], [[]])

        # same list shape, but the middle element is masked out (None)
        x = List(List(Primitive("f8"), nullable=True))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2], "object-L-E": [2, 5], "object-L-M": [0, -1, 1], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})

        self.assertEqual(x[1], None)
        self.assertEqual(x[-2], None)
        self.assertEqual(x[0:2], [[1.1, 2.2], None])
        self.assertEqual(x[0:3], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[:], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[:10], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[1:3], [None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[3:3], [])
        self.assertEqual(x[-3:2], [[1.1, 2.2], None])
        self.assertEqual(x[-3:3], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[-2:3], [None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[-1:-1], [])
        self.assertEqual(x[-10:3], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[1::2], [None])

    def test_Union(self):
        """Tagged unions: tags select the possibility, offsets index into it."""
        self.assertEqual(Union([Primitive("i8"), Primitive("f8")])({"object-T": [0], "object-O": [0], "object-U0-Di8": [1], "object-U1-Df8": []}), 1)
        self.assertEqual(List(Union([Primitive("i8"), Primitive("f8")]))({"object-B": [0], "object-E": [7], "object-L-T": [0, 0, 1, 1, 1, 0, 0], "object-L-O": [0, 1, 0, 1, 2, 2, 3], "object-L-U0-Di8": [1, 2, 3, 4], "object-L-U1-Df8": [1.1, 2.2, 3.3]}), [1, 2, 1.1, 2.2, 3.3, 3, 4])

        self.assertEqual(list(List(Union([Primitive("i8"), Primitive("f8")], nullable=True))({"object-L-U1-Df8": [1.1, 3.3], "object-L-T": [0, 1, 1, 0], "object-E": [7], "object-L-O": [0, 0, 1, 1], "object-L-M": [0, -1, 1, -1, 2, 3, -1], "object-L-U0-Di8": [1, 3], "object-B": [0]})), [1, None, 1.1, None, 3.3, 3, None])
        self.assertEqual(List(Union([Primitive("i8", nullable=True), Primitive("f8")]))({"object-L-U0-M": [0, -1, 1, -1], "object-L-T": [0, 0, 1, 1, 1, 0, 0], "object-E": [7], "object-L-O": [0, 1, 0, 1, 2, 2, 3], "object-L-U1-Df8": [1.1, 2.2, 3.3], "object-L-U0-Di8": [1, 3], "object-B": [0]}), [1, None, 1.1, 2.2, 3.3, 3, None])

        self.assertEqual(List(Union([Primitive("i8"), List(Primitive("f8"))]))({"object-B": [0], "object-E": [2], "object-L-T": [0, 1], "object-L-O": [0, 0], "object-L-U0-Di8": [3], "object-L-U1-B": [0], "object-L-U1-E": [3], "object-L-U1-L-Df8": [1.1, 2.2, 3.3]}), [3, [1.1, 2.2, 3.3]])

    def test_Record(self):
        """Records: attribute access, nesting in lists/unions, nullable fields."""
        x = Record({"x": Primitive("i8"), "y": Primitive("f8")})({"object-Fx-Di8": [3], "object-Fy-Df8": [3.14]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, 3.14)

        x = List(Record({"x": Primitive("i8"), "y": Primitive("f8")}))({"object-B": [0], "object-E": [3], "object-L-Fx-Di8": [1, 2, 3], "object-L-Fy-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x[0].x, 1)
        self.assertEqual(x[1].x, 2)
        self.assertEqual(x[2].x, 3)
        self.assertEqual(x[0].y, 1.1)
        self.assertEqual(x[1].y, 2.2)
        self.assertEqual(x[2].y, 3.3)

        x = List(Record({"x": Primitive("i8"), "y": Primitive("f8", nullable=True)}))({"object-B": [0], "object-E": [3], "object-L-Fx-Di8": [1, 2, 3], "object-L-Fy-Df8": [2.2], "object-L-Fy-M": [-1, 0, -1]})
        self.assertEqual(x[0].x, 1)
        self.assertEqual(x[1].x, 2)
        self.assertEqual(x[2].x, 3)
        self.assertEqual(x[0].y, None)
        self.assertEqual(x[1].y, 2.2)
        self.assertEqual(x[2].y, None)

        x = List(Record({"x": Primitive("i8"), "y": Primitive("f8")}, nullable=True))({"object-B": [0], "object-E": [3], "object-L-M": [0, -1, 1], "object-L-Fx-Di8": [1, 3], "object-L-Fy-Df8": [1.1, 3.3]})
        self.assertEqual(x[0].x, 1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2].x, 3)
        self.assertEqual(x[0].y, 1.1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2].y, 3.3)

        x = Record({"x": Primitive("i8"), "y": List(Primitive("f8"))})({"object-Fx-Di8": [3], "object-Fy-B": [0], "object-Fy-E": [3], "object-Fy-L-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, [1.1, 2.2, 3.3])

        x = Record({"x": Primitive("i8"), "y": Union([Primitive("i8"), Primitive("f8")])})({"object-Fx-Di8": [3], "object-Fy-T": [0], "object-Fy-O": [0], "object-Fy-U0-Di8": [1], "object-Fy-U1-Df8": [1.1]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, 1)

        x = Record({"x": Primitive("i8"), "y": List(Union([Primitive("i8"), Primitive("f8")]))})({"object-Fx-Di8": [3], "object-Fy-B": [0], "object-Fy-E": [3], "object-Fy-L-T": [0, 1, 1], "object-Fy-L-O": [0, 0, 1], "object-Fy-L-U0-Di8": [1], "object-Fy-L-U1-Df8": [1.1, 2.2]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, [1, 1.1, 2.2])

        x = List(Union([Primitive("i8"), Record({"x": Primitive("i8"), "y": Primitive("f8")})]))({"object-B": [0], "object-E": [4], "object-L-T": [0, 1, 1, 0], "object-L-O": [0, 0, 1, 1], "object-L-U0-Di8": [99, 98], "object-L-U1-Fx-Di8": [1, 2], "object-L-U1-Fy-Df8": [1.1, 2.2]})
        self.assertEqual(x[0], 99)
        self.assertEqual(x[1].x, 1)
        self.assertEqual(x[1].y, 1.1)
        self.assertEqual(x[2].x, 2)
        self.assertEqual(x[2].y, 2.2)
        self.assertEqual(x[3], 98)

    def test_Tuple(self):
        """Tuples: positional access, mirroring the test_Record cases."""
        x = Tuple((Primitive("i8"), Primitive("f8")))({"object-F0-Di8": [3], "object-F1-Df8": [3.14]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], 3.14)

        x = List(Tuple((Primitive("i8"), Primitive("f8"))))({"object-B": [0], "object-E": [3], "object-L-F0-Di8": [1, 2, 3], "object-L-F1-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x[0][0], 1)
        self.assertEqual(x[1][0], 2)
        self.assertEqual(x[2][0], 3)
        self.assertEqual(x[0][1], 1.1)
        self.assertEqual(x[1][1], 2.2)
        self.assertEqual(x[2][1], 3.3)

        x = List(Tuple((Primitive("i8"), Primitive("f8", nullable=True))))({"object-B": [0], "object-E": [3], "object-L-F0-Di8": [1, 2, 3], "object-L-F1-Df8": [2.2], "object-L-F1-M": [-1, 0, -1]})
        self.assertEqual(x[0][0], 1)
        self.assertEqual(x[1][0], 2)
        self.assertEqual(x[2][0], 3)
        self.assertEqual(x[0][1], None)
        self.assertEqual(x[1][1], 2.2)
        self.assertEqual(x[2][1], None)

        x = List(Tuple((Primitive("i8"), Primitive("f8")), nullable=True))({"object-B": [0], "object-E": [3], "object-L-M": [0, -1, 1], "object-L-F0-Di8": [1, 3], "object-L-F1-Df8": [1.1, 3.3]})
        self.assertEqual(x[0][0], 1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2][0], 3)
        self.assertEqual(x[0][1], 1.1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2][1], 3.3)

        x = Tuple((Primitive("i8"), List(Primitive("f8"))))({"object-F0-Di8": [3], "object-F1-B": [0], "object-F1-E": [3], "object-F1-L-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], [1.1, 2.2, 3.3])

        x = Tuple((Primitive("i8"), Union([Primitive("i8"), Primitive("f8")])))({"object-F0-Di8": [3], "object-F1-T": [0], "object-F1-O": [0], "object-F1-U0-Di8": [1], "object-F1-U1-Df8": [1.1]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], 1)

        x = Tuple((Primitive("i8"), List(Union([Primitive("i8"), Primitive("f8")]))))({"object-F0-Di8": [3], "object-F1-B": [0], "object-F1-E": [3], "object-F1-L-T": [0, 1, 1], "object-F1-L-O": [0, 0, 1], "object-F1-L-U0-Di8": [1], "object-F1-L-U1-Df8": [1.1, 2.2]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], [1, 1.1, 2.2])

        x = List(Union([Primitive("i8"), Tuple((Primitive("i8"), Primitive("f8")))]))({"object-B": [0], "object-E": [4], "object-L-T": [0, 1, 1, 0], "object-L-O": [0, 0, 1, 1], "object-L-U0-Di8": [99, 98], "object-L-U1-F0-Di8": [1, 2], "object-L-U1-F1-Df8": [1.1, 2.2]})
        self.assertEqual(x[0], 99)
        self.assertEqual(x[1][0], 1)
        self.assertEqual(x[1][1], 1.1)
        self.assertEqual(x[2][0], 2)
        self.assertEqual(x[2][1], 2.2)
        self.assertEqual(x[3], 98)

    def test_Pointer(self):
        """Pointers: indirection into a target, including self-referential
        (cyclic) structures such as trees and linked lists."""
        self.assertEqual(Pointer(Primitive("f8"))({"object-P": [3], "object-X-Df8": [0.0, 1.1, 2.2, 3.3, 4.4]}), 3.3)

        # a self-referential schema: a pointer to a list of itself
        tree = Pointer(None)
        tree.target = List(tree)

        self.assertEqual(tree({"object-P": [0], "object-X-B": [0], "object-X-E": [0], "object-X-L-P-object-X-Df8": []}), [])

        # a cycle renders as "[...]" rather than recursing forever
        self.assertEqual(repr(tree({"object-P": [0], "object-X-B": [0], "object-X-E": [1], "object-X-L-P-object-X": [0]})), "[[...]]")

        self.assertEqual(tree({"object-P": [0, 1], "object-X-B": [0, 1], "object-X-E": [1, 1], "object-X-L-P-object-X": [1]}), [[]])
        self.assertEqual(tree({"object-P": [0, 1], "object-X-B": [0, 2], "object-X-E": [2, 2], "object-X-L-P-object-X": [1, 1]}), [[], []])

        # circular linked list via a pointer field on a record
        linkedlist = Record({"label": Primitive("i8")})
        linkedlist["next"] = Pointer(linkedlist)

        x = linkedlist({"object-Flabel-Di8": [0, 1, 2], "object-Fnext-P-object": [1, 2, 0]})
        self.assertEqual(x.label, 0)
        self.assertEqual(x.next.label, 1)
        self.assertEqual(x.next.next.label, 2)
        self.assertEqual(x.next.next.next.label, 0)

        # nullable pointer terminates the chain with None
        linkedlist = Record({"label": Primitive("i8")})
        linkedlist["next"] = Pointer(linkedlist, nullable=True)

        x = linkedlist({"object-Flabel-Di8": [0, 1, 2], "object-Fnext-P-object": [1, 2], "object-Fnext-M": [0, 1, -1]})
        self.assertEqual(x.label, 0)
        self.assertEqual(x.next.label, 1)
        self.assertEqual(x.next.next.label, 2)
        self.assertEqual(x.next.next.next, None)
257 |
--------------------------------------------------------------------------------
/oamap/proxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import bisect
32 | import json
33 | import numbers
34 | import sys
35 | import math
36 |
37 | import numpy
38 |
39 | import oamap.util
40 |
# Python 3 removed ``xrange``; alias it to the lazy ``range`` builtin so the
# Py2-style generator expressions below work under both major versions.
if sys.version_info[0] > 2:
    xrange = range

# base class of all runtime types that require proxies: List, Record, and Tuple
class Proxy(object): pass
46 |
def tojson(value):
    """Recursively convert a proxy (or plain value) to JSON-compatible data.

    List/Tuple proxies become Python lists, Record proxies become dicts,
    integers/floats become plain ints/floats (non-finite floats become the
    strings "nan", "inf", "-inf"), complex numbers become
    {"real": ..., "imag": ...} dicts, and ndarrays become nested lists.
    Anything else is returned unchanged.
    """
    if isinstance(value, ListProxy):
        return [tojson(x) for x in value]
    elif isinstance(value, RecordProxy):
        return dict((n, tojson(getattr(value, n))) for n in value._fields)
    elif isinstance(value, TupleProxy):
        return [tojson(x) for x in value]
    elif isinstance(value, (numbers.Integral, numpy.integer)):
        return int(value)
    elif isinstance(value, (numbers.Real, numpy.floating)):
        # JSON cannot represent non-finite floats; encode them as strings
        if math.isnan(value):
            return "nan"
        elif value == float("-inf"):
            return "-inf"
        elif value == float("inf"):
            return "inf"
        else:
            return float(value)
    elif isinstance(value, (numbers.Complex, numpy.complexfloating)):
        # bugfix: was ``numpy.complex``, a deprecated alias of the builtin
        # ``complex`` removed in NumPy 1.24 (AttributeError when reached);
        # numbers.Complex already covers builtin complex, and
        # numpy.complexfloating covers NumPy's complex scalar types.
        return {"real": tojson(value.real), "imag": tojson(value.imag)}
    elif isinstance(value, numpy.ndarray):
        return value.tolist()
    else:
        return value
71 |
def tojsonstring(value, *args, **kwds):
    """Serialize *value* (via tojson) to a JSON string; extra arguments are
    forwarded to json.dumps."""
    jsonable = tojson(value)
    return json.dumps(jsonable, *args, **kwds)
74 |
def tojsonfile(file, value, *args, **kwds):
    """Serialize *value* (via tojson) into the open file object *file*;
    extra arguments are forwarded to json.dump.

    Bugfix: the arguments to json.dump were reversed — its signature is
    json.dump(obj, fp), but the file object was being passed as the object
    to serialize, so every call raised TypeError.
    """
    json.dump(tojson(value), file, *args, **kwds)
77 |
78 | ################################################################ Lists
79 |
80 | class ListProxy(Proxy):
81 | __slots__ = ["_generator", "_arrays", "_cache", "_whence", "_stride", "_length"]
82 |
    def __init__(self, generator, arrays, cache, whence, stride, length):
        """Create a lazy list view.

        generator: oamap ListGenerator describing the content.
        arrays: backend mapping from array names to array data.
        cache: generator-private cache (from generator._newcache()).
        whence: content index of this view's first element.
        stride: distance between successive elements (negative for reversed
            views; never zero).
        length: number of elements visible through this view.
        """
        assert stride != 0
        assert length >= 0
        self._generator = generator
        self._arrays = arrays
        self._cache = cache
        self._whence = whence
        self._stride = stride
        self._length = length
92 |
    def __repr__(self, memo=None):
        """Render as a Python list literal, truncating long lists to the first
        and last five elements.

        ``memo`` accumulates (generator id, whence, stride, length) keys of
        lists already being printed, so cyclic structures (e.g. built with
        Pointer) render as "[...]" instead of recursing forever.
        """
        if memo is None:
            memo = set()
        key = (id(self._generator), self._whence, self._stride, self._length)
        if key in memo:
            return "[...]"
        # union (not add) so sibling branches don't see each other's keys
        memo = memo.union(set([key]))
        if len(self) > 10:
            before = self[:5]
            after = self[-5:]
            return "[{0}, ..., {1}]".format(", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in before),
                                            ", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in after))
        else:
            return "[{0}]".format(", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in self))
107 |
108 | def __str__(self):
109 | return repr(self)
110 |
    def __getattr__(self, field):
        """Resolve unknown attributes against oamap.operations.

        Any registered action, transformation, or recasting becomes callable
        as a bound method on the proxy; later registrations win (reversed
        iteration). Imported lazily to avoid a circular import at module load.
        """
        if field in self.__dict__:
            return self.__dict__[field]
        else:
            import oamap.operations
            for n, x in reversed(list(oamap.operations.actions.items()) + list(oamap.operations.transformations.items()) + list(oamap.operations.recastings.items())):
                if field == n:
                    return lambda *args, **kwargs: x(self, *args, **kwargs)
            raise AttributeError("ListProxy has no attribute {0}".format(repr(field)))
120 |
    @property
    def schema(self):
        # the oamap schema this proxy's generator was compiled from
        return self._generator.schema
124 |
125 | @property
126 | def fields(self):
127 | generator = self._generator
128 | while isinstance(generator, oamap.generator.ListGenerator):
129 | generator = generator.content
130 | if isinstance(generator, oamap.generator.RecordGenerator):
131 | return list(generator.fields)
132 | else:
133 | raise TypeError("list does not contain records")
134 |
135 | def indexed(self):
136 | return self
137 |
138 | def __len__(self):
139 | return self._length
140 |
141 | def __getslice__(self, start, stop):
142 | return self.__getitem__(slice(start, stop))
143 |
144 | def __getitem__(self, index):
145 | if isinstance(index, slice):
146 | start, stop, step = oamap.util.slice2sss(index, self._length)
147 |
148 | whence = self._whence + self._stride*start
149 | stride = self._stride*step
150 |
151 | # length = int(math.ceil(float(abs(stop - start)) / abs(step)))
152 | d, m = divmod(abs(start - stop), abs(step))
153 | length = d + (1 if m != 0 else 0)
154 |
155 | return ListProxy(self._generator, self._arrays, self._cache, whence, stride, length)
156 |
157 | else:
158 | normalindex = index if index >= 0 else index + self._length
159 | if not 0 <= normalindex < self._length:
160 | raise IndexError("index {0} is out of bounds for size {1}".format(index, self._length))
161 | return self._generator.content._generate(self._arrays, self._whence + self._stride*normalindex, self._cache)
162 |
163 | def __iter__(self):
164 | return (self._generator.content._generate(self._arrays, i, self._cache) for i in xrange(self._whence, self._whence + self._stride*self._length, self._stride))
165 |
166 | def __hash__(self):
167 | # lists aren't usually hashable, but since ListProxy is immutable, we can add this feature
168 | return hash((ListProxy,) + tuple(self))
169 |
170 | def __eq__(self, other):
171 | if isinstance(other, ListProxy):
172 | return list(self) == list(other)
173 | elif isinstance(other, list):
174 | return list(self) == other
175 | else:
176 | return False
177 |
178 | def __lt__(self, other):
179 | if isinstance(other, ListProxy):
180 | return list(self) < list(other)
181 | elif isinstance(other, list):
182 | return list(self) < other
183 | else:
184 | raise TypeError("unorderable types: list() < {1}()".format(other.__class__))
185 |
186 | # all of the following emulate normal list functionality using the overloaded methods above
187 |
188 | def __ne__(self, other): return not self.__eq__(other)
189 | def __le__(self, other): return self.__lt__(other) or self.__eq__(other)
190 | def __gt__(self, other): return not self.__lt__(other) and not self.__eq__(other)
191 | def __ge__(self, other): return not self.__lt__(other)
192 |
193 | def __add__(self, other): return list(self) + list(other)
194 | def __mul__(self, reps): return list(self) * reps
195 | def __rmul__(self, reps): return reps * list(self)
196 | def __reversed__(self):
197 | if sys.version_info[0] <= 2:
198 | return (self[i - 1] for i in xrange(len(self), 0, -1))
199 | else:
200 | return (self[i - 1] for i in range(len(self), 0, -1))
201 | def count(self, value): return sum(1 for x in self if x == value)
202 | def index(self, value, *args):
203 | if len(args) == 0:
204 | start = 0
205 | stop = len(self)
206 | elif len(args) == 1:
207 | start = args[0]
208 | stop = len(self)
209 | elif len(args) == 2:
210 | start, stop = args
211 | else:
212 | raise TypeError("index() takes at most 3 arguments ({0} given)".format(1 + len(args)))
213 | for i, x in enumerate(self):
214 | if x == value:
215 | return i
216 | raise ValueError("{0} is not in list".format(value))
217 |
218 | def __contains__(self, value):
219 | for x in self:
220 | if x == value:
221 | return True
222 | return False
223 |
224 | ################################################################ Records
225 |
class RecordProxy(Proxy):
    """Immutable, lazily evaluated record view; fields are generated on attribute access."""

    __slots__ = ["_generator", "_arrays", "_cache", "_index"]

    def __init__(self, generator, arrays, cache, index):
        self._generator = generator
        self._arrays = arrays
        self._cache = cache
        self._index = index

    def __repr__(self):
        return "<{0} at index {1}>".format("Record" if self._generator.name is None else self._generator.name, self._index)

    def __str__(self):
        return repr(self)

    @property
    def _fields(self):
        return list(self._generator.fields)

    def __dir__(self):
        # advertise the record's field names for tab-completion
        return dir(super(RecordProxy, self)) + list(str(x) for x in self._fields)

    def __getattr__(self, field):
        try:
            # actual field names get priority (they're not allowed to start with underscore)
            generator = self._generator.fields[field]
        except KeyError:
            # barring any conflicts with actual field names, "schema" and "fields" are convenient
            if field == "schema":
                return self._generator.schema
            elif field == "fields":
                return self._fields
            elif field == "name":
                return self._generator.name
            else:
                # fall back to oamap.operations exposed as bound methods
                import oamap.operations
                for n, x in reversed(list(oamap.operations.actions.items()) + list(oamap.operations.transformations.items()) + list(oamap.operations.recastings.items())):
                    if field == n:
                        return lambda *args, **kwargs: x(self, *args, **kwargs)
                raise AttributeError("{0} object has no attribute {1}".format(repr("Record" if self._generator.name is None else self._generator.name), repr(field)))
        else:
            return generator._generate(self._arrays, self._index, self._cache)

    def __hash__(self):
        return hash((RecordProxy, self._generator.name) + tuple(self._generator.fields.items()))

    def __eq__(self, other):
        return isinstance(other, RecordProxy) and self._generator.name == other._generator.name and set(self._generator.fields) == set(other._generator.fields) and all(self.__getattr__(n) == other.__getattr__(n) for n in self._generator.fields)

    def __lt__(self, other):
        # records are only orderable against records of the same name and field set
        if isinstance(other, RecordProxy) and self._generator.name == other._generator.name and set(self._generator.fields) == set(other._generator.fields):
            return [self.__getattr__(n) for n in self._generator.fields] < [other.__getattr__(n) for n in self._generator.fields]
        else:
            # fixed: the name part was built with "".format(...), which always
            # produced an empty string; show the record's name when it has one
            raise TypeError("unorderable types: {0}() < {1}()".format("" if self._generator.name is None else repr(self._generator.name), other.__class__))

    def __ne__(self, other): return not self.__eq__(other)
    def __le__(self, other): return self.__lt__(other) or self.__eq__(other)
    def __gt__(self, other): return not self.__lt__(other) and not self.__eq__(other)
    def __ge__(self, other): return not self.__lt__(other)
285 |
286 | ################################################################ Tuples
287 |
class TupleProxy(Proxy):
    """Immutable, lazily evaluated tuple view; items are generated on access."""

    __slots__ = ["_generator", "_arrays", "_cache", "_index"]

    def __init__(self, generator, arrays, cache, index):
        self._generator = generator
        self._arrays = arrays
        self._cache = cache
        self._index = index

    def __repr__(self, memo=None):
        # memo guards against unbounded recursion through self-referencing data
        if memo is None:
            memo = set()
        key = (self._index,) + tuple(id(x) for x in self._generator.types)
        if key in memo:
            return "(...)"
        memo = memo.union(set([key]))
        return "({0}{1})".format(", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in self), "," if len(self) == 1 else "")

    def __str__(self):
        return repr(self)

    def __getattr__(self, field):
        # expose oamap.operations (actions/transformations/recastings) as bound methods
        if field in self.__dict__:
            return self.__dict__[field]
        else:
            import oamap.operations
            for n, x in reversed(list(oamap.operations.actions.items()) + list(oamap.operations.transformations.items()) + list(oamap.operations.recastings.items())):
                if field == n:
                    return lambda *args, **kwargs: x(self, *args, **kwargs)
            raise AttributeError("TupleProxy has no attribute {0}".format(repr(field)))

    def __len__(self):
        return len(self._generator.types)

    def __getslice__(self, start, stop):
        # for old-Python compatibility
        return self.__getitem__(slice(start, stop))

    def __getitem__(self, index):
        if isinstance(index, slice):
            # fixed: slice.indices normalizes negative and out-of-range bounds
            # exactly like a builtin tuple; the old code fed raw slice values to
            # range, so e.g. self[-2:] produced wrong results
            return tuple(self[i] for i in range(*index.indices(len(self))))

        else:
            return self._generator.types[index]._generate(self._arrays, self._index, self._cache)

    def __iter__(self):
        return (t._generate(self._arrays, self._index, self._cache) for t in self._generator.types)

    def __hash__(self):
        return hash(tuple(self))

    def __eq__(self, other):
        if isinstance(other, TupleProxy):
            return tuple(self) == tuple(other)
        elif isinstance(other, tuple):
            return tuple(self) == other
        else:
            return False

    def __lt__(self, other):
        if isinstance(other, TupleProxy):
            return tuple(self) < tuple(other)
        elif isinstance(other, tuple):
            return tuple(self) < other
        else:
            # fixed: the message referenced {1} with only one format argument,
            # which raised IndexError instead of the intended TypeError
            raise TypeError("unorderable types: tuple() < {0}()".format(other.__class__))

    # all of the following emulate normal tuple functionality using the overloaded methods above

    def __ne__(self, other): return not self.__eq__(other)
    def __le__(self, other): return self.__lt__(other) or self.__eq__(other)
    def __gt__(self, other): return not self.__lt__(other) and not self.__eq__(other)
    def __ge__(self, other): return not self.__lt__(other)

    def __add__(self, other): return tuple(self) + tuple(other)
    def __mul__(self, reps): return tuple(self) * reps
    def __rmul__(self, reps): return reps * tuple(self)
    def __reversed__(self):
        return (self[i - 1] for i in range(len(self), 0, -1))
    def count(self, value): return sum(1 for x in self if x == value)
    def index(self, value, *args):
        if len(args) == 0:
            start = 0
            stop = len(self)
        elif len(args) == 1:
            start = args[0]
            stop = len(self)
        elif len(args) == 2:
            start, stop = args
        else:
            raise TypeError("index() takes at most 3 arguments ({0} given)".format(1 + len(args)))
        for i, x in enumerate(self):
            if x == value:
                return i
        raise ValueError("{0} is not in list".format(value))

    def __contains__(self, value):
        for x in self:
            if x == value:
                return True
        return False
393 |
--------------------------------------------------------------------------------
/oamap/inference.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import re
32 | import numbers
33 | import sys
34 | import math
35 |
36 | import numpy
37 |
38 | import oamap.schema
39 | from oamap.util import OrderedDict
40 |
# Python 3 has no basestring builtin; fall back to str so isinstance checks
# against basestring work on both major versions.
try:
    basestring
except NameError:
    basestring = str
43 |
44 | ################################################################ inferring schemas from data
45 |
def fromdata(obj, limit=None):
    """Infer an oamap schema from a Python object (e.g. the output of json.load).

    ``limit``, if not None, caps how many elements of an iterable are examined
    at the top level.  Cyclic references raise ValueError (Pointer types cannot
    be inferred).  Returns a resolved ``oamap.schema`` object.
    """
    if limit is None or (isinstance(limit, (numbers.Integral, numpy.integer)) and limit >= 0):
        pass
    else:
        raise TypeError("limit must be None or a non-negative integer, not {0}".format(limit))

    # Intermediate nodes accumulate evidence about types while scanning the
    # data; resolve() turns them into concrete oamap.schema objects at the end.
    class Intermediate(object):
        def __init__(self, nullable):
            self.nullable = nullable

    class Unknown(Intermediate):
        def resolve(self):
            raise TypeError("could not resolve a type (e.g. all examples of a List-typed attribute are empty, can't determine its content type)")

    class Boolean(Intermediate):
        def resolve(self):
            return oamap.schema.Primitive(numpy.dtype(numpy.bool_), nullable=self.nullable)

    class Number(Intermediate):
        max_uint8 = numpy.iinfo(numpy.uint8).max
        max_uint16 = numpy.iinfo(numpy.uint16).max
        max_uint32 = numpy.iinfo(numpy.uint32).max
        max_uint64 = numpy.iinfo(numpy.uint64).max
        min_int8 = numpy.iinfo(numpy.int8).min
        max_int8 = numpy.iinfo(numpy.int8).max
        min_int16 = numpy.iinfo(numpy.int16).min
        max_int16 = numpy.iinfo(numpy.int16).max
        min_int32 = numpy.iinfo(numpy.int32).min
        max_int32 = numpy.iinfo(numpy.int32).max
        min_int64 = numpy.iinfo(numpy.int64).min
        max_int64 = numpy.iinfo(numpy.int64).max
        def __init__(self, nullable, min, max, whole, real):
            Intermediate.__init__(self, nullable)
            self.min = min
            self.max = max
            self.whole = whole
            self.real = real
        def resolve(self):
            # choose the smallest dtype that fits the observed [min, max] range
            if self.whole:
                if self.min >= 0:
                    if self.max <= self.max_uint8:
                        t = numpy.uint8
                    elif self.max <= self.max_uint16:
                        t = numpy.uint16
                    elif self.max <= self.max_uint32:
                        t = numpy.uint32
                    elif self.max <= self.max_uint64:
                        t = numpy.uint64
                    else:
                        t = numpy.float64
                else:
                    if self.min_int8 <= self.min and self.max <= self.max_int8:
                        t = numpy.int8
                    elif self.min_int16 <= self.min and self.max <= self.max_int16:
                        t = numpy.int16
                    elif self.min_int32 <= self.min and self.max <= self.max_int32:
                        t = numpy.int32
                    elif self.min_int64 <= self.min and self.max <= self.max_int64:
                        t = numpy.int64
                    else:
                        t = numpy.float64
            elif self.real:
                t = numpy.float64
            else:
                t = numpy.complex128
            return oamap.schema.Primitive(numpy.dtype(t), nullable=self.nullable)

    class String(Intermediate):
        def __init__(self, nullable, utf8):
            Intermediate.__init__(self, nullable)
            self.utf8 = utf8
        def resolve(self):
            # strings are represented as named Lists of bytes
            return oamap.schema.List(oamap.schema.Primitive(numpy.uint8), nullable=self.nullable, name=("UTF8String" if self.utf8 else "ByteString"))

    class IntermediateList(Intermediate):
        def __init__(self, nullable, content):
            Intermediate.__init__(self, nullable)
            self.content = content
        def resolve(self):
            return oamap.schema.List(self.content.resolve(), nullable=self.nullable)

    class IntermediateRecord(Intermediate):
        def __init__(self, nullable, fields, name):
            Intermediate.__init__(self, nullable)
            self.fields = fields
            self.name = name
        def resolve(self):
            return oamap.schema.Record(dict((n, x.resolve()) for n, x in self.fields.items()), nullable=self.nullable, name=self.name)

    class IntermediateTuple(Intermediate):
        def __init__(self, nullable, types):
            Intermediate.__init__(self, nullable)
            self.types = types
        def resolve(self):
            return oamap.schema.Tuple([x.resolve() for x in self.types], nullable=self.nullable)

    # Unions are special for type-inference
    class IntermediateUnion(Intermediate):
        def __init__(self, nullable, possibilities):
            Intermediate.__init__(self, nullable)
            self.possibilities = possibilities
        def resolve(self):
            return oamap.schema.Union([x.resolve() for x in self.possibilities], nullable=self.nullable)

    # no Pointers in type-inference (we'd have to keep a big map of *everything*!)

    def flatten(possibilities):
        # splice nested Unions' possibilities into a single flat list
        return [y for x in possibilities if isinstance(x, IntermediateUnion) for y in x.possibilities] + [x for x in possibilities if not isinstance(x, IntermediateUnion)]

    def unify2(x, y):
        # merge two intermediate types into the narrowest common type
        nullable = x.nullable or y.nullable

        if isinstance(x, Unknown) and isinstance(y, Unknown):
            return Unknown(nullable)

        elif isinstance(x, Unknown):
            y.nullable = nullable
            return y

        elif isinstance(y, Unknown):
            x.nullable = nullable
            return x

        elif isinstance(x, Boolean) and isinstance(y, Boolean):
            return Boolean(nullable)

        elif isinstance(x, Number) and isinstance(y, Number):
            return Number(nullable, min(x.min, y.min), max(x.max, y.max), x.whole and y.whole, x.real and y.real)

        elif isinstance(x, String) and isinstance(y, String):
            return String(nullable, x.utf8 or y.utf8)

        elif isinstance(x, IntermediateList) and isinstance(y, IntermediateList):
            return IntermediateList(nullable, unify2(x.content, y.content))

        elif isinstance(x, IntermediateRecord) and isinstance(y, IntermediateRecord) and set(x.fields) == set(y.fields) and (x.name is None or y.name is None or x.name == y.name):
            return IntermediateRecord(nullable, dict((n, unify2(x.fields[n], y.fields[n])) for n in x.fields), name=(y.name if x.name is None else x.name))

        elif isinstance(x, IntermediateTuple) and isinstance(y, IntermediateTuple) and len(x.types) == len(y.types):
            return IntermediateTuple(nullable, [unify2(xi, yi) for xi, yi in zip(x.types, y.types)])

        elif isinstance(x, IntermediateUnion) and isinstance(y, IntermediateUnion):
            return unify(x.possibilities + y.possibilities)

        elif isinstance(x, IntermediateUnion):
            return unify(x.possibilities + [y])

        elif isinstance(y, IntermediateUnion):
            return unify([x] + y.possibilities)

        else:
            # can't be unified
            return IntermediateUnion(nullable, flatten([x, y]))

    def unify(possibilities):
        if len(possibilities) == 0:
            return Unknown(False)

        elif len(possibilities) == 1:
            return possibilities[0]

        elif len(possibilities) == 2:
            return unify2(possibilities[0], possibilities[1])

        else:
            # pairwise-merge each possibility into the first compatible bucket
            distinct = []
            for x in flatten(possibilities):
                found = False

                for i, y in enumerate(distinct):
                    merged = unify2(x, y)
                    if not isinstance(merged, IntermediateUnion):
                        distinct[i] = merged
                        found = True
                        break

                if not found:
                    distinct.append(x)

            if len(distinct) == 1:
                return distinct[0]
            else:
                return IntermediateUnion(False, flatten(distinct))

    def buildintermediate(obj, limit, memo):
        if id(obj) in memo:
            raise ValueError("cyclic reference in Python object at {0} (Pointer types cannot be inferred)".format(obj))

        # by copying, rather than modifying in-place (memo.add), we find cyclic references, rather than DAGs
        memo = memo.union(set([id(obj)]))

        if obj is None:
            return Unknown(True)

        elif obj is False or obj is True:
            return Boolean(False)

        elif isinstance(obj, (numbers.Integral, numpy.integer)):
            return Number(False, int(obj), int(obj), True, True)

        elif isinstance(obj, (numbers.Real, numpy.floating)):
            return Number(False, float(obj), float(obj), False, True)

        elif isinstance(obj, (numbers.Complex, numpy.complexfloating)):
            # fixed: numpy.complex was a deprecated alias of the builtin complex
            # (removed in NumPy 1.24); numpy.complexfloating covers numpy's
            # complex scalar types
            return Number(False, float("-inf"), float("inf"), False, False)

        elif isinstance(obj, bytes):
            # checked before basestring: on Python 2, bytes is a str subclass
            return String(False, False)

        elif isinstance(obj, basestring):
            return String(False, True)

        elif isinstance(obj, dict):
            return IntermediateRecord(False, dict((n, buildintermediate(x, limit, memo)) for n, x in obj.items()), None)

        elif isinstance(obj, tuple) and hasattr(obj, "_fields"):
            # this is a namedtuple; interpret it as a Record, rather than a Tuple
            return IntermediateRecord(False, dict((n, buildintermediate(getattr(obj, n), limit, memo)) for n in obj._fields), obj.__class__.__name__)

        elif isinstance(obj, tuple):
            return IntermediateTuple(False, [buildintermediate(x, limit, memo) for x in obj])

        else:
            try:
                limited = []
                for x in obj:
                    if limit is None or len(limited) < limit:
                        limited.append(x)
                    else:
                        break
            except TypeError:
                # not iterable, so interpret it as a Record
                return IntermediateRecord(False, dict((n, buildintermediate(getattr(obj, n), limit, memo)) for n in dir(obj) if not n.startswith("_") and not callable(getattr(obj, n))), obj.__class__.__name__)
            else:
                # iterable, so interpret it as a List
                return IntermediateList(False, unify([buildintermediate(x, None, memo) for x in obj]))

    return buildintermediate(obj, limit, set()).resolve()
284 |
285 | ################################################################ inferring schemas from a namespace
286 |
def fromnames(arraynames, prefix="object", delimiter="-"):
    """Reconstruct an oamap schema purely from a collection of array names.

    Array names encode the schema as delimited paths; the single character
    following each ``prefix + delimiter`` selects the array's role (as shown
    by the local variable names in ``recurse``): N = embedded schema name,
    M = mask, B = list starts, E = list stops, L = list content, T = union
    tags, O = union offsets, U = union possibility, F = record field / tuple
    position, P = pointer positions, X = external pointer target,
    D = primitive data.
    """
    def filter(arraynames, prefix):
        # keep only the names belonging to the subtree rooted at prefix
        return [x for x in arraynames if x.startswith(prefix)]

    def recurse(arraynames, prefix, byname, internalpointers):
        prefixdelimiter = prefix + delimiter
        # if an "N<identifier>" component follows the prefix, it carries the
        # schema node's name; fold it into the prefix before decoding roles
        name = None
        for n in arraynames:
            if n.startswith(prefixdelimiter):
                if n[len(prefixdelimiter)] == "N":
                    match = oamap.schema.Schema._identifier.match(n[len(prefixdelimiter) + 1:])
                    if match is not None:
                        name = match.group(0)
                        break

        if name is not None:
            prefix = prefixdelimiter + "N" + name
            prefixdelimiter = prefix + delimiter

        # candidate array names for each structural role at this level
        mask = prefixdelimiter + "M"
        starts = prefixdelimiter + "B"
        stops = prefixdelimiter + "E"
        content = prefixdelimiter + "L"
        tags = prefixdelimiter + "T"
        offsets = prefixdelimiter + "O"
        uniondata = prefixdelimiter + "U"
        field = prefixdelimiter + "F"
        positions = prefixdelimiter + "P"
        external = prefixdelimiter + "X"
        primitive = prefixdelimiter + "D"

        nullable = mask in arraynames
        if not nullable:
            mask = None

        if starts in arraynames and stops in arraynames:
            # List: register a placeholder first so internal Pointers can
            # reference this prefix while we recurse into the content
            byname[prefix] = None
            byname[prefix] = oamap.schema.List(recurse(filter(arraynames, content), content, byname, internalpointers), nullable=nullable, starts=None, stops=None, mask=None, name=name, doc=None)

        elif tags in arraynames:
            # Union: possibilities are numbered U0, U1, ... consecutively
            possibilities = []
            while True:
                possibility = uniondata + repr(len(possibilities))
                if any(x.startswith(possibility) for x in arraynames):
                    possibilities.append(possibility)
                else:
                    break
            byname[prefix] = None
            byname[prefix] = oamap.schema.Union([recurse(filter(arraynames, x), x, byname, internalpointers) for x in possibilities], nullable=nullable, tags=None, offsets=None, mask=None, name=name, doc=None)

        elif any(x.startswith(field) for x in arraynames):
            # Record vs Tuple: named fields (F<identifier>) vs numbered
            # positions (F0, F1, ...)
            pattern = re.compile("^" + field + "(" + oamap.schema.Schema._identifier.pattern + ")")
            fields = {}
            for x in arraynames:
                matches = pattern.match(x)
                if matches is not None:
                    if matches.group(1) not in fields:
                        fields[matches.group(1)] = []
                    fields[matches.group(1)].append(x)

            types = []
            while True:
                tpe = field + repr(len(types))
                if any(x.startswith(tpe) for x in arraynames):
                    types.append(tpe)
                else:
                    break

            # NOTE(review): "len(fields) >= 0" is always true; presumably the
            # intent is that Record is the default whenever no numbered Tuple
            # members were found -- confirm
            if len(fields) >= 0 and len(types) == 0:
                byname[prefix] = oamap.schema.Record(oamap.schema.OrderedDict([(n, recurse(fields[n], field + n, byname, internalpointers)) for n in sorted(fields)]), nullable=nullable, mask=None, name=name, doc=None)
            elif len(fields) == 0 and len(types) > 0:
                byname[prefix] = oamap.schema.Tuple([recurse(filter(arraynames, n), n, byname, internalpointers) for n in types], nullable=nullable, mask=None, name=name, doc=None)
            else:
                raise KeyError("ambiguous set of array names: may be Record or Tuple at {0}".format(repr(prefix)))

        elif any(x.startswith(positions) for x in arraynames):
            if positions in arraynames:
                # external
                byname2 = {}
                internalpointers2 = []
                target = finalize(recurse(filter(arraynames, external), external, byname2, internalpointers2), byname2, internalpointers2)
                byname[prefix] = oamap.schema.Pointer(target, nullable=nullable, positions=None, mask=None, name=name, doc=None)

            else:
                # internal
                matches = [x[len(positions) + 1:] for x in arraynames if x.startswith(positions)]
                if len(matches) != 1:
                    raise KeyError("ambiguous set of array names: more than one internal Pointer at {0}".format(repr(prefix)))
                target = None  # placeholder! see finalize
                byname[prefix] = oamap.schema.Pointer(target, nullable=nullable, positions=None, mask=None, name=name, doc=None)
                internalpointers.append((byname[prefix], matches[0]))

        elif any(x.startswith(primitive) for x in arraynames):
            # NOTE(review): the "- 1" keeps the "D" marker in the matched
            # suffix (unlike the "+ 1" in the Pointer branch); presumably
            # _str2dtype expects it -- confirm against oamap.schema
            matches = [x[len(primitive) - 1:] for x in arraynames if x.startswith(primitive)]
            if len(matches) != 1:
                raise KeyError("ambiguous set of array names: more than one Primitive at {0}".format(repr(prefix)))
            dtype = oamap.schema.Primitive._str2dtype(matches[0], delimiter)
            byname[prefix] = oamap.schema.Primitive(dtype, nullable=nullable, data=None, mask=None, name=name, doc=None)

        else:
            raise KeyError("missing array names: nothing found as {0} contents".format(repr(prefix)))

        return byname[prefix]

    def finalize(out, byname, internalpointers):
        # patch internal Pointer placeholders now that every prefix is known
        for pointer, targetname in internalpointers:
            if targetname in byname:
                pointer.target = byname[targetname]
            else:
                raise KeyError("Pointer's internal target is {0}, but there is no object with that prefix".format(repr(targetname)))
        return out

    byname = {}
    internalpointers = []
    return finalize(recurse(filter(arraynames, prefix), prefix, byname, internalpointers), byname, internalpointers)
402 |
--------------------------------------------------------------------------------
/oamap/fill.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import re
32 | from functools import reduce
33 |
34 | import oamap.generator
35 | import oamap.inference
36 | import oamap.fillable
37 |
def toarrays(fillables):
    """Snapshot every fillable as a plain array (via a full slice), keyed by name."""
    return {name: fillable[:] for name, fillable in fillables.items()}
40 |
41 | ################################################################ Python data, possibly made by json.load
42 |
def _fromdata_initialize(gen, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs):
    # Recursively prepare every fillable under `gen` for a new fill: revert any
    # uncommitted entries, collect Pointer bookkeeping into the accumulator
    # arguments, and append fillables in leaf-to-root order.  Returns the
    # "forefront" of this subtree: the number of already-committed entries.
    if isinstance(gen, oamap.generator.PrimitiveGenerator):
        fillables[gen.data].revert()
        forefront = len(fillables[gen.data])
        fillables_leaf_to_root.append(fillables[gen.data])

    elif isinstance(gen, oamap.generator.ListGenerator):
        # content is initialized before the starts/stops that index into it
        _fromdata_initialize(gen.content, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)
        fillables[gen.starts].revert()
        fillables[gen.stops].revert()
        assert len(fillables[gen.starts]) == len(fillables[gen.stops])
        forefront = len(fillables[gen.stops])
        fillables_leaf_to_root.append(fillables[gen.starts])
        fillables_leaf_to_root.append(fillables[gen.stops])

    elif isinstance(gen, oamap.generator.UnionGenerator):
        for x in gen.possibilities:
            _fromdata_initialize(x, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)
        fillables[gen.tags].revert()
        fillables[gen.offsets].revert()
        assert len(fillables[gen.tags]) == len(fillables[gen.offsets])
        forefront = len(fillables[gen.tags])
        fillables_leaf_to_root.append(fillables[gen.tags])
        fillables_leaf_to_root.append(fillables[gen.offsets])

    elif isinstance(gen, oamap.generator.RecordGenerator):
        # every field must report the same forefront; a record has no arrays of its own
        uniques = set(_fromdata_initialize(x, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) for x in gen.fields.values())
        assert len(uniques) == 1
        forefront = list(uniques)[0]

    elif isinstance(gen, oamap.generator.TupleGenerator):
        uniques = set(_fromdata_initialize(x, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) for x in gen.types)
        assert len(uniques) == 1
        forefront = list(uniques)[0]

    elif isinstance(gen, oamap.generator.PointerGenerator):
        if gen._internal and gen.target is generator and len(fillables[gen.positions]) != 0:
            raise TypeError("the root of a Schema may be the target of a Pointer, but if so, it can only be filled from data once")

        # register each Pointer once; keys are ids used for per-fill lookups
        if gen not in pointers:
            pointers.append(gen)
        pointerobjs_keys.append(id(gen))
        targetids_keys.append(id(gen.target))

        # only external targets are initialized here; internal targets are
        # covered by the traversal of the schema they live in
        if not gen._internal:
            _fromdata_initialize(gen.target, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)
        fillables[gen.positions].revert()
        forefront = len(fillables[gen.positions])
        fillables_leaf_to_root.append(fillables[gen.positions])
        positions_to_pointerobjs[gen.positions] = id(gen)

    elif isinstance(gen, oamap.generator.ExtendedGenerator):
        forefront = _fromdata_initialize(gen.generic, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    else:
        raise TypeError("unrecognized generator: {0}".format(repr(gen)))

    if isinstance(gen, oamap.generator.Masked):
        fillables[gen.mask].revert()
        # mask forefront overrides any other arrays
        forefront = len(fillables[gen.mask])
        fillables_leaf_to_root.append(fillables[gen.mask])

    return forefront
107 |
def _fromdata_forefront(gen, fillables, pointerobjs, secondary=False):
    """Return the provisional (prospective, not yet accepted) entry count for *gen*.

    secondary=True skips the mask override so a Masked node reports the
    forefront of its underlying data arrays instead of its mask array.
    NOTE(review): falls off the end (returns None) for empty Records/Tuples
    or unrecognized generator types -- presumably callers never hit that case.
    """
    if not secondary and isinstance(gen, oamap.generator.Masked):
        # mask forefront overrides any other arrays
        return fillables[gen.mask].forefront()

    elif isinstance(gen, oamap.generator.PrimitiveGenerator):
        return fillables[gen.data].forefront()

    elif isinstance(gen, oamap.generator.ListGenerator):
        return fillables[gen.stops].forefront()

    elif isinstance(gen, oamap.generator.UnionGenerator):
        return fillables[gen.tags].forefront()

    elif isinstance(gen, oamap.generator.RecordGenerator):
        # all fields advance in lock-step, so the first field's forefront suffices
        for x in gen.fields.values():
            return _fromdata_forefront(x, fillables, pointerobjs)

    elif isinstance(gen, oamap.generator.TupleGenerator):
        # same lock-step argument as for Records
        for x in gen.types:
            return _fromdata_forefront(x, fillables, pointerobjs)

    elif isinstance(gen, oamap.generator.PointerGenerator):
        # pointer targets are buffered as objects until _fromdata_finish,
        # so the forefront is the number of buffered objects
        return len(pointerobjs[id(gen)])

    elif isinstance(gen, oamap.generator.ExtendedGenerator):
        return _fromdata_forefront(gen.generic, fillables, pointerobjs)
135 |
def _fromdata_unionnullable(union):
    """Return True if *union* can absorb a None: at least one possibility is
    Masked (nullable), or is itself a Union with a nullable possibility,
    recursively."""
    for possibility in union.possibilities:
        if isinstance(possibility, oamap.generator.Masked):
            return True
        elif isinstance(possibility, oamap.generator.UnionGenerator):
            # bug fix: keep scanning the remaining possibilities when the
            # nested Union is not nullable (previously returned its result
            # directly, which could be a premature False)
            if _fromdata_unionnullable(possibility):
                return True
    return False
143 |
def _fromdata_fill(obj, gen, fillables, targetids, pointerobjs, at, pointerat):
    """Recursively append Python object *obj* into the fillables of generator node *gen*.

    at: tuple path to the current position; used only for error messages.
    targetids: {id(target gen): {id(obj): (position, obj)}}, records where each
        object landed so Pointers can be resolved later.
    pointerobjs: {id(pointer gen): [objects]}, pointer values deferred until
        _fromdata_finish.
    pointerat: {id(pointer gen): at-path}, first fill location per Pointer.
    """
    # record where this object lands, so Pointers to this target can find it
    if id(gen) in targetids:
        targetids[id(gen)][id(obj)] = (_fromdata_forefront(gen, fillables, pointerobjs), obj)

    if obj is None:
        if isinstance(gen, oamap.generator.Masked):
            fillables[gen.mask].append(gen.maskedvalue)
            return # only mask is filled
        elif isinstance(gen, oamap.generator.UnionGenerator) and _fromdata_unionnullable(gen):
            pass # mask to fill is in a Union possibility
        elif isinstance(gen, oamap.generator.ExtendedGenerator) and isinstance(gen.generic, oamap.generator.Masked):
            _fromdata_fill(obj, gen.generic, fillables, targetids, pointerobjs, at, pointerat)
            return # filled the generic generator's mask
        else:
            raise TypeError("cannot fill None where expecting type {0} at {1}".format(gen.schema, at))

    # obj is not None (except for the Union case)
    if isinstance(gen, oamap.generator.Masked):
        # non-null entry: the mask records the current data position (forefront)
        fillables[gen.mask].append(_fromdata_forefront(gen, fillables, pointerobjs, secondary=True))

    if isinstance(gen, oamap.generator.PrimitiveGenerator):
        fillables[gen.data].append(obj)

    elif isinstance(gen, oamap.generator.ListGenerator):
        start = stop = _fromdata_forefront(gen.content, fillables, pointerobjs)
        try:
            # dicts and namedtuples are iterable but represent Records, not Lists
            if isinstance(obj, dict) or (isinstance(obj, tuple) and hasattr(obj, "_fields")):
                raise TypeError
            it = iter(obj)
        except TypeError:
            raise TypeError("cannot fill {0} where expecting type {1} at {2}".format(repr(obj), gen.schema, at))
        else:
            for x in it:
                _fromdata_fill(x, gen.content, fillables, targetids, pointerobjs, at + (stop - start,), pointerat)
                stop += 1

        fillables[gen.starts].append(start)
        fillables[gen.stops].append(stop)

    elif isinstance(gen, oamap.generator.UnionGenerator):
        # choose the first possibility whose schema accepts the object
        tag = None
        for i, possibility in enumerate(gen.possibilities):
            if obj in possibility.schema:
                tag = i
                break
        if tag is None:
            raise TypeError("cannot fill {0} where expecting type {1} at {2}".format(repr(obj), gen.schema, at))

        # 'possibility' is the loop variable left over from the matching branch
        offset = _fromdata_forefront(possibility, fillables, pointerobjs)
        _fromdata_fill(obj, possibility, fillables, targetids, pointerobjs, at + ("tag" + repr(tag),), pointerat)

        fillables[gen.tags].append(tag)
        fillables[gen.offsets].append(offset)

    elif isinstance(gen, oamap.generator.RecordGenerator):
        if isinstance(obj, dict):
            # mappings: fields are looked up by key
            for n, x in gen.fields.items():
                if n not in obj:
                    raise TypeError("cannot fill {0} because its {1} field is missing at {2}".format(repr(obj), repr(n), at))
                _fromdata_fill(obj[n], x, fillables, targetids, pointerobjs, at + (n,), pointerat)
        else:
            # any other object: fields are looked up as attributes
            for n, x in gen.fields.items():
                if not hasattr(obj, n):
                    raise TypeError("cannot fill {0} because its {1} field is missing at {2}".format(repr(obj), repr(n), at))
                _fromdata_fill(getattr(obj, n), x, fillables, targetids, pointerobjs, at + (n,), pointerat)

    elif isinstance(gen, oamap.generator.TupleGenerator):
        for i, x in enumerate(gen.types):
            try:
                v = obj[i]
            except (TypeError, IndexError):
                raise TypeError("cannot fill {0} because it does not have a field {1} at {2}".format(repr(obj), i, at))
            else:
                _fromdata_fill(v, x, fillables, targetids, pointerobjs, at + (i,), pointerat)

    elif isinstance(gen, oamap.generator.PointerGenerator):
        # Pointers will be set after we see all the target values
        pointerobjs[id(gen)].append(obj)
        if id(gen) not in pointerat:
            pointerat[id(gen)] = at

    elif isinstance(gen, oamap.generator.ExtendedGenerator):
        # degenerate() lowers the extended object to the generic representation
        _fromdata_fill(gen.degenerate(obj), gen.generic, fillables, targetids, pointerobjs, at, pointerat)
227 |
def _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root):
    """Resolve deferred Pointer positions, then commit all fillables.

    Filling a missing pointer target (case 3) may itself buffer new pointer
    objects, so each pointer is processed to a fixpoint via pointerobjs2.
    Finally, update() is called leaf-to-root so that partially filled parents
    never commit before their children.
    """
    # do the pointers after everything else
    for pointer in pointers:
        while len(pointerobjs[id(pointer)]) > 0:
            # objects newly buffered while filling targets in this pass
            pointerobjs2 = {id(pointer): []}
            for obj in pointerobjs[id(pointer)]:
                if id(obj) in targetids[id(pointer.target)] and targetids[id(pointer.target)][id(obj)][1] == obj:
                    # case 1: an object in the target *is* the object in the pointer (same ids)
                    position, _ = targetids[id(pointer.target)][id(obj)]

                else:
                    position = None
                    if pointer_fromequal:
                        # fallback to quadratic complexity search
                        for key, (pos, obj2) in targetids[id(pointer.target)].items():
                            if obj == obj2:
                                position = pos
                                break

                    if position is not None:
                        # case 2: an object in the target *is equal to* the object in the pointer (only check if pointer_fromequal)
                        pass

                    else:
                        # case 3: the object was not found; it must be added to the target (beyond indexes where it can be found)
                        _fromdata_fill(obj, pointer.target, fillables, targetids, pointerobjs2, pointerat[id(pointer)], pointerat)
                        position, _ = targetids[id(pointer.target)][id(obj)]

                # every obj in pointerobjs[id(pointer)] gets *one* append
                fillables[pointer.positions].append(position)

            # loop again on whatever case 3 buffered, until nothing remains
            pointerobjs[id(pointer)] = pointerobjs2[id(pointer)]

    for fillable in fillables_leaf_to_root:
        fillable.update()
263 |
def fromdata(value, generator=None, pointer_fromequal=False):
    """Fill one value into a fresh set of in-memory arrays and return them.

    If *generator* is None it is inferred from *value*; a schema-like object
    is converted to its Generator.
    """
    gen = generator
    if gen is None:
        gen = oamap.inference.fromdata(value).generator()
    if not isinstance(gen, oamap.generator.Generator):
        gen = gen.generator()

    fillables = oamap.fillable.arrays(gen)
    filled = fromdatamore(value, fillables, generator=gen, pointer_fromequal=pointer_fromequal)
    return toarrays(filled)
271 |
def fromdatamore(value, fillables, generator=None, pointer_fromequal=False):
    """Fill *value* into existing *fillables*, appending after prior content.

    Only List schemas may be filled more than once; anything else raises
    TypeError on a second fill.  Returns the (mutated) fillables.
    """
    if generator is None:
        generator = oamap.inference.fromdata(value).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    # bookkeeping structures shared by the _fromdata_* helpers
    pointers = []
    pointerobjs_keys = []
    targetids_keys = []
    fillables_leaf_to_root = []
    positions_to_pointerobjs = {}

    _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    pointerat = {}
    targetids = {key: {} for key in targetids_keys}
    pointerobjs = {key: [] for key in pointerobjs_keys}

    already_filled = _fromdata_forefront(generator, fillables, pointerobjs)
    if already_filled != 0 and not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists can only be filled from data once")

    _fromdata_fill(value, generator, fillables, targetids, pointerobjs, (), pointerat)
    _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)

    return fillables
297 |
def fromiterdata(values, generator=None, limit=lambda entries, arrayitems, arraybytes: False, pointer_fromequal=False):
    """Fill an iterable of entries, yielding (numentries, arrays) partitions.

    Each entry is filled prospectively, then limit(entries, arrayitems,
    arraybytes) is consulted; when it returns falsy the current partition is
    closed *without* that entry and the entry becomes the first of a fresh
    partition.  NOTE(review): the default limit always returns False, which
    closes a partition after every entry -- presumably callers pass a limit
    that returns True while within budget; confirm the intended polarity.
    """
    if generator is None:
        generator = oamap.inference.fromdata(values).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()
    if not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists cannot be filled iteratively")

    # starting set of fillables
    fillables = oamap.fillable.arrays(generator)
    # bytes per item, used to convert item counts into byte counts for 'limit'
    factor = dict((n, x.dtype.itemsize) for n, x in fillables.items())

    pointers = []
    pointerobjs_keys = []
    targetids_keys = []
    fillables_leaf_to_root = []
    positions_to_pointerobjs = {}

    _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    pointerat = {}
    targetids = dict((x, {}) for x in targetids_keys)
    pointerobjs = dict((x, []) for x in pointerobjs_keys)

    start = stop = _fromdata_forefront(generator.content, fillables, pointerobjs)

    for value in values:
        # prospectively fill a value
        _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)

        # criteria for ending a limit based on forefront (_potential_ size), rather than len (_accepted_ size)
        arrayitems = {}
        arraybytes = {}
        for n, x in fillables.items():
            if n in positions_to_pointerobjs:
                # pointer positions are still buffered as objects, not yet appended
                arrayitems[n] = len(pointerobjs[positions_to_pointerobjs[n]])
            else:
                arrayitems[n] = x.forefront()
            arraybytes[n] = arrayitems[n]*factor[n]

        if not limit((stop - start) + 1, arrayitems, arraybytes):
            # accepting this entry would make the limit too large
            fillables[generator.starts].append(start)
            fillables[generator.stops].append(stop)
            _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)
            # yield a new limit of arrays
            yield stop - start, toarrays(fillables)

            # and make a new set of fillables (along with everything that depends on it)
            fillables = oamap.fillable.arrays(generator)

            pointers = []
            pointerobjs_keys = []
            targetids_keys = []
            fillables_leaf_to_root = []
            positions_to_pointerobjs = {}

            _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

            pointerat = {}
            targetids = dict((x, {}) for x in targetids_keys)
            pointerobjs = dict((x, []) for x in pointerobjs_keys)

            start = stop = _fromdata_forefront(generator.content, fillables, pointerobjs)

            # really fill it in this new partition
            _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)
            stop += 1
            for fillable in fillables_leaf_to_root:
                fillable.update()

        else:
            # else accept the data into the fillables and move on
            stop += 1
            for fillable in fillables_leaf_to_root:
                fillable.update()

    # always yield at the end
    fillables[generator.starts].append(start)
    fillables[generator.stops].append(stop)
    _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)
    yield (stop - start), toarrays(fillables)
380 |
--------------------------------------------------------------------------------
/oamap/fillable.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Copyright (c) 2017, DIANA-HEP
4 | # All rights reserved.
5 | #
6 | # Redistribution and use in source and binary forms, with or without
7 | # modification, are permitted provided that the following conditions are met:
8 | #
9 | # * Redistributions of source code must retain the above copyright notice, this
10 | # list of conditions and the following disclaimer.
11 | #
12 | # * Redistributions in binary form must reproduce the above copyright notice,
13 | # this list of conditions and the following disclaimer in the documentation
14 | # and/or other materials provided with the distribution.
15 | #
16 | # * Neither the name of the copyright holder nor the names of its
17 | # contributors may be used to endorse or promote products derived from
18 | # this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import os
32 | import math
33 | import struct
34 | import sys
35 |
36 | import numpy
37 |
38 | import oamap.generator
39 |
40 | if sys.version_info[0] > 2:
41 | xrange = range
42 |
class Fillable(object):
    """Abstract base for append-only array builders with provisional commits.

    Subclasses maintain _chunkindex/_indexinchunk (the "forefront": where the
    next value goes) and _len (the accepted length).  update() commits the
    forefront; revert() rolls the forefront back to the accepted length.
    """

    def __init__(self, dtype):
        raise NotImplementedError

    def __len__(self):
        # accepted (committed) length, not the provisional forefront
        return self._len

    def forefront(self):
        # full chunks so far plus the partial slot in the current chunk
        return self._indexinchunk + self._chunkindex * self.chunksize

    def append(self, value):
        raise NotImplementedError

    def extend(self, values):
        raise NotImplementedError

    def update(self):
        # commit: accepted length catches up with the forefront
        self._len = self.forefront()

    def revert(self):
        # roll the forefront back to the last committed length
        whole, partial = divmod(self._len, self.chunksize)
        self._chunkindex = whole
        self._indexinchunk = partial

    def close(self):
        pass

    def __getitem__(self, index):
        raise NotImplementedError

    def __array__(self, dtype=None, copy=False, order="K", subok=False, ndmin=0):
        if dtype is None:
            target = self.dtype
        elif isinstance(dtype, numpy.dtype):
            target = dtype
        else:
            target = numpy.dtype(dtype)

        # fast path: the accepted contents already satisfy the request
        if not copy and not subok and ndmin == 0 and target == self.dtype:
            return self[:]
        return numpy.array(self[:], dtype=target, copy=copy, order=order, subok=subok, ndmin=ndmin)
81 |
82 | ################################################################ make fillables
83 |
def _makefillables(generator, fillables, makefillable):
    """Recursively populate *fillables* ({array name: Fillable}) with one
    fillable for every array *generator* needs, using makefillable(name, dtype)
    to construct each one."""
    # deliberately a plain 'if', not 'elif': a Masked node gets its mask
    # fillable *in addition to* the arrays made by the branches below
    if isinstance(generator, oamap.generator.Masked):
        fillables[generator.mask] = makefillable(generator.mask, generator.maskdtype)

    if isinstance(generator, oamap.generator.PrimitiveGenerator):
        if generator.dtype is None:
            raise ValueError("dtype is unknown (None) for Primitive generator at {0}".format(repr(generator.data)))
        fillables[generator.data] = makefillable(generator.data, generator.dtype)

    elif isinstance(generator, oamap.generator.ListGenerator):
        fillables[generator.starts] = makefillable(generator.starts, generator.posdtype)
        fillables[generator.stops] = makefillable(generator.stops, generator.posdtype)
        _makefillables(generator.content, fillables, makefillable)

    elif isinstance(generator, oamap.generator.UnionGenerator):
        fillables[generator.tags] = makefillable(generator.tags, generator.tagdtype)
        fillables[generator.offsets] = makefillable(generator.offsets, generator.offsetdtype)
        for possibility in generator.possibilities:
            _makefillables(possibility, fillables, makefillable)

    elif isinstance(generator, oamap.generator.RecordGenerator):
        for field in generator.fields.values():
            _makefillables(field, fillables, makefillable)

    elif isinstance(generator, oamap.generator.TupleGenerator):
        for field in generator.types:
            _makefillables(field, fillables, makefillable)

    elif isinstance(generator, oamap.generator.PointerGenerator):
        fillables[generator.positions] = makefillable(generator.positions, generator.posdtype)
        # an internal pointer's target is elsewhere in the tree; don't recurse twice
        if not generator._internal:
            _makefillables(generator.target, fillables, makefillable)

    elif isinstance(generator, oamap.generator.ExtendedGenerator):
        _makefillables(generator.generic, fillables, makefillable)

    else:
        raise AssertionError("unrecognized generator type: {0}".format(generator))
122 |
def arrays(generator, chunksize=8192):
    """Make one in-memory FillableArray per array needed by *generator*.

    Returns a dict mapping array name to fillable.
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    def make(name, dtype):
        return FillableArray(dtype, chunksize=chunksize)

    out = {}
    _makefillables(generator, out, make)
    return out
129 |
def files(generator, directory, chunksize=8192, lendigits=16):
    """Make one FillableFile per array, stored as raw binary files in *directory*.

    The directory is created if it does not exist.  Returns a dict mapping
    array name to fillable.
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()
    if not os.path.exists(directory):
        os.mkdir(directory)

    def make(name, dtype):
        return FillableFile(os.path.join(directory, name), dtype, chunksize=chunksize, lendigits=lendigits)

    out = {}
    _makefillables(generator, out, make)
    return out
138 |
def numpyfiles(generator, directory, chunksize=8192, lendigits=16):
    """Make one FillableNumpyFile per array (self-describing .npy-style files)
    in *directory*, creating the directory if necessary.

    Returns a dict mapping array name to fillable.
    """
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()
    if not os.path.exists(directory):
        os.mkdir(directory)

    def make(name, dtype):
        return FillableNumpyFile(os.path.join(directory, name), dtype, chunksize=chunksize, lendigits=lendigits)

    out = {}
    _makefillables(generator, out, make)
    return out
147 |
148 | ################################################################ FillableArray
149 |
class FillableArray(Fillable):
    """In-memory Fillable backed by a list of equal-sized numpy chunks."""
    # Numpy arrays and list items have 96+8 byte (80+8 byte) overhead in Python 2 (Python 3)
    # compared to 8192 1-byte values (8-byte values), this is 1% overhead (0.1% overhead)
    def __init__(self, dtype, chunksize=8192):
        """dtype: anything numpy.dtype accepts; chunksize: items per chunk."""
        if not isinstance(dtype, numpy.dtype):
            dtype = numpy.dtype(dtype)
        self._data = [numpy.empty(chunksize, dtype=dtype)]
        self._len = 0            # accepted (committed) length
        self._indexinchunk = 0   # next free slot within the current chunk
        self._chunkindex = 0     # index of the current (partially filled) chunk

    @property
    def dtype(self):
        return self._data[0].dtype

    @property
    def chunksize(self):
        return self._data[0].shape[0]

    def append(self, value):
        """Append one value at the forefront (not accepted until update())."""
        if self._indexinchunk >= len(self._data[self._chunkindex]):
            # current chunk is full; advance, allocating a new chunk if needed
            while len(self._data) <= self._chunkindex + 1:
                self._data.append(numpy.empty(self.chunksize, dtype=self.dtype))
            self._indexinchunk = 0
            self._chunkindex += 1

        self._data[self._chunkindex][self._indexinchunk] = value
        self._indexinchunk += 1

    def extend(self, values):
        """Append a sequence of values, copying chunk-sized slices at a time."""
        chunkindex = self._chunkindex
        indexinchunk = self._indexinchunk

        while len(values) > 0:
            if indexinchunk >= len(self._data[chunkindex]):
                while len(self._data) <= chunkindex + 1:
                    self._data.append(numpy.empty(self.chunksize, dtype=self.dtype))
                indexinchunk = 0
                chunkindex += 1

            tofill = min(len(values), self.chunksize - indexinchunk)
            self._data[chunkindex][indexinchunk : indexinchunk + tofill] = values[:tofill]
            indexinchunk += tofill
            values = values[tofill:]

        self._chunkindex = chunkindex
        self._indexinchunk = indexinchunk

    def __getitem__(self, index):
        """Read accepted items: a slice returns a new contiguous numpy array,
        an integer returns a scalar.  Only len(self) items are visible."""
        if isinstance(index, slice):
            lenself = len(self)
            step = 1 if index.step is None else index.step
            if step > 0:
                start = 0 if index.start is None else index.start
                stop = lenself if index.stop is None else index.stop
            else:
                start = lenself - 1 if index.start is None else index.start
                stop = 0 if index.stop is None else index.stop

            if start < 0:
                start += lenself
            if stop < 0:
                stop += lenself

            start = min(lenself, max(0, start))
            stop = min(lenself, max(0, stop))

            if step == 0:
                raise ValueError("slice step cannot be zero")

            else:
                # translate the flat [start, stop) range into chunk coordinates
                if step > 0:
                    start_chunkindex = int(math.floor(float(start) / self.chunksize))
                    stop_chunkindex = int(math.ceil(float(stop) / self.chunksize))
                    start_indexinchunk = start - start_chunkindex*self.chunksize
                    stop_indexinchunk = stop - (stop_chunkindex - 1)*self.chunksize
                else:
                    start_chunkindex = int(math.floor(float(start) / self.chunksize))
                    stop_chunkindex = int(math.floor(float(stop) / self.chunksize)) - 1
                    start_indexinchunk = start - start_chunkindex*self.chunksize
                    stop_indexinchunk = stop - (stop_chunkindex + 1)*self.chunksize

                def beginend():
                    # yield (chunkindex, begin, end) per chunk; 'offset' carries
                    # the step phase across chunk boundaries
                    offset = 0
                    for chunkindex in xrange(start_chunkindex, stop_chunkindex, 1 if step > 0 else -1):
                        if step > 0:
                            if chunkindex == start_chunkindex:
                                begin = start_indexinchunk
                            else:
                                begin = offset
                            if chunkindex == stop_chunkindex - 1:
                                end = stop_indexinchunk
                            else:
                                end = self.chunksize
                            offset = (begin - self.chunksize) % step
                        else:
                            if chunkindex == start_chunkindex:
                                begin = start_indexinchunk
                            else:
                                begin = self.chunksize - 1 - offset
                            if chunkindex == stop_chunkindex + 1 and index.stop is not None:
                                end = stop_indexinchunk
                            else:
                                end = None
                            offset = (begin - -1) % -step
                        yield chunkindex, begin, end

                # first pass: compute the output length
                length = 0
                for chunkindex, begin, end in beginend():
                    if step > 0:
                        length += int(math.ceil(float(end - begin) / step))
                    elif end is None:
                        length += int(math.ceil(-float(begin + 1) / step))
                    else:
                        length += int(math.ceil(-float(begin - end) / step))

                out = numpy.empty(length, dtype=self.dtype)
                outi = 0

                # second pass: copy the per-chunk slices into the output
                for chunkindex, begin, end in beginend():
                    array = self._data[chunkindex][begin:end:step]

                    out[outi : outi + len(array)] = array
                    outi += len(array)
                    if outi >= len(out):
                        break

                return out

        else:
            lenself = len(self)
            normalindex = index if index >= 0 else index + lenself
            if not 0 <= normalindex < lenself:
                raise IndexError("index {0} is out of bounds for size {1}".format(index, lenself))

            # bug fix: locate the chunk with the normalized (non-negative)
            # index; divmod on a negative index picked the wrong chunk/slot
            chunkindex, indexinchunk = divmod(normalindex, self.chunksize)
            return self._data[chunkindex][indexinchunk]
287 |
288 | ################################################################ FillableFile
289 |
class FillableFile(Fillable):
    """Fillable that streams chunks to a raw binary file on disk.

    An in-memory buffer of *chunksize* items is written to the file whenever
    it fills; update()/revert() give the same provisional-commit semantics as
    the in-memory FillableArray.
    """
    def __init__(self, filename, dtype, chunksize=8192, lendigits=16):
        if not isinstance(dtype, numpy.dtype):
            dtype = numpy.dtype(dtype)
        self._data = numpy.zeros(chunksize, dtype=dtype)   # 'zeros', not 'empty' for security
        self._len = 0
        self._indexinchunk = 0
        self._chunkindex = 0
        self._filename = filename
        self._openfile(filename, lendigits)

    def _openfile(self, filename, lendigits):
        # truncate/create, then reopen unbuffered for read+write
        open(filename, "wb", 0).close()
        self._file = open(filename, "r+b", 0)
        self._datapos = 0
        # a plain file has no header

    @property
    def filename(self):
        return self._file.name

    @property
    def dtype(self):
        return self._data.dtype

    @property
    def chunksize(self):
        return self._data.shape[0]

    def append(self, value):
        """Append one value; the buffer is flushed to disk when it fills."""
        self._data[self._indexinchunk] = value
        self._indexinchunk += 1

        if self._indexinchunk == self.chunksize:
            self._flush()
            self._indexinchunk = 0
            self._chunkindex += 1

    def _flush(self):
        # write the current (possibly partial) chunk at its file position
        self._file.seek(self._datapos + self._chunkindex*self.chunksize*self.dtype.itemsize)
        # tobytes(): ndarray.tostring() was deprecated and removed in NumPy 2.0
        self._file.write(self._data.tobytes())

    def extend(self, values):
        """Append a sequence of values, flushing whole chunks as they fill."""
        chunkindex = self._chunkindex
        indexinchunk = self._indexinchunk

        while len(values) > 0:
            tofill = min(len(values), self.chunksize - indexinchunk)
            self._data[indexinchunk : indexinchunk + tofill] = values[:tofill]
            indexinchunk += tofill
            values = values[tofill:]

            if indexinchunk == self.chunksize:
                self._file.seek(self._datapos + chunkindex*self.chunksize*self.dtype.itemsize)
                self._file.write(self._data.tobytes())
                indexinchunk = 0
                chunkindex += 1

        self._chunkindex = chunkindex
        self._indexinchunk = indexinchunk

    def revert(self):
        """Roll back to the last accepted length, reloading the buffer from disk
        if the forefront had already moved into a later chunk."""
        chunkindex, self._indexinchunk = divmod(self._len, self.chunksize)
        if self._chunkindex != chunkindex:
            self._file.seek(self._datapos + chunkindex*self.chunksize*self.dtype.itemsize)
            olddata = numpy.frombuffer(self._file.read(self.chunksize*self.dtype.itemsize), dtype=self.dtype)
            self._data[:len(olddata)] = olddata

        self._chunkindex = chunkindex

    def close(self):
        # idempotent: __del__ runs after an explicit close()/__exit__, and
        # flushing a closed file would raise ValueError
        if hasattr(self, "_file") and not self._file.closed:
            self._flush()
            self._file.close()

    def __del__(self):
        self.close()

    def __enter__(self, *args, **kwds):
        return self

    def __exit__(self, *args, **kwds):
        self.close()

    def __getitem__(self, index):
        """Read back accepted items: a slice returns a read-only memmap'd
        array, an integer returns a scalar.

        Bug fixes: the parameter was named 'value' while the integer branch
        referenced an undefined 'index' (NameError), and 'itemsize' was
        referenced before assignment in the closed-file branch.
        """
        if not self._file.closed:
            self._flush()

        if isinstance(index, slice):
            lenself = len(self)
            if lenself == 0:
                array = numpy.empty(lenself, dtype=self.dtype)
            else:
                array = numpy.memmap(self.filename, self.dtype, "r", self._datapos, lenself, "C")
            if index.start is None and index.stop is None and index.step is None:
                return array
            else:
                return array[index]

        else:
            lenself = len(self)
            normalindex = index if index >= 0 else index + lenself
            if not 0 <= normalindex < lenself:
                raise IndexError("index {0} is out of bounds for size {1}".format(index, lenself))

            # needed by both branches below
            itemsize = self.dtype.itemsize
            if not self._file.closed:
                # since the file's still open, get it from here instead of making a new filehandle
                try:
                    self._file.seek(self._datapos + normalindex*itemsize)
                    return numpy.frombuffer(self._file.read(itemsize), self.dtype)[0]
                finally:
                    # restore the write position for subsequent flushes
                    self._file.seek(self._datapos + self._chunkindex*self.chunksize*self.dtype.itemsize)
            else:
                # otherwise, you have to open a new file
                with open(self.filename, "rb") as file:
                    file.seek(self._datapos + normalindex*itemsize)
                    return numpy.frombuffer(file.read(itemsize), self.dtype)[0]
408 |
409 | ################################################################ FillableNumpyFile (FillableFile with a self-describing header)
410 |
411 | class FillableNumpyFile(FillableFile):
412 | def _openfile(self, filename, lendigits):
413 | magic = b"\x93NUMPY\x01\x00"
414 | header1 = "{{'descr': {0}, 'fortran_order': False, 'shape': (".format(repr(str(self.dtype))).encode("ascii")
415 | header2 = "{0}, }}".format(repr((10**lendigits - 1,))).encode("ascii")[1:]
416 |
417 | unpaddedlen = len(magic) + 2 + len(header1) + len(header2)
418 | paddedlen = int(math.ceil(float(unpaddedlen) / self.dtype.itemsize)) * self.dtype.itemsize
419 | header2 = header2 + b" " * (paddedlen - unpaddedlen)
420 | self._lenpos = len(magic) + 2 + len(header1)
421 | self._datapos = len(magic) + 2 + len(header1) + len(header2)
422 | assert self._datapos % self.dtype.itemsize == 0
423 |
424 | open(filename, "wb", 0).close()
425 | self._file = open(filename, "r+b", 0)
426 | self._formatter = "{0:%dd}" % lendigits
427 | self._file.write(magic)
428 | self._file.write(struct.pack("".format(self.__class__.__name__, self._name, repr(self._args), repr(self._kwargs))
70 |
    def __str__(self):
        # Render as a chained-call fragment, e.g. ".name(arg1, kw=2)".
        return ".{0}({1}{2})".format(self._name, ", ".join(repr(x) for x in self._args), "".join(", {0}={1}".format(n, repr(x)) for n, x in self._kwargs.items()))
73 |
    @property
    def name(self):
        # Name the operation was registered/invoked under.
        return self._name
77 |
    @property
    def args(self):
        # Positional arguments captured at invocation time.
        return self._args
81 |
    @property
    def kwargs(self):
        # Keyword arguments captured at invocation time.
        return self._kwargs
85 |
    @property
    def function(self):
        # The wrapped callable; invoked by apply().
        return self._function
89 |
    def apply(self, data):
        # Invoke the wrapped callable as function(data, *args, **kwargs).
        return self._function(*((data,) + self._args), **self._kwargs)
92 |
class Recasting(Operation): pass        # category marker: lazily applicable view change (see Operable._notransformations)
class Transformation(Operation): pass   # category marker: full data transformation
class Action(Operation): pass           # category marker: terminal operation, executed via .act(combiner)
96 |
class Operable(object):
    """Mixin giving objects a chainable-operation interface.

    The operation methods are not written out by hand: update_operations()
    generates one method per entry in the oamap.operations registries and
    installs it on this class.  Each generated method returns a shallow clone
    of the object with a new Operation appended to its _operations chain;
    actions additionally execute immediately via act(combiner).
    """
    def __init__(self):
        self._operations = ()   # tuple of Operation instances, applied in order

    @staticmethod
    def update_operations():
        """(Re)install one generated method on Operable per registered operation."""
        def newrecasting(name, function):
            @functools.wraps(function)
            def recasting(self, *args, **kwargs):
                # clone self (shallow __dict__ copy) and append a Recasting;
                # the original object is left untouched
                out = self.__class__.__new__(self.__class__)
                Operable.__init__(out)
                out.__dict__ = self.__dict__.copy()
                out._operations = self._operations + (Recasting(name, args, kwargs, function),)
                return out
            return recasting

        def newtransformation(name, function):
            @functools.wraps(function)
            def transformation(self, *args, **kwargs):
                # same cloning pattern as newrecasting, but records a Transformation
                out = self.__class__.__new__(self.__class__)
                Operable.__init__(out)
                out.__dict__ = self.__dict__.copy()
                out._operations = self._operations + (Transformation(name, args, kwargs, function),)
                return out
            return transformation

        def newaction(name, function):
            @functools.wraps(function)
            def action(self, *args, **kwargs):
                # 'combiner' is consumed here (popped from kwargs), falling
                # back to the function's own .combiner attribute
                try:
                    combiner = kwargs.pop("combiner")
                except KeyError:
                    combiner = function.combiner
                out = self.__class__.__new__(self.__class__)
                Operable.__init__(out)
                out.__dict__ = self.__dict__.copy()
                out._operations = self._operations + (Action(name, args, kwargs, function),)
                # actions execute immediately rather than returning the clone
                return out.act(combiner)
            return action

        # oamap.util.MethodType(f, None, Operable): unbound-method construction,
        # presumably a Python 2/3 compatibility shim -- TODO confirm
        for n, x in oamap.operations.recastings.items():
            setattr(Operable, n, oamap.util.MethodType(newrecasting(n, x), None, Operable))

        for n, x in oamap.operations.transformations.items():
            setattr(Operable, n, oamap.util.MethodType(newtransformation(n, x), None, Operable))

        for n, x in oamap.operations.actions.items():
            setattr(Operable, n, oamap.util.MethodType(newaction(n, x), None, Operable))

    def _nooperations(self):
        # True when no operations are queued at all
        return len(self._operations) == 0

    def _notransformations(self):
        # True when only Recastings (no Transformations/Actions) are queued
        return all(isinstance(x, Recasting) for x in self._operations)
151 |
152 | Operable.update_operations()
153 |
class _Data(Operable):
    """Common base for Data (single object) and Dataset (partitioned).

    Holds the schema, the backend dictionary (namespace -> backend), the
    executor used to run deferred operations, and presentation metadata.
    Subclasses provide ``__call__``/``partition`` to materialize proxies.
    """

    def __init__(self, name, schema, backends, executor, extension=None, packing=None, doc=None, metadata=None):
        super(_Data, self).__init__()
        self._name = name
        self._schema = schema
        self._backends = backends      # dict: namespace -> backend
        self._executor = executor
        self._extension = extension    # None, a module name, or a list of module names
        self._packing = packing
        self._doc = doc
        self._metadata = metadata
        self._cachedobject = None      # proxy memoized by __call__

    def __repr__(self):
        # FIX: format string was "{1}", which ignored the name argument and
        # produced no literal text; restore a conventional representation.
        return "<Data {0}{1}>".format(repr(self._name), "".join(str(x) for x in self._operations))

    def __str__(self):
        # FIX: same as __repr__, but pending operations listed one per line.
        return "<Data {0}{1}>".format(repr(self._name), "".join("\n    " + str(x) for x in self._operations))

    @property
    def name(self):
        return self._name

    @property
    def schema(self):
        # deep copy so callers cannot mutate the dataset's schema in place
        return self._schema.deepcopy()

    @property
    def extension(self):
        return self._extension

    @property
    def packing(self):
        return self._packing

    @property
    def doc(self):
        return self._doc

    @property
    def metadata(self):
        return self._metadata

    def arrays(self):
        """Return an array source spanning all backends (single partition)."""
        return DataArrays(self._backends)

    def transform(self, name, namespace, update):
        """Execute the pending operation chain, persist any derived arrays
        into the backend for ``namespace``, and pass the resulting
        Data/Dataset through ``update``.  Returns a list of futures.
        """
        if self._nooperations():
            # nothing pending: pass this object through unchanged
            return [SingleThreadExecutor.PseudoFuture(update(self))]

        elif self._notransformations():
            # only recastings: the schema view changes but no new arrays are
            # produced, so nothing has to be written to a backend
            result = self()
            for operation in self._operations:
                result = operation.apply(result)
            if isinstance(result, oamap.proxy.ListProxy):
                out = Dataset(name, result._generator.schema, self._backends, self._executor, [0, len(result)], extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata)
            else:
                out = Data(name, result._generator.schema, self._backends, self._executor, extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata)
            return [SingleThreadExecutor.PseudoFuture(update(out))]

        else:
            # at least one transformation produces new arrays: run on the
            # executor and write the results into the namespace's backend
            def task(name, dataset, namespace, update):
                result = dataset()
                for operation in dataset._operations:
                    result = operation.apply(result)

                backend = dataset._backends[namespace]
                schema, roles2arrays = oamap.operations._DualSource.collect(result._generator.namedschema(), result._arrays, namespace, backend.prefix(name), backend.delimiter())

                active = backend.instantiate(0)
                if hasattr(active, "putall"):
                    active.putall(roles2arrays)      # bulk interface, if available
                else:
                    for n, x in roles2arrays.items():
                        active[str(n)] = x

                if isinstance(result, oamap.proxy.ListProxy):
                    out = Dataset(name, schema, dataset._backends, dataset._executor, [0, len(result)], extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata)
                else:
                    out = Data(name, schema, dataset._backends, dataset._executor, extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata)
                return update(out)

            return [self._executor.submit(task, name, self, namespace, update)]

    def act(self, combiner):
        """Materialize this object, apply all pending operations, and reduce
        the resulting list of futures with ``combiner``.
        """
        def task(dataset):
            result = dataset()
            for operation in dataset._operations:
                result = operation.apply(result)
            return result

        return combiner([self._executor.submit(task, self)])
246 |
class Data(_Data):
    """A non-partitioned dataset: the whole object is materialized at once."""

    def __call__(self):
        """Build the proxy on first use and return the memoized result."""
        if self._cachedobject is not None:
            return self._cachedobject

        spec = self._extension
        if spec is None:
            # default extension set
            modules = oamap.util.import_module("oamap.extension.common")
        elif isinstance(spec, basestring):
            # a single extension module, given by name
            modules = oamap.util.import_module(spec)
        else:
            # an explicit list of module names
            modules = [oamap.util.import_module(item) for item in spec]

        self._cachedobject = self._schema(self.arrays(), extension=modules, packing=self._packing)
        return self._cachedobject
260 |
class DataArrays(object):
    """Array source for a non-partitioned Data object.

    Lazily instantiates one 'active' connection per backend namespace and
    fetches the arrays requested by generator roles.
    """

    def __init__(self, backends):
        self._backends = backends    # dict: namespace -> backend
        self._active = {}            # namespace -> instantiated backend (None after close)
        self._partitionid = 0        # Data always reads partition 0

    def _toplevel(self, out, filtered):
        # Hook for subclasses (DatasetArrays) to satisfy the outermost list's
        # roles without touching a backend; no-op here.
        return filtered

    def getall(self, roles):
        """Fetch arrays for all requested roles, grouped by namespace.

        Returns a dict mapping each role to its array.
        """
        out = {}
        for namespace, backend in self._backends.items():
            # only this namespace's roles may be served by this backend
            filtered = self._toplevel(out, [x for x in roles if x.namespace == namespace])

            if len(filtered) > 0:
                active = self._active.get(namespace, None)
                if active is None:
                    active = self._active[namespace] = backend.instantiate(self._partitionid)

                if hasattr(active, "getall"):
                    out.update(active.getall(filtered))    # bulk interface, if available
                else:
                    # FIX: iterate only this namespace's filtered roles;
                    # iterating all `roles` asked this backend for arrays
                    # belonging to other namespaces (wrong or missing keys).
                    for x in filtered:
                        out[x] = active[str(x)]

        return out

    def close(self):
        """Close every instantiated backend that supports closing and drop
        the active references so they can be re-instantiated later."""
        for namespace, active in self._active.items():
            if hasattr(active, "close"):
                active.close()
            self._active[namespace] = None
293 |
class Dataset(_Data):
    """A partitioned dataset: a List schema whose entries are divided into
    contiguous partitions described by ``offsets`` (length numpartitions+1,
    starting at zero, monotonically non-decreasing).
    """

    def __init__(self, name, schema, backends, executor, offsets, extension=None, packing=None, doc=None, metadata=None):
        if not isinstance(schema, oamap.schema.List):
            raise TypeError("Dataset must have a list schema, not\n\n    {0}".format(schema.__repr__(indent="    ")))

        super(Dataset, self).__init__(name, schema, backends, executor, extension=extension, packing=packing, doc=doc, metadata=metadata)

        # validate/normalize offsets into a one-dimensional int64 array that
        # begins with zero and never decreases
        if not isinstance(offsets, numpy.ndarray):
            try:
                if not all(isinstance(x, (numbers.Integral, numpy.integer)) and x >= 0 for x in offsets):
                    raise TypeError
            except TypeError:
                raise TypeError("offsets must be an iterable of non-negative integers")
            offsets = numpy.array(offsets, dtype=numpy.int64)
        if len(offsets.shape) != 1:
            raise ValueError("offsets must be one-dimensional")
        if len(offsets) < 2 or offsets[0] != 0:
            raise ValueError("offsets must have at least two items, and the first one must be zero")
        if not numpy.all(offsets[:-1] <= offsets[1:]):
            raise ValueError("offsets must be monotonically increasing")
        self._offsets = offsets
        self._cachedpartition = None   # id of the partition held in _cachedobject

    def __repr__(self):
        # FIX: format string was "{3}", which ignored the first three
        # arguments and produced no literal text; restore a useful repr.
        return "<Dataset {0} {1} partitions {2} entries{3}>".format(repr(self._name), self.numpartitions, self.numentries, "".join(str(x) for x in self._operations))

    def __str__(self):
        return "<Dataset {0} {1} partitions {2} entries{3}>".format(repr(self._name), self.numpartitions, self.numentries, "".join("\n    " + str(x) for x in self._operations))

    @property
    def offsets(self):
        return self._offsets.tolist()

    @property
    def starts(self):
        # per-partition first entry numbers
        return self._offsets[:-1].tolist()

    @property
    def stops(self):
        # per-partition one-past-last entry numbers
        return self._offsets[1:].tolist()

    @property
    def partitions(self):
        # FIX: referred to nonexistent self.start/self.stop, which raised
        # AttributeError; pair each partition's start with its stop.
        return zip(self.starts, self.stops)

    @property
    def numpartitions(self):
        return len(self._offsets) - 1

    @property
    def numentries(self):
        return int(self._offsets[-1])

    def partition(self, partitionid):
        """Materialize (and memoize) the proxy for one partition."""
        if self._cachedpartition != partitionid:
            self._cachedpartition = partitionid

            if self._extension is None:
                extension = oamap.util.import_module("oamap.extension.common")
            elif isinstance(self._extension, basestring):
                extension = oamap.util.import_module(self._extension)
            else:
                extension = [oamap.util.import_module(x) for x in self._extension]

            self._cachedobject = self._schema(self.arrays(partitionid), extension=extension, packing=self._packing)

        return self._cachedobject

    def __iter__(self):
        # iterate entries partition-by-partition so that at most one
        # partition proxy is alive at a time
        for partitionid in range(self.numpartitions):
            for i in range(self._offsets[partitionid], self._offsets[partitionid + 1]):
                yield self[i]

    def __getitem__(self, index):
        """Entry access by global index or by slice.

        Slices must stay within a single partition; a slice crossing a
        partition boundary raises IndexError.
        """
        if isinstance(index, slice):
            start, stop, step = oamap.util.slice2sss(index, self.numentries)
            partitionid = max(0, min(numpy.searchsorted(self._offsets, start, side="right") - 1, self.numpartitions - 1))
            localstart = start - self._offsets[partitionid]
            localstop = stop - self._offsets[partitionid]
            if localstop < -1 or localstop > (self._offsets[partitionid + 1] - self._offsets[partitionid]):
                raise IndexError("slice spans multiple partitions")

            # reuse the partition proxy, adjusting its window in place
            out = self.partition(partitionid)
            out._whence = localstart
            out._stride = step

            # number of elements: ceil(|localstop - localstart| / |step|)
            d, m = divmod(abs(localstart - localstop), abs(step))
            out._length = d + (1 if m != 0 else 0)
            return out

        else:
            normindex = index if index >= 0 else index + self.numentries
            if not 0 <= normindex < self.numentries:
                raise IndexError("index {0} out of range for {1} entries".format(index, self.numentries))
            partitionid = numpy.searchsorted(self._offsets, normindex, side="right") - 1
            localindex = normindex - self._offsets[partitionid]
            return self.partition(partitionid)[localindex]

    def arrays(self, partitionid):
        """Return the array source for one partition, synthesizing the
        outermost list's starts/stops roles from the partition entry count."""
        normid = partitionid if partitionid >= 0 else partitionid + self.numpartitions
        if not 0 <= normid < self.numpartitions:
            raise IndexError("partitionid {0} out of range for {1} partitions".format(partitionid, self.numpartitions))

        startsrole = oamap.generator.StartsRole(self._schema._get_starts("object", "-"), self._schema.namespace, None)
        stopsrole = oamap.generator.StopsRole(self._schema._get_stops("object", "-"), self._schema.namespace, None)
        startsrole.stops = stopsrole
        stopsrole.starts = startsrole
        return DatasetArrays(normid, startsrole, stopsrole, self._offsets[normid + 1] - self._offsets[normid], self._backends)

    def transform(self, name, namespace, update):
        """Execute the pending operations partition-by-partition, persisting
        derived arrays into the backend for ``namespace``; pass the resulting
        Data/Dataset through ``update``.  Returns a list of futures.
        """
        if self._nooperations():
            # nothing pending: pass this object through unchanged
            return [SingleThreadExecutor.PseudoFuture(update(self))]

        elif self._notransformations():
            # recastings only change the schema view; inspecting one partition
            # is enough to derive the new schema
            result = self.partition(0)
            for operation in self._operations:
                result = operation.apply(result)
            if isinstance(result, oamap.proxy.ListProxy):
                out = Dataset(name, result._generator.schema, self._backends, self._executor, self._offsets, extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata)
            else:
                out = Data(name, result._generator.schema, self._backends, self._executor, extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata)
            return [SingleThreadExecutor.PseudoFuture(update(out))]

        else:
            def task(name, dataset, namespace, partitionid):
                # run the operation chain on one partition and write the
                # derived arrays into that partition of the backend
                result = dataset.partition(partitionid)
                for operation in dataset._operations:
                    result = operation.apply(result)

                backend = dataset._backends[namespace]
                schema, roles2arrays = oamap.operations._DualSource.collect(result._generator.namedschema(), result._arrays, namespace, backend.prefix(name), backend.delimiter())

                active = backend.instantiate(partitionid)
                if hasattr(active, "putall"):
                    active.putall(roles2arrays)
                else:
                    for n, x in roles2arrays.items():
                        active[str(n)] = x
                if isinstance(result, oamap.proxy.ListProxy):
                    return schema, len(result)
                else:
                    return schema, 1

            tasks = [self._executor.submit(task, name, self, namespace, i) for i in range(self.numpartitions)]

            def collect(name, dataset, results, update):
                # `results` are either raw (schema, numentries) tuples (from a
                # synchronous executor) or futures that must be resolved
                if isinstance(results[0], tuple) and len(results[0]) == 2 and isinstance(results[0][0], oamap.schema.Schema):
                    offsets = numpy.cumsum([0] + [numentries for schema, numentries in results], dtype=numpy.int64)
                    schema = results[0][0]
                else:
                    offsets = numpy.cumsum([0] + [x.result()[1] for x in results], dtype=numpy.int64)
                    schema = results[0].result()[0]

                if isinstance(schema, oamap.schema.List):
                    out = Dataset(name, schema, dataset._backends, dataset._executor, offsets, extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata)
                else:
                    out = Data(name, schema, dataset._backends, dataset._executor, extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata)
                return update(out)

            # the collect task depends on all per-partition tasks
            tasks.append(self._executor.submit(collect, name, self, tuple(tasks), update))
            return tasks

    def act(self, combiner):
        """Apply the pending operations to every partition and reduce the
        list of per-partition futures with ``combiner``.
        """
        def task(dataset, partitionid):
            result = dataset.partition(partitionid)
            for operation in dataset._operations:
                result = operation.apply(result)
            return result

        return combiner([self._executor.submit(task, self, i) for i in range(self.numpartitions)])
465 |
class DatasetArrays(DataArrays):
    """Array source for a single partition of a Dataset.

    The outermost list's starts/stops are not stored in any backend; they are
    synthesized here from the partition's entry count.
    """

    def __init__(self, partitionid, startsrole, stopsrole, numentries, backends):
        super(DatasetArrays, self).__init__(backends)
        self._partitionid = partitionid
        self._startsrole = startsrole
        self._stopsrole = stopsrole
        self._numentries = numentries

    def _toplevel(self, out, filtered):
        # Serve the outer-list roles directly (starts = [0],
        # stops = [numentries]) and strip them from the request so the
        # backend is never asked for them.
        synthesized = ((self._startsrole, 0), (self._stopsrole, self._numentries))
        for role, position in synthesized:
            try:
                filtered.remove(role)
            except ValueError:
                pass  # this role was not requested
            else:
                out[role] = numpy.array([position], dtype=oamap.generator.ListGenerator.posdtype)
        return filtered
492 |
493 |
--------------------------------------------------------------------------------