├── tests ├── samples │ ├── list-depths.avro │ ├── list-lengths.avro │ ├── mc10events.root │ ├── list-depths.parquet │ ├── list-lengths.parquet │ ├── nano-2017-08-31.root │ ├── nullable-depths.avro │ ├── nullable-levels.avro │ ├── record-primitives.avro │ ├── list-depths-records.avro │ ├── list-depths-simple.avro │ ├── list-depths-strings.avro │ ├── nonnullable-depths.avro │ ├── nullable-depths.parquet │ ├── nullable-levels.parquet │ ├── list-depths-records.parquet │ ├── list-depths-simple.parquet │ ├── list-depths-strings.parquet │ ├── nonnullable-depths.parquet │ ├── nullable-list-depths.avro │ ├── record-primitives.parquet │ ├── list-depths-records-list.avro │ ├── nullable-list-depths.parquet │ ├── list-depths-records-list.parquet │ ├── nullable-record-primitives.avro │ ├── nullable-list-depths-records.avro │ ├── nullable-list-depths-strings.avro │ ├── nullable-record-primitives.parquet │ ├── nullable-list-depths-records.parquet │ ├── nullable-list-depths-strings.parquet │ ├── nullable-list-depths-records-list.avro │ ├── nullable-record-primitives-simple.avro │ ├── nullable-list-depths-records-list.parquet │ └── nullable-record-primitives-simple.parquet ├── test_issues.py ├── __init__.py ├── test_backend_numpyfile.py ├── test_backend_root.py ├── test_fill.py ├── test_database.py └── test_proxy.py ├── .travis-conda.py ├── .travis.yml ├── LICENSE ├── oamap ├── backend │ ├── __init__.py │ ├── numpyfile.py │ ├── arrow.py │ ├── root │ │ ├── cmsnano.py │ │ └── __init__.py │ └── packing.py ├── extension │ ├── __init__.py │ └── common.py ├── version.py ├── __init__.py ├── util.py ├── proxy.py ├── inference.py ├── fill.py ├── fillable.py └── dataset.py ├── .gitignore ├── setup.py └── README.rst /tests/samples/list-depths.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths.avro -------------------------------------------------------------------------------- 
/tests/samples/list-lengths.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-lengths.avro -------------------------------------------------------------------------------- /tests/samples/mc10events.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/mc10events.root -------------------------------------------------------------------------------- /tests/samples/list-depths.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths.parquet -------------------------------------------------------------------------------- /tests/samples/list-lengths.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-lengths.parquet -------------------------------------------------------------------------------- /tests/samples/nano-2017-08-31.root: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nano-2017-08-31.root -------------------------------------------------------------------------------- /tests/samples/nullable-depths.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-depths.avro -------------------------------------------------------------------------------- /tests/samples/nullable-levels.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-levels.avro 
-------------------------------------------------------------------------------- /tests/samples/record-primitives.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/record-primitives.avro -------------------------------------------------------------------------------- /tests/samples/list-depths-records.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records.avro -------------------------------------------------------------------------------- /tests/samples/list-depths-simple.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-simple.avro -------------------------------------------------------------------------------- /tests/samples/list-depths-strings.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-strings.avro -------------------------------------------------------------------------------- /tests/samples/nonnullable-depths.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nonnullable-depths.avro -------------------------------------------------------------------------------- /tests/samples/nullable-depths.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-depths.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-levels.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-levels.parquet -------------------------------------------------------------------------------- /tests/samples/list-depths-records.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records.parquet -------------------------------------------------------------------------------- /tests/samples/list-depths-simple.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-simple.parquet -------------------------------------------------------------------------------- /tests/samples/list-depths-strings.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-strings.parquet -------------------------------------------------------------------------------- /tests/samples/nonnullable-depths.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nonnullable-depths.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths.avro -------------------------------------------------------------------------------- /tests/samples/record-primitives.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/record-primitives.parquet -------------------------------------------------------------------------------- 
/tests/samples/list-depths-records-list.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records-list.avro -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths.parquet -------------------------------------------------------------------------------- /tests/samples/list-depths-records-list.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/list-depths-records-list.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-record-primitives.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives.avro -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths-records.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records.avro -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths-strings.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-strings.avro -------------------------------------------------------------------------------- /tests/samples/nullable-record-primitives.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths-records.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths-strings.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-strings.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths-records-list.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records-list.avro -------------------------------------------------------------------------------- /tests/samples/nullable-record-primitives-simple.avro: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives-simple.avro -------------------------------------------------------------------------------- /tests/samples/nullable-list-depths-records-list.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-list-depths-records-list.parquet -------------------------------------------------------------------------------- /tests/samples/nullable-record-primitives-simple.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/diana-hep/oamap/HEAD/tests/samples/nullable-record-primitives-simple.parquet -------------------------------------------------------------------------------- /tests/test_issues.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | 4 | class TestIssues(unittest.TestCase): 5 | 6 | def runTest(self): 7 | pass 8 | 9 | def test_issue7(self): 10 | with open('README.rst') as f: 11 | try: 12 | content = f.read() 13 | except UnicodeDecodeError as e: 14 | self.fail("Cannot read README.rst: " + str(e)) 15 | -------------------------------------------------------------------------------- /.travis-conda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | 5 | if os.environ["TRAVIS_PYTHON_VERSION"] == "2.6": 6 | miniconda = False 7 | 8 | elif os.environ["TRAVIS_PYTHON_VERSION"] == "2.7": 9 | miniconda = True 10 | os.system("wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh") 11 | 12 | else: 13 | miniconda = True 14 | os.system("wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh") 15 | 16 | if miniconda: 17 | os.system("bash miniconda.sh -b -p {0}/miniconda".format(os.environ["HOME"])) 18 | os.system("{0}/miniconda/bin/conda config --set always_yes yes --set changeps1 no".format(os.environ["HOME"])) 19 | os.system("{0}/miniconda/bin/conda update -q conda".format(os.environ["HOME"])) 20 | os.system("{0}/miniconda/bin/conda info -a".format(os.environ["HOME"])) 21 | os.system("{0}/miniconda/bin/conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numba".format(os.environ["HOME"])) 22 | os.system("source {0}/miniconda/bin/activate test-environment; python setup.py install".format(os.environ["HOME"])) 23 | 24 | else: 25 | os.system("mkdir -p miniconda/bin") 26 | open("miniconda/bin/activate", "w").write("") 27 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | os: 4 | - linux 5 | 6 | python: 7 | - 2.6 8 | - 2.7 9 | - 3.4 10 | - 3.5 11 | - 3.6 12 | 13 | addons: 14 | apt: 15 | packages: 16 | - python-setuptools 17 | - libsnappy-dev 18 | 19 | install: 20 | - sudo apt-get update 21 | - pip install --upgrade pip 22 | 23 | install: 24 | - pip install --upgrade setuptools_scm 25 | 26 | script: 27 | python .travis-conda.py ; source $HOME/miniconda/bin/activate test-environment; python setup.py test 28 | 29 | deploy: 30 | provider: pypi 31 | user: pivarski 32 | password: 33 | secure: "irt16TqzfFa1A47AgrSEnZz89Tam7g36wUMFRB2cseipVDzk1pmN8xcxj2xebpRXWHhyKmpPUetQ1gwgYn5brK5xl0iQ/eNT4U3tWLWowtBxINYhhErSSAnMVGX+FJliex5fv/yEuU158BviLPLjhYMDXjtFH6TQmFExSoHTaZL8aX0Xswt8Ku0etJHgf4O8D2b1L5yQ1fOHy2vBhfGXhT8jI/rvwGu9DF2iJYIdnrf1jdy3aCvpiBhTUbxLO0sJVSGVpbC3L7uKwPMt+t3gb8iQL7llZL9DgCj4YEIAhLnIRhuTTXkKQ2cfYMX+b6hFiSV816Z1VR+sckfY915mPF+M/k9+m7xqcDRtYYeRsS68sKFgICdDUONR3nMvCJxYPmfSWOo0qvXPh0tjMfJ1lQOideY9ToR2fYzwzL4MGyzn/FrlXUoMNRfYJ8an1X9Xds2Bm9AVF6W1JviKOboHDDg0TqJXScy2LmMaaSdub2lN/a3iioYdK/0RtKWZ6N/qg8b0E91sVFD4zgZ/1qDm7JQhvoqmvhMQQ091Yl0xOmxmmERhMxEyYlcp+8RcAwAxl5KqwkZv2Ni2ReVBJxqfe5wvC7FP412pG1Zdd2FL2UqbyEIq2GJPE+LQGS5KhjfajWacb9wW+6tp4aCUITjU2Eboqq/y0L/R4QKS6HAWitc=" 34 | on: 35 | tags: true 36 | branch: master 37 | condition: "$TRAVIS_PYTHON_VERSION = 2.7" 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, DIANA-HEP 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /oamap/backend/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /oamap/extension/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /oamap/version.py: -------------------------------------------------------------------------------- 
1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | import re 32 | 33 | __version__ = "0.12.4" 34 | version = __version__ 35 | version_info = tuple(re.split(r"[-\.]", __version__)) 36 | 37 | del re 38 | -------------------------------------------------------------------------------- /oamap/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | from oamap.schema import * 32 | import oamap.compiler 33 | 34 | # convenient access to the version number 35 | from oamap.version import __version__ 36 | 37 | -------------------------------------------------------------------------------- /tests/test_backend_numpyfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 
19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | import math 32 | import tempfile 33 | import shutil 34 | 35 | import unittest 36 | 37 | from oamap.schema import * 38 | from oamap.backend.numpyfile import * 39 | 40 | class TestBackendNumpyfile(unittest.TestCase): 41 | def runTest(self): 42 | pass 43 | 44 | def test_database(self): 45 | tmpdir = tempfile.mkdtemp() 46 | try: 47 | db = NumpyFileDatabase(tmpdir) 48 | db.fromdata("one", List(Record({"x": "int32", "y": "float64"})), [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}], [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}, {"x": 6, "y": 6.6}]) 49 | 50 | db.data.two = db.data.one.define("z", lambda obj: obj.x + obj.y) 51 | 52 | self.assertEqual([(obj.x, obj.y, obj.z) for obj in db.data.two], [(1, 1.1, 2.1), (2, 2.2, 4.2), (3, 3.3, 6.3), (4, 4.4, 8.4), (5, 5.5, 10.5), (6, 6.6, 12.6)]) 53 | 54 | del db.data.one 55 | del db.data.two 56 | 57 | finally: 58 | shutil.rmtree(tmpdir) 59 | -------------------------------------------------------------------------------- /tests/test_backend_root.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 
4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | import math 32 | import tempfile 33 | import shutil 34 | 35 | import unittest 36 | 37 | import oamap.backend.root 38 | import oamap.database 39 | 40 | class TestBackendRoot(unittest.TestCase): 41 | def runTest(self): 42 | pass 43 | 44 | def test_database(self): 45 | dataset = oamap.backend.root.dataset("tests/samples/mc10events.root", "Events") 46 | 47 | self.assertEqual(repr(dataset[0].Electron[0].pt), "28.555809") 48 | 49 | db = oamap.database.InMemoryDatabase() 50 | 51 | db.data.one = dataset 52 | 53 | self.assertEqual(repr(db.data.one[0].Electron[0].pt), "28.555809") 54 | 55 | def test_transform(self): 56 | dataset = oamap.backend.root.dataset("tests/samples/mc10events.root", "Events") 57 | 58 | self.assertEqual(repr(dataset[0].Electron[0].pt * math.sinh(dataset[0].Electron[0].eta)), "-17.956890574044056") 59 | 60 | db = oamap.database.InMemoryDatabase.writable(oamap.database.DictBackend()) 61 | db.data.one = dataset.define("pz", lambda x: x.pt * math.sinh(x.eta), at="Electron", numba=False) 62 | 63 | self.assertEqual(repr(db.data.one[0].Electron[0].pz), "-17.956890574044056") 64 | -------------------------------------------------------------------------------- /oamap/backend/numpyfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 
15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | import numpy 32 | 33 | import oamap.database 34 | 35 | class NumpyFileBackend(oamap.database.FilesystemBackend): 36 | def __init__(self, directory): 37 | super(NumpyFileBackend, self).__init__(directory, arraysuffix=".npy") 38 | 39 | @property 40 | def args(self): 41 | return (self._directory,) 42 | 43 | def tojson(self): 44 | return {"class": self.__class__.__module__ + "." 
+ self.__class__.__name__, 45 | "directory": self._directory} 46 | 47 | @staticmethod 48 | def fromjson(obj, namespace): 49 | return NumpyFileBackend(obj["directory"]) 50 | 51 | def instantiate(self, partitionid): 52 | return NumpyArrays(lambda name: self.fullname(partitionid, name, create=False), 53 | lambda name: self.fullname(partitionid, name, create=True)) 54 | 55 | class NumpyArrays(object): 56 | def __init__(self, loadname, storename): 57 | self._loadname = loadname 58 | self._storename = storename 59 | 60 | def __getitem__(self, name): 61 | return numpy.load(self._loadname(name)) 62 | 63 | def __setitem__(self, name, value): 64 | numpy.save(self._storename(name), value) 65 | 66 | class NumpyFileDatabase(oamap.database.FilesystemDatabase): 67 | def __init__(self, directory, namespace=""): 68 | super(NumpyFileDatabase, self).__init__(directory, backends={namespace: NumpyFileBackend(directory)}, namespace=namespace) 69 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2017, DIANA-HEP 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # * Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # * Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # * Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 
20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | import os.path 33 | 34 | from setuptools import find_packages 35 | from setuptools import setup 36 | 37 | def get_version(): 38 | g = {} 39 | exec(open(os.path.join("oamap", "version.py")).read(), g) 40 | return g["__version__"] 41 | 42 | setup(name = "oamap", 43 | version = get_version(), 44 | packages = find_packages(exclude = ["tests"]), 45 | scripts = [], 46 | data_files = ["README.rst"], 47 | description = "Perform high-speed calculations on columnar data without creating intermediate objects.", 48 | long_description = open("README.rst").read().strip(), 49 | author = "Jim Pivarski (DIANA-HEP)", 50 | author_email = "pivarski@fnal.gov", 51 | maintainer = "Jim Pivarski (DIANA-HEP)", 52 | maintainer_email = "pivarski@fnal.gov", 53 | url = "https://github.com/diana-hep/oamap", 54 | download_url = "https://github.com/diana-hep/oamap/releases", 55 | license = "BSD 3-clause", 56 | test_suite = "tests", 57 | install_requires = ["numpy"], 58 | tests_require = ["uproot", "thriftpy", "python-snappy"], 59 | classifiers = [ 60 | "Development Status :: 4 - Beta", 61 | "Intended Audience :: Developers", 62 | "Intended Audience :: Information 
Technology", 63 | "Intended Audience :: Science/Research", 64 | "License :: OSI Approved :: BSD License", 65 | "Operating System :: MacOS", 66 | "Operating System :: POSIX", 67 | "Operating System :: Unix", 68 | "Programming Language :: Python", 69 | "Programming Language :: Python :: 2.7", 70 | "Programming Language :: Python :: 3.4", 71 | "Programming Language :: Python :: 3.5", 72 | "Programming Language :: Python :: 3.6", 73 | "Programming Language :: Python :: 3.7", 74 | "Topic :: Scientific/Engineering", 75 | "Topic :: Scientific/Engineering :: Information Analysis", 76 | "Topic :: Scientific/Engineering :: Mathematics", 77 | "Topic :: Scientific/Engineering :: Physics", 78 | "Topic :: Software Development", 79 | "Topic :: Utilities", 80 | ], 81 | platforms = "Any", 82 | ) 83 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | OAMap: Object-Array Mapping 2 | =========================== 3 | 4 | .. image:: https://travis-ci.org/diana-hep/oamap.svg?branch=master 5 | :target: https://travis-ci.org/diana-hep/oamap 6 | 7 | Introduction 8 | ------------ 9 | 10 | Data analysts are often faced with a choice between speed and flexibility. Tabular data, such as SQL tables, can be processed rapidly enough for a truly interactive analysis session, but hierarchically nested formats, such as JSON, are better at representing relationships in complex data models. In some domains (such as particle physics), we want to perform calculations on JSON-like structures at the speed of SQL. 11 | 12 | The key to high throughput on large datasets, particularly ones with more attributes than are accessed in a single pass, is laying out the data in "columns." All values of an attribute should be contiguous on disk or memory because data are paged from one cache to the next in locally contiguous blocks. 
The `ROOT `_ and `Parquet `_ file formats represent JSON-like data in columns on disk, but these data are usually deserialized into objects for processing in memory. Higher performance can be achieved by maintaining the columnar structure through all stages of the calculation (see `this talk `_ and `this paper `_). 13 | 14 | The OAMap toolkit implements an Object Array Mapping in Python. Object Array Mappings, by analogy with Object Relational Mappings (ORMs) are one-to-one relationships between conceptual objects and physical arrays. You can write functions that appear to be operating on ordinary Python objects-- lists, tuples, class instances-- but are actually being performed on low-level, contiguous buffers (Numpy arrays). The result is fast processing of large, complex datasets with a low memory footprint. 15 | 16 | OAMap has two primary modes: (1) pure-Python object proxies, which pretend to be Python objects but actually access array data on demand, and (2) bare-metal bytecode compiled by `Numba `_. The pure-Python form is good for low-latency, exploratory work, while the compiled form is good for high throughput. They are seamlessly interchangeable: a Python proxy converts to the compiled form when it enters a Numba-compiled function and switches back when it leaves. You can, for instance, do a fast search in compiled code and examine the results more fully by hand. 17 | 18 | Any columnar file format or database can be used as a data source: OAMap can get arrays of data from any dict-like object (any Python object implementing ``__getitem__``), even from within a Numba-compiled function. Backends to ROOT, Parquet, and HDF5 are included, as well as a Python ``shelve`` alternative. Storing and accessing a complete dataset, including metadata, requires no more infrastructure than a collection of named arrays. (Data types are encoded in the names, values in the arrays.) 
OAMap is intended as a middleware layer above file formats and databases but below a fully integrated analysis suite. 19 | 20 | Installation 21 | ------------ 22 | 23 | Install OAMap like any other Python package: 24 | 25 | .. code-block:: bash 26 | 27 | pip install oamap --user 28 | 29 | or similar (use ``sudo``, ``virtualenv``, or ``conda`` if you wish). 30 | 31 | **Strict dependencies:** 32 | 33 | - `Python `_ (2.6+, 3.4+) 34 | - `Numpy `_ 35 | 36 | **Recommended dependencies:** 37 | 38 | - `Numba and LLVM `_ to JIT-compile functions (requires a particular version of LLVM, follow instructions) 39 | - `thriftpy `_ to read Parquet files (pure Python, pip is fine) 40 | - `uproot `_ to read ROOT files (pure Python, pip is fine) 41 | - `h5py `_ to read HDF5 files (requires binary libraries; follow instructions) 42 | 43 | **Optional dependencies:** (all are bindings to binaries that can be package-installed) 44 | 45 | - `lz4 `_ compression used by some ROOT and Parquet files 46 | - `python-snappy `_ compression used by some Parquet files 47 | - `lzo `_ compression used by some Parquet files 48 | - `brotli `_ compression used by some Parquet files 49 | -------------------------------------------------------------------------------- /oamap/extension/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 
15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

import codecs
import sys

import numpy

import oamap.generator

class _GenerateBytes(object):
    """Mixin that materializes a list-of-uint8 generator's content as bytes.

    Shared by the ByteString and UTF8String extended generators; subclasses
    must also inherit an oamap.generator.ExtendedGenerator so that
    ``self.generic`` and ``self._getarrays`` exist.
    """

    # single Python-2/3 switch, evaluated once at class-creation time
    py3 = sys.version_info[0] >= 3

    def _generatebytes(self, arrays, index, cache):
        """Return the raw byte content at ``index``, or None if masked out."""
        listgen = self.generic
        primgen = self.generic.content

        if isinstance(listgen, oamap.generator.MaskedListGenerator):
            # lazily load the mask array into the cache on first use
            mask = cache[listgen.maskidx]
            if mask is None:
                self._getarrays(arrays, cache, listgen._toget(arrays, cache))
                mask = cache[listgen.maskidx]

            value = mask[index]
            if value == listgen.maskedvalue:
                return None
            else:
                # masked lists indirect: the mask entry is the compact index
                index = value

        starts = cache[listgen.startsidx]
        stops = cache[listgen.stopsidx]
        data = cache[primgen.dataidx]
        if starts is None or stops is None or data is None:
            # fetch starts/stops/data together in a single backend request
            toget = listgen._toget(arrays, cache)
            toget.update(primgen._toget(arrays, cache))
            self._getarrays(arrays, cache, toget)
            starts = cache[listgen.startsidx]
            stops = cache[listgen.stopsidx]
            data = cache[primgen.dataidx]

        array = data[starts[index]:stops[index]]

        if isinstance(array, bytes):
            return array
        elif isinstance(array, numpy.ndarray):
            # tobytes() replaces tostring(), which was deprecated in
            # NumPy 1.19 and removed in NumPy 2.0; output is identical.
            return array.tobytes()
        elif self.py3:
            return bytes(array)
        else:
            # Python 2: no bytes(iterable) constructor; join chr-mapped values
            return "".join(map(chr, array))

    def degenerate(self, obj):
        """Convert a string (or None) back into raw uint8 content for filling."""
        if obj is None:
            return obj

        elif self.py3:
            if isinstance(obj, bytes):
                return obj
            else:
                return codecs.utf_8_encode(obj)[0]

        else:
            # Python 2: str is already bytes; unicode must be UTF-8 encoded first
            if isinstance(obj, str):
                return map(ord, obj)
            else:
                return map(ord, codecs.utf_8_encode(obj)[0])

class ByteStringGenerator(_GenerateBytes, oamap.generator.ExtendedGenerator):
    """Extended generator that presents list-of-uint8 data as raw bytes."""

    pattern = {"name": "ByteString", "type": "list", "content": {"type": "primitive", "dtype": "uint8", "nullable": False}}

    def _generate(self, arrays, index, cache):
        return self._generatebytes(arrays, index, cache)

class UTF8StringGenerator(_GenerateBytes, oamap.generator.ExtendedGenerator): 102 | pattern = {"name": "UTF8String", "type": "list", "content": {"type": "primitive", "dtype": "uint8", "nullable": False}} 103 | 104 | def _generate(self, arrays, index, cache): 105 | out = self._generatebytes(arrays, index, cache) 106 | if out is None: 107 | return out 108 | else: 109 | return codecs.utf_8_decode(out)[0] 110 | 111 | def ByteString(nullable=False, starts=None, stops=None, data=None, mask=None, packing=None, doc=None, metadata=None): 112 | import oamap.schema 113 | return oamap.schema.List(oamap.schema.Primitive(numpy.uint8, data=data), nullable=nullable, starts=starts, stops=stops, mask=mask, packing=packing, name="ByteString", doc=doc, metadata=metadata) 114 | 115 | def UTF8String(nullable=False, starts=None, stops=None, data=None, mask=None, packing=None, doc=None, metadata=None): 116 | import oamap.schema 117 | return oamap.schema.List(oamap.schema.Primitive(numpy.uint8, data=data), nullable=nullable, starts=starts, stops=stops, mask=mask, packing=packing, name="UTF8String", doc=doc, metadata=metadata) 118 | -------------------------------------------------------------------------------- /tests/test_fill.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 
15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | 31 | import unittest 32 | 33 | import oamap.inference 34 | import oamap.fill 35 | import oamap.proxy 36 | from oamap.schema import * 37 | 38 | class TestFill(unittest.TestCase): 39 | def runTest(self): 40 | pass 41 | 42 | def check(self, value, schema=None, debug=False): 43 | if schema is None: 44 | schema = oamap.inference.fromdata(value) 45 | if debug: 46 | print("schema: {0}".format(schema)) 47 | arrays = oamap.fill.fromdata(value, schema) 48 | if debug: 49 | print("arrays:") 50 | for n in sorted(arrays): 51 | print(" {0}: {1}".format(repr(n), arrays[n])) 52 | columnar = schema(arrays) 53 | if debug: 54 | print("columnar: {0}".format(columnar)) 55 | value2 = oamap.proxy.tojson(columnar) 56 | self.assertEqual(value, value2) 57 | 58 | def test_Primitive(self): 59 | self.check(3) 60 | self.check(3.14) 61 | self.check({"real": 3, "imag": 4}) 62 | self.check("inf") 63 | self.check("-inf") 64 | self.check("nan") 65 | # self.check([[1, 2], [3, 4]], Primitive("i8", (2, 2))) 66 | 67 | def test_List(self): 68 | self.check([], schema=List(Primitive("i8"))) 69 | self.check([], schema=List(List(List(List(Primitive("i8")))))) 70 | self.check([[[[]]]], schema=List(List(List(List(Primitive("i8")))))) 71 | self.check([1, 2, 3]) 72 | self.check([[1, 2, 3], [], [4, 5]]) 73 | self.check([[1, 2, None], [], [4, 5]]) 74 | 75 | def test_Union(self): 76 | self.check([1, 2, 3, 4.4, 5.5, 6.6], schema=List(Union([Primitive("i8"), Primitive("f8")]))) 77 | self.check([3.14, [], 1.1, 2.2, [1, 2, 3]]) 78 | self.check([3.14, [], 1.1, None, [1, 2, 3]]) 79 | 80 | def test_Record(self): 81 | self.check({"one": 1, "two": 2.2}) 82 | self.check({"one": {"uno": 1, "dos": 2}, "two": 2.2}) 83 | self.check({"one": {"uno": 1, "dos": [2]}, "two": 2.2}) 84 | self.check([{"one": 1, "two": 2.2}, {"one": 1.1, "two": 2.2}]) # two of same Record 85 | self.check([{"one": 1, "two": 2.2}, {"one": [1, 2, 3], "two": 2.2}]) # Union of attribute 86 | self.check([{"one": 1, "two": 2.2}, {"two": 2.2}]) # Union of 
Records 87 | self.check([{"one": 1, "two": 2.2}, None]) # nullable Record 88 | 89 | def test_Tuple(self): 90 | self.check([1, [2, 3], [[4, 5], [6]]], schema=Tuple([Primitive("i8"), List(Primitive("i8")), List(List(Primitive("i8")))])) 91 | self.check([1, [2, 3], None], schema=Tuple([Primitive("i8"), List(Primitive("i8")), List(List(Primitive("i8")), nullable=True)])) 92 | 93 | def test_Pointer(self): 94 | class Node(object): 95 | def __init__(self, label, next): 96 | self.label = label 97 | self.next = next 98 | 99 | schema = Record({"label": Primitive("i8")}, name="Node") 100 | schema["next"] = Pointer(schema) 101 | value = Node(0, Node(1, Node(2, None))) 102 | value.next.next.next = value 103 | 104 | arrays = oamap.fill.fromdata(value, schema) 105 | columnar = schema(arrays) 106 | 107 | self.assertEqual(value.label, columnar.label) 108 | self.assertEqual(value.next.label, columnar.next.label) 109 | self.assertEqual(value.next.next.label, columnar.next.next.label) 110 | self.assertEqual(value.next.next.next.label, columnar.next.next.next.label) 111 | self.assertEqual(value.next.next.next.next.label, columnar.next.next.next.next.label) 112 | self.assertEqual(value.next.next.next.next.next.label, columnar.next.next.next.next.next.label) 113 | self.assertEqual(value.next.next.next.next.next.next.label, columnar.next.next.next.next.next.next.label) 114 | -------------------------------------------------------------------------------- /oamap/backend/arrow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 
11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

import numpy

import oamap.schema
import oamap.generator
from oamap.util import OrderedDict

def schema(table):
    """Translate a pyarrow Table's schema into an oamap schema.

    Column array names are encoded as "<columnname>/<bufferindex>", where the
    buffer index follows Arrow's per-chunk buffer layout: for each nesting
    level, buffer `index` is the validity mask and `index + 1` is the offsets
    (for lists) or the data (for primitives).
    """
    import pyarrow
    def recurse(node, name, index, nullable):
        if isinstance(node, pyarrow.lib.ListType):
            # list level consumes two buffers: mask at `index`, offsets at `index + 1`
            # (starts and stops both point at the same offsets buffer; getall
            # derives stops from starts)
            return oamap.schema.List(recurse(node.value_type, name, index + 2, nullable),
                                     nullable=nullable,
                                     starts="{0}/{1}".format(name, index + 1),
                                     stops="{0}/{1}".format(name, index + 1),
                                     mask="{0}/{1}".format(name, index))
        elif isinstance(node, pyarrow.lib.DataType):
            # primitive leaf: mask at `index`, data at `index + 1`
            return oamap.schema.Primitive(node.to_pandas_dtype(),
                                          nullable=nullable,
                                          data="{0}/{1}".format(name, index + 1),
                                          mask="{0}/{1}".format(name, index))
        else:
            raise NotImplementedError(type(node))

    fields = []
    for n in table.schema.names:
        field = table.schema.field_by_name(n)
        fields.append((n, recurse(field.type, n, 0, field.nullable)))

    # top level is a list of records spanning the whole table; empty names are
    # resolved specially in getall below
    return oamap.schema.List(
        oamap.schema.Record(OrderedDict(fields)),
        starts="",
        stops="")

def proxy(table):
    """Wrap a pyarrow Table as an oamap ListProxy without copying data buffers
    (masks are the exception: Arrow validity bitmaps are unpacked and converted
    to oamap's index-style masks)."""
    import pyarrow
    class _ArrayDict(object):
        # dict-like source resolving "<column>/<bufferindex>" names to arrays
        def __init__(self, table):
            self.table = table

        def chop(self, name):
            # split "<column>/<bufferindex>" at the LAST slash
            slashindex = name.rindex("/")
            return name[:slashindex], int(name[slashindex + 1 :])

        def frombuffer(self, chunk, bufferindex):
            # walk the chunk's buffer layout to materialize buffer `bufferindex`
            def truncate(array, length, offset=0):
                # Arrow buffers may be padded; clip to logical length
                # (offset=1 keeps the extra entry of an offsets buffer)
                return array[:length + offset]

            def mask(index, length):
                buf = chunk.buffers()[index]
                if buf is None:
                    # no validity bitmap: all entries valid, identity mask
                    return numpy.arange(length, dtype=oamap.generator.Masked.maskdtype)
                else:
                    # unpack the validity bitmap, then build an index-style mask:
                    # valid entries get compact indices, invalid get maskedvalue
                    # NOTE(review): numpy.unpackbits is MSB-first per byte while
                    # Arrow bitmaps are LSB-first — confirm against a nullable sample
                    unmasked = truncate(numpy.unpackbits(numpy.frombuffer(buf, dtype=numpy.uint8)).view(numpy.bool_), length)
                    mask = numpy.empty(len(unmasked), dtype=oamap.generator.Masked.maskdtype)
                    mask[unmasked] = numpy.arange(unmasked.sum(), dtype=mask.dtype)
                    mask[~unmasked] = oamap.generator.Masked.maskedvalue
                    return mask

            def recurse(tpe, index, length):
                if isinstance(tpe, pyarrow.lib.ListType):
                    if index == bufferindex:
                        # list mask
                        return mask(index, length)
                    elif index + 1 == bufferindex:
                        # list starts
                        return truncate(numpy.frombuffer(chunk.buffers()[index + 1], dtype=numpy.int32), length, 1)
                    else:
                        # descend into list; the child's length is the last offset
                        length = truncate(numpy.frombuffer(chunk.buffers()[index + 1], dtype=numpy.int32), length, 1)[-1]
                        return recurse(tpe.value_type, index + 2, length)

                elif isinstance(tpe, pyarrow.lib.DataType):
                    if index == bufferindex:
                        # data mask
                        return mask(index, length)
                    elif index + 1 == bufferindex:
                        # data
                        return truncate(numpy.frombuffer(chunk.buffers()[index + 1], dtype=tpe.to_pandas_dtype()), length)
                    else:
                        raise AssertionError

                else:
                    raise NotImplementedError

            return recurse(chunk.type, 0, len(chunk))

        def getall(self, names):
            out = {}
            for name in names:
                if len(str(name)) == 0:
                    # empty name: the synthetic outermost list over all rows
                    if isinstance(name, oamap.generator.StartsRole):
                        out[name] = numpy.array([0], dtype=oamap.generator.ListGenerator.posdtype)
                    elif isinstance(name, oamap.generator.StopsRole):
                        out[name] = numpy.array([self.table.num_rows], dtype=oamap.generator.ListGenerator.posdtype)
                    else:
                        raise AssertionError

                elif isinstance(name, oamap.generator.StopsRole):
                    # stops are the starts (same offsets buffer) shifted by one;
                    # relies on starts having been requested in the same batch
                    out[name] = out[name.starts][1:]

                else:
                    columnname, bufferindex = self.chop(str(name))
                    column = self.table[self.table.schema.names.index(columnname)]
                    chunks = column.data.chunks
                    if len(chunks) == 0:
                        raise ValueError("Arrow column {0} has no chunks".format(repr(columnname)))
                    elif len(chunks) == 1:
                        out[name] = self.frombuffer(chunks[0], bufferindex)
                    else:
                        # multi-chunk columns are flattened into one array
                        # NOTE(review): concatenating raw offset buffers from
                        # several chunks does not rebase the offsets — confirm
                        # against a multi-chunk table
                        out[name] = numpy.concatenate([self.frombuffer(chunk, bufferindex) for chunk in chunks])

            return out

    return schema(table)(_ArrayDict(table))
-------------------------------------------------------------------------------- /oamap/backend/root/cmsnano.py: --------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright (c) 2017, DIANA-HEP
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import numpy

import oamap.backend.root
import oamap.schema
import oamap.dataset
import oamap.proxy
import oamap.extension.common   # FIX: used in default arguments below but was never imported
from oamap.util import OrderedDict

def dataset(path, treepath="Events", namespace=None, **kwargs):
    """Open CMS NanoAOD file(s) as an oamap Dataset.

    path: file path or glob understood by uproot.tree.numentries.
    treepath: name of the TTree inside each file (default "Events").
    namespace: backend namespace key; derived from *path* if None.
    Remaining kwargs are forwarded to uproot.tree.numentries.
    """
    import uproot

    if namespace is None:
        namespace = "root.cmsnano({0})".format(repr(path))

    if "localsource" not in kwargs:
        kwargs["localsource"] = lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    kwargs["total"] = False
    kwargs["blocking"] = True

    paths2entries = uproot.tree.numentries(path, treepath, **kwargs)
    if len(paths2entries) == 0:
        raise ValueError("path {0} matched no TTrees".format(repr(path)))

    # cumulative entry offsets: partition i covers [offsets[i], offsets[i+1])
    offsets = [0]
    paths = []
    for path, numentries in paths2entries.items():
        offsets.append(offsets[-1] + numentries)
        paths.append(path)

    # the schema is inferred from the first file only (see "schemafrom" metadata)
    sch = schema(paths[0], namespace=namespace)
    doc = sch.doc
    sch.doc = None

    return oamap.dataset.Dataset(treepath,
                                 sch,
                                 {namespace: oamap.backend.root.ROOTBackend(paths, treepath, namespace)},
                                 oamap.dataset.SingleThreadExecutor(),
                                 offsets,
                                 extension=None,
                                 packing=None,
                                 doc=doc,
                                 metadata={"schemafrom": paths[0]})

def proxy(path, treepath="Events", namespace=None, extension=oamap.extension.common):
    """Open a single NanoAOD file as a lazy ListProxy of events."""
    import uproot

    if namespace is None:
        namespace = "root.cmsnano({0})".format(repr(path))

    def localsource(path):
        return uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)

    return _proxy(uproot.open(path, localsource=localsource)[treepath], namespace=namespace, extension=extension)

def _proxy(tree, namespace=None, extension=oamap.extension.common):
    """Build a ListProxy over an already-open uproot TTree."""
    if namespace is None:
        # FIX: this branch formerly formatted an undefined name "path" (NameError);
        # derive the default namespace from the tree's own source path instead.
        namespace = "root.cmsnano({0})".format(repr(tree._context.sourcepath))

    schema = _schema(tree, namespace=namespace)
    generator = schema.generator(extension=extension)

    return oamap.proxy.ListProxy(generator, oamap.backend.root.ROOTArrays(tree, oamap.backend.root.ROOTBackend([tree._context.sourcepath], tree._context.treename, namespace)), generator._newcache(), 0, 1, tree.numentries)

def schema(path, treepath="Events", namespace=None):
    """Infer the NanoAOD-grouped schema from one file."""
    import uproot

    if namespace is None:
        namespace = "root.cmsnano({0})".format(repr(path))

    def localsource(path):
        return uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)

    return _schema(uproot.open(path, localsource=localsource)[treepath], namespace=namespace)

def _schema(tree, namespace=None):
    """Build a NanoAOD-specific schema from *tree*: flat "Group_field" branches
    are regrouped into records/lists ("Muon_pt" -> Muon[].pt), and HLT_*/Flag_*
    branches are collected into HLT and Flag records."""
    if namespace is None:
        # FIX: this branch formerly formatted an undefined name "path" (NameError);
        # derive the default namespace from the tree's own source path instead.
        namespace = "root.cmsnano({0})".format(repr(tree._context.sourcepath))

    schema = oamap.backend.root._schema(tree, namespace=namespace)

    groups = OrderedDict()
    for name in list(schema.content.keys()):   # list(): keys are deleted in the loop
        if isinstance(schema.content[name], oamap.schema.List) and "_" in name:
            try:
                branch = tree[schema.content[name].starts]
            except KeyError:
                pass
            else:
                # jagged "Group_field" branches share a count branch -> one List[Record]
                underscore = name.index("_")
                groupname, fieldname = name[:underscore], name[underscore + 1:]
                countbranchname = branch.countbranch.name
                if not isinstance(countbranchname, str):
                    countbranchname = countbranchname.decode("ascii")
                if groupname not in groups:
                    groups[groupname] = schema.content[groupname] = \
                        oamap.schema.List(oamap.schema.Record({}, name=groupname), starts=countbranchname, stops=countbranchname, namespace=namespace)
                assert countbranchname == schema.content[groupname].starts
                groups[groupname].content[fieldname] = schema.content[name].content
                del schema.content[name]

        elif "MET_" in name or name.startswith("LHE_") or name.startswith("Pileup_") or name.startswith("PV_"):
            # scalar per-event groups become plain Records
            underscore = name.index("_")
            groupname, fieldname = name[:underscore], name[underscore + 1:]
            if groupname not in groups:
                groups[groupname] = schema.content[groupname] = \
                    oamap.schema.Record({}, name=groupname)
            groups[groupname][fieldname] = schema.content[name]
            del schema.content[name]

    hlt = oamap.schema.Record({}, name="HLT")
    flag = oamap.schema.Record({}, name="Flag")
    # FIX: iterate over a snapshot -- the loop deletes keys from schema.content,
    # which raises RuntimeError on Python 3 when iterating the live keys() view.
    for name in list(schema.content.keys()):
        if name.startswith("HLT_"):
            hlt[name[4:]] = schema.content[name]
            del schema.content[name]
        elif name.startswith("Flag_"):
            flag[name[5:]] = schema.content[name]
            del schema.content[name]

    schema.content["HLT"] = hlt
    schema.content["Flag"] = flag
    schema.content.name = "Event"
    return schema
import math

import unittest

from oamap.schema import *
from oamap.database import *
from oamap.dataset import *
import oamap.operations

class TestDatabase(unittest.TestCase):
    def runTest(self):
        pass

    def test_data(self):
        """Round-trip columnar data through InMemoryDatabase and exercise
        recasting (project/drop/keep), transformations (filter), and
        actions (map/reduce) on a single-record dataset."""
        db = InMemoryDatabase()
        db.fromdata("one",
                    Record({"x": List("int32"), "y": List("float64")}),
                    {"x": [1, 2, 3, 4, 5], "y": [1.1, 2.2, 3.3]})

        one = db.data.one
        for index, expected in enumerate([1, 2, 3]):
            self.assertEqual(one().x[index], expected)
        for index, expected in enumerate([1.1, 2.2, 3.3]):
            self.assertEqual(one().y[index], expected)

        # recasting
        db.data.two = one.project("x")
        two = db.data.two
        for index in range(5):
            self.assertEqual(two[index], index + 1)

        db.data.two = one.drop("y")
        two = db.data.two
        for index in range(5):
            self.assertEqual(two().x[index], index + 1)

        db.data.two = one.drop("y").keep("x")
        two = db.data.two
        for index in range(5):
            self.assertEqual(two().x[index], index + 1)

        # transformation
        db.data.three = one.filter(lambda x: x % 2 == 0, at="x")
        three = db.data.three
        self.assertEqual(three().x, [2, 4])

        db.data.three = one.filter(lambda x: x > 1, at="x").filter(lambda x: x < 5, at="x")
        three = db.data.three
        self.assertEqual(three().x, [2, 3, 4])

        # action
        table = one.map(lambda x: x**2, at="x")
        self.assertEqual(table.result().tolist(), [1, 4, 9, 16, 25])

        summary = one.reduce(0, lambda x, tally: x + tally, at="x")
        self.assertEqual(summary.result(), sum([1, 2, 3, 4, 5]))

    def test_dataset(self):
        """Same operations on a two-partition List(Record) dataset, including
        per-partition checks and reference-count cleanup after deletion."""
        db = InMemoryDatabase()
        db.fromdata("one",
                    List(Record({"x": "int32", "y": "float64"})),
                    [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}, {"x": 3, "y": 3.3}],
                    [{"x": 4, "y": 4.4}, {"x": 5, "y": 5.5}, {"x": 6, "y": 6.6}])
        one = db.data.one
        for index in range(6):
            self.assertEqual(one[index].x, index + 1)
        self.assertEqual([obj.x for obj in one], [1, 2, 3, 4, 5, 6])
        self.assertEqual([obj.y for obj in one], [1.1, 2.2, 3.3, 4.4, 5.5, 6.6])
        self.assertEqual(oamap.operations.project(one.partition(0), "x"), [1, 2, 3])
        self.assertEqual(oamap.operations.project(one.partition(1), "x"), [4, 5, 6])

        # recasting
        db.data.two = one.project("x")
        two = db.data.two
        self.assertEqual(two.partition(0), [1, 2, 3])
        self.assertEqual(two.partition(1), [4, 5, 6])
        self.assertEqual([x for x in two], [1, 2, 3, 4, 5, 6])

        db.data.two = one.drop("y").project("x")
        two = db.data.two
        self.assertEqual([x for x in two], [1, 2, 3, 4, 5, 6])
        self.assertEqual(two.partition(0), [1, 2, 3])
        self.assertEqual(two.partition(1), [4, 5, 6])

        # transformation
        db.data.three = one.filter(lambda obj: obj.x % 2 == 0)
        three = db.data.three
        self.assertEqual([obj.x for obj in three], [2, 4, 6])
        self.assertEqual([obj.y for obj in three], [2.2, 4.4, 6.6])
        self.assertEqual(oamap.operations.project(three.partition(0), "x"), [2])
        self.assertEqual(oamap.operations.project(three.partition(1), "x"), [4, 6])

        db.data.three = one.filter(lambda obj: obj.x > 1).filter(lambda obj: obj.x < 6)
        three = db.data.three
        self.assertEqual([obj.x for obj in three], [2, 3, 4, 5])
        self.assertEqual([obj.y for obj in three], [2.2, 3.3, 4.4, 5.5])
        self.assertEqual(oamap.operations.project(three.partition(0), "x"), [2, 3])
        self.assertEqual(oamap.operations.project(three.partition(1), "x"), [4, 5])

        # action
        table = one.map(lambda obj: None if obj.x % 2 == 0 else (obj.x, obj.y, obj.x + obj.y))
        self.assertEqual(table.result().tolist(), [(1, 1.1, 2.1), (3, 3.3, 6.3), (5, 5.5, 10.5)])

        summary = one.reduce(0, lambda obj, tally: obj.x + tally)
        self.assertEqual(summary.result(), sum([1, 2, 3, 4, 5, 6]))

        # deleting every dataset should release all backing arrays
        # (inspect db._backends[db._namespace]._arrays/_refcounts when debugging)
        del db.data.one
        del db.data.two
        del db.data.three

        self.assertEqual(len(db._backends[db._namespace]._refcounts.get(0, {})), 0)
        self.assertEqual(len(db._backends[db._namespace]._refcounts.get(1, {})), 0)
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | import ast 32 | import math 33 | import sys 34 | import types 35 | 36 | import numpy 37 | 38 | if sys.version_info[0] > 2: 39 | basestring = str 40 | unicode = str 41 | def MethodType(function, instance, cls): 42 | if instance is None: 43 | return function 44 | else: 45 | return types.MethodType(function, instance) 46 | else: 47 | MethodType = types.MethodType 48 | 49 | try: 50 | from collections import OrderedDict 51 | except ImportError: 52 | # simple OrderedDict implementation for Python 2.6 53 | class OrderedDict(dict): 54 | def __init__(self, items=(), **kwds): 55 | items = list(items) 56 | self._order = [k for k, v in items] + [k for k, v in kwds.items()] 57 | super(OrderedDict, self).__init__(items) 58 | def keys(self): 59 | return self._order 60 | def values(self): 61 | return [self[k] for k in self._order] 62 | def items(self): 63 | return [(k, self[k]) for k in self._order] 64 | def __setitem__(self, name, value): 65 | if name not in self._order: 66 | self._order.append(name) 67 | super(OrderedDict, self).__setitem__(name, value) 68 | def __delitem__(self, name): 69 | if name in self._order: 70 | self._order.remove(name) 71 | super(OrderedDict, self).__delitem__(name) 72 | def __repr__(self): 73 | return "OrderedDict([{0}])".format(", ".join("({0}, {1})".format(repr(k), repr(v)) for k, v in self.items())) 74 | 75 | try: 76 | from UserDict import DictMixin as MutableMapping 77 | except ImportError: 
78 | from collections import MutableMapping 79 | 80 | try: 81 | from importlib import import_module 82 | except ImportError: 83 | def import_module(modulename): 84 | module = __import__(modulename) 85 | for name in modulename.split(".")[1:]: 86 | module = module.__dict__[name] 87 | return module 88 | 89 | def slice2sss(index, length): 90 | step = 1 if index.step is None else index.step 91 | 92 | if step == 0: 93 | raise ValueError("slice step cannot be zero") 94 | 95 | elif step > 0: 96 | if index.start is None: 97 | start = 0 # in-range 98 | elif index.start >= 0: 99 | start = min(index.start, length) # in-range or length 100 | else: 101 | start = max(0, index.start + length) # in-range 102 | 103 | if index.stop is None: 104 | stop = length # length 105 | elif index.stop >= 0: 106 | stop = max(start, min(length, index.stop)) # in-range or length 107 | else: 108 | stop = max(start, index.stop + length) # in-range or length 109 | 110 | else: 111 | if index.start is None: 112 | start = length - 1 # in-range 113 | elif index.start >= 0: 114 | start = min(index.start, length - 1) # in-range 115 | else: 116 | start = max(index.start + length, -1) # in-range or -1 117 | 118 | if index.stop is None: 119 | stop = -1 # -1 120 | elif index.stop >= 0: 121 | stop = min(start, index.stop) # in-range or -1 122 | else: 123 | stop = min(start, max(-1, index.stop + length)) # in-range or -1 124 | 125 | return start, stop, step 126 | 127 | def json2python(value): 128 | def recurse(value): 129 | if isinstance(value, dict) and len(value) == 2 and set(value.keys()) == set(["real", "imag"]) and all(isinstance(x, (int, float)) for x in value.values()): 130 | return value["real"] + value["imag"]*1j 131 | elif value == "inf": 132 | return float("inf") 133 | elif value == "-inf": 134 | return float("-inf") 135 | elif value == "nan": 136 | return float("nan") 137 | elif isinstance(value, list): 138 | return [recurse(x) for x in value] 139 | elif isinstance(value, dict): 140 | return dict((n, 
recurse(x)) for n, x in value.items()) 141 | else: 142 | return value 143 | return recurse(value) 144 | 145 | def python2json(value, allowlinks=False): 146 | def recurse(value, memo): 147 | if id(value) in memo: 148 | if allowlinks: 149 | return memo[id(value)] 150 | else: 151 | raise TypeError("cross-linking within an object is not allowed") 152 | 153 | if value is None: 154 | memo[id(value)] = None 155 | 156 | elif isinstance(value, (numbers.Integral, numpy.integer)): 157 | memo[id(value)] = int(value) 158 | 159 | elif isinstance(value, (numbers.Real, numpy.floating)): 160 | if math.isnan(value): 161 | memo[id(value)] = "nan" 162 | elif math.isinf(value) and value > 0: 163 | memo[id(value)] = "inf" 164 | elif math.isinf(value): 165 | memo[id(value)] = "-inf" 166 | else: 167 | memo[id(value)] = float(value) 168 | 169 | elif isinstance(value, (numbers.Complex, numpy.complex)): 170 | memo[id(value)] = {"real": float(value.real), "imag": float(value.imag)} 171 | 172 | elif isinstance(value, basestring): 173 | memo[id(value)] = value 174 | 175 | elif isinstance(value, dict): 176 | memo[id(value)] = {} 177 | for n, x in value.items(): 178 | if not isinstance(n, basestring): 179 | raise TypeError("dict keys for JSON must be strings") 180 | memo[id(value)][n] = recurse(x, memo) 181 | 182 | else: 183 | memo[id(value)] = [] 184 | for x in value: 185 | memo[id(value)].append(recurse(x, memo)) 186 | 187 | return memo[id(value)] 188 | 189 | return recurse(value, {}) 190 | 191 | def python2hashable(value): 192 | def recurse(value): 193 | if isinstance(value, dict): 194 | return tuple((n, recurse(value[n])) for n in sorted(value)) 195 | elif isinstance(value, list): 196 | return tuple(recurse(x) for x in value) 197 | else: 198 | return value 199 | return recurse(python2json(value)) 200 | 201 | def varname(avoid, trial=None): 202 | while trial is None or trial in avoid: 203 | trial = "v" + str(len(avoid)) 204 | avoid.add(trial) 205 | return trial 206 | 207 | def 
def paramtypes(args):
    """Return a tuple of Numba types for *args*, or None if Numba is absent."""
    try:
        import numba as nb
    except ImportError:
        return None
    else:
        return tuple(nb.typeof(x) for x in args)

def doexec(module, env):
    """Execute a compiled code object in *env* (kept in its own function so
    exec runs without the caller's local scope)."""
    exec(module, env)

def stringfcn(fcn):
    """If *fcn* is a string, compile it into a function whose parameters are
    the expression's free variables; otherwise return *fcn* unchanged.

    The last expression statement of the string becomes the return value.
    math's functions and this module's globals are available to the body.
    """
    if isinstance(fcn, basestring):
        parsed = ast.parse(fcn).body
        if isinstance(parsed[-1], ast.Expr):
            # turn the trailing expression into "return <expr>"
            parsed[-1] = ast.Return(parsed[-1].value)
            parsed[-1].lineno = parsed[-1].value.lineno
            parsed[-1].col_offset = parsed[-1].value.col_offset

        env = dict(math.__dict__)
        env.update(globals())

        # collect names that are read but never assigned and not predefined:
        # these become the generated function's parameters
        free = set()
        defined = set(["None", "False", "True"])
        defined.update(env)
        def recurse(node):
            if isinstance(node, ast.Name):
                if isinstance(node.ctx, ast.Store):
                    defined.add(node.id)
                elif isinstance(node.ctx, ast.Load) and node.id not in defined:
                    free.add(node.id)
            elif isinstance(node, ast.AST):
                for n in node._fields:
                    recurse(getattr(node, n))
            elif isinstance(node, list):
                for x in node:
                    recurse(x)
        recurse(parsed)

        avoid = free.union(defined)
        fcnname = varname(avoid, "fcn")

        # FIX: sort the free variables so the generated parameter order is
        # deterministic (a bare set iterates in hash order, which varies).
        module = ast.parse("""
def {fcn}({params}):
    REPLACEME
""".format(fcn=fcnname, params=",".join(sorted(free))))
        module.body[0].body = parsed
        module = compile(module, "", "exec")

        doexec(module, env)
        fcn = env[fcnname]

    return fcn

def trycompile(fcn, paramtypes=None, numba=True):
    """Compile *fcn* (string or callable) with Numba if available.

    numba=False/None disables compilation; numba=True uses default options;
    a dict is passed to nb.jit as keyword options.
    """
    fcn = stringfcn(fcn)

    if numba is None or numba is False:
        return fcn

    try:
        import numba as nb
    except ImportError:
        return fcn

    if numba is True:
        numbaopts = {}
    else:
        numbaopts = numba

    if isinstance(fcn, nb.dispatcher.Dispatcher):
        # FIX: Numba Dispatchers expose the wrapped Python function as
        # .py_func; ".py_fcn" raised AttributeError.
        fcn = fcn.py_func

    if paramtypes is None:
        return nb.jit(**numbaopts)(fcn)
    else:
        return nb.jit(paramtypes, **numbaopts)(fcn)

def returntype(fcn, paramtypes):
    """Return the inferred return type of a compiled *fcn* for *paramtypes*,
    or None if Numba is absent, fcn is not compiled, or no overload exists."""
    try:
        import numba as nb
    except ImportError:
        return None

    if isinstance(fcn, nb.dispatcher.Dispatcher):
        overload = fcn.overloads.get(paramtypes, None)
        if overload is None:
            return None
        else:
            return overload.signature.return_type
import json
import sys

import numpy

import oamap.generator

if sys.version_info[0] > 2:
    basestring = str

class PackedSource(object):
    """Base for chainable array-source wrappers that store some arrays in a
    transformed ("packed") representation.

    getall/putall translate between the logical roles the generators request
    and whatever the wrapped source actually stores; subclasses override them
    and must define _tojsonargs() for (de)serialization and equality.
    """
    def __init__(self, source, suffix):
        self.source = source    # wrapped source: another PackedSource, a dict-like, or None
        self.suffix = suffix    # appended to array names for the packed form

    def __repr__(self):
        return "{0}({1}{2})".format(self.__class__.__name__, repr(self.source), "".join(", " + repr(x) for x in self._tojsonargs()))

    def getall(self, roles):
        # delegate to the wrapped source; plain mappings are indexed by name
        if hasattr(self.source, "getall"):
            return self.source.getall(roles)
        else:
            return dict((n, self.source[str(n)]) for n in roles)

    def putall(self, roles2arrays):
        if hasattr(self.source, "putall"):
            self.source.putall(roles2arrays)
        else:
            for n, x in roles2arrays.items():
                self.source[str(n)] = x

    def copy(self):
        return self.__class__(self.source, self.suffix)

    def anchor(self, source):
        """Attach a concrete *source* at the innermost end of the chain."""
        if self.source is None:
            return self.__class__(source, self.suffix)
        else:
            return self.__class__(self.source.anchor(source), self.suffix)

    def __eq__(self, other):
        return self.__class__.__name__ == other.__class__.__name__ and self._tojsonargs() == other._tojsonargs()

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((PackedSource, self.__class__.__name__, tuple(self._tojsonargs())))

    def tojsonfile(self, file, *args, **kwds):
        json.dump(self.tojson(), file, *args, **kwds)

    def tojsonstring(self, *args, **kwds):
        return json.dumps(self.tojson(), *args, **kwds)

    def tojson(self):
        """Serialize the packing chain, outermost wrapper first."""
        out = []
        node = self
        while isinstance(node, PackedSource):
            # FIX: this loop formerly read self._tojsonargs() and
            # self.__class__, so every level of the chain serialized the
            # outermost node instead of its own class and arguments.
            args = node._tojsonargs()
            if len(args) == 0:
                out.append(node.__class__.__name__)
            else:
                out.append({node.__class__.__name__: args})
            node = node.source
        return out

    @staticmethod
    def fromjsonfile(file, *args, **kwds):
        return PackedSource.fromjson(json.load(file, *args, **kwds))

    @staticmethod
    def fromjsonstring(data, *args, **kwds):
        return PackedSource.fromjson(json.loads(data, *args, **kwds))

    @staticmethod
    def fromjson(data):
        """Rebuild a packing chain from tojson() output (innermost last)."""
        if isinstance(data, list):
            source = None
            for datum in reversed(data):
                if isinstance(datum, basestring):
                    classname = datum
                    args = ()
                elif isinstance(datum, dict) and len(datum) == 1:
                    classname, = datum.keys()
                    args, = datum.values()
                else:
                    raise ValueError("source packings JSON must be a list of strings or {\"classname\": [args]} dicts")
                try:
                    cls = globals()[classname]
                except KeyError:
                    raise ValueError("source packing class {0} not found".format(repr(classname)))
                source = cls(source, *args)
            return source
        else:
            raise ValueError("source packings JSON must be a list of strings or {\"classname\": [args]} dicts")

################################################################ BitPackMasks

class MaskBitPack(PackedSource):
    """Stores boolean-style mask arrays as packed bits (8 flags per byte)."""
    def __init__(self, source, suffix="-bitpacked"):
        super(MaskBitPack, self).__init__(source, suffix)

    def _tojsonargs(self):
        if self.suffix == "-bitpacked":
            return []
        else:
            return [self.suffix]

    def getall(self, roles):
        # request the suffixed packed arrays in place of mask roles, then unpack
        others = [n for n in roles if not isinstance(n, oamap.generator.MaskRole)]
        renamed = dict((oamap.generator.NoRole(str(n) + self.suffix, n.namespace), n) for n in roles if isinstance(n, oamap.generator.MaskRole))
        out = super(MaskBitPack, self).getall(others + list(renamed))
        for suffixedname, name in renamed.items():
            out[name] = self.unpack(out[suffixedname])
            del out[suffixedname]
        return out

    def putall(self, roles2arrays):
        out = {}
        for n, x in roles2arrays.items():
            if isinstance(n, oamap.generator.MaskRole):
                out[oamap.generator.NoRole(str(n) + self.suffix, n.namespace)] = self.pack(x)
            else:
                out[n] = x
        super(MaskBitPack, self).putall(out)

    @staticmethod
    def unpack(array):
        """Expand packed bits into an index-style mask (position index where
        unmasked, Masked.maskedvalue where masked)."""
        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array, dtype=numpy.dtype(numpy.uint8))
        unmasked = numpy.unpackbits(array).view(numpy.bool_)
        mask = numpy.empty(len(unmasked), dtype=oamap.generator.Masked.maskdtype)
        mask[unmasked] = numpy.arange(unmasked.sum(), dtype=mask.dtype)
        mask[~unmasked] = oamap.generator.Masked.maskedvalue
        return mask

    @staticmethod
    def pack(array):
        """Compress an index-style mask into packed bits."""
        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array, dtype=oamap.generator.Masked.maskdtype)
        return numpy.packbits(array != oamap.generator.Masked.maskedvalue)

################################################################ RunLengthMasks

# TODO: run-length encoding for masks
################################################################ ListsAsCounts

class ListCounts(PackedSource):
    """Stores a list's starts/stops pair as a single counts array (valid only
    when the list data are contiguous: starts[0] == 0, starts[1:] == stops[:-1])."""
    def __init__(self, source, suffix="-counts"):
        super(ListCounts, self).__init__(source, suffix)

    def _tojsonargs(self):
        if self.suffix == "-counts":
            return []
        else:
            return [self.suffix]

    def getall(self, roles):
        # request the suffixed counts arrays in place of starts/stops roles
        others = [n for n in roles if not isinstance(n, (oamap.generator.StartsRole, oamap.generator.StopsRole))]
        renamed = dict((oamap.generator.NoRole(str(n) + self.suffix, n.namespace), n) for n in roles if isinstance(n, oamap.generator.StartsRole))
        out = super(ListCounts, self).getall(others + list(renamed))
        for suffixedname, name in renamed.items():
            out[name], out[name.stops] = self.fromcounts(out[suffixedname])
            del out[suffixedname]
        return out

    def putall(self, roles2arrays):
        out = {}
        for n, x in roles2arrays.items():
            if isinstance(n, oamap.generator.StartsRole):
                out[oamap.generator.NoRole(str(n) + self.suffix, n.namespace)] = self.tocounts(x, roles2arrays[n.stops])
            elif isinstance(n, oamap.generator.StopsRole):
                pass   # stops are folded into the counts written for starts
            else:
                out[n] = x
        super(ListCounts, self).putall(out)

    @staticmethod
    def fromcounts(array):
        """Expand a counts array into (starts, stops) offset views."""
        offsets = numpy.empty(len(array) + 1, dtype=oamap.generator.ListGenerator.posdtype)
        offsets[0] = 0
        offsets[1:] = numpy.cumsum(array)
        return offsets[:-1], offsets[1:]

    @staticmethod
    def tocounts(starts, stops):
        """Collapse contiguous (starts, stops) into a counts array; raises
        ValueError if they are not contiguous."""
        if not isinstance(starts, numpy.ndarray):
            starts = numpy.array(starts, dtype=oamap.generator.ListGenerator.posdtype)
        # FIX: this guard formerly re-tested `starts`, so a non-ndarray
        # `stops` was never converted before the arithmetic below.
        if not isinstance(stops, numpy.ndarray):
            stops = numpy.array(stops, dtype=oamap.generator.ListGenerator.posdtype)
        if not starts[0] == 0 or not numpy.array_equal(starts[1:], stops[:-1]):
            raise ValueError("starts and stops cannot be converted to a single counts array")
        return stops - starts

################################################################ DropUnionOffsets

class UnionDropOffsets(PackedSource):
    """Drops union offset arrays on write and reconstructs them from the tags
    on read (offsets are fully determined by the tag sequence)."""
    def __init__(self, source):
        # FIX: formerly super(DropUnionOffsets, self) -- a NameError, since
        # this class is named UnionDropOffsets.
        super(UnionDropOffsets, self).__init__(source, "")

    def _tojsonargs(self):
        return []

    def getall(self, roles):
        nooffsets = [n for n in roles if not isinstance(n, oamap.generator.OffsetsRole)]
        out = super(UnionDropOffsets, self).getall(nooffsets)
        for n in roles:
            if isinstance(n, oamap.generator.TagsRole):
                out[n.offsets] = self.tags2offsets(out[n])
        return out

    def putall(self, roles2arrays):
        super(UnionDropOffsets, self).putall(dict((n, x) for n, x in roles2arrays.items() if not isinstance(n, oamap.generator.OffsetsRole)))

    @staticmethod
    def tags2offsets(tags):
        """Rebuild offsets: element i's offset is its ordinal among elements
        sharing the same tag."""
        if not isinstance(tags, numpy.ndarray):
            tags = numpy.array(tags, dtype=oamap.generator.UnionGenerator.tagdtype)
        offsets = numpy.empty(len(tags), dtype=oamap.generator.UnionGenerator.offsetdtype)
        for tag in numpy.unique(tags):
            hastag = (tags == tag)
            offsets[hastag] = numpy.arange(hastag.sum(), dtype=offsets.dtype)
        return offsets

################################################################ CompressAll

# TODO: apply a named compression algorithm
15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import numpy

import oamap.schema
import oamap.dataset
import oamap.database
import oamap.proxy
import oamap.extension.common   # FIX: used in default arguments below but was never imported
import oamap.backend.packing
from oamap.util import OrderedDict

def dataset(path, treepath, namespace=None, **kwargs):
    """Open ROOT file(s) as an oamap Dataset, one partition per file.

    path: file path or glob understood by uproot.tree.numentries.
    treepath: path of the TTree within each file.
    namespace: backend namespace key; derived from path/treepath if None.
    """
    import uproot

    if namespace is None:
        namespace = "root({0}, {1})".format(repr(path), repr(treepath))

    if "localsource" not in kwargs:
        kwargs["localsource"] = lambda path: uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    kwargs["total"] = False
    kwargs["blocking"] = True

    paths2entries = uproot.tree.numentries(path, treepath, **kwargs)
    if len(paths2entries) == 0:
        raise ValueError("path {0} matched no TTrees".format(repr(path)))

    # cumulative entry offsets: partition i covers [offsets[i], offsets[i+1])
    offsets = [0]
    paths = []
    for path, numentries in paths2entries.items():
        offsets.append(offsets[-1] + numentries)
        paths.append(path)

    # schema is inferred from the first file only (see "schemafrom" metadata)
    sch = schema(paths[0], treepath, namespace=namespace)
    doc = sch.doc
    sch.doc = None

    return oamap.dataset.Dataset(treepath.split("/")[-1].split(";")[0],
                                 sch,
                                 {namespace: ROOTBackend(paths, treepath, namespace)},
                                 oamap.dataset.SingleThreadExecutor(),
                                 offsets,
                                 extension=None,
                                 packing=None,
                                 doc=doc,
                                 metadata={"schemafrom": paths[0]})

def proxy(path, treepath, namespace="", extension=oamap.extension.common):
    """Open a single ROOT TTree as a lazy ListProxy of entries."""
    import uproot
    def localsource(path):
        return uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    return _proxy(uproot.open(path, localsource=localsource)[treepath], namespace=namespace, extension=extension)

def _proxy(tree, namespace="", extension=oamap.extension.common):
    """Build a ListProxy over an already-open uproot TTree."""
    schema = _schema(tree, namespace=namespace)
    generator = schema.generator(extension=extension)
    return oamap.proxy.ListProxy(generator, ROOTArrays(tree, ROOTBackend([tree._context.sourcepath], tree._context.treename, namespace)), generator._newcache(), 0, 1, tree.numentries)

def schema(path, treepath, namespace=""):
    """Infer an oamap schema from one ROOT TTree."""
    import uproot
    def localsource(path):
        return uproot.source.file.FileSource(path, chunkbytes=8*1024, limitbytes=None)
    return _schema(uproot.open(path, localsource=localsource)[treepath], namespace=namespace)

def _schema(tree, namespace=None):
    """Translate *tree*'s branch structure into an oamap List(Record(...)) schema,
    merging jagged branches that share a count branch into a single list."""
    import uproot

    if namespace is None:
        # FIX: this branch formerly formatted undefined names "path" and
        # "treepath" (NameError); derive the namespace from the tree itself.
        namespace = "root({0}, {1})".format(repr(tree._context.sourcepath), repr(tree._context.treename))

    def accumulate(node):
        # one Record field per branch; nested branches become nested Records
        out = oamap.schema.Record(OrderedDict(), namespace=namespace)
        for branchname, branch in node.iteritems(aliases=False) if isinstance(node, uproot.tree.TTreeMethods) else node.iteritems():
            if not isinstance(branchname, str):
                branchname = branchname.decode("ascii")
            fieldname = branchname.split(".")[-1]

            if len(branch.fBranches) > 0:
                subrecord = accumulate(branch)
                if len(subrecord.fields) > 0:
                    out[fieldname] = subrecord

            elif isinstance(branch.interpretation, (uproot.interp.asdtype, uproot.interp.numerical.asdouble32)):
                # fixed-size branch; extra dims become nested lists
                subnode = oamap.schema.Primitive(branch.interpretation.todtype, data=branchname, namespace=namespace)
                for i in range(len(branch.interpretation.todims)):
                    subnode = oamap.schema.List(subnode, starts="{0}:/{1}".format(branchname, i), stops="{0}:/{1}".format(branchname, i), namespace=namespace)
                out[fieldname] = subnode

            elif isinstance(branch.interpretation, uproot.interp.asjagged) and isinstance(branch.interpretation.asdtype, uproot.interp.asdtype):
                # jagged branch: wrap the element type in an outer List
                subnode = oamap.schema.Primitive(branch.interpretation.asdtype.todtype, data=branchname, namespace=namespace)
                for i in range(len(branch.interpretation.asdtype.todims)):
                    subnode = oamap.schema.List(subnode, starts="{0}:/{1}".format(branchname, i), stops="{0}:/{1}".format(branchname, i), namespace=namespace)
                out[fieldname] = oamap.schema.List(subnode, starts=branchname, stops=branchname, namespace=namespace)

            elif isinstance(branch.interpretation, uproot.interp.asstrings):
                # FIX: CHARTYPE lives in uproot.interp.strings; oamap has no
                # "interp" module, so the old reference raised AttributeError.
                out[fieldname] = oamap.schema.List(oamap.schema.Primitive(uproot.interp.strings.CHARTYPE, data=branchname, namespace=namespace), starts=branchname, stops=branchname, namespace=namespace, name="ByteString")

        return out

    def combinelists(schema):
        # if every field of a record is a jagged list driven by the same count
        # branch, hoist them into one List[Record] with shared starts/stops
        if isinstance(schema, oamap.schema.Record) and all(isinstance(x, oamap.schema.List) for x in schema.fields.values()):
            out = oamap.schema.List(oamap.schema.Record(OrderedDict(), namespace=namespace), namespace=namespace)

            countbranch = None
            for fieldname, field in schema.items():
                try:
                    branch = tree[field.starts]
                except KeyError:
                    return schema

                if branch.countbranch is None:
                    return schema

                if countbranch is None:
                    countbranch = branch.countbranch
                elif countbranch is not branch.countbranch:
                    return schema

                out.content[fieldname] = field.content

            if countbranch is not None:
                countbranchname = countbranch.name
                if not isinstance(countbranchname, str):
                    countbranchname = countbranchname.decode("ascii")
                out.starts = countbranchname
                out.stops = countbranchname
                return out

        return schema

    entries = accumulate(tree).replace(combinelists)
    entries.name = "Entry"

    doc = tree.title
    if not isinstance(doc, str):
        doc = doc.decode("ascii")

    return oamap.schema.List(entries, namespace=namespace, doc=doc)
stops=branchname, namespace=namespace) 121 | 122 | elif isinstance(branch.interpretation, uproot.interp.asstrings): 123 | out[fieldname] = oamap.schema.List(oamap.schema.Primitive(oamap.interp.strings.CHARTYPE, data=branchname, namespace=namespace), starts=branchname, stops=branchname, namespace=namespace, name="ByteString") 124 | 125 | return out 126 | 127 | def combinelists(schema): 128 | if isinstance(schema, oamap.schema.Record) and all(isinstance(x, oamap.schema.List) for x in schema.fields.values()): 129 | out = oamap.schema.List(oamap.schema.Record(OrderedDict(), namespace=namespace), namespace=namespace) 130 | 131 | countbranch = None 132 | for fieldname, field in schema.items(): 133 | try: 134 | branch = tree[field.starts] 135 | except KeyError: 136 | return schema 137 | 138 | if branch.countbranch is None: 139 | return schema 140 | 141 | if countbranch is None: 142 | countbranch = branch.countbranch 143 | elif countbranch is not branch.countbranch: 144 | return schema 145 | 146 | out.content[fieldname] = field.content 147 | 148 | if countbranch is not None: 149 | countbranchname = countbranch.name 150 | if not isinstance(countbranchname, str): 151 | countbranchname = countbranchname.decode("ascii") 152 | out.starts = countbranchname 153 | out.stops = countbranchname 154 | return out 155 | 156 | return schema 157 | 158 | entries = accumulate(tree).replace(combinelists) 159 | entries.name = "Entry" 160 | 161 | doc = tree.title 162 | if not isinstance(doc, str): 163 | doc = doc.decode("ascii") 164 | 165 | return oamap.schema.List(entries, namespace=namespace, doc=doc) 166 | 167 | class ROOTBackend(oamap.database.Backend): 168 | def __init__(self, paths, treepath, namespace): 169 | self._paths = tuple(paths) 170 | self._treepath = treepath 171 | self._namespace = namespace 172 | 173 | @property 174 | def args(self): 175 | return (self._paths, self._treepath) 176 | 177 | def tojson(self): 178 | return {"class": self.__class__.__module__ + "." 
+ self.__class__.__name__, 179 | "paths": list(self._paths), 180 | "treepath": self._treepath} 181 | 182 | @staticmethod 183 | def fromjson(obj, namespace): 184 | return ROOTBackend(obj["paths"], obj["treepath"], namespace) 185 | 186 | @property 187 | def namespace(self): 188 | return self._namespace 189 | 190 | def instantiate(self, partitionid): 191 | return ROOTArrays.frompath(self._paths[partitionid], self._treepath, self) 192 | 193 | class ROOTArrays(object): 194 | @staticmethod 195 | def frompath(path, treepath, backend): 196 | import uproot 197 | file = uproot.open(path) 198 | out = ROOTArrays(file[treepath], backend) 199 | out._source = file._context.source 200 | return out 201 | 202 | def __init__(self, tree, backend): 203 | self._tree = tree 204 | self._backend = backend 205 | self._keycache = {} 206 | 207 | @property 208 | def tree(self): 209 | return self._tree 210 | 211 | @property 212 | def backend(self): 213 | return self._backend 214 | 215 | def getall(self, roles): 216 | import uproot 217 | 218 | def chop(role): 219 | name = str(role).encode("ascii") 220 | try: 221 | colon = name.rindex(b":") 222 | except ValueError: 223 | return name, None 224 | else: 225 | return name[:colon], name[colon + 1:] 226 | 227 | arrays = self._tree.arrays(set(chop(x)[0] for x in roles), keycache=self._keycache) 228 | 229 | out = {} 230 | for role in roles: 231 | branchname, leafname = chop(role) 232 | array = arrays[branchname] 233 | 234 | if leafname is not None and leafname.startswith(b"/"): 235 | if isinstance(array, (uproot.interp.jagged.JaggedArray, uproot.interp.strings.Strings)): 236 | array = array.content 237 | 238 | length = array.shape[0] 239 | stride = 1 240 | for depth in range(int(leafname[1:])): 241 | length *= array.shape[depth + 1] 242 | stride *= array.shape[depth + 1] 243 | 244 | if isinstance(role, oamap.generator.StartsRole) and role not in out: 245 | offsets = numpy.arange(0, (length + 1)*stride, stride) 246 | out[role] = offsets[:-1] 247 | 
out[role.stops] = offsets[1:] 248 | 249 | elif isinstance(role, oamap.generator.StopsRole) and role not in out: 250 | offsets = numpy.arange(0, (length + 1)*stride, stride) 251 | out[role.starts] = offsets[:-1] 252 | out[role] = offsets[1:] 253 | 254 | elif isinstance(array, numpy.ndarray): 255 | if isinstance(role, oamap.generator.StartsRole) and role not in out: 256 | starts, stops = oamap.backend.packing.ListCounts.fromcounts(array) 257 | out[role] = starts 258 | out[role.stops] = stops 259 | 260 | elif isinstance(role, oamap.generator.StopsRole) and role not in out: 261 | starts, stops = oamap.backend.packing.ListCounts.fromcounts(array) 262 | out[role.starts] = starts 263 | out[role] = stops 264 | 265 | elif isinstance(role, oamap.generator.DataRole): 266 | if leafname is None: 267 | out[role] = array.reshape(-1) 268 | else: 269 | out[role] = array[leafname].reshape(-1) 270 | 271 | elif isinstance(array, (uproot.interp.jagged.JaggedArray, uproot.interp.strings.Strings)): 272 | if isinstance(role, oamap.generator.StartsRole): 273 | out[role] = array.starts 274 | 275 | elif isinstance(role, oamap.generator.StopsRole): 276 | out[role] = array.stops 277 | 278 | elif isinstance(role, oamap.generator.DataRole): 279 | if leafname is None: 280 | out[role] = array.content.reshape(-1) 281 | else: 282 | out[role] = array.content[leafname].reshape(-1) 283 | 284 | if role not in out: 285 | raise AssertionError(role) 286 | 287 | return out 288 | 289 | def close(self): 290 | if hasattr(self, "_source"): 291 | self._source.close() 292 | self._tree = None 293 | -------------------------------------------------------------------------------- /tests/test_proxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 
5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import unittest

import oamap.proxy
from oamap.schema import *

class TestProxy(unittest.TestCase):
    """Behavioral tests for oamap proxies built directly from dicts of columnar
    arrays (keys follow the generator's naming scheme: -B starts, -E stops,
    -D data, -M mask, -T tags, -O offsets, -F/-U fields, -P/-X pointers)."""

    def runTest(self):
        # Allows direct instantiation of this TestCase without a method name.
        pass

    def test_ListProxy_slicing(self):
        """ListProxy slicing must agree with builtin list slicing, including
        out-of-range, negative, and re-sliced slices."""
        range100 = list(range(100))
        proxy100 = List(Primitive("i8"))({"object-B": [0], "object-E": [100], "object-L-Di8": range100})
        self.assertEqual(range100, proxy100)
        for start1 in [None, 0, 5, 95, 110, -1, -5, -95, -110]:
            for stop1 in [None, 0, 5, 95, 110, -1, -5, -95, -110]:
                for step1 in [None, 1, 2, 5, 90, 110, -1, -2, -5, -90, -110]:
                    sliced_range100 = range100[start1:stop1:step1]
                    sliced_proxy100 = proxy100[start1:stop1:step1]
                    self.assertEqual(sliced_range100, sliced_proxy100)
                    if len(sliced_range100) > 0:
                        for start2 in [None, 0, 5, -1, -5]:
                            for stop2 in [None, 0, 5, -1, -5]:
                                for step2 in [None, 1, 3, -1, -3]:
                                    self.assertEqual(sliced_range100[start2:stop2:step2], sliced_proxy100[start2:stop2:step2])

    def test_Primitive(self):
        self.assertEqual(Primitive("f8")({"object-Df8": [3.14]}), 3.14)
        self.assertEqual(Primitive("f8", nullable=True)({"object-Df8": [], "object-M": [-1]}), None)
        self.assertEqual(Primitive("f8", nullable=True)({"object-Df8": [3.14], "object-M": [0]}), 3.14)

    def test_List(self):
        self.assertEqual(List(Primitive("f8"))({"object-B": [0], "object-E": [5], "object-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]}), [1.1, 2.2, 3.3, 4.4, 5.5])
        self.assertEqual(len(List(Primitive("f8"))({"object-B": [0], "object-E": [5], "object-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})), 5)
        self.assertEqual(List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]}), [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(len(List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})), 3)
        self.assertEqual(list(map(len, List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]}))), [2, 0, 3])
        self.assertEqual(List(List(Primitive("f8")), nullable=True)({"object-B": [], "object-E": [], "object-L-B": [], "object-L-E": [], "object-L-L-Df8": [], "object-M": [-1]}), None)
        self.assertEqual(List(List(Primitive("f8")), nullable=True)({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5], "object-M": [0]}), [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(List(List(Primitive("f8"), nullable=True))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2], "object-L-E": [2, 2], "object-L-L-Df8": [1.1, 2.2], "object-L-M": [0, 1, -1]}), [[1.1, 2.2], [], None])
        self.assertEqual(List(List(Primitive("f8"), nullable=True), nullable=True)({"object-B": [0], "object-E": [3], "object-L-B": [0, 2], "object-L-E": [2, 2], "object-L-L-Df8": [1.1, 2.2], "object-M": [0], "object-L-M": [0, 1, -1]}), [[1.1, 2.2], [], None])

    def test_List_slices(self):
        x = List(List(Primitive("f8")))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2, 2], "object-L-E": [2, 2, 5], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})

        self.assertEqual(x[0], [1.1, 2.2])
        self.assertEqual(x[1], [])
        self.assertEqual(x[2], [3.3, 4.4, 5.5])
        self.assertEqual(x[-1], [3.3, 4.4, 5.5])
        self.assertEqual(x[-2], [])
        self.assertEqual(x[-3], [1.1, 2.2])
        self.assertRaises(IndexError, lambda: x[3])
        self.assertRaises(IndexError, lambda: x[-4])

        self.assertEqual(x[0:1], [[1.1, 2.2]])
        self.assertEqual(x[0:2], [[1.1, 2.2], []])
        self.assertEqual(x[0:3], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[:], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[:10], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[1:3], [[], [3.3, 4.4, 5.5]])
        self.assertEqual(x[2:3], [[3.3, 4.4, 5.5]])
        self.assertEqual(x[3:3], [])
        self.assertEqual(x[-3:1], [[1.1, 2.2]])
        self.assertEqual(x[-3:2], [[1.1, 2.2], []])
        self.assertEqual(x[-3:3], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[-2:3], [[], [3.3, 4.4, 5.5]])
        self.assertEqual(x[-1:3], [[3.3, 4.4, 5.5]])
        self.assertEqual(x[-1:-1], [])
        self.assertEqual(x[-10:3], [[1.1, 2.2], [], [3.3, 4.4, 5.5]])
        self.assertEqual(x[::2], [[1.1, 2.2], [3.3, 4.4, 5.5]])
        self.assertEqual(x[1::2], [[]])

        # same slices on a list whose middle element is masked out (None)
        x = List(List(Primitive("f8"), nullable=True))({"object-B": [0], "object-E": [3], "object-L-B": [0, 2], "object-L-E": [2, 5], "object-L-M": [0, -1, 1], "object-L-L-Df8": [1.1, 2.2, 3.3, 4.4, 5.5]})

        self.assertEqual(x[1], None)
        self.assertEqual(x[-2], None)
        self.assertEqual(x[0:2], [[1.1, 2.2], None])
        self.assertEqual(x[0:3], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[:], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[:10], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[1:3], [None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[3:3], [])
        self.assertEqual(x[-3:2], [[1.1, 2.2], None])
        self.assertEqual(x[-3:3], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[-2:3], [None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[-1:-1], [])
        self.assertEqual(x[-10:3], [[1.1, 2.2], None, [3.3, 4.4, 5.5]])
        self.assertEqual(x[1::2], [None])

    def test_Union(self):
        self.assertEqual(Union([Primitive("i8"), Primitive("f8")])({"object-T": [0], "object-O": [0], "object-U0-Di8": [1], "object-U1-Df8": []}), 1)
        self.assertEqual(List(Union([Primitive("i8"), Primitive("f8")]))({"object-B": [0], "object-E": [7], "object-L-T": [0, 0, 1, 1, 1, 0, 0], "object-L-O": [0, 1, 0, 1, 2, 2, 3], "object-L-U0-Di8": [1, 2, 3, 4], "object-L-U1-Df8": [1.1, 2.2, 3.3]}), [1, 2, 1.1, 2.2, 3.3, 3, 4])

        self.assertEqual(list(List(Union([Primitive("i8"), Primitive("f8")], nullable=True))({"object-L-U1-Df8": [1.1, 3.3], "object-L-T": [0, 1, 1, 0], "object-E": [7], "object-L-O": [0, 0, 1, 1], "object-L-M": [0, -1, 1, -1, 2, 3, -1], "object-L-U0-Di8": [1, 3], "object-B": [0]})), [1, None, 1.1, None, 3.3, 3, None])
        self.assertEqual(List(Union([Primitive("i8", nullable=True), Primitive("f8")]))({"object-L-U0-M": [0, -1, 1, -1], "object-L-T": [0, 0, 1, 1, 1, 0, 0], "object-E": [7], "object-L-O": [0, 1, 0, 1, 2, 2, 3], "object-L-U1-Df8": [1.1, 2.2, 3.3], "object-L-U0-Di8": [1, 3], "object-B": [0]}), [1, None, 1.1, 2.2, 3.3, 3, None])

        self.assertEqual(List(Union([Primitive("i8"), List(Primitive("f8"))]))({"object-B": [0], "object-E": [2], "object-L-T": [0, 1], "object-L-O": [0, 0], "object-L-U0-Di8": [3], "object-L-U1-B": [0], "object-L-U1-E": [3], "object-L-U1-L-Df8": [1.1, 2.2, 3.3]}), [3, [1.1, 2.2, 3.3]])

    def test_Record(self):
        x = Record({"x": Primitive("i8"), "y": Primitive("f8")})({"object-Fx-Di8": [3], "object-Fy-Df8": [3.14]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, 3.14)

        x = List(Record({"x": Primitive("i8"), "y": Primitive("f8")}))({"object-B": [0], "object-E": [3], "object-L-Fx-Di8": [1, 2, 3], "object-L-Fy-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x[0].x, 1)
        self.assertEqual(x[1].x, 2)
        self.assertEqual(x[2].x, 3)
        self.assertEqual(x[0].y, 1.1)
        self.assertEqual(x[1].y, 2.2)
        self.assertEqual(x[2].y, 3.3)

        x = List(Record({"x": Primitive("i8"), "y": Primitive("f8", nullable=True)}))({"object-B": [0], "object-E": [3], "object-L-Fx-Di8": [1, 2, 3], "object-L-Fy-Df8": [2.2], "object-L-Fy-M": [-1, 0, -1]})
        self.assertEqual(x[0].x, 1)
        self.assertEqual(x[1].x, 2)
        self.assertEqual(x[2].x, 3)
        self.assertEqual(x[0].y, None)
        self.assertEqual(x[1].y, 2.2)
        self.assertEqual(x[2].y, None)

        x = List(Record({"x": Primitive("i8"), "y": Primitive("f8")}, nullable=True))({"object-B": [0], "object-E": [3], "object-L-M": [0, -1, 1], "object-L-Fx-Di8": [1, 3], "object-L-Fy-Df8": [1.1, 3.3]})
        self.assertEqual(x[0].x, 1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2].x, 3)
        self.assertEqual(x[0].y, 1.1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2].y, 3.3)

        x = Record({"x": Primitive("i8"), "y": List(Primitive("f8"))})({"object-Fx-Di8": [3], "object-Fy-B": [0], "object-Fy-E": [3], "object-Fy-L-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, [1.1, 2.2, 3.3])

        x = Record({"x": Primitive("i8"), "y": Union([Primitive("i8"), Primitive("f8")])})({"object-Fx-Di8": [3], "object-Fy-T": [0], "object-Fy-O": [0], "object-Fy-U0-Di8": [1], "object-Fy-U1-Df8": [1.1]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, 1)

        x = Record({"x": Primitive("i8"), "y": List(Union([Primitive("i8"), Primitive("f8")]))})({"object-Fx-Di8": [3], "object-Fy-B": [0], "object-Fy-E": [3], "object-Fy-L-T": [0, 1, 1], "object-Fy-L-O": [0, 0, 1], "object-Fy-L-U0-Di8": [1], "object-Fy-L-U1-Df8": [1.1, 2.2]})
        self.assertEqual(x.x, 3)
        self.assertEqual(x.y, [1, 1.1, 2.2])

        x = List(Union([Primitive("i8"), Record({"x": Primitive("i8"), "y": Primitive("f8")})]))({"object-B": [0], "object-E": [4], "object-L-T": [0, 1, 1, 0], "object-L-O": [0, 0, 1, 1], "object-L-U0-Di8": [99, 98], "object-L-U1-Fx-Di8": [1, 2], "object-L-U1-Fy-Df8": [1.1, 2.2]})
        self.assertEqual(x[0], 99)
        self.assertEqual(x[1].x, 1)
        self.assertEqual(x[1].y, 1.1)
        self.assertEqual(x[2].x, 2)
        self.assertEqual(x[2].y, 2.2)
        self.assertEqual(x[3], 98)

    def test_Tuple(self):
        x = Tuple((Primitive("i8"), Primitive("f8")))({"object-F0-Di8": [3], "object-F1-Df8": [3.14]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], 3.14)

        x = List(Tuple((Primitive("i8"), Primitive("f8"))))({"object-B": [0], "object-E": [3], "object-L-F0-Di8": [1, 2, 3], "object-L-F1-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x[0][0], 1)
        self.assertEqual(x[1][0], 2)
        self.assertEqual(x[2][0], 3)
        self.assertEqual(x[0][1], 1.1)
        self.assertEqual(x[1][1], 2.2)
        self.assertEqual(x[2][1], 3.3)

        x = List(Tuple((Primitive("i8"), Primitive("f8", nullable=True))))({"object-B": [0], "object-E": [3], "object-L-F0-Di8": [1, 2, 3], "object-L-F1-Df8": [2.2], "object-L-F1-M": [-1, 0, -1]})
        self.assertEqual(x[0][0], 1)
        self.assertEqual(x[1][0], 2)
        self.assertEqual(x[2][0], 3)
        self.assertEqual(x[0][1], None)
        self.assertEqual(x[1][1], 2.2)
        self.assertEqual(x[2][1], None)

        x = List(Tuple((Primitive("i8"), Primitive("f8")), nullable=True))({"object-B": [0], "object-E": [3], "object-L-M": [0, -1, 1], "object-L-F0-Di8": [1, 3], "object-L-F1-Df8": [1.1, 3.3]})
        self.assertEqual(x[0][0], 1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2][0], 3)
        self.assertEqual(x[0][1], 1.1)
        self.assertEqual(x[1], None)
        self.assertEqual(x[2][1], 3.3)

        x = Tuple((Primitive("i8"), List(Primitive("f8"))))({"object-F0-Di8": [3], "object-F1-B": [0], "object-F1-E": [3], "object-F1-L-Df8": [1.1, 2.2, 3.3]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], [1.1, 2.2, 3.3])

        x = Tuple((Primitive("i8"), Union([Primitive("i8"), Primitive("f8")])))({"object-F0-Di8": [3], "object-F1-T": [0], "object-F1-O": [0], "object-F1-U0-Di8": [1], "object-F1-U1-Df8": [1.1]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], 1)

        x = Tuple((Primitive("i8"), List(Union([Primitive("i8"), Primitive("f8")]))))({"object-F0-Di8": [3], "object-F1-B": [0], "object-F1-E": [3], "object-F1-L-T": [0, 1, 1], "object-F1-L-O": [0, 0, 1], "object-F1-L-U0-Di8": [1], "object-F1-L-U1-Df8": [1.1, 2.2]})
        self.assertEqual(x[0], 3)
        self.assertEqual(x[1], [1, 1.1, 2.2])

        x = List(Union([Primitive("i8"), Tuple((Primitive("i8"), Primitive("f8")))]))({"object-B": [0], "object-E": [4], "object-L-T": [0, 1, 1, 0], "object-L-O": [0, 0, 1, 1], "object-L-U0-Di8": [99, 98], "object-L-U1-F0-Di8": [1, 2], "object-L-U1-F1-Df8": [1.1, 2.2]})
        self.assertEqual(x[0], 99)
        self.assertEqual(x[1][0], 1)
        self.assertEqual(x[1][1], 1.1)
        self.assertEqual(x[2][0], 2)
        self.assertEqual(x[2][1], 2.2)
        self.assertEqual(x[3], 98)

    def test_Pointer(self):
        self.assertEqual(Pointer(Primitive("f8"))({"object-P": [3], "object-X-Df8": [0.0, 1.1, 2.2, 3.3, 4.4]}), 3.3)

        # self-referential schema: a tree of lists
        tree = Pointer(None)
        tree.target = List(tree)

        self.assertEqual(tree({"object-P": [0], "object-X-B": [0], "object-X-E": [0], "object-X-L-P-object-X-Df8": []}), [])

        self.assertEqual(repr(tree({"object-P": [0], "object-X-B": [0], "object-X-E": [1], "object-X-L-P-object-X": [0]})), "[[...]]")

        self.assertEqual(tree({"object-P": [0, 1], "object-X-B": [0, 1], "object-X-E": [1, 1], "object-X-L-P-object-X": [1]}), [[]])
        self.assertEqual(tree({"object-P": [0, 1], "object-X-B": [0, 2], "object-X-E": [2, 2], "object-X-L-P-object-X": [1, 1]}), [[], []])

        # circular linked list via a self-referential Record
        linkedlist = Record({"label": Primitive("i8")})
        linkedlist["next"] = Pointer(linkedlist)

        x = linkedlist({"object-Flabel-Di8": [0, 1, 2], "object-Fnext-P-object": [1, 2, 0]})
        self.assertEqual(x.label, 0)
        self.assertEqual(x.next.label, 1)
        self.assertEqual(x.next.next.label, 2)
        self.assertEqual(x.next.next.next.label, 0)

        # nullable pointer terminates the chain
        linkedlist = Record({"label": Primitive("i8")})
        linkedlist["next"] = Pointer(linkedlist, nullable=True)

        x = linkedlist({"object-Flabel-Di8": [0, 1, 2], "object-Fnext-P-object": [1, 2], "object-Fnext-M": [0, 1, -1]})
        self.assertEqual(x.label, 0)
        self.assertEqual(x.next.label, 1)
        self.assertEqual(x.next.next.label, 2)
        self.assertEqual(x.next.next.next, None)
#!/usr/bin/env python

# Copyright (c) 2017, DIANA-HEP
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import bisect
import json
import numbers
import sys
import math

import numpy

import oamap.util

if sys.version_info[0] > 2:
    xrange = range

# base class of all runtime types that require proxies: List, Record, and Tuple
class Proxy(object): pass

def tojson(value):
    """Recursively convert a proxy (or plain value) into JSON-serializable
    Python objects: proxies become lists/dicts, numpy scalars become Python
    numbers, and non-finite floats become the strings "nan"/"inf"/"-inf"."""
    if isinstance(value, ListProxy):
        return [tojson(x) for x in value]
    elif isinstance(value, RecordProxy):
        return dict((n, tojson(getattr(value, n))) for n in value._fields)
    elif isinstance(value, TupleProxy):
        return [tojson(x) for x in value]
    elif isinstance(value, (numbers.Integral, numpy.integer)):
        return int(value)
    elif isinstance(value, (numbers.Real, numpy.floating)):
        # JSON has no representation for non-finite floats; use strings.
        if math.isnan(value):
            return "nan"
        elif value == float("-inf"):
            return "-inf"
        elif value == float("inf"):
            return "inf"
        else:
            return float(value)
    elif isinstance(value, (numbers.Complex, numpy.complexfloating)):
        # numpy.complex (an alias of builtin complex) was removed in NumPy
        # 1.24; numpy.complexfloating covers all numpy complex scalar types.
        return {"real": tojson(value.real), "imag": tojson(value.imag)}
    elif isinstance(value, numpy.ndarray):
        return value.tolist()
    else:
        return value

def tojsonstring(value, *args, **kwds):
    """Serialize `value` to a JSON string; extra arguments go to json.dumps."""
    return json.dumps(tojson(value), *args, **kwds)

def tojsonfile(file, value, *args, **kwds):
    """Serialize `value` as JSON into the open file-like object `file`.

    Note: json.dump takes (obj, fp) — the original call passed the file
    first, which raised at runtime for any real file object.
    """
    json.dump(tojson(value), file, *args, **kwds)
################################################################ Lists

class ListProxy(Proxy):
    """Immutable, lazily-evaluated list view over columnar arrays.

    A ListProxy is defined by a generator (schema-derived), the backing
    `arrays`, a shared element `cache`, and a (whence, stride, length)
    window; slicing produces a new window over the same data, copying
    nothing.
    """

    __slots__ = ["_generator", "_arrays", "_cache", "_whence", "_stride", "_length"]

    def __init__(self, generator, arrays, cache, whence, stride, length):
        assert stride != 0
        assert length >= 0
        self._generator = generator
        self._arrays = arrays
        self._cache = cache
        self._whence = whence
        self._stride = stride
        self._length = length

    def __repr__(self, memo=None):
        # `memo` guards against infinite recursion through Pointer cycles.
        if memo is None:
            memo = set()
        key = (id(self._generator), self._whence, self._stride, self._length)
        if key in memo:
            return "[...]"
        memo = memo.union(set([key]))
        if len(self) > 10:
            # long lists: show the first and last five elements
            before = self[:5]
            after = self[-5:]
            return "[{0}, ..., {1}]".format(", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in before),
                                            ", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in after))
        else:
            return "[{0}]".format(", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in self))

    def __str__(self):
        return repr(self)

    def __getattr__(self, field):
        # expose oamap.operations (actions/transformations/recastings) as methods
        if field in self.__dict__:
            return self.__dict__[field]
        else:
            import oamap.operations
            for n, x in reversed(list(oamap.operations.actions.items()) + list(oamap.operations.transformations.items()) + list(oamap.operations.recastings.items())):
                if field == n:
                    return lambda *args, **kwargs: x(self, *args, **kwargs)
            raise AttributeError("ListProxy has no attribute {0}".format(repr(field)))

    @property
    def schema(self):
        return self._generator.schema

    @property
    def fields(self):
        """Field names of the contained records; TypeError if contents are not records."""
        import oamap.generator   # local import: not in this module's header (matches __getattr__ style)
        generator = self._generator
        while isinstance(generator, oamap.generator.ListGenerator):
            generator = generator.content
        if isinstance(generator, oamap.generator.RecordGenerator):
            return list(generator.fields)
        else:
            raise TypeError("list does not contain records")

    def indexed(self):
        return self

    def __len__(self):
        return self._length

    def __getslice__(self, start, stop):
        # for old-Python compatibility
        return self.__getitem__(slice(start, stop))

    def __getitem__(self, index):
        if isinstance(index, slice):
            start, stop, step = oamap.util.slice2sss(index, self._length)

            # a slice is just a new window over the same arrays: no copying
            whence = self._whence + self._stride*start
            stride = self._stride*step

            # number of elements: ceil(|stop - start| / |step|)
            d, m = divmod(abs(start - stop), abs(step))
            length = d + (1 if m != 0 else 0)

            return ListProxy(self._generator, self._arrays, self._cache, whence, stride, length)

        else:
            normalindex = index if index >= 0 else index + self._length
            if not 0 <= normalindex < self._length:
                raise IndexError("index {0} is out of bounds for size {1}".format(index, self._length))
            return self._generator.content._generate(self._arrays, self._whence + self._stride*normalindex, self._cache)

    def __iter__(self):
        return (self._generator.content._generate(self._arrays, i, self._cache) for i in xrange(self._whence, self._whence + self._stride*self._length, self._stride))

    def __hash__(self):
        # lists aren't usually hashable, but since ListProxy is immutable, we can add this feature
        return hash((ListProxy,) + tuple(self))

    def __eq__(self, other):
        if isinstance(other, ListProxy):
            return list(self) == list(other)
        elif isinstance(other, list):
            return list(self) == other
        else:
            return False

    def __lt__(self, other):
        if isinstance(other, ListProxy):
            return list(self) < list(other)
        elif isinstance(other, list):
            return list(self) < other
        else:
            # original used field "{1}" with a single format argument, which
            # raised IndexError instead of the intended TypeError message
            raise TypeError("unorderable types: list() < {0}()".format(other.__class__.__name__))

    # all of the following emulate normal list functionality using the overloaded methods above

    def __ne__(self, other): return not self.__eq__(other)
    def __le__(self, other): return self.__lt__(other) or self.__eq__(other)
    def __gt__(self, other): return not self.__lt__(other) and not self.__eq__(other)
    def __ge__(self, other): return not self.__lt__(other)

    def __add__(self, other): return list(self) + list(other)
    def __mul__(self, reps): return list(self) * reps
    def __rmul__(self, reps): return reps * list(self)
    def __reversed__(self):
        if sys.version_info[0] <= 2:
            return (self[i - 1] for i in xrange(len(self), 0, -1))
        else:
            return (self[i - 1] for i in range(len(self), 0, -1))
    def count(self, value): return sum(1 for x in self if x == value)
    def index(self, value, *args):
        if len(args) == 0:
            start = 0
            stop = len(self)
        elif len(args) == 1:
            start = args[0]
            stop = len(self)
        elif len(args) == 2:
            start, stop = args
        else:
            raise TypeError("index() takes at most 3 arguments ({0} given)".format(1 + len(args)))
        for i, x in enumerate(self):
            if x == value:
                return i
        raise ValueError("{0} is not in list".format(value))

    def __contains__(self, value):
        for x in self:
            if x == value:
                return True
        return False

################################################################ Records

class RecordProxy(Proxy):
    __slots__ = ["_generator", "_arrays", "_cache", "_index"]

    def __init__(self, generator, arrays, cache, index):
        self._generator = generator
        self._arrays = arrays
        self._cache = cache
        self._index = index

    def __repr__(self):
        return "<{0} at index {1}>".format("Record" if self._generator.name is None else self._generator.name, self._index)
| 238 | def __str__(self): 239 | return repr(self) 240 | 241 | @property 242 | def _fields(self): 243 | return list(self._generator.fields) 244 | 245 | def __dir__(self): 246 | return dir(super(RecordProxy, self)) + list(str(x) for x in self._fields) 247 | 248 | def __getattr__(self, field): 249 | try: 250 | # actual field names get priority (they're not allowed to start with underscore) 251 | generator = self._generator.fields[field] 252 | except KeyError: 253 | # barring any conflicts with actual field names, "schema" and "fields" are convenient 254 | if field == "schema": 255 | return self._generator.schema 256 | elif field == "fields": 257 | return self._fields 258 | elif field == "name": 259 | return self._generator.name 260 | else: 261 | import oamap.operations 262 | for n, x in reversed(list(oamap.operations.actions.items()) + list(oamap.operations.transformations.items()) + list(oamap.operations.recastings.items())): 263 | if field == n: 264 | return lambda *args, **kwargs: x(self, *args, **kwargs) 265 | raise AttributeError("{0} object has no attribute {1}".format(repr("Record" if self._generator.name is None else self._generator.name), repr(field))) 266 | else: 267 | return generator._generate(self._arrays, self._index, self._cache) 268 | 269 | def __hash__(self): 270 | return hash((RecordProxy, self._generator.name) + tuple(self._generator.fields.items())) 271 | 272 | def __eq__(self, other): 273 | return isinstance(other, RecordProxy) and self._generator.name == other._generator.name and set(self._generator.fields) == set(other._generator.fields) and all(self.__getattr__(n) == other.__getattr__(n) for n in self._generator.fields) 274 | 275 | def __lt__(self, other): 276 | if isinstance(other, RecordProxy) and self._generator.name == other._generator.name and set(self._generator.fields) == set(other._generator.fields): 277 | return [self.__getattr__(n) for n in self._generator.fields] < [other.__getattr__(n) for n in self._generator.fields] 278 | else: 
279 | raise TypeError("unorderable types: {0}() < {1}()".format("" if self._generator.name is None else "".format(repr(self._generator.name)), other.__class__)) 280 | 281 | def __ne__(self, other): return not self.__eq__(other) 282 | def __le__(self, other): return self.__lt__(other) or self.__eq__(other) 283 | def __gt__(self, other): return not self.__lt__(other) and not self.__eq__(other) 284 | def __ge__(self, other): return not self.__lt__(other) 285 | 286 | ################################################################ Tuples 287 | 288 | class TupleProxy(Proxy): 289 | __slots__ = ["_generator", "_arrays", "_cache", "_index"] 290 | 291 | def __init__(self, generator, arrays, cache, index): 292 | self._generator = generator 293 | self._arrays = arrays 294 | self._cache = cache 295 | self._index = index 296 | 297 | def __repr__(self, memo=None): 298 | if memo is None: 299 | memo = set() 300 | key = (self._index,) + tuple(id(x) for x in self._generator.types) 301 | if key in memo: 302 | return "(...)" 303 | memo = memo.union(set([key])) 304 | return "({0}{1})".format(", ".join(x.__repr__(memo) if isinstance(x, (ListProxy, TupleProxy)) else repr(x) for x in self), "," if len(self) == 1 else "") 305 | 306 | def __str__(self): 307 | return repr(self) 308 | 309 | def __getattr__(self, field): 310 | if field in self.__dict__: 311 | return self.__dict__[field] 312 | else: 313 | import oamap.operations 314 | for n, x in reversed(list(oamap.operations.actions.items()) + list(oamap.operations.transformations.items()) + list(oamap.operations.recastings.items())): 315 | if field == n: 316 | return lambda *args, **kwargs: x(self, *args, **kwargs) 317 | raise AttributeError("TupleProxy has no attribute {0}".format(repr(field))) 318 | 319 | def __len__(self): 320 | return len(self._generator.types) 321 | 322 | def __getslice__(self, start, stop): 323 | # for old-Python compatibility 324 | return self.__getitem__(slice(start, stop)) 325 | 326 | def __getitem__(self, index): 
327 | if isinstance(index, slice): 328 | lenself = len(self) 329 | start = 0 if index.start is None else index.start 330 | stop = lenself if index.stop is None else index.stop 331 | step = 1 if index.step is None else index.step 332 | return tuple(self[i] for i in range(start, stop, step)) 333 | 334 | else: 335 | return self._generator.types[index]._generate(self._arrays, self._index, self._cache) 336 | 337 | def __iter__(self): 338 | return (t._generate(self._arrays, self._index, self._cache) for t in self._generator.types) 339 | 340 | def __hash__(self): 341 | return hash(tuple(self)) 342 | 343 | def __eq__(self, other): 344 | if isinstance(other, TupleProxy): 345 | return tuple(self) == tuple(other) 346 | elif isinstance(other, tuple): 347 | return tuple(self) == other 348 | else: 349 | return False 350 | 351 | def __lt__(self, other): 352 | if isinstance(other, TupleProxy): 353 | return tuple(self) < tuple(other) 354 | elif isinstance(other, tuple): 355 | return tuple(self) < other 356 | else: 357 | raise TypeError("unorderable types: tuple() < {1}()".format(other.__class__)) 358 | 359 | # all of the following emulate normal tuple functionality using the overloaded methods above 360 | 361 | def __ne__(self, other): return not self.__eq__(other) 362 | def __le__(self, other): return self.__lt__(other) or self.__eq__(other) 363 | def __gt__(self, other): return not self.__lt__(other) and not self.__eq__(other) 364 | def __ge__(self, other): return not self.__lt__(other) 365 | 366 | def __add__(self, other): return tuple(self) + tuple(other) 367 | def __mul__(self, reps): return tuple(self) * reps 368 | def __rmul__(self, reps): return reps * tuple(self) 369 | def __reversed__(self): 370 | return (self[i - 1] for i in range(len(self), 0, -1)) 371 | def count(self, value): return sum(1 for x in self if x == value) 372 | def index(self, value, *args): 373 | if len(args) == 0: 374 | start = 0 375 | stop = len(self) 376 | elif len(args) == 1: 377 | start = args[0] 378 
| stop = len(self) 379 | elif len(args) == 2: 380 | start, stop = args 381 | else: 382 | raise TypeError("index() takes at most 3 arguments ({0} given)".format(1 + len(args))) 383 | for i, x in enumerate(self): 384 | if x == value: 385 | return i 386 | raise ValueError("{0} is not in list".format(value)) 387 | 388 | def __contains__(self, value): 389 | for x in self: 390 | if x == value: 391 | return True 392 | return False 393 | -------------------------------------------------------------------------------- /oamap/inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | import re 32 | import numbers 33 | import sys 34 | import math 35 | 36 | import numpy 37 | 38 | import oamap.schema 39 | from oamap.util import OrderedDict 40 | 41 | if sys.version_info[0] > 2: 42 | basestring = str 43 | 44 | ################################################################ inferring schemas from data 45 | 46 | def fromdata(obj, limit=None): 47 | if limit is None or (isinstance(limit, (numbers.Integral, numpy.integer)) and limit >= 0): 48 | pass 49 | else: 50 | raise TypeError("limit must be None or a non-negative integer, not {0}".format(limit)) 51 | 52 | class Intermediate(object): 53 | def __init__(self, nullable): 54 | self.nullable = nullable 55 | 56 | class Unknown(Intermediate): 57 | def resolve(self): 58 | raise TypeError("could not resolve a type (e.g. 
all examples of a List-typed attribute are empty, can't determine its content type)") 59 | 60 | class Boolean(Intermediate): 61 | def resolve(self): 62 | return oamap.schema.Primitive(numpy.dtype(numpy.bool_), nullable=self.nullable) 63 | 64 | class Number(Intermediate): 65 | max_uint8 = numpy.iinfo(numpy.uint8).max 66 | max_uint16 = numpy.iinfo(numpy.uint16).max 67 | max_uint32 = numpy.iinfo(numpy.uint32).max 68 | max_uint64 = numpy.iinfo(numpy.uint64).max 69 | min_int8 = numpy.iinfo(numpy.int8).min 70 | max_int8 = numpy.iinfo(numpy.int8).max 71 | min_int16 = numpy.iinfo(numpy.int16).min 72 | max_int16 = numpy.iinfo(numpy.int16).max 73 | min_int32 = numpy.iinfo(numpy.int32).min 74 | max_int32 = numpy.iinfo(numpy.int32).max 75 | min_int64 = numpy.iinfo(numpy.int64).min 76 | max_int64 = numpy.iinfo(numpy.int64).max 77 | def __init__(self, nullable, min, max, whole, real): 78 | Intermediate.__init__(self, nullable) 79 | self.min = min 80 | self.max = max 81 | self.whole = whole 82 | self.real = real 83 | def resolve(self): 84 | if self.whole: 85 | if self.min >= 0: 86 | if self.max <= self.max_uint8: 87 | t = numpy.uint8 88 | elif self.max <= self.max_uint16: 89 | t = numpy.uint16 90 | elif self.max <= self.max_uint32: 91 | t = numpy.uint32 92 | elif self.max <= self.max_uint64: 93 | t = numpy.uint64 94 | else: 95 | t = numpy.float64 96 | else: 97 | if self.min_int8 <= self.min and self.max <= self.max_int8: 98 | t = numpy.int8 99 | elif self.min_int16 <= self.min and self.max <= self.max_int16: 100 | t = numpy.int16 101 | elif self.min_int32 <= self.min and self.max <= self.max_int32: 102 | t = numpy.int32 103 | elif self.min_int64 <= self.min and self.max <= self.max_int64: 104 | t = numpy.int64 105 | else: 106 | t = numpy.float64 107 | elif self.real: 108 | t = numpy.float64 109 | else: 110 | t = numpy.complex128 111 | return oamap.schema.Primitive(numpy.dtype(t), nullable=self.nullable) 112 | 113 | class String(Intermediate): 114 | def __init__(self, nullable, 
utf8): 115 | Intermediate.__init__(self, nullable) 116 | self.utf8 = utf8 117 | def resolve(self): 118 | return oamap.schema.List(oamap.schema.Primitive(numpy.uint8), nullable=self.nullable, name=("UTF8String" if self.utf8 else "ByteString")) 119 | 120 | class IntermediateList(Intermediate): 121 | def __init__(self, nullable, content): 122 | Intermediate.__init__(self, nullable) 123 | self.content = content 124 | def resolve(self): 125 | return oamap.schema.List(self.content.resolve(), nullable=self.nullable) 126 | 127 | class IntermediateRecord(Intermediate): 128 | def __init__(self, nullable, fields, name): 129 | Intermediate.__init__(self, nullable) 130 | self.fields = fields 131 | self.name = name 132 | def resolve(self): 133 | return oamap.schema.Record(dict((n, x.resolve()) for n, x in self.fields.items()), nullable=self.nullable, name=self.name) 134 | 135 | class IntermediateTuple(Intermediate): 136 | def __init__(self, nullable, types): 137 | Intermediate.__init__(self, nullable) 138 | self.types = types 139 | def resolve(self): 140 | return oamap.schema.Tuple([x.resolve() for x in self.types], nullable=self.nullable) 141 | 142 | # Unions are special for type-inference 143 | class IntermediateUnion(Intermediate): 144 | def __init__(self, nullable, possibilities): 145 | Intermediate.__init__(self, nullable) 146 | self.possibilities = possibilities 147 | def resolve(self): 148 | return oamap.schema.Union([x.resolve() for x in self.possibilities], nullable=self.nullable) 149 | 150 | # no Pointers in type-inference (we'd have to keep a big map of *everything*!) 
151 | 152 | def flatten(possibilities): 153 | return [y for x in possibilities if isinstance(x, IntermediateUnion) for y in x.possibilities] + [x for x in possibilities if not isinstance(x, IntermediateUnion)] 154 | 155 | def unify2(x, y): 156 | nullable = x.nullable or y.nullable 157 | 158 | if isinstance(x, Unknown) and isinstance(y, Unknown): 159 | return Unknown(nullable) 160 | 161 | elif isinstance(x, Unknown): 162 | y.nullable = nullable 163 | return y 164 | 165 | elif isinstance(y, Unknown): 166 | x.nullable = nullable 167 | return x 168 | 169 | elif isinstance(x, Boolean) and isinstance(y, Boolean): 170 | return Boolean(nullable) 171 | 172 | elif isinstance(x, Number) and isinstance(y, Number): 173 | return Number(nullable, min(x.min, y.min), max(x.max, y.max), x.whole and y.whole, x.real and y.real) 174 | 175 | elif isinstance(x, String) and isinstance(y, String): 176 | return String(nullable, x.utf8 or y.utf8) 177 | 178 | elif isinstance(x, IntermediateList) and isinstance(y, IntermediateList): 179 | return IntermediateList(nullable, unify2(x.content, y.content)) 180 | 181 | elif isinstance(x, IntermediateRecord) and isinstance(y, IntermediateRecord) and set(x.fields) == set(y.fields) and (x.name is None or y.name is None or x.name == y.name): 182 | return IntermediateRecord(nullable, dict((n, unify2(x.fields[n], y.fields[n])) for n in x.fields), name=(y.name if x.name is None else x.name)) 183 | 184 | elif isinstance(x, IntermediateTuple) and isinstance(y, IntermediateTuple) and len(x.types) == len(y.types): 185 | return IntermediateTuple(nullable, [unify2(xi, yi) for xi, yi in zip(x.types, y.types)]) 186 | 187 | elif isinstance(x, IntermediateUnion) and isinstance(y, IntermediateUnion): 188 | return unify(x.possibilities + y.possibilities) 189 | 190 | elif isinstance(x, IntermediateUnion): 191 | return unify(x.possibilities + [y]) 192 | 193 | elif isinstance(y, IntermediateUnion): 194 | return unify([x] + y.possibilities) 195 | 196 | else: 197 | # can't 
be unified 198 | return IntermediateUnion(nullable, flatten([x, y])) 199 | 200 | def unify(possibilities): 201 | if len(possibilities) == 0: 202 | return Unknown(False) 203 | 204 | elif len(possibilities) == 1: 205 | return possibilities[0] 206 | 207 | elif len(possibilities) == 2: 208 | return unify2(possibilities[0], possibilities[1]) 209 | 210 | else: 211 | distinct = [] 212 | for x in flatten(possibilities): 213 | found = False 214 | 215 | for i, y in enumerate(distinct): 216 | merged = unify2(x, y) 217 | if not isinstance(merged, IntermediateUnion): 218 | distinct[i] = merged 219 | found = True 220 | break 221 | 222 | if not found: 223 | distinct.append(x) 224 | 225 | if len(distinct) == 1: 226 | return distinct[0] 227 | else: 228 | return IntermediateUnion(False, flatten(distinct)) 229 | 230 | def buildintermediate(obj, limit, memo): 231 | if id(obj) in memo: 232 | raise ValueError("cyclic reference in Python object at {0} (Pointer types cannot be inferred)".format(obj)) 233 | 234 | # by copying, rather than modifying in-place (memo.add), we find cyclic references, rather than DAGs 235 | memo = memo.union(set([id(obj)])) 236 | 237 | if obj is None: 238 | return Unknown(True) 239 | 240 | elif obj is False or obj is True: 241 | return Boolean(False) 242 | 243 | elif isinstance(obj, (numbers.Integral, numpy.integer)): 244 | return Number(False, int(obj), int(obj), True, True) 245 | 246 | elif isinstance(obj, (numbers.Real, numpy.floating)): 247 | return Number(False, float(obj), float(obj), False, True) 248 | 249 | elif isinstance(obj, (numbers.Complex, numpy.complex)): 250 | return Number(False, float("-inf"), float("inf"), False, False) 251 | 252 | elif isinstance(obj, bytes): 253 | return String(False, False) 254 | 255 | elif isinstance(obj, basestring): 256 | return String(False, True) 257 | 258 | elif isinstance(obj, dict): 259 | return IntermediateRecord(False, dict((n, buildintermediate(x, limit, memo)) for n, x in obj.items()), None) 260 | 261 | elif 
isinstance(obj, tuple) and hasattr(obj, "_fields"): 262 | # this is a namedtuple; interpret it as a Record, rather than a Tuple 263 | return IntermediateRecord(False, dict((n, buildintermediate(getattr(obj, n), limit, memo)) for n in obj._fields), obj.__class__.__name__) 264 | 265 | elif isinstance(obj, tuple): 266 | return IntermediateTuple(False, [buildintermediate(x, limit, memo) for x in obj]) 267 | 268 | else: 269 | try: 270 | limited = [] 271 | for x in obj: 272 | if limit is None or len(limited) < limit: 273 | limited.append(x) 274 | else: 275 | break 276 | except TypeError: 277 | # not iterable, so interpret it as a Record 278 | return IntermediateRecord(False, dict((n, buildintermediate(getattr(obj, n), limit, memo)) for n in dir(obj) if not n.startswith("_") and not callable(getattr(obj, n))), obj.__class__.__name__) 279 | else: 280 | # iterable, so interpret it as a List 281 | return IntermediateList(False, unify([buildintermediate(x, None, memo) for x in obj])) 282 | 283 | return buildintermediate(obj, limit, set()).resolve() 284 | 285 | ################################################################ inferring schemas from a namespace 286 | 287 | def fromnames(arraynames, prefix="object", delimiter="-"): 288 | def filter(arraynames, prefix): 289 | return [x for x in arraynames if x.startswith(prefix)] 290 | 291 | def recurse(arraynames, prefix, byname, internalpointers): 292 | prefixdelimiter = prefix + delimiter 293 | name = None 294 | for n in arraynames: 295 | if n.startswith(prefixdelimiter): 296 | if n[len(prefixdelimiter)] == "N": 297 | match = oamap.schema.Schema._identifier.match(n[len(prefixdelimiter) + 1:]) 298 | if match is not None: 299 | name = match.group(0) 300 | break 301 | 302 | if name is not None: 303 | prefix = prefixdelimiter + "N" + name 304 | prefixdelimiter = prefix + delimiter 305 | 306 | mask = prefixdelimiter + "M" 307 | starts = prefixdelimiter + "B" 308 | stops = prefixdelimiter + "E" 309 | content = prefixdelimiter + "L" 
310 | tags = prefixdelimiter + "T" 311 | offsets = prefixdelimiter + "O" 312 | uniondata = prefixdelimiter + "U" 313 | field = prefixdelimiter + "F" 314 | positions = prefixdelimiter + "P" 315 | external = prefixdelimiter + "X" 316 | primitive = prefixdelimiter + "D" 317 | 318 | nullable = mask in arraynames 319 | if not nullable: 320 | mask = None 321 | 322 | if starts in arraynames and stops in arraynames: 323 | byname[prefix] = None 324 | byname[prefix] = oamap.schema.List(recurse(filter(arraynames, content), content, byname, internalpointers), nullable=nullable, starts=None, stops=None, mask=None, name=name, doc=None) 325 | 326 | elif tags in arraynames: 327 | possibilities = [] 328 | while True: 329 | possibility = uniondata + repr(len(possibilities)) 330 | if any(x.startswith(possibility) for x in arraynames): 331 | possibilities.append(possibility) 332 | else: 333 | break 334 | byname[prefix] = None 335 | byname[prefix] = oamap.schema.Union([recurse(filter(arraynames, x), x, byname, internalpointers) for x in possibilities], nullable=nullable, tags=None, offsets=None, mask=None, name=name, doc=None) 336 | 337 | elif any(x.startswith(field) for x in arraynames): 338 | pattern = re.compile("^" + field + "(" + oamap.schema.Schema._identifier.pattern + ")") 339 | fields = {} 340 | for x in arraynames: 341 | matches = pattern.match(x) 342 | if matches is not None: 343 | if matches.group(1) not in fields: 344 | fields[matches.group(1)] = [] 345 | fields[matches.group(1)].append(x) 346 | 347 | types = [] 348 | while True: 349 | tpe = field + repr(len(types)) 350 | if any(x.startswith(tpe) for x in arraynames): 351 | types.append(tpe) 352 | else: 353 | break 354 | 355 | if len(fields) >= 0 and len(types) == 0: 356 | byname[prefix] = oamap.schema.Record(oamap.schema.OrderedDict([(n, recurse(fields[n], field + n, byname, internalpointers)) for n in sorted(fields)]), nullable=nullable, mask=None, name=name, doc=None) 357 | elif len(fields) == 0 and len(types) > 0: 358 
| byname[prefix] = oamap.schema.Tuple([recurse(filter(arraynames, n), n, byname, internalpointers) for n in types], nullable=nullable, mask=None, name=name, doc=None) 359 | else: 360 | raise KeyError("ambiguous set of array names: may be Record or Tuple at {0}".format(repr(prefix))) 361 | 362 | elif any(x.startswith(positions) for x in arraynames): 363 | if positions in arraynames: 364 | # external 365 | byname2 = {} 366 | internalpointers2 = [] 367 | target = finalize(recurse(filter(arraynames, external), external, byname2, internalpointers2), byname2, internalpointers2) 368 | byname[prefix] = oamap.schema.Pointer(target, nullable=nullable, positions=None, mask=None, name=name, doc=None) 369 | 370 | else: 371 | # internal 372 | matches = [x[len(positions) + 1:] for x in arraynames if x.startswith(positions)] 373 | if len(matches) != 1: 374 | raise KeyError("ambiguous set of array names: more than one internal Pointer at {0}".format(repr(prefix))) 375 | target = None # placeholder! see finalize 376 | byname[prefix] = oamap.schema.Pointer(target, nullable=nullable, positions=None, mask=None, name=name, doc=None) 377 | internalpointers.append((byname[prefix], matches[0])) 378 | 379 | elif any(x.startswith(primitive) for x in arraynames): 380 | matches = [x[len(primitive) - 1:] for x in arraynames if x.startswith(primitive)] 381 | if len(matches) != 1: 382 | raise KeyError("ambiguous set of array names: more than one Primitive at {0}".format(repr(prefix))) 383 | dtype = oamap.schema.Primitive._str2dtype(matches[0], delimiter) 384 | byname[prefix] = oamap.schema.Primitive(dtype, nullable=nullable, data=None, mask=None, name=name, doc=None) 385 | 386 | else: 387 | raise KeyError("missing array names: nothing found as {0} contents".format(repr(prefix))) 388 | 389 | return byname[prefix] 390 | 391 | def finalize(out, byname, internalpointers): 392 | for pointer, targetname in internalpointers: 393 | if targetname in byname: 394 | pointer.target = byname[targetname] 395 | 
else: 396 | raise KeyError("Pointer's internal target is {0}, but there is no object with that prefix".format(repr(targetname))) 397 | return out 398 | 399 | byname = {} 400 | internalpointers = [] 401 | return finalize(recurse(filter(arraynames, prefix), prefix, byname, internalpointers), byname, internalpointers) 402 | -------------------------------------------------------------------------------- /oamap/fill.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2017, DIANA-HEP 4 | # All rights reserved. 5 | # 6 | # Redistribution and use in source and binary forms, with or without 7 | # modification, are permitted provided that the following conditions are met: 8 | # 9 | # * Redistributions of source code must retain the above copyright notice, this 10 | # list of conditions and the following disclaimer. 11 | # 12 | # * Redistributions in binary form must reproduce the above copyright notice, 13 | # this list of conditions and the following disclaimer in the documentation 14 | # and/or other materials provided with the distribution. 15 | # 16 | # * Neither the name of the copyright holder nor the names of its 17 | # contributors may be used to endorse or promote products derived from 18 | # this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | import re 32 | from functools import reduce 33 | 34 | import oamap.generator 35 | import oamap.inference 36 | import oamap.fillable 37 | 38 | def toarrays(fillables): 39 | return dict((n, x[:]) for n, x in fillables.items()) 40 | 41 | ################################################################ Python data, possibly made by json.load 42 | 43 | def _fromdata_initialize(gen, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs): 44 | if isinstance(gen, oamap.generator.PrimitiveGenerator): 45 | fillables[gen.data].revert() 46 | forefront = len(fillables[gen.data]) 47 | fillables_leaf_to_root.append(fillables[gen.data]) 48 | 49 | elif isinstance(gen, oamap.generator.ListGenerator): 50 | _fromdata_initialize(gen.content, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) 51 | fillables[gen.starts].revert() 52 | fillables[gen.stops].revert() 53 | assert len(fillables[gen.starts]) == len(fillables[gen.stops]) 54 | forefront = len(fillables[gen.stops]) 55 | fillables_leaf_to_root.append(fillables[gen.starts]) 56 | fillables_leaf_to_root.append(fillables[gen.stops]) 57 | 58 | elif isinstance(gen, oamap.generator.UnionGenerator): 59 | for x in gen.possibilities: 60 | _fromdata_initialize(x, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, 
positions_to_pointerobjs) 61 | fillables[gen.tags].revert() 62 | fillables[gen.offsets].revert() 63 | assert len(fillables[gen.tags]) == len(fillables[gen.offsets]) 64 | forefront = len(fillables[gen.tags]) 65 | fillables_leaf_to_root.append(fillables[gen.tags]) 66 | fillables_leaf_to_root.append(fillables[gen.offsets]) 67 | 68 | elif isinstance(gen, oamap.generator.RecordGenerator): 69 | uniques = set(_fromdata_initialize(x, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) for x in gen.fields.values()) 70 | assert len(uniques) == 1 71 | forefront = list(uniques)[0] 72 | 73 | elif isinstance(gen, oamap.generator.TupleGenerator): 74 | uniques = set(_fromdata_initialize(x, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) for x in gen.types) 75 | assert len(uniques) == 1 76 | forefront = list(uniques)[0] 77 | 78 | elif isinstance(gen, oamap.generator.PointerGenerator): 79 | if gen._internal and gen.target is generator and len(fillables[gen.positions]) != 0: 80 | raise TypeError("the root of a Schema may be the target of a Pointer, but if so, it can only be filled from data once") 81 | 82 | if gen not in pointers: 83 | pointers.append(gen) 84 | pointerobjs_keys.append(id(gen)) 85 | targetids_keys.append(id(gen.target)) 86 | 87 | if not gen._internal: 88 | _fromdata_initialize(gen.target, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) 89 | fillables[gen.positions].revert() 90 | forefront = len(fillables[gen.positions]) 91 | fillables_leaf_to_root.append(fillables[gen.positions]) 92 | positions_to_pointerobjs[gen.positions] = id(gen) 93 | 94 | elif isinstance(gen, oamap.generator.ExtendedGenerator): 95 | forefront = _fromdata_initialize(gen.generic, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs) 96 | 97 
| else: 98 | raise TypeError("unrecognized generator: {0}".format(repr(gen))) 99 | 100 | if isinstance(gen, oamap.generator.Masked): 101 | fillables[gen.mask].revert() 102 | # mask forefront overrides any other arrays 103 | forefront = len(fillables[gen.mask]) 104 | fillables_leaf_to_root.append(fillables[gen.mask]) 105 | 106 | return forefront 107 | 108 | def _fromdata_forefront(gen, fillables, pointerobjs, secondary=False): 109 | if not secondary and isinstance(gen, oamap.generator.Masked): 110 | # mask forefront overrides any other arrays 111 | return fillables[gen.mask].forefront() 112 | 113 | elif isinstance(gen, oamap.generator.PrimitiveGenerator): 114 | return fillables[gen.data].forefront() 115 | 116 | elif isinstance(gen, oamap.generator.ListGenerator): 117 | return fillables[gen.stops].forefront() 118 | 119 | elif isinstance(gen, oamap.generator.UnionGenerator): 120 | return fillables[gen.tags].forefront() 121 | 122 | elif isinstance(gen, oamap.generator.RecordGenerator): 123 | for x in gen.fields.values(): 124 | return _fromdata_forefront(x, fillables, pointerobjs) 125 | 126 | elif isinstance(gen, oamap.generator.TupleGenerator): 127 | for x in gen.types: 128 | return _fromdata_forefront(x, fillables, pointerobjs) 129 | 130 | elif isinstance(gen, oamap.generator.PointerGenerator): 131 | return len(pointerobjs[id(gen)]) 132 | 133 | elif isinstance(gen, oamap.generator.ExtendedGenerator): 134 | return _fromdata_forefront(gen.generic, fillables, pointerobjs) 135 | 136 | def _fromdata_unionnullable(union): 137 | for possibility in union.possibilities: 138 | if isinstance(possibility, oamap.generator.Masked): 139 | return True 140 | elif isinstance(possibility, oamap.generator.UnionGenerator): 141 | return _fromdata_unionnullable(possibility) 142 | return False 143 | 144 | def _fromdata_fill(obj, gen, fillables, targetids, pointerobjs, at, pointerat): 145 | if id(gen) in targetids: 146 | targetids[id(gen)][id(obj)] = (_fromdata_forefront(gen, fillables, 
pointerobjs), obj) 147 | 148 | if obj is None: 149 | if isinstance(gen, oamap.generator.Masked): 150 | fillables[gen.mask].append(gen.maskedvalue) 151 | return # only mask is filled 152 | elif isinstance(gen, oamap.generator.UnionGenerator) and _fromdata_unionnullable(gen): 153 | pass # mask to fill is in a Union possibility 154 | elif isinstance(gen, oamap.generator.ExtendedGenerator) and isinstance(gen.generic, oamap.generator.Masked): 155 | _fromdata_fill(obj, gen.generic, fillables, targetids, pointerobjs, at, pointerat) 156 | return # filled the generic generator's mask 157 | else: 158 | raise TypeError("cannot fill None where expecting type {0} at {1}".format(gen.schema, at)) 159 | 160 | # obj is not None (except for the Union case) 161 | if isinstance(gen, oamap.generator.Masked): 162 | fillables[gen.mask].append(_fromdata_forefront(gen, fillables, pointerobjs, secondary=True)) 163 | 164 | if isinstance(gen, oamap.generator.PrimitiveGenerator): 165 | fillables[gen.data].append(obj) 166 | 167 | elif isinstance(gen, oamap.generator.ListGenerator): 168 | start = stop = _fromdata_forefront(gen.content, fillables, pointerobjs) 169 | try: 170 | if isinstance(obj, dict) or (isinstance(obj, tuple) and hasattr(obj, "_fields")): 171 | raise TypeError 172 | it = iter(obj) 173 | except TypeError: 174 | raise TypeError("cannot fill {0} where expecting type {1} at {2}".format(repr(obj), gen.schema, at)) 175 | else: 176 | for x in it: 177 | _fromdata_fill(x, gen.content, fillables, targetids, pointerobjs, at + (stop - start,), pointerat) 178 | stop += 1 179 | 180 | fillables[gen.starts].append(start) 181 | fillables[gen.stops].append(stop) 182 | 183 | elif isinstance(gen, oamap.generator.UnionGenerator): 184 | tag = None 185 | for i, possibility in enumerate(gen.possibilities): 186 | if obj in possibility.schema: 187 | tag = i 188 | break 189 | if tag is None: 190 | raise TypeError("cannot fill {0} where expecting type {1} at {2}".format(repr(obj), gen.schema, at)) 191 | 
192 | offset = _fromdata_forefront(possibility, fillables, pointerobjs) 193 | _fromdata_fill(obj, possibility, fillables, targetids, pointerobjs, at + ("tag" + repr(tag),), pointerat) 194 | 195 | fillables[gen.tags].append(tag) 196 | fillables[gen.offsets].append(offset) 197 | 198 | elif isinstance(gen, oamap.generator.RecordGenerator): 199 | if isinstance(obj, dict): 200 | for n, x in gen.fields.items(): 201 | if n not in obj: 202 | raise TypeError("cannot fill {0} because its {1} field is missing at {2}".format(repr(obj), repr(n), at)) 203 | _fromdata_fill(obj[n], x, fillables, targetids, pointerobjs, at + (n,), pointerat) 204 | else: 205 | for n, x in gen.fields.items(): 206 | if not hasattr(obj, n): 207 | raise TypeError("cannot fill {0} because its {1} field is missing at {2}".format(repr(obj), repr(n), at)) 208 | _fromdata_fill(getattr(obj, n), x, fillables, targetids, pointerobjs, at + (n,), pointerat) 209 | 210 | elif isinstance(gen, oamap.generator.TupleGenerator): 211 | for i, x in enumerate(gen.types): 212 | try: 213 | v = obj[i] 214 | except (TypeError, IndexError): 215 | raise TypeError("cannot fill {0} because it does not have a field {1} at {2}".format(repr(obj), i, at)) 216 | else: 217 | _fromdata_fill(v, x, fillables, targetids, pointerobjs, at + (i,), pointerat) 218 | 219 | elif isinstance(gen, oamap.generator.PointerGenerator): 220 | # Pointers will be set after we see all the target values 221 | pointerobjs[id(gen)].append(obj) 222 | if id(gen) not in pointerat: 223 | pointerat[id(gen)] = at 224 | 225 | elif isinstance(gen, oamap.generator.ExtendedGenerator): 226 | _fromdata_fill(gen.degenerate(obj), gen.generic, fillables, targetids, pointerobjs, at, pointerat) 227 | 228 | def _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root): 229 | # do the pointers after everything else 230 | for pointer in pointers: 231 | while len(pointerobjs[id(pointer)]) > 0: 232 | pointerobjs2 = 
{id(pointer): []}
            for obj in pointerobjs[id(pointer)]:
                if id(obj) in targetids[id(pointer.target)] and targetids[id(pointer.target)][id(obj)][1] == obj:
                    # case 1: an object in the target *is* the object in the pointer (same ids)
                    position, _ = targetids[id(pointer.target)][id(obj)]

                else:
                    position = None
                    if pointer_fromequal:
                        # fallback to quadratic complexity search
                        for key, (pos, obj2) in targetids[id(pointer.target)].items():
                            if obj == obj2:
                                position = pos
                                break

                    if position is not None:
                        # case 2: an object in the target *is equal to* the object in the pointer (only check if pointer_fromequal)
                        pass

                    else:
                        # case 3: the object was not found; it must be added to the target (beyond indexes where it can be found)
                        _fromdata_fill(obj, pointer.target, fillables, targetids, pointerobjs2, pointerat[id(pointer)], pointerat)
                        position, _ = targetids[id(pointer.target)][id(obj)]

                # every obj in pointerobjs[id(pointer)] gets *one* append
                fillables[pointer.positions].append(position)

            # filling case-3 objects may have queued new pointer objects; loop until drained
            pointerobjs[id(pointer)] = pointerobjs2[id(pointer)]

    # commit: accepted length becomes the forefront for every fillable, leaves first
    for fillable in fillables_leaf_to_root:
        fillable.update()

def fromdata(value, generator=None, pointer_fromequal=False):
    """Fill a single *value* into freshly created in-memory arrays and return them."""
    if generator is None:
        generator = oamap.inference.fromdata(value).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    return toarrays(fromdatamore(value, oamap.fillable.arrays(generator), generator=generator, pointer_fromequal=pointer_fromequal))

def fromdatamore(value, fillables, generator=None, pointer_fromequal=False):
    """Fill *value* into existing *fillables*; only List schemas may be filled more than once."""
    if generator is None:
        generator = oamap.inference.fromdata(value).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()

    pointers = []
    pointerobjs_keys = []
    targetids_keys = []
    fillables_leaf_to_root = []
    positions_to_pointerobjs = {}

    _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    pointerat = {}
    targetids = dict((x, {}) for x in targetids_keys)
    pointerobjs = dict((x, []) for x in pointerobjs_keys)

    if _fromdata_forefront(generator, fillables, pointerobjs) != 0 and not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists can only be filled from data once")

    _fromdata_fill(value, generator, fillables, targetids, pointerobjs, (), pointerat)
    _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)

    return fillables

def fromiterdata(values, generator=None, limit=lambda entries, arrayitems, arraybytes: False, pointer_fromequal=False):
    """Generator: fill entries from the iterable *values*, yielding (numentries, arrays)
    partitions whenever the *limit* callback signals a partition boundary.

    NOTE(review): with the default limit (always False), `not limit(...)` is always
    True, i.e. every entry takes the "limit too large" branch — confirm the intended
    truth convention of *limit* against the project documentation.
    """
    if generator is None:
        generator = oamap.inference.fromdata(values).generator()
    if not isinstance(generator, oamap.generator.Generator):
        generator = generator.generator()
    if not isinstance(generator, oamap.generator.ListGenerator):
        raise TypeError("non-Lists cannot be filled iteratively")

    # starting set of fillables
    fillables = oamap.fillable.arrays(generator)
    factor = dict((n, x.dtype.itemsize) for n, x in fillables.items())

    pointers = []
    pointerobjs_keys = []
    targetids_keys = []
    fillables_leaf_to_root = []
    positions_to_pointerobjs = {}

    _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

    pointerat = {}
    targetids = dict((x, {}) for x in targetids_keys)
    pointerobjs = dict((x, []) for x in pointerobjs_keys)

    start = stop = _fromdata_forefront(generator.content, fillables, pointerobjs)

    for value in values:
        # prospectively fill a value
        _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)

        # criteria for ending a limit based on forefront (_potential_ size), rather than len (_accepted_ size)
        arrayitems = {}
        arraybytes = {}
        for n, x in fillables.items():
            if n in positions_to_pointerobjs:
                # pointer positions are still queued, not yet in the fillable
                arrayitems[n] = len(pointerobjs[positions_to_pointerobjs[n]])
            else:
                arrayitems[n] = x.forefront()
            arraybytes[n] = arrayitems[n]*factor[n]

        if not limit((stop - start) + 1, arrayitems, arraybytes):
            # accepting this entry would make the limit too large
            fillables[generator.starts].append(start)
            fillables[generator.stops].append(stop)
            _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)
            # yield a new limit of arrays
            yield stop - start, toarrays(fillables)

            # and make a new set of fillables (along with everything that depends on it)
            fillables = oamap.fillable.arrays(generator)

            pointers = []
            pointerobjs_keys = []
            targetids_keys = []
            fillables_leaf_to_root = []
            positions_to_pointerobjs = {}

            _fromdata_initialize(generator, generator, fillables, pointers, pointerobjs_keys, targetids_keys, fillables_leaf_to_root, positions_to_pointerobjs)

            pointerat = {}
            targetids = dict((x, {}) for x in targetids_keys)
            pointerobjs = dict((x, []) for x in pointerobjs_keys)

            start = stop = _fromdata_forefront(generator.content, fillables, pointerobjs)

            # really fill it in this new partition
            _fromdata_fill(value, generator.content, fillables, targetids, pointerobjs, (), pointerat)
            stop += 1
            for fillable in fillables_leaf_to_root:
                fillable.update()

        else:
            # else accept the data into the fillables and move on
            stop += 1
            for fillable in fillables_leaf_to_root:
                fillable.update()

    # always yield at the end
    fillables[generator.starts].append(start)
    fillables[generator.stops].append(stop)
    _fromdata_finish(fillables, pointers, pointerobjs, targetids, pointerat, pointer_fromequal, fillables_leaf_to_root)
    yield (stop - start), toarrays(fillables)

--------------------------------------------------------------------------------
/oamap/fillable.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright (c) 2017, DIANA-HEP
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED.
 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os
import math
import struct
import sys

import numpy

import oamap.generator

if sys.version_info[0] > 2:
    xrange = range

class Fillable(object):
    """Abstract chunked, append-only array.

    Two sizes are tracked: forefront() is the *potential* size including
    uncommitted appends; len() is the *accepted* size. update() commits the
    forefront; revert() discards uncommitted appends.
    """

    def __init__(self, dtype):
        raise NotImplementedError

    def __len__(self):
        # accepted (committed) number of items
        return self._len

    def forefront(self):
        # potential size: committed plus uncommitted appends
        return self._chunkindex*self.chunksize + self._indexinchunk

    def append(self, value):
        raise NotImplementedError

    def extend(self, values):
        raise NotImplementedError

    def update(self):
        # commit: accepted length catches up to the forefront
        self._len = self.forefront()

    def revert(self):
        # discard uncommitted appends: move the write cursor back to the accepted length
        self._chunkindex, self._indexinchunk = divmod(self._len, self.chunksize)

    def close(self):
        pass

    def __getitem__(self, index):
        raise NotImplementedError

    def __array__(self, dtype=None, copy=False, order="K", subok=False, ndmin=0):
        if dtype is None:
            dtype = self.dtype
        elif not isinstance(dtype, numpy.dtype):
            dtype = numpy.dtype(dtype)

        if dtype == self.dtype and not copy and not subok and ndmin == 0:
            return self[:]
        else:
            return numpy.array(self[:], dtype=dtype, copy=copy, order=order, subok=subok, ndmin=ndmin)

################################################################ make fillables

def _makefillables(generator, fillables, makefillable):
    # Walk the generator tree, creating one fillable per named array via makefillable(name, dtype).
    if isinstance(generator, oamap.generator.Masked):
        # Masked is a mixin: also fall through to the concrete generator type below
        fillables[generator.mask] = makefillable(generator.mask, generator.maskdtype)

    if isinstance(generator, oamap.generator.PrimitiveGenerator):
        if generator.dtype is None:
            raise ValueError("dtype is unknown (None) for Primitive generator at {0}".format(repr(generator.data)))
        fillables[generator.data] = makefillable(generator.data, generator.dtype)

    elif isinstance(generator, oamap.generator.ListGenerator):
        fillables[generator.starts] = makefillable(generator.starts, generator.posdtype)
        fillables[generator.stops] = makefillable(generator.stops, generator.posdtype)
        _makefillables(generator.content, fillables, makefillable)

    elif isinstance(generator, oamap.generator.UnionGenerator):
        fillables[generator.tags] = makefillable(generator.tags, generator.tagdtype)
        fillables[generator.offsets] = makefillable(generator.offsets, generator.offsetdtype)
        for possibility in generator.possibilities:
            _makefillables(possibility, fillables, makefillable)

    elif isinstance(generator, oamap.generator.RecordGenerator):
        for field in generator.fields.values():
            _makefillables(field, fillables, makefillable)

    elif isinstance(generator, oamap.generator.TupleGenerator):
        for field in generator.types:
            _makefillables(field, fillables, makefillable)

    elif isinstance(generator, oamap.generator.PointerGenerator):
        fillables[generator.positions] = makefillable(generator.positions, generator.posdtype)
        # internal pointers reference a target that is filled elsewhere in the tree
        if not generator._internal:
            _makefillables(generator.target, fillables, makefillable)

    elif isinstance(generator, oamap.generator.ExtendedGenerator):
        _makefillables(generator.generic, fillables, makefillable)

    else:
        raise AssertionError("unrecognized generator type: {0}".format(generator))

def arrays(generator, chunksize=8192):
    """Create in-memory FillableArrays for every array named by *generator*."""
    if not isinstance(generator,
oamap.generator.Generator): 125 | generator = generator.generator() 126 | fillables = {} 127 | _makefillables(generator, fillables, lambda name, dtype: FillableArray(dtype, chunksize=chunksize)) 128 | return fillables 129 | 130 | def files(generator, directory, chunksize=8192, lendigits=16): 131 | if not isinstance(generator, oamap.generator.Generator): 132 | generator = generator.generator() 133 | if not os.path.exists(directory): 134 | os.mkdir(directory) 135 | fillables = {} 136 | _makefillables(generator, fillables, lambda name, dtype: FillableFile(os.path.join(directory, name), dtype, chunksize=chunksize, lendigits=lendigits)) 137 | return fillables 138 | 139 | def numpyfiles(generator, directory, chunksize=8192, lendigits=16): 140 | if not isinstance(generator, oamap.generator.Generator): 141 | generator = generator.generator() 142 | if not os.path.exists(directory): 143 | os.mkdir(directory) 144 | fillables = {} 145 | _makefillables(generator, fillables, lambda name, dtype: FillableNumpyFile(os.path.join(directory, name), dtype, chunksize=chunksize, lendigits=lendigits)) 146 | return fillables 147 | 148 | ################################################################ FillableArray 149 | 150 | class FillableArray(Fillable): 151 | # Numpy arrays and list items have 96+8 byte (80+8 byte) overhead in Python 2 (Python 3) 152 | # compared to 8192 1-byte values (8-byte values), this is 1% overhead (0.1% overhead) 153 | def __init__(self, dtype, chunksize=8192): 154 | if not isinstance(dtype, numpy.dtype): 155 | dtype = numpy.dtype(dtype) 156 | self._data = [numpy.empty(chunksize, dtype=dtype)] 157 | self._len = 0 158 | self._indexinchunk = 0 159 | self._chunkindex = 0 160 | 161 | @property 162 | def dtype(self): 163 | return self._data[0].dtype 164 | 165 | @property 166 | def chunksize(self): 167 | return self._data[0].shape[0] 168 | 169 | def append(self, value): 170 | if self._indexinchunk >= len(self._data[self._chunkindex]): 171 | while len(self._data) <= 
self._chunkindex + 1: 172 | self._data.append(numpy.empty(self.chunksize, dtype=self.dtype)) 173 | self._indexinchunk = 0 174 | self._chunkindex += 1 175 | 176 | self._data[self._chunkindex][self._indexinchunk] = value 177 | self._indexinchunk += 1 178 | 179 | def extend(self, values): 180 | chunkindex = self._chunkindex 181 | indexinchunk = self._indexinchunk 182 | 183 | while len(values) > 0: 184 | if indexinchunk >= len(self._data[chunkindex]): 185 | while len(self._data) <= chunkindex + 1: 186 | self._data.append(numpy.empty(self.chunksize, dtype=self.dtype)) 187 | indexinchunk = 0 188 | chunkindex += 1 189 | 190 | tofill = min(len(values), self.chunksize - indexinchunk) 191 | self._data[chunkindex][indexinchunk : indexinchunk + tofill] = values[:tofill] 192 | indexinchunk += tofill 193 | values = values[tofill:] 194 | 195 | self._chunkindex = chunkindex 196 | self._indexinchunk = indexinchunk 197 | 198 | def __getitem__(self, index): 199 | if isinstance(index, slice): 200 | lenself = len(self) 201 | step = 1 if index.step is None else index.step 202 | if step > 0: 203 | start = 0 if index.start is None else index.start 204 | stop = lenself if index.stop is None else index.stop 205 | else: 206 | start = lenself - 1 if index.start is None else index.start 207 | stop = 0 if index.stop is None else index.stop 208 | 209 | if start < 0: 210 | start += lenself 211 | if stop < 0: 212 | stop += lenself 213 | 214 | start = min(lenself, max(0, start)) 215 | stop = min(lenself, max(0, stop)) 216 | 217 | if step == 0: 218 | raise ValueError("slice step cannot be zero") 219 | 220 | else: 221 | if step > 0: 222 | start_chunkindex = int(math.floor(float(start) / self.chunksize)) 223 | stop_chunkindex = int(math.ceil(float(stop) / self.chunksize)) 224 | start_indexinchunk = start - start_chunkindex*self.chunksize 225 | stop_indexinchunk = stop - (stop_chunkindex - 1)*self.chunksize 226 | else: 227 | start_chunkindex = int(math.floor(float(start) / self.chunksize)) 228 | 
stop_chunkindex = int(math.floor(float(stop) / self.chunksize)) - 1 229 | start_indexinchunk = start - start_chunkindex*self.chunksize 230 | stop_indexinchunk = stop - (stop_chunkindex + 1)*self.chunksize 231 | 232 | def beginend(): 233 | offset = 0 234 | for chunkindex in xrange(start_chunkindex, stop_chunkindex, 1 if step > 0 else -1): 235 | if step > 0: 236 | if chunkindex == start_chunkindex: 237 | begin = start_indexinchunk 238 | else: 239 | begin = offset 240 | if chunkindex == stop_chunkindex - 1: 241 | end = stop_indexinchunk 242 | else: 243 | end = self.chunksize 244 | offset = (begin - self.chunksize) % step 245 | else: 246 | if chunkindex == start_chunkindex: 247 | begin = start_indexinchunk 248 | else: 249 | begin = self.chunksize - 1 - offset 250 | if chunkindex == stop_chunkindex + 1 and index.stop is not None: 251 | end = stop_indexinchunk 252 | else: 253 | end = None 254 | offset = (begin - -1) % -step 255 | yield chunkindex, begin, end 256 | 257 | length = 0 258 | for chunkindex, begin, end in beginend(): 259 | if step > 0: 260 | length += int(math.ceil(float(end - begin) / step)) 261 | elif end is None: 262 | length += int(math.ceil(-float(begin + 1) / step)) 263 | else: 264 | length += int(math.ceil(-float(begin - end) / step)) 265 | 266 | out = numpy.empty(length, dtype=self.dtype) 267 | outi = 0 268 | 269 | for chunkindex, begin, end in beginend(): 270 | array = self._data[chunkindex][begin:end:step] 271 | 272 | out[outi : outi + len(array)] = array 273 | outi += len(array) 274 | if outi >= len(out): 275 | break 276 | 277 | return out 278 | 279 | else: 280 | lenself = len(self) 281 | normalindex = index if index >= 0 else index + lenself 282 | if not 0 <= normalindex < lenself: 283 | raise IndexError("index {0} is out of bounds for size {1}".format(index, lenself)) 284 | 285 | chunkindex, indexinchunk = divmod(index, self.chunksize) 286 | return self._data[chunkindex][indexinchunk] 287 | 288 | 
################################################################ FillableFile

class FillableFile(Fillable):
    """Fillable that streams full chunks to a raw binary file (no header)."""

    def __init__(self, filename, dtype, chunksize=8192, lendigits=16):
        if not isinstance(dtype, numpy.dtype):
            dtype = numpy.dtype(dtype)
        self._data = numpy.zeros(chunksize, dtype=dtype)   # 'zeros', not 'empty' for security
        self._len = 0
        self._indexinchunk = 0
        self._chunkindex = 0
        self._filename = filename
        self._openfile(filename, lendigits)

    def _openfile(self, filename, lendigits):
        # truncate/create, then reopen unbuffered for read+write
        open(filename, "wb", 0).close()
        self._file = open(filename, "r+b", 0)
        self._datapos = 0
        # a plain file has no header

    @property
    def filename(self):
        return self._file.name

    @property
    def dtype(self):
        return self._data.dtype

    @property
    def chunksize(self):
        return self._data.shape[0]

    def append(self, value):
        self._data[self._indexinchunk] = value
        self._indexinchunk += 1

        # write to disk one full chunk at a time
        if self._indexinchunk == self.chunksize:
            self._flush()
            self._indexinchunk = 0
            self._chunkindex += 1

    def _flush(self):
        # write the in-memory chunk at its position in the file
        self._file.seek(self._datapos + self._chunkindex*self.chunksize*self.dtype.itemsize)
        self._file.write(self._data.tostring())

    def extend(self, values):
        chunkindex = self._chunkindex
        indexinchunk = self._indexinchunk

        while len(values) > 0:
            tofill = min(len(values), self.chunksize - indexinchunk)
            self._data[indexinchunk : indexinchunk + tofill] = values[:tofill]
            indexinchunk += tofill
            values = values[tofill:]

            if indexinchunk == self.chunksize:
                self._file.seek(self._datapos + chunkindex*self.chunksize*self.dtype.itemsize)
                self._file.write(self._data.tostring())
                indexinchunk = 0
                chunkindex += 1

        self._chunkindex = chunkindex
        self._indexinchunk = indexinchunk

    def revert(self):
        """Discard uncommitted appends; reload the accepted chunk from disk if we moved past it."""
        chunkindex, self._indexinchunk = divmod(self._len, self.chunksize)
        if self._chunkindex != chunkindex:
            self._file.seek(self._datapos + chunkindex*self.chunksize*self.dtype.itemsize)
            olddata = numpy.frombuffer(self._file.read(self.chunksize*self.dtype.itemsize), dtype=self.dtype)
            self._data[:len(olddata)] = olddata

        self._chunkindex = chunkindex

    def close(self):
        # guarded: __init__ may have failed before _file was assigned
        if hasattr(self, "_file"):
            self._flush()
            self._file.close()

    def __del__(self):
        self.close()

    def __enter__(self, *args, **kwds):
        return self

    def __exit__(self, *args, **kwds):
        self.close()

    def __getitem__(self, value):
        if not self._file.closed:
            self._flush()

        if isinstance(value, slice):
            lenself = len(self)
            if lenself == 0:
                array = numpy.empty(lenself, dtype=self.dtype)
            else:
                array = numpy.memmap(self.filename, self.dtype, "r", self._datapos, lenself, "C")
            if value.start is None and value.stop is None and value.step is None:
                return array
            else:
                return array[value]

        else:
            lenself = len(self)
            # FIX: the parameter is named 'value'; this branch previously used an
            # undefined name 'index' (NameError on any integer access)
            normalindex = value if value >= 0 else value + lenself
            if not 0 <= normalindex < lenself:
                raise IndexError("index {0} is out of bounds for size {1}".format(value, lenself))

            # FIX: compute itemsize before branching; it was defined only in the
            # open-file branch but also used in the closed-file branch below
            itemsize = self.dtype.itemsize
            if not self._file.closed:
                # since the file's still open, get it from here instead of making a new filehandle
                try:
                    self._file.seek(self._datapos + normalindex*itemsize)
                    return numpy.frombuffer(self._file.read(itemsize), self.dtype)[0]
                finally:
                    # restore the write position
                    self._file.seek(self._datapos + self._chunkindex*self.chunksize*self.dtype.itemsize)
            else:
                # otherwise, you have to open a new file
                with open(self.filename, "rb") as file:
                    file.seek(self._datapos + normalindex*itemsize)
                    return
numpy.frombuffer(file.read(itemsize), self.dtype)[0] 408 | 409 | ################################################################ FillableNumpyFile (FillableFile with a self-describing header) 410 | 411 | class FillableNumpyFile(FillableFile): 412 | def _openfile(self, filename, lendigits): 413 | magic = b"\x93NUMPY\x01\x00" 414 | header1 = "{{'descr': {0}, 'fortran_order': False, 'shape': (".format(repr(str(self.dtype))).encode("ascii") 415 | header2 = "{0}, }}".format(repr((10**lendigits - 1,))).encode("ascii")[1:] 416 | 417 | unpaddedlen = len(magic) + 2 + len(header1) + len(header2) 418 | paddedlen = int(math.ceil(float(unpaddedlen) / self.dtype.itemsize)) * self.dtype.itemsize 419 | header2 = header2 + b" " * (paddedlen - unpaddedlen) 420 | self._lenpos = len(magic) + 2 + len(header1) 421 | self._datapos = len(magic) + 2 + len(header1) + len(header2) 422 | assert self._datapos % self.dtype.itemsize == 0 423 | 424 | open(filename, "wb", 0).close() 425 | self._file = open(filename, "r+b", 0) 426 | self._formatter = "{0:%dd}" % lendigits 427 | self._file.write(magic) 428 | self._file.write(struct.pack("".format(self.__class__.__name__, self._name, repr(self._args), repr(self._kwargs)) 70 | 71 | def __str__(self): 72 | return ".{0}({1}{2})".format(self._name, ", ".join(repr(x) for x in self._args), "".join(", {0}={1}".format(n, repr(x)) for n, x in self._kwargs.items())) 73 | 74 | @property 75 | def name(self): 76 | return self._name 77 | 78 | @property 79 | def args(self): 80 | return self._args 81 | 82 | @property 83 | def kwargs(self): 84 | return self._kwargs 85 | 86 | @property 87 | def function(self): 88 | return self._function 89 | 90 | def apply(self, data): 91 | return self._function(*((data,) + self._args), **self._kwargs) 92 | 93 | class Recasting(Operation): pass 94 | class Transformation(Operation): pass 95 | class Action(Operation): pass 96 | 97 | class Operable(object): 98 | def __init__(self): 99 | self._operations = () 100 | 101 | @staticmethod 
    def update_operations():
        """(Re)bind every registered recasting/transformation/action as a method on Operable."""
        def newrecasting(name, function):
            @functools.wraps(function)
            def recasting(self, *args, **kwargs):
                # copy-on-write: return a new Operable with this operation appended
                out = self.__class__.__new__(self.__class__)
                Operable.__init__(out)
                out.__dict__ = self.__dict__.copy()
                out._operations = self._operations + (Recasting(name, args, kwargs, function),)
                return out
            return recasting

        def newtransformation(name, function):
            @functools.wraps(function)
            def transformation(self, *args, **kwargs):
                out = self.__class__.__new__(self.__class__)
                Operable.__init__(out)
                out.__dict__ = self.__dict__.copy()
                out._operations = self._operations + (Transformation(name, args, kwargs, function),)
                return out
            return transformation

        def newaction(name, function):
            @functools.wraps(function)
            def action(self, *args, **kwargs):
                # an explicit combiner kwarg overrides the function's default combiner
                try:
                    combiner = kwargs.pop("combiner")
                except KeyError:
                    combiner = function.combiner
                out = self.__class__.__new__(self.__class__)
                Operable.__init__(out)
                out.__dict__ = self.__dict__.copy()
                out._operations = self._operations + (Action(name, args, kwargs, function),)
                # actions execute immediately
                return out.act(combiner)
            return action

        for n, x in oamap.operations.recastings.items():
            setattr(Operable, n, oamap.util.MethodType(newrecasting(n, x), None, Operable))

        for n, x in oamap.operations.transformations.items():
            setattr(Operable, n, oamap.util.MethodType(newtransformation(n, x), None, Operable))

        for n, x in oamap.operations.actions.items():
            setattr(Operable, n, oamap.util.MethodType(newaction(n, x), None, Operable))

    def _nooperations(self):
        # True if no operations are queued on this object
        return len(self._operations) == 0

    def _notransformations(self):
        # True if only Recastings (no Transformations or Actions) are queued
        return all(isinstance(x, Recasting) for x in self._operations)

Operable.update_operations()

class _Data(Operable):
    def 
__init__(self, name, schema, backends, executor, extension=None, packing=None, doc=None, metadata=None):
        super(_Data, self).__init__()
        self._name = name
        self._schema = schema
        self._backends = backends        # namespace -> backend
        self._executor = executor
        self._extension = extension      # None, module name, or list of module names
        self._packing = packing
        self._doc = doc
        self._metadata = metadata
        self._cachedobject = None        # lazily materialized proxy

    def __repr__(self):
        # NOTE(review): only argument {1} is used by the format string; the name
        # ({0}) is dropped -- possibly markup lost in extraction. Confirm against
        # the project repository.
        return "{1}".format(repr(self._name), "".join(str(x) for x in self._operations))

    def __str__(self):
        return "{1}".format(repr(self._name), "".join("\n " + str(x) for x in self._operations))

    @property
    def name(self):
        return self._name

    @property
    def schema(self):
        # deep copy so callers cannot mutate the internal schema
        return self._schema.deepcopy()

    @property
    def extension(self):
        return self._extension

    @property
    def packing(self):
        return self._packing

    @property
    def doc(self):
        return self._doc

    @property
    def metadata(self):
        return self._metadata

    def arrays(self):
        return DataArrays(self._backends)

    def transform(self, name, namespace, update):
        """Apply queued operations, persist results in *namespace*'s backend, and
        return a list of futures for the updated Data/Dataset."""
        if self._nooperations():
            # nothing to do
            return [SingleThreadExecutor.PseudoFuture(update(self))]

        elif self._notransformations():
            # recastings only: no new arrays need to be written
            result = self()
            for operation in self._operations:
                result = operation.apply(result)
            if isinstance(result, oamap.proxy.ListProxy):
                out = Dataset(name, result._generator.schema, self._backends, self._executor, [0, len(result)], extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata)
            else:
                out = Data(name, result._generator.schema, self._backends, self._executor, extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata)
            return [SingleThreadExecutor.PseudoFuture(update(out))]

        else:
            def task(name, dataset, namespace, update):
                # run operations, then write the resulting arrays into the backend
                result = dataset()
                for operation in dataset._operations:
                    result = operation.apply(result)

                backend = dataset._backends[namespace]
                schema, roles2arrays = oamap.operations._DualSource.collect(result._generator.namedschema(), result._arrays, namespace, backend.prefix(name), backend.delimiter())

                active = backend.instantiate(0)
                if hasattr(active, "putall"):
                    active.putall(roles2arrays)
                else:
                    for n, x in roles2arrays.items():
                        active[str(n)] = x

                if isinstance(result, oamap.proxy.ListProxy):
                    out = Dataset(name, schema, dataset._backends, dataset._executor, [0, len(result)], extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata)
                else:
                    out = Data(name, schema, dataset._backends, dataset._executor, extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata)
                return update(out)

            return [self._executor.submit(task, name, self, namespace, update)]

    def act(self, combiner):
        """Run queued operations and pass the single resulting future to *combiner*."""
        def task(dataset):
            result = dataset()
            for operation in dataset._operations:
                result = operation.apply(result)
            return result

        return combiner([self._executor.submit(task, self)])

class Data(_Data):
    def __call__(self):
        # materialize (and cache) the proxy for this unpartitioned data
        if self._cachedobject is None:
            if self._extension is None:
                extension = oamap.util.import_module("oamap.extension.common")
            elif isinstance(self._extension, basestring):
                # NOTE(review): 'basestring' is Python-2-only; confirm that a py3
                # shim is defined near the top of this module
                extension = oamap.util.import_module(self._extension)
            else:
                extension = [oamap.util.import_module(x) for x in self._extension]

            self._cachedobject = self._schema(self.arrays(), extension=extension, packing=self._packing)

        return self._cachedobject

class DataArrays(object):
    """Lazy dict-like source of arrays, instantiating one active backend per namespace."""

    def __init__(self, backends):
        self._backends = backends    # namespace -> backend
        self._active = {}            # namespace -> instantiated backend (lazy)
        self._partitionid = 0        # unpartitioned data always uses partition 0

267 | def _toplevel(self, out, filtered): 268 | return filtered 269 | 270 | def getall(self, roles): 271 | out = {} 272 | for namespace, backend in self._backends.items(): 273 | filtered = self._toplevel(out, [x for x in roles if x.namespace == namespace]) 274 | 275 | if len(filtered) > 0: 276 | active = self._active.get(namespace, None) 277 | if active is None: 278 | active = self._active[namespace] = backend.instantiate(self._partitionid) 279 | 280 | if hasattr(active, "getall"): 281 | out.update(active.getall(filtered)) 282 | else: 283 | for x in roles: 284 | out[x] = active[str(x)] 285 | 286 | return out 287 | 288 | def close(self): 289 | for namespace, active in self._active.items(): 290 | if hasattr(active, "close"): 291 | active.close() 292 | self._active[namespace] = None 293 | 294 | class Dataset(_Data): 295 | def __init__(self, name, schema, backends, executor, offsets, extension=None, packing=None, doc=None, metadata=None): 296 | if not isinstance(schema, oamap.schema.List): 297 | raise TypeError("Dataset must have a list schema, not\n\n {0}".format(schema.__repr__(indent=" "))) 298 | 299 | super(Dataset, self).__init__(name, schema, backends, executor, extension=extension, packing=packing, doc=doc, metadata=metadata) 300 | 301 | if not isinstance(offsets, numpy.ndarray): 302 | try: 303 | if not all(isinstance(x, (numbers.Integral, numpy.integer)) and x >= 0 for x in offsets): 304 | raise TypeError 305 | except TypeError: 306 | raise TypeError("offsets must be an iterable of non-negative integers") 307 | offsets = numpy.array(offsets, dtype=numpy.int64) 308 | if len(offsets.shape) != 1: 309 | raise ValueError("offsets must be one-dimensional") 310 | if len(offsets) < 2 or offsets[0] != 0: 311 | raise ValueError("offsets must have at least two items, and the first one must be zero") 312 | if not numpy.all(offsets[:-1] <= offsets[1:]): 313 | raise ValueError("offsets must be monotonically increasing") 314 | self._offsets = offsets 315 | 
self._cachedpartition = None 316 | 317 | def __repr__(self): 318 | return "{3}".format(repr(self._name), self.numpartitions, self.numentries, "".join(str(x) for x in self._operations)) 319 | 320 | def __str__(self): 321 | return "{3}".format(repr(self._name), self.numpartitions, self.numentries, "".join("\n " + str(x) for x in self._operations)) 322 | 323 | @property 324 | def offsets(self): 325 | return self._offsets.tolist() 326 | 327 | @property 328 | def starts(self): 329 | return self._offsets[:-1].tolist() 330 | 331 | @property 332 | def stops(self): 333 | return self._offsets[1:].tolist() 334 | 335 | @property 336 | def partitions(self): 337 | return zip(self.start, self.stop) 338 | 339 | @property 340 | def numpartitions(self): 341 | return len(self._offsets) - 1 342 | 343 | @property 344 | def numentries(self): 345 | return int(self._offsets[-1]) 346 | 347 | def partition(self, partitionid): 348 | if self._cachedpartition != partitionid: 349 | self._cachedpartition = partitionid 350 | 351 | if self._extension is None: 352 | extension = oamap.util.import_module("oamap.extension.common") 353 | elif isinstance(self._extension, basestring): 354 | extension = oamap.util.import_module(self._extension) 355 | else: 356 | extension = [oamap.util.import_module(x) for x in self._extension] 357 | 358 | self._cachedobject = self._schema(self.arrays(partitionid), extension=extension, packing=self._packing) 359 | 360 | return self._cachedobject 361 | 362 | def __iter__(self): 363 | for partitionid in range(self.numpartitions): 364 | for i in range(self._offsets[partitionid], self._offsets[partitionid + 1]): 365 | yield self[i] 366 | 367 | def __getitem__(self, index): 368 | if isinstance(index, slice): 369 | start, stop, step = oamap.util.slice2sss(index, self.numentries) 370 | partitionid = max(0, min(numpy.searchsorted(self._offsets, start, side="right") - 1, self.numpartitions - 1)) 371 | localstart = start - self._offsets[partitionid] 372 | localstop = stop - 
self._offsets[partitionid] 373 | if localstop < -1 or localstop > (self._offsets[partitionid + 1] - self._offsets[partitionid]): 374 | raise IndexError("slice spans multiple partitions") 375 | 376 | out = self.partition(partitionid) 377 | out._whence = localstart 378 | out._stride = step 379 | 380 | # out._length = int(math.ceil(float(abs(localstop - localstart)) / abs(step))) 381 | d, m = divmod(abs(localstart - localstop), abs(step)) 382 | out._length = d + (1 if m != 0 else 0) 383 | return out 384 | 385 | else: 386 | normindex = index if index >= 0 else index + self.numentries 387 | if not 0 <= normindex < self.numentries: 388 | raise IndexError("index {0} out of range for {1} entries".format(index, self.numentries)) 389 | partitionid = numpy.searchsorted(self._offsets, normindex, side="right") - 1 390 | localindex = normindex - self._offsets[partitionid] 391 | return self.partition(partitionid)[localindex] 392 | 393 | def arrays(self, partitionid): 394 | normid = partitionid if partitionid >= 0 else partitionid + self.numpartitions 395 | if not 0 <= normid < self.numpartitions: 396 | raise IndexError("partitionid {0} out of range for {1} partitions".format(partitionid, self.numpartitions)) 397 | 398 | startsrole = oamap.generator.StartsRole(self._schema._get_starts("object", "-"), self._schema.namespace, None) 399 | stopsrole = oamap.generator.StopsRole(self._schema._get_stops("object", "-"), self._schema.namespace, None) 400 | startsrole.stops = stopsrole 401 | stopsrole.starts = startsrole 402 | return DatasetArrays(normid, startsrole, stopsrole, self._offsets[normid + 1] - self._offsets[normid], self._backends) 403 | 404 | def transform(self, name, namespace, update): 405 | if self._nooperations(): 406 | return [SingleThreadExecutor.PseudoFuture(update(self))] 407 | 408 | elif self._notransformations(): 409 | result = self.partition(0) 410 | for operation in self._operations: 411 | result = operation.apply(result) 412 | if isinstance(result, 
oamap.proxy.ListProxy): 413 | out = Dataset(name, result._generator.schema, self._backends, self._executor, self._offsets, extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata) 414 | else: 415 | out = Data(name, result._generator.schema, self._backends, self._executor, extension=self._extension, packing=None, doc=self._doc, metadata=self._metadata) 416 | return [SingleThreadExecutor.PseudoFuture(update(out))] 417 | 418 | else: 419 | def task(name, dataset, namespace, partitionid): 420 | result = dataset.partition(partitionid) 421 | for operation in dataset._operations: 422 | result = operation.apply(result) 423 | 424 | backend = dataset._backends[namespace] 425 | schema, roles2arrays = oamap.operations._DualSource.collect(result._generator.namedschema(), result._arrays, namespace, backend.prefix(name), backend.delimiter()) 426 | 427 | active = backend.instantiate(partitionid) 428 | if hasattr(active, "putall"): 429 | active.putall(roles2arrays) 430 | else: 431 | for n, x in roles2arrays.items(): 432 | active[str(n)] = x 433 | if isinstance(result, oamap.proxy.ListProxy): 434 | return schema, len(result) 435 | else: 436 | return schema, 1 437 | 438 | tasks = [self._executor.submit(task, name, self, namespace, i) for i in range(self.numpartitions)] 439 | 440 | def collect(name, dataset, results, update): 441 | if isinstance(results[0], tuple) and len(results[0]) == 2 and isinstance(results[0][0], oamap.schema.Schema): 442 | offsets = numpy.cumsum([0] + [numentries for schema, numentries in results], dtype=numpy.int64) 443 | schema = results[0][0] 444 | else: 445 | offsets = numpy.cumsum([0] + [x.result()[1] for x in results], dtype=numpy.int64) 446 | schema = results[0].result()[0] 447 | 448 | if isinstance(schema, oamap.schema.List): 449 | out = Dataset(name, schema, dataset._backends, dataset._executor, offsets, extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata) 450 | else: 451 | out = Data(name, 
schema, dataset._backends, dataset._executor, extension=dataset._extension, packing=None, doc=dataset._doc, metadata=dataset._metadata) 452 | return update(out) 453 | 454 | tasks.append(self._executor.submit(collect, name, self, tuple(tasks), update)) 455 | return tasks 456 | 457 | def act(self, combiner): 458 | def task(dataset, partitionid): 459 | result = dataset.partition(partitionid) 460 | for operation in dataset._operations: 461 | result = operation.apply(result) 462 | return result 463 | 464 | return combiner([self._executor.submit(task, self, i) for i in range(self.numpartitions)]) 465 | 466 | class DatasetArrays(DataArrays): 467 | def __init__(self, partitionid, startsrole, stopsrole, numentries, backends): 468 | super(DatasetArrays, self).__init__(backends) 469 | self._partitionid = partitionid 470 | self._startsrole = startsrole 471 | self._stopsrole = stopsrole 472 | self._numentries = numentries 473 | 474 | def _toplevel(self, out, filtered): 475 | try: 476 | index = filtered.index(self._startsrole) 477 | except ValueError: 478 | pass 479 | else: 480 | del filtered[index] 481 | out[self._startsrole] = numpy.array([0], dtype=oamap.generator.ListGenerator.posdtype) 482 | 483 | try: 484 | index = filtered.index(self._stopsrole) 485 | except ValueError: 486 | pass 487 | else: 488 | del filtered[index] 489 | out[self._stopsrole] = numpy.array([self._numentries], dtype=oamap.generator.ListGenerator.posdtype) 490 | 491 | return filtered 492 | 493 | --------------------------------------------------------------------------------