├── .binstar.yml ├── .gitattributes ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── conda.recipe ├── bld.bat ├── build.sh └── meta.yaml ├── datashape ├── __init__.py ├── _version.py ├── coretypes.py ├── discovery.py ├── dispatch.py ├── error.py ├── internal_utils.py ├── lexer.py ├── parser.py ├── predicates.py ├── promote.py ├── py2help.py ├── tests │ ├── __init__.py │ ├── test_coretypes.py │ ├── test_creation.py │ ├── test_discovery.py │ ├── test_lexer.py │ ├── test_operations.py │ ├── test_parser.py │ ├── test_predicates.py │ ├── test_promote.py │ ├── test_str.py │ ├── test_typeset.py │ ├── test_user.py │ ├── test_util.py │ └── test_version.py ├── type_symbol_table.py ├── typesets.py ├── user.py ├── util │ ├── __init__.py │ ├── testing.py │ └── tests │ │ └── test_testing.py └── validation.py ├── docs ├── Makefile ├── make.bat └── source │ ├── conf.py │ ├── grammar.rst │ ├── index.rst │ ├── overview.rst │ ├── pattern_matching.rst │ ├── releases.rst │ ├── svg │ └── type_expand.png │ ├── types.rst │ └── whatsnew │ ├── 0.4.7.txt │ ├── 0.5.0.txt │ ├── 0.5.1.txt │ ├── 0.5.2.txt │ ├── 0.5.3.txt │ └── 0.5.4.txt ├── requirements.txt ├── setup.cfg ├── setup.py └── versioneer.py /.binstar.yml: -------------------------------------------------------------------------------- 1 | package: datashape 2 | 3 | platform: 4 | - linux-64 5 | - linux-32 6 | - osx-64 7 | - win-32 8 | - win-64 9 | 10 | engine: 11 | - python=2.6 12 | - python=2.7 13 | - python=3.3 14 | - python=3.4 15 | 16 | install: 17 | - conda config --set always_yes true 18 | 19 | script: 20 | - conda build conda.recipe 21 | 22 | build_targets: 23 | files: conda 24 | channels: main 25 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | datashape/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | bin/ 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Installer logs 24 | pip-log.txt 25 | pip-delete-this-directory.txt 26 | 27 | # Unit test / coverage reports 28 | .tox/ 29 | .coverage 30 | .cache 31 | nosetests.xml 32 | coverage.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # Rope 43 | .ropeproject 44 | 45 | # Django stuff: 46 | *.log 47 | *.pot 48 | 49 | # Sphinx documentation 50 | docs/_build/ 51 | 52 | # IDE 53 | .idea/ 54 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: false 4 | 5 | matrix: 6 | fast_finish: true 7 | include: 8 | - python: 2.7 9 | - python: 3.4 10 | - python: 3.5 11 | - python: 3.6 12 | 13 | # command to install dependencies 14 | install: 15 | - pip install -U pip 16 | - pip install -r requirements.txt 17 | - pip install pytest 18 | - pip install mock 19 | 20 | # command to run tests, e.g. 
python setup.py test 21 | script: 22 | - py.test -v -x --doctest-modules --pyargs datashape -rsX --tb=short 23 | 24 | notifications: 25 | email: false 26 | flowdock: b08b3ba4fb86fa48121e90b5f67ccb75 27 | on_success: "change" 28 | on_failure: "always" 29 | 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Continuum Analytics, Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 19 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include datashape *.py 2 | recursive-include docs *.rst 3 | 4 | include requirements.txt 5 | include setup.py 6 | include README.rst 7 | include LICENSE 8 | include MANIFEST.in 9 | include versioneer.py 10 | include datashape/_version.py 11 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | DataShape 3 | ========= 4 | 5 | |Build Status| |PyPI| |Monthly Downloads| 6 | 7 | DataShape is a language for describing data. It is an extension of the 8 | NumPy dtype with an emphasis on cross language support. 9 | 10 | History 11 | ------- 12 | 13 | DataShape was originally developed by the Blaze project. The git history 14 | of the blaze.datashape module has been preserved here but for more 15 | complete history see the Blaze project. 16 | 17 | Contributing 18 | ------------ 19 | 20 | Anyone wishing to discuss on DataShape should join the 21 | `blaze-dev `__ 22 | mailing list at: blaze-dev@continuum.io 23 | 24 | License 25 | ------- 26 | 27 | DataShape development is sponsored by Continuum Analytics. 28 | 29 | Released under BSD license. See LICENSE for details. 30 | 31 | 32 | .. |Build Status| image:: https://travis-ci.org/plures/datashape.svg?branch=master 33 | :target: https://travis-ci.org/plures/datashape 34 | 35 | .. |PyPI| image:: https://img.shields.io/pypi/v/datashape.svg 36 | :target: https://pypi.python.org/pypi/DataShape 37 | 38 | .. 
|Monthly Downloads| image:: https://img.shields.io/pypi/dm/datashape.svg 39 | :target: https://pypi.python.org/pypi/DataShape 40 | -------------------------------------------------------------------------------- /conda.recipe/bld.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | %PYTHON% setup.py install 4 | -------------------------------------------------------------------------------- /conda.recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | $PYTHON setup.py install 4 | 5 | -------------------------------------------------------------------------------- /conda.recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: datashape 3 | version: {{ environ.get('GIT_DESCRIBE_TAG', '')}} 4 | 5 | build: 6 | number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }} 7 | {% if environ.get('GIT_DESCRIBE_NUMBER', '0') == '0' %}string: py{{ environ.get('PY_VER').replace('.', '') }}_0 8 | {% else %}string: py{{ environ.get('PY_VER').replace('.', '') }}_{{ environ.get('GIT_BUILD_STR', 'GIT_STUB') }}{% endif %} 9 | 10 | source: 11 | git_url: ../ 12 | 13 | requirements: 14 | build: 15 | - python 16 | - numpy >=1.7 17 | - multipledispatch >=0.4.7 18 | - python-dateutil 19 | 20 | run: 21 | - python 22 | - numpy >=1.7 23 | - multipledispatch >=0.4.7 24 | - python-dateutil 25 | 26 | test: 27 | requires: 28 | - pytest 29 | - mock 30 | commands: 31 | - py.test -vx --doctest-modules --pyargs datashape 32 | 33 | 34 | about: 35 | home: http://datashape.pydata.org 36 | license: BSD 37 | -------------------------------------------------------------------------------- /datashape/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import lexer, parser 4 | from .coretypes import * 5 | from .predicates import * 6 | from .typesets import * 7 | from .user import * 8 | from .type_symbol_table import * 9 | from .discovery import discover 10 | from .util import * 11 | from .promote import promote, optionify 12 | from .error import DataShapeSyntaxError 13 | 14 | from ._version import get_versions 15 | __version__ = get_versions()['version'] 16 | del get_versions 17 | -------------------------------------------------------------------------------- /datashape/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.15 (https://github.com/warner/python-versioneer) 10 | 11 | import errno 12 | import os 13 | import re 14 | import subprocess 15 | import sys 16 | 17 | 18 | def get_keywords(): 19 | # these strings will be replaced by git during git-archive. 20 | # setup.py/versioneer.py will grep for the variable names, so they must 21 | # each be defined on a line of their own. _version.py will just call 22 | # get_keywords(). 
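    # Note (added for clarity): in the repository itself these are the
    # unexpanded placeholders "$Format:%d$" and "$Format:%H$"; the concrete
    # values below are what a git-archive export substitutes in, which is
    # exactly what the startswith("$Format") check in
    # git_versions_from_keywords() below relies on.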
23 | git_refnames = " (HEAD -> master)" 24 | git_full = "c9d2bd75414a69d94498e7340ef9dd5fce903007" 25 | keywords = {"refnames": git_refnames, "full": git_full} 26 | return keywords 27 | 28 | 29 | class VersioneerConfig: 30 | pass 31 | 32 | 33 | def get_config(): 34 | # these strings are filled in when 'setup.py versioneer' creates 35 | # _version.py 36 | cfg = VersioneerConfig() 37 | cfg.VCS = "git" 38 | cfg.style = "pep440" 39 | cfg.tag_prefix = "" 40 | cfg.parentdir_prefix = "datashape-" 41 | cfg.versionfile_source = "datashape/_version.py" 42 | cfg.verbose = False 43 | return cfg 44 | 45 | 46 | class NotThisMethod(Exception): 47 | pass 48 | 49 | 50 | LONG_VERSION_PY = {} 51 | HANDLERS = {} 52 | 53 | 54 | def register_vcs_handler(vcs, method): # decorator 55 | def decorate(f): 56 | if vcs not in HANDLERS: 57 | HANDLERS[vcs] = {} 58 | HANDLERS[vcs][method] = f 59 | return f 60 | return decorate 61 | 62 | 63 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): 64 | assert isinstance(commands, list) 65 | p = None 66 | for c in commands: 67 | try: 68 | dispcmd = str([c] + args) 69 | # remember shell=False, so use git.cmd on windows, not just git 70 | p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, 71 | stderr=(subprocess.PIPE if hide_stderr 72 | else None)) 73 | break 74 | except EnvironmentError: 75 | e = sys.exc_info()[1] 76 | if e.errno == errno.ENOENT: 77 | continue 78 | if verbose: 79 | print("unable to run %s" % dispcmd) 80 | print(e) 81 | return None 82 | else: 83 | if verbose: 84 | print("unable to find command, tried %s" % (commands,)) 85 | return None 86 | stdout = p.communicate()[0].strip() 87 | if sys.version_info[0] >= 3: 88 | stdout = stdout.decode() 89 | if p.returncode != 0: 90 | if verbose: 91 | print("unable to run %s (error)" % dispcmd) 92 | return None 93 | return stdout 94 | 95 | 96 | def versions_from_parentdir(parentdir_prefix, root, verbose): 97 | # Source tarballs conventionally unpack into a directory that includes 98 | # both the project name and a version string. 99 | dirname = os.path.basename(root) 100 | if not dirname.startswith(parentdir_prefix): 101 | if verbose: 102 | print("guessing rootdir is '%s', but '%s' doesn't start with " 103 | "prefix '%s'" % (root, dirname, parentdir_prefix)) 104 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 105 | return {"version": dirname[len(parentdir_prefix):], 106 | "full-revisionid": None, 107 | "dirty": False, "error": None} 108 | 109 | 110 | @register_vcs_handler("git", "get_keywords") 111 | def git_get_keywords(versionfile_abs): 112 | # the code embedded in _version.py can just fetch the value of these 113 | # keywords. When used from setup.py, we don't want to import _version.py, 114 | # so we do it with a regexp instead. This function is not used from 115 | # _version.py. 
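    # For example, the two assignment lines it greps for are the ones near
    # the top of this file:
    #   git_refnames = " (HEAD -> master)"
    #   git_full = "c9d2bd75414a69d94498e7340ef9dd5fce903007"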
116 | keywords = {} 117 | try: 118 | f = open(versionfile_abs, "r") 119 | for line in f.readlines(): 120 | if line.strip().startswith("git_refnames ="): 121 | mo = re.search(r'=\s*"(.*)"', line) 122 | if mo: 123 | keywords["refnames"] = mo.group(1) 124 | if line.strip().startswith("git_full ="): 125 | mo = re.search(r'=\s*"(.*)"', line) 126 | if mo: 127 | keywords["full"] = mo.group(1) 128 | f.close() 129 | except EnvironmentError: 130 | pass 131 | return keywords 132 | 133 | 134 | @register_vcs_handler("git", "keywords") 135 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 136 | if not keywords: 137 | raise NotThisMethod("no keywords at all, weird") 138 | refnames = keywords["refnames"].strip() 139 | if refnames.startswith("$Format"): 140 | if verbose: 141 | print("keywords are unexpanded, not using") 142 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 143 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 144 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 145 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 146 | TAG = "tag: " 147 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 148 | if not tags: 149 | # Either we're using git < 1.8.3, or there really are no tags. We use 150 | # a heuristic: assume all version tags have a digit. The old git %d 151 | # expansion behaves like git log --decorate=short and strips out the 152 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 153 | # between branches and tags. By ignoring refnames without digits, we 154 | # filter out many common branch names like "release" and 155 | # "stabilization", as well as "HEAD" and "master". 156 | tags = set([r for r in refs if re.search(r'\d', r)]) 157 | if verbose: 158 | print("discarding '%s', no digits" % ",".join(refs-tags)) 159 | if verbose: 160 | print("likely tags: %s" % ",".join(sorted(tags))) 161 | for ref in sorted(tags): 162 | # sorting will prefer e.g. "2.0" over "2.0rc1" 163 | if ref.startswith(tag_prefix): 164 | r = ref[len(tag_prefix):] 165 | if verbose: 166 | print("picking %s" % r) 167 | return {"version": r, 168 | "full-revisionid": keywords["full"].strip(), 169 | "dirty": False, "error": None 170 | } 171 | # no suitable tags, so version is "0+unknown", but full hex is still there 172 | if verbose: 173 | print("no suitable tags, using unknown + full revision id") 174 | return {"version": "0+unknown", 175 | "full-revisionid": keywords["full"].strip(), 176 | "dirty": False, "error": "no suitable tags"} 177 | 178 | 179 | @register_vcs_handler("git", "pieces_from_vcs") 180 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 181 | # this runs 'git' from the root of the source tree. This only gets called 182 | # if the git-archive 'subst' keywords were *not* expanded, and 183 | # _version.py hasn't already been rewritten with a short version string, 184 | # meaning we're inside a checked out source tree. 
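    # Illustrative sketch (hypothetical values): for a describe output of
    # "0.5.4-3-g1a2b3c4-dirty" and tag_prefix "", the parsing below yields
    #   pieces = {"closest-tag": "0.5.4", "distance": 3, "short": "1a2b3c4",
    #             "dirty": True, "long": <full rev-parse SHA>, "error": None}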
185 | 186 | if not os.path.exists(os.path.join(root, ".git")): 187 | if verbose: 188 | print("no .git in %s" % root) 189 | raise NotThisMethod("no .git directory") 190 | 191 | GITS = ["git"] 192 | if sys.platform == "win32": 193 | GITS = ["git.cmd", "git.exe"] 194 | # if there is a tag, this yields TAG-NUM-gHEX[-dirty] 195 | # if there are no tags, this yields HEX[-dirty] (no NUM) 196 | describe_out = run_command(GITS, ["describe", "--tags", "--dirty", 197 | "--always", "--long"], 198 | cwd=root) 199 | # --long was added in git-1.5.5 200 | if describe_out is None: 201 | raise NotThisMethod("'git describe' failed") 202 | describe_out = describe_out.strip() 203 | full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 204 | if full_out is None: 205 | raise NotThisMethod("'git rev-parse' failed") 206 | full_out = full_out.strip() 207 | 208 | pieces = {} 209 | pieces["long"] = full_out 210 | pieces["short"] = full_out[:7] # maybe improved later 211 | pieces["error"] = None 212 | 213 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 214 | # TAG might have hyphens. 215 | git_describe = describe_out 216 | 217 | # look for -dirty suffix 218 | dirty = git_describe.endswith("-dirty") 219 | pieces["dirty"] = dirty 220 | if dirty: 221 | git_describe = git_describe[:git_describe.rindex("-dirty")] 222 | 223 | # now we have TAG-NUM-gHEX or HEX 224 | 225 | if "-" in git_describe: 226 | # TAG-NUM-gHEX 227 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 228 | if not mo: 229 | # unparseable. Maybe git-describe is misbehaving? 230 | pieces["error"] = ("unable to parse git-describe output: '%s'" 231 | % describe_out) 232 | return pieces 233 | 234 | # tag 235 | full_tag = mo.group(1) 236 | if not full_tag.startswith(tag_prefix): 237 | if verbose: 238 | fmt = "tag '%s' doesn't start with prefix '%s'" 239 | print(fmt % (full_tag, tag_prefix)) 240 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 241 | % (full_tag, tag_prefix)) 242 | return pieces 243 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 244 | 245 | # distance: number of commits since tag 246 | pieces["distance"] = int(mo.group(2)) 247 | 248 | # commit: short hex revision ID 249 | pieces["short"] = mo.group(3) 250 | 251 | else: 252 | # HEX: no tags 253 | pieces["closest-tag"] = None 254 | count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], 255 | cwd=root) 256 | pieces["distance"] = int(count_out) # total number of commits 257 | 258 | return pieces 259 | 260 | 261 | def plus_or_dot(pieces): 262 | if "+" in pieces.get("closest-tag", ""): 263 | return "." 264 | return "+" 265 | 266 | 267 | def render_pep440(pieces): 268 | # now build up version string, with post-release "local version 269 | # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 270 | # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 271 | 272 | # exceptions: 273 | # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 274 | 275 | if pieces["closest-tag"]: 276 | rendered = pieces["closest-tag"] 277 | if pieces["distance"] or pieces["dirty"]: 278 | rendered += plus_or_dot(pieces) 279 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 280 | if pieces["dirty"]: 281 | rendered += ".dirty" 282 | else: 283 | # exception #1 284 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 285 | pieces["short"]) 286 | if pieces["dirty"]: 287 | rendered += ".dirty" 288 | return rendered 289 | 290 | 291 | def render_pep440_pre(pieces): 292 | # TAG[.post.devDISTANCE] . 
No -dirty 293 | 294 | # exceptions: 295 | # 1: no tags. 0.post.devDISTANCE 296 | 297 | if pieces["closest-tag"]: 298 | rendered = pieces["closest-tag"] 299 | if pieces["distance"]: 300 | rendered += ".post.dev%d" % pieces["distance"] 301 | else: 302 | # exception #1 303 | rendered = "0.post.dev%d" % pieces["distance"] 304 | return rendered 305 | 306 | 307 | def render_pep440_post(pieces): 308 | # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that 309 | # .dev0 sorts backwards (a dirty tree will appear "older" than the 310 | # corresponding clean one), but you shouldn't be releasing software with 311 | # -dirty anyways. 312 | 313 | # exceptions: 314 | # 1: no tags. 0.postDISTANCE[.dev0] 315 | 316 | if pieces["closest-tag"]: 317 | rendered = pieces["closest-tag"] 318 | if pieces["distance"] or pieces["dirty"]: 319 | rendered += ".post%d" % pieces["distance"] 320 | if pieces["dirty"]: 321 | rendered += ".dev0" 322 | rendered += plus_or_dot(pieces) 323 | rendered += "g%s" % pieces["short"] 324 | else: 325 | # exception #1 326 | rendered = "0.post%d" % pieces["distance"] 327 | if pieces["dirty"]: 328 | rendered += ".dev0" 329 | rendered += "+g%s" % pieces["short"] 330 | return rendered 331 | 332 | 333 | def render_pep440_old(pieces): 334 | # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. 335 | 336 | # exceptions: 337 | # 1: no tags. 0.postDISTANCE[.dev0] 338 | 339 | if pieces["closest-tag"]: 340 | rendered = pieces["closest-tag"] 341 | if pieces["distance"] or pieces["dirty"]: 342 | rendered += ".post%d" % pieces["distance"] 343 | if pieces["dirty"]: 344 | rendered += ".dev0" 345 | else: 346 | # exception #1 347 | rendered = "0.post%d" % pieces["distance"] 348 | if pieces["dirty"]: 349 | rendered += ".dev0" 350 | return rendered 351 | 352 | 353 | def render_git_describe(pieces): 354 | # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty 355 | # --always' 356 | 357 | # exceptions: 358 | # 1: no tags. HEX[-dirty] (note: no 'g' prefix) 359 | 360 | if pieces["closest-tag"]: 361 | rendered = pieces["closest-tag"] 362 | if pieces["distance"]: 363 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 364 | else: 365 | # exception #1 366 | rendered = pieces["short"] 367 | if pieces["dirty"]: 368 | rendered += "-dirty" 369 | return rendered 370 | 371 | 372 | def render_git_describe_long(pieces): 373 | # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty 374 | # --always -long'. The distance/hash is unconditional. 375 | 376 | # exceptions: 377 | # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 378 | 379 | if pieces["closest-tag"]: 380 | rendered = pieces["closest-tag"] 381 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 382 | else: 383 | # exception #1 384 | rendered = pieces["short"] 385 | if pieces["dirty"]: 386 | rendered += "-dirty" 387 | return rendered 388 | 389 | 390 | def render(pieces, style): 391 | if pieces["error"]: 392 | return {"version": "unknown", 393 | "full-revisionid": pieces.get("long"), 394 | "dirty": None, 395 | "error": pieces["error"]} 396 | 397 | if not style or style == "default": 398 | style = "pep440" # the default 399 | 400 | if style == "pep440": 401 | rendered = render_pep440(pieces) 402 | elif style == "pep440-pre": 403 | rendered = render_pep440_pre(pieces) 404 | elif style == "pep440-post": 405 | rendered = render_pep440_post(pieces) 406 | elif style == "pep440-old": 407 | rendered = render_pep440_old(pieces) 408 | elif style == "git-describe": 409 | rendered = render_git_describe(pieces) 410 | elif style == "git-describe-long": 411 | rendered = render_git_describe_long(pieces) 412 | else: 413 | raise ValueError("unknown style '%s'" % style) 414 | 415 | return {"version": rendered, "full-revisionid": pieces["long"], 416 | "dirty": pieces["dirty"], "error": None} 417 | 418 | 419 | def get_versions(): 420 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 421 | # __file__, we can work backwards from there to the root. Some 422 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 423 | # case we can only use expanded keywords. 424 | 425 | cfg = get_config() 426 | verbose = cfg.verbose 427 | 428 | try: 429 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 430 | verbose) 431 | except NotThisMethod: 432 | pass 433 | 434 | try: 435 | root = os.path.realpath(__file__) 436 | # versionfile_source is the relative path from the top of the source 437 | # tree (where the .git directory might live) to this file. Invert 438 | # this to find the root from __file__. 
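        # e.g. versionfile_source here is "datashape/_version.py", so the
        # loop strips two components:
        #   .../datashape/_version.py -> .../datashape -> <source root>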
439 | for i in cfg.versionfile_source.split('/'): 440 | root = os.path.dirname(root) 441 | except NameError: 442 | return {"version": "0+unknown", "full-revisionid": None, 443 | "dirty": None, 444 | "error": "unable to find root of source tree"} 445 | 446 | try: 447 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 448 | return render(pieces, cfg.style) 449 | except NotThisMethod: 450 | pass 451 | 452 | try: 453 | if cfg.parentdir_prefix: 454 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 455 | except NotThisMethod: 456 | pass 457 | 458 | return {"version": "0+unknown", "full-revisionid": None, 459 | "dirty": None, 460 | "error": "unable to compute version"} 461 | -------------------------------------------------------------------------------- /datashape/discovery.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | from datetime import datetime, date, time, timedelta 4 | from itertools import chain 5 | import re 6 | import sys 7 | from textwrap import dedent 8 | from warnings import warn 9 | 10 | from dateutil.parser import parse as dateparse 11 | import numpy as np 12 | 13 | from .dispatch import dispatch 14 | from .coretypes import (int32, int64, float64, bool_, complex128, datetime_, 15 | Option, var, from_numpy, Tuple, null, 16 | Record, string, Null, DataShape, real, date_, time_, 17 | Unit, timedelta_, TimeDelta, object_, String) 18 | from .predicates import isdimension, isrecord 19 | from .py2help import _strtypes, _inttypes, MappingProxyType, OrderedDict 20 | from .internal_utils import _toposort, groupby 21 | from .util import subclasses 22 | 23 | 24 | __all__ = ['discover'] 25 | 26 | 27 | @dispatch(object) 28 | def discover(obj, **kwargs): 29 | """ Discover datashape of object 30 | 31 | A datashape encodes the datatypes and the shape/length of an object. 32 | Discover returns the datashape of a Python object. This object can refer 33 | to external data. 34 | 35 | Datashapes range from simple scalars 36 | 37 | >>> discover(10) 38 | ctype('int64') 39 | 40 | To collections 41 | 42 | >>> discover([[1, 2, 3], [4, 5, 6]]) 43 | dshape('2 * 3 * int64') 44 | 45 | To record types and other objects 46 | 47 | >>> x = np.array([('Alice', 100), ('Bob', 200)], dtype=[('name', 'S7'), 48 | ... ('amount', 'i4')]) 49 | >>> discover(x) 50 | dshape('2 * {name: string[7, "ascii"], amount: int32}') 51 | 52 | See http://datashape.pydata.org/grammar.html#some-simple-examples 53 | for more examples 54 | """ 55 | type_name = type(obj).__name__ 56 | if hasattr(obj, 'shape') and hasattr(obj, 'dtype'): 57 | warn( 58 | dedent( 59 | """\ 60 | array-like discovery is deperecated. 61 | Please write an explicit discover function for type '%s'. 
62 | """ % type_name, 63 | ), 64 | DeprecationWarning, 65 | ) 66 | return from_numpy(obj.shape, obj.dtype) 67 | raise NotImplementedError("Don't know how to discover type %r" % type_name) 68 | 69 | 70 | @dispatch(_inttypes) 71 | def discover(i): 72 | return int64 73 | 74 | 75 | npinttypes = tuple(chain.from_iterable((x for x in subclasses(icls) 76 | if x.__name__.startswith(('int', 77 | 'uint'))) 78 | for icls in subclasses(np.integer))) 79 | 80 | 81 | if sys.version_info[0] == 3: 82 | @dispatch(bytes) 83 | def discover(b): 84 | return String('A') 85 | 86 | 87 | @dispatch(npinttypes) 88 | def discover(n): 89 | return from_numpy((), n.dtype) 90 | 91 | 92 | @dispatch(float) 93 | def discover(f): 94 | return float64 95 | 96 | 97 | @dispatch(bool) 98 | def discover(b): 99 | return bool_ 100 | 101 | 102 | @dispatch(complex) 103 | def discover(z): 104 | return complex128 105 | 106 | 107 | @dispatch(datetime) 108 | def discover(dt): 109 | return datetime_ 110 | 111 | 112 | @dispatch(timedelta) 113 | def discover(td): 114 | return TimeDelta(unit='us') 115 | 116 | 117 | @dispatch(date) 118 | def discover(dt): 119 | return date_ 120 | 121 | 122 | @dispatch(time) 123 | def discover(t): 124 | return time_ 125 | 126 | 127 | @dispatch((type(None), Null)) 128 | def discover(i): 129 | return null 130 | 131 | 132 | bools = {'False': False, 133 | 'false': False, 134 | 'True': True, 135 | 'true': True} 136 | 137 | 138 | def timeparse(x, formats=('%H:%M:%S', '%H:%M:%S.%f')): 139 | msg = '' 140 | for format in formats: 141 | try: 142 | return datetime.strptime(x, format).time() 143 | except ValueError as e: # raises if it doesn't match the format 144 | msg = str(e) 145 | raise ValueError(msg) 146 | 147 | 148 | def deltaparse(x): 149 | """Naive timedelta string parser 150 | 151 | Examples 152 | -------- 153 | >>> td = '1 day' 154 | >>> deltaparse(td) 155 | numpy.timedelta64(1,'D') 156 | >>> deltaparse('1.2 days') # doctest: +IGNORE_EXCEPTION_DETAIL 157 | Traceback (most recent call last): 158 | ... 159 | ValueError: floating point timedelta value not supported 160 | """ 161 | value, unit = re.split('\s+', x.strip()) 162 | value = float(value) 163 | if not value.is_integer(): 164 | raise ValueError('floating point timedelta values not supported') 165 | return np.timedelta64(int(value), TimeDelta(unit=unit).unit) 166 | 167 | 168 | string_coercions = int, float, bools.__getitem__, deltaparse, timeparse 169 | 170 | 171 | def is_zero_time(t): 172 | return not (t.hour or t.minute or t.second or t.microsecond) 173 | 174 | 175 | @dispatch(_strtypes) 176 | def discover(s): 177 | if not s: 178 | return null 179 | 180 | for f in string_coercions: 181 | try: 182 | return discover(f(s)) 183 | except (ValueError, KeyError): 184 | pass 185 | 186 | # don't let dateutil parse things like sunday, monday etc into dates 187 | if s.isalpha() or s.isspace(): 188 | return string 189 | 190 | try: 191 | d = dateparse(s) 192 | except (ValueError, OverflowError): # OverflowError for stuff like 'INF...' 
193 | pass 194 | else: 195 | return date_ if is_zero_time(d.time()) else datetime_ 196 | 197 | return string 198 | 199 | 200 | @dispatch((tuple, list, set, frozenset)) 201 | def discover(seq): 202 | if not seq: 203 | return var * string 204 | unite = do_one([unite_identical, unite_base, unite_merge_dimensions]) 205 | # [(a, b), (a, c)] 206 | if (all(isinstance(item, (tuple, list)) for item in seq) and 207 | len(set(map(len, seq))) == 1): 208 | columns = list(zip(*seq)) 209 | try: 210 | types = [unite([discover(data) for data in column]).subshape[0] 211 | for column in columns] 212 | unite = do_one([unite_identical, unite_merge_dimensions, Tuple]) 213 | return len(seq) * unite(types) 214 | except AttributeError: # no subshape available 215 | pass 216 | 217 | # [{k: v, k: v}, {k: v, k: v}] 218 | if all(isinstance(item, dict) for item in seq): 219 | keys = sorted(set.union(*(set(d) for d in seq))) 220 | columns = [[item.get(key) for item in seq] for key in keys] 221 | try: 222 | types = [unite([discover(data) for data in column]).subshape[0] 223 | for column in columns] 224 | return len(seq) * Record(list(zip(keys, types))) 225 | except AttributeError: 226 | pass 227 | 228 | types = list(map(discover, seq)) 229 | return do_one([unite_identical, unite_merge_dimensions, Tuple])(types) 230 | 231 | 232 | def isnull(ds): 233 | return ds == null or ds == DataShape(null) 234 | 235 | 236 | identity = lambda x: x 237 | 238 | # (a, b) implies that b can turn into a 239 | edges = [ 240 | (string, int64), # E.g. int64 can be turned into a string 241 | (string, real), 242 | (string, date_), 243 | (string, datetime_), 244 | (string, timedelta_), 245 | (string, bool_), 246 | (datetime_, date_), 247 | (int64, int32), 248 | (real, int64), 249 | (string, null)] 250 | 251 | numeric_edges = [ 252 | (int64, int32), 253 | (real, int64), 254 | (string, null) 255 | ] 256 | 257 | 258 | # {a: [b, c]} a is more general than b or c 259 | edges = groupby(lambda x: x[1], edges) 260 | edges = dict((k, set(a for a, b in v)) for k, v in edges.items()) 261 | toposorted = _toposort(edges) 262 | 263 | 264 | def lowest_common_dshape(dshapes): 265 | """ Find common shared dshape 266 | 267 | >>> lowest_common_dshape([int32, int64, float64]) 268 | ctype("float64") 269 | 270 | >>> lowest_common_dshape([int32, int64]) 271 | ctype("int64") 272 | 273 | >>> lowest_common_dshape([string, int64]) 274 | ctype("string") 275 | """ 276 | common = set.intersection(*[descendents(edges, ds) for ds in dshapes]) 277 | if common and any(c in toposorted for c in common): 278 | return min(common, key=toposorted.index) 279 | raise ValueError("Not all dshapes are known. 
Extend edges.") 280 | 281 | 282 | def unite_base(dshapes): 283 | """ Performs lowest common dshape and also null aware 284 | 285 | >>> unite_base([float64, float64, int64]) 286 | dshape("3 * float64") 287 | 288 | >>> unite_base([int32, int64, null]) 289 | dshape("3 * ?int64") 290 | """ 291 | dshapes = [unpack(ds) for ds in dshapes] 292 | bynull = groupby(isnull, dshapes) 293 | try: 294 | good_dshapes = bynull[False] 295 | except KeyError: 296 | return len(dshapes) * null 297 | if all(isinstance(ds, Unit) for ds in good_dshapes): 298 | base = lowest_common_dshape(good_dshapes) 299 | elif (all(isinstance(ds, Record) for ds in good_dshapes) and 300 | ds.names == dshapes[0].names for ds in good_dshapes): 301 | names = good_dshapes[0].names 302 | base = Record([[name, 303 | unite_base([ds.dict.get(name, null) for ds in good_dshapes]).subshape[0]] 304 | for name in names]) 305 | if base: 306 | if bynull.get(True): 307 | base = Option(base) 308 | return len(dshapes) * base 309 | 310 | 311 | def unite_identical(dshapes): 312 | """ 313 | 314 | >>> unite_identical([int32, int32, int32]) 315 | dshape("3 * int32") 316 | """ 317 | if len(set(dshapes)) == 1: 318 | return len(dshapes) * dshapes[0] 319 | 320 | 321 | def unite_merge_dimensions(dshapes, unite=unite_identical): 322 | """ 323 | 324 | >>> unite_merge_dimensions([10 * string, 10 * string]) 325 | dshape("2 * 10 * string") 326 | 327 | >>> unite_merge_dimensions([10 * string, 20 * string]) 328 | dshape("2 * var * string") 329 | """ 330 | n = len(dshapes) 331 | if all(isinstance(ds, DataShape) and isdimension(ds[0]) for ds in dshapes): 332 | dims = [ds[0] for ds in dshapes] 333 | base = unite([ds.subshape[0] for ds in dshapes]) 334 | if base: 335 | if len(set(dims)) == 1: 336 | return n * (dims[0] * base.subshape[0]) 337 | else: 338 | return n * (var * base.subshape[0]) 339 | 340 | 341 | def do_one(funcs): 342 | def f(inp): 343 | for func in funcs: 344 | result = func(inp) 345 | if result: 346 | return result 347 | return inp 348 | return f 349 | 350 | 351 | def unpack(ds): 352 | """ Unpack DataShape constructor if unnecessary 353 | 354 | Record packs inputs in DataShape containers. This unpacks it. 
355 | 356 | >>> from datashape import dshape 357 | >>> unpack(dshape('string')) 358 | ctype("string") 359 | """ 360 | if isinstance(ds, DataShape) and len(ds) == 1: 361 | return ds[0] 362 | else: 363 | return ds 364 | 365 | 366 | @discover.register(dict) 367 | @discover.register(MappingProxyType) 368 | def _mapping_discover(m): 369 | return Record((k, discover(m[k])) for k in sorted(m)) 370 | 371 | 372 | @dispatch(OrderedDict) 373 | def discover(od): 374 | return Record((k, discover(v)) for k, v in od.items()) 375 | 376 | 377 | @dispatch(np.number) 378 | def discover(n): 379 | return from_numpy((), type(n)) 380 | 381 | 382 | @dispatch(np.timedelta64) 383 | def discover(n): 384 | return from_numpy((), n) 385 | 386 | 387 | def is_string_array(x): 388 | """ Is an array of strings 389 | 390 | >>> is_string_array(np.array(['Hello', 'world'], dtype='O')) 391 | True 392 | >>> is_string_array(np.array(['Hello', None], dtype='O')) 393 | False 394 | """ 395 | return all(isinstance(i, _strtypes) for i in x.flat[:5].tolist()) 396 | 397 | 398 | @dispatch(np.ndarray) 399 | def discover(x): 400 | ds = from_numpy(x.shape, x.dtype) 401 | 402 | # NumPy uses object dtype both for strings (which we want to call string) 403 | # and for Python objects (which we want to call object) 404 | # Lets look at the first few elements and check 405 | if ds.measure == object_ and is_string_array(x): 406 | return DataShape(*(ds.shape + (string,))) 407 | 408 | if isrecord(ds.measure) and object_ in ds.measure.types: 409 | m = Record([[name, string if typ == object_ and is_string_array(x[name]) 410 | else typ] 411 | for name, typ in ds.measure.parameters[0]]) 412 | return DataShape(*(ds.shape + (m,))) 413 | else: 414 | return ds 415 | 416 | 417 | def descendents(d, x): 418 | """ 419 | 420 | >>> d = {3: [2], 2: [1, 0], 5: [6]} 421 | >>> sorted(descendents(d, 3)) 422 | [0, 1, 2, 3] 423 | """ 424 | desc = set([x]) 425 | children = d.get(x, set()) 426 | while children: 427 | children = set.union(*[set(d.get(kid, ())) for kid in desc]) 428 | children -= desc 429 | desc.update(children) 430 | return desc 431 | 432 | 433 | Mock = None 434 | try: 435 | from unittest.mock import Mock 436 | except ImportError: 437 | try: 438 | from mock import Mock 439 | except ImportError: 440 | pass 441 | 442 | if Mock is not None: 443 | @dispatch(Mock) 444 | def discover(m): 445 | raise NotImplementedError("Don't know how to discover mock objects") 446 | del Mock 447 | -------------------------------------------------------------------------------- /datashape/dispatch.py: -------------------------------------------------------------------------------- 1 | from multipledispatch import dispatch 2 | from functools import partial 3 | 4 | namespace = dict() 5 | 6 | dispatch = partial(dispatch, namespace=namespace) 7 | -------------------------------------------------------------------------------- /datashape/error.py: -------------------------------------------------------------------------------- 1 | """Error handling""" 2 | 3 | syntax_error = """ 4 | 5 | File {filename}, line {lineno} 6 | {line} 7 | {pointer} 8 | 9 | {error}: {msg} 10 | """ 11 | 12 | class DataShapeSyntaxError(SyntaxError): 13 | """ 14 | Makes datashape parse errors look like Python SyntaxError. 
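    For example (with an illustrative filename), an error at position 4 of
    the text "2 * int33" renders as:

        File <unknown>, line 1
        2 * int33
            ^

        DataShapeSyntaxError: invalid syntax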
15 | """ 16 | def __init__(self, lexpos, filename, text, msg=None): 17 | self.lexpos = lexpos 18 | self.filename = filename 19 | self.text = text 20 | self.msg = msg or 'invalid syntax' 21 | self.lineno = text.count('\n', 0, lexpos) + 1 22 | # Get the extent of the line with the error 23 | linestart = text.rfind('\n', 0, lexpos) 24 | if linestart < 0: 25 | linestart = 0 26 | else: 27 | linestart = linestart + 1 28 | lineend = text.find('\n', lexpos) 29 | if lineend < 0: 30 | lineend = len(text) 31 | self.line = text[linestart:lineend] 32 | self.col_offset = lexpos - linestart 33 | 34 | def __str__(self): 35 | pointer = ' ' * self.col_offset + '^' 36 | 37 | return syntax_error.format( 38 | filename=self.filename, 39 | lineno=self.lineno, 40 | line=self.line, 41 | pointer=pointer, 42 | msg=self.msg, 43 | error=self.__class__.__name__, 44 | ) 45 | 46 | def __repr__(self): 47 | return str(self) 48 | -------------------------------------------------------------------------------- /datashape/internal_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions that are unrelated to datashape 3 | 4 | Do not import datashape modules into this module. See util.py in that case 5 | """ 6 | 7 | from __future__ import print_function, division, absolute_import 8 | 9 | import keyword 10 | import re 11 | 12 | 13 | class IndexCallable(object): 14 | """ Provide getitem syntax for functions 15 | 16 | >>> def inc(x): 17 | ... return x + 1 18 | 19 | >>> I = IndexCallable(inc) 20 | >>> I[3] 21 | 4 22 | """ 23 | __slots__ = 'fn', 24 | 25 | def __init__(self, fn): 26 | self.fn = fn 27 | 28 | def __getitem__(self, key): 29 | return self.fn(key) 30 | 31 | 32 | def remove(predicate, seq): 33 | return filter(lambda x: not predicate(x), seq) 34 | 35 | 36 | # Taken from theano/theano/gof/sched.py 37 | # Avoids licensing issues because this was written by Matthew Rocklin 38 | def reverse_dict(d): 39 | """Reverses direction of dependence dict 40 | 41 | >>> d = {'a': (1, 2), 'b': (2, 3), 'c':()} 42 | >>> reverse_dict(d) # doctest: +SKIP 43 | {1: ('a',), 2: ('a', 'b'), 3: ('b',)} 44 | 45 | :note: dict order are not deterministic. As we iterate on the 46 | input dict, it make the output of this function depend on the 47 | dict order. So this function output order should be considered 48 | as undeterministic. 49 | 50 | """ 51 | result = {} 52 | for key in d: 53 | for val in d[key]: 54 | result[val] = result.get(val, tuple()) + (key, ) 55 | return result 56 | 57 | 58 | # Taken from theano/theano/gof/sched.py 59 | # Avoids licensing issues because this was written by Matthew Rocklin 60 | def _toposort(edges): 61 | """ Topological sort algorithm by Kahn [1] - O(nodes + vertices) 62 | 63 | inputs: 64 | edges - a dict of the form {a: {b, c}} where b and c depend on a 65 | outputs: 66 | L - an ordered list of nodes that satisfy the dependencies of edges 67 | 68 | >>> _toposort({1: (2, 3), 2: (3, )}) 69 | [1, 2, 3] 70 | 71 | Closely follows the wikipedia page [2] 72 | 73 | [1] Kahn, Arthur B. 
(1962), "Topological sorting of large networks", 74 | Communications of the ACM 75 | [2] http://en.wikipedia.org/wiki/Toposort#Algorithms 76 | """ 77 | incoming_edges = reverse_dict(edges) 78 | incoming_edges = dict((k, set(val)) for k, val in incoming_edges.items()) 79 | S = set((v for v in edges if v not in incoming_edges)) 80 | L = [] 81 | 82 | while S: 83 | n = S.pop() 84 | L.append(n) 85 | for m in edges.get(n, ()): 86 | assert n in incoming_edges[m] 87 | incoming_edges[m].remove(n) 88 | if not incoming_edges[m]: 89 | S.add(m) 90 | if any(incoming_edges.get(v) for v in edges): 91 | raise ValueError("Input has cycles") 92 | return L 93 | 94 | 95 | # Taken from toolz 96 | # Avoids licensing issues because this version was authored by Matthew Rocklin 97 | def groupby(func, seq): 98 | """ Group a collection by a key function 99 | 100 | >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] 101 | >>> groupby(len, names) # doctest: +SKIP 102 | {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} 103 | 104 | >>> iseven = lambda x: x % 2 == 0 105 | >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) 106 | {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} 107 | 108 | See Also: 109 | ``countby`` 110 | """ 111 | 112 | d = dict() 113 | for item in seq: 114 | key = func(item) 115 | if key not in d: 116 | d[key] = list() 117 | d[key].append(item) 118 | return d 119 | 120 | 121 | def isidentifier(s): 122 | return (keyword.iskeyword(s) or 123 | re.match(r'^[_a-zA-Z][_a-zA-Z0-9]*$', s) is not None) 124 | -------------------------------------------------------------------------------- /datashape/lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Lexer for the datashape grammar. 3 | """ 4 | 5 | from __future__ import absolute_import, division, print_function 6 | 7 | import re 8 | import ast 9 | import collections 10 | 11 | from . import error 12 | 13 | # This is updated to include all the token names from _tokens, 14 | # where e.g. 
_tokens[NAME_LOWER-1] is the entry for NAME_LOWER 15 | __all__ = ['lex', 'Token'] 16 | 17 | def _str_val(s): 18 | # Use the Python parser via the ast module to parse the string, 19 | # since the string_escape and unicode_escape codecs do the wrong thing 20 | return ast.parse('u' + s).body[0].value.s 21 | 22 | # A list of the token names, corresponding regex, and value extraction function 23 | _tokens = [ 24 | ('BOOLEAN', r'True|False', ast.literal_eval), 25 | ('NAME_LOWER', r'[a-z][a-zA-Z0-9_]*', lambda x : x), 26 | ('NAME_UPPER', r'[A-Z][a-zA-Z0-9_]*', lambda x : x), 27 | ('NAME_OTHER', r'_[a-zA-Z0-9_]*', lambda x : x), 28 | ('ASTERISK', r'\*'), 29 | ('COMMA', r','), 30 | ('EQUAL', r'='), 31 | ('COLON', r':'), 32 | ('LBRACKET', r'\['), 33 | ('RBRACKET', r'\]'), 34 | ('LBRACE', r'\{'), 35 | ('RBRACE', r'\}'), 36 | ('LPAREN', r'\('), 37 | ('RPAREN', r'\)'), 38 | ('ELLIPSIS', r'\.\.\.'), 39 | ('RARROW', r'->'), 40 | ('QUESTIONMARK', r'\?'), 41 | ('INTEGER', r'0(?![0-9])|-?[1-9][0-9]*', int), 42 | ('STRING', (r"""(?:"(?:[^"\n\r\\]|(?:\\u[0-9a-fA-F]{4})|(?:\\["bfnrt]))*")|""" + 43 | r"""(?:'(?:[^'\n\r\\]|(?:\\u[0-9a-fA-F]{4})|(?:\\['bfnrt]))*')"""), 44 | _str_val), 45 | ] 46 | 47 | # Dynamically add all the token indices to globals() and __all__ 48 | __all__.extend(tok[0] for tok in _tokens) 49 | globals().update((tok[0], i) for i, tok in enumerate(_tokens, 1)) 50 | 51 | # Regex for skipping whitespace and comments 52 | _whitespace = r'(?:\s|(?:#.*$))*' 53 | 54 | # Compile the token-matching and whitespace-matching regular expressions 55 | _tokens_re = re.compile('|'.join('(' + tok[1] + ')' for tok in _tokens), 56 | re.MULTILINE) 57 | _whitespace_re = re.compile(_whitespace, re.MULTILINE) 58 | 59 | Token = collections.namedtuple('Token', 'id, name, span, val') 60 | 61 | def lex(ds_str): 62 | """A generator which lexes a datashape string into a 63 | sequence of tokens. 64 | 65 | Example 66 | ------- 67 | 68 | import datashape 69 | s = ' -> ... A... "string" 1234 Blah _eil(# comment' 70 | print('lexing %r' % s) 71 | for tok in datashape.lexer.lex(s): 72 | print(tok.id, tok.name, tok.span, repr(tok.val)) 73 | """ 74 | pos = 0 75 | # Skip whitespace 76 | m = _whitespace_re.match(ds_str, pos) 77 | if m: 78 | pos = m.end() 79 | while pos < len(ds_str): 80 | # Try to match a token 81 | m = _tokens_re.match(ds_str, pos) 82 | if m: 83 | # m.lastindex gives us which group was matched, which 84 | # is one greater than the index into the _tokens list. 
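            # e.g. lexing "->" matches the RARROW alternative, so
            # m.lastindex is 16 and _tokens[16 - 1] is the
            # ('RARROW', r'->') entry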
85 | id = m.lastindex 86 | tokinfo = _tokens[id - 1] 87 | name = tokinfo[0] 88 | span = m.span() 89 | if len(tokinfo) > 2: 90 | val = tokinfo[2](ds_str[span[0]:span[1]]) 91 | else: 92 | val = None 93 | pos = m.end() 94 | yield Token(id, name, span, val) 95 | else: 96 | raise error.DataShapeSyntaxError(pos, '', 97 | ds_str, 98 | 'Invalid DataShape token') 99 | # Skip whitespace 100 | m = _whitespace_re.match(ds_str, pos) 101 | if m: 102 | pos = m.end() 103 | 104 | -------------------------------------------------------------------------------- /datashape/predicates.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .util import collect, dshape 4 | from .internal_utils import remove 5 | from .coretypes import (DataShape, Fixed, Var, Ellipsis, Record, Tuple, Unit, 6 | date_, datetime_, TypeVar, to_numpy_dtype, Map, 7 | Option, Categorical) 8 | from .typesets import floating, boolean 9 | 10 | # https://github.com/blaze/datashape/blob/master/docs/source/types.rst 11 | 12 | __all__ = ['isdimension', 'ishomogeneous', 'istabular', 'isfixed', 'isscalar', 13 | 'isrecord', 'iscollection', 'isnumeric', 'isboolean', 'isdatelike', 14 | 'isreal'] 15 | 16 | dimension_types = Fixed, Var, Ellipsis, int 17 | 18 | 19 | def isscalar(ds): 20 | """ Is this dshape a single dtype? 21 | 22 | >>> isscalar('int') 23 | True 24 | >>> isscalar('?int') 25 | True 26 | >>> isscalar('{name: string, amount: int}') 27 | False 28 | """ 29 | if isinstance(ds, str): 30 | ds = dshape(ds) 31 | if isinstance(ds, DataShape) and len(ds) == 1: 32 | ds = ds[0] 33 | return isinstance(getattr(ds, 'ty', ds), (Unit, Categorical)) 34 | 35 | 36 | def isrecord(ds): 37 | """ Is this dshape a record type? 38 | 39 | >>> isrecord('{name: string, amount: int}') 40 | True 41 | >>> isrecord('int') 42 | False 43 | >>> isrecord('?{name: string, amount: int}') 44 | True 45 | """ 46 | if isinstance(ds, str): 47 | ds = dshape(ds) 48 | if isinstance(ds, DataShape) and len(ds) == 1: 49 | ds = ds[0] 50 | return isinstance(getattr(ds, 'ty', ds), Record) 51 | 52 | 53 | def isdimension(ds): 54 | """ Is a component a dimension? 55 | 56 | >>> from datashape import int32 57 | >>> isdimension(Fixed(10)) 58 | True 59 | >>> isdimension(Var()) 60 | True 61 | >>> isdimension(int32) 62 | False 63 | """ 64 | return isinstance(ds, dimension_types) 65 | 66 | 67 | def ishomogeneous(ds): 68 | """ Does datashape contain only one dtype? 
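    Dimensions are ignored, so a ragged collection of a single dtype
    (e.g. ``var * 3 * string``) still counts as homogeneous.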
69 | 70 | >>> from datashape import int32 71 | >>> ishomogeneous(int32) 72 | True 73 | >>> ishomogeneous('var * 3 * string') 74 | True 75 | >>> ishomogeneous('var * {name: string, amount: int}') 76 | False 77 | """ 78 | ds = dshape(ds) 79 | return len(set(remove(isdimension, collect(isscalar, ds)))) == 1 80 | 81 | 82 | def _dimensions(ds): 83 | """Number of dimensions of datashape 84 | """ 85 | return len(dshape(ds).shape) 86 | 87 | 88 | def isfixed(ds): 89 | """ Contains no variable dimensions 90 | 91 | >>> isfixed('10 * int') 92 | True 93 | >>> isfixed('var * int') 94 | False 95 | >>> isfixed('10 * {name: string, amount: int}') 96 | True 97 | >>> isfixed('10 * {name: string, amounts: var * int}') 98 | False 99 | """ 100 | ds = dshape(ds) 101 | if isinstance(ds[0], TypeVar): 102 | return None # don't know 103 | if isinstance(ds[0], Var): 104 | return False 105 | if isinstance(ds[0], Record): 106 | return all(map(isfixed, ds[0].types)) 107 | if len(ds) > 1: 108 | return isfixed(ds.subarray(1)) 109 | return True 110 | 111 | 112 | def istabular(ds): 113 | """ A collection of records 114 | 115 | >>> istabular('var * {name: string, amount: int}') 116 | True 117 | >>> istabular('var * 10 * 3 * int') 118 | False 119 | >>> istabular('10 * var * int') 120 | False 121 | >>> istabular('var * (int64, string, ?float64)') 122 | False 123 | """ 124 | ds = dshape(ds) 125 | return _dimensions(ds) == 1 and isrecord(ds.measure) 126 | 127 | 128 | def iscollection(ds): 129 | """ Is a collection of items, has dimension 130 | 131 | >>> iscollection('5 * int32') 132 | True 133 | >>> iscollection('int32') 134 | False 135 | """ 136 | if isinstance(ds, str): 137 | ds = dshape(ds) 138 | return isdimension(ds[0]) 139 | 140 | 141 | def isnumeric(ds): 142 | """ Has a numeric measure 143 | 144 | >>> isnumeric('int32') 145 | True 146 | >>> isnumeric('3 * ?real') 147 | True 148 | >>> isnumeric('string') 149 | False 150 | >>> isnumeric('var * {amount: ?int32}') 151 | False 152 | """ 153 | ds = launder(ds) 154 | 155 | try: 156 | npdtype = to_numpy_dtype(ds) 157 | except TypeError: 158 | return False 159 | else: 160 | return isinstance(ds, Unit) and np.issubdtype(npdtype, np.number) 161 | 162 | 163 | def launder(ds): 164 | if isinstance(ds, str): 165 | ds = dshape(ds) 166 | if isinstance(ds, DataShape): 167 | ds = ds.measure 168 | return getattr(ds, 'ty', ds) 169 | 170 | 171 | def isreal(ds): 172 | """ Has a numeric measure 173 | 174 | >>> isreal('float32') 175 | True 176 | >>> isreal('3 * ?real') 177 | True 178 | >>> isreal('string') 179 | False 180 | """ 181 | ds = launder(ds) 182 | return isinstance(ds, Unit) and ds in floating 183 | 184 | 185 | def isboolean(ds): 186 | """ Has a boolean measure 187 | 188 | >>> isboolean('bool') 189 | True 190 | >>> isboolean('3 * ?bool') 191 | True 192 | >>> isboolean('int') 193 | False 194 | """ 195 | return launder(ds) in boolean 196 | 197 | 198 | def isdatelike(ds): 199 | """ Has a date or datetime measure 200 | 201 | >>> isdatelike('int32') 202 | False 203 | >>> isdatelike('3 * datetime') 204 | True 205 | >>> isdatelike('?datetime') 206 | True 207 | """ 208 | ds = launder(ds) 209 | return ds == date_ or ds == datetime_ 210 | -------------------------------------------------------------------------------- /datashape/promote.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import datashape 5 | 6 | 7 | __all__ = ['promote', 'optionify'] 8 | 9 | 10 | def promote(lhs, rhs, 
promote_option=True): 11 | """Promote two scalar dshapes to a possibly larger, but compatible type. 12 | 13 | Examples 14 | -------- 15 | >>> from datashape import int32, int64, Option, string 16 | >>> x = Option(int32) 17 | >>> y = int64 18 | >>> promote(x, y) 19 | Option(ty=ctype("int64")) 20 | >>> promote(int64, int64) 21 | ctype("int64") 22 | 23 | Don't promote to option types. 24 | >>> promote(x, y, promote_option=False) 25 | ctype("int64") 26 | 27 | Strings are handled differently than NumPy, which promotes to ctype("object") 28 | >>> x = string 29 | >>> y = Option(string) 30 | >>> promote(x, y) == promote(y, x) == Option(string) 31 | True 32 | >>> promote(x, y, promote_option=False) 33 | ctype("string") 34 | 35 | Notes 36 | ---- 37 | Except for ``datashape.string`` types, this uses ``numpy.result_type`` for 38 | type promotion logic. See the numpy documentation at: 39 | 40 | http://docs.scipy.org/doc/numpy/reference/generated/numpy.result_type.html 41 | """ 42 | if lhs == rhs: 43 | return lhs 44 | left, right = getattr(lhs, 'ty', lhs), getattr(rhs, 'ty', rhs) 45 | if left == right == datashape.string: 46 | # Special case string promotion, since numpy promotes to `object`. 47 | dtype = datashape.string 48 | else: 49 | np_res_type = np.result_type(datashape.to_numpy_dtype(left), 50 | datashape.to_numpy_dtype(right)) 51 | dtype = datashape.CType.from_numpy_dtype(np_res_type) 52 | if promote_option: 53 | dtype = optionify(lhs, rhs, dtype) 54 | return dtype 55 | 56 | 57 | def optionify(lhs, rhs, dshape): 58 | """Check whether a binary operation's dshape came from 59 | :class:`~datashape.coretypes.Option` typed operands and construct an 60 | :class:`~datashape.coretypes.Option` type accordingly. 61 | 62 | Examples 63 | -------- 64 | >>> from datashape import int32, int64, Option 65 | >>> x = Option(int32) 66 | >>> x 67 | Option(ty=ctype("int32")) 68 | >>> y = int64 69 | >>> y 70 | ctype("int64") 71 | >>> optionify(x, y, int64) 72 | Option(ty=ctype("int64")) 73 | """ 74 | if hasattr(dshape.measure, 'ty'): 75 | return dshape 76 | if hasattr(lhs, 'ty') or hasattr(rhs, 'ty'): 77 | return datashape.Option(dshape) 78 | return dshape 79 | -------------------------------------------------------------------------------- /datashape/py2help.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import itertools 3 | 4 | # Portions of this taken from the six library, licensed as follows. 5 | # 6 | # Copyright (c) 2010-2013 Benjamin Peterson 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | import platform 26 | 27 | PY2 = sys.version_info[0] == 2 28 | PY3 = sys.version_info[0] == 3 29 | 30 | CPYTHON = platform.python_implementation() == 'CPython' 31 | 32 | if PY2: 33 | import __builtin__ 34 | reduce = __builtin__.reduce 35 | _inttypes = (int, long) 36 | unicode = __builtin__.unicode 37 | basestring = __builtin__.basestring 38 | _strtypes = (str, unicode) 39 | 40 | from types import DictProxyType as MappingProxyType 41 | 42 | if CPYTHON: 43 | from ctypes import pythonapi, py_object 44 | 45 | mappingproxy = pythonapi.PyDictProxy_New 46 | mappingproxy.argtypes = [py_object] 47 | mappingproxy.restype = py_object 48 | del pythonapi 49 | del py_object 50 | else: 51 | # TODO: Figure out how to make these on pypy. 52 | # If this gets done, please update the skipif condition in: 53 | # test_discovery:test_mappingproxy 54 | def mappingproxy(ob): 55 | raise ValueError('cannot create mapping proxies in py2 on pypy') 56 | 57 | else: 58 | from functools import reduce 59 | _inttypes = (int,) 60 | unicode = str 61 | basestring = str 62 | _strtypes = (str,) 63 | 64 | from types import MappingProxyType 65 | mappingproxy = MappingProxyType 66 | 67 | 68 | def with_metaclass(metaclass, *bases): 69 | """Helper for using metaclasses in a py2/3 compatible way. 70 | 71 | Parameters 72 | ---------- 73 | metaclass : type 74 | The metaclass to apply. 75 | bases : iterable of type 76 | The types to subclass. 77 | 78 | Notes 79 | ----- 80 | The translations for python 2 and 3 look like: 81 | 82 | :: 83 | # Compat 84 | class C(with_metaclass(M, A, B)): 85 | pass 86 | 87 | # Pyton 2 88 | class C(A, B): 89 | __metaclass__ = M 90 | 91 | # Python 3 92 | class C(A, B, metaclass=M): 93 | pass 94 | """ 95 | return metaclass('_', bases, {}) 96 | 97 | 98 | try: 99 | from collections import OrderedDict 100 | except ImportError: 101 | class OrderedDict(object): 102 | def __new__(cls, *args, **kwargs): 103 | raise TypeError('OrderedDict not supported before python 2.7') 104 | -------------------------------------------------------------------------------- /datashape/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/blaze/datashape/c9d2bd75414a69d94498e7340ef9dd5fce903007/datashape/tests/__init__.py -------------------------------------------------------------------------------- /datashape/tests/test_creation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import ctypes 4 | import unittest 5 | 6 | import pytest 7 | 8 | import datashape 9 | from datashape import dshape, error, DataShape, Record 10 | 11 | 12 | class TestDataShapeCreation(unittest.TestCase): 13 | 14 | def test_raise_on_bad_input(self): 15 | # Make sure it raises exceptions on a few nonsense inputs 16 | self.assertRaises(TypeError, dshape, None) 17 | self.assertRaises(TypeError, dshape, lambda x: x+1) 18 | # Check issue 11 19 | self.assertRaises(datashape.DataShapeSyntaxError, dshape, '1 *') 20 | self.assertRaises(datashape.DataShapeSyntaxError, dshape, '1,') 21 | 22 | def test_reserved_future_bigint(self): 23 | # The "bigint" datashape is reserved for a future big integer 
type 24 | self.assertRaises(Exception, dshape, "bigint") 25 | 26 | def test_atom_shapes(self): 27 | self.assertEqual(dshape('bool'), dshape(datashape.bool_)) 28 | self.assertEqual(dshape('int8'), dshape(datashape.int8)) 29 | self.assertEqual(dshape('int16'), dshape(datashape.int16)) 30 | self.assertEqual(dshape('int32'), dshape(datashape.int32)) 31 | self.assertEqual(dshape('int64'), dshape(datashape.int64)) 32 | self.assertEqual(dshape('uint8'), dshape(datashape.uint8)) 33 | self.assertEqual(dshape('uint16'), dshape(datashape.uint16)) 34 | self.assertEqual(dshape('uint32'), dshape(datashape.uint32)) 35 | self.assertEqual(dshape('uint64'), dshape(datashape.uint64)) 36 | self.assertEqual(dshape('float32'), dshape(datashape.float32)) 37 | self.assertEqual(dshape('float64'), dshape(datashape.float64)) 38 | self.assertEqual(dshape('complex64'), dshape(datashape.complex64)) 39 | self.assertEqual(dshape('complex128'), dshape(datashape.complex128)) 40 | self.assertEqual(dshape('complex64'), dshape('complex[float32]')) 41 | self.assertEqual(dshape('complex128'), dshape('complex[float64]')) 42 | self.assertEqual(dshape("string"), dshape(datashape.string)) 43 | self.assertEqual(dshape("json"), dshape(datashape.json)) 44 | self.assertEqual(dshape("date"), dshape(datashape.date_)) 45 | self.assertEqual(dshape("time"), dshape(datashape.time_)) 46 | self.assertEqual(dshape("datetime"), dshape(datashape.datetime_)) 47 | 48 | def test_atom_shape_errors(self): 49 | self.assertRaises(error.DataShapeSyntaxError, dshape, 'boot') 50 | self.assertRaises(error.DataShapeSyntaxError, dshape, 'int33') 51 | self.assertRaises(error.DataShapeSyntaxError, dshape, '12') 52 | self.assertRaises(error.DataShapeSyntaxError, dshape, 'var') 53 | 54 | @pytest.mark.xfail(reason='implements has not been implemented in the new parser') 55 | def test_constraints_error(self): 56 | self.assertRaises(error.DataShapeTypeError, dshape, 57 | 'A : integral * B : numeric') 58 | 59 | def test_ellipsis_error(self): 60 | self.assertRaises(error.DataShapeSyntaxError, dshape, 'T * ...') 61 | self.assertRaises(error.DataShapeSyntaxError, dshape, 'T * S...') 62 | 63 | @pytest.mark.xfail(reason='type decl has been removed in the new parser') 64 | def test_type_decl(self): 65 | self.assertRaises(error.DataShapeTypeError, dshape, 'type X T = 3, T') 66 | 67 | @pytest.mark.xfail(reason='type decl has been removed in the new parser') 68 | def test_type_decl_concrete(self): 69 | self.assertEqual(dshape('3, int32'), dshape('type X = 3, int32')) 70 | 71 | def test_string_atom(self): 72 | self.assertEqual(dshape('string'), dshape("string['U8']")) 73 | self.assertEqual(dshape("string['ascii']")[0].encoding, 'A') 74 | self.assertEqual(dshape("string['A']")[0].encoding, 'A') 75 | self.assertEqual(dshape("string['utf-8']")[0].encoding, 'U8') 76 | self.assertEqual(dshape("string['U8']")[0].encoding, 'U8') 77 | self.assertEqual(dshape("string['utf-16']")[0].encoding, 'U16') 78 | self.assertEqual(dshape("string['U16']")[0].encoding, 'U16') 79 | self.assertEqual(dshape("string['utf-32']")[0].encoding, 'U32') 80 | self.assertEqual(dshape("string['U32']")[0].encoding, 'U32') 81 | 82 | def test_time(self): 83 | self.assertEqual(dshape('time')[0].tz, None) 84 | self.assertEqual(dshape('time[tz="UTC"]')[0].tz, 'UTC') 85 | self.assertEqual(dshape('time[tz="America/Vancouver"]')[0].tz, 86 | 'America/Vancouver') 87 | self.assertEqual(str(dshape('time[tz="UTC"]')), "time[tz='UTC']") 88 | 89 | def test_datetime(self): 90 | self.assertEqual(dshape('datetime')[0].tz, None) 
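        # A bare 'datetime' is timezone-naive (tz is None); an explicit tz
        # parameter is preserved and survives the str() round trip below.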
91 | self.assertEqual(dshape('datetime[tz="UTC"]')[0].tz, 'UTC') 92 | self.assertEqual(dshape('datetime[tz="America/Vancouver"]')[0].tz, 93 | 'America/Vancouver') 94 | self.assertEqual(str(dshape('datetime[tz="UTC"]')), 95 | "datetime[tz='UTC']") 96 | 97 | def test_units(self): 98 | self.assertEqual(dshape('units["second"]')[0].unit, 'second') 99 | self.assertEqual(dshape('units["second"]')[0].tp, dshape('float64')) 100 | self.assertEqual(dshape('units["second", int32]')[0].unit, 'second') 101 | self.assertEqual(dshape('units["second", int32]')[0].tp, 102 | dshape('int32')) 103 | 104 | def test_empty_struct(self): 105 | self.assertEqual(dshape('{}'), DataShape(Record([]))) 106 | 107 | def test_struct_of_array(self): 108 | self.assertEqual(str(dshape('5 * int32')), '5 * int32') 109 | self.assertEqual(str(dshape('{field: 5 * int32}')), 110 | '{field: 5 * int32}') 111 | self.assertEqual(str(dshape('{field: M * int32}')), 112 | '{field: M * int32}') 113 | 114 | def test_ragged_array(self): 115 | self.assertTrue(isinstance(dshape('3 * var * int32')[1], 116 | datashape.Var)) 117 | 118 | def test_from_numpy_fields(self): 119 | import numpy as np 120 | dt = np.dtype('i4,i8,f8') 121 | ds = datashape.from_numpy((), dt) 122 | self.assertEqual(ds.names, ['f0', 'f1', 'f2']) 123 | self.assertEqual(ds.types, 124 | [datashape.int32, datashape.int64, datashape.float64]) 125 | 126 | def test_to_numpy_fields(self): 127 | import numpy as np 128 | ds = datashape.dshape('{x: int32, y: float32}') 129 | shape, dt = datashape.to_numpy(ds) 130 | self.assertEqual(shape, ()) 131 | self.assertEqual(dt, np.dtype([('x', 'int32'), ('y', 'float32')])) 132 | 133 | def test_syntax(self): 134 | self.assertEqual(datashape.Fixed(3) * dshape('int32'), 135 | dshape('3 * int32')) 136 | self.assertEqual(3 * dshape('int32'), 137 | dshape('3 * int32')) 138 | self.assertEqual(datashape.Var() * dshape('int32'), 139 | dshape('var * int32')) 140 | self.assertEqual(datashape.Var() * datashape.int32, 141 | dshape('var * int32')) 142 | self.assertEqual(datashape.Var() * 'int32', 143 | dshape('var * int32')) 144 | self.assertEqual(3 * datashape.int32, 145 | dshape('3 * int32')) 146 | 147 | def test_python_containers(self): 148 | var = datashape.Var() 149 | int32 = datashape.int32 150 | self.assertEqual(dshape('3 * int32'), 151 | dshape((3, int32))) 152 | self.assertEqual(dshape('3 * int32'), 153 | dshape([3, int32])) 154 | self.assertEqual(dshape('var * 3 * int32'), 155 | dshape((var, 3, int32))) 156 | 157 | dshapes = ['bool', 158 | 'int8', 159 | 'int16', 160 | 'int32', 161 | 'int64', 162 | 'uint8', 163 | 'uint16', 164 | 'uint32', 165 | 'uint64', 166 | 'float32', 167 | 'float64', 168 | 'complex64', 169 | 'complex128', 170 | 'string', 171 | 'json', 172 | 'date', 173 | 'time', 174 | 'datetime', 175 | 'int', 176 | 'real', 177 | 'complex', 178 | 'intptr', 179 | 'uintptr', 180 | '{id: int8, value: bool, result: int16}', 181 | '{a: int32, b: int64, x: uint8, y: uint16, z: uint32}', 182 | '{a: float32, b: float64, c: complex64, d: complex128, ' 183 | ' e: string, f: json, g: date, h: time, i: datetime}'] 184 | 185 | dimensions = ['2', 186 | '100', 187 | '...', 188 | 'var', 189 | '2 * var * 2', 190 | ] 191 | 192 | def test_dshape_into_repr(self): 193 | for ds in self.dshapes: 194 | self.assertEqual(eval(repr(dshape(ds))), dshape(ds)) 195 | for dm in self.dimensions: 196 | d = dshape(dm + ' * ' + ds) 197 | self.assertEqual(eval(repr(d)), d) 198 | 199 | 200 | pointer_sizes = { 201 | 4: { 202 | 'intptr': datashape.int32, 203 | 'uintptr': 
datashape.uint32, 204 | }, 205 | 8: { 206 | 'intptr': datashape.int64, 207 | 'uintptr': datashape.uint64, 208 | } 209 | } 210 | 211 | 212 | @pytest.mark.parametrize('kind', ['intptr', 'uintptr']) 213 | def test_intptr_size(kind): 214 | assert (dshape(kind) == 215 | dshape(pointer_sizes[ctypes.sizeof(ctypes.c_void_p)][kind])) 216 | -------------------------------------------------------------------------------- /datashape/tests/test_discovery.py: -------------------------------------------------------------------------------- 1 | from itertools import starmap 2 | import sys 3 | from warnings import catch_warnings, simplefilter 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from datashape.discovery import (discover, null, unite_identical, unite_base, 9 | unite_merge_dimensions, do_one, 10 | lowest_common_dshape) 11 | from datashape.coretypes import (int64, float64, complex128, string, bool_, 12 | Tuple, Record, date_, datetime_, time_, 13 | timedelta_, int32, var, Option, real, Null, 14 | TimeDelta, String, float32, R) 15 | from datashape.py2help import PY2, CPYTHON, mappingproxy, OrderedDict 16 | from datashape.util.testing import assert_dshape_equal 17 | from datashape import dshape 18 | from datetime import date, time, datetime, timedelta 19 | 20 | 21 | def test_simple(): 22 | assert discover(3) == int64 23 | assert discover(3.0) == float64 24 | assert discover(3.0 + 1j) == complex128 25 | assert discover('Hello') == string 26 | assert discover(True) == bool_ 27 | assert discover(None) == null 28 | 29 | 30 | def test_long(): 31 | if sys.version_info[0] == 2: 32 | assert eval('discover(3L)') == int64 33 | 34 | 35 | def test_list(): 36 | assert discover([1, 2, 3]) == 3 * discover(1) 37 | assert discover([1.0, 2.0, 3.0]) == 3 * discover(1.0) 38 | 39 | 40 | def test_set(): 41 | assert discover(set([1])) == 1 * discover(1) 42 | 43 | 44 | def test_frozenset(): 45 | assert discover(frozenset([1])) == 1 * discover(1) 46 | 47 | 48 | def test_heterogeneous_ordered_container(): 49 | assert discover(('Hello', 1)) == Tuple([discover('Hello'), discover(1)]) 50 | 51 | 52 | def test_string(): 53 | assert discover('1') == discover(1) 54 | assert discover('1.0') == discover(1.0) 55 | assert discover('True') == discover(True) 56 | assert discover('true') == discover(True) 57 | 58 | 59 | def test_record(): 60 | assert (discover({'name': 'Alice', 'amount': 100}) == 61 | Record([['amount', discover(100)], 62 | ['name', discover('Alice')]])) 63 | 64 | 65 | @pytest.mark.skipif( 66 | PY2 and not CPYTHON, 67 | reason='We cannot create mapping proxies in python 2 when not in CPython') 68 | def test_mappingproxy(): 69 | d = {'a': np.int64(1), 'b': 'cs', 'c': np.float32(1.0)} 70 | assert_dshape_equal( 71 | discover(mappingproxy(d)), 72 | discover(d), 73 | ) 74 | 75 | 76 | def test_ordereddict(): 77 | od = OrderedDict((('c', np.int64(1)), ('b', 'cs'), ('a', np.float32(1.0)))) 78 | assert_dshape_equal( 79 | discover(od), 80 | R['c': int64, 'b': string, 'a': float32], 81 | ) 82 | 83 | 84 | def test_datetime(): 85 | inputs = ["1991-02-03 04:05:06", 86 | "11/12/1822 06:47:26.00", 87 | "1822-11-12T06:47:26", 88 | "Fri Dec 19 15:10:11 1997", 89 | "Friday, November 11, 2005 17:56:21", 90 | "1982-2-20 5:02:00", 91 | "20030331 05:59:59.9", 92 | "Jul 6 2030 5:55PM", 93 | "1994-10-20 T 11:15", 94 | "2013-03-04T14:38:05.123", 95 | datetime(2014, 1, 1, 12, 1, 1), 96 | # "15MAR1985:14:15:22", 97 | # "201303041438" 98 | ] 99 | for dt in inputs: 100 | assert discover(dt) == datetime_ 101 | 102 | 103 | def test_string_date(): 
104 | assert discover('2014-01-01') == date_ 105 | 106 | 107 | def test_python_date(): 108 | assert discover(date(2014, 1, 1)) == date_ 109 | 110 | 111 | def test_single_space_string_is_not_date(): 112 | assert discover(' ') == string 113 | 114 | 115 | def test_string_that_looks_like_date(): 116 | # GH 91 117 | assert discover("31-DEC-99 12.00.00.000000000") == string 118 | 119 | 120 | def test_time(): 121 | assert discover(time(12, 0, 1)) == time_ 122 | 123 | 124 | def test_timedelta(): 125 | objs = starmap(timedelta, (range(10, 10 - i, -1) for i in range(1, 8))) 126 | for ts in objs: 127 | assert discover(ts) == timedelta_ 128 | 129 | 130 | def test_timedelta_strings(): 131 | inputs = ["1 day", 132 | "-2 hours", 133 | "3 seconds", 134 | "1 microsecond", 135 | "1003 milliseconds"] 136 | for ts in inputs: 137 | assert discover(ts) == TimeDelta(unit=ts.split()[1]) 138 | 139 | with pytest.raises(ValueError): 140 | TimeDelta(unit='buzz light-years') 141 | 142 | 143 | def test_time_string(): 144 | assert discover('12:00:01') == time_ 145 | assert discover('12:00:01.000') == time_ 146 | assert discover('12:00:01.123456') == time_ 147 | assert discover('12:00:01.1234') == time_ 148 | assert discover('10-10-01T12:00:01') == datetime_ 149 | assert discover('10-10-01 12:00:01') == datetime_ 150 | 151 | 152 | def test_integrative(): 153 | data = [{'name': 'Alice', 'amount': '100'}, 154 | {'name': 'Bob', 'amount': '200'}, 155 | {'name': 'Charlie', 'amount': '300'}] 156 | 157 | assert (dshape(discover(data)) == 158 | dshape('3 * {amount: int64, name: string}')) 159 | 160 | 161 | def test_numpy_scalars(): 162 | assert discover(np.int32(1)) == int32 163 | assert discover(np.float64(1)) == float64 164 | 165 | 166 | def test_numpy_array(): 167 | assert discover(np.ones((3, 2), dtype=np.int32)) == dshape('3 * 2 * int32') 168 | 169 | 170 | def test_numpy_array_with_strings(): 171 | x = np.array(['Hello', 'world'], dtype='O') 172 | assert discover(x) == 2 * string 173 | 174 | 175 | def test_numpy_recarray_with_strings(): 176 | x = np.array([('Alice', 1), ('Bob', 2)], 177 | dtype=[('name', 'O'), ('amt', 'i4')]) 178 | assert discover(x) == dshape('2 * {name: string, amt: int32}') 179 | 180 | 181 | unite = do_one([unite_identical, 182 | unite_merge_dimensions, 183 | unite_base]) 184 | 185 | 186 | def test_unite(): 187 | assert unite([int32, int32, int32]) == 3 * int32 188 | assert unite([3 * int32, 2 * int32]) == 2 * (var * int32) 189 | assert unite([2 * int32, 2 * int32]) == 2 * (2 * int32) 190 | assert unite([3 * (2 * int32), 2 * (2 * int32)]) == 2 * (var * (2 * int32)) 191 | 192 | 193 | def test_unite_missing_values(): 194 | assert unite([int32, null, int32]) == 3 * Option(int32) 195 | assert unite([string, null, int32]) 196 | 197 | 198 | def test_unite_tuples(): 199 | assert (discover([[1, 1, 'hello'], 200 | [1, '', ''], 201 | [1, 1, 'hello']]) == 202 | 3 * Tuple([int64, Option(int64), Option(string)])) 203 | 204 | assert (discover([[1, 1, 'hello', 1], 205 | [1, '', '', 1], 206 | [1, 1, 'hello', 1]]) == 207 | 3 * Tuple([int64, Option(int64), Option(string), int64])) 208 | 209 | 210 | def test_unite_records(): 211 | assert (discover([{'name': 'Alice', 'balance': 100}, 212 | {'name': 'Bob', 'balance': ''}]) == 213 | 2 * Record([['balance', Option(int64)], ['name', string]])) 214 | 215 | assert (discover([{'name': 'Alice', 's': 'foo'}, 216 | {'name': 'Bob', 's': None}]) == 217 | 2 * Record([['name', string], ['s', Option(string)]])) 218 | 219 | assert (discover([{'name': 'Alice', 's': 'foo', 'f': 1.0}, 220 | 
{'name': 'Bob', 's': None, 'f': None}]) == 221 | 2 * Record([['f', Option(float64)], 222 | ['name', string], 223 | ['s', Option(string)]])) 224 | 225 | # assert unite((Record([['name', string], ['balance', int32]]), 226 | # Record([['name', string]]))) == \ 227 | # Record([['name', string], ['balance', Option(int32)]]) 228 | 229 | 230 | def test_dshape_missing_data(): 231 | assert (discover([[1, 2, '', 3], 232 | [1, 2, '', 3], 233 | [1, 2, '', 3]]) == 234 | 3 * Tuple([int64, int64, null, int64])) 235 | 236 | 237 | def test_discover_mixed(): 238 | i = discover(1) 239 | f = discover(1.0) 240 | exp = 10 * Tuple([i, i, f, f]) 241 | assert dshape(discover([[1, 2, 1.0, 2.0]] * 10)) == exp 242 | 243 | exp = 10 * (4 * f) 244 | assert dshape(discover([[1, 2, 1.0, 2.0], [1.0, 2.0, 1, 2]] * 5)) == exp 245 | 246 | 247 | def test_test(): 248 | expected = 2 * Tuple([string, int64]) 249 | assert discover([['Alice', 100], ['Bob', 200]]) == expected 250 | 251 | 252 | def test_discover_appropriate(): 253 | assert discover((1, 1.0)) == Tuple([int64, real]) 254 | assert discover([(1, 1.0), (1, 1.0), (1, 1)]) == 3 * Tuple([int64, real]) 255 | 256 | 257 | def test_big_discover(): 258 | data = [['1'] + ['hello']*20] * 10 259 | assert discover(data) == 10 * Tuple([int64] + [string]*20) 260 | 261 | 262 | def test_unite_base(): 263 | assert unite_base([date_, datetime_]) == 2 * datetime_ 264 | 265 | 266 | def test_list_of_dicts_no_difference(): 267 | data = [{'name': 'Alice', 'amount': 100}, 268 | {'name': 'Bob'}] 269 | result = discover(data) 270 | expected = dshape('2 * {amount: ?int64, name: string}') 271 | assert result == expected 272 | 273 | 274 | def test_list_of_dicts_difference(): 275 | data = [{'name': 'Alice', 'amount': 100}, 276 | {'name': 'Bob', 'house_color': 'blue'}] 277 | result = discover(data) 278 | s = '2 * {amount: ?int64, house_color: ?string, name: string}' 279 | expected = dshape(s) 280 | assert result == expected 281 | 282 | 283 | def test_unite_base_on_records(): 284 | dshapes = [dshape('{name: string, amount: int32}'), 285 | dshape('{name: string, amount: int32}')] 286 | assert unite_base(dshapes) == dshape('2 * {name: string, amount: int32}') 287 | 288 | dshapes = [Null(), dshape('{name: string, amount: int32}')] 289 | assert unite_base(dshapes) == dshape('2 * ?{name: string, amount: int32}') 290 | 291 | dshapes = [dshape('{name: string, amount: int32}'), 292 | dshape('{name: string, amount: int64}')] 293 | assert unite_base(dshapes) == dshape('2 * {name: string, amount: int64}') 294 | 295 | 296 | def test_nested_complex_record_type(): 297 | dt = np.dtype([('a', 'U7'), ('b', [('c', 'int64', 2), ('d', 'float64')])]) 298 | x = np.zeros(5, dt) 299 | s = "5 * {a: string[7, 'U32'], b: {c: 2 * int64, d: float64}}" 300 | assert discover(x) == dshape(s) 301 | 302 | 303 | def test_letters_only_strings(): 304 | strings = ('sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 305 | 'friday', 'saturday', 'a', 'b', 'now', 'yesterday', 'tonight') 306 | for s in strings: 307 | assert discover(s) == string 308 | 309 | 310 | def test_discover_array_like(): 311 | class MyArray(object): 312 | def __init__(self, shape, dtype): 313 | self.shape = shape 314 | self.dtype = dtype 315 | 316 | with catch_warnings(record=True) as wl: 317 | simplefilter('always') 318 | assert discover(MyArray((4, 3), 'f4')) == dshape('4 * 3 * float32') 319 | assert len(wl) == 1 320 | assert issubclass(wl[0].category, DeprecationWarning) 321 | assert 'MyArray' in str(wl[0].message) 322 | 323 | 324 | 
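# Under Python 2, ``bytes`` is an alias of ``str``, so discover() takes the
# ordinary string path instead of producing an ASCII String type; hence the
# expected failure below.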
@pytest.mark.xfail(sys.version_info[0] == 2, 325 | raises=AssertionError, 326 | reason=('discovery behavior is different for raw strings ' 327 | 'in python 2')) 328 | def test_discover_bytes(): 329 | x = b'abcdefg' 330 | assert discover(x) == String('A') 331 | 332 | 333 | def test_discover_undiscoverable(): 334 | class MyClass(object): 335 | pass 336 | with pytest.raises(NotImplementedError): 337 | discover(MyClass()) 338 | 339 | 340 | @pytest.mark.parametrize('seq', [(), [], set()]) 341 | def test_discover_empty_sequence(seq): 342 | assert discover(seq) == var * string 343 | 344 | 345 | @pytest.mark.xfail(raises=ValueError, reason='Not yet implemented') 346 | def test_lowest_common_dshape_varlen_strings(): 347 | assert lowest_common_dshape([String(10), String(11)]) == String(11) 348 | assert lowest_common_dshape([String(11), string]) == string 349 | 350 | 351 | def test_discover_mock(): 352 | try: 353 | from unittest.mock import Mock 354 | except ImportError: 355 | from mock import Mock 356 | 357 | # This used to segfault because we were sending mocks into numpy 358 | with pytest.raises(NotImplementedError): 359 | discover(Mock()) 360 | 361 | 362 | def test_string_with_overflow(): 363 | assert discover('INF US Equity') == string 364 | -------------------------------------------------------------------------------- /datashape/tests/test_lexer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the DataShape lexer. 3 | """ 4 | 5 | from __future__ import absolute_import, division, print_function 6 | 7 | import unittest 8 | 9 | import datashape 10 | from datashape import lexer 11 | 12 | 13 | class TestDataShapeLexer(unittest.TestCase): 14 | 15 | def check_isolated_token(self, ds_str, tname, val=None): 16 | # The token name should be an attribute of the lexer module 17 | tid = getattr(lexer, tname) 18 | # Lexing should produce a single token matching the specification 19 | self.assertEqual(list(lexer.lex(ds_str)), 20 | [lexer.Token(tid, tname, (0, len(ds_str)), val)]) 21 | 22 | def check_failing_token(self, ds_str): 23 | # Creating the lexer will fail, because the error is 24 | # in the first token.
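        # (lexer.lex returns a lazy token iterator, so the bad first token
        # only raises once the stream is consumed -- hence passing ``list``
        # as the callable to assertRaises.)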
25 | self.assertRaises(datashape.DataShapeSyntaxError, list, lexer.lex(ds_str)) 26 | 27 | def test_isolated_tokens(self): 28 | self.check_isolated_token('testing', 'NAME_LOWER', 'testing') 29 | self.check_isolated_token('Testing', 'NAME_UPPER', 'Testing') 30 | self.check_isolated_token('_testing', 'NAME_OTHER', '_testing') 31 | self.check_isolated_token('*', 'ASTERISK') 32 | self.check_isolated_token(',', 'COMMA') 33 | self.check_isolated_token('=', 'EQUAL') 34 | self.check_isolated_token(':', 'COLON') 35 | self.check_isolated_token('[', 'LBRACKET') 36 | self.check_isolated_token(']', 'RBRACKET') 37 | self.check_isolated_token('{', 'LBRACE') 38 | self.check_isolated_token('}', 'RBRACE') 39 | self.check_isolated_token('(', 'LPAREN') 40 | self.check_isolated_token(')', 'RPAREN') 41 | self.check_isolated_token('...', 'ELLIPSIS') 42 | self.check_isolated_token('->', 'RARROW') 43 | self.check_isolated_token('?', 'QUESTIONMARK') 44 | self.check_isolated_token('32102', 'INTEGER', 32102) 45 | self.check_isolated_token('->', 'RARROW') 46 | self.check_isolated_token('"testing"', 'STRING', 'testing') 47 | self.check_isolated_token("'testing'", 'STRING', 'testing') 48 | 49 | def test_integer(self): 50 | # Digits 51 | self.check_isolated_token('0', 'INTEGER', 0) 52 | self.check_isolated_token('1', 'INTEGER', 1) 53 | self.check_isolated_token('2', 'INTEGER', 2) 54 | self.check_isolated_token('3', 'INTEGER', 3) 55 | self.check_isolated_token('4', 'INTEGER', 4) 56 | self.check_isolated_token('5', 'INTEGER', 5) 57 | self.check_isolated_token('6', 'INTEGER', 6) 58 | self.check_isolated_token('7', 'INTEGER', 7) 59 | self.check_isolated_token('8', 'INTEGER', 8) 60 | self.check_isolated_token('9', 'INTEGER', 9) 61 | # Various-sized numbers 62 | self.check_isolated_token('10', 'INTEGER', 10) 63 | self.check_isolated_token('102', 'INTEGER', 102) 64 | self.check_isolated_token('1024', 'INTEGER', 1024) 65 | self.check_isolated_token('10246', 'INTEGER', 10246) 66 | self.check_isolated_token('102468', 'INTEGER', 102468) 67 | self.check_isolated_token('1024683', 'INTEGER', 1024683) 68 | self.check_isolated_token('10246835', 'INTEGER', 10246835) 69 | self.check_isolated_token('102468357', 'INTEGER', 102468357) 70 | self.check_isolated_token('1024683579', 'INTEGER', 1024683579) 71 | # Leading zeros are not allowed 72 | self.check_failing_token('00') 73 | self.check_failing_token('01') 74 | self.check_failing_token('090') 75 | 76 | def test_string(self): 77 | # Trivial strings 78 | self.check_isolated_token('""', 'STRING', '') 79 | self.check_isolated_token("''", 'STRING', '') 80 | self.check_isolated_token('"test"', 'STRING', 'test') 81 | self.check_isolated_token("'test'", 'STRING', 'test') 82 | # Valid escaped characters 83 | self.check_isolated_token(r'"\"\b\f\n\r\t\ub155"', 'STRING', 84 | u'"\b\f\n\r\t\ub155') 85 | self.check_isolated_token(r"'\'\b\f\n\r\t\ub155'", 'STRING', 86 | u"'\b\f\n\r\t\ub155") 87 | # A sampling of invalid escaped characters 88 | self.check_failing_token(r'''"\'"''') 89 | self.check_failing_token(r"""'\"'""") 90 | self.check_failing_token(r"'\a'") 91 | self.check_failing_token(r"'\s'") 92 | self.check_failing_token(r"'\R'") 93 | self.check_failing_token(r"'\N'") 94 | self.check_failing_token(r"'\U'") 95 | self.check_failing_token(r"'\u123g'") 96 | self.check_failing_token(r"'\u123'") 97 | # Some unescaped and escaped unicode characters 98 | self.check_isolated_token(u'"\uc548\ub155 \\uc548\\ub155"', 'STRING', 99 | u'\uc548\ub155 \uc548\ub155') 100 | 101 | def test_failing_tokens(self): 102
| self.check_failing_token('~') 103 | self.check_failing_token('`') 104 | self.check_failing_token('@') 105 | self.check_failing_token('$') 106 | self.check_failing_token('%') 107 | self.check_failing_token('^') 108 | self.check_failing_token('&') 109 | self.check_failing_token('-') 110 | self.check_failing_token('+') 111 | self.check_failing_token(';') 112 | self.check_failing_token('<') 113 | self.check_failing_token('>') 114 | self.check_failing_token('.') 115 | self.check_failing_token('..') 116 | self.check_failing_token('/') 117 | self.check_failing_token('|') 118 | self.check_failing_token('\\') 119 | 120 | def test_whitespace(self): 121 | expected_idval = [(lexer.COLON, None), 122 | (lexer.STRING, 'a'), 123 | (lexer.INTEGER, 12345), 124 | (lexer.RARROW, None), 125 | (lexer.EQUAL, None), 126 | (lexer.ASTERISK, None), 127 | (lexer.NAME_OTHER, '_b')] 128 | # With minimal whitespace 129 | toks = list(lexer.lex(':"a"12345->=*_b')) 130 | self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval) 131 | # With spaces 132 | toks = list(lexer.lex(' : "a" 12345 -> = * _b ')) 133 | self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval) 134 | # With tabs 135 | toks = list(lexer.lex('\t:\t"a"\t12345\t->\t=\t*\t_b\t')) 136 | self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval) 137 | # With newlines 138 | toks = list(lexer.lex('\n:\n"a"\n12345\n->\n=\n*\n_b\n')) 139 | self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval) 140 | # With spaces, tabs, newlines and comments 141 | toks = list(lexer.lex('# comment\n' + 142 | ': # X\n' + 143 | ' "a" # "b"\t\n' + 144 | '\t12345\n\n' + 145 | '->\n' + 146 | '=\n' + 147 | '*\n' + 148 | '_b # comment\n' + 149 | ' \t # end')) 150 | self.assertEqual([(tok.id, tok.val) for tok in toks], expected_idval) 151 | -------------------------------------------------------------------------------- /datashape/tests/test_operations.py: -------------------------------------------------------------------------------- 1 | import datashape 2 | import pytest 3 | 4 | 5 | def test_scalar_subarray(): 6 | assert datashape.int32.subarray(0) == datashape.int32 7 | with pytest.raises(IndexError): 8 | datashape.int32.subarray(1) 9 | assert datashape.string.subarray(0) == datashape.string 10 | with pytest.raises(IndexError): 11 | datashape.string.subarray(1) 12 | 13 | 14 | def test_array_subarray(): 15 | assert (datashape.dshape('3 * int32').subarray(0) == 16 | datashape.dshape('3 * int32')) 17 | assert (datashape.dshape('3 * int32').subarray(1) == 18 | datashape.DataShape(datashape.int32)) 19 | assert (str(datashape.dshape('3 * var * M * int32').subarray(2)) == 20 | str(datashape.dshape('M * int32'))) 21 | assert (str(datashape.dshape('3 * var * M * float64').subarray(3)) == 22 | str(datashape.float64)) 23 | 24 | 25 | def test_dshape_compare(): 26 | assert datashape.int32 != datashape.dshape('1 * int32') 27 | -------------------------------------------------------------------------------- /datashape/tests/test_predicates.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datashape import dshape 4 | from datashape.predicates import isfixed, _dimensions, isnumeric, isscalar 5 | from datashape.coretypes import TypeVar, int32, Categorical 6 | 7 | 8 | def test_isfixed(): 9 | assert not isfixed(TypeVar('M') * int32) 10 | 11 | 12 | def test_isscalar(): 13 | assert isscalar('?int32') 14 | assert isscalar('float32') 15 | assert isscalar(int32) 16 | assert 
isscalar(Categorical(['a', 'b', 'c'])) 17 | assert not isscalar('{a: int32, b: float64}') 18 | 19 | 20 | def test_option(): 21 | assert _dimensions('?int') == _dimensions('int') 22 | assert _dimensions('3 * ?int') == _dimensions('3 * int') 23 | 24 | 25 | def test_time(): 26 | assert not isnumeric('time') 27 | -------------------------------------------------------------------------------- /datashape/tests/test_promote.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datashape import (promote, Option, float64, int64, float32, optionify, 4 | string, datetime_ as datetime, dshape) 5 | 6 | 7 | def test_simple(): 8 | x = int64 9 | y = float32 10 | z = promote(x, y) 11 | assert z == float64 12 | 13 | 14 | def test_option(): 15 | x = int64 16 | y = Option(float32) 17 | z = promote(x, y) 18 | assert z == Option(float64) 19 | 20 | 21 | def test_no_promote_option(): 22 | x = int64 23 | y = Option(float64) 24 | z = promote(x, y, promote_option=False) 25 | assert z == float64 26 | 27 | 28 | def test_option_in_parent(): 29 | x = int64 30 | y = Option(float32) 31 | z = optionify(x, y, y) 32 | assert z == y 33 | 34 | 35 | @pytest.mark.parametrize('x,y,p,r', 36 | [[string, string, True, string], 37 | [string, string, False, string], 38 | 39 | [Option(string), 40 | Option(string), 41 | True, 42 | Option(string)], 43 | 44 | [Option(string), 45 | Option(string), 46 | False, 47 | Option(string)], 48 | 49 | [Option(string), 50 | string, 51 | True, 52 | Option(string)], 53 | 54 | [Option(string), 55 | string, 56 | False, 57 | string], 58 | 59 | [Option(string), 60 | dshape('?string'), 61 | True, 62 | Option(string)], 63 | 64 | [dshape('?string'), 65 | Option(string), 66 | False, 67 | Option(string)], 68 | 69 | [dshape('string'), 70 | Option(string), 71 | True, 72 | Option(string)], 73 | 74 | [dshape('string'), 75 | Option(string), 76 | False, 77 | string]]) 78 | def test_promote_string_with_option(x, y, p, r): 79 | assert (promote(x, y, promote_option=p) == 80 | promote(y, x, promote_option=p) == 81 | r) 82 | 83 | 84 | @pytest.mark.parametrize('x,y,p,r', 85 | [[datetime, datetime, True, datetime], 86 | [datetime, datetime, False, datetime], 87 | 88 | [Option(datetime), 89 | Option(datetime), 90 | True, 91 | Option(datetime)], 92 | 93 | [Option(datetime), 94 | Option(datetime), 95 | False, 96 | Option(datetime)], 97 | 98 | [Option(datetime), 99 | datetime, 100 | True, 101 | Option(datetime)], 102 | 103 | [Option(datetime), 104 | datetime, 105 | False, 106 | datetime], 107 | 108 | [Option(datetime), 109 | dshape('?datetime'), 110 | True, 111 | Option(datetime)], 112 | 113 | [dshape('?datetime'), 114 | Option(datetime), 115 | False, 116 | Option(datetime)], 117 | 118 | [dshape('datetime'), 119 | Option(datetime), 120 | True, 121 | Option(datetime)], 122 | 123 | [dshape('datetime'), 124 | Option(datetime), 125 | False, 126 | datetime]]) 127 | def test_promote_datetime_with_option(x, y, p, r): 128 | assert (promote(x, y, promote_option=p) == 129 | promote(y, x, promote_option=p) == 130 | r) 131 | -------------------------------------------------------------------------------- /datashape/tests/test_str.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import pytest 3 | 4 | import datashape 5 | from datashape import dshape, DataShapeSyntaxError 6 | 7 | 8 | class TestDataShapeStr(unittest.TestCase): 9 | def test_primitive_measure_str(self): 10 | self.assertEqual(str(datashape.int8), 'int8') 11 | 
self.assertEqual(str(datashape.int16), 'int16') 12 | self.assertEqual(str(datashape.int32), 'int32') 13 | self.assertEqual(str(datashape.int64), 'int64') 14 | self.assertEqual(str(datashape.uint8), 'uint8') 15 | self.assertEqual(str(datashape.uint16), 'uint16') 16 | self.assertEqual(str(datashape.uint32), 'uint32') 17 | self.assertEqual(str(datashape.uint64), 'uint64') 18 | self.assertEqual(str(datashape.float32), 'float32') 19 | self.assertEqual(str(datashape.float64), 'float64') 20 | self.assertEqual(str(datashape.string), 'string') 21 | self.assertEqual(str(datashape.String(3)), 'string[3]') 22 | self.assertEqual(str(datashape.String('A')), "string['A']") 23 | 24 | def test_structure_str(self): 25 | self.assertEqual(str(dshape('{x:int32, y:int64}')), 26 | '{x: int32, y: int64}') 27 | 28 | def test_array_str(self): 29 | self.assertEqual(str(dshape('3*5*int16')), 30 | '3 * 5 * int16') 31 | 32 | def test_primitive_measure_repr(self): 33 | self.assertEqual(repr(datashape.int8), 'ctype("int8")') 34 | self.assertEqual(repr(datashape.int16), 'ctype("int16")') 35 | self.assertEqual(repr(datashape.int32), 'ctype("int32")') 36 | self.assertEqual(repr(datashape.int64), 'ctype("int64")') 37 | self.assertEqual(repr(datashape.uint8), 'ctype("uint8")') 38 | self.assertEqual(repr(datashape.uint16), 'ctype("uint16")') 39 | self.assertEqual(repr(datashape.uint32), 'ctype("uint32")') 40 | self.assertEqual(repr(datashape.uint64), 'ctype("uint64")') 41 | self.assertEqual(repr(datashape.float32), 'ctype("float32")') 42 | self.assertEqual(repr(datashape.float64), 'ctype("float64")') 43 | self.assertEqual(repr(datashape.string), 'ctype("string")') 44 | self.assertEqual(repr(datashape.String(3)), 'ctype("string[3]")') 45 | self.assertEqual(repr(datashape.String('A')), 46 | """ctype("string['A']")""") 47 | 48 | def test_structure_repr(self): 49 | self.assertEqual(repr(dshape('{x:int32, y:int64}')), 50 | 'dshape("{x: int32, y: int64}")') 51 | 52 | def test_array_repr(self): 53 | self.assertEqual(repr(dshape('3*5*int16')), 54 | 'dshape("3 * 5 * int16")') 55 | 56 | 57 | @pytest.mark.parametrize('s', 58 | ['{"./abc": int64}', 59 | '{"./a b c": float64}', 60 | '{"./a b\tc": string}', 61 | '{"./a/[0 1 2]/b/\\n": float32}', 62 | pytest.mark.xfail('{"/a/b/0/c\v/d": int8}', 63 | raises=DataShapeSyntaxError), 64 | pytest.mark.xfail('{"/a/b/0/c\n/d": int8}', 65 | raises=DataShapeSyntaxError), 66 | pytest.mark.xfail('{"/a/b/0/c\r/d": int8}', 67 | raises=DataShapeSyntaxError)]) 68 | def test_arbitrary_string(s): 69 | ds = dshape(s) 70 | assert dshape(str(ds)) == ds 71 | -------------------------------------------------------------------------------- /datashape/tests/test_typeset.py: -------------------------------------------------------------------------------- 1 | import datashape 2 | import pytest 3 | 4 | 5 | def test_equal(): 6 | assert datashape.integral == datashape.integral 7 | assert datashape.floating != datashape.integral 8 | 9 | 10 | def test_repr(): 11 | assert repr(datashape.integral) == '{integral}' 12 | 13 | 14 | def test_custom_typeset_repr(): 15 | mytypeset = datashape.TypeSet(datashape.int64, datashape.float64) 16 | assert repr(mytypeset).startswith('TypeSet(') 17 | assert repr(mytypeset).endswith('name=None)') 18 | 19 | 20 | def test_register_already_existing_typeset_fails(): 21 | mytypeset = datashape.TypeSet(datashape.int64, datashape.float64, 22 | name='foo') 23 | with pytest.raises(TypeError): 24 | datashape.typesets.register_typeset('foo', mytypeset) 25 | 26 | 27 | def test_getitem(): 28 | assert 
datashape.typesets.registry['integral'] == datashape.integral 29 | 30 | 31 | def test_getitem_non_existent_typeset(): 32 | with pytest.raises(KeyError): 33 | datashape.typesets.registry['footypeset'] 34 | -------------------------------------------------------------------------------- /datashape/tests/test_user.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from datashape.user import * 4 | from datashape import dshape 5 | from datetime import date, time, datetime 6 | import numpy as np 7 | 8 | 9 | min_np = pytest.mark.skipif( 10 | np.__version__ > '1.14', 11 | reason="issubdtype no longer downcasts" 12 | ) 13 | 14 | 15 | @min_np 16 | def test_validate(): 17 | assert validate(int, 1) 18 | assert validate('int', 1) 19 | assert validate(str, 'Alice') 20 | assert validate(dshape('string'), 'Alice') 21 | assert validate(dshape('int'), 1) 22 | assert validate(dshape('int')[0], 1) 23 | assert validate('real', 2.0) 24 | assert validate('2 * int', (1, 2)) 25 | assert not validate('3 * int', (1, 2)) 26 | assert not validate('2 * int', 2) 27 | 28 | 29 | @min_np 30 | def test_nested_iteratables(): 31 | assert validate('2 * 3 * int', [(1, 2, 3), (4, 5, 6)]) 32 | 33 | 34 | def test_numeric_tower(): 35 | assert validate(np.integer, np.int32(1)) 36 | assert validate(np.number, np.int32(1)) 37 | 38 | 39 | @min_np 40 | def test_validate_dicts(): 41 | assert validate('{x: int, y: int}', {'x': 1, 'y': 2}) 42 | assert not validate('{x: int, y: int}', {'x': 1, 'y': 2.0}) 43 | assert not validate('{x: int, y: int}', {'x': 1, 'z': 2}) 44 | 45 | assert validate('var * {x: int, y: int}', [{'x': 1, 'y': 2}]) 46 | 47 | assert validate('var * {x: int, y: int}', [{'x': 1, 'y': 2}, 48 | {'x': 3, 'y': 4}]) 49 | 50 | 51 | @min_np 52 | def test_tuples_can_be_records_too(): 53 | assert validate('{x: int, y: real}', (1, 2.0)) 54 | assert not validate('{x: int, y: real}', (1.0, 2)) 55 | 56 | 57 | def test_datetimes(): 58 | assert validate('time', time(12, 0, 0)) 59 | assert validate('date', date(1999, 1, 20)) 60 | assert validate('datetime', datetime(1999, 1, 20, 12, 0, 0)) 61 | 62 | 63 | def test_numpy(): 64 | assert validate('2 * int32', np.array([1, 2], dtype='int32')) 65 | 66 | 67 | def test_issubschema(): 68 | assert issubschema('int', 'int') 69 | assert not issubschema('int', 'float32') 70 | 71 | assert issubschema('2 * int', '2 * int') 72 | assert not issubschema('2 * int', '3 * int') 73 | 74 | # assert issubschema('float32', 'real') 75 | 76 | 77 | def test_integration(): 78 | assert validate('{name: string, arrived: date}', 79 | {'name': 'Alice', 'arrived': date(2012, 1, 5)}) 80 | -------------------------------------------------------------------------------- /datashape/tests/test_util.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import datashape 4 | from datashape import dshape, has_var_dim, has_ellipsis 5 | 6 | 7 | def test_cat_dshapes(): 8 | # concatenating 1 dshape is a no-op 9 | dslist = [dshape('3 * 10 * int32')] 10 | assert datashape.cat_dshapes(dslist) == dslist[0] 11 | # two dshapes 12 | dslist = [dshape('3 * 10 * int32'), 13 | dshape('7 * 10 * int32')] 14 | assert datashape.cat_dshapes(dslist) == dshape('10 * 10 * int32') 15 | 16 | 17 | def test_cat_dshapes_errors(): 18 | # need at least one dshape 19 | with pytest.raises(ValueError): 20 | datashape.cat_dshapes([]) 21 | 22 | # dshapes need to match after the first dimension 23 | with pytest.raises(ValueError): 24 | 
datashape.cat_dshapes([dshape('3 * 10 * int32'), 25 | dshape('3 * 1 * int32')]) 26 | 27 | 28 | @pytest.mark.parametrize('ds_pos', 29 | ["... * float32", 30 | "A... * float32", 31 | "var * float32", 32 | "10 * { f0: int32, f1: A... * float32 }", 33 | "{ f0 : { g0 : var * int }, f1: int32 }", 34 | (dshape("var * int32"),)]) 35 | def test_has_var_dim(ds_pos): 36 | assert has_var_dim(dshape(ds_pos)) 37 | 38 | 39 | @pytest.mark.parametrize('ds_neg', 40 | [dshape("float32"), 41 | dshape("10 * float32"), 42 | dshape("10 * { f0: int32, f1: 10 * float32 }"), 43 | dshape("{ f0 : { g0 : 2 * int }, f1: int32 }"), 44 | (dshape("int32"),)]) 45 | def test_not_has_var_dim(ds_neg): 46 | assert not has_var_dim(ds_neg) 47 | 48 | 49 | @pytest.mark.parametrize('ds', 50 | [dshape("... * float32"), 51 | dshape("A... * float32"), 52 | dshape("var * ... * float32"), 53 | dshape("(int32, M... * int16) -> var * int8"), 54 | dshape("(int32, var * int16) -> ... * int8"), 55 | dshape("10 * { f0: int32, f1: A... * float32 }"), 56 | dshape("{ f0 : { g0 : ... * int }, f1: int32 }"), 57 | (dshape("... * int32"),)]) 58 | def test_has_ellipsis(ds): 59 | assert has_ellipsis(ds) 60 | 61 | 62 | @pytest.mark.parametrize('ds', 63 | [dshape("float32"), 64 | dshape("10 * var * float32"), 65 | dshape("M * float32"), 66 | dshape("(int32, M * int16) -> var * int8"), 67 | dshape("(int32, int16) -> var * int8"), 68 | dshape("10 * { f0: int32, f1: 10 * float32 }"), 69 | dshape("{ f0 : { g0 : 2 * int }, f1: int32 }"), 70 | (dshape("M * int32"),)]) 71 | def test_not_has_ellipsis(ds): 72 | assert not has_ellipsis(ds) 73 | -------------------------------------------------------------------------------- /datashape/tests/test_version.py: -------------------------------------------------------------------------------- 1 | def test_version(): 2 | import datashape 3 | assert datashape.__version__ != 'unknown' 4 | -------------------------------------------------------------------------------- /datashape/type_symbol_table.py: -------------------------------------------------------------------------------- 1 | """ 2 | A symbol table object to hold types for the parser. 3 | """ 4 | 5 | from __future__ import absolute_import, division, print_function 6 | import ctypes 7 | from itertools import chain 8 | 9 | from . 
import coretypes as ct 10 | 11 | __all__ = ['TypeSymbolTable', 'sym'] 12 | 13 | 14 | _is_64bit = (ctypes.sizeof(ctypes.c_void_p) == 8) 15 | 16 | 17 | def _complex(tp): 18 | """Simple temporary type constructor for complex""" 19 | if tp == ct.DataShape(ct.float32): 20 | return ct.complex_float32 21 | elif tp == ct.DataShape(ct.float64): 22 | return ct.complex_float64 23 | else: 24 | raise TypeError( 25 | 'Cannot construct a complex type with real component %s' % tp) 26 | 27 | 28 | def _struct(names, dshapes): 29 | """Simple temporary type constructor for struct""" 30 | return ct.Record(list(zip(names, dshapes))) 31 | 32 | 33 | def _funcproto(args, ret): 34 | """Simple temporary type constructor for funcproto""" 35 | return ct.Function(*chain(args, (ret,))) 36 | 37 | 38 | def _typevar_dim(name): 39 | """Simple temporary type constructor for typevar as a dim""" 40 | # Note: Presently no difference between dim and dtype typevar 41 | return ct.TypeVar(name) 42 | 43 | 44 | def _typevar_dtype(name): 45 | """Simple temporary type constructor for typevar as a dtype""" 46 | # Note: Presently no difference between dim and dtype typevar 47 | return ct.TypeVar(name) 48 | 49 | 50 | def _ellipsis(name): 51 | return ct.Ellipsis(ct.TypeVar(name)) 52 | 53 | # data types with no type constructor 54 | no_constructor_types = [ 55 | ('bool', ct.bool_), 56 | ('int8', ct.int8), 57 | ('int16', ct.int16), 58 | ('int32', ct.int32), 59 | ('int64', ct.int64), 60 | ('intptr', ct.int64 if _is_64bit else ct.int32), 61 | ('int', ct.int32), 62 | ('uint8', ct.uint8), 63 | ('uint16', ct.uint16), 64 | ('uint32', ct.uint32), 65 | ('uint64', ct.uint64), 66 | ('uintptr', ct.uint64 if _is_64bit else ct.uint32), 67 | ('float16', ct.float16), 68 | ('float32', ct.float32), 69 | ('float64', ct.float64), 70 | ('complex64', ct.complex64), 71 | ('complex128', ct.complex128), 72 | ('real', ct.float64), 73 | ('complex', ct.complex_float64), 74 | ('string', ct.string), 75 | ('json', ct.json), 76 | ('date', ct.date_), 77 | ('time', ct.time_), 78 | ('datetime', ct.datetime_), 79 | ('timedelta', ct.timedelta_), 80 | ('null', ct.null), 81 | ('void', ct.void), 82 | ('object', ct.object_), 83 | ] 84 | 85 | # data types with a type constructor 86 | constructor_types = [ 87 | ('complex', _complex), 88 | ('string', ct.String), 89 | ('struct', _struct), 90 | ('tuple', ct.Tuple), 91 | ('funcproto', _funcproto), 92 | ('typevar', _typevar_dtype), 93 | ('option', ct.Option), 94 | ('map', ct.Map), 95 | ('time', ct.Time), 96 | ('datetime', ct.DateTime), 97 | ('timedelta', ct.TimeDelta), 98 | ('units', ct.Units), 99 | ('decimal', ct.Decimal), 100 | ('categorical', ct.Categorical), 101 | ] 102 | 103 | # dim types with no type constructor 104 | dim_no_constructor = [ 105 | ('var', ct.Var()), 106 | ('ellipsis', ct.Ellipsis()), 107 | ] 108 | 109 | # dim types with a type constructor 110 | dim_constructor = [ 111 | ('fixed', ct.Fixed), 112 | ('typevar', _typevar_dim), 113 | ('ellipsis', _ellipsis), 114 | ] 115 | 116 | 117 | class TypeSymbolTable(object): 118 | 119 | """ 120 | This is a class which holds symbols for types and type constructors, 121 | and is used by the datashape parser to build types during its parsing. 122 | A TypeSymbolTable sym has four tables, as follows: 123 | 124 | sym.dtype 125 | Data type symbols with no type constructor. 126 | sym.dtype_constr 127 | Data type symbols with a type constructor. This may contain 128 | symbols also in sym.dtype, e.g. for 'complex' and 'complex[float64]'.
129 | sym.dim 130 | Dimension symbols with no type constructor. 131 | sym.dim_constr 132 | Dimension symbols with a type constructor. 133 | """ 134 | __slots__ = ['dtype', 'dtype_constr', 'dim', 'dim_constr'] 135 | 136 | def __init__(self, bare=False): 137 | # Initialize all the symbol tables to empty dicts 138 | self.dtype = {} 139 | self.dtype_constr = {} 140 | self.dim = {} 141 | self.dim_constr = {} 142 | if not bare: 143 | self.add_default_types() 144 | 145 | def add_default_types(self): 146 | """ 147 | Adds all the default datashape types to the symbol table. 148 | """ 149 | self.dtype.update(no_constructor_types) 150 | self.dtype_constr.update(constructor_types) 151 | self.dim.update(dim_no_constructor) 152 | self.dim_constr.update(dim_constructor) 153 | 154 | # Create the default global type symbol table 155 | sym = TypeSymbolTable() 156 | -------------------------------------------------------------------------------- /datashape/typesets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Traits constituting sets of types. 3 | """ 4 | 5 | from itertools import chain 6 | 7 | from .coretypes import (Unit, int8, int16, int32, int64, uint8, uint16, uint32, 8 | uint64, float16, float32, float64, complex64, 9 | complex128, bool_, Decimal, TimeDelta, Option) 10 | 11 | 12 | __all__ = ['TypeSet', 'matches_typeset', 'signed', 'unsigned', 'integral', 13 | 'floating', 'complexes', 'boolean', 'numeric', 'scalar', 14 | 'maxtype'] 15 | 16 | 17 | class TypeSet(Unit): 18 | """ 19 | Create a new set of types. Keyword argument 'name' may create a registered 20 | typeset for use in datashape type strings. 21 | """ 22 | __slots__ = '_order', 'name' 23 | 24 | def __init__(self, *args, **kwds): 25 | self._order = args 26 | self.name = kwds.get('name') 27 | if self.name: 28 | register_typeset(self.name, self) 29 | 30 | @property 31 | def _set(self): 32 | return set(self._order) 33 | 34 | @property 35 | def types(self): 36 | return self._order 37 | 38 | def __eq__(self, other): 39 | return (isinstance(other, type(self)) and 40 | self.name == other.name and self.types == other.types) 41 | 42 | def __hash__(self): 43 | return hash((self.name, self.types)) 44 | 45 | def __contains__(self, val): 46 | return val in self._set 47 | 48 | def __repr__(self): 49 | if self.name: 50 | return '{%s}' % (self.name,) 51 | return "%s(%s, name=%s)" % (self.__class__.__name__, self._set, 52 | self.name) 53 | 54 | def __or__(self, other): 55 | return TypeSet(*chain(self, other)) 56 | 57 | def __iter__(self): 58 | return iter(self._order) 59 | 60 | def __len__(self): 61 | return len(self._set) 62 | 63 | 64 | def matches_typeset(types, signature): 65 | """Match argument types to the parameter types of a signature 66 | 67 | >>> matches_typeset(int32, integral) 68 | True 69 | >>> matches_typeset(float32, integral) 70 | False 71 | >>> matches_typeset(integral, real) 72 | True 73 | """ 74 | if types in signature: 75 | return True 76 | match = True 77 | for a, b in zip(types, signature): 78 | check = isinstance(b, TypeSet) 79 | if check and (a not in b) or (not check and a != b): 80 | match = False 81 | break 82 | return match 83 | 84 | 85 | class TypesetRegistry(object): 86 | def __init__(self): 87 | self.registry = {} 88 | self.lookup = self.registry.get 89 | 90 | def register_typeset(self, name, typeset): 91 | if name in self.registry: 92 | raise TypeError("TypeSet %s already defined with types %s" % 93 | (name, self.registry[name].types)) 94 | self.registry[name] = typeset 95 |
return typeset 96 | 97 | def __getitem__(self, key): 98 | value = self.lookup(key) 99 | if value is None: 100 | raise KeyError(key) 101 | return value 102 | 103 | registry = TypesetRegistry() 104 | register_typeset = registry.register_typeset 105 | lookup = registry.lookup 106 | 107 | #------------------------------------------------------------------------ 108 | # Default Type Sets 109 | #------------------------------------------------------------------------ 110 | 111 | signed = TypeSet(int8, int16, int32, int64, name='signed') 112 | unsigned = TypeSet(uint8, uint16, uint32, uint64, name='unsigned') 113 | integral = TypeSet(*[x for t in zip(signed, unsigned) for x in t], 114 | name='integral') 115 | floating = TypeSet(float32, float64, name='floating') 116 | complexes = TypeSet(complex64, complex128, name='complexes') 117 | boolean = TypeSet(bool_, name='boolean') 118 | 119 | real = TypeSet(*integral | floating, name='real') 120 | numeric = TypeSet(*integral | floating | complexes, name='numeric') 121 | scalar = TypeSet(*boolean | numeric, name='scalar') 122 | 123 | 124 | supertype_map = { 125 | int8: signed, 126 | int16: signed, 127 | int32: signed, 128 | int64: signed, 129 | uint8: unsigned, 130 | uint16: unsigned, 131 | uint32: unsigned, 132 | uint64: unsigned, 133 | float16: floating, 134 | float32: floating, 135 | float64: floating, 136 | complex64: complexes, 137 | complex128: complexes, 138 | bool_: boolean 139 | } 140 | 141 | 142 | def supertype(measure): 143 | """Get the super type of a concrete numeric type 144 | 145 | Examples 146 | -------- 147 | >>> supertype(int8) 148 | {signed} 149 | 150 | >>> supertype(float32) 151 | {floating} 152 | 153 | >>> supertype(complex128) 154 | {complexes} 155 | 156 | >>> supertype(bool_) 157 | {boolean} 158 | 159 | >>> supertype(Option(bool_)) 160 | {boolean} 161 | """ 162 | if isinstance(measure, Option): 163 | measure = measure.ty 164 | assert matches_typeset(measure, scalar), 'measure must be numeric' 165 | return supertype_map[measure] 166 | 167 | 168 | def maxtype(measure): 169 | """Get the maximum width for a particular numeric type 170 | 171 | Examples 172 | -------- 173 | >>> maxtype(int8) 174 | ctype("int64") 175 | 176 | >>> maxtype(Option(float64)) 177 | Option(ty=ctype("float64")) 178 | 179 | >>> maxtype(bool_) 180 | ctype("bool") 181 | 182 | >>> maxtype(Decimal(11, 2)) 183 | Decimal(precision=11, scale=2) 184 | 185 | >>> maxtype(Option(Decimal(11, 2))) 186 | Option(ty=Decimal(precision=11, scale=2)) 187 | 188 | >>> maxtype(TimeDelta(unit='ms')) 189 | TimeDelta(unit='ms') 190 | 191 | >>> maxtype(Option(TimeDelta(unit='ms'))) 192 | Option(ty=TimeDelta(unit='ms')) 193 | """ 194 | measure = measure.measure 195 | isoption = isinstance(measure, Option) 196 | if isoption: 197 | measure = measure.ty 198 | if (not matches_typeset(measure, scalar) and 199 | not isinstance(measure, (Decimal, TimeDelta))): 200 | 201 | raise TypeError('measure must be numeric') 202 | 203 | if measure == bool_: 204 | result = bool_ 205 | elif isinstance(measure, (Decimal, TimeDelta)): 206 | result = measure 207 | else: 208 | result = max(supertype(measure).types, key=lambda x: x.itemsize) 209 | return Option(result) if isoption else result 210 | -------------------------------------------------------------------------------- /datashape/user.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from datashape.dispatch import dispatch 3 | from .coretypes import 
* 4 | from .predicates import isdimension 5 | from .util import dshape 6 | import sys 7 | from datetime import date, time, datetime 8 | 9 | 10 | __all__ = ['validate', 'issubschema'] 11 | 12 | 13 | basetypes = np.generic, int, float, str, date, time, datetime 14 | 15 | 16 | @dispatch(np.dtype, basetypes) 17 | def validate(schema, value): 18 | return np.issubdtype(type(value), schema) 19 | 20 | 21 | @dispatch(CType, basetypes) 22 | def validate(schema, value): 23 | return validate(to_numpy_dtype(schema), value) 24 | 25 | 26 | @dispatch(DataShape, (tuple, list)) 27 | def validate(schema, value): 28 | head = schema[0] 29 | return ((len(schema) == 1 and validate(head, value)) 30 | or (isdimension(head) 31 | and (isinstance(head, Var) or int(head) == len(value)) 32 | and all(validate(DataShape(*schema[1:]), item) for item in value))) 33 | 34 | 35 | @dispatch(DataShape, object) 36 | def validate(schema, value): 37 | if len(schema) == 1: 38 | return validate(schema[0], value) 39 | 40 | 41 | @dispatch(Record, dict) 42 | def validate(schema, d): 43 | return all(validate(sch, d.get(k)) for k, sch in schema.parameters[0]) 44 | 45 | 46 | @dispatch(Record, (tuple, list)) 47 | def validate(schema, seq): 48 | return all(validate(sch, item) for (k, sch), item 49 | in zip(schema.parameters[0], seq)) 50 | 51 | 52 | @dispatch(str, object) 53 | def validate(schema, value): 54 | return validate(dshape(schema), value) 55 | 56 | 57 | @dispatch(type, object) 58 | def validate(schema, value): 59 | return isinstance(value, schema) 60 | 61 | 62 | @dispatch(tuple, object) 63 | def validate(schemas, value): 64 | return any(validate(schema, value) for schema in schemas) 65 | 66 | 67 | @dispatch(object, object) 68 | def validate(schema, value): 69 | return False 70 | 71 | 72 | @validate.register(String, str) 73 | @validate.register(Time, time) 74 | @validate.register(Date, date) 75 | @validate.register(DateTime, datetime) 76 | def validate_always_true(schema, value): 77 | return True 78 | 79 | 80 | @dispatch(DataShape, np.ndarray) 81 | def validate(schema, value): 82 | return issubschema(from_numpy(value.shape, value.dtype), schema) 83 | 84 | 85 | @dispatch(object, object) 86 | def issubschema(a, b): 87 | return issubschema(dshape(a), dshape(b)) 88 | 89 | 90 | @dispatch(DataShape, DataShape) 91 | def issubschema(a, b): 92 | if a == b: 93 | return True 94 | # TODO, handle cases like float < real 95 | # TODO, handle records {x: int, y: int, z: int} < {x: int, y: int} 96 | 97 | return None # We don't know, return something falsey 98 | -------------------------------------------------------------------------------- /datashape/util/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import print_function, division, absolute_import 3 | 4 | from itertools import chain 5 | import operator 6 | 7 | from .. import py2help 8 | from .. import parser 9 | from .. import type_symbol_table 10 | from ..validation import validate 11 | from .. import coretypes 12 | 13 | 14 | __all__ = 'dshape', 'dshapes', 'has_var_dim', 'has_ellipsis', 'cat_dshapes' 15 | 16 | subclasses = operator.methodcaller('__subclasses__') 17 | 18 | #------------------------------------------------------------------------ 19 | # Utility Functions for DataShapes 20 | #------------------------------------------------------------------------ 21 | 22 | def dshapes(*args): 23 | """ 24 | Parse a bunch of datashapes all at once. 
25 | 26 | >>> a, b = dshapes('3 * int32', '2 * var * float64') 27 | """ 28 | return [dshape(arg) for arg in args] 29 | 30 | 31 | def dshape(o): 32 | """ 33 | Parse a datashape. For a thorough description see 34 | http://blaze.pydata.org/docs/datashape.html 35 | 36 | >>> ds = dshape('2 * int32') 37 | >>> ds[1] 38 | ctype("int32") 39 | """ 40 | if isinstance(o, coretypes.DataShape): 41 | return o 42 | if isinstance(o, py2help._strtypes): 43 | ds = parser.parse(o, type_symbol_table.sym) 44 | elif isinstance(o, (coretypes.CType, coretypes.String, 45 | coretypes.Record, coretypes.JSON, 46 | coretypes.Date, coretypes.Time, coretypes.DateTime, 47 | coretypes.Unit)): 48 | ds = coretypes.DataShape(o) 49 | elif isinstance(o, coretypes.Mono): 50 | ds = o 51 | elif isinstance(o, (list, tuple)): 52 | ds = coretypes.DataShape(*o) 53 | else: 54 | raise TypeError('Cannot create dshape from object of type %s' % type(o)) 55 | validate(ds) 56 | return ds 57 | 58 | 59 | def cat_dshapes(dslist): 60 | """ 61 | Concatenates a list of dshapes together along 62 | the first axis. Raises an error if there is 63 | a mismatch along another axis or the measures 64 | are different. 65 | 66 | Requires that the leading dimension be a known 67 | size for all data shapes. 68 | TODO: Relax this restriction to support 69 | streaming dimensions. 70 | 71 | >>> cat_dshapes(dshapes('10 * int32', '5 * int32')) 72 | dshape("15 * int32") 73 | """ 74 | if len(dslist) == 0: 75 | raise ValueError('Cannot concatenate an empty list of dshapes') 76 | elif len(dslist) == 1: 77 | return dslist[0] 78 | 79 | outer_dim_size = operator.index(dslist[0][0]) 80 | inner_ds = dslist[0][1:] 81 | for ds in dslist[1:]: 82 | outer_dim_size += operator.index(ds[0]) 83 | if ds[1:] != inner_ds: 84 | raise ValueError(('The datashapes to concatenate must' 85 | ' all match after' 86 | ' the first dimension (%s vs %s)') % 87 | (inner_ds, ds[1:])) 88 | return coretypes.DataShape(*[coretypes.Fixed(outer_dim_size)] + list(inner_ds)) 89 | 90 | 91 | def collect(pred, expr): 92 | """ Collect terms in expression that match predicate 93 | 94 | >>> from datashape import Unit, dshape 95 | >>> predicate = lambda term: isinstance(term, Unit) 96 | >>> dshape = dshape('var * {value: int64, loc: 2 * int32}') 97 | >>> sorted(set(collect(predicate, dshape)), key=str) 98 | [Fixed(val=2), ctype("int32"), ctype("int64"), Var()] 99 | >>> from datashape import var, int64 100 | >>> sorted(set(collect(predicate, [var, int64])), key=str) 101 | [ctype("int64"), Var()] 102 | """ 103 | if pred(expr): 104 | return [expr] 105 | if isinstance(expr, coretypes.Record): 106 | return chain.from_iterable(collect(pred, typ) for typ in expr.types) 107 | if isinstance(expr, coretypes.Mono): 108 | return chain.from_iterable(collect(pred, typ) for typ in expr.parameters) 109 | if isinstance(expr, (list, tuple)): 110 | return chain.from_iterable(collect(pred, item) for item in expr) 111 | 112 | 113 | def has_var_dim(ds): 114 | """Returns True if datashape has a variable dimension 115 | 116 | Note: currently treats variable-length strings as scalars.
117 | 118 | >>> has_var_dim(dshape('2 * int32')) 119 | False 120 | >>> has_var_dim(dshape('var * 2 * int32')) 121 | True 122 | """ 123 | return has((coretypes.Ellipsis, coretypes.Var), ds) 124 | 125 | 126 | def has(typ, ds): 127 | if isinstance(ds, typ): 128 | return True 129 | if isinstance(ds, coretypes.Record): 130 | return any(has(typ, t) for t in ds.types) 131 | if isinstance(ds, coretypes.Mono): 132 | return any(has(typ, p) for p in ds.parameters) 133 | if isinstance(ds, (list, tuple)): 134 | return any(has(typ, item) for item in ds) 135 | return False 136 | 137 | 138 | def has_ellipsis(ds): 139 | """Returns True if the datashape has an ellipsis 140 | 141 | >>> has_ellipsis(dshape('2 * int')) 142 | False 143 | >>> has_ellipsis(dshape('... * int')) 144 | True 145 | """ 146 | return has(coretypes.Ellipsis, ds) 147 | -------------------------------------------------------------------------------- /datashape/util/testing.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | 3 | from ..py2help import with_metaclass 4 | from ..coretypes import ( 5 | DataShape, 6 | DateTime, 7 | Function, 8 | Option, 9 | Record, 10 | String, 11 | Time, 12 | TimeDelta, 13 | Tuple, 14 | Units, 15 | ) 16 | from ..dispatch import dispatch 17 | 18 | 19 | def _fmt_path(path): 20 | """Format the path for final display. 21 | 22 | Parameters 23 | ---------- 24 | path : iterable of str 25 | The path to the values that are not equal. 26 | 27 | Returns 28 | ------- 29 | fmtd : str 30 | The formatted path to put into the error message. 31 | """ 32 | if not path: 33 | return '' 34 | return 'path: _' + ''.join(path) 35 | 36 | 37 | @dispatch(DataShape, DataShape) 38 | def assert_dshape_equal(a, b, check_dim=True, path=None, **kwargs): 39 | """Assert that two dshapes are equal, providing an informative error 40 | message when they are not equal. 41 | 42 | Parameters 43 | ---------- 44 | a, b : dshape 45 | The dshapes to check for equality. 46 | check_dim : bool, optional 47 | Check shapes for equality with respect to their dimensions. 48 | default: True 49 | check_tz : bool, optional 50 | Checks times and datetimes for equality with respect to timezones. 51 | default: True 52 | check_timedelta_unit : bool, optional 53 | Checks timedeltas for equality with respect to their unit (us, ns, ...). 54 | default: True 55 | check_str_encoding : bool, optional 56 | Checks strings for equality with respect to their encoding. 57 | default: True 58 | check_str_fixlen : bool, optional 59 | Checks string for equality with respect to their fixlen. 60 | default: True 61 | check_record_order : bool, optional 62 | Checks records for equality with respect to the order of the fields. 63 | default: True 64 | 65 | Raises 66 | ------ 67 | AssertionError 68 | Raised when the two dshapes are not equal. 
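Examples
--------
A minimal sketch of typical usage (``dshape`` is imported here only for
the example; any pair of dshapes works the same way):

>>> from datashape import dshape
>>> assert_dshape_equal(dshape('var * int32'), dshape('var * int32'))
>>> assert_dshape_equal(dshape('2 * int32'), dshape('var * int32'))  # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
    ...
AssertionError: dimensions do not match: 2 != var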
69 | """ 70 | ashape = a.shape 71 | bshape = b.shape 72 | 73 | if path is None: 74 | path = () 75 | 76 | if check_dim: 77 | for n, (adim, bdim) in enumerate(zip(ashape, bshape)): 78 | if adim != bdim: 79 | path += '.shape[%d]' % n, 80 | raise AssertionError( 81 | 'dimensions do not match: %s != %s%s\n%s' % ( 82 | adim, 83 | bdim, 84 | ('\n%s != %s' % ( 85 | ' * '.join(map(str, ashape)), 86 | ' * '.join(map(str, bshape)), 87 | )) if len(a.shape) > 1 else '', 88 | _fmt_path(path), 89 | ), 90 | ) 91 | 92 | path += '.measure', 93 | assert_dshape_equal( 94 | a.measure, 95 | b.measure, 96 | check_dim=check_dim, 97 | path=path, 98 | **kwargs 99 | ) 100 | 101 | 102 | class Slotted(with_metaclass(ABCMeta)): 103 | @classmethod 104 | def __subclasshook__(cls, subcls): 105 | return hasattr(subcls, '__slots__') 106 | 107 | 108 | @assert_dshape_equal.register(Slotted, Slotted) 109 | def _check_slots(a, b, path=None, **kwargs): 110 | if type(a) != type(b): 111 | return _base_case(a, b, path=path, **kwargs) 112 | 113 | assert a.__slots__ == b.__slots__, 'slots mismatch: %r != %r\n%s' % ( 114 | a.__slots__, b.__slots__, _fmt_path(path), 115 | ) 116 | if path is None: 117 | path = () 118 | for slot in a.__slots__: 119 | assert getattr(a, slot) == getattr(b, slot), \ 120 | "%s %ss do not match: %r != %r\n%s" % ( 121 | type(a).__name__.lower(), 122 | slot, 123 | getattr(a, slot), 124 | getattr(b, slot), 125 | _fmt_path(path + ('.' + slot,)), 126 | ) 127 | 128 | 129 | @assert_dshape_equal.register(object, object) 130 | def _base_case(a, b, path=None, **kwargs): 131 | assert a == b, '%s != %s\n%s' % (a, b, _fmt_path(path)) 132 | 133 | 134 | @dispatch((DateTime, Time), (DateTime, Time)) 135 | def assert_dshape_equal(a, b, path=None, check_tz=True, **kwargs): 136 | if type(a) != type(b): 137 | return _base_case(a, b) 138 | if check_tz: 139 | _check_slots(a, b, path) 140 | 141 | 142 | @dispatch(TimeDelta, TimeDelta) 143 | def assert_dshape_equal(a, b, path=None, check_timedelta_unit=True, **kwargs): 144 | if check_timedelta_unit: 145 | _check_slots(a, b, path) 146 | 147 | 148 | @dispatch(Units, Units) 149 | def assert_dshape_equal(a, b, path=None, **kwargs): 150 | if path is None: 151 | path = () 152 | 153 | assert a.unit == b.unit, '%s units do not match: %r != %s\n%s' % ( 154 | type(a).__name__.lower(), a.unit, b.unit, _fmt_path(path + ('.unit',)), 155 | ) 156 | 157 | path.append('.tp') 158 | assert_dshape_equal(a.tp, b.tp, **kwargs) 159 | 160 | 161 | @dispatch(String, String) 162 | def assert_dshape_equal(a, 163 | b, 164 | path=None, 165 | check_str_encoding=True, 166 | check_str_fixlen=True, 167 | **kwargs): 168 | if path is None: 169 | path = () 170 | if check_str_encoding: 171 | assert a.encoding == b.encoding, \ 172 | 'string encodings do not match: %r != %r\n%s' % ( 173 | a.encoding, b.encoding, _fmt_path(path + ('.encoding',)), 174 | ) 175 | 176 | if check_str_fixlen: 177 | assert a.fixlen == b.fixlen, \ 178 | 'string fixlens do not match: %d != %d\n%s' % ( 179 | a.fixlen, b.fixlen, _fmt_path(path + ('.fixlen',)), 180 | ) 181 | 182 | 183 | @dispatch(Option, Option) 184 | def assert_dshape_equal(a, b, path=None, **kwargs): 185 | if path is None: 186 | path = () 187 | path += '.ty', 188 | return assert_dshape_equal(a.ty, b.ty, path=path, **kwargs) 189 | 190 | 191 | @dispatch(Record, Record) 192 | def assert_dshape_equal(a, b, check_record_order=True, path=None, **kwargs): 193 | afields = a.fields 194 | bfields = b.fields 195 | 196 | assert len(afields) == len(bfields), \ 197 | 'records have mismatched field 
counts: %d != %d\n%r != %r\n%s' % ( 198 | len(afields), len(bfields), a.names, b.names, _fmt_path(path), 199 | ) 200 | 201 | if not check_record_order: 202 | afields = sorted(afields) 203 | bfields = sorted(bfields) 204 | 205 | if path is None: 206 | path = () 207 | for n, ((aname, afield), (bname, bfield)) in enumerate( 208 | zip(afields, bfields)): 209 | 210 | assert aname == bname, \ 211 | 'record field name at position %d does not match: %r != %r\n%s' % ( 212 | n, aname, bname, _fmt_path(path), 213 | ) 214 | 215 | assert_dshape_equal( 216 | afield, 217 | bfield, 218 | path=path + ('[%s]' % repr(aname),), 219 | check_record_order=check_record_order, 220 | **kwargs 221 | ) 222 | 223 | 224 | @dispatch(Tuple, Tuple) 225 | def assert_dshape_equal(a, b, path=None, **kwargs): 226 | assert len(a.dshapes) == len(b.dshapes), \ 227 | 'tuples have mismatched field counts: %d != %d\n%r != %r\n%s' % ( 228 | len(a.dshapes), len(b.dshapes), a, b, _fmt_path(path), 229 | ) 230 | 231 | if path is None: 232 | path = () 233 | path += '.dshapes', 234 | for n, (ashape, bshape) in enumerate(zip(a.dshapes, b.dshapes)): 235 | assert_dshape_equal( 236 | ashape, 237 | bshape, 238 | path=path + ('[%d]' % n,), 239 | **kwargs 240 | ) 241 | 242 | 243 | @dispatch(Function, Function) 244 | def assert_dshape_equal(a, b, path=None, **kwargs): 245 | assert len(a.argtypes) == len(b.argtypes),\ 246 | 'functions have different arities: %d != %d\n%r != %r\n%s' % ( 247 | len(a.argtypes), len(b.argtypes), a, b, _fmt_path(path), 248 | ) 249 | 250 | if path is None: 251 | path = () 252 | for n, (aarg, barg) in enumerate(zip(a.argtypes, b.argtypes)): 253 | assert_dshape_equal( 254 | aarg, 255 | barg, 256 | path=path + ('.argtypes[%d]' % n,), **kwargs 257 | ) 258 | assert_dshape_equal( 259 | a.restype, 260 | b.restype, 261 | path=path + ('.restype',), 262 | **kwargs 263 | ) 264 | -------------------------------------------------------------------------------- /datashape/util/tests/test_testing.py: -------------------------------------------------------------------------------- 1 | """Testing the test helpers. 2 | 3 | Kill me now. 
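These tests exercise ``assert_dshape_equal``: both the equality checks
themselves and the error messages, in particular the ``path: _...`` lines
that report where two dshapes diverge.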
4 | """ 5 | import pytest 6 | 7 | from datashape.coretypes import ( 8 | DateTime, 9 | R, 10 | String, 11 | Time, 12 | TimeDelta, 13 | Tuple, 14 | Option, 15 | int32, 16 | float32, 17 | ) 18 | from datashape.py2help import PY2 19 | from datashape.util import dshape 20 | from datashape.util.testing import assert_dshape_equal 21 | 22 | 23 | def test_datashape_measure(): 24 | assert_dshape_equal(dshape('int'), dshape('int')) 25 | 26 | with pytest.raises(AssertionError) as e: 27 | assert_dshape_equal(dshape('int'), dshape('string')) 28 | assert 'int32 != string' in str(e.value) 29 | assert '_.measure' in str(e.value) 30 | 31 | 32 | def test_dim(): 33 | assert_dshape_equal(dshape('var * int'), dshape('var * int')) 34 | assert_dshape_equal(dshape('3 * string'), dshape('3 * string')) 35 | 36 | with pytest.raises(AssertionError) as e: 37 | assert_dshape_equal(dshape('var * int'), dshape('3 * int')) 38 | assert 'var != 3' in str(e.value) 39 | assert '_.shape[0]' in str(e.value) 40 | 41 | assert_dshape_equal(dshape('var * var * int'), dshape('var * var * int')) 42 | assert_dshape_equal(dshape('var * 3 * string'), dshape('var * 3 * string')) 43 | assert_dshape_equal( 44 | dshape('3 * var * float32'), 45 | dshape('3 * var * float32'), 46 | ) 47 | assert_dshape_equal( 48 | dshape('3 * 3 * datetime'), 49 | dshape('3 * 3 * datetime'), 50 | ) 51 | 52 | with pytest.raises(AssertionError) as e: 53 | assert_dshape_equal( 54 | dshape('var * var * int'), 55 | dshape('3 * var * int'), 56 | ) 57 | assert 'var != 3' in str(e.value) 58 | assert '_.shape[0]' in str(e.value) 59 | 60 | with pytest.raises(AssertionError) as e: 61 | assert_dshape_equal( 62 | dshape('var * var * int'), 63 | dshape('var * 3 * int'), 64 | ) 65 | assert 'var != 3' in str(e.value) 66 | assert '_.shape[1]' in str(e.value) 67 | 68 | 69 | def test_record(): 70 | assert_dshape_equal( 71 | R['a': int32, 'b': float32], 72 | R['a': int32, 'b': float32], 73 | ) 74 | 75 | with pytest.raises(AssertionError) as e: 76 | assert_dshape_equal( 77 | R['a': int32, 'b': float32], 78 | R['a': int32, 'b': int32], 79 | ) 80 | assert "'float32' != 'int32'" in str(e) 81 | assert "_['b'].name" in str(e.value) 82 | 83 | with pytest.raises(AssertionError) as e: 84 | assert_dshape_equal( 85 | R['a': int32, 'b': float32], 86 | R['a': int32, 'c': float32], 87 | ) 88 | assert "'b' != 'c'" in str(e.value) 89 | 90 | with pytest.raises(AssertionError) as e: 91 | assert_dshape_equal( 92 | R['b': float32, 'a': float32], 93 | R['a': int32, 'b': float32], 94 | check_record_order=False, 95 | ) 96 | assert "'float32' != 'int32'" in str(e.value) 97 | assert "_['a']" in str(e.value) 98 | 99 | assert_dshape_equal( 100 | R['b': float32, 'a': int32], 101 | R['a': int32, 'b': float32], 102 | check_record_order=False, 103 | ) 104 | 105 | # check a nested record with and without ordering 106 | assert_dshape_equal( 107 | R['a': R['b': float32, 'a': int32]], 108 | R['a': R['a': int32, 'b': float32]], 109 | check_record_order=False, 110 | ) 111 | 112 | with pytest.raises(AssertionError) as e: 113 | assert_dshape_equal( 114 | R['a': R['a': int32, 'b': float32]], 115 | R['a': R['b': float32, 'a': int32]], 116 | ) 117 | 118 | assert "'a' != 'b'" in str(e.value) 119 | assert "_['a']" in str(e.value) 120 | 121 | 122 | def test_tuple(): 123 | assert_dshape_equal(Tuple((int32, float32)), Tuple((int32, float32))) 124 | 125 | with pytest.raises(AssertionError) as e: 126 | assert_dshape_equal(Tuple((int32, float32)), Tuple((int32, int32))) 127 | assert "'float32' != 'int32'" in str(e) 128 | assert 
"_.dshapes[1].measure.name" in str(e.value) 129 | 130 | with pytest.raises(AssertionError) as e: 131 | assert_dshape_equal(Tuple((int32, float32)), Tuple((int32, int32))) 132 | assert "'float32' != 'int32'" in str(e) 133 | assert '_.dshapes[1].measure.name' in str(e.value) 134 | 135 | 136 | def test_option(): 137 | assert_dshape_equal(Option(int32), Option(int32)) 138 | 139 | with pytest.raises(AssertionError) as e: 140 | assert_dshape_equal(Option(int32), Option(float32)) 141 | assert "'int32' != 'float32'" in str(e.value) 142 | assert '_.ty' in str(e.value) 143 | 144 | 145 | def test_string(): 146 | assert_dshape_equal(String(), String()) 147 | assert_dshape_equal(String('U8'), String('U8')) 148 | assert_dshape_equal(String(1), String(1)) 149 | assert_dshape_equal(String(1, 'U8'), String(1, 'U8')) 150 | 151 | with pytest.raises(AssertionError) as e: 152 | assert_dshape_equal(String('U8'), String('U16')) 153 | 154 | assert "{u}'U8' != {u}'U16'".format(u='u' if PY2 else '') in str(e.value) 155 | assert '_.encoding' in str(e.value) 156 | 157 | with pytest.raises(AssertionError) as e: 158 | assert_dshape_equal(String(1), String(2)) 159 | assert '1 != 2' in str(e.value) 160 | assert '_.fixlen' in str(e.value) 161 | 162 | 163 | def test_timedelta(): 164 | assert_dshape_equal(TimeDelta(), TimeDelta()) 165 | assert_dshape_equal(TimeDelta('ns'), TimeDelta('ns')) 166 | 167 | with pytest.raises(AssertionError) as e: 168 | assert_dshape_equal(TimeDelta('us'), TimeDelta('ns')) 169 | assert "'us' != 'ns'" in str(e.value) 170 | assert '_.unit' in str(e.value) 171 | 172 | assert_dshape_equal( 173 | TimeDelta('us'), 174 | TimeDelta('ns'), 175 | check_timedelta_unit=False, 176 | ) 177 | 178 | 179 | @pytest.mark.parametrize('cls', (DateTime, Time)) 180 | def test_datetime(cls): 181 | assert_dshape_equal(cls(), cls()) 182 | assert_dshape_equal(cls('US/Eastern'), cls('US/Eastern')) 183 | 184 | with pytest.raises(AssertionError) as e: 185 | assert_dshape_equal(cls('US/Eastern'), cls('US/Central')) 186 | assert "'US/Eastern' != 'US/Central'" in str(e.value) 187 | assert '_.tz' in str(e.value) 188 | 189 | assert_dshape_equal( 190 | cls('US/Eastern'), 191 | cls('US/Central'), 192 | check_tz=False, 193 | ) 194 | 195 | 196 | def test_nested(): 197 | assert_dshape_equal( 198 | dshape('var * {a: 3 * {b: int32}}'), 199 | dshape('var * {a: 3 * {b: int32}}'), 200 | ) 201 | 202 | with pytest.raises(AssertionError) as e: 203 | assert_dshape_equal( 204 | dshape('var * {a: 3 * {b: int32}}'), 205 | dshape('var * {a: 3 * {b: float32}}'), 206 | ) 207 | assert "'int32' != 'float32'" in str(e.value) 208 | assert "_.measure['a'].measure['b'].name" in str(e.value) 209 | 210 | 211 | @pytest.mark.parametrize( 212 | 'dshape_,contains', ( 213 | ( 214 | '(string, int64) -> int64', ( 215 | 'string != int32', 216 | '_.measure.argtypes[0].measure', 217 | ), 218 | ), 219 | ( 220 | '(int32, int32) -> int64', ( 221 | "'int32' != 'int64'", 222 | '_.measure.argtypes[1].measure.name', 223 | ), 224 | ), 225 | ( 226 | '(int32, int64) -> int32', ( 227 | "'int32' != 'int64'", 228 | '_.measure.restype.measure.name', 229 | ), 230 | ), 231 | ), 232 | ) 233 | def test_function(dshape_, contains): 234 | base = dshape('(int32, int64) -> int64') 235 | assert_dshape_equal(base, base) 236 | 237 | with pytest.raises(AssertionError) as e: 238 | assert_dshape_equal(dshape(dshape_), base) 239 | for c in contains: 240 | assert c in str(e.value) 241 | -------------------------------------------------------------------------------- /datashape/validation.py: 
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """
4 | Datashape validation.
5 | """
6 | 
7 | from . import coretypes as T
8 | 
9 | 
10 | def traverse(f, t):
11 | """
12 | Map `f` over `t`, calling `f` with the type `t` and the result of
13 | mapping `f` over `t`'s parameters.
14 | 
15 | Parameters
16 | ----------
17 | f : callable
18 | t : DataShape
19 | 
20 | Returns
21 | -------
22 | DataShape
23 | """
24 | if isinstance(t, T.Mono) and not isinstance(t, T.Unit):
25 | return f(t, [traverse(f, p) for p in t.parameters])
26 | return t
27 | 
28 | 
29 | def validate(ds):
30 | """
31 | Validate a datashape to see whether it is well-formed.
32 | 
33 | Parameters
34 | ----------
35 | ds : DataShape
36 | 
37 | Examples
38 | --------
39 | >>> from datashape import dshape
40 | >>> dshape('10 * int32')
41 | dshape("10 * int32")
42 | >>> dshape('... * int32')
43 | dshape("... * int32")
44 | >>> dshape('... * ... * int32') # doctest: +IGNORE_EXCEPTION_DETAIL
45 | Traceback (most recent call last):
46 | ...
47 | TypeError: Can only use a single wildcard
48 | >>> dshape('T * ... * X * ... * X') # doctest: +IGNORE_EXCEPTION_DETAIL
49 | Traceback (most recent call last):
50 | ...
51 | TypeError: Can only use a single wildcard
52 | >>> dshape('T * ...') # doctest: +IGNORE_EXCEPTION_DETAIL
53 | Traceback (most recent call last):
54 | ...
55 | DataShapeSyntaxError: Expected a dtype
56 | """
57 | traverse(_validate, ds)
58 | 
59 | 
60 | def _validate(ds, params):
61 | if isinstance(ds, T.DataShape):
62 | # Check ellipses
63 | ellipses = [x for x in ds.parameters if isinstance(x, T.Ellipsis)]
64 | if len(ellipses) > 1:
65 | raise TypeError("Can only use a single wildcard")
66 | elif isinstance(ds.parameters[-1], T.Ellipsis):
67 | raise TypeError("Measure may not be an Ellipsis (...)")
68 | 
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 | 
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 | 
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/datashape.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/datashape.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 
96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/datashape" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/datashape" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 
100 | goto end
101 | )
102 | 
103 | if "%1" == "htmlhelp" (
104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | if errorlevel 1 exit /b 1
106 | echo.
107 | echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | goto end
110 | )
111 | 
112 | if "%1" == "qthelp" (
113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | if errorlevel 1 exit /b 1
115 | echo.
116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\datashape.qhcp
119 | echo.To view the help file:
120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\datashape.qhc
121 | goto end
122 | )
123 | 
124 | if "%1" == "devhelp" (
125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished.
129 | goto end
130 | )
131 | 
132 | if "%1" == "epub" (
133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | if errorlevel 1 exit /b 1
135 | echo.
136 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | goto end
138 | )
139 | 
140 | if "%1" == "latex" (
141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | if errorlevel 1 exit /b 1
143 | echo.
144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | goto end
146 | )
147 | 
148 | if "%1" == "latexpdf" (
149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | cd %BUILDDIR%/latex
151 | make all-pdf
152 | cd %BUILDDIR%/..
153 | echo.
154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | goto end
156 | )
157 | 
158 | if "%1" == "latexpdfja" (
159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | cd %BUILDDIR%/latex
161 | make all-pdf-ja
162 | cd %BUILDDIR%/..
163 | echo.
164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | goto end
166 | )
167 | 
168 | if "%1" == "text" (
169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | if errorlevel 1 exit /b 1
171 | echo.
172 | echo.Build finished. The text files are in %BUILDDIR%/text.
173 | goto end
174 | )
175 | 
176 | if "%1" == "man" (
177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
178 | if errorlevel 1 exit /b 1
179 | echo.
180 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
181 | goto end
182 | )
183 | 
184 | if "%1" == "texinfo" (
185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
186 | if errorlevel 1 exit /b 1
187 | echo.
188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
189 | goto end
190 | )
191 | 
192 | if "%1" == "gettext" (
193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
194 | if errorlevel 1 exit /b 1
195 | echo.
196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
197 | goto end
198 | )
199 | 
200 | if "%1" == "changes" (
201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
202 | if errorlevel 1 exit /b 1
203 | echo.
204 | echo.The overview file is in %BUILDDIR%/changes.
205 | goto end
206 | )
207 | 
208 | if "%1" == "linkcheck" (
209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
210 | if errorlevel 1 exit /b 1
211 | echo.
212 | echo.Link check complete; look for any errors in the above output ^
213 | or in %BUILDDIR%/linkcheck/output.txt.
214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # datashape documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Sep 4 08:31:10 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | #sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.doctest', 34 | 'sphinx.ext.intersphinx', 35 | 'sphinx.ext.todo', 36 | 'sphinx.ext.coverage', 37 | 'sphinx.ext.mathjax', 38 | 'sphinx.ext.ifconfig', 39 | 'sphinx.ext.viewcode', 40 | 'sphinx.ext.extlinks', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix of source filenames. 47 | source_suffix = '.rst' 48 | 49 | # The encoding of source files. 50 | #source_encoding = 'utf-8-sig' 51 | 52 | # The master toctree document. 53 | master_doc = 'index' 54 | 55 | # General information about the project. 56 | project = u'datashape' 57 | copyright = u'2015, Continuum Analytics' 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | from datashape import __version__ as version 65 | # The full version, including alpha/beta/rc tags. 66 | release = version 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 
70 | #language = None 71 | 72 | # There are two options for replacing |today|: either, you set today to some 73 | # non-false value, then it is used: 74 | #today = '' 75 | # Else, today_fmt is used as the format for a strftime call. 76 | #today_fmt = '%B %d, %Y' 77 | 78 | # List of patterns, relative to source directory, that match files and 79 | # directories to ignore when looking for source files. 80 | exclude_patterns = [] 81 | 82 | # The reST default role (used for this markup: `text`) to use for all 83 | # documents. 84 | #default_role = None 85 | 86 | # If true, '()' will be appended to :func: etc. cross-reference text. 87 | #add_function_parentheses = True 88 | 89 | # If true, the current module name will be prepended to all description 90 | # unit titles (such as .. function::). 91 | #add_module_names = True 92 | 93 | # If true, sectionauthor and moduleauthor directives will be shown in the 94 | # output. They are ignored by default. 95 | #show_authors = False 96 | 97 | # The name of the Pygments (syntax highlighting) style to use. 98 | pygments_style = 'sphinx' 99 | 100 | # A list of ignored prefixes for module index sorting. 101 | #modindex_common_prefix = [] 102 | 103 | # If true, keep warnings as "system message" paragraphs in the built documents. 104 | #keep_warnings = False 105 | 106 | 107 | # -- Options for HTML output ---------------------------------------------- 108 | 109 | # The theme to use for HTML and HTML Help pages. See the documentation for 110 | # a list of builtin themes. 111 | html_theme = 'default' 112 | 113 | # Theme options are theme-specific and customize the look and feel of a theme 114 | # further. For a list of options available for each theme, see the 115 | # documentation. 116 | #html_theme_options = {} 117 | 118 | # Add any paths that contain custom themes here, relative to this directory. 119 | #html_theme_path = [] 120 | 121 | # The name for this set of Sphinx documents. If None, it defaults to 122 | # " v documentation". 123 | #html_title = None 124 | 125 | # A shorter title for the navigation bar. Default is the same as html_title. 126 | #html_short_title = None 127 | 128 | # The name of an image file (relative to this directory) to place at the top 129 | # of the sidebar. 130 | #html_logo = None 131 | 132 | # The name of an image file (within the static path) to use as favicon of the 133 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 134 | # pixels large. 135 | #html_favicon = None 136 | 137 | # Add any paths that contain custom static files (such as style sheets) here, 138 | # relative to this directory. They are copied after the builtin static files, 139 | # so a file named "default.css" will overwrite the builtin "default.css". 140 | html_static_path = ['_static'] 141 | 142 | # Add any extra paths that contain custom files (such as robots.txt or 143 | # .htaccess) here, relative to this directory. These files are copied 144 | # directly to the root of the documentation. 145 | #html_extra_path = [] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 148 | # using the given strftime format. 149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 
156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names to 159 | # template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 175 | #html_show_sphinx = True 176 | 177 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 178 | #html_show_copyright = True 179 | 180 | # If true, an OpenSearch description file will be output, and all pages will 181 | # contain a tag referring to it. The value of this option must be the 182 | # base URL from which the finished HTML is served. 183 | #html_use_opensearch = '' 184 | 185 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 186 | #html_file_suffix = None 187 | 188 | # Output file base name for HTML help builder. 189 | htmlhelp_basename = 'datashapedoc' 190 | 191 | 192 | # -- Options for LaTeX output --------------------------------------------- 193 | 194 | latex_elements = { 195 | # The paper size ('letterpaper' or 'a4paper'). 196 | #'papersize': 'letterpaper', 197 | 198 | # The font size ('10pt', '11pt' or '12pt'). 199 | #'pointsize': '10pt', 200 | 201 | # Additional stuff for the LaTeX preamble. 202 | #'preamble': '', 203 | } 204 | 205 | # Grouping the document tree into LaTeX files. List of tuples 206 | # (source start file, target name, title, 207 | # author, documentclass [howto, manual, or own class]). 208 | latex_documents = [ 209 | ('index', 'datashape.tex', u'datashape Documentation', 210 | u'Continuum Analytics', 'manual'), 211 | ] 212 | 213 | # The name of an image file (relative to this directory) to place at the top of 214 | # the title page. 215 | #latex_logo = None 216 | 217 | # For "manual" documents, if this is true, then toplevel headings are parts, 218 | # not chapters. 219 | #latex_use_parts = False 220 | 221 | # If true, show page references after internal links. 222 | #latex_show_pagerefs = False 223 | 224 | # If true, show URL addresses after external links. 225 | #latex_show_urls = False 226 | 227 | # Documents to append as an appendix to all manuals. 228 | #latex_appendices = [] 229 | 230 | # If false, no module index is generated. 231 | #latex_domain_indices = True 232 | 233 | 234 | # -- Options for manual page output --------------------------------------- 235 | 236 | # One entry per manual page. List of tuples 237 | # (source start file, name, description, authors, manual section). 238 | man_pages = [ 239 | ('index', 'datashape', u'datashape Documentation', 240 | [u'Continuum Analytics'], 1) 241 | ] 242 | 243 | # If true, show URL addresses after external links. 244 | #man_show_urls = False 245 | 246 | 247 | # -- Options for Texinfo output ------------------------------------------- 248 | 249 | # Grouping the document tree into Texinfo files. 
List of tuples
250 | # (source start file, target name, title, author,
251 | # dir menu entry, description, category)
252 | texinfo_documents = [
253 | ('index', 'datashape', u'datashape Documentation',
254 | u'Continuum Analytics', 'datashape', 'One line description of project.',
255 | 'Miscellaneous'),
256 | ]
257 | 
258 | # Documents to append as an appendix to all manuals.
259 | #texinfo_appendices = []
260 | 
261 | # If false, no module index is generated.
262 | #texinfo_domain_indices = True
263 | 
264 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
265 | #texinfo_show_urls = 'footnote'
266 | 
267 | # If true, do not generate a @detailmenu in the "Top" node's menu.
268 | #texinfo_no_detailmenu = False
269 | 
270 | 
271 | # Example configuration for intersphinx: refer to the Python standard library.
272 | intersphinx_mapping = {'http://docs.python.org/': None}
273 | extlinks = dict(issue=('https://github.com/blaze/datashape/issues/%s', '#'))
274 | 
--------------------------------------------------------------------------------
/docs/source/grammar.rst:
--------------------------------------------------------------------------------
1 | DataShape Grammar
2 | =================
3 | 
4 | The datashape language is a DSL which describes the structure of data, abstracted from
5 | a particular implementation in a language or file format. Compared to the Python
6 | library NumPy, it combines `shape` and `dtype` into a single type, and introduces a
7 | syntax for describing structured data.
8 | 
9 | Some of the basic features include:
10 | 
11 | * Dimensions are separated by asterisks.
12 | 
13 | * Lists of types are separated by commas.
14 | 
15 | * Types and Typevars are distinguished by the capitalization of the leading
16 | character. Lowercase for types, and uppercase for typevars.
17 | 
18 | * Type constructors operate using square brackets.
19 | 
20 | * Type constructors accept positional and keyword arguments,
21 | and their arguments may be:
22 | 
23 | * datashape, string, integer, list of datashape, list of string,
24 | list of integer
25 | 
26 | * In multi-line datashape strings or files, comments run from a ``#``
27 | character to the end of the line.
28 | 
29 | Some Simple Examples
30 | --------------------
31 | 
32 | Here are some simple examples to motivate the idea::
33 | 
34 | # Scalar types
35 | bool
36 | int32
37 | float64
38 | 
39 | # Scalar types with missing data/NA support
40 | ?bool
41 | ?float32
42 | ?complex
43 | 
44 | # Arrays
45 | 3 * 4 * int32
47 | 10 * var * float64
48 | 3 * complex[float64]
49 | 
50 | # Array of Structures
51 | 100 * {
52 | name: string,
53 | birthday: date,
54 | address: {
55 | street: string,
56 | city: string,
57 | postalcode: string,
58 | country: string
59 | }
60 | }
61 | 
62 | # Structure of Arrays
63 | {
64 | x: 100 * 100 * float32,
65 | y: 100 * 100 * float32,
66 | u: 100 * 100 * float32,
67 | v: 100 * 100 * float32,
68 | }
69 | 
70 | # Structure with strings for field names
71 | {
72 | 'field 0': 100 * float32,
73 | 'field 1': float32,
74 | 'field 2': float32,
75 | }
76 | 
77 | # Array of Tuples
78 | 20 * (int32, float64)
79 | 
80 | # Function prototype
81 | (3 * int32, float64) -> 3 * float64
82 | 
83 | # Function prototype with broadcasting dimensions
84 | (A... * int32, A... * int32) -> A... * int32
85 | 
86 | Syntactic Sugar
87 | ---------------
88 | 
89 | Many syntax elements in datashape are syntax sugar for particular
90 | type constructors.
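Each sugared form and its type-constructor spelling denote the same type; as a quick sanity check with the Python implementation (a sketch using only the public ``dshape`` constructor from this package)::

    >>> from datashape import dshape
    >>> dshape('?int32') == dshape('option[int32]')
    True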
For dtypes, this is::
91 | 
92 | {x : int32, y : int16} => struct[['x', 'y'], [int32, int16]]
93 | (int64, float32) => tuple[[int64, float32]]
94 | (int64, float32) -> bool => funcproto[[int64, float32], bool]
95 | DTypeVar => typevar['DTypeVar']
96 | ?int32 => option[int32]
97 | 2 * ?3 * int32 => 2 * option[3 * int32]
98 | 
99 | For dims, this is::
100 | 
101 | 3 * int32 => fixed[3] * int32
102 | DimVar * int32 => typevar['DimVar'] * int32
103 | ... * int32 => ellipsis * int32
104 | DimVar... * int32 => ellipsis['DimVar'] * int32
105 | 
106 | The DataShape Grammar
107 | ---------------------
108 | 
109 | Dimension Type Symbol Table::
110 | 
111 | # Variable-sized dimension
112 | var
113 | 
114 | Dimension Type Constructor Symbol Table::
115 | 
116 | # Arrays which are either missing or fully there
117 | # option[3 * int32]
118 | option
119 | 
120 | Data Type Symbol Table::
121 | 
122 | # Numeric
123 | bool
124 | # Two's complement binary integers
125 | int8
126 | int16
127 | int32
128 | int64
129 | int128
130 | # Unsigned binary integers
131 | uint8
132 | uint16
133 | uint32
134 | uint64
135 | uint128
136 | # IEEE 754-2008 binary### floating point numbers
137 | float16
138 | float32
139 | float64
140 | float128
141 | # IEEE 754-2008 decimal### floating point numbers
142 | decimal32
143 | decimal64
144 | decimal128
145 | # Arbitrary precision integer
146 | bignum
147 | # Alias for int32
148 | int
149 | # Alias for float64
150 | real
151 | # Alias for complex[float64]
152 | complex
153 | # Alias for int32 or int64 depending on platform
154 | intptr
155 | # Alias for uint32 or uint64 depending on platform
156 | uintptr
157 | 
158 | # A unicode string
159 | string
160 | # A single unicode code point
161 | char
162 | # A blob of bytes
163 | bytes
164 | # A date
165 | date
166 | # A string containing JSON
167 | json
168 | # No data
169 | void
170 | 
171 | Data Type Constructor Symbol Table::
172 | 
173 | # complex[float32], complex[type=float64]
174 | complex
175 | # string['ascii'], string[enc='cp949']
176 | string
177 | # bytes[size=4,align=2]
178 | bytes
179 | # datetime[unit='minutes',tz='CST']
180 | datetime
181 | # categorical[type=string, values=['low', 'medium', 'high']]
182 | categorical
183 | # option[float64]
184 | option
185 | # pointer[target=2 * 3 * int32]
186 | pointer
187 | 
188 | Tokens::
189 | 
190 | NAME_LOWER : [a-z][a-zA-Z0-9_]*
191 | NAME_UPPER : [A-Z][a-zA-Z0-9_]*
192 | NAME_OTHER : _[a-zA-Z0-9_]*
193 | ASTERISK : \*
194 | COMMA : ,
195 | EQUAL : =
196 | COLON : :
197 | LBRACKET : \[
198 | RBRACKET : \]
199 | LBRACE : \{
200 | RBRACE : \}
201 | LPAREN : \(
202 | RPAREN : \)
203 | ELLIPSIS : \.\.\.
204 | RARROW : ->
205 | QUESTIONMARK : ?
206 | INTEGER : 0(?![0-9])|[1-9][0-9]*
207 | STRING : (?:"(?:[^"\n\r\\]|(?:\\u[0-9a-fA-F]{4})|(?:\\["bfnrt]))*")|(?:\'(?:[^\'\n\r\\]|(?:\\u[0-9a-fA-F]{4})|(?:\\['bfnrt]))*\')
208 | 
209 | 
210 | Grammar::
211 | 
212 | # Datashape may start with a '?'
or not to signal optionality
213 | datashape : datashape_nooption
214 | | QUESTIONMARK datashape_nooption
215 | 
216 | # Asterisk-separated list of dimensions, followed by data type
217 | datashape_nooption : dim ASTERISK datashape
218 | | dtype
219 | 
220 | # Dimension Type (from the dimension type symbol table)
221 | dim : typevar
222 | | ellipsis_typevar
223 | | type
224 | | type_constr
225 | | INTEGER
226 | | ELLIPSIS
227 | 
228 | # Data Type (from the data type symbol table)
229 | dtype : typevar
230 | | type
231 | | type_constr
232 | | struct_type
233 | | funcproto_or_tuple_type
234 | 
235 | # A type variable
236 | typevar : NAME_UPPER
237 | 
238 | # A type variable with ellipsis
239 | ellipsis_typevar : NAME_UPPER ELLIPSIS
240 | 
241 | # A bare type (from the data type symbol table)
242 | type : NAME_LOWER
243 | 
244 | # Type Constructor (from the data type constructor symbol table)
245 | type_constr : NAME_LOWER LBRACKET type_arg_list RBRACKET
246 | 
247 | # Type Constructor: list of arguments
248 | type_arg_list : type_arg COMMA type_arg_list
249 | | type_kwarg_list
250 | | type_arg
251 | 
252 | # Type Constructor: list of keyword arguments
253 | type_kwarg_list : type_kwarg COMMA type_kwarg_list
254 | | type_kwarg
255 | 
256 | # Type Constructor : single argument
257 | type_arg : datashape
258 | | INTEGER
259 | | STRING
260 | | list_type_arg
261 | 
262 | # Type Constructor : single keyword argument
263 | type_kwarg : NAME_LOWER EQUAL type_arg
264 | 
265 | # Type Constructor : single list argument
266 | list_type_arg : LBRACKET RBRACKET
267 | | LBRACKET datashape_list RBRACKET
268 | | LBRACKET integer_list RBRACKET
269 | | LBRACKET string_list RBRACKET
270 | 
271 | datashape_list : datashape COMMA datashape_list
272 | | datashape
273 | 
274 | integer_list : INTEGER COMMA integer_list
275 | | INTEGER
276 | 
277 | string_list : STRING COMMA string_list
278 | | STRING
279 | 
280 | 
281 | # Struct/Record type (allowing for a trailing comma)
282 | struct_type : LBRACE struct_field_list RBRACE
283 | | LBRACE struct_field_list COMMA RBRACE
284 | 
285 | struct_field_list : struct_field COMMA struct_field_list
286 | | struct_field
287 | 
288 | struct_field : struct_field_name COLON datashape
289 | 
290 | struct_field_name : NAME_LOWER
291 | | NAME_UPPER
292 | | NAME_OTHER
293 | | STRING
294 | 
295 | # Function prototype is a tuple with an arrow to the output type
296 | funcproto_or_tuple_type : tuple_type RARROW datashape
297 | | tuple_type
298 | 
299 | # Tuple type (allowing for a trailing comma)
300 | tuple_type : LPAREN tuple_item_list RPAREN
301 | | LPAREN tuple_item_list COMMA RPAREN
302 | 
303 | tuple_item_list : datashape COMMA tuple_item_list
304 | | datashape
305 | 
306 | 
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. datashape documentation master file, created by
2 | sphinx-quickstart on Fri Sep 4 08:31:10 2015.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 | 
6 | Welcome to datashape's documentation!
7 | =====================================
8 | 
9 | Contents:
10 | 
11 | ..
toctree:: 12 | :maxdepth: 2 13 | 14 | overview 15 | types 16 | pattern_matching 17 | grammar 18 | releases 19 | 20 | 21 | 22 | Indices and tables 23 | ================== 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | 29 | -------------------------------------------------------------------------------- /docs/source/overview.rst: -------------------------------------------------------------------------------- 1 | Datashape Overview 2 | ================== 3 | 4 | Datashape is a data layout language for array programming. It is designed 5 | to describe in-situ structured data without requiring transformation 6 | into a canonical form. 7 | 8 | Similar to NumPy, datashape includes ``shape`` and ``dtype``, but combined 9 | together in the type system. 10 | 11 | Units 12 | ----- 13 | 14 | Single named types in datashape are called ``unit`` types. They represent 15 | either a dtype like ``int32`` or ``datetime``, or a single dimension 16 | like ``var``. Dimensions and a single dtype are composed together in 17 | a datashape type. 18 | 19 | Primitive Types 20 | ~~~~~~~~~~~~~~~ 21 | 22 | DataShape includes a variety of dtypes corresponding to C/C++ 23 | types, similar to NumPy. 24 | 25 | .. cssclass:: table-striped 26 | 27 | ================ ========================================================= 28 | Bit type Description 29 | ================ ========================================================= 30 | bool Boolean (True or False) stored as a byte 31 | int8 Byte (-128 to 127) 32 | int16 Two's Complement Integer (-32768 to 32767) 33 | int32 Two's Complement Integer (-2147483648 to 2147483647) 34 | int64 Two's Complement Integer (-9223372036854775808 to 9223372036854775807) 35 | uint8 Unsigned integer (0 to 255) 36 | uint16 Unsigned integer (0 to 65535) 37 | uint32 Unsigned integer (0 to 4294967295) 38 | uint64 Unsigned integer (0 to 18446744073709551615) 39 | float16 Half precision float: sign bit, 5 bits exponent, 40 | 10 bits mantissa 41 | float32 Single precision float: sign bit, 8 bits exponent, 42 | 23 bits mantissa 43 | float64 Double precision float: sign bit, 11 bits exponent, 44 | 52 bits mantissa 45 | complex[float32] Complex number, represented by two 32-bit floats (real 46 | and imaginary components) 47 | complex[float64] Complex number, represented by two 64-bit floats (real 48 | and imaginary components) 49 | ================ ========================================================= 50 | 51 | Additionally, there are types which are not fully specified at the 52 | bit/byte level. 53 | 54 | .. cssclass:: table-striped 55 | 56 | ========== ========================================================= 57 | Bit type Description 58 | ========== ========================================================= 59 | string Variable length Unicode string. 60 | bytes Variable length array of bytes. 61 | json Variable length Unicode string which contains JSON. 62 | date Date in the proleptic Gregorian calendar. 63 | time Time not attached to a date. 64 | datetime Point in time, combination of date and time. 65 | units Associates physical units with numerical values. 66 | ========== ========================================================= 67 | 68 | Many python types can be mapped to datashape types: 69 | 70 | .. 
cssclass:: table-striped
71 | 
72 | ================== =========================================================
73 | Python type        Datashape
74 | ================== =========================================================
75 | int                int32
76 | bool               bool
77 | float              float64
78 | complex            complex[float64]
79 | str                string
80 | unicode            string
81 | datetime.date      date
82 | datetime.time      time
83 | datetime.datetime  datetime or datetime[tz='']
84 | datetime.timedelta units['microsecond', int64]
85 | bytes              bytes
86 | bytearray          bytes
87 | buffer             bytes
88 | ================== =========================================================
89 | 
90 | String Types
91 | ~~~~~~~~~~~~
92 | 
93 | To Blaze, all strings are sequences of unicode code points, following
94 | in the footsteps of Python 3. The default Blaze string atom, simply
95 | called "string", is a variable-length string which can contain any
96 | unicode values. There is also a fixed-size variant compatible with
97 | NumPy's strings, like ``string[16, "ascii"]``.
98 | 
99 | Dimensions
100 | ----------
101 | 
102 | An asterisk (*) between two types signifies an array. A datashape
103 | consists of 0 or more ``dimensions`` followed by a ``dtype``.
104 | 
105 | For example, an integer array of size three is::
106 | 
107 | 3 * int
108 | 
109 | In this type, 3 is a ``fixed`` dimension, which means it is a dimension
110 | whose size is always as given. Other dimension types include ``var``.
111 | 
112 | Comparing with NumPy, the array created by
113 | ``np.empty((2, 3), 'int32')`` has datashape ``2 * 3 * int32``.
114 | 
115 | Records
116 | ~~~~~~~
117 | 
118 | Record types are ordered struct dtypes which hold a collection of
119 | types keyed by labels. Records look similar to Python
120 | dictionaries, but the order in which the names appear is important.
121 | 
122 | Example 1::
123 | 
124 | {
125 | name : string,
126 | age : int,
127 | height : int,
128 | weight : int
129 | }
130 | 
131 | Example 2::
132 | 
133 | {
134 | r: int8,
135 | g: int8,
136 | b: int8,
137 | a: int8
138 | }
139 | 
140 | Records are themselves type declarations, so they can be nested,
141 | but cannot be self-referential:
142 | 
143 | Example 3::
144 | 
145 | {
146 | a: { x: int, y: int },
147 | b: { x: int, z: int }
148 | }
149 | 
150 | Datashape Traits
151 | ~~~~~~~~~~~~~~~~
152 | 
153 | While datashape is a very general type system, there are a number
154 | of patterns a datashape might fit in.
155 | 
156 | Tabular datashapes have just one dimension, typically ``fixed`` or
157 | ``var``, followed by a record containing only simple types, not
158 | nested records. This can be intuitively thought of as data which
159 | will fit in a SQL table::
160 | 
161 | var * { x : int, y : real, z : date }
162 | 
163 | Homogeneous datashapes are arrays that have a simple dtype, the kind
164 | of data typically used in numeric computations. For example,
165 | a 3D velocity field might look like::
166 | 
167 | 100 * 100 * 100 * 3 * real
168 | 
169 | Type Variables
170 | ~~~~~~~~~~~~~~
171 | 
172 | Type variables are a separate class of types that express free variables
173 | scoped within type signatures. Holding type variables as first order
174 | terms in the signatures encodes the fact that a term can be used in many
175 | concrete contexts with different concrete types.
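Type variables parse like any other unit type; a quick sketch with the
Python implementation (assuming only the public ``dshape`` constructor
and its usual ``dshape("...")`` repr)::

    >>> from datashape import dshape
    >>> dshape('A * A * int32')
    dshape("A * A * int32")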
150 | Datashape Traits
151 | ~~~~~~~~~~~~~~~~
152 | 
153 | While datashape is a very general type system, there are a number
154 | of patterns a datashape might fit.
155 | 
156 | Tabular datashapes have just one dimension, typically ``fixed`` or
157 | ``var``, followed by a record containing only simple types, not
158 | nested records. This can be intuitively thought of as data which
159 | will fit in a SQL table::
160 | 
161 |     var * { x : int, y : real, z : date }
162 | 
163 | Homogeneous datashapes are arrays that have a simple dtype, the kind
164 | of data typically used in numeric computations. For example,
165 | a 3D velocity field might look like::
166 | 
167 |     100 * 100 * 100 * 3 * real
168 | 
169 | Type Variables
170 | ~~~~~~~~~~~~~~
171 | 
172 | Type variables are a separate class of types that express free variables
173 | scoped within type signatures. Holding type variables as first-order
174 | terms in the signatures encodes the fact that a term can be used in many
175 | concrete contexts with different concrete types.
176 | 
177 | For example, the type capable of expressing all square two-dimensional
178 | matrices could be written as a datashape with type variable ``A``,
179 | constraining the two dimensions to be the same::
180 | 
181 |     A * A * int32
182 | 
183 | A type capable of expressing rectangular variable-length arrays of integers
184 | can be written with two free type variables::
185 | 
186 |     A * B * int32
187 | 
188 | .. note::
189 | 
190 |    Any name beginning with an uppercase letter is parsed as a symbolic type
191 |    (as opposed to concrete). Symbolic types can be used both as dimensions and
192 |    as data types.
193 | 
194 | Option
195 | ~~~~~~
196 | 
197 | An option type represents data which may or may not be present. This is like
198 | data with ``NA`` values in R, or nullable columns in SQL. Given a type
199 | like ``int``, it can be transformed by prefixing it with a question mark
200 | as ``?int``, or equivalently using the type constructor ``option[int]``.
201 | 
202 | For example, a ``6 * ?int`` array can model the Python data:
203 | 
204 | ::
205 | 
206 |     [1, 2, 3, None, None, 4]
207 | 
208 | 
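Such types need not be written by hand: the ``datashape.discover`` function
infers them from data. A minimal sketch (the inferred integer width below is
what a typical 64-bit build reports; exact reprs may vary by version)::

    >>> from datashape import discover
    >>> discover([1, 2, None, 4])
    dshape("4 * ?int64")
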
* int32) -> A... * float32 54 | (A... * float64, A... * int64) -> A... * float64 55 | 56 | Coercions/Broadcasting as a System of Equations 57 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 58 | 59 | Let's say as input we get two arrays with datashapes 60 | `3 * 4 * float64` and `int32`. We can express this as 61 | two systems of coercion equations as follows (using ==> 62 | as a "coerces to" operator):: 63 | 64 | # float32 prototype 65 | 3 * 4 * float64 ==> A... * float32 66 | int32 ==> A... * int32 67 | 68 | # float64 prototype 69 | 3 * 4 * float64 ==> A... * float64 70 | int32 ==> A... * int32 71 | 72 | To solve these equations, we evaluate the legality 73 | of each coercion, and accumulate the set of values 74 | the `A...` type variable must take.:: 75 | 76 | # float32 prototype 77 | float64 ==> float32 # ILLEGAL 78 | 3 * 4 * ==> A... * # "3 * 4 *" in A... 79 | int32 ==> int32 # LEGAL 80 | * ==> A... # "*" in A... 81 | 82 | # float64 prototype 83 | float64 ==> float64 # LEGAL 84 | 3 * 4 * ==> A... * # "3 * 4 *" in A... 85 | int32 ==> int32 # LEGAL 86 | * ==> A... # "*" in A... 87 | 88 | The float32 prototype can be discarded because it requires an 89 | illegal coercion. In the float64 prototype, we collect the set 90 | of all `A...` values `{"3 * 4 *", "*"}`, broadcast them together 91 | to get `"3 * 4 *"`, and substitute this in the output. Doing 92 | all the substitutions in the full prototype produces:: 93 | 94 | (3 * 4 * float64, int32) -> 3 * 4 * float64 95 | 96 | as the matched function prototype that results. 97 | 98 | Disallowing Coercion 99 | -------------------- 100 | 101 | In the particular function we picked, ideally we don't want to 102 | allow implicit coercion of the type, because the nature of the 103 | function is to "load the exponent" in particular formats of 104 | floating point number. Saying `ldexp(True, 3)`, and having it 105 | work is kind of weird. 106 | 107 | One way to tackle this would be to add an `exact` type, both 108 | as a dimension and a data type, which indicates that broadcasting 109 | should be disallowed. For the discussion, in addition to `ldexp`, 110 | lets introduce a vector magnitude function `mag`, where we want 111 | to disallow scalar arrays to broadcast into it.:: 112 | 113 | # ldexp signatures 114 | (A... * exact[float32], A... * int32) -> A... * float32 115 | (A... * exact[float64], A... * int64) -> A... * float64 116 | 117 | # mag signatures 118 | (A... * exact[2] * float32) -> A... * float32 119 | (A... * exact[3] * float32) -> A... * float32 120 | 121 | # ufunc but disallowing broadcasting 122 | (exact[A...] * int32, exact[A...] * int32) -> A... * int32 123 | 124 | A possible syntactic sugar (which I'm not attached to, I think 125 | this needs some exploration) for this is:: 126 | 127 | # ldexp signatures 128 | (A... * float32=, A... * int32) -> A... * float32 129 | (A... * float64=, A... * int64) -> A... * float64 130 | 131 | # mag signatures 132 | (A... * 2= * float32) -> A... * float32 133 | (A... * 3= * float32) -> A... * float32 134 | 135 | # ufunc but disallowing broadcasting 136 | (A=.. * int32, A=.. * int32) -> A... * int32 137 | 138 | Factoring a Set of Signatures 139 | ----------------------------- 140 | 141 | One of the main things the multiple dispatch in DataShape has 142 | to do is match input arrays against a set of signatures very 143 | efficiently. 
98 | Disallowing Coercion
99 | --------------------
100 | 
101 | In the particular function we picked, ideally we don't want to
102 | allow implicit coercion of the type, because the nature of the
103 | function is to "load the exponent" in particular formats of
104 | floating point number. Having a call like `ldexp(True, 3)` succeed
105 | would be surprising.
106 | 
107 | One way to tackle this would be to add an `exact` type, both
108 | as a dimension and a data type, which indicates that broadcasting
109 | should be disallowed. For the discussion, in addition to `ldexp`,
110 | let's introduce a vector magnitude function `mag`, where we want
111 | to disallow scalar arrays from broadcasting into it::
112 | 
113 |     # ldexp signatures
114 |     (A... * exact[float32], A... * int32) -> A... * float32
115 |     (A... * exact[float64], A... * int64) -> A... * float64
116 | 
117 |     # mag signatures
118 |     (A... * exact[2] * float32) -> A... * float32
119 |     (A... * exact[3] * float32) -> A... * float32
120 | 
121 |     # ufunc but disallowing broadcasting
122 |     (exact[A...] * int32, exact[A...] * int32) -> A... * int32
123 | 
124 | A possible syntactic sugar (which I'm not attached to, I think
125 | this needs some exploration) for this is::
126 | 
127 |     # ldexp signatures
128 |     (A... * float32=, A... * int32) -> A... * float32
129 |     (A... * float64=, A... * int64) -> A... * float64
130 | 
131 |     # mag signatures
132 |     (A... * 2= * float32) -> A... * float32
133 |     (A... * 3= * float32) -> A... * float32
134 | 
135 |     # ufunc but disallowing broadcasting
136 |     (A=.. * int32, A=.. * int32) -> A... * int32
137 | 
138 | Factoring a Set of Signatures
139 | -----------------------------
140 | 
141 | One of the main things the multiple dispatch in DataShape has
142 | to do is match input arrays against a set of signatures very
143 | efficiently. We need to be able to hide the abstraction we're
144 | creating, and provide performance competitive with, but ideally
145 | superior to, what NumPy provides in its ufunc system.
146 | 
147 | Factoring the set of signatures into two or more stages which
148 | are simpler to solve and can prune the possibilities more quickly
149 | is one way to do this abstraction hiding. Let's use the `add` function
150 | for our example, with the following subset of signatures. We've
151 | included the `datetime` signatures to dispel any notion that the
152 | signatures will always match precisely::
153 | 
154 |     # add signatures
155 |     (A... * int32, A... * int32) -> A... * int32
156 |     (A... * int64, A... * int64) -> A... * int64
157 |     (A... * float32, A... * float32) -> A... * float32
158 |     (A... * float64, A... * float64) -> A... * float64
159 |     (A... * timedelta, A... * timedelta) -> A... * timedelta
160 |     (A... * datetime, A... * timedelta) -> A... * datetime
161 |     (A... * timedelta, A... * datetime) -> A... * datetime
162 | 
163 | Because the broadcasting of all these cases is identical, we
164 | can transform this set of signatures into two stages as follows::
165 | 
166 |     # broadcasting stage
167 |     (A... * X, A... * Y) -> A... * Z
168 | 
169 |     # data type stage matched against (X, Y)
170 |     (int32, int32) -> int32
171 |     (int64, int64) -> int64
172 |     (float32, float32) -> float32
173 |     (float64, float64) -> float64
174 |     (timedelta, timedelta) -> timedelta
175 |     (datetime, timedelta) -> datetime
176 |     (timedelta, datetime) -> datetime
177 | 
178 | Let's work through this example to illustrate how it works::
179 | 
180 |     # Stage 1: Input arrays "3 * 1 * int32", "4 * float32"
181 |     # (A... * X, A... * Y) -> A... * Z
182 |     int32 ==> X          # "int32" in X
183 |     3 * 1 * ==> A...     # "3 * 1 *" in A...
184 |     float32 ==> Y        # "float32" in Y
185 |     4 * ==> A...         # "4 *" in A...
186 | 
187 |     # Solution: A... is "3 * 4 *", X is "int32", and Y is "float32"
188 |     # Stage 2: Input arrays "int32" and "float32"
189 |     # (int32, int32) -> int32
190 |     int32 ==> int32      # LEGAL
191 |     float32 ==> int32    # ILLEGAL
192 |     # (float32, float32) -> float32
193 |     int32 ==> float32    # LEGAL
194 |     float32 ==> float32  # LEGAL
195 |     # etc.
196 | 
197 |     # Assume we picked (float32, float32) -> float32
198 |     # so the variables are:
199 |     #   X is "float32"
200 |     #   Y is "float32"
201 |     #   Z is "float32"
202 |     # giving the solution substituted into stage 1:
203 |     (3 * 1 * float32, 4 * float32) -> 3 * 4 * float32
204 | 
205 | 
--------------------------------------------------------------------------------
/docs/source/releases.rst:
--------------------------------------------------------------------------------
1 | =============
2 | Release Notes
3 | =============
4 | 
5 | 
6 | .. include:: whatsnew/0.5.0.txt
--------------------------------------------------------------------------------
/docs/source/svg/type_expand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/blaze/datashape/c9d2bd75414a69d94498e7340ef9dd5fce903007/docs/source/svg/type_expand.png
--------------------------------------------------------------------------------
/docs/source/types.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | DataShape Types
3 | ===============
4 | 
5 | In addition to :doc:`defining the grammar <grammar>`, datashape specifies
6 | a standard set of types and some properties those types should have.
7 | Type constructors can be classified as ``dimension`` or ``dtype``, and a 8 | datashape is always composed of zero or more dimensions followed by 9 | a dtype. 10 | 11 | Dimension Types 12 | =============== 13 | 14 | Fixed Dimension 15 | --------------- 16 | 17 | ``fixed[4]`` 18 | 19 | A dimension whose size is specified. This is the most common 20 | dimension type used in Blaze, and ``4 * int32`` is syntactic sugar for 21 | ``fixed[4] * int32`` in datashape syntax. 22 | 23 | Var Dimension 24 | ------------- 25 | 26 | ``var`` 27 | 28 | A dimension whose size may be different across instances. 29 | A common use of this is a ragged array like ``4 * var * int32``. 30 | 31 | Type Variables 32 | -------------- 33 | 34 | ``typevar['DimName']`` 35 | 36 | Constructs a type variable. ``DimName`` is syntactic sugar for 37 | ``typevar['DimName']``. This is used for pattern matching types, 38 | particularly for function prototypes. For example the 39 | datashape ``(M * N * int32) -> N * int32`` accepts an input 40 | with two dimensions that are type variables, and returns a 41 | one dimensional array using one of those dimension types. 42 | 43 | Ellipsis 44 | -------- 45 | 46 | ``ellipsis`` 47 | 48 | Constructs an ellipsis for matching multiple broadcast dimensions. 49 | ``...`` is syntactic sugar for ``ellipsis``. 50 | 51 | ``ellipsis['DimVar']`` 52 | 53 | Constructs a named ellipsis for matching multiple broadcast dimensions. 54 | ``Dim...`` is syntactic sugar for ``ellipsis['Dim']``. 55 | 56 | DTypes 57 | ====== 58 | 59 | Boolean Type 60 | ------------ 61 | 62 | ``bool`` 63 | 64 | A boolean type which may take on two values, ``True`` and ``False``. 65 | In Blaze and DyND, this is stored as a single byte which may take on 66 | the values 1 and 0. 67 | 68 | Default Integer 69 | --------------- 70 | 71 | ``int`` 72 | 73 | This is an alias for ``int32``. 74 | 75 | Arbitrary-Precision Integer 76 | --------------------------- 77 | 78 | ``bignum`` or ``bigint`` 79 | 80 | An integer type which has no minimum or maximum value. This is not 81 | implemented in Blaze or DyND presently and the final name for it hasn't 82 | been locked down. 83 | 84 | Signed Integer Types 85 | -------------------- 86 | 87 | ``int8`` 88 | ``int16`` 89 | ``int32`` 90 | ``int64`` 91 | ``int128`` 92 | 93 | Integer types whose behavior follows that of twos-complement integers 94 | of the given size. 95 | 96 | Unsigned Integer Types 97 | ---------------------- 98 | 99 | ``uint8`` 100 | ``uint16`` 101 | ``uint32`` 102 | ``uint64`` 103 | ``uint128`` 104 | 105 | Integer types whose behavior follows that of unsigned integers of 106 | the given size. 107 | 108 | Platform-Specific Integer Aliases 109 | --------------------------------- 110 | 111 | ``intptr`` 112 | ``uintptr`` 113 | 114 | Aliases for ``int##`` and ``uint##`` where ## is the size of a pointer type on 115 | the platform. 116 | 117 | Default Floating Point 118 | ---------------------- 119 | 120 | ``real`` 121 | 122 | This is an alias for ``float64``. 123 | 124 | Binary Floating Point 125 | --------------------- 126 | 127 | ``float16`` 128 | ``float32`` 129 | ``float64`` 130 | ``float128`` 131 | 132 | Binary floating point types as defined by IEEE 754-2008. Each type 133 | corresponds to the ``binary##`` type defined in the standard. 134 | 135 | Note that ``float128`` is not a C/C++ ``long double``, except on such 136 | platforms where they coincide. 
NumPy defines a ``float128`` on
137 | some platforms which is not IEEE ``binary128``, and is thus different
138 | from DataShape's type of the same name on those platforms.
139 | 
140 | TODO: Support for C/C++ ``long double``. This is tricky given that
141 | DataShape intends to be cross-platform, and maybe some inspiration
142 | can be taken from HDF5 for specifying them.
143 | 
144 | Decimal Floating Point
145 | ----------------------
146 | 
147 | ``decimal32``
148 | ``decimal64``
149 | ``decimal128``
150 | 
151 | Decimal floating point types as defined by IEEE 754-2008. These are
152 | not implemented in Blaze or DyND presently.
153 | 
154 | Default Complex
155 | ---------------
156 | 
157 | ``complex``
158 | 
159 | This is an alias for ``complex[float64]``.
160 | 
161 | Complex
162 | -------
163 | 
164 | ``complex[float32]``
165 | 
166 | Constructs a complex number type from a real number type.
167 | 
168 | Void
169 | ----
170 | 
171 | ``void``
172 | 
173 | A type which can store no data. It is not intended to be constructed
174 | in concrete arrays, but to allow for things like function prototypes
175 | with a ``void`` return type.
176 | 
177 | String
178 | ------
179 | 
180 | ``string``
181 | 
182 | A unicode string that can be arbitrarily sized. In Blaze and DyND, this
183 | is a UTF-8 encoded string.
184 | 
185 | ``string[16]``
186 | 
187 | A unicode string in a fixed-size UTF-8 buffer. The string is
188 | zero-terminated, but as in NumPy, all bytes may be filled with character
189 | data, so the buffer is not valid as a C-style string.
190 | 
191 | ``string['utf16']``
192 | 
193 | A unicode string that can be arbitrarily sized, using the specified
194 | encoding. Valid values for the encoding are ``'ascii'``, ``'utf8'``,
195 | ``'utf16'``, ``'utf32'``, ``'ucs2'``, and ``'cp###'`` for valid
196 | code pages.
197 | 
198 | ``string[16, 'utf16']``
199 | 
200 | A unicode string in a fixed-size buffer of the specified number of bytes,
201 | encoded with the requested encoding. The string is
202 | zero-terminated, but as in NumPy, all bytes may be filled with character
203 | data, so the buffer is not valid as a C-style string.
204 | 
205 | Character
206 | ---------
207 | 
208 | ``char``
209 | 
210 | A value which contains a single unicode code point. Typically stored as
211 | a 32-bit integer.
212 | 
213 | Bytes
214 | -----
215 | 
216 | ``bytes``
217 | 
218 | An arbitrarily sized blob of bytes. This is like ``bytes`` in Python 3.
219 | 
220 | ``bytes[16]``
221 | 
222 | A fixed-size blob of bytes. Unlike the fixed-size ``string`` case, it is
223 | not zero-terminated; it is always exactly the specified number of bytes.
224 | 
225 | Categorical
226 | -----------
227 | 
228 | ``categorical[['low', 'medium', 'high'], type=string, ordered=True]``
229 | 
230 | Constructs a type whose values are constrained to a particular set.
231 | The ``type`` parameter is optional and is inferred from the first argument.
232 | The ``ordered`` parameter is a boolean indicating whether the values in the
233 | set are ordered, so that certain functions like ``min`` and ``max`` work.
234 | 
235 | .. note::
236 | 
237 |    The categorical type *assumes* that the input categories are unique.
238 | 
239 | JSON
240 | ----
241 | 
242 | ``json``
243 | 
244 | A unicode string which is known to contain values represented as JSON.
245 | 
246 | Records
247 | -------
248 | 
249 | ``struct[['name', 'age', 'height'], [string, int, real]]``
250 | 
251 | Constructs a record type with the given field names and types.
252 | ``{name: string, age: int}`` is syntactic sugar for
253 | ``struct[['name', 'age'], [string, int]]``.
254 | 
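In the Python package, the ``R`` alias described in the 0.5.0 release notes
below gives a slice-based way to spell the same record types. A minimal
sketch, assuming ``R`` and the unit types are importable from the top-level
``datashape`` namespace (and relying on the ``Mono`` equality behavior noted
in the 0.5.3 release notes)::

    >>> from datashape import R, dshape, int32, string
    >>> R['name': string, 'age': int32] == dshape('{name: string, age: int32}')
    True
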
255 | Tuples
256 | ------
257 | 
258 | ``tuple[[string, int, real]]``
259 | 
260 | Constructs a tuple type with the given types. ``(string, int)``
261 | is syntactic sugar for ``tuple[[string, int]]``.
262 | 
263 | Function Prototype
264 | ------------------
265 | 
266 | ``funcproto[[string, int], bool]``
267 | 
268 | Constructs a function prototype with the given argument and return types.
269 | ``(string, int) -> bool`` is syntactic sugar for
270 | ``funcproto[[string, int], bool]``.
271 | 
272 | Type Variables
273 | --------------
274 | 
275 | ``typevar['DTypeName']``
276 | 
277 | Constructs a type variable. ``DTypeName`` is syntactic sugar for
278 | ``typevar['DTypeName']``. This is used for pattern matching types,
279 | particularly for function prototypes. For example, the
280 | datashape ``(T, T) -> T`` accepts any two inputs, but requires
281 | that they have the same type.
282 | 
283 | Option/Missing Data
284 | -------------------
285 | 
286 | ``option[float32]``
287 | 
288 | Constructs a type based on the provided type which may have missing
289 | values. ``?float32`` is syntactic sugar for ``option[float32]``.
290 | 
291 | The type inside the option parameter may also have its own dimensions;
292 | for example, ``?3 * float32`` is syntactic sugar for ``option[3 * float32]``.
293 | 
294 | Pointer
295 | -------
296 | 
297 | ::
298 | 
299 |     pointer[target=2 * 3 * int32]
300 | 
301 | Constructs a type whose value is a pointer to values of the target type.
302 | 
303 | Maps
304 | ----
305 | 
306 | Represents the type of key-value pairs. This is used for discovering foreign
307 | key relationships in relational databases, but is meant to be useful outside of
308 | that context as well. For example, the type of a column of Python dictionaries
309 | whose keys are strings and values are 64-bit integers would be written as::
310 | 
311 |     var * map[string, int64]
312 | 
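As with the other constructors, the sugared map syntax is what the parsed
type prints back as. A minimal doctest-style sketch (the exact spacing of
the ``str`` output is assumed here, not guaranteed by this document)::

    >>> from datashape import dshape
    >>> str(dshape('var * map[string, int64]'))
    'var * map[string, int64]'
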
313 | Date, Time, and DateTime
314 | ------------------------
315 | 
316 | ``date``
317 | 
318 | A type which represents a single date in the Gregorian calendar.
319 | In DyND and Blaze, it is represented as a 32-bit signed integer offset
320 | from the date ``1970-01-01``.
321 | 
322 | ``time``
323 | ``time[tz='UTC']``
324 | 
325 | Represents a time in an abstract day (no time zone), or a day
326 | with the specified time zone.
327 | 
328 | Stored as a 64-bit integer offset from midnight,
329 | in ticks (100 ns units).
330 | 
331 | ``datetime``
332 | ``datetime[tz='UTC']``
333 | 
334 | Represents a moment in time in an abstract time zone if no time
335 | zone is provided, otherwise stored as UTC but representing time
336 | in the specified time zone.
337 | 
338 | Stored as a 64-bit signed integer offset from
339 | ``0001-01-01T00:00:00`` in ticks (100 ns units), the "universal
340 | time scale" from the ICU library. Follows the POSIX convention
341 | of ignoring leap seconds.
342 | 
343 | http://userguide.icu-project.org/datetime/universaltimescale
344 | 
345 | ``units['second', int64]``
346 | 
347 | A type which represents a value with the units and type specified.
348 | Initially only time units are supported, to provide the datetime
349 | functionality without adding a special "timedelta" type.
350 | 
351 | Initial valid units are: '100*nanosecond' (ticks as in the datetime storage),
352 | 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'day'.
353 | We still need to decide on valid shortcuts in a context with more physical
354 | units, probably by adopting conventions from a good physical units library.
355 | 
356 | ``timetz``
357 | ``datetimetz``
358 | 
359 | Represents a time/datetime with the time zone attached to the data. Not
360 | implemented in Blaze/DyND.
361 | 
--------------------------------------------------------------------------------
/docs/source/whatsnew/0.4.7.txt:
--------------------------------------------------------------------------------
1 | Release |version|
2 | -----------------
3 | 
4 | :Release: |version|
5 | :Date: September 15, 2015
6 | 
7 | New Features
8 | ------------
9 | 
10 | * ``discover`` now works on ``frozenset`` objects (:issue:`160`).
11 | 
12 | New Types
13 | ---------
14 | 
15 | None
16 | 
17 | Experimental Types
18 | ------------------
19 | 
20 | .. warning::
21 | 
22 |    Experimental types are subject to change.
23 | 
24 | * Added a :class:`~datashape.coretypes.Map` type for representing key-value
25 |   pairs (:issue:`164`).
26 | * Add ``Decimal`` as a valid type (:issue:`118`).
27 | 
28 | 
29 | API Changes
30 | -----------
31 | 
32 | * Removed the ``Implements`` type. This will eventually be superseded by kind
33 |   types (:issue:`166`).
34 | * Auto inference of things with the ``shape`` and ``dtype`` attributes is
35 |   deprecated and will be removed in 0.5.0 (:issue:`165`).
36 | * :func:`~datashape.promote.promote` now accepts a new boolean argument
37 |   ``promote_option`` that controls whether ``Option(a), a`` should promote up
38 |   to an option type. This defaults to ``True``. For example:
39 |   ``promote(int64, Option(int32), promote_option=True)`` is ``Option(int64)``,
40 |   whereas ``promote(int64, Option(int64), promote_option=False)`` is ``int64``
41 |   (:issue:`172`).
42 | 
43 | Bug Fixes
44 | ---------
45 | 
46 | * Allow ``float16`` in the parser (:issue:`163`).
47 | * ``Mock`` objects were causing a segfault (:issue:`165`).
48 | * Fix an issue where calling :func:`~datashape.predicates.isnumeric` on an
49 |   instance of :class:`~datashape.coretypes.Time` would raise a ``TypeError``
50 |   (:issue:`167`).
51 | 
52 | Miscellaneous
53 | -------------
54 | 
55 | * Lots of dead code removed (:issue:`156`).
56 | * Updated versioneer to have a proper PyPI compatible version string
57 |   (:issue:`157`).
--------------------------------------------------------------------------------
/docs/source/whatsnew/0.5.0.txt:
--------------------------------------------------------------------------------
1 | Release |version|
2 | -----------------
3 | 
4 | :Release: |version|
5 | :Date: TBD
6 | 
7 | New Features
8 | ------------
9 | 
10 | None
11 | 
12 | New Types
13 | ---------
14 | 
15 | * datashape now supports a ``categorical`` type (:issue:`150`).
16 | 
17 | Experimental Types
18 | ------------------
19 | 
20 | .. warning::
21 | 
22 |    Experimental types are subject to change.
23 | 
24 | * Records can now be constructed with the new syntax:
25 |   ``R['field0':type0, 'field1':type1, ...]``
26 |   where each slice object represents a field in the record.
27 |   ``R`` acts as an alias for ``Record`` to make it more pleasant to
28 |   construct these literal types (:issue:`186`).
29 | 
30 | API Changes
31 | -----------
32 | 
33 | * ``datashape`` no longer supports Python 2.6 (:issue:`189`).
34 | * ``datashape`` no longer supports Python 3.3 (:issue:`191`).
35 | * The default ``repr`` of ``Mono`` subclasses now prints out the slot
36 |   names as keyword arguments next to their values (:issue:`188`).
37 | For example 38 | 39 | Instead of 40 | 41 | .. code-block:: python 42 | 43 | >>> from datashape import Decimal 44 | >>> Decimal(precision=11, scale=2) 45 | Decimal(11, 2) 46 | 47 | we have 48 | 49 | .. code-block:: python 50 | 51 | >>> Decimal(precision=11, scale=2) 52 | Decimal(precision=11, scale=2) 53 | * Fields are now always constructed with ``str`` in Record datashapes 54 | (:issue:`197`). 55 | 56 | Bug Fixes 57 | --------- 58 | 59 | * Makes the parser recognize ``null`` and ``void`` (:issue:`183`). 60 | * Cache the datashape hash value to avoid potentially expensive recomputation 61 | during memoization (:issue:`184`). 62 | * Fix discovery of strings that start with things that look like numbers 63 | (:issue:`190`). 64 | * Makes the parser recognize ``object`` (:issue:`193`). 65 | * Make string field names in Record types have the same string type 66 | (:issue:`200`). 67 | * Fix the reprs for :class:`~datashape.coretypes.Function` objects 68 | (:issue:`194`). 69 | 70 | Miscellaneous 71 | ------------- 72 | 73 | None 74 | -------------------------------------------------------------------------------- /docs/source/whatsnew/0.5.1.txt: -------------------------------------------------------------------------------- 1 | Release |version| 2 | ----------------- 3 | 4 | :Release: |version| 5 | :Date: TBD 6 | 7 | New Features 8 | ------------ 9 | 10 | None 11 | 12 | New Types 13 | --------- 14 | 15 | None 16 | 17 | Experimental Types 18 | ------------------ 19 | 20 | .. warning:: 21 | 22 | Experimental types are subject to change. 23 | 24 | None 25 | 26 | API Changes 27 | ----------- 28 | 29 | * Function parse now accepts functions with no arguments like: ``() -> A`` 30 | (:issue:`198`). 31 | * Tuple parse now accepts the unit ``()`` (:issue:`198`). 32 | 33 | 34 | Bug Fixes 35 | --------- 36 | 37 | * Correctly return the input for :func:`~datashape.typesets.maxtype` when a 38 | :class:`~datashape.coretypes.TimeDelta` is passed (:issue:`207`). 39 | * :func:`~datashape.predicates.isscalar` now returns True for 40 | :class:`~datashape.coretypes.Categorical` types. 41 | 42 | Miscellaneous 43 | ------------- 44 | 45 | None 46 | -------------------------------------------------------------------------------- /docs/source/whatsnew/0.5.2.txt: -------------------------------------------------------------------------------- 1 | Release |version| 2 | ----------------- 3 | 4 | :Release: |version| 5 | :Date: TBD 6 | 7 | New Features 8 | ------------ 9 | 10 | * Adds :func:`~datashape.discover` support for :class:`types.MappingProxyType` 11 | objects in python 3 and :class:`types.DictProxyType` in python 2 12 | (:issue:`212`). 13 | * Adds :func:`~datashape.discover` support for :class:`collections.OrderedDict` 14 | objects (:issue:`212`). 15 | 16 | New Types 17 | --------- 18 | 19 | None 20 | 21 | Experimental Types 22 | ------------------ 23 | 24 | .. warning:: 25 | 26 | Experimental types are subject to change. 27 | 28 | None 29 | 30 | API Changes 31 | ----------- 32 | 33 | None 34 | 35 | Bug Fixes 36 | --------- 37 | 38 | * Fixes :func:`~datashape.promote` to handle :class:`~datashape.string` types 39 | correctly when mixing with :class:`~datashape.Option` types (:issue:`213`). 
40 | 
41 | Miscellaneous
42 | -------------
43 | 
44 | None
45 | 
--------------------------------------------------------------------------------
/docs/source/whatsnew/0.5.3.txt:
--------------------------------------------------------------------------------
1 | Release 0.5.3
2 | -------------
3 | 
4 | :Release: 0.5.3
5 | :Date: TBD
6 | 
7 | New Features
8 | ------------
9 | 
10 | None
11 | 
12 | New Types
13 | ---------
14 | 
15 | None
16 | 
17 | Experimental Types
18 | ------------------
19 | 
20 | .. warning::
21 | 
22 |    Experimental types are subject to change.
23 | 
24 | None
25 | 
26 | API Changes
27 | -----------
28 | 
29 | None
30 | 
31 | Bug Fixes
32 | ---------
33 | 
34 | * Fixes :func:`~coretypes.Mono.__eq__` to ensure that instances of
35 |   :class:`~coretypes.Mono` subclasses compare equal when it makes sense to do so.
36 |   For instance ``dshape("?string") == Option(string)`` now holds true
37 |   (:issue:`214`).
38 | 
39 | Miscellaneous
40 | -------------
41 | 
42 | None
43 | 
--------------------------------------------------------------------------------
/docs/source/whatsnew/0.5.4.txt:
--------------------------------------------------------------------------------
1 | Release 0.5.4
2 | -------------
3 | 
4 | :Release: 0.5.4
5 | :Date: December 28, 2016
6 | 
7 | New Features
8 | ------------
9 | 
10 | None
11 | 
12 | New Types
13 | ---------
14 | 
15 | None
16 | 
17 | Experimental Types
18 | ------------------
19 | 
20 | .. warning::
21 | 
22 |    Experimental types are subject to change.
23 | 
24 | None
25 | 
26 | API Changes
27 | -----------
28 | 
29 | None
30 | 
31 | Bug Fixes
32 | ---------
33 | 
34 | * Python 3.6 compatibility for tests.
35 | 
36 | Miscellaneous
37 | -------------
38 | 
39 | None
40 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy >= 1.7
2 | multipledispatch >= 0.4.7
3 | python-dateutil
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | 
2 | # See the docstring in versioneer.py for instructions. Note that you must
3 | # re-run 'versioneer.py setup' after changing this section, and commit the
4 | # resulting files.
5 | 
6 | [versioneer]
7 | VCS = git
8 | style = pep440
9 | versionfile_source = datashape/_version.py
10 | versionfile_build = datashape/_version.py
11 | tag_prefix =
12 | parentdir_prefix = datashape-
13 | 
14 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from setuptools import setup
3 | 
4 | import versioneer
5 | 
6 | # Utility function to read the README file.
7 | # Used for the long_description. It's nice, because now 1) we have a top level
8 | # README file and 2) it's easier to type in the README file than to put a raw
9 | # string in below ...
10 | 11 | 12 | def read(fname): 13 | with open(os.path.join(os.path.dirname(__file__), fname)) as f: 14 | return f.read() 15 | 16 | 17 | setup( 18 | name='datashape', 19 | version=versioneer.get_version(), 20 | cmdclass=versioneer.get_cmdclass(), 21 | author='Continuum Analytics', 22 | author_email='blaze-dev@continuum.io', 23 | description='A data description language.', 24 | license='BSD', 25 | keywords='data language', 26 | url='http://datashape.readthedocs.org/en/latest/', 27 | packages=['datashape', 'datashape.util', 'datashape.tests'], 28 | install_requires=read('requirements.txt').strip().split('\n'), 29 | long_description=read('README.rst'), 30 | zip_safe=False, 31 | classifiers=[ 32 | 'Development Status :: 3 - Alpha', 33 | 'Intended Audience :: Developers', 34 | 'Topic :: Software Development', 35 | 'License :: OSI Approved :: BSD License', 36 | ], 37 | ) 38 | --------------------------------------------------------------------------------